aS.silenceRemoval
https://github.com/tyiannak/pyAudioAnalysis/wiki/5.-Segmentation
def __envelope(x, hop):
'''Compute the max-envelope of x at a stride/frame length of h'''
return util.frame(x, hop_length=hop, frame_length=hop).max(axis=0)
waveplot function
def waveplot(y, sr=22050, max_points=5e4, x_axis='time', offset=0.0,
max_sr=1000, ax=None, **kwargs):
...
if max_points is not None:
if max_points <= 0:
raise ParameterError('max_points must be strictly positive')
if max_points < y.shape[-1]:
target_sr = min(max_sr, (sr * y.shape[-1]) // max_points)
hop_length = sr // target_sr
if y.ndim == 1:
y = __envelope(y, hop_length)
else:
y = np.vstack([__envelope(_, hop_length) for _ in y])
if y.ndim > 1:
y_top = y[0]
y_bottom = -y[1]
else:
y_top = y
y_bottom = -y
...
out = axes.fill_between(locs, y_bottom, y_top, **kwargs)
...
Test id1 ,id26, id 51
[想法] 他是使用svm訓練slience interval 跟 onset interval classifier。
裡面還是有一些heuristic coefficient,或許可以另外寫一個重新調整。
- 10% of total Audio Feature (top 10% and bottom 10%)
- remove small segments (minDruation = 0.2) -> 0.1
- weight > 1 -> weight=0.99, weight < 0 -> weight = 0.01
weight = 0 -> 全部都以low 10% energy threshold 為主
weight = 1 -> 全部以top 10% energy threshold 為主
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
'''
Event Detection (silence removal)
ARGUMENTS:
- x: the input audio signal
- Fs: sampling freq
- stWin, stStep: window size and step in seconds
- smoothWindow: (optinal) smooth window (in seconds)
- Weight: (optinal) weight factor (0 < Weight < 1) the higher, the more strict
- plot: (optinal) True if results are to be plotted
RETURNS:
- segmentLimits: list of segment limits in seconds (e.g [[0.1, 0.9], [1.4, 3.0]] means that
the resulting segments are (0.1 - 0.9) seconds and (1.4, 3.0) seconds
'''
if Weight >= 1:
Weight = 0.99
if Weight <= 0:
Weight = 0.01
# Step 1: feature extraction
x = audioBasicIO.stereo2mono(x) # convert to mono
ShortTermFeatures = aF.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs) # extract short-term features
# Step 2: train binary SVM classifier of low vs high energy frames
EnergySt = ShortTermFeatures[1, :] # keep only the energy short-term sequence (2nd feature)
E = numpy.sort(EnergySt) # sort the energy feature values:
L1 = int(len(E) / 10) # number of 10% of the total short-term windows
T1 = numpy.mean(E[0:L1]) + 0.000000000000001 # compute "lower" 10% energy threshold
T2 = numpy.mean(E[-L1:-1]) + 0.000000000000001 # compute "higher" 10% energy threshold
Class1 = ShortTermFeatures[:, numpy.where(EnergySt <= T1)[0]] # get all features that correspond to low energy
Class2 = ShortTermFeatures[:, numpy.where(EnergySt >= T2)[0]] # get all features that correspond to high energy
featuresSS = [Class1.T, Class2.T] # form the binary classification task and ...
[featuresNormSS, MEANSS, STDSS] = aT.normalizeFeatures(featuresSS) # normalize and ...
SVM = aT.trainSVM(featuresNormSS, 1.0) # train the respective SVM probabilistic model (ONSET vs SILENCE)
# Step 3: compute onset probability based on the trained SVM
ProbOnset = []
for i in range(ShortTermFeatures.shape[1]): # for each frame
curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS # normalize feature vector
ProbOnset.append(SVM.predict_proba(curFV.reshape(1,-1))[0][1]) # get SVM probability (that it belongs to the ONSET class)
ProbOnset = numpy.array(ProbOnset)
ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep) # smooth probability
# Step 4A: detect onset frame indices:
ProbOnsetSorted = numpy.sort(ProbOnset) # find probability Threshold as a weighted average of top 10% and lower 10% of the values
Nt = ProbOnsetSorted.shape[0] / 10
T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) + Weight * numpy.mean(ProbOnsetSorted[-Nt::]))
MaxIdx = numpy.where(ProbOnset > T)[0] # get the indices of the frames that satisfy the thresholding
i = 0
timeClusters = []
segmentLimits = []
# Step 4B: group frame indices to onset segments
while i < len(MaxIdx): # for each of the detected onset indices
curCluster = [MaxIdx[i]]
if i == len(MaxIdx)-1:
break
while MaxIdx[i+1] - curCluster[-1] <= 2:
curCluster.append(MaxIdx[i+1])
i += 1
if i == len(MaxIdx)-1:
break
i += 1
timeClusters.append(curCluster)
segmentLimits.append([curCluster[0] * stStep, curCluster[-1] * stStep])
# Step 5: Post process: remove very small segments:
minDuration = 0.2
segmentLimits2 = []
for s in segmentLimits:
if s[1] - s[0] > minDuration:
segmentLimits2.append(s)
segmentLimits = segmentLimits2
if plot:
timeX = numpy.arange(0, x.shape[0] / float(Fs), 1.0 / Fs)
plt.subplot(2, 1, 1)
plt.plot(timeX, x)
for s in segmentLimits:
plt.axvline(x=s[0])
plt.axvline(x=s[1])
plt.subplot(2, 1, 2)
plt.plot(numpy.arange(0, ProbOnset.shape[0] * stStep, stStep), ProbOnset)
plt.title('Signal')
for s in segmentLimits:
plt.axvline(x=s[0])
plt.axvline(x=s[1])
plt.title('SVM Probability')
plt.show()
return segmentLimits
[0]
https://github.com/tyiannak/pyAudioAnalysis/wiki/5.-Segmentation
[1]
https://github.com/tyiannak/pyAudioAnalysis/blob/master/audioSegmentation.py