Inspect Audio Non-silence Intervals
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 5 16:21:29 2018
@author: PeterTsai
"""
import glob
import os
import re

from pydub import AudioSegment
from pydub.silence import split_on_silence
#import numpy as np
# Minimum run of silence, in milliseconds, that separates two speech chunks.
# (The misspelt name is kept as-is so anything referring to it keeps working.)
sliencePeriod = 800
# Directory scanned (non-recursively) for the .wav files to inspect.
source_dir = 'choices'
# Report file; the silence setting is embedded in its name.
summary_length_txt = 'summary_time_length_pydub{}.txt'.format(sliencePeriod)
# Sorted so the report lines come out in a stable order.
speechFileList = sorted(glob.glob(source_dir+'/*.wav'))

# `with` guarantees the report is flushed and closed even when decoding or
# splitting one of the clips raises part-way through the run.
with open(summary_length_txt, 'w') as summary_length_fid:
    for speechFile in speechFileList:
        print('{} processing...'.format(speechFile))
        speech = AudioSegment.from_wav(speechFile)
        # Derive the clip name from the path itself instead of a '/'-based
        # regex: glob may return paths with the platform's own separator, in
        # which case the regex would not match and the lookup would crash.
        filename = os.path.splitext(os.path.basename(speechFile))[0]
        chunks = split_on_silence(
            speech,
            # A pause must last at least `sliencePeriod` ms to split the clip.
            min_silence_len=sliencePeriod,
            # The threshold is relative to each clip: anything quieter than
            # the clip's own overall loudness (speech.dBFS) counts as silence.
            # Adjust this per requirement.
            silence_thresh=speech.dBFS,
        )
        # len() of a pydub segment is its duration in milliseconds (per the
        # pydub docs); report seconds.
        time_length = [len(chunk)/1000 for chunk in chunks]
        time_str = ', '.join(str(seconds) for seconds in time_length)
        # One line per clip: "<name>: <d1>, <d2>, ... (<count> elements)".
        summary_length_fid.write(
            '{}: {} ({} elements)\n'.format(filename, time_str, len(time_length))
        )
Figure 1: Flowchart for measuring the time intervals of the audio segments in `choices`.
Figure 2: Resulting time intervals of the segments.