Sentence Segmentation
Figure 1 Schematic of Segmentation of Choices Audio.
Figure 1 是資料庫裡面 choices 資料夾下的 C0000001.wav。
使用 Sentence Segmentation 之後，程式會把不同 choice 之間的 silence 去除，並把每個選項開頭的報數（"1"、"2"、"3"、"4"）跟真正 choice 的內容分開。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 7 08:45:09 2018
@author: petertsai
"""
import librosa.display
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Sampling rate (Hz) requested from librosa when loading the recording.
sr = 16000
y, sr = librosa.load('C0000001.wav', sr=sr)

# Largest sample value, computed once: it sets the extent of the boundary
# lines and the baseline of the text labels drawn below.
peak = y.max()

plt.figure(figsize=(30, 10))
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`; fall back
# to the old name so the script still runs on older installations.
plot_waveform = getattr(librosa.display, 'waveshow', None)
if plot_waveform is None:
    plot_waveform = librosa.display.waveplot
plot_waveform(y, sr=sr)

# plot words boundary
# Each entry is a (start, end) pair on the time axis (presumably seconds).
# "command" spans mark the spoken option numbers, "choice" spans mark the
# option contents.
commands_time = [(1.54, 1.92), (3.83, 4.16), (5.88, 6.30), (7.86, 8.26)]
center_commands_time = [(start + end) / 2 for start, end in commands_time]
choices_time = [(2.31, 3.35), (4.45, 5.52), (6.51, 7.55), (8.52, 9.53)]
center_choices_time = [(start + end) / 2 for start, end in choices_time]

# vlines expects a flat sequence of x positions, so flatten the pairs
# explicitly: one dashed line per segment start and per segment end.
command_edges = [t for span in commands_time for t in span]
choice_edges = [t for span in choices_time for t in span]
plt.vlines(command_edges, -peak, peak, color='r', alpha=0.9,
           linestyle='--', label='command')
plt.vlines(choice_edges, -peak, peak, color='m', alpha=0.9,
           linestyle='--', label='choice')

# plot text
# A font file is passed explicitly because the choice labels contain
# Chinese characters.
font = FontProperties(fname='BiauKai.ttf', size=24)
commands_text = ['"1"', '"2"', '"3"', '"4"']
choices_text = ['"1.5公里"', '"1.4公里"', '"1.3公里"', '"1.6公里"']
alignment = {'horizontalalignment': 'center', 'verticalalignment': 'baseline'}

# Centre each label over its segment, sitting on top of the boundary lines.
for command_x, command_label, choice_x, choice_label in zip(
        center_commands_time, commands_text,
        center_choices_time, choices_text):
    plt.text(command_x, peak, command_label, fontproperties=font,
             weight='bold', size=36,
             **alignment)
    plt.text(choice_x, peak, choice_label, fontproperties=font,
             weight='bold', size=36,
             **alignment)

plt.xticks(fontsize=24)
plt.xlabel('Time', fontsize=36)
plt.yticks(fontsize=24)
plt.ylabel('envelope value', fontsize=36)
plt.title('Choice Segmentation', fontsize=36)
plt.legend(loc='best', fontsize='x-large')
plt.tight_layout()
plt.savefig("filename.png")