Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os, sys
from threading import Thread
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst, Gtk, Gdk
# One-time, import-time initialisation performed before any widget is built.
# NOTE(review): GObject.threads_init() and Gdk.threads_init() are reported as
# deprecated in recent PyGObject / GTK 3 releases -- confirm against the
# versions this script targets before removing or relying on them.
GObject.threads_init()
Gdk.threads_init()
# Initialise GStreamer; None means no command-line arguments are handed to it.
Gst.init(None)
class ASR(Gtk.HBox):
    """GTK widget that displays live speech-recognition hypotheses.

    The widget packs a read-only, word-wrapped text view (inside a scrolled
    window) next to a "Speak"/"Stop" toggle button.  A GStreamer pipeline
    ``pulsesrc -> audioconvert -> audioresample -> kaldinnet2onlinedecoder ->
    fakesink`` is built on a background thread; the decoder's
    ``partial-result`` and ``final-result`` signals update both the text
    view and the ``hyp`` list.

    Attributes:
        hyp: list of hypothesis strings; the last entry is the utterance
            currently being decoded.
        hyp_callback: optional callable invoked with ``hyp`` (the same list
            object, not a copy) every time a hypothesis changes.
    """

    def __init__(self, hyp_callback=None):
        """Build the widgets and start loading the recogniser.

        Args:
            hyp_callback: optional callable taking the list of hypotheses;
                called from the decoder signal handlers.
        """
        super(ASR, self).__init__()

        # Read-only transcript view.
        self.text = Gtk.TextView()
        self.text.set_editable(False)
        self.text.set_cursor_visible(False)
        self.buffer = self.text.get_buffer()
        self.text.set_wrap_mode(Gtk.WrapMode.WORD)

        # Vertical-only scrolling container for the transcript.
        self.scrolled = Gtk.ScrolledWindow()
        self.scrolled.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.ALWAYS)
        self.scrolled.add_with_viewport(self.text)
        self.scrolled.set_size_request(-1, 100)
        self.pack_start(self.scrolled, True, True, 5)

        # The button stays insensitive until the pipeline has been built.
        # The keyword form is used because passing the label positionally
        # is deprecated in PyGObject.
        self.button = Gtk.Button(label="Speak")
        self.button.set_sensitive(False)
        self.pack_start(self.button, False, False, 5)
        self.button.connect('clicked', self.button_clicked)

        # Keep the newest text visible whenever the view is re-allocated.
        self.text.connect("size-allocate", self.autoscroll)

        self.hyp = []
        self.hyp_callback = hyp_callback

        # Building the pipeline is done off the calling thread.
        Thread(target=self.init_gst).start()

    def init_gst(self):
        """Initialize the speech components.

        Creates the GStreamer elements, configures the Kaldi decoder from
        the files under ``asr/``, links the pipeline and sets it to PLAYING
        with the decoder silenced.  UI updates are scheduled through
        ``GObject.idle_add``.

        NOTE(review): ``__init__`` runs this method on a worker ``Thread``.
        ``sys.exit()`` raises ``SystemExit`` only in the calling thread, so
        the error paths below end this thread but not the process, and the
        button is left showing "Loading...".  Confirm the intended shutdown
        behaviour with the host application.
        """
        GObject.idle_add(self._started_loading_asr)

        self.pulsesrc = Gst.ElementFactory.make("pulsesrc", "pulsesrc")
        if self.pulsesrc is None:
            sys.stderr.write(
                "Error loading pulsesrc GST plugin. You probably need the "
                "gstreamer1.0-pulseaudio package\n")
            sys.exit()

        self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert")
        self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample")
        self.asr = Gst.ElementFactory.make("kaldinnet2onlinedecoder", "asr")
        self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")

        if not self.asr:
            sys.stderr.write("Couldn't create the kaldinnet2onlinedecoder element. \n")
            if "GST_PLUGIN_PATH" in os.environ:
                sys.stderr.write("Have you compiled the Kaldi GStreamer plugin?\n")
            else:
                sys.stderr.write(
                    "You probably need to set the GST_PLUGIN_PATH envoronment variable\n")
                sys.stderr.write(
                    "Try running: GST_PLUGIN_PATH=../src %s\n" % sys.argv[0])
            sys.exit()

        model_file = "asr/final.mdl"
        if not os.path.isfile(model_file):
            sys.stderr.write("Models not downloaded? Run prepare-models.sh first!\n")
            sys.exit(1)

        # Properties are applied in exactly this order.
        decoder_properties = (
            ("fst", "asr/HCLG.fst"),
            ("model", model_file),
            ("word-syms", "asr/words.txt"),
            ("feature-type", "mfcc"),
            ("mfcc-config", "asr/conf/mfcc.conf"),
            ("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf"),
            ("max-active", 7000),
            ("beam", 11.0),
            ("lattice-beam", 6.0),
            ("do-endpointing", True),
            ("endpoint-silence-phones", "1:2:3:4:5:6:7:8:9:10"),
        )
        for prop_name, prop_value in decoder_properties:
            self.asr.set_property(prop_name, prop_value)

        # initially silence the decoder
        self.asr.set_property("silent", True)

        self.pipeline = Gst.Pipeline()
        chain = [self.pulsesrc, self.audioconvert, self.audioresample,
                 self.asr, self.fakesink]
        for element in chain:
            self.pipeline.add(element)

        self.pulsesrc.link(self.audioconvert)
        self.audioconvert.link(self.audioresample)
        self.audioresample.link(self.asr)
        self.asr.link(self.fakesink)

        self.asr.connect('partial-result', self._on_partial_result)
        self.asr.connect('final-result', self._on_final_result)
        self.pipeline.set_state(Gst.State.PLAYING)

        GObject.idle_add(self._finished_loading_asr)

    def load_state(self):
        """Restore the decoder's ``adaptation-state`` from ``state.txt``.

        Best effort: any failure (missing file, decoder not created yet,
        rejected value) is reported on stderr and otherwise ignored.
        """
        try:
            with open('state.txt') as fp:
                self.asr.set_property('adaptation-state', fp.read())
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            sys.stderr.write('failed to load asr state\n')

    def save_state(self):
        """Write the decoder's ``adaptation-state`` to ``state.txt``.

        Does nothing if the decoder attribute has not been set yet.  A
        failure while writing is reported on stderr and otherwise ignored.
        """
        if not hasattr(self, 'asr'):
            return
        state = self.asr.get_property('adaptation-state')
        try:
            with open('state.txt', 'w') as fp:
                fp.write(state)
        except Exception:
            sys.stderr.write('failed to save asr state\n')

    def _started_loading_asr(self):
        """Disable the button and label it "Loading..."."""
        self.button.set_sensitive(False)
        self.button.set_label("Loading...")

    def _finished_loading_asr(self):
        """Re-enable the button and label it "Speak"."""
        self.button.set_label("Speak")
        self.button.set_sensitive(True)

    def _replace_last_line(self, text):
        """Replace the last line of the text buffer with ``text``."""
        last_line = self.buffer.get_line_count() - 1
        self.insert = self.buffer.get_iter_at_line(last_line)
        self.buffer.delete(self.insert, self.buffer.get_end_iter())
        self.buffer.insert(self.insert, text)

    def _on_partial_result(self, asr, hyp):
        """Show an in-progress hypothesis on the last line of the buffer.

        Args:
            asr: the element that emitted ``partial-result``.
            hyp: the current partial hypothesis text.
        """
        Gdk.threads_enter()
        try:
            if not self.hyp:
                self.hyp = ['']
            self.hyp[-1] = hyp
            if self.hyp_callback:
                self.hyp_callback(self.hyp)
            self._replace_last_line(hyp + '...')
        finally:
            # Always release the GDK lock, even if the callback raises.
            Gdk.threads_leave()

    def _on_final_result(self, asr, hyp):
        """Commit a finished hypothesis and open a new line for the next one.

        Args:
            asr: the element that emitted ``final-result``.
            hyp: the final hypothesis text for the utterance.
        """
        Gdk.threads_enter()
        try:
            if not self.hyp:
                self.hyp = ['']
            self.hyp[-1] = hyp
            # Empty slot for the utterance that follows.
            self.hyp.append('')
            if self.hyp_callback:
                self.hyp_callback(self.hyp)
            self._replace_last_line(hyp + '\n...')
        finally:
            # Always release the GDK lock, even if the callback raises.
            Gdk.threads_leave()

    def autoscroll(self, *args):
        """Scroll the transcript to the bottom (signal arguments are ignored)."""
        adj = self.scrolled.get_vadjustment()
        adj.set_value(adj.get_upper() - adj.get_page_size())

    def button_clicked(self, button):
        """Handle button presses.

        A button labelled "Speak" starts a new session: the decoder is
        un-silenced and the hypotheses and transcript are reset.  Any other
        label silences the decoder and restores the "Speak" label.
        """
        if button.get_label() == "Speak":
            button.set_label("Stop")
            self.asr.set_property("silent", False)
            self.hyp = []
            self.buffer.set_text('...')
        else:
            button.set_label("Speak")
            self.asr.set_property("silent", True)