Skip to content
Snippets Groups Projects
Commit 646389c7 authored by Benoit Favre
Browse files

add support for mika models

parent 494a0602
No related branches found
No related tags found
No related merge requests found
...@@ -19,11 +19,12 @@ developing with pygtk3: http://lazka.github.io/pgi-docs/, https://python-gtk-3-t ...@@ -19,11 +19,12 @@ developing with pygtk3: http://lazka.github.io/pgi-docs/, https://python-gtk-3-t
Todo: Todo:
- use GtkSourceView to allow editing the xml file directly - model selection in user interface
- model selection in config file
- integrate new xml with actions - integrate new xml with actions
- account for custom acoustic scale and endpoint silence phones
add model from mika
/storage/raid1/homedirs/mickael.rouvier/raid2/kaldi_english/exp/nnet2_online/ /storage/raid1/homedirs/mickael.rouvier/raid2/kaldi_english/exp/nnet2_online/
--acoustic-scale=.04166666666666666666 --acoustic-scale=.04166666666666666666
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35 --endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35
- use GtkSourceView to allow editing the xml file directly
...@@ -9,7 +9,7 @@ Gdk.threads_init() ...@@ -9,7 +9,7 @@ Gdk.threads_init()
Gst.init(None) Gst.init(None)
class ASR(Gtk.HBox): class ASR(Gtk.HBox):
def __init__(self, hyp_callback = None, partial_hyp_callback = None): def __init__(self, asr_model_dir, hyp_callback = None, partial_hyp_callback = None):
super(ASR, self).__init__() super(ASR, self).__init__()
self.text = Gtk.TextView() self.text = Gtk.TextView()
...@@ -34,9 +34,9 @@ class ASR(Gtk.HBox): ...@@ -34,9 +34,9 @@ class ASR(Gtk.HBox):
self.hyp = [] self.hyp = []
self.hyp_callback = hyp_callback self.hyp_callback = hyp_callback
self.partial_hyp_callback = partial_hyp_callback self.partial_hyp_callback = partial_hyp_callback
Thread(target=self.init_gst).start() Thread(target=self.init_gst, args=[asr_model_dir]).start()
def init_gst(self, model="model2"): def init_gst(self, model='.'):
"""Initialize the speech components""" """Initialize the speech components"""
GObject.idle_add(self._started_loading_asr) GObject.idle_add(self._started_loading_asr)
...@@ -50,17 +50,17 @@ class ASR(Gtk.HBox): ...@@ -50,17 +50,17 @@ class ASR(Gtk.HBox):
self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")
if self.asr: if self.asr:
model_file = "asr/final.mdl" model_file = "%s/final.mdl" % model
if not os.path.isfile(model_file): if not os.path.isfile(model_file):
print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!" print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!"
sys.exit(1) sys.exit(1)
self.asr.set_property("fst", "asr/%s/HCLG.fst" % model) self.asr.set_property("fst", "%s/HCLG.fst" % model)
self.asr.set_property("model", "asr/final.mdl") self.asr.set_property("model", "%s/final.mdl" % model)
self.asr.set_property("word-syms", "asr/%s/words.txt" % model) self.asr.set_property("word-syms", "%s/words.txt" % model)
#self.asr.set_property("acoustic-scale", 0.0416) self.asr.set_property("acoustic-scale", 0.0416)
self.asr.set_property("feature-type", "mfcc") self.asr.set_property("feature-type", "mfcc")
self.asr.set_property("mfcc-config", "asr/conf/mfcc.conf") self.asr.set_property("mfcc-config", "%s/conf/mfcc.conf" % model)
self.asr.set_property("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf") self.asr.set_property("ivector-extraction-config", "%s/conf/ivector_extractor.fixed.conf" % model)
self.asr.set_property("max-active", 7000) # 7000 self.asr.set_property("max-active", 7000) # 7000
self.asr.set_property("beam", 11.0) # 11 self.asr.set_property("beam", 11.0) # 11
self.asr.set_property("lattice-beam", 6.0) # 6 self.asr.set_property("lattice-beam", 6.0) # 6
......
...@@ -2,25 +2,31 @@ ...@@ -2,25 +2,31 @@
set -e -u -o pipefail set -e -u -o pipefail
BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5 mkdir -p asr/{mika-fred-1,fisher-benoit-1,fisher-fred-1}
# get local models
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/mika-model1/* asr/mika-fred-1
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model-fred/* asr/fisher-fred-1
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model2/* asr/fisher-benoit-1
# get fisher acoustic model
BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
MODEL=exp/nnet2_online/nnet_a_gpu_online MODEL=exp/nnet2_online/nnet_a_gpu_online
GRAPH=exp/tri5a GRAPH=exp/tri5a
mkdir -p asr/ivector_extractor asr/conf mkdir -p asr/fisher/ivector_extractor asr/fisher/conf
pushd asr/ivector_extractor
wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}
popd
pushd asr/conf (cd asr/fisher && wget -N $BASE_URL/$MODEL/final.mdl)
wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf} (cd asr/fisher/ivector_extractor && wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats})
popd (cd asr/fisher/conf && wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf})
#(cd asr/fisher && wget -N $BASE_URL/$GRAPH/graph/{HCLG.fst,words.txt})
pushd asr # link to fisher acoustic model
wget -N $BASE_URL/$GRAPH/graph/HCLG.fst (cd asr/fisher-fred-1 && ln -s ../fisher/* .)
wget -N $BASE_URL/$GRAPH/graph/words.txt (cd asr/fisher-benoit-1 && ln -s ../fisher/* .)
wget -N $BASE_URL/$MODEL/final.mdl
popd
cat asr/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\//' > asr/conf/ivector_extractor.fixed.conf # fix ivector extraction paths
for model in mika-fred-1 fisher-fred-1 fisher-benoit-1
do
cat asr/$model/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\/'$model'\//' > asr/$model/conf/ivector_extractor.fixed.conf
done
...@@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets ...@@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets
import levenstein import levenstein
class ScriptedASR(Gtk.Window): class ScriptedASR(Gtk.Window):
def __init__(self, xml_filename): def __init__(self, xml_filename, asr_model_dir):
super(ScriptedASR, self).__init__() super(ScriptedASR, self).__init__()
self.connect("destroy", self.quit) self.connect("destroy", self.quit)
...@@ -34,31 +34,31 @@ class ScriptedASR(Gtk.Window): ...@@ -34,31 +34,31 @@ class ScriptedASR(Gtk.Window):
self.set_title('ScriptedASR [%s]' % xml_filename) self.set_title('ScriptedASR [%s]' % xml_filename)
vbox = Gtk.VBox() vbox = Gtk.VBox()
self.sections = section.SectionManager() #self.sections = section.SectionManager()
vbox.pack_start(self.sections, False, True, 5) #vbox.pack_start(self.sections, False, True, 5)
self.xmlview = xmlview_widgets.XmlView(xml_filename) self.xmlview = xmlview_widgets.XmlView(xml_filename)
vbox.pack_start(self.xmlview, True, True, 5) vbox.pack_start(self.xmlview, True, True, 5)
self.lines = [x for x in self.xmlview.get_line_iterator()] self.lines = [x for x in self.xmlview.get_line_iterator()]
self.current_line = -1 self.current_line = -1
self.confirmer = confirm.ConfirmationBox() #self.confirmer = confirm.ConfirmationBox()
vbox.pack_start(self.confirmer, False, True, 5) #vbox.pack_start(self.confirmer, False, True, 5)
self.actions = action.ActionView() #self.actions = action.ActionView()
vbox.pack_start(self.actions, False, True, 5) #vbox.pack_start(self.actions, False, True, 5)
self.sections.set_confirmer(self.confirmer) #self.sections.set_confirmer(self.confirmer)
self.actions.set_confirmer(self.confirmer) #self.actions.set_confirmer(self.confirmer)
# transcript view # transcript view
self.asr = asr.ASR(self.hyp_changed) self.asr = asr.ASR(asr_model_dir, self.hyp_changed)
vbox.pack_start(self.asr, False, True, 5) vbox.pack_start(self.asr, False, True, 5)
self.add(vbox) self.add(vbox)
self.show_all() self.show_all()
self.confirmer.hide() #self.confirmer.hide()
# load css style # load css style
style_provider = Gtk.CssProvider() style_provider = Gtk.CssProvider()
...@@ -104,7 +104,10 @@ class ScriptedASR(Gtk.Window): ...@@ -104,7 +104,10 @@ class ScriptedASR(Gtk.Window):
if __name__ == '__main__': if __name__ == '__main__':
xml_filename = 'data/homeostasis_25nov.xml' xml_filename = 'data/homeostasis_25nov.xml'
asr_model_dir = 'asr/mika-fred-1'
if len(sys.argv) > 1: if len(sys.argv) > 1:
xml_filename = sys.argv[1] xml_filename = sys.argv[1]
app = ScriptedASR(xml_filename) if len(sys.argv) > 2:
asr_model_dir = sys.argv[2]
app = ScriptedASR(xml_filename, asr_model_dir)
Gtk.main() Gtk.main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment