From 646389c7937d9a7d41571df94abdc2cb78429970 Mon Sep 17 00:00:00 2001 From: Benoit Favre <benoit.favre@lif.univ-mrs.fr> Date: Sun, 25 Jan 2015 09:28:11 +0100 Subject: [PATCH] add support for mika models --- README | 7 ++++--- asr.py | 20 ++++++++++---------- download-models.sh | 36 +++++++++++++++++++++--------------- main.py | 27 +++++++++++++++------------ 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/README b/README index b2c3bf7..4250215 100644 --- a/README +++ b/README @@ -19,11 +19,12 @@ developing with pygtk3: http://lazka.github.io/pgi-docs/, https://python-gtk-3-t Todo: -- use GtkSourceView to allow editing the xml file directly -- model selection in config file +- model selection in user interface - integrate new xml with actions +- account for custom acoustic scale and endpoint silence phones -add model from mika /storage/raid1/homedirs/mickael.rouvier/raid2/kaldi_english/exp/nnet2_online/ --acoustic-scale=.04166666666666666666 --endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35 + +- use GtkSourceView to allow editing the xml file directly diff --git a/asr.py b/asr.py index 0e64abe..e6201f7 100644 --- a/asr.py +++ b/asr.py @@ -9,7 +9,7 @@ Gdk.threads_init() Gst.init(None) class ASR(Gtk.HBox): - def __init__(self, hyp_callback = None, partial_hyp_callback = None): + def __init__(self, asr_model_dir, hyp_callback = None, partial_hyp_callback = None): super(ASR, self).__init__() self.text = Gtk.TextView() @@ -34,9 +34,9 @@ class ASR(Gtk.HBox): self.hyp = [] self.hyp_callback = hyp_callback self.partial_hyp_callback = partial_hyp_callback - Thread(target=self.init_gst).start() + Thread(target=self.init_gst, args=[asr_model_dir]).start() - def init_gst(self, model="model2"): + def init_gst(self, model='.'): """Initialize the speech components""" GObject.idle_add(self._started_loading_asr) @@ -50,17 +50,17 @@ class ASR(Gtk.HBox): self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") if self.asr: - model_file = "asr/final.mdl" + model_file = "%s/final.mdl" % model if not os.path.isfile(model_file): print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!" sys.exit(1) - self.asr.set_property("fst", "asr/%s/HCLG.fst" % model) - self.asr.set_property("model", "asr/final.mdl") - self.asr.set_property("word-syms", "asr/%s/words.txt" % model) - #self.asr.set_property("acoustic-scale", 0.0416) + self.asr.set_property("fst", "%s/HCLG.fst" % model) + self.asr.set_property("model", "%s/final.mdl" % model) + self.asr.set_property("word-syms", "%s/words.txt" % model) + self.asr.set_property("acoustic-scale", 0.0416) self.asr.set_property("feature-type", "mfcc") - self.asr.set_property("mfcc-config", "asr/conf/mfcc.conf") - self.asr.set_property("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf") + self.asr.set_property("mfcc-config", "%s/conf/mfcc.conf" % model) + self.asr.set_property("ivector-extraction-config", "%s/conf/ivector_extractor.fixed.conf" % model) self.asr.set_property("max-active", 7000) # 7000 self.asr.set_property("beam", 11.0) # 11 self.asr.set_property("lattice-beam", 6.0) # 6 diff --git a/download-models.sh b/download-models.sh index 3959dcd..6c8aa1c 100755 --- a/download-models.sh +++ b/download-models.sh @@ -2,25 +2,31 @@ set -e -u -o pipefail -BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5 +mkdir -p asr/{mika-fred-1,fisher-benoit-1,fisher-fred-1} + +# get local models +scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/mika-model1/* asr/mika-fred-1 +scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model-fred/* asr/fisher-fred-1 +scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model2/* asr/fisher-benoit-1 +# get fisher acoustic model +BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5 MODEL=exp/nnet2_online/nnet_a_gpu_online GRAPH=exp/tri5a -mkdir -p asr/ivector_extractor asr/conf - -pushd asr/ivector_extractor -wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats} -popd +mkdir -p asr/fisher/ivector_extractor asr/fisher/conf -pushd asr/conf -wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf} -popd +(cd asr/fisher && wget -N $BASE_URL/$MODEL/final.mdl) +(cd asr/fisher/ivector_extractor && wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}) +(cd asr/fisher/conf && wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}) +#(cd asr/fisher && wget -N $BASE_URL/$GRAPH/graph/{HCLG.fst,words.txt}) -pushd asr -wget -N $BASE_URL/$GRAPH/graph/HCLG.fst -wget -N $BASE_URL/$GRAPH/graph/words.txt -wget -N $BASE_URL/$MODEL/final.mdl -popd +# link to fisher acoustic model +(cd asr/fisher-fred-1 && ln -s ../fisher/* .) +(cd asr/fisher-benoit-1 && ln -s ../fisher/* .) -cat asr/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\//' > asr/conf/ivector_extractor.fixed.conf +# fix ivector extraction paths +for model in mika-fred-1 fisher-fred-1 fisher-benoit-1 +do + cat asr/$model/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\/'$model'\//' > asr/$model/conf/ivector_extractor.fixed.conf +done diff --git a/main.py b/main.py index 36f71c7..802bf6c 100755 --- a/main.py +++ b/main.py @@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets import levenstein class ScriptedASR(Gtk.Window): - def __init__(self, xml_filename): + def __init__(self, xml_filename, asr_model_dir): super(ScriptedASR, self).__init__() self.connect("destroy", self.quit) @@ -34,31 +34,31 @@ class ScriptedASR(Gtk.Window): self.set_title('ScriptedASR [%s]' % xml_filename) vbox = Gtk.VBox() - self.sections = section.SectionManager() - vbox.pack_start(self.sections, False, True, 5) + #self.sections = section.SectionManager() + #vbox.pack_start(self.sections, False, True, 5) self.xmlview = xmlview_widgets.XmlView(xml_filename) vbox.pack_start(self.xmlview, True, True, 5) self.lines = [x for x in self.xmlview.get_line_iterator()] self.current_line = -1 - self.confirmer = confirm.ConfirmationBox() - vbox.pack_start(self.confirmer, False, True, 5) + #self.confirmer = confirm.ConfirmationBox() + #vbox.pack_start(self.confirmer, False, True, 5) - self.actions = action.ActionView() - vbox.pack_start(self.actions, False, True, 5) + #self.actions = action.ActionView() + #vbox.pack_start(self.actions, False, True, 5) - self.sections.set_confirmer(self.confirmer) - self.actions.set_confirmer(self.confirmer) + #self.sections.set_confirmer(self.confirmer) + #self.actions.set_confirmer(self.confirmer) # transcript view - self.asr = asr.ASR(self.hyp_changed) + self.asr = asr.ASR(asr_model_dir, self.hyp_changed) vbox.pack_start(self.asr, False, True, 5) self.add(vbox) self.show_all() - self.confirmer.hide() + #self.confirmer.hide() # load css style style_provider = Gtk.CssProvider() @@ -104,7 +104,10 @@ class ScriptedASR(Gtk.Window): if __name__ == '__main__': xml_filename = 'data/homeostasis_25nov.xml' + asr_model_dir = 'asr/mika-fred-1' if len(sys.argv) > 1: xml_filename = sys.argv[1] - app = ScriptedASR(xml_filename) + if len(sys.argv) > 2: + asr_model_dir = sys.argv[2] + app = ScriptedASR(xml_filename, asr_model_dir) Gtk.main() -- GitLab