From 646389c7937d9a7d41571df94abdc2cb78429970 Mon Sep 17 00:00:00 2001
From: Benoit Favre <benoit.favre@lif.univ-mrs.fr>
Date: Sun, 25 Jan 2015 09:28:11 +0100
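Subject: [PATCH] add support for mika models

The ASR widget now takes the model directory as its first constructor
argument and resolves final.mdl, HCLG.fst, words.txt and the conf/ files
relative to it instead of the hard-coded asr/ layout. The acoustic-scale
property, previously commented out, is now set to 0.0416.

main.py accepts the model directory as an optional second command-line
argument (default: asr/mika-fred-1). The section manager, confirmation
box and action view are commented out of the main window.

download-models.sh now populates one directory per model
(asr/mika-fred-1, asr/fisher-fred-1, asr/fisher-benoit-1), downloads the
fisher acoustic model into asr/fisher, symlinks it into the two fisher-*
directories, and writes a per-model ivector_extractor.fixed.conf.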
---
 README             |  7 ++++---
 asr.py             | 20 ++++++++++----------
 download-models.sh | 36 +++++++++++++++++++++---------------
 main.py            | 27 +++++++++++++++------------
 4 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/README b/README
index b2c3bf7..4250215 100644
--- a/README
+++ b/README
@@ -19,11 +19,12 @@ developing with pygtk3: http://lazka.github.io/pgi-docs/, https://python-gtk-3-t
 
 Todo:
 
-- use GtkSourceView to allow editing the xml file directly
-- model selection in config file
+- model selection in user interface
 - integrate new xml with actions
+- account for custom acoustic scale and endpoint silence phones
 
-add model from mika
 /storage/raid1/homedirs/mickael.rouvier/raid2/kaldi_english/exp/nnet2_online/
 --acoustic-scale=.04166666666666666666
 --endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35
+
+- use GtkSourceView to allow editing the xml file directly
diff --git a/asr.py b/asr.py
index 0e64abe..e6201f7 100644
--- a/asr.py
+++ b/asr.py
@@ -9,7 +9,7 @@ Gdk.threads_init()
 Gst.init(None)
 
 class ASR(Gtk.HBox):
-    def __init__(self, hyp_callback = None, partial_hyp_callback = None):
+    def __init__(self, asr_model_dir, hyp_callback = None, partial_hyp_callback = None):
         super(ASR, self).__init__()
 
         self.text = Gtk.TextView()
@@ -34,9 +34,9 @@ class ASR(Gtk.HBox):
         self.hyp = []
         self.hyp_callback = hyp_callback
         self.partial_hyp_callback = partial_hyp_callback
-        Thread(target=self.init_gst).start()
+        Thread(target=self.init_gst, args=[asr_model_dir]).start()
 
-    def init_gst(self, model="model2"):
+    def init_gst(self, model='.'):
         """Initialize the speech components"""
         GObject.idle_add(self._started_loading_asr)
 
@@ -50,17 +50,17 @@ class ASR(Gtk.HBox):
         self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")
         
         if self.asr:
-          model_file = "asr/final.mdl"
+          model_file = "%s/final.mdl" % model
           if not os.path.isfile(model_file):
               print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!"
               sys.exit(1)
-          self.asr.set_property("fst", "asr/%s/HCLG.fst" % model)
-          self.asr.set_property("model", "asr/final.mdl")
-          self.asr.set_property("word-syms", "asr/%s/words.txt" % model)
-          #self.asr.set_property("acoustic-scale", 0.0416)
+          self.asr.set_property("fst", "%s/HCLG.fst" % model)
+          self.asr.set_property("model", "%s/final.mdl" % model)
+          self.asr.set_property("word-syms", "%s/words.txt" % model)
+          self.asr.set_property("acoustic-scale", 0.0416)
           self.asr.set_property("feature-type", "mfcc")
-          self.asr.set_property("mfcc-config", "asr/conf/mfcc.conf")
-          self.asr.set_property("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf")
+          self.asr.set_property("mfcc-config", "%s/conf/mfcc.conf" % model)
+          self.asr.set_property("ivector-extraction-config", "%s/conf/ivector_extractor.fixed.conf" % model)
           self.asr.set_property("max-active", 7000) # 7000
           self.asr.set_property("beam", 11.0) # 11
           self.asr.set_property("lattice-beam", 6.0) # 6
diff --git a/download-models.sh b/download-models.sh
index 3959dcd..6c8aa1c 100755
--- a/download-models.sh
+++ b/download-models.sh
@@ -2,25 +2,31 @@
 
 set -e -u -o pipefail
 
-BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
+mkdir -p asr/{mika-fred-1,fisher-benoit-1,fisher-fred-1}
+
+# get local models (copied over scp; requires ssh access to the "frontend" host)
+scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/mika-model1/* asr/mika-fred-1
+scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model-fred/* asr/fisher-fred-1
+scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model2/* asr/fisher-benoit-1
 
+# get fisher acoustic model
+BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
 MODEL=exp/nnet2_online/nnet_a_gpu_online
 GRAPH=exp/tri5a
 
-mkdir -p asr/ivector_extractor asr/conf
-
-pushd asr/ivector_extractor
-wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}
-popd
+mkdir -p asr/fisher/ivector_extractor asr/fisher/conf
 
-pushd asr/conf
-wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}
-popd
+(cd asr/fisher && wget -N $BASE_URL/$MODEL/final.mdl)
+(cd asr/fisher/ivector_extractor && wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats})
+(cd asr/fisher/conf && wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf})
+#(cd asr/fisher && wget -N $BASE_URL/$GRAPH/graph/{HCLG.fst,words.txt})
 
-pushd asr
-wget -N $BASE_URL/$GRAPH/graph/HCLG.fst
-wget -N $BASE_URL/$GRAPH/graph/words.txt
-wget -N $BASE_URL/$MODEL/final.mdl
-popd
+# link to fisher acoustic model
+(cd asr/fisher-fred-1 && ln -s ../fisher/* .)
+(cd asr/fisher-benoit-1 && ln -s ../fisher/* .)
 
-cat asr/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\//' > asr/conf/ivector_extractor.fixed.conf
+# rewrite the ivector extractor paths to point into asr/<model>/ (written to ivector_extractor.fixed.conf)
+for model in mika-fred-1 fisher-fred-1 fisher-benoit-1
+do
+    cat asr/$model/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\/'$model'\//' > asr/$model/conf/ivector_extractor.fixed.conf
+done
diff --git a/main.py b/main.py
index 36f71c7..802bf6c 100755
--- a/main.py
+++ b/main.py
@@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets
 import levenstein
 
 class ScriptedASR(Gtk.Window):
-    def __init__(self, xml_filename):
+    def __init__(self, xml_filename, asr_model_dir):
         super(ScriptedASR, self).__init__()
 
         self.connect("destroy", self.quit)
@@ -34,31 +34,31 @@ class ScriptedASR(Gtk.Window):
         self.set_title('ScriptedASR [%s]' % xml_filename)
         vbox = Gtk.VBox()
 
-        self.sections = section.SectionManager()
-        vbox.pack_start(self.sections, False, True, 5)
+        #self.sections = section.SectionManager()
+        #vbox.pack_start(self.sections, False, True, 5)
 
         self.xmlview = xmlview_widgets.XmlView(xml_filename)
         vbox.pack_start(self.xmlview, True, True, 5)
         self.lines = [x for x in self.xmlview.get_line_iterator()]
         self.current_line = -1
 
-        self.confirmer = confirm.ConfirmationBox()
-        vbox.pack_start(self.confirmer, False, True, 5)
+        #self.confirmer = confirm.ConfirmationBox()
+        #vbox.pack_start(self.confirmer, False, True, 5)
 
-        self.actions = action.ActionView()
-        vbox.pack_start(self.actions, False, True, 5)
+        #self.actions = action.ActionView()
+        #vbox.pack_start(self.actions, False, True, 5)
 
-        self.sections.set_confirmer(self.confirmer)
-        self.actions.set_confirmer(self.confirmer)
+        #self.sections.set_confirmer(self.confirmer)
+        #self.actions.set_confirmer(self.confirmer)
 
         # transcript view
-        self.asr = asr.ASR(self.hyp_changed)
+        self.asr = asr.ASR(asr_model_dir, self.hyp_changed)
         vbox.pack_start(self.asr, False, True, 5)
 
         self.add(vbox)
         self.show_all()
 
-        self.confirmer.hide()
+        #self.confirmer.hide()
 
         # load css style
         style_provider = Gtk.CssProvider()
@@ -104,7 +104,10 @@ class ScriptedASR(Gtk.Window):
 
 if __name__ == '__main__':
     xml_filename = 'data/homeostasis_25nov.xml'
+    asr_model_dir = 'asr/mika-fred-1'
     if len(sys.argv) > 1:
         xml_filename = sys.argv[1]
-    app = ScriptedASR(xml_filename)
+    if len(sys.argv) > 2:
+        asr_model_dir = sys.argv[2]
+    app = ScriptedASR(xml_filename, asr_model_dir)
     Gtk.main()
-- 
GitLab