Skip to content
Snippets Groups Projects
Commit 646389c7 authored by Benoit Favre
Browse files

add support for mika models

parent 494a0602
No related branches found
No related tags found
No related merge requests found
......@@ -19,11 +19,12 @@ developing with pygtk3: http://lazka.github.io/pgi-docs/, https://python-gtk-3-t
Todo:
- use GtkSourceView to allow editing the xml file directly
- model selection in config file
- model selection in user interface
- integrate new xml with actions
- account for custom acoustic scale and endpoint silence phones
add model from mika
/storage/raid1/homedirs/mickael.rouvier/raid2/kaldi_english/exp/nnet2_online/
--acoustic-scale=.04166666666666666666
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35
- use GtkSourceView to allow editing the xml file directly
......@@ -9,7 +9,7 @@ Gdk.threads_init()
Gst.init(None)
class ASR(Gtk.HBox):
def __init__(self, hyp_callback = None, partial_hyp_callback = None):
def __init__(self, asr_model_dir, hyp_callback = None, partial_hyp_callback = None):
super(ASR, self).__init__()
self.text = Gtk.TextView()
......@@ -34,9 +34,9 @@ class ASR(Gtk.HBox):
self.hyp = []
self.hyp_callback = hyp_callback
self.partial_hyp_callback = partial_hyp_callback
Thread(target=self.init_gst).start()
Thread(target=self.init_gst, args=[asr_model_dir]).start()
def init_gst(self, model="model2"):
def init_gst(self, model='.'):
"""Initialize the speech components"""
GObject.idle_add(self._started_loading_asr)
......@@ -50,17 +50,17 @@ class ASR(Gtk.HBox):
self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")
if self.asr:
model_file = "asr/final.mdl"
model_file = "%s/final.mdl" % model
if not os.path.isfile(model_file):
print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!"
sys.exit(1)
self.asr.set_property("fst", "asr/%s/HCLG.fst" % model)
self.asr.set_property("model", "asr/final.mdl")
self.asr.set_property("word-syms", "asr/%s/words.txt" % model)
#self.asr.set_property("acoustic-scale", 0.0416)
self.asr.set_property("fst", "%s/HCLG.fst" % model)
self.asr.set_property("model", "%s/final.mdl" % model)
self.asr.set_property("word-syms", "%s/words.txt" % model)
self.asr.set_property("acoustic-scale", 0.0416)
self.asr.set_property("feature-type", "mfcc")
self.asr.set_property("mfcc-config", "asr/conf/mfcc.conf")
self.asr.set_property("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf")
self.asr.set_property("mfcc-config", "%s/conf/mfcc.conf" % model)
self.asr.set_property("ivector-extraction-config", "%s/conf/ivector_extractor.fixed.conf" % model)
self.asr.set_property("max-active", 7000) # 7000
self.asr.set_property("beam", 11.0) # 11
self.asr.set_property("lattice-beam", 6.0) # 6
......
......@@ -2,25 +2,31 @@
set -e -u -o pipefail
BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
mkdir -p asr/{mika-fred-1,fisher-benoit-1,fisher-fred-1}
# get local models
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/mika-model1/* asr/mika-fred-1
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model-fred/* asr/fisher-fred-1
scp -r frontend:~benoit.favre/work/kaldi/kaldi-trunk/egs/fisher_english/s5/lm/model2/* asr/fisher-benoit-1
# get fisher acoustic model
BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
MODEL=exp/nnet2_online/nnet_a_gpu_online
GRAPH=exp/tri5a
mkdir -p asr/ivector_extractor asr/conf
pushd asr/ivector_extractor
wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}
popd
mkdir -p asr/fisher/ivector_extractor asr/fisher/conf
pushd asr/conf
wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}
popd
(cd asr/fisher && wget -N $BASE_URL/$MODEL/final.mdl)
(cd asr/fisher/ivector_extractor && wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats})
(cd asr/fisher/conf && wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf})
#(cd asr/fisher && wget -N $BASE_URL/$GRAPH/graph/{HCLG.fst,words.txt})
pushd asr
wget -N $BASE_URL/$GRAPH/graph/HCLG.fst
wget -N $BASE_URL/$GRAPH/graph/words.txt
wget -N $BASE_URL/$MODEL/final.mdl
popd
# link to fisher acoustic model
(cd asr/fisher-fred-1 && ln -s ../fisher/* .)
(cd asr/fisher-benoit-1 && ln -s ../fisher/* .)
cat asr/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\//' > asr/conf/ivector_extractor.fixed.conf
# fix ivector extraction paths
for model in mika-fred-1 fisher-fred-1 fisher-benoit-1
do
cat asr/$model/conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=asr\/'$model'\//' > asr/$model/conf/ivector_extractor.fixed.conf
done
......@@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets
import levenstein
class ScriptedASR(Gtk.Window):
def __init__(self, xml_filename):
def __init__(self, xml_filename, asr_model_dir):
super(ScriptedASR, self).__init__()
self.connect("destroy", self.quit)
......@@ -34,31 +34,31 @@ class ScriptedASR(Gtk.Window):
self.set_title('ScriptedASR [%s]' % xml_filename)
vbox = Gtk.VBox()
self.sections = section.SectionManager()
vbox.pack_start(self.sections, False, True, 5)
#self.sections = section.SectionManager()
#vbox.pack_start(self.sections, False, True, 5)
self.xmlview = xmlview_widgets.XmlView(xml_filename)
vbox.pack_start(self.xmlview, True, True, 5)
self.lines = [x for x in self.xmlview.get_line_iterator()]
self.current_line = -1
self.confirmer = confirm.ConfirmationBox()
vbox.pack_start(self.confirmer, False, True, 5)
#self.confirmer = confirm.ConfirmationBox()
#vbox.pack_start(self.confirmer, False, True, 5)
self.actions = action.ActionView()
vbox.pack_start(self.actions, False, True, 5)
#self.actions = action.ActionView()
#vbox.pack_start(self.actions, False, True, 5)
self.sections.set_confirmer(self.confirmer)
self.actions.set_confirmer(self.confirmer)
#self.sections.set_confirmer(self.confirmer)
#self.actions.set_confirmer(self.confirmer)
# transcript view
self.asr = asr.ASR(self.hyp_changed)
self.asr = asr.ASR(asr_model_dir, self.hyp_changed)
vbox.pack_start(self.asr, False, True, 5)
self.add(vbox)
self.show_all()
self.confirmer.hide()
#self.confirmer.hide()
# load css style
style_provider = Gtk.CssProvider()
......@@ -104,7 +104,10 @@ class ScriptedASR(Gtk.Window):
if __name__ == '__main__':
xml_filename = 'data/homeostasis_25nov.xml'
asr_model_dir = 'asr/mika-fred-1'
if len(sys.argv) > 1:
xml_filename = sys.argv[1]
app = ScriptedASR(xml_filename)
if len(sys.argv) > 2:
asr_model_dir = sys.argv[2]
app = ScriptedASR(xml_filename, asr_model_dir)
Gtk.main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment