From bffa87030f245b0fbc5613790beb5dfd304a0d90 Mon Sep 17 00:00:00 2001 From: Benoit Favre <benoit.favre@lif.univ-mrs.fr> Date: Sun, 25 Jan 2015 11:14:54 +0100 Subject: [PATCH] add model selection --- .gitignore | 3 +- asr.py | 40 ++++++++-------------- asr/fisher-benoit-1.cfg | 14 ++++++++ asr/fisher-fred-1.cfg | 14 ++++++++ asr/mika-fred-1.cfg | 15 +++++++++ config.py | 29 ++++++++++++++++ main.py | 14 +++++--- selector.py | 74 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 171 insertions(+), 32 deletions(-) create mode 100644 asr/fisher-benoit-1.cfg create mode 100644 asr/fisher-fred-1.cfg create mode 100644 asr/mika-fred-1.cfg create mode 100644 config.py create mode 100644 selector.py diff --git a/.gitignore b/.gitignore index 2551705..e0726db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc -asr/ +asr/*/* +asr/libgstkaldionline2.so diff --git a/asr.py b/asr.py index e6201f7..a5d5ff7 100644 --- a/asr.py +++ b/asr.py @@ -9,7 +9,7 @@ Gdk.threads_init() Gst.init(None) class ASR(Gtk.HBox): - def __init__(self, asr_model_dir, hyp_callback = None, partial_hyp_callback = None): + def __init__(self, asr_config, hyp_callback = None, partial_hyp_callback = None): super(ASR, self).__init__() self.text = Gtk.TextView() @@ -34,9 +34,9 @@ class ASR(Gtk.HBox): self.hyp = [] self.hyp_callback = hyp_callback self.partial_hyp_callback = partial_hyp_callback - Thread(target=self.init_gst, args=[asr_model_dir]).start() + Thread(target=self.init_gst, args=(asr_config,)).start() - def init_gst(self, model='.'): + def init_gst(self, asr_config_file): """Initialize the speech components""" GObject.idle_add(self._started_loading_asr) @@ -50,30 +50,18 @@ class ASR(Gtk.HBox): self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") if self.asr: - model_file = "%s/final.mdl" % model - if not os.path.isfile(model_file): - print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!" 
- sys.exit(1) - self.asr.set_property("fst", "%s/HCLG.fst" % model) - self.asr.set_property("model", "%s/final.mdl" % model) - self.asr.set_property("word-syms", "%s/words.txt" % model) - self.asr.set_property("acoustic-scale", 0.0416) - self.asr.set_property("feature-type", "mfcc") - self.asr.set_property("mfcc-config", "%s/conf/mfcc.conf" % model) - self.asr.set_property("ivector-extraction-config", "%s/conf/ivector_extractor.fixed.conf" % model) - self.asr.set_property("max-active", 7000) # 7000 - self.asr.set_property("beam", 11.0) # 11 - self.asr.set_property("lattice-beam", 6.0) # 6 - self.asr.set_property("do-endpointing", True) - self.asr.set_property("endpoint-silence-phones", "1:2:3:4:5:6:7:8:9:10") + import config + for name, value in config.read(asr_config_file).items(): + if name != 'dir' and name != 'name': + self.asr.set_property(name, value) else: - print >> sys.stderr, "Couldn't create the kaldinnet2onlinedecoder element. " - if os.environ.has_key("GST_PLUGIN_PATH"): - print >> sys.stderr, "Have you compiled the Kaldi GStreamer plugin?" - else: - print >> sys.stderr, "You probably need to set the GST_PLUGIN_PATH envoronment variable" - print >> sys.stderr, "Try running: GST_PLUGIN_PATH=../src %s" % sys.argv[0] - sys.exit(); + print >> sys.stderr, "Couldn't create the kaldinnet2onlinedecoder element. " + if os.environ.has_key("GST_PLUGIN_PATH"): + print >> sys.stderr, "Have you compiled the Kaldi GStreamer plugin?" 
+ else: + print >> sys.stderr, "You probably need to set the GST_PLUGIN_PATH envoronment variable" + print >> sys.stderr, "Try running: GST_PLUGIN_PATH=../src %s" % sys.argv[0] + sys.exit(); # initially silence the decoder self.asr.set_property("silent", True) diff --git a/asr/fisher-benoit-1.cfg b/asr/fisher-benoit-1.cfg new file mode 100644 index 0000000..e57015e --- /dev/null +++ b/asr/fisher-benoit-1.cfg @@ -0,0 +1,14 @@ +dir = asr/fisher-benoit-1 +name = Generic english 1 + +fst = $dir/HCLG.fst +model = $dir/final.mdl +word-syms = $dir/words.txt +feature-type = mfcc +mfcc-config = $dir/conf/mfcc.conf +ivector-extraction-config = $dir/conf/ivector_extractor.fixed.conf +max-active = 7000 +beam = 11.0 +lattice-beam = 6.0 +do-endpointing = True +endpoint-silence-phones = 1:2:3:4:5:6:7:8:9:10 diff --git a/asr/fisher-fred-1.cfg b/asr/fisher-fred-1.cfg new file mode 100644 index 0000000..ce14828 --- /dev/null +++ b/asr/fisher-fred-1.cfg @@ -0,0 +1,14 @@ +dir = asr/fisher-fred-1 +name = Generic english 2 + +fst = $dir/HCLG.fst +model = $dir/final.mdl +word-syms = $dir/words.txt +feature-type = mfcc +mfcc-config = $dir/conf/mfcc.conf +ivector-extraction-config = $dir/conf/ivector_extractor.fixed.conf +max-active = 7000 +beam = 11.0 +lattice-beam = 6.0 +do-endpointing = True +endpoint-silence-phones = 1:2:3:4:5:6:7:8:9:10 diff --git a/asr/mika-fred-1.cfg b/asr/mika-fred-1.cfg new file mode 100644 index 0000000..e9c3073 --- /dev/null +++ b/asr/mika-fred-1.cfg @@ -0,0 +1,15 @@ +dir = asr/mika-fred-1 +name = Roccio's voice 1 + +fst = $dir/HCLG.fst +model = $dir/final.mdl +word-syms = $dir/words.txt +feature-type = mfcc +mfcc-config = $dir/conf/mfcc.conf +ivector-extraction-config = $dir/conf/ivector_extractor.fixed.conf +max-active = 7000 +beam = 11.0 +lattice-beam = 6.0 +do-endpointing = True +acoustic-scale = 0.0416 +endpoint-silence-phones = 1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35 diff --git a/config.py b/config.py 
new file mode 100644 index 0000000..6f50726 --- /dev/null +++ b/config.py @@ -0,0 +1,29 @@ +import sys, re + +def read(filename): + config = {} + with open(filename) as fp: + for line in fp: + line = line.strip() + line = re.sub(r'#.*$', '', line) # remove comments + found = re.match(r'^([a-zA-Z0-9-_]*)\s*=\s*(.*)$', line) + if found: + name = found.group(1) + value = found.group(2).strip() + for other, replacement in config.items(): + value = re.sub(r'\$%s\b' % other, str(replacement), value) + if value in ['True', 'False']: + value = bool(value) + else: + try: + value = int(value) + except: + try: + value = float(value) + except: + pass + config[name] = value + return config + +if __name__ == '__main__': + print read(sys.argv[1]) diff --git a/main.py b/main.py index 802bf6c..bc27805 100755 --- a/main.py +++ b/main.py @@ -25,7 +25,7 @@ import confirm, asr, action, section, xmlview_widgets import levenstein class ScriptedASR(Gtk.Window): - def __init__(self, xml_filename, asr_model_dir): + def __init__(self, xml_filename, asr_config_file): super(ScriptedASR, self).__init__() self.connect("destroy", self.quit) @@ -52,7 +52,7 @@ class ScriptedASR(Gtk.Window): #self.actions.set_confirmer(self.confirmer) # transcript view - self.asr = asr.ASR(asr_model_dir, self.hyp_changed) + self.asr = asr.ASR(asr_config_file, self.hyp_changed) vbox.pack_start(self.asr, False, True, 5) self.add(vbox) @@ -103,11 +103,15 @@ class ScriptedASR(Gtk.Window): if __name__ == '__main__': + import selector xml_filename = 'data/homeostasis_25nov.xml' - asr_model_dir = 'asr/mika-fred-1' + asr_config_file = 'asr/mika-fred-1.cfg' if len(sys.argv) > 1: xml_filename = sys.argv[1] if len(sys.argv) > 2: - asr_model_dir = sys.argv[2] - app = ScriptedASR(xml_filename, asr_model_dir) + asr_config_file = sys.argv[2] + xml_filename, asr_config_file = selector.ModelSelector(xml_filename, asr_config_file).run() + if xml_filename == None or asr_config_file == None: + sys.exit(0) + app = 
from gi.repository import GObject, Gtk, Gdk
import os, sys, glob
import config


class ModelSelector(Gtk.Dialog):
    """Dialog asking for the XML file and the ASR model configuration to use.

    The XML file name is typed into a text entry or picked with a file
    chooser; the ASR model is picked from a combo box filled with the
    'asr/*.cfg' files found relative to the current working directory.
    """

    def __init__(self, xml_filename = '', asr_model = ''):
        """Build the dialog.

        xml_filename -- initial content of the XML file entry.
        asr_model -- path of the configuration file to preselect; the first
                     listed model is selected when it is not in the list.
        """
        super(ModelSelector, self).__init__()
        self.add_button("Cancel", Gtk.ResponseType.CANCEL)
        self.add_button("OK", Gtk.ResponseType.OK)
        box = self.get_content_area()

        # row 1: XML file entry and "Choose..." button
        xml_box = Gtk.HBox()
        xml_box.pack_start(Gtk.Label('XML file:'), False, False, 10)
        xml_entry = Gtk.Entry()
        xml_entry.set_text(xml_filename)
        xml_entry.set_width_chars(len(xml_filename))
        self.xml_entry = xml_entry
        xml_box.pack_start(xml_entry, True, True, 10)
        xml_button = Gtk.Button("Choose...")
        xml_button.connect('clicked', self.show_filechooser)
        xml_box.pack_start(xml_button, False, False, 10)

        box.pack_start(xml_box, False, False, 5)

        # row 2: ASR model combo box
        model_box = Gtk.HBox()
        model_box.pack_start(Gtk.Label('ASR model:'), False, False, 10)
        model_chooser = Gtk.ComboBoxText()
        model_chooser.set_entry_text_column(0)
        for model in self.list_models():
            model_chooser.append_text(model)
        if self.models:
            # Preselect the model requested by the caller when it is listed.
            wanted = os.path.normpath(asr_model) if asr_model else None
            listed = [os.path.normpath(path) for path in self.models]
            model_chooser.set_active(listed.index(wanted) if wanted in listed else 0)
        self.model_chooser = model_chooser
        model_box.pack_start(model_chooser, True, True, 10)

        box.pack_start(model_box, False, False, 5)
        self.show_all()

    def show_filechooser(self, button):
        """Let the user pick an XML file and copy its path into the entry."""
        dialog = Gtk.FileChooserDialog("Please choose a file", self, Gtk.FileChooserAction.OPEN, (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OPEN, Gtk.ResponseType.OK))
        filter_text = Gtk.FileFilter()
        filter_text.set_name("XML files")
        filter_text.add_mime_type("text/xml")
        dialog.add_filter(filter_text)
        # abspath: dirname(__file__) is '' when the script is started from its
        # own directory, which would otherwise turn the folder into '/data'.
        here = os.path.dirname(os.path.abspath(__file__))
        dialog.set_current_folder(os.path.join(here, 'data'))

        response = dialog.run()
        if response == Gtk.ResponseType.OK:
            self.xml_entry.set_text(dialog.get_filename())
        dialog.destroy()

    def list_models(self):
        """Collect the readable 'asr/*.cfg' files.

        Fills self.models with the configuration file paths and returns the
        list of labels to display, in the same order: the 'name' setting of a
        file when it has one, its path otherwise.  Files that cannot be read
        are reported on stderr and left out.
        """
        self.models = []
        model_names = []
        # sorted: glob returns files in arbitrary order
        for filename in sorted(glob.glob('asr/*.cfg')):
            try:
                items = config.read(filename)
            except Exception as error:
                sys.stderr.write("Ignoring model configuration %s: %s\n" % (filename, error))
                continue
            self.models.append(filename)
            model_names.append(str(items.get('name', filename)))
        return model_names

    def run(self):
        """Show the dialog and wait for the user's answer.

        Returns (xml_filename, asr_config_file), or (None, None) when the
        dialog was not confirmed with OK or no model is selected.  The dialog
        is destroyed in every case.
        """
        response = super(ModelSelector, self).run()
        try:
            if response != Gtk.ResponseType.OK:
                return None, None
            active = self.model_chooser.get_active()
            # get_active() is -1 when nothing is selected (e.g. no model found)
            if active < 0 or active >= len(self.models):
                return None, None
            return self.xml_entry.get_text(), self.models[active]
        finally:
            self.destroy()