From c693149c2578da7884c89c2bf5a91c5e4b10f226 Mon Sep 17 00:00:00 2001 From: Amit Beka <amit.beka@gmail.com> Date: Tue, 25 Nov 2014 20:58:30 +0200 Subject: [PATCH] add LM scaling property LM scaling is intended to control the ratio between the LM strength and the acoustic model strength *after* decoding, when the best path in the lattice is generated. Setting this to a value higher than 1.0 (default) makes the LM more prominent in the lattice path weighting, so the best path may be more aligned with the LM, and not with the acoustic model. Signed-off-by: Amit Beka <amit.beka@gmail.com> --- src/gstkaldinnet2onlinedecoder.cc | 22 ++++++++++++++++++++++ src/gstkaldinnet2onlinedecoder.h | 1 + 2 files changed, 23 insertions(+) diff --git a/src/gstkaldinnet2onlinedecoder.cc b/src/gstkaldinnet2onlinedecoder.cc index 6c8749e..0fd8e81 100644 --- a/src/gstkaldinnet2onlinedecoder.cc +++ b/src/gstkaldinnet2onlinedecoder.cc @@ -71,12 +71,14 @@ enum { PROP_DO_ENDPOINTING, PROP_ADAPTATION_STATE, PROP_INVERSE_SCALE, + PROP_LMWT_SCALE, PROP_LAST }; #define DEFAULT_MODEL "final.mdl" #define DEFAULT_FST "HCLG.fst" #define DEFAULT_WORD_SYMS "words.txt" +#define DEFAULT_LMWT_SCALE 1.0 /* the capabilities of the inputs and outputs. * @@ -195,6 +197,17 @@ static void gst_kaldinnet2onlinedecoder_class_init( FALSE, (GParamFlags) G_PARAM_READWRITE)); + g_object_class_install_property( + gobject_class, + PROP_LMWT_SCALE, + g_param_spec_float( + "lmwt-scale", "LM weight for scaling output lattice", + "LM scaling for the output lattice, usually in conjunction with inverse-scaling=true", + G_MINFLOAT, + G_MAXFLOAT, + DEFAULT_LMWT_SCALE, + (GParamFlags) G_PARAM_READWRITE)); + gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new( "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST, G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result), @@ -270,6 +283,7 @@ static void gst_kaldinnet2onlinedecoder_init( filter->feature_info = NULL; filter->sample_rate = 0; filter->decoding = false; + filter->lmwt_scale = DEFAULT_LMWT_SCALE; filter->inverse_scale = FALSE; // init properties from various Kaldi Opts @@ -380,6 +394,9 @@ static void gst_kaldinnet2onlinedecoder_set_property(GObject * object, case PROP_INVERSE_SCALE: filter->inverse_scale = g_value_get_boolean(value); break; + case PROP_LMWT_SCALE: + filter->lmwt_scale = g_value_get_float(value); + break; case PROP_ADAPTATION_STATE: { if (G_VALUE_HOLDS_STRING(value)) { @@ -480,6 +497,9 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object, case PROP_INVERSE_SCALE: g_value_set_boolean(value, filter->inverse_scale); break; + case PROP_LMWT_SCALE: + g_value_set_float(value, filter->lmwt_scale); + break; case PROP_ADAPTATION_STATE: string_stream.clear(); if (filter->adaptation_state) { @@ -544,6 +564,8 @@ static void gst_kaldinnet2onlinedecoder_final_result( fst::ScaleLattice(fst::AcousticLatticeScale(inv_acoustic_scale), &clat); } + fst::ScaleLattice(fst::LatticeScale(filter->lmwt_scale, 1.0), &clat); + CompactLatticeShortestPath(clat, &best_path_clat); Lattice best_path_lat; diff --git a/src/gstkaldinnet2onlinedecoder.h b/src/gstkaldinnet2onlinedecoder.h index a1bb9d2..3adf837 100644 --- a/src/gstkaldinnet2onlinedecoder.h +++ b/src/gstkaldinnet2onlinedecoder.h @@ -61,6 +61,7 @@ struct _Gstkaldinnet2onlinedecoder { gboolean silent; gboolean do_endpointing; gboolean inverse_scale; + float lmwt_scale; GstBufferSource *audio_source; gchar* model_rspecifier; -- GitLab