From e78a50867a6cc733bf2b2a34be7a942cd1758495 Mon Sep 17 00:00:00 2001 From: Amit Beka <amit.beka@gmail.com> Date: Tue, 25 Nov 2014 18:06:34 +0200 Subject: [PATCH] add inverse-scale property When decoding the input, the output lattice is scaled using the acoustic scale parameter. When 'inverse-scale' is set to TRUE, the lattice is re-scaled back (inverse the acoustic scale) before the hypothesis is extracted. This behavior is common when playing with different scales after the decoding phase. Signed-off-by: Amit Beka <amit.beka@gmail.com> --- src/gstkaldinnet2onlinedecoder.cc | 26 +++++++++++++++++++++++++- src/gstkaldinnet2onlinedecoder.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/gstkaldinnet2onlinedecoder.cc b/src/gstkaldinnet2onlinedecoder.cc index 3a7455e..6c8749e 100644 --- a/src/gstkaldinnet2onlinedecoder.cc +++ b/src/gstkaldinnet2onlinedecoder.cc @@ -70,6 +70,7 @@ enum { PROP_WORD_SYMS, PROP_DO_ENDPOINTING, PROP_ADAPTATION_STATE, + PROP_INVERSE_SCALE, PROP_LAST }; @@ -185,6 +186,15 @@ static void gst_kaldinnet2onlinedecoder_class_init( "", (GParamFlags) G_PARAM_READWRITE)); + g_object_class_install_property( + gobject_class, + PROP_INVERSE_SCALE, + g_param_spec_boolean( + "inverse-scale", "If true, inverse acoustic scale in lattice", + "If true, inverse the acoustic scaling of the output lattice", + FALSE, + (GParamFlags) G_PARAM_READWRITE)); + gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new( "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST, G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result), @@ -260,6 +270,7 @@ static void gst_kaldinnet2onlinedecoder_init( filter->feature_info = NULL; filter->sample_rate = 0; filter->decoding = false; + filter->inverse_scale = FALSE; // init properties from various Kaldi Opts GstElementClass * klass = GST_ELEMENT_GET_CLASS(filter); @@ -366,6 +377,9 @@ static void gst_kaldinnet2onlinedecoder_set_property(GObject * object, case PROP_DO_ENDPOINTING: filter->do_endpointing = g_value_get_boolean(value); break; + case PROP_INVERSE_SCALE: + filter->inverse_scale = g_value_get_boolean(value); + break; case PROP_ADAPTATION_STATE: { if (G_VALUE_HOLDS_STRING(value)) { @@ -463,6 +477,9 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object, case PROP_DO_ENDPOINTING: g_value_set_boolean(value, filter->do_endpointing); break; + case PROP_INVERSE_SCALE: + g_value_set_boolean(value, filter->inverse_scale); + break; case PROP_ADAPTATION_STATE: string_stream.clear(); if (filter->adaptation_state) { @@ -513,13 +530,20 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object, } static void gst_kaldinnet2onlinedecoder_final_result( - Gstkaldinnet2onlinedecoder * filter, const CompactLattice &clat, + Gstkaldinnet2onlinedecoder * filter, CompactLattice &clat, int64 *tot_num_frames, double *tot_like, guint *num_words) { if (clat.NumStates() == 0) { KALDI_WARN<< "Empty lattice."; return; } CompactLattice best_path_clat; + + if (filter->inverse_scale) { + BaseFloat inv_acoustic_scale = 1.0 / filter->nnet2_decoding_config-> + decodable_opts.acoustic_scale; + fst::ScaleLattice(fst::AcousticLatticeScale(inv_acoustic_scale), &clat); + } + CompactLatticeShortestPath(clat, &best_path_clat); Lattice best_path_lat; diff --git a/src/gstkaldinnet2onlinedecoder.h b/src/gstkaldinnet2onlinedecoder.h index ef02fae..a1bb9d2 100644 --- a/src/gstkaldinnet2onlinedecoder.h +++ b/src/gstkaldinnet2onlinedecoder.h @@ -60,6 +60,7 @@ struct _Gstkaldinnet2onlinedecoder { gboolean silent; gboolean do_endpointing; + gboolean inverse_scale; GstBufferSource *audio_source; gchar* model_rspecifier; -- GitLab