From e78a50867a6cc733bf2b2a34be7a942cd1758495 Mon Sep 17 00:00:00 2001
From: Amit Beka <amit.beka@gmail.com>
Date: Tue, 25 Nov 2014 18:06:34 +0200
Subject: [PATCH] add inverse-scale property

When decoding the input, the output lattice is scaled using the acoustic
scale parameter. When 'inverse-scale' is set to TRUE, the lattice is
scaled back (the acoustic scaling is inverted) before the hypothesis is
extracted.

This is commonly needed when experimenting with different scales after
the decoding phase.

Signed-off-by: Amit Beka <amit.beka@gmail.com>
---
 src/gstkaldinnet2onlinedecoder.cc | 26 +++++++++++++++++++++++++-
 src/gstkaldinnet2onlinedecoder.h  |  1 +
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/gstkaldinnet2onlinedecoder.cc b/src/gstkaldinnet2onlinedecoder.cc
index 3a7455e..6c8749e 100644
--- a/src/gstkaldinnet2onlinedecoder.cc
+++ b/src/gstkaldinnet2onlinedecoder.cc
@@ -70,6 +70,7 @@ enum {
   PROP_WORD_SYMS,
   PROP_DO_ENDPOINTING,
   PROP_ADAPTATION_STATE,
+  PROP_INVERSE_SCALE,
   PROP_LAST
 };
 
@@ -185,6 +186,15 @@ static void gst_kaldinnet2onlinedecoder_class_init(
                           "",
                           (GParamFlags) G_PARAM_READWRITE));
 
+  g_object_class_install_property(
+      gobject_class,
+      PROP_INVERSE_SCALE,
+      g_param_spec_boolean(
+          "inverse-scale", "If true, inverse acoustic scale in lattice",
+          "If true, inverse the acoustic scaling of the output lattice",
+          FALSE,
+          (GParamFlags) G_PARAM_READWRITE));
+
   gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
       "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
       G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result),
@@ -260,6 +270,7 @@ static void gst_kaldinnet2onlinedecoder_init(
   filter->feature_info = NULL;
   filter->sample_rate = 0;
   filter->decoding = false;
+  filter->inverse_scale = FALSE;
 
   // init properties from various Kaldi Opts
   GstElementClass * klass = GST_ELEMENT_GET_CLASS(filter);
@@ -366,6 +377,9 @@ static void gst_kaldinnet2onlinedecoder_set_property(GObject * object,
     case PROP_DO_ENDPOINTING:
       filter->do_endpointing = g_value_get_boolean(value);
       break;
+    case PROP_INVERSE_SCALE:
+      filter->inverse_scale = g_value_get_boolean(value);
+      break;
     case PROP_ADAPTATION_STATE:
       {
         if (G_VALUE_HOLDS_STRING(value)) {
@@ -463,6 +477,9 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object,
     case PROP_DO_ENDPOINTING:
       g_value_set_boolean(value, filter->do_endpointing);
       break;
+    case PROP_INVERSE_SCALE:
+      g_value_set_boolean(value, filter->inverse_scale);
+      break;
     case PROP_ADAPTATION_STATE:
       string_stream.clear();
       if (filter->adaptation_state) {
@@ -513,13 +530,20 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object,
 }
 
 static void gst_kaldinnet2onlinedecoder_final_result(
-    Gstkaldinnet2onlinedecoder * filter, const CompactLattice &clat,
+    Gstkaldinnet2onlinedecoder * filter, CompactLattice &clat,
     int64 *tot_num_frames, double *tot_like, guint *num_words) {
   if (clat.NumStates() == 0) {
     KALDI_WARN<< "Empty lattice.";
     return;
   }
   CompactLattice best_path_clat;
+
+  if (filter->inverse_scale) {
+    BaseFloat inv_acoustic_scale = 1.0 / filter->nnet2_decoding_config->
+	    decodable_opts.acoustic_scale;
+    fst::ScaleLattice(fst::AcousticLatticeScale(inv_acoustic_scale), &clat);
+  }
+
   CompactLatticeShortestPath(clat, &best_path_clat);
 
   Lattice best_path_lat;
diff --git a/src/gstkaldinnet2onlinedecoder.h b/src/gstkaldinnet2onlinedecoder.h
index ef02fae..a1bb9d2 100644
--- a/src/gstkaldinnet2onlinedecoder.h
+++ b/src/gstkaldinnet2onlinedecoder.h
@@ -60,6 +60,7 @@ struct _Gstkaldinnet2onlinedecoder {
 
   gboolean silent;
   gboolean do_endpointing;
+  gboolean inverse_scale;
   GstBufferSource *audio_source;
 
   gchar* model_rspecifier;
-- 
GitLab