Skip to content
Snippets Groups Projects
Commit 28dd859d authored by tanel's avatar tanel
Browse files

Made chunk length configurable, can be increased to e.g. 0.2 to get some...

Made chunk length configurable, can be increased to e.g. 0.2 to get some speedup when multithreaded BLAS is used
parent bb3ac78d
No related branches found
No related tags found
No related merge requests found
...@@ -28,4 +28,5 @@ kaldinnet2onlinedecoder \ ...@@ -28,4 +28,5 @@ kaldinnet2onlinedecoder \
lattice-beam=5.0 \ lattice-beam=5.0 \
do-endpointing=true \ do-endpointing=true \
endpoint-silence-phones="1:2:3:4:5:6:7:8:9:10" \ endpoint-silence-phones="1:2:3:4:5:6:7:8:9:10" \
chunk-length-in-secs=0.2 \
! filesink location=/dev/stdout buffer-mode=2 ! filesink location=/dev/stdout buffer-mode=2
...@@ -72,6 +72,7 @@ enum { ...@@ -72,6 +72,7 @@ enum {
PROP_ADAPTATION_STATE, PROP_ADAPTATION_STATE,
PROP_INVERSE_SCALE, PROP_INVERSE_SCALE,
PROP_LMWT_SCALE, PROP_LMWT_SCALE,
PROP_CHUNK_LENGTH_IN_SECS,
PROP_LAST PROP_LAST
}; };
...@@ -79,6 +80,7 @@ enum { ...@@ -79,6 +80,7 @@ enum {
#define DEFAULT_FST "HCLG.fst" #define DEFAULT_FST "HCLG.fst"
#define DEFAULT_WORD_SYMS "words.txt" #define DEFAULT_WORD_SYMS "words.txt"
#define DEFAULT_LMWT_SCALE 1.0 #define DEFAULT_LMWT_SCALE 1.0
#define DEFAULT_CHUNK_LENGTH_IN_SECS 0.05
/* the capabilities of the inputs and outputs. /* the capabilities of the inputs and outputs.
* *
...@@ -208,6 +210,17 @@ static void gst_kaldinnet2onlinedecoder_class_init( ...@@ -208,6 +210,17 @@ static void gst_kaldinnet2onlinedecoder_class_init(
DEFAULT_LMWT_SCALE, DEFAULT_LMWT_SCALE,
(GParamFlags) G_PARAM_READWRITE)); (GParamFlags) G_PARAM_READWRITE));
g_object_class_install_property(
gobject_class,
PROP_CHUNK_LENGTH_IN_SECS,
g_param_spec_float(
"chunk-length-in-secs", "Length of a audio chunk that is processed at a time",
"Smaller values decrease latency, bigger values (e.g. 0.2) improve speed if multithreaded BLAS/MKL is used",
0.05,
G_MAXFLOAT,
DEFAULT_CHUNK_LENGTH_IN_SECS,
(GParamFlags) G_PARAM_READWRITE));
gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new( gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
"partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST, "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result), G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result),
...@@ -285,6 +298,7 @@ static void gst_kaldinnet2onlinedecoder_init( ...@@ -285,6 +298,7 @@ static void gst_kaldinnet2onlinedecoder_init(
filter->decoding = false; filter->decoding = false;
filter->lmwt_scale = DEFAULT_LMWT_SCALE; filter->lmwt_scale = DEFAULT_LMWT_SCALE;
filter->inverse_scale = FALSE; filter->inverse_scale = FALSE;
filter->chunk_length_in_secs = DEFAULT_CHUNK_LENGTH_IN_SECS;
// init properties from various Kaldi Opts // init properties from various Kaldi Opts
GstElementClass * klass = GST_ELEMENT_GET_CLASS(filter); GstElementClass * klass = GST_ELEMENT_GET_CLASS(filter);
...@@ -397,6 +411,9 @@ static void gst_kaldinnet2onlinedecoder_set_property(GObject * object, ...@@ -397,6 +411,9 @@ static void gst_kaldinnet2onlinedecoder_set_property(GObject * object,
case PROP_LMWT_SCALE: case PROP_LMWT_SCALE:
filter->lmwt_scale = g_value_get_float(value); filter->lmwt_scale = g_value_get_float(value);
break; break;
case PROP_CHUNK_LENGTH_IN_SECS:
filter->chunk_length_in_secs = g_value_get_float(value);
break;
case PROP_ADAPTATION_STATE: case PROP_ADAPTATION_STATE:
{ {
if (G_VALUE_HOLDS_STRING(value)) { if (G_VALUE_HOLDS_STRING(value)) {
...@@ -500,6 +517,9 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object, ...@@ -500,6 +517,9 @@ static void gst_kaldinnet2onlinedecoder_get_property(GObject * object,
case PROP_LMWT_SCALE: case PROP_LMWT_SCALE:
g_value_set_float(value, filter->lmwt_scale); g_value_set_float(value, filter->lmwt_scale);
break; break;
case PROP_CHUNK_LENGTH_IN_SECS:
g_value_set_float(value, filter->chunk_length_in_secs);
break;
case PROP_ADAPTATION_STATE: case PROP_ADAPTATION_STATE:
string_stream.clear(); string_stream.clear();
if (filter->adaptation_state) { if (filter->adaptation_state) {
...@@ -639,10 +659,9 @@ static void gst_kaldinnet2onlinedecoder_loop( ...@@ -639,10 +659,9 @@ static void gst_kaldinnet2onlinedecoder_loop(
Gstkaldinnet2onlinedecoder * filter) { Gstkaldinnet2onlinedecoder * filter) {
GST_DEBUG_OBJECT(filter, "Starting decoding loop.."); GST_DEBUG_OBJECT(filter, "Starting decoding loop..");
BaseFloat chunk_length_secs = 0.05;
BaseFloat traceback_period_secs = 1.0; BaseFloat traceback_period_secs = 1.0;
int32 chunk_length = int32(filter->sample_rate * chunk_length_secs); int32 chunk_length = int32(filter->sample_rate * filter->chunk_length_in_secs);
bool more_data = true; bool more_data = true;
while (more_data) { while (more_data) {
...@@ -675,7 +694,7 @@ static void gst_kaldinnet2onlinedecoder_loop( ...@@ -675,7 +694,7 @@ static void gst_kaldinnet2onlinedecoder_loop(
GST_DEBUG_OBJECT(filter, "Endpoint detected!"); GST_DEBUG_OBJECT(filter, "Endpoint detected!");
break; break;
} }
num_seconds_decoded += chunk_length_secs; num_seconds_decoded += filter->chunk_length_in_secs;
if (num_seconds_decoded - last_traceback > traceback_period_secs) { if (num_seconds_decoded - last_traceback > traceback_period_secs) {
Lattice lat; Lattice lat;
decoder.GetBestPath(false, &lat); decoder.GetBestPath(false, &lat);
...@@ -704,13 +723,6 @@ static void gst_kaldinnet2onlinedecoder_loop( ...@@ -704,13 +723,6 @@ static void gst_kaldinnet2onlinedecoder_loop(
} }
} }
// TODO: write adaptation state to string and push it out
// bool binary = false;
// Output ko("adaptation_state.txt", binary);
// adaptation_state.Write(ko.Stream(), binary);
// ko.Close();
GST_DEBUG_OBJECT(filter, "Finished decoding loop"); GST_DEBUG_OBJECT(filter, "Finished decoding loop");
GST_DEBUG_OBJECT(filter, "Pushing EOS event"); GST_DEBUG_OBJECT(filter, "Pushing EOS event");
gst_pad_push_event(filter->srcpad, gst_event_new_eos()); gst_pad_push_event(filter->srcpad, gst_event_new_eos());
...@@ -933,6 +945,10 @@ static void gst_kaldinnet2onlinedecoder_finalize(GObject * object) { ...@@ -933,6 +945,10 @@ static void gst_kaldinnet2onlinedecoder_finalize(GObject * object) {
if (filter->word_syms) { if (filter->word_syms) {
delete filter->word_syms; delete filter->word_syms;
} }
if (filter->adaptation_state) {
delete filter->adaptation_state;
}
G_OBJECT_CLASS(parent_class)->finalize(object); G_OBJECT_CLASS(parent_class)->finalize(object);
} }
......
...@@ -80,6 +80,7 @@ struct _Gstkaldinnet2onlinedecoder { ...@@ -80,6 +80,7 @@ struct _Gstkaldinnet2onlinedecoder {
fst::SymbolTable *word_syms; fst::SymbolTable *word_syms;
int sample_rate; int sample_rate;
gboolean decoding; gboolean decoding;
float chunk_length_in_secs;
OnlineIvectorExtractorAdaptationState *adaptation_state; OnlineIvectorExtractorAdaptationState *adaptation_state;
}; };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment