Skip to content
Snippets Groups Projects
Commit fb67515f authored by tanel's avatar tanel
Browse files

first

parents
No related branches found
No related tags found
No related merge requests found
README 0 → 100644
WHAT IT IS
----------
GStreamer plugin that wraps Kaldi's SingleUtteranceNnet2Decoder.
HOW TO COMPILE IT
-----------------
Compile Kaldi trunk, using the shared configuration:
In Kaldi's 'src' directory:
./configure --shared
make depend
make
Now we can compile this plugin:
Change to 'src' of this project:
cd src
Compile, specifying Kaldi's root directory:
KALDI_ROOT=/path/of/kaldi-trunk make
This should result in 'libgstkaldionline2.so'.
HOW TO USE IT
-------------
TODO
# Declare 'all' first so it is the default goal; its prerequisite is added
# further down, once $(LIBFILE) is known.
all:

# Root of the Kaldi source tree. '?=' makes this a fallback only, so that
# `KALDI_ROOT=/path/of/kaldi-trunk make` (as described in the README) works.
KALDI_ROOT ?= /home/tanel/tools/kaldi-online

# kaldi.mk is generated by Kaldi's configure; presumably it provides
# KALDI_FLAVOR, KALDILIBDIR, CXXFLAGS, LDLIBS and LDFLAGS used below.
include $(KALDI_ROOT)/src/kaldi.mk

ifneq ($(KALDI_FLAVOR), dynamic)
$(error Kaldi must be compiled with dynamic libraries support. Run configure with --shared flag. )
endif

CXXFLAGS+=-I$(KALDI_ROOT)/src

# Compiler flags for GStreamer 1.0 and GLib, taken from pkg-config.
EXTRA_CXXFLAGS += $(shell pkg-config --cflags gstreamer-1.0)
EXTRA_CXXFLAGS += $(shell pkg-config --cflags glib-2.0)

EXTRA_LDLIBS = -pthread -lgstbase-1.0 -lgstcontroller-1.0 -lgstreamer-1.0 -lgobject-2.0 -lgmodule-2.0 -lgthread-2.0 -lrt -lglib-2.0

#Kaldi shared libraries required by the GStreamer plugin
EXTRA_LDLIBS += -lkaldi-online2 -lkaldi-lat -lkaldi-decoder -lkaldi-feat -lkaldi-transform \
	-lkaldi-gmm -lkaldi-hmm \
	-lkaldi-tree -lkaldi-matrix -lkaldi-util -lkaldi-base -lkaldi-thread

OBJFILES = gstkaldinnet2onlinedecoder.o simple-options-gst.o gst-audio-source.o kaldimarshal.o

LIBNAME=gstkaldionline2
LIBFILE = lib$(LIBNAME).so
BINFILES= $(LIBFILE)

all: $(LIBFILE)

# MKL libs required when linked via shared library
ifdef MKLROOT
EXTRA_LDLIBS+=-lmkl_p4n -lmkl_def
endif

# Link the plugin as a shared object. The Kaldi library directory is recorded
# as an rpath, and --no-as-needed keeps every listed library as a dependency.
$(LIBFILE): $(OBJFILES)
	$(CXX) -shared -DPIC -o $(LIBFILE) -Wl,-soname=$(LIBFILE) -Wl,--no-as-needed \
	  -L$(KALDILIBDIR) -Wl,-rpath=$(KALDILIBDIR) $(EXTRA_LDLIBS) $(LDLIBS) $(LDFLAGS) \
	  $(OBJFILES)
# Generate the signal marshaller declarations from kaldimarshal.list. The
# output is written to a temporary file and renamed, so a failed run does not
# leave a partial header behind.
kaldimarshal.h: kaldimarshal.list
	glib-genmarshal --header --prefix=kaldi_marshal kaldimarshal.list > kaldimarshal.h.tmp
	mv kaldimarshal.h.tmp kaldimarshal.h

# Generate the marshaller bodies, prefixed with the two includes they need.
# The first echo truncates ('>') the temporary file so that a stale one left
# by an interrupted run cannot end up duplicated in the generated source.
kaldimarshal.cc: kaldimarshal.list kaldimarshal.h
	echo "#include \"glib-object.h\"" > kaldimarshal.c.tmp
	echo "#include \"kaldimarshal.h\"" >> kaldimarshal.c.tmp
	glib-genmarshal --body --prefix=kaldi_marshal kaldimarshal.list >> kaldimarshal.c.tmp
	mv kaldimarshal.c.tmp kaldimarshal.cc
# Remove object files, archives, the plugin library and the generated
# marshaller sources. The leading '-' makes make ignore a failing rm.
clean:
-rm -f *.o *.a $(TESTFILES) $(BINFILES) kaldimarshal.h kaldimarshal.cc
# kaldimarshal.h kaldimarshal.cc
# Write the compiler-reported dependencies of every .cc file to .depend.mk.
depend:
-$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk
# Read the dependency file if it exists; '-include' does not fail when it is missing.
-include .depend.mk
// gst-plugin/gst-audio-source.cc
// Copyright 2013 Tanel Alumae, Tallinn University of Technology
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <vector>
#include "gst-audio-source.h"
namespace kaldi {
// Creates an empty buffer queue with no buffer in progress, marks the source
// as not ended, and initializes the mutex/condition pair used by Read().
GstBufferSource::GstBufferSource()
    : buf_queue_(g_async_queue_new()),
      pos_in_current_buf_(0),
      current_buffer_(NULL),
      ended_(false) {
  // Single-channel, 16-bit input is hard-coded.
  KALDI_ASSERT(sizeof(SampleType) == 2 &&
               "The current GstBufferSource code assumes 16-bit input");
  g_cond_init(&data_cond_);
  g_mutex_init(&lock_);
}
// Releases the synchronization primitives, every buffer still held by the
// source (queued or partially consumed) and the queue itself.
GstBufferSource::~GstBufferSource() {
  g_cond_clear(&data_cond_);
  g_mutex_clear(&lock_);
  // The queue was created without a destroy notify, so unreffing it does not
  // release its items. Each queued buffer carries the reference taken in
  // PushBuffer(); drop those references before the queue goes away.
  gpointer queued = NULL;
  while ((queued = g_async_queue_try_pop(buf_queue_)) != NULL) {
    gst_buffer_unref(static_cast<GstBuffer*>(queued));
  }
  g_async_queue_unref(buf_queue_);
  if (current_buffer_) {
    gst_buffer_unref(current_buffer_);
    current_buffer_ = NULL;
  }
}
// Appends buf to the queue and wakes a reader waiting in Read(). The queue
// takes its own reference; the caller keeps the one it passed in.
void GstBufferSource::PushBuffer(GstBuffer *buf) {
  g_mutex_lock(&lock_);
  g_async_queue_push(buf_queue_, gst_buffer_ref(buf));
  g_cond_signal(&data_cond_);
  g_mutex_unlock(&lock_);
}
// Records whether the stream has ended and wakes a reader waiting in Read()
// so that it re-evaluates its wait condition.
void GstBufferSource::SetEnded(bool ended) {
  g_mutex_lock(&lock_);
  // Written under lock_ because Read() tests ended_ while holding the same
  // lock; updating it outside the lock was an unsynchronized write.
  ended_ = ended;
  g_cond_signal(&data_cond_);
  g_mutex_unlock(&lock_);
}
// Fills *data with up to data->Dim() samples taken from the queued buffers,
// blocking until enough data has been pushed or the source has been marked
// as ended. If fewer samples than requested were available, *data is resized
// to the number actually read. Returns false once the source has ended and
// no queued or partially consumed buffer remains; true otherwise.
bool GstBufferSource::Read(Vector<BaseFloat> *data) {
  const uint32 nsamples_req = data->Dim();  // (16bit) samples requested
  const size_t nbytes_req = nsamples_req * sizeof(SampleType);
  // Heap-backed staging buffer (a variable-length array is not standard C++).
  std::vector<SampleType> buf(nsamples_req);
  char *raw = buf.empty() ? NULL : reinterpret_cast<char *>(&buf[0]);
  size_t nbytes_transferred = 0;

  while (nbytes_transferred < nbytes_req) {
    g_mutex_lock(&lock_);
    // Wait until there is a buffer to consume, or until the stream has ended
    // with nothing left in the queue.
    while ((current_buffer_ == NULL) &&
           !((g_async_queue_length(buf_queue_) == 0) && ended_)) {
      current_buffer_ =
          reinterpret_cast<GstBuffer*>(g_async_queue_try_pop(buf_queue_));
      if (current_buffer_ == NULL) {
        g_cond_wait(&data_cond_, &lock_);
      }
    }
    g_mutex_unlock(&lock_);
    if (current_buffer_ == NULL) {
      break;  // ended and drained
    }

    const size_t nbytes_left_in_buf =
        gst_buffer_get_size(current_buffer_) - pos_in_current_buf_;
    const size_t nbytes_from_current =
        std::min(nbytes_req - nbytes_transferred, nbytes_left_in_buf);
    const size_t nbytes_extracted =
        gst_buffer_extract(current_buffer_, pos_in_current_buf_,
                           raw + nbytes_transferred, nbytes_from_current);
    KALDI_ASSERT(nbytes_extracted == nbytes_from_current
                 && "Unexpected number of bytes extracted from Gst buffer");
    nbytes_transferred += nbytes_from_current;
    pos_in_current_buf_ += nbytes_from_current;
    if (static_cast<gsize>(pos_in_current_buf_) ==
        gst_buffer_get_size(current_buffer_)) {
      // we are done with the current buffer
      gst_buffer_unref(current_buffer_);
      current_buffer_ = NULL;
      pos_in_current_buf_ = 0;
    }
  }

  const uint32 nsamples_received = nbytes_transferred / sizeof(SampleType);
  for (uint32 i = 0; i < nsamples_received; ++i) {
    (*data)(i) = static_cast<BaseFloat>(buf[i]);
  }
  if (nsamples_received < nsamples_req) {
    data->Resize(nsamples_received, kCopyData);
  }

  // More data can follow unless the stream has ended AND nothing is pending.
  // The queue length counts buffers, not bytes, so it has to be compared with
  // zero; comparing it with sizeof(SampleType) dropped a last queued buffer.
  return !((g_async_queue_length(buf_queue_) <= 0)
           && ended_
           && (current_buffer_ == NULL));
}
}
// gst-plugin/gst-audio-source.h
// Copyright 2013 Tanel Alumae, Tallinn University of Technology
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_GST_PLUGIN_GST_AUDIO_SOURCE_H_
#define KALDI_GST_PLUGIN_GST_AUDIO_SOURCE_H_
#include <matrix/kaldi-vector.h>
#include <gst/gst.h>
namespace kaldi {
// Audio source backed by a queue of Gst buffers. It offers the Read() call of
// Kaldi's OnlineAudioSourceItf, but note that it does not derive from it.
// One side hands over buffers with PushBuffer(); the other side pulls
// samples with Read(), which blocks while no data is available.
class GstBufferSource {
public:
typedef int16 SampleType; // hardcoded 16-bit audio
GstBufferSource();
// Implementation of the OnlineAudioSourceItf
// Fills *data with up to data->Dim() samples, resizing it when fewer were
// available. Returns false when the source has ended and it finds nothing
// further to read.
bool Read(Vector<BaseFloat> *data);
// Queues buf for reading; the source takes its own reference on it.
void PushBuffer(GstBuffer *buf);
// Sets the end-of-stream flag and wakes a blocked Read().
void SetEnded(bool ended);
~GstBufferSource();
private:
// Buffers handed over by PushBuffer() and not yet picked up by Read().
GAsyncQueue* buf_queue_;
// Byte offset of the next unread byte within current_buffer_.
gint pos_in_current_buf_;
// Buffer currently being consumed by Read(), or NULL if there is none.
GstBuffer *current_buffer_;
// True once SetEnded(true) has been called.
bool ended_;
// lock_ and data_cond_ let Read() sleep until PushBuffer()/SetEnded() signal.
GMutex lock_;
GCond data_cond_;
KALDI_DISALLOW_COPY_AND_ASSIGN(GstBufferSource);
};
} // namespace kaldi
#endif // KALDI_GST_PLUGIN_GST_AUDIO_SOURCE_H_
/*
* GStreamer
* Copyright (C) 2005 Thomas Vander Stichele <thomas@apestaart.org>
* Copyright (C) 2005 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright (C) 2014 Tanel Alumae <<user@hostname.org>>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Alternatively, the contents of this file may be used under the
* GNU Lesser General Public License Version 2.1 (the "LGPL"), in
* which case the following provisions apply instead of the ones
* mentioned above:
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/**
* SECTION:element-kaldinnet2onlinedecoder
*
* FIXME:Describe kaldinnet2onlinedecoder here.
*
* <refsect2>
* <title>Example launch line</title>
* |[
* gst-launch -v -m fakesrc ! kaldinnet2onlinedecoder ! fakesink silent=TRUE
* ]|
* </refsect2>
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#else
# define VERSION "1.0"
#endif
#include <gst/gst.h>
#include "kaldimarshal.h"
#include "gstkaldinnet2onlinedecoder.h"
#include "fstext/fstext-lib.h"
namespace kaldi {
GST_DEBUG_CATEGORY_STATIC(gst_kaldinnet2onlinedecoder_debug);
#define GST_CAT_DEFAULT gst_kaldinnet2onlinedecoder_debug
/* Filter signals and args */
// Indices into gst_kaldinnet2onlinedecoder_signals; LAST_SIGNAL is the count.
enum {
PARTIAL_RESULT_SIGNAL,
FINAL_RESULT_SIGNAL,
LAST_SIGNAL
};
// Ids of the fixed GObject properties. Ids from PROP_LAST upwards are used
// for the properties generated from the registered Kaldi options.
enum {
PROP_0,
PROP_SILENT,
PROP_MODEL,
PROP_FST,
PROP_WORD_SYMS,
PROP_DO_ENDPOINTING,
PROP_LAST
};
// Default values of the "model", "fst" and "word-syms" properties.
#define DEFAULT_MODEL "final.mdl"
#define DEFAULT_FST "HCLG.fst"
#define DEFAULT_WORD_SYMS "words.txt"
/* the capabilities of the inputs and outputs.
*
*/
// Sink pad: accepts raw audio only as signed 16-bit little-endian,
// one channel, 16000 Hz.
static GstStaticPadTemplate sink_factory =
GST_STATIC_PAD_TEMPLATE("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS(
"audio/x-raw, "
"format = (string) S16LE, "
"channels = (int) 1, "
"rate = (int) 16000 "));
// Source pad: produces UTF-8 raw text.
static GstStaticPadTemplate src_factory =
GST_STATIC_PAD_TEMPLATE("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS("text/x-raw, format= { utf8 }"));
// Signal ids returned by g_signal_new() in class_init, indexed by the
// *_SIGNAL enumerators.
static guint gst_kaldinnet2onlinedecoder_signals[LAST_SIGNAL];
// G_DEFINE_TYPE generates a <prefix>_parent_class variable; this alias lets
// the code below refer to it simply as parent_class.
#define gst_kaldinnet2onlinedecoder_parent_class parent_class
G_DEFINE_TYPE(Gstkaldinnet2onlinedecoder, gst_kaldinnet2onlinedecoder,
GST_TYPE_ELEMENT);
static void gst_kaldinnet2onlinedecoder_set_property(GObject * object,
guint prop_id,
const GValue * value,
GParamSpec * pspec);
static void gst_kaldinnet2onlinedecoder_get_property(GObject * object,
guint prop_id,
GValue * value,
GParamSpec * pspec);
static gboolean gst_kaldinnet2onlinedecoder_sink_event(GstPad * pad,
GstObject * parent,
GstEvent * event);
static GstFlowReturn gst_kaldinnet2onlinedecoder_chain(GstPad * pad,
GstObject * parent,
GstBuffer * buf);
static GstStateChangeReturn gst_kaldinnet2onlinedecoder_change_state(
GstElement *element, GstStateChange transition);
static void gst_kaldinnet2onlinedecoder_finalize(GObject * object);
/* GObject vmethod implementations */
/* Class initializer: hooks up the GObject/GstElement virtual methods and
 * registers the fixed properties, the two result signals, the element
 * metadata and both pad templates. */
static void gst_kaldinnet2onlinedecoder_class_init(
    Gstkaldinnet2onlinedecoderClass * klass) {
  GObjectClass *object_class = reinterpret_cast<GObjectClass *>(klass);
  GstElementClass *element_class = reinterpret_cast<GstElementClass *>(klass);
  const GParamFlags read_write = static_cast<GParamFlags>(G_PARAM_READWRITE);

  /* Virtual methods. */
  object_class->set_property = gst_kaldinnet2onlinedecoder_set_property;
  object_class->get_property = gst_kaldinnet2onlinedecoder_get_property;
  object_class->finalize = gst_kaldinnet2onlinedecoder_finalize;
  element_class->change_state = gst_kaldinnet2onlinedecoder_change_state;

  /* Fixed properties. */
  GParamSpec *silent_spec = g_param_spec_boolean(
      "silent", "Silent", "Silence the decoder", FALSE, read_write);
  g_object_class_install_property(object_class, PROP_SILENT, silent_spec);

  GParamSpec *model_spec = g_param_spec_string(
      "model", "Acoustic model", "Filename of the acoustic model",
      DEFAULT_MODEL, read_write);
  g_object_class_install_property(object_class, PROP_MODEL, model_spec);

  GParamSpec *fst_spec = g_param_spec_string(
      "fst", "Decoding FST", "Filename of the HCLG FST", DEFAULT_FST,
      read_write);
  g_object_class_install_property(object_class, PROP_FST, fst_spec);

  GParamSpec *word_syms_spec = g_param_spec_string(
      "word-syms", "Word symbols",
      "Name of word symbols file (typically words.txt)", DEFAULT_WORD_SYMS,
      read_write);
  g_object_class_install_property(object_class, PROP_WORD_SYMS,
                                  word_syms_spec);

  GParamSpec *endpointing_spec = g_param_spec_boolean(
      "do-endpointing", "If true, apply endpoint detection",
      "If true, apply endpoint detection, and split the audio at endpoints",
      FALSE, read_write);
  g_object_class_install_property(object_class, PROP_DO_ENDPOINTING,
                                  endpointing_spec);

  /* Signals; each carries the hypothesis as a single string argument. */
  const GType class_type = G_TYPE_FROM_CLASS(klass);
  gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
      "partial-result", class_type, G_SIGNAL_RUN_LAST,
      G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, partial_result),
      NULL, NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1, G_TYPE_STRING);
  gst_kaldinnet2onlinedecoder_signals[FINAL_RESULT_SIGNAL] = g_signal_new(
      "final-result", class_type, G_SIGNAL_RUN_LAST,
      G_STRUCT_OFFSET(Gstkaldinnet2onlinedecoderClass, final_result),
      NULL, NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1, G_TYPE_STRING);

  /* Element metadata and pad templates. */
  gst_element_class_set_details_simple(
      element_class, "KaldiNNet2OnlineDecoder", "Speech/Audio",
      "Convert speech to text", "Tanel Alumae <tanel.alumae@phon.ioc.ee>");
  gst_element_class_add_pad_template(
      element_class, gst_static_pad_template_get(&src_factory));
  gst_element_class_add_pad_template(
      element_class, gst_static_pad_template_get(&sink_factory));
}
/* initialize the new element
* instantiate pads and add them to element
* set pad callback functions
* initialize instance structure
*/
// Instance initializer: creates the sink and source pads, sets the fixed
// properties to their defaults, creates the Kaldi option structures and
// exposes every registered Kaldi option as a GObject property whose id starts
// at PROP_LAST.
// NOTE(review): the option properties are installed on the class from the
// instance initializer, so this code runs again for every further instance;
// confirm whether that is acceptable or should move to class_init.
static void gst_kaldinnet2onlinedecoder_init(
    Gstkaldinnet2onlinedecoder * filter) {
  filter->sinkpad = gst_pad_new_from_static_template(&sink_factory, "sink");
  gst_pad_set_event_function(
      filter->sinkpad,
      GST_DEBUG_FUNCPTR(gst_kaldinnet2onlinedecoder_sink_event));
  gst_pad_set_chain_function(
      filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldinnet2onlinedecoder_chain));
  gst_pad_use_fixed_caps(filter->sinkpad);
  gst_element_add_pad(GST_ELEMENT(filter), filter->sinkpad);

  filter->srcpad = gst_pad_new_from_static_template(&src_factory, "src");
  gst_pad_use_fixed_caps(filter->srcpad);
  gst_element_add_pad(GST_ELEMENT(filter), filter->srcpad);

  filter->silent = FALSE;
  filter->model_rspecifier = g_strdup(DEFAULT_MODEL);
  filter->fst_rspecifier = g_strdup(DEFAULT_FST);
  filter->word_syms_filename = g_strdup(DEFAULT_WORD_SYMS);

  filter->simple_options = new SimpleOptionsGst();
  filter->endpoint_config = new OnlineEndpointConfig();
  filter->feature_config = new OnlineNnet2FeaturePipelineConfig();
  filter->nnet2_decoding_config = new OnlineNnet2DecodingConfig();
  filter->endpoint_config->Register(filter->simple_options);
  filter->feature_config->Register(filter->simple_options);
  filter->nnet2_decoding_config->Register(filter->simple_options);

  // init properties from various Kaldi Opts
  GstElementClass * klass = GST_ELEMENT_GET_CLASS(filter);
  GObjectClass *object_class = G_OBJECT_CLASS(klass);
  const GParamFlags read_write = (GParamFlags) G_PARAM_READWRITE;

  typedef std::vector<std::pair<std::string, SimpleOptions::OptionInfo> >
      OptionInfoList;
  const OptionInfoList option_info_list =
      filter->simple_options->GetOptionInfoList();

  // The property id advances for every option, including any whose type is
  // not handled below, so ids keep matching the option's list position.
  int32 i = 0;
  for (OptionInfoList::const_iterator it = option_info_list.begin();
       it != option_info_list.end(); ++it, ++i) {
    const std::string &name = it->first;
    const SimpleOptions::OptionInfo &option_info = it->second;
    const char *prop_name = name.c_str();
    const char *doc = option_info.doc.c_str();
    switch (option_info.type) {
      case SimpleOptions::kBool: {
        bool tmp_bool;
        filter->simple_options->GetOption(name, &tmp_bool);
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_boolean(prop_name, doc, doc, tmp_bool, read_write));
        break;
      }
      case SimpleOptions::kInt32: {
        int32 tmp_int;
        filter->simple_options->GetOption(name, &tmp_int);
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_int(prop_name, doc, doc, G_MININT, G_MAXINT, tmp_int,
                             read_write));
        break;
      }
      case SimpleOptions::kUint32: {
        uint32 tmp_uint;
        filter->simple_options->GetOption(name, &tmp_uint);
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_uint(prop_name, doc, doc, 0, G_MAXUINT, tmp_uint,
                              read_write));
        break;
      }
      case SimpleOptions::kFloat: {
        float tmp_float;
        filter->simple_options->GetOption(name, &tmp_float);
        // G_MINFLOAT is the smallest *positive* float, so using it as the
        // lower bound rejected zero and negative values; the most negative
        // finite value is -G_MAXFLOAT.
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_float(prop_name, doc, doc, -G_MAXFLOAT, G_MAXFLOAT,
                               tmp_float, read_write));
        break;
      }
      case SimpleOptions::kDouble: {
        double tmp_double;
        filter->simple_options->GetOption(name, &tmp_double);
        // Same reasoning as for floats: G_MINDOUBLE is positive.
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_double(prop_name, doc, doc, -G_MAXDOUBLE,
                                G_MAXDOUBLE, tmp_double, read_write));
        break;
      }
      case SimpleOptions::kString: {
        std::string tmp_string;
        filter->simple_options->GetOption(name, &tmp_string);
        g_object_class_install_property(
            object_class, PROP_LAST + i,
            g_param_spec_string(prop_name, doc, doc, tmp_string.c_str(),
                                read_write));
        break;
      }
    }
  }
}
// GObject set_property handler. The fixed properties are stored directly in
// the element; ids from PROP_LAST upwards are looked up by property name in
// the registered Kaldi options and written there. Anything else triggers the
// standard invalid-property warning.
static void gst_kaldinnet2onlinedecoder_set_property(GObject * object,
                                                     guint prop_id,
                                                     const GValue * value,
                                                     GParamSpec * pspec) {
  Gstkaldinnet2onlinedecoder *filter = GST_KALDINNET2ONLINEDECODER(object);
  switch (prop_id) {
    case PROP_SILENT:
      filter->silent = g_value_get_boolean(value);
      break;
    case PROP_MODEL:
      g_free(filter->model_rspecifier);
      filter->model_rspecifier = g_value_dup_string(value);
      break;
    case PROP_FST:
      g_free(filter->fst_rspecifier);
      filter->fst_rspecifier = g_value_dup_string(value);
      break;
    case PROP_WORD_SYMS:
      g_free(filter->word_syms_filename);
      filter->word_syms_filename = g_value_dup_string(value);
      break;
    case PROP_DO_ENDPOINTING:
      filter->do_endpointing = g_value_get_boolean(value);
      break;
    default:
      if (prop_id >= PROP_LAST) {
        const gchar* name = g_param_spec_get_name(pspec);
        SimpleOptions::OptionType option_type;
        if (filter->simple_options->GetOptionType(std::string(name),
                                                  &option_type)) {
          switch (option_type) {
            case SimpleOptions::kBool:
              // gboolean is an int; convert explicitly so a bool (not an
              // integer) is handed to SetOption.
              filter->simple_options->SetOption(
                  name, g_value_get_boolean(value) != FALSE);
              break;
            case SimpleOptions::kInt32:
              filter->simple_options->SetOption(name, g_value_get_int(value));
              break;
            case SimpleOptions::kUint32:
              filter->simple_options->SetOption(name, g_value_get_uint(value));
              break;
            case SimpleOptions::kFloat:
              filter->simple_options->SetOption(name, g_value_get_float(value));
              break;
            case SimpleOptions::kDouble:
              filter->simple_options->SetOption(name,
                                                g_value_get_double(value));
              break;
            case SimpleOptions::kString: {
              // Borrow the string instead of duplicating it: the duplicate
              // was never freed. A NULL value is stored as an empty string.
              const gchar *str = g_value_get_string(value);
              filter->simple_options->SetOption(
                  name, std::string(str != NULL ? str : ""));
              break;
            }
          }
          break;
        }
      }
      G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
      break;
  }
}
// GObject get_property handler. Fixed properties are read from the element;
// ids from PROP_LAST upwards are resolved by property name against the
// registered Kaldi options. Unknown ids trigger the standard warning.
static void gst_kaldinnet2onlinedecoder_get_property(GObject * object,
                                                     guint prop_id,
                                                     GValue * value,
                                                     GParamSpec * pspec) {
  Gstkaldinnet2onlinedecoder *filter = GST_KALDINNET2ONLINEDECODER(object);

  // Fixed properties return straight away.
  switch (prop_id) {
    case PROP_SILENT:
      g_value_set_boolean(value, filter->silent);
      return;
    case PROP_MODEL:
      g_value_set_string(value, filter->model_rspecifier);
      return;
    case PROP_FST:
      g_value_set_string(value, filter->fst_rspecifier);
      return;
    case PROP_WORD_SYMS:
      g_value_set_string(value, filter->word_syms_filename);
      return;
    case PROP_DO_ENDPOINTING:
      g_value_set_boolean(value, filter->do_endpointing);
      return;
    default:
      break;
  }

  // Properties generated from Kaldi options.
  if (prop_id >= PROP_LAST) {
    const gchar* option_name = g_param_spec_get_name(pspec);
    SimpleOptions::OptionType kind;
    if (filter->simple_options->GetOptionType(std::string(option_name),
                                              &kind)) {
      switch (kind) {
        case SimpleOptions::kBool: {
          bool b;
          filter->simple_options->GetOption(option_name, &b);
          g_value_set_boolean(value, b);
          break;
        }
        case SimpleOptions::kInt32: {
          int32 n;
          filter->simple_options->GetOption(option_name, &n);
          g_value_set_int(value, n);
          break;
        }
        case SimpleOptions::kUint32: {
          uint32 u;
          filter->simple_options->GetOption(option_name, &u);
          g_value_set_uint(value, u);
          break;
        }
        case SimpleOptions::kFloat: {
          float f;
          filter->simple_options->GetOption(option_name, &f);
          g_value_set_float(value, f);
          break;
        }
        case SimpleOptions::kDouble: {
          double d;
          filter->simple_options->GetOption(option_name, &d);
          g_value_set_double(value, d);
          break;
        }
        case SimpleOptions::kString: {
          std::string text;
          filter->simple_options->GetOption(option_name, &text);
          g_value_set_string(value, text.c_str());
          break;
        }
      }
      return;
    }
  }

  G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
}
// Extracts the best path from clat, maps its word ids to text through the
// symbol table and, if the text is non-empty, pushes it (newline-terminated)
// on the source pad and emits the "final-result" signal. The frame count and
// likelihood of the best path are added to *tot_num_frames and *tot_like.
// An empty lattice only produces a warning.
static void gst_kaldinnet2onlinedecoder_final_result(
    Gstkaldinnet2onlinedecoder * filter, const CompactLattice &clat,
    int64 *tot_num_frames, double *tot_like) {
  if (clat.NumStates() == 0) {
    KALDI_WARN<< "Empty lattice.";
    return;
  }

  // Best path: compact lattice -> lattice -> linear symbol sequence.
  CompactLattice best_path_clat;
  CompactLatticeShortestPath(clat, &best_path_clat);
  Lattice best_path_lat;
  ConvertLattice(best_path_clat, &best_path_lat);

  std::vector<int32> alignment;
  std::vector<int32> words;
  LatticeWeight weight;
  GetLinearSymbolSequence(best_path_lat, &alignment, &words, &weight);

  int32 num_frames = alignment.size();
  double likelihood = -(weight.Value1() + weight.Value2());
  *tot_num_frames += num_frames;
  *tot_like += likelihood;
  GST_DEBUG_OBJECT(filter, "Likelihood per frame for is %f over %d frames",
                   (likelihood / num_frames), num_frames);

  // Join the words with single spaces.
  std::string hypothesis;
  for (size_t w = 0; w < words.size(); ++w) {
    std::string word = filter->word_syms->Find(words[w]);
    if (word == "")
      GST_ERROR_OBJECT(filter, "Word-id %d not in symbol table.", words[w]);
    if (w > 0) {
      hypothesis += " ";
    }
    hypothesis += word;
  }
  GST_DEBUG_OBJECT(filter, "Final: %s", hypothesis.c_str());

  guint hyp_length = hypothesis.length();
  if (hyp_length == 0) {
    return;
  }

  // One extra byte holds the trailing newline.
  GstBuffer *buffer = gst_buffer_new_and_alloc(hyp_length + 1);
  gst_buffer_fill(buffer, 0, hypothesis.c_str(), hyp_length);
  gst_buffer_memset(buffer, hyp_length, '\n', 1);
  gst_pad_push(filter->srcpad, buffer);

  /* Emit a signal for applications. */
  g_signal_emit(filter,
                gst_kaldinnet2onlinedecoder_signals[FINAL_RESULT_SIGNAL], 0,
                hypothesis.c_str());
}
// Converts the linear lattice lat into a space-separated word string using
// the symbol table and, if the string is non-empty, emits it through the
// "partial-result" signal. lat is taken by const reference so the lattice is
// not copied on every traceback.
static void gst_kaldinnet2onlinedecoder_partial_result(
    Gstkaldinnet2onlinedecoder * filter, const Lattice &lat) {
  LatticeWeight weight;
  std::vector<int32> alignment;
  std::vector<int32> words;
  GetLinearSymbolSequence(lat, &alignment, &words, &weight);

  std::stringstream sentence;
  for (size_t i = 0; i < words.size(); i++) {
    std::string s = filter->word_syms->Find(words[i]);
    if (s == "")
      GST_ERROR_OBJECT(filter, "Word-id %d not in symbol table.", words[i]);
    if (i > 0) {
      sentence << " ";
    }
    sentence << s;
  }

  // Materialize the text once instead of rebuilding it for every use.
  const std::string hypothesis = sentence.str();
  GST_DEBUG_OBJECT(filter, "Partial: %s", hypothesis.c_str());
  if (hypothesis.length() > 0) {
    /* Emit a signal for applications. */
    g_signal_emit(filter,
                  gst_kaldinnet2onlinedecoder_signals[PARTIAL_RESULT_SIGNAL], 0,
                  hypothesis.c_str());
  }
}
// Task function started on the source pad (see the SEGMENT handling in the
// sink event function). Reads audio from filter->audio_source in fixed-size
// chunks and decodes it. Each pass of the outer loop builds a fresh feature
// pipeline and decoder, decodes until the audio ends or an endpoint is
// detected, and then reports a final result. Once no more data is available
// it pushes EOS downstream, pauses the task and replaces the audio source
// with a new, empty one.
static void gst_kaldinnet2onlinedecoder_loop(
Gstkaldinnet2onlinedecoder * filter) {
GST_DEBUG_OBJECT(filter, "Starting decoding loop..");
// 0.05 s of audio per read; at the hard-coded 16000 Hz that is 800 samples.
BaseFloat chunk_length_secs = 0.05;
// A partial result is emitted once more than this much audio has been
// decoded since the previous one.
BaseFloat traceback_period_secs = 1.0;
int32 chunk_length = int32(16000 * chunk_length_secs);
bool more_data = true;
while (more_data) {
// Fresh adaptation state, feature pipeline and decoder for this pass.
OnlineIvectorExtractorAdaptationState adaptation_state(
filter->feature_info->ivector_extractor_info);
OnlineNnet2FeaturePipeline feature_pipeline(*(filter->feature_info));
feature_pipeline.SetAdaptationState(adaptation_state);
SingleUtteranceNnet2Decoder decoder(*(filter->nnet2_decoding_config),
*(filter->trans_model), *(filter->nnet),
*(filter->decode_fst),
&feature_pipeline);
Vector<BaseFloat> wave_part = Vector<BaseFloat>(chunk_length);
GST_DEBUG_OBJECT(filter, "Reading audio in %d sample chunks...",
wave_part.Dim());
BaseFloat last_traceback = 0.0;
BaseFloat num_seconds_decoded = 0.0;
while (true) {
// Read() blocks until data is available; it may shrink wave_part when
// fewer samples than requested were left.
more_data = filter->audio_source->Read(&wave_part);
//GST_DEBUG_OBJECT(filter, "Read %d samples", wave_part.Dim());
feature_pipeline.AcceptWaveform(16000, wave_part);
//GST_DEBUG_OBJECT(filter, "Advancing decoding..");
// Signal the end of input before the last decoding step.
if (!more_data) {
feature_pipeline.InputFinished();
}
decoder.AdvanceDecoding();
if (!more_data) {
break;
}
// With endpointing enabled, an endpoint ends this pass; the outer loop
// then starts a new one because more_data is still true.
if (filter->do_endpointing
&& decoder.EndpointDetected(*(filter->endpoint_config))) {
GST_DEBUG_OBJECT(filter, "Endpoint detected!");
break;
}
// Decoded time is advanced by the nominal chunk length per read.
num_seconds_decoded += chunk_length_secs;
if (num_seconds_decoded - last_traceback > traceback_period_secs) {
Lattice lat;
decoder.GetBestPath(false, &lat);
gst_kaldinnet2onlinedecoder_partial_result(filter, lat);
last_traceback += traceback_period_secs;
}
}
GST_DEBUG_OBJECT(filter, "Getting lattice..");
CompactLattice clat;
bool end_of_utterance = true;
decoder.GetLattice(end_of_utterance, &clat);
GST_DEBUG_OBJECT(filter, "Lattice done");
// These totals are local to the pass; they are filled in by the call below
// and not used afterwards.
double tot_like = 0.0;
int64 num_frames = 0;
gst_kaldinnet2onlinedecoder_final_result(filter, clat, &num_frames,
&tot_like);
}
GST_DEBUG_OBJECT(filter, "Finished decoding loop");
GST_DEBUG_OBJECT(filter, "Pushing EOS event");
gst_pad_push_event(filter->srcpad, gst_event_new_eos());
GST_DEBUG_OBJECT(filter, "Pausing decoding task");
gst_pad_pause_task(filter->srcpad);
// Replace the exhausted source so a later stream starts from a clean state.
// NOTE(review): the chain function may call PushBuffer() on audio_source
// from another thread while it is being replaced here - confirm this cannot
// overlap.
delete filter->audio_source;
filter->audio_source = new GstBufferSource();
}
/* GstElement vmethod implementations */
/* Sink pad event handler.
 *  - SEGMENT: starts the decoding task on the source pad.
 *  - CAPS:    accepted as is.
 *  - EOS:     marks the audio source as ended; the decoding loop pushes EOS
 *             downstream itself once it has drained the audio.
 * Every other event goes to the default handler. The handler owns the event
 * it is given, so events consumed here are unreffed instead of being leaked.
 */
static gboolean gst_kaldinnet2onlinedecoder_sink_event(GstPad * pad,
                                                       GstObject * parent,
                                                       GstEvent * event) {
  gboolean ret;
  Gstkaldinnet2onlinedecoder *filter;
  filter = GST_KALDINNET2ONLINEDECODER(parent);
  GST_DEBUG_OBJECT(filter, "Handling %s event", GST_EVENT_TYPE_NAME(event));
  switch (GST_EVENT_TYPE(event)) {
    case GST_EVENT_SEGMENT: {
      GST_DEBUG_OBJECT(filter, "Starting decoding task");
      gst_pad_start_task(filter->srcpad,
                         (GstTaskFunction) gst_kaldinnet2onlinedecoder_loop,
                         filter, NULL);
      GST_DEBUG_OBJECT(filter, "Started decoding task");
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_CAPS: {
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_EOS: {
      /* end-of-stream, we should close down all stream leftovers here */
      GST_DEBUG_OBJECT(filter, "EOS received");
      filter->audio_source->SetEnded(true);
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    default:
      /* the default handler takes over ownership of the event */
      ret = gst_pad_event_default(pad, parent, event);
      break;
  }
  return ret;
}
/* Chain function: receives one audio buffer from upstream.
 * Unless the element is silent, the buffer is queued on the audio source
 * (which takes its own reference); the incoming reference is always dropped.
 * Without an audio source an element error is posted and
 * GST_FLOW_NOT_NEGOTIATED is returned.
 */
static GstFlowReturn gst_kaldinnet2onlinedecoder_chain(GstPad * pad,
                                                       GstObject * parent,
                                                       GstBuffer * buf) {
  Gstkaldinnet2onlinedecoder *filter = GST_KALDINNET2ONLINEDECODER(parent);

  /* special case: nothing to queue the audio on */
  if (G_UNLIKELY(filter->audio_source == NULL)) {
    GST_ELEMENT_ERROR(filter, CORE, NEGOTIATION, (NULL),
                      ("decoder wasn't allocated before chain function"));
    gst_buffer_unref(buf);
    return GST_FLOW_NOT_NEGOTIATED;
  }

  if (!filter->silent) {
    filter->audio_source->PushBuffer(buf);
  }
  gst_buffer_unref(buf);
  return GST_FLOW_OK;
}
// Loads everything needed for decoding the first time it is called: the word
// symbol table, the audio source, the feature pipeline info, the transition
// model, the neural net and the decoding FST. Later calls are no-ops,
// detected through filter->feature_info. Returns false if the symbol table
// cannot be read.
static bool gst_kaldinnet2onlinedecoder_allocate(
    Gstkaldinnet2onlinedecoder * filter) {
  if (filter->feature_info) {
    return true;  // already loaded
  }
  GST_INFO_OBJECT(filter, "Loading Kaldi models and feature extractor");

  // Read the symbol table before anything else. Previously a failure here
  // happened after feature_info had been set, so a retry skipped loading and
  // reported success while word_syms was still NULL.
  filter->word_syms = fst::SymbolTable::ReadText(filter->word_syms_filename);
  if (!filter->word_syms) {
    GST_ERROR_OBJECT(filter, "Could not read symbol table from file %s",
                     filter->word_syms_filename);
    return false;
  }

  filter->audio_source = new GstBufferSource();
  filter->feature_info = new OnlineNnet2FeaturePipelineInfo(
      *(filter->feature_config));
  filter->trans_model = new TransitionModel();
  filter->nnet = new nnet2::AmNnet();
  {
    // The transition model and the net are read from the same stream, in
    // that order.
    bool binary;
    Input ki(filter->model_rspecifier, &binary);
    filter->trans_model->Read(ki.Stream(), binary);
    filter->nnet->Read(ki.Stream(), binary);
  }
  filter->decode_fst = fst::ReadFstKaldi(filter->fst_rspecifier);
  return true;
}
// Intentionally keeps the loaded models in memory: loading them can take a
// long time, so they stay available after the first allocation. Only logs
// the decision and reports success.
static bool gst_kaldinnet2onlinedecoder_deallocate(
    Gstkaldinnet2onlinedecoder * filter) {
  GST_INFO_OBJECT(filter, "Refusing to unload Kaldi models");
  return true;
}
// State change handler. Loads the models before going NULL -> READY (failing
// the transition if that does not succeed), chains up to the parent class,
// and calls the deallocation hook after a successful READY -> NULL.
static GstStateChangeReturn gst_kaldinnet2onlinedecoder_change_state(
    GstElement *element, GstStateChange transition) {
  Gstkaldinnet2onlinedecoder *filter = GST_KALDINNET2ONLINEDECODER(element);

  // Upward transition: resources must exist before the parent handles it.
  if (transition == GST_STATE_CHANGE_NULL_TO_READY &&
      !gst_kaldinnet2onlinedecoder_allocate(filter)) {
    return GST_STATE_CHANGE_FAILURE;
  }

  const GstStateChangeReturn result =
      GST_ELEMENT_CLASS(parent_class)->change_state(element, transition);
  if (result == GST_STATE_CHANGE_FAILURE) {
    return result;
  }

  // Downward transition: clean up only after the parent is done.
  if (transition == GST_STATE_CHANGE_READY_TO_NULL) {
    gst_kaldinnet2onlinedecoder_deallocate(filter);
  }
  return result;
}
// GObject finalizer: frees the property strings, the Kaldi option structures
// and the objects created when the models were loaded, then chains up to the
// parent class.
static void gst_kaldinnet2onlinedecoder_finalize(GObject * object) {
  Gstkaldinnet2onlinedecoder *filter = GST_KALDINNET2ONLINEDECODER(object);

  g_free(filter->model_rspecifier);
  g_free(filter->fst_rspecifier);
  g_free(filter->word_syms_filename);

  delete filter->endpoint_config;
  delete filter->feature_config;
  delete filter->nnet2_decoding_config;
  if (filter->simple_options) {
    delete filter->simple_options;
    filter->simple_options = NULL;
  }

  // The READY -> NULL hook deliberately keeps the models loaded, so this is
  // the only place where they can be released. Pointers that were never set
  // are NULL, for which delete is a no-op.
  delete filter->audio_source;
  filter->audio_source = NULL;
  delete filter->feature_info;
  filter->feature_info = NULL;
  delete filter->trans_model;
  filter->trans_model = NULL;
  delete filter->nnet;
  filter->nnet = NULL;
  delete filter->decode_fst;
  filter->decode_fst = NULL;
  delete filter->word_syms;
  filter->word_syms = NULL;

  G_OBJECT_CLASS(parent_class)->finalize(object);
}
/* Plugin entry point: sets up the debug category used for filtering log
 * messages and registers the element factory.
 * Returns the result of the element registration.
 */
static gboolean kaldinnet2onlinedecoder_init(
    GstPlugin * kaldinnet2onlinedecoder) {
  /* The description string below is still the template placeholder. */
  GST_DEBUG_CATEGORY_INIT(gst_kaldinnet2onlinedecoder_debug,
                          "kaldinnet2onlinedecoder", 0,
                          "Template kaldinnet2onlinedecoder");

  const gboolean registered = gst_element_register(
      kaldinnet2onlinedecoder, "kaldinnet2onlinedecoder", GST_RANK_NONE,
      GST_TYPE_KALDINNET2ONLINEDECODER);
  return registered;
}
/* PACKAGE: GST_PLUGIN_DEFINE needs PACKAGE to be defined.
 *
 * It is usually set by autotools (from an _INIT macro in configure.ac, then
 * written into config.h). The fallback below is used only when the build has
 * not already defined PACKAGE, so the code still compiles without autotools.
 */
#ifndef PACKAGE
#define PACKAGE "myfirstkaldinnet2onlinedecoder"
#endif
/* Plugin descriptor: GStreamer looks for this structure to register the
 * plugin.
 *
 * The description string 'Template kaldinnet2onlinedecoder' is still the
 * template placeholder and is meant to be replaced with a real description.
 * kaldinnet2onlinedecoder_init is the entry point that runs on registration.
 *
 * NOTE(review): VERSION is not defined in the visible part of this file -
 * presumably supplied by the build system; confirm.
 */
GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, kaldinnet2onlinedecoder,
"Template kaldinnet2onlinedecoder",
kaldinnet2onlinedecoder_init, VERSION, "LGPL", "GStreamer",
"http://gstreamer.net/")
}
/*
* GStreamer
* Copyright (C) 2005 Thomas Vander Stichele <thomas@apestaart.org>
* Copyright (C) 2005 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright (C) 2014 Tanel Alumae <<user@hostname.org>>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Alternatively, the contents of this file may be used under the
* GNU Lesser General Public License Version 2.1 (the "LGPL"), in
* which case the following provisions apply instead of the ones
* mentioned above:
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef __GST_KALDINNET2ONLINEDECODER_H__
#define __GST_KALDINNET2ONLINEDECODER_H__
#include <gst/gst.h>
#include "simple-options-gst.h"
#include "gst-audio-source.h"
#include "online2/online-nnet2-decoding.h"
#include "online2/onlinebin-util.h"
#include "online2/online-timing.h"
#include "online2/online-endpoint.h"
#include "fstext/fstext-lib.h"
#include "lat/lattice-functions.h"
namespace kaldi {
G_BEGIN_DECLS
/* Standard GObject type macros for the decoder element.
 * (#defines don't like whitespacey bits, hence the line continuations.) */

/* GType of the element, obtained from gst_kaldinnet2onlinedecoder_get_type(). */
#define GST_TYPE_KALDINNET2ONLINEDECODER \
(gst_kaldinnet2onlinedecoder_get_type())
/* Checked cast of an instance pointer to Gstkaldinnet2onlinedecoder. */
#define GST_KALDINNET2ONLINEDECODER(obj) \
(G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_KALDINNET2ONLINEDECODER,Gstkaldinnet2onlinedecoder))
/* Checked cast of a class pointer to Gstkaldinnet2onlinedecoderClass. */
#define GST_KALDINNET2ONLINEDECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_KALDINNET2ONLINEDECODER,Gstkaldinnet2onlinedecoderClass))
/* Instance type check against the element's GType. */
#define GST_IS_KALDINNET2ONLINEDECODER(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_KALDINNET2ONLINEDECODER))
/* Class type check against the element's GType. */
#define GST_IS_KALDINNET2ONLINEDECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_KALDINNET2ONLINEDECODER))
/* Short names for the instance and class structures declared below. */
typedef struct _Gstkaldinnet2onlinedecoder Gstkaldinnet2onlinedecoder;
typedef struct _Gstkaldinnet2onlinedecoderClass Gstkaldinnet2onlinedecoderClass;
/* Instance structure of the decoder element.
 *
 * Member order is part of the object layout; do not reorder.
 */
struct _Gstkaldinnet2onlinedecoder
{
/* Parent instance; kept as the first member so the GstElement casts work. */
GstElement element;
/* The element's pads (presumably audio in on sinkpad, results out on
 * srcpad - confirm against the pad templates). */
GstPad *sinkpad, *srcpad;
/* NOTE(review): presumably suppresses output when set - confirm. */
gboolean silent;
/* NOTE(review): presumably enables endpoint detection - confirm. */
gboolean do_endpointing;
/* NOTE(review): presumably the buffer-backed audio input for the decoder -
 * confirm against gst-audio-source.h. */
GstBufferSource *audio_source;
/* String fields; released with g_free() in the element's finalize. */
gchar* model_rspecifier;
gchar* fst_rspecifier;
gchar* word_syms_filename;
/* Options registry; deleted in the element's finalize. */
SimpleOptionsGst *simple_options;
/* Kaldi configuration objects; deleted in the element's finalize. */
OnlineEndpointConfig *endpoint_config;
OnlineNnet2FeaturePipelineConfig *feature_config;
OnlineNnet2DecodingConfig *nnet2_decoding_config;
/* Decoding resources. They are not released in finalize; presumably they
 * are created and destroyed by the allocate/deallocate routines run on
 * state changes - confirm. */
OnlineNnet2FeaturePipelineInfo *feature_info;
TransitionModel *trans_model;
nnet2::AmNnet *nnet;
fst::Fst<fst::StdArc> *decode_fst;
fst::SymbolTable *word_syms;
//OnlineNnet2FeaturePipeline *feature_pipeline;
};
/* Class structure of the decoder element. */
struct _Gstkaldinnet2onlinedecoderClass
{
/* Parent class; kept as the first member so the GstElementClass casts work. */
GstElementClass parent_class;
/* Handler slots taking the element and a result string.
 * NOTE(review): presumably the default handlers of the element's partial and
 * final result signals - confirm against the signal registration. */
void (*partial_result)(GstElement *element, const gchar *result_str);
void (*final_result)(GstElement *element, const gchar *result_str);
};
/* Returns the GType of the decoder element; this is what the
 * GST_TYPE_KALDINNET2ONLINEDECODER macro expands to. */
GType gst_kaldinnet2onlinedecoder_get_type (void);
G_END_DECLS
}
#endif /* __GST_KALDINNET2ONLINEDECODER_H__ */
// simple-options-gst.cc
// Copyright 2014 Tanel Alumae, Tallinn University of Technology
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "simple-options-gst.h"
#include <algorithm>
namespace kaldi {
// Registers a bool option: the name is first passed through TransformName()
// and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, bool *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Registers an int32 option: the name is first passed through
// TransformName() and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, int32 *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Registers a uint32 option: the name is first passed through
// TransformName() and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, uint32 *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Registers a float option: the name is first passed through
// TransformName() and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, float *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Registers a double option: the name is first passed through
// TransformName() and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, double *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Registers a std::string option: the name is first passed through
// TransformName() and the result is forwarded to SimpleOptions::Register().
void SimpleOptionsGst::Register(const std::string &name, std::string *ptr,
                                const std::string &doc) {
  SimpleOptions::Register(TransformName(name), ptr, doc);
}
// Returns a copy of `name` in which every '.' has been replaced by '-'.
std::string SimpleOptionsGst::TransformName(const std::string &name) {
  std::string result(name);
  for (std::string::size_type i = 0; i < result.size(); ++i) {
    if (result[i] == '.') {
      result[i] = '-';
    }
  }
  return result;
}
}
#ifndef SIMPLE_OPTIONS_GST_H_
#define SIMPLE_OPTIONS_GST_H_
#include <string>
#include "util/simple-options.h"
namespace kaldi {
// Variant of Kaldi's SimpleOptions that replaces every '.' in an option name
// with '-' before registering it, so that GStreamer does not have to rewrite
// the names itself.
//
// The Register() overloads are public so that they can be called directly on
// a SimpleOptionsGst object as well as through a base-class pointer.
// NOTE(review): they presumably override the virtual Register() overloads of
// the base class - confirm against util/simple-options.h.
class SimpleOptionsGst : public SimpleOptions {
 public:
  // Each overload transforms `name` with TransformName() and forwards the
  // result, `ptr` and `doc` to the matching SimpleOptions::Register().
  void Register(const std::string &name, bool *ptr, const std::string &doc);
  void Register(const std::string &name, int32 *ptr, const std::string &doc);
  void Register(const std::string &name, uint32 *ptr, const std::string &doc);
  void Register(const std::string &name, float *ptr, const std::string &doc);
  void Register(const std::string &name, double *ptr, const std::string &doc);
  void Register(const std::string &name, std::string *ptr,
                const std::string &doc);

 private:
  // Returns a copy of `name` with every '.' replaced by '-'.
  std::string TransformName(const std::string &name);
};
}
#endif // SIMPLE_OPTIONS_GST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment