diff --git a/README b/README index a9f1b64b3c282d3947be4ee6798a0931acab5ed3..1d657acd64c6bc41dff55e8d081f069852fe4995 100644 --- a/README +++ b/README @@ -30,13 +30,15 @@ KALDI_ROOT=/path/of/kaldi-trunk make This should result in 'libgstkaldionline2.so'. -Test id GStreamer can access the plugin: +Test if GStreamer can access the plugin: GST_PLUGIN_PATH=. gst-inspect-1.0 kaldinnet2onlinedecoder - HOW TO USE IT ------------- -TODO +Command-line usage is demonstrated in `demo/`. + +Usage through GStreamer's Python bindings is demonstrated in +https://github.com/alumae/kaldi-gstreamer-server (kaldigstserver/decoder2.py). diff --git a/demo/dr_strangelove.mp3 b/demo/dr_strangelove.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6ea8c78669e9d7596dfdf58d3ff8af5c17519c0f Binary files /dev/null and b/demo/dr_strangelove.mp3 differ diff --git a/demo/prepare-models.sh b/demo/prepare-models.sh new file mode 100755 index 0000000000000000000000000000000000000000..48d14cd3dc26ebded05e756df0a705006ecf3b3e --- /dev/null +++ b/demo/prepare-models.sh @@ -0,0 +1,16 @@ +#! 
/bin/bash
+# Download the model files named below from BASE_URL into the current directory.
+BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
+
+MODEL=exp/nnet2_online/nnet_a_gpu_online
+GRAPH=exp/tri5a
+# wget -N (time-stamping) skips files whose local copy is already up to date.
+wget -N "$BASE_URL/$MODEL/final.mdl" || exit 1
+(mkdir -p ivector_extractor && cd ivector_extractor && wget -N "$BASE_URL/$MODEL/ivector_extractor/"{final.ie,final.dubm,final.mat,global_cmvn.stats}) || exit 1
+(mkdir -p conf && cd conf && wget -N "$BASE_URL/$MODEL/conf/"{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}) || exit 1
+
+wget -N "$BASE_URL/$GRAPH/graph/HCLG.fst" || exit 1
+wget -N "$BASE_URL/$GRAPH/graph/words.txt" || exit 1
+
+# Drop everything between '=' and 'nnet_a_gpu_online/' in each setting; the result goes to a separate .fixed.conf file.
+perl -pe 's/=.*nnet_a_gpu_online\//=/' conf/ivector_extractor.conf > conf/ivector_extractor.fixed.conf || exit 1
diff --git a/demo/transcribe-audio.sh b/demo/transcribe-audio.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dc771b5dcd8e6f8c0d574ab3466977c86eea6cbc
--- /dev/null
+++ b/demo/transcribe-audio.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Run one audio file through the kaldinnet2onlinedecoder GStreamer element
+# and write the element's output to stdout.
+# Looks for the plugin in ../src and for the model files (final.mdl, HCLG.fst,
+# words.txt, conf/) in the current directory.
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: transcribe-audio.sh <audio>"
+  echo "e.g.: transcribe-audio.sh dr_strangelove.mp3"
+  exit 1
+fi
+
+if ! GST_PLUGIN_PATH=../src gst-inspect-1.0 kaldinnet2onlinedecoder > /dev/null 2>&1; then
+  echo "Compile the plugin in ../src first"
+  exit 1
+fi
+
+if [ ! -f HCLG.fst ]; then
+  echo "Run ./prepare-models.sh first to download models"
+  exit 1
+fi
+
+audio=$1
+
+# "$audio" is quoted so the shell passes a path with spaces or glob characters as a single word.
+GST_PLUGIN_PATH=../src gst-launch-1.0 --gst-debug="" -q filesrc location="$audio" ! decodebin ! audioconvert ! audioresample ! \
+kaldinnet2onlinedecoder \
+  model=final.mdl \
+  fst=HCLG.fst \
+  word-syms=words.txt \
+  feature-type=mfcc \
+  mfcc-config=conf/mfcc.conf \
+  ivector-extraction-config=conf/ivector_extractor.fixed.conf \
+  max-active=7000 \
+  beam=11.0 \
+  lattice-beam=5.0 \
+  do-endpointing=true \
+  endpoint-silence-phones="1:2:3:4:5:6:7:8:9:10" \
+! filesink location=/dev/stdout buffer-mode=2