Skip to content
Snippets Groups Projects
Commit 065e0966 authored by Benoit Favre's avatar Benoit Favre
Browse files

import ui prototype to git

parents
No related branches found
No related tags found
No related merge requests found
*.pyc
asr/
README 0 → 100644
Deps:
- GTK3 with the Python 2 GObject bindings (on Arch Linux: the gtk3 and python2-gobject packages)
- get and compile https://github.com/alumae/gst-kaldi-nnet2-online (which itself requires Kaldi)
Install:
./download-models.sh
copy libgstkaldionline2.so to ./asr/ or change GST_PLUGIN_PATH in main.py to point to its directory
Run:
python2 main.py
from gi.repository import Gtk
class ActionView(Gtk.HBox):
    """Horizontal bar with one button per available action.

    Clicking a button asks the attached confirmer (if any) for confirmation
    before the action is performed; without a confirmer the action is
    performed immediately.

    Attributes:
        actions: set of action names that `perform` accepts.
        confirmer: object exposing `confirm(message, time, yes_callback)`,
            or None when no confirmation step is wanted.
    """

    def __init__(self):
        super(ActionView, self).__init__()
        self.actions = set(['next', 'previous', 'light-on', 'light-off'])
        self.pack_start(Gtk.Label('Actions:'), False, False, 5)
        for action in self.actions:
            button = Gtk.Button(action)
            button.connect('clicked', self._perform)
            self.pack_start(button, False, False, 5)
        # Was misspelled `confiermer`, which left `self.confirmer` undefined
        # and made `_perform` raise AttributeError until `set_confirmer` ran.
        self.confirmer = None

    # NOTE: the former `actions()` accessor was removed. The instance
    # attribute `self.actions` assigned in __init__ shadowed it, so it could
    # never be called; read the `actions` attribute directly instead.

    def _perform(self, button):
        """Handle a button click: confirm first when a confirmer is set."""
        action = button.get_label()
        if self.confirmer:
            self.confirmer.confirm('Perform action "%s"?' % action, 4,
                                   lambda: self.perform(action))
        else:
            self.perform(action)

    def perform(self, action):
        """Perform `action`; return True if it is known, False otherwise."""
        if action in self.actions:
            print('PERFORM %s' % action)
            return True
        return False

    def set_confirmer(self, confirmer):
        """Attach the object used to confirm actions before performing them."""
        self.confirmer = confirmer
from gi.repository import GObject, Gtk
import levenstein
class Alignment:
    """Read-only text view that colours reference words by alignment result.

    The reference text is split on whitespace once; each call to `align`
    rewrites the buffer with the reference words, tagging matched words green
    and substituted words orange. Deleted words are left untagged.
    """

    def __init__(self, ref):
        self.ref = ref.split()
        view = Gtk.TextView()
        view.set_editable(False)
        view.set_cursor_visible(False)
        buf = view.get_buffer()
        buf.set_text('Section\n' + ref)
        self.view = view
        self.buffer = buf
        self.subst = buf.create_tag("orange_bg", background="orange")
        self.ok_word = buf.create_tag("green_bg", background="green")

    def get_view(self):
        """Return the Gtk.TextView that displays the alignment."""
        return self.view

    def align(self, hyp):
        """Align `hyp` (a word sequence) to the reference and redraw the buffer."""
        num_errors, num_ref, alignment, score = levenstein.align(self.ref, hyp)
        buf = self.buffer
        buf.set_text('Section\n')
        levenstein.print_alignment(alignment)
        for ref_word, hyp_word in alignment:
            if ref_word is None:
                # Insertion: there is no reference word to display.
                continue
            end = buf.get_end_iter()
            if hyp_word is None:
                # Deletion: show the reference word without any tag.
                buf.insert(buf.get_end_iter(), ref_word + ' ')
                continue
            tag = self.ok_word if ref_word == hyp_word else self.subst
            buf.insert_with_tags(end, ref_word, tag)
            buf.insert(buf.get_end_iter(), ' ')
asr.py 0 → 100644
import os, sys
from threading import Thread
import gi
# Select the GStreamer 1.0 introspection bindings; this call comes before
# `Gst` is imported from gi.repository on the next line.
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst, Gtk, Gdk
# Module-level initialisation: set up GObject and GDK threading support and
# initialise GStreamer (ASR.__init__ builds its pipeline on a worker thread).
GObject.threads_init()
Gdk.threads_init()
Gst.init(None)
class ASR(Gtk.HBox):
    """Transcript pane with a Speak/Stop button backed by a Kaldi online decoder.

    The widget packs a read-only, auto-scrolling text view and a button. A
    GStreamer pipeline (pulsesrc -> audioconvert -> audioresample ->
    kaldinnet2onlinedecoder -> fakesink) is built on a background thread; the
    button stays insensitive until loading has finished.

    `hyp_callback`, when given, is called with the list of hypothesis strings
    (one entry per utterance, the last entry being the utterance in progress)
    each time the decoder reports a partial or final result.
    """

    def __init__(self, hyp_callback=None):
        super(ASR, self).__init__()
        self.text = Gtk.TextView()
        self.text.set_editable(False)
        self.text.set_cursor_visible(False)
        self.buffer = self.text.get_buffer()
        self.text.set_wrap_mode(Gtk.WrapMode.WORD)

        self.scrolled = Gtk.ScrolledWindow()
        self.scrolled.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.ALWAYS)
        self.scrolled.add_with_viewport(self.text)
        self.scrolled.set_size_request(-1, 100)
        self.pack_start(self.scrolled, True, True, 5)

        self.button = Gtk.Button("Speak")
        self.button.set_sensitive(False)
        self.pack_start(self.button, False, False, 5)
        self.button.connect('clicked', self.button_clicked)
        self.text.connect("size-allocate", self.autoscroll)

        self.hyp = []
        self.hyp_callback = hyp_callback
        # Loading happens off the main thread so the window can be shown
        # while the models are being read.
        Thread(target=self.init_gst).start()

    def init_gst(self):
        """Initialize the speech components (runs on the worker thread).

        NOTE(review): sys.exit() only raises SystemExit in the calling
        thread, so the error paths below terminate this worker thread rather
        than the whole application.
        """
        GObject.idle_add(self._started_loading_asr)
        self.pulsesrc = Gst.ElementFactory.make("pulsesrc", "pulsesrc")
        if self.pulsesrc is None:
            sys.stderr.write("Error loading pulsesrc GST plugin. You probably need the gstreamer1.0-pulseaudio package\n")
            sys.exit(1)
        self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert")
        self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample")
        self.asr = Gst.ElementFactory.make("kaldinnet2onlinedecoder", "asr")
        self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")

        if not self.asr:
            sys.stderr.write("Couldn't create the kaldinnet2onlinedecoder element. \n")
            if "GST_PLUGIN_PATH" in os.environ:
                sys.stderr.write("Have you compiled the Kaldi GStreamer plugin?\n")
            else:
                sys.stderr.write("You probably need to set the GST_PLUGIN_PATH envoronment variable\n")
                sys.stderr.write("Try running: GST_PLUGIN_PATH=../src %s\n" % sys.argv[0])
            sys.exit(1)

        model_file = "asr/final.mdl"
        if not os.path.isfile(model_file):
            # The README names the download script download-models.sh.
            sys.stderr.write("Models not downloaded? Run download-models.sh first!\n")
            sys.exit(1)
        # The property order is kept exactly as in the original code.
        self.asr.set_property("fst", "asr/HCLG.fst")
        self.asr.set_property("model", model_file)
        self.asr.set_property("word-syms", "asr/words.txt")
        self.asr.set_property("feature-type", "mfcc")
        self.asr.set_property("mfcc-config", "asr/conf/mfcc.conf")
        self.asr.set_property("ivector-extraction-config", "asr/conf/ivector_extractor.fixed.conf")
        self.asr.set_property("max-active", 7000)  # 7000
        self.asr.set_property("beam", 11.0)  # 11
        self.asr.set_property("lattice-beam", 6.0)  # 6
        self.asr.set_property("do-endpointing", True)
        self.asr.set_property("endpoint-silence-phones", "1:2:3:4:5:6:7:8:9:10")

        # initially silence the decoder
        self.asr.set_property("silent", True)

        self.pipeline = Gst.Pipeline()
        for element in [self.pulsesrc, self.audioconvert, self.audioresample, self.asr, self.fakesink]:
            self.pipeline.add(element)
        self.pulsesrc.link(self.audioconvert)
        self.audioconvert.link(self.audioresample)
        self.audioresample.link(self.asr)
        self.asr.link(self.fakesink)

        self.asr.connect('partial-result', self._on_partial_result)
        self.asr.connect('final-result', self._on_final_result)
        self.pipeline.set_state(Gst.State.PLAYING)
        GObject.idle_add(self._finished_loading_asr)

    def load_state(self):
        """Restore the decoder's adaptation state from state.txt (best effort)."""
        try:
            with open('state.txt') as fp:
                self.asr.set_property('adaptation-state', fp.read())
        except Exception:
            # Still best effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way the bare `except:` did.
            sys.stderr.write('failed to load asr state\n')

    def save_state(self):
        """Write the decoder's adaptation state to state.txt (best effort)."""
        # The decoder is missing while loading is in progress, or None when
        # element creation failed; there is nothing to save in either case.
        if getattr(self, 'asr', None) is None:
            return
        state = self.asr.get_property('adaptation-state')
        try:
            with open('state.txt', 'w') as fp:
                fp.write(state)
        except Exception:
            sys.stderr.write('failed to save asr state\n')

    def _started_loading_asr(self):
        """Idle callback: show the button as disabled and loading."""
        self.button.set_sensitive(False)
        self.button.set_label("Loading...")

    def _finished_loading_asr(self):
        """Idle callback: enable the button once the pipeline is running."""
        self.button.set_label("Speak")
        self.button.set_sensitive(True)

    def _replace_last_line(self, text):
        """Replace the last line of the transcript buffer with `text`."""
        self.insert = self.buffer.get_iter_at_line(self.buffer.get_line_count() - 1)
        self.buffer.delete(self.insert, self.buffer.get_end_iter())
        self.buffer.insert(self.insert, text)

    def _on_partial_result(self, asr, hyp):
        """Record a partial hypothesis and show it on the last transcript line."""
        Gdk.threads_enter()
        try:
            if not self.hyp:
                self.hyp = ['']
            self.hyp[-1] = hyp
            if self.hyp_callback:
                self.hyp_callback(self.hyp)
            self._replace_last_line(hyp + '...')
        finally:
            # Always release the GDK lock, even if the callback raised.
            Gdk.threads_leave()

    def _on_final_result(self, asr, hyp):
        """Record a final hypothesis and start a new transcript line."""
        Gdk.threads_enter()
        try:
            if not self.hyp:
                self.hyp = ['']
            self.hyp[-1] = hyp
            self.hyp.append('')
            if self.hyp_callback:
                self.hyp_callback(self.hyp)
            self._replace_last_line(hyp + '\n...')
        finally:
            Gdk.threads_leave()

    def autoscroll(self, *args):
        """Keep the transcript scrolled to the bottom when its size changes."""
        adj = self.scrolled.get_vadjustment()
        adj.set_value(adj.get_upper() - adj.get_page_size())

    def button_clicked(self, button):
        """Handle button presses: toggle the decoder between listening and silent."""
        if button.get_label() == "Speak":
            button.set_label("Stop")
            self.asr.set_property("silent", False)
            self.hyp = []
            self.buffer.set_text('...')
        else:
            button.set_label("Speak")
            self.asr.set_property("silent", True)
from gi.repository import GObject, Gtk
class ConfirmationBox(Gtk.HBox):
    """Inline yes/no prompt that answers "yes" automatically after a countdown.

    `confirm` shows the box with a message and starts a one-second timer; the
    YES button shows the remaining seconds. Clicking YES or NO, or letting
    the countdown reach its end (treated as YES), hides the box and calls the
    matching callback.
    """

    def __init__(self):
        super(ConfirmationBox, self).__init__()
        self.label = Gtk.Label()
        self.label.get_style_context().add_class('confirm')
        self.yes_button = Gtk.Button("YES")
        self.no_button = Gtk.Button("NO")
        self.pack_start(self.label, True, True, 5)
        self.pack_start(self.yes_button, False, False, 5)
        self.pack_start(self.no_button, False, False, 5)
        self.yes_button.connect('clicked', self.click_yes)
        self.no_button.connect('clicked', self.click_no)
        self.counter = 0
        self.yes_callback = None
        self.no_callback = None
        self.timer = None

    def confirm(self, message, time, yes_callback=None, no_callback=None):
        """Show `message` and answer YES after `time` seconds unless clicked.

        Args:
            message: text displayed next to the buttons.
            time: countdown length in seconds (converted with int()).
            yes_callback: called with no arguments on YES or on timeout.
            no_callback: called with no arguments on NO.
        """
        # A previous request may still be counting down. Without this its
        # timer kept running beside the new one, so the counter dropped twice
        # per second and the old timer id was lost.
        self.cancel_timer()
        self.yes_callback = yes_callback
        self.no_callback = no_callback
        self.label.set_text(message)
        self.counter = int(time)
        self.yes_button.get_child().set_text("YES (%d)" % self.counter)
        self.timer = GObject.timeout_add_seconds(1, self.countdown)
        self.show()

    def _finish(self, callback):
        """Stop the countdown, hide the box and run `callback` if there is one."""
        self.cancel_timer()
        self.hide()
        self.counter = 0
        # Clear the stored callbacks before calling, so a callback that starts
        # a new confirmation does not have its request wiped afterwards.
        self.yes_callback = None
        self.no_callback = None
        if callback:
            callback()

    def click_yes(self, button = None):
        """Accept the pending request (also used when the countdown ends)."""
        self._finish(self.yes_callback)

    def click_no(self, button = None):
        """Reject the pending request."""
        self._finish(self.no_callback)

    def countdown(self):
        """Timer callback: tick the counter, answering YES when it runs out.

        Returns True to keep the one-second timer running and False to stop it.
        """
        if self.counter > 1:
            self.counter -= 1
            self.yes_button.get_child().set_text("YES (%d)" % self.counter)
            return True
        # Returning False removes this timer, so drop its id first and let
        # cancel_timer() skip the source that is currently dispatching.
        self.timer = None
        self.click_yes()
        return False

    def cancel_timer(self):
        """Remove the countdown timer if one is pending."""
        if self.timer is not None:
            GObject.source_remove(self.timer)
        self.timer = None
This diff is collapsed.
<homeostasis version="29-09-2014">
<section id="1" action="">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="" lang="esp"> uno </keyword>
<keyword action="#end" lang="eng"> open system </keyword>
</sequence>
</section>
<section id="2" action="">
<sequence ordre="strict" repetition="non" action="" lang="esp">
<keyword action="#end" lang="esp"> dos </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="" lang="eng"> tell me </keyword>
<keyword action="" lang="eng"> open technical characteristics </keyword>
<keyword action="" lang="eng"> read </keyword>
<keyword action="" lang="eng"> next </keyword>
<keyword action="" lang="eng"> yes </keyword>
<keyword action="" lang="eng"> read </keyword>
<keyword action="" lang="eng"> download </keyword>
<keyword action="" lang="eng"> open the terms and conditions of use of body x epsilon system three point zero </keyword>
<keyword action="" lang="eng"> accept terms and conditions of use </keyword>
<keyword action="" lang="eng"> next </keyword>
<keyword action="#end" lang="eng"> install the new version of me </keyword>
</sequence>
</section>
<section id="3" action="">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="" lang="esp"> tres </keyword>
<keyword action="#end" lang="eng"> open access to body data </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="" lang="eng"> import body data </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import organic matter data </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import temperature </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import time </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import space data </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import position </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body subsystems </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import estate </keyword>
<keyword action="#end" lang="eng"> upload </keyword>
</sequence>
</section>
<section id="4" action="">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="" lang="esp"> quatro </keyword>
<keyword action="#end" lang="eng"> open access to body functions </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="" lang="eng"> import body functions space localization </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions sensations </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions passion </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions concentration </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions perception </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions formal force </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions logics </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions imagination </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions effort </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions nervous system </keyword>
<keyword action="" lang="eng"> upload </keyword>
<keyword action="" lang="eng"> import body functions internal network </keyword>
<keyword action="#end" lang="eng"> upload </keyword>
</sequence>
</section>
<section id="5" action="">
<sequence ordre="strict" repetition="non" action="" lang="esp">
<keyword action="#end" lang="esp"> cinco </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="" lang="eng"> set up the connection </keyword>
<keyword action="" lang="eng"> open access to memory </keyword>
<keyword action="" lang="eng"> import memory data </keyword>
<keyword action="" lang="eng"> open memory </keyword>
<keyword action="" lang="eng"> deactivate the security system </keyword>
<keyword action="" lang="eng"> deactivate the security system of new version </keyword>
<keyword action="" lang="eng"> open conditions and terms of use </keyword>
<keyword action="" lang="eng"> modify the security conditions of new version anyway </keyword>
<keyword action="#end" lang="eng"> import memory </keyword>
</sequence>
</section>
<section id="6" action="#open constellation">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="" lang="esp"> seis </keyword>
<keyword action="#end" lang="eng"> open network </keyword>
</sequence>
<sequence ordre="variable" repetition="oui" action="" lang="eng">
clouds
beautiful
<keyword action="" lang="eng"> data </keyword> clouds
rain of identities
storm of possibilities
body wifi
<keyword action="" lang="eng"> search </keyword> and redo <keyword action="" lang="eng"> connection </keyword>
connect me to this network
function looking for
something must stay
something must exist
a <keyword action="" lang="eng"> thread </keyword>
something exists between recollection and oblivion
a tension an echo an emptiness
an emptiness
something rare stays through
nothing more strange than this exile
an absolute abyss
a creaking of the bones
a barbarian invasion
the carelessness of destinies
wringing out the <keyword action="" lang="eng"> blood </keyword>
something should
must or must not
something stays
<keyword action="#end"> try with functional regulation </keyword>
</sequence>
<sequence ordre="variable" repetition="oui" action="" lang="eng">
a like
possible function
display inline
erase the space
important
state
geolocalization
important
encode passion for located constellation
center
six
six
nine
important
some threads are broken
remove
memory
love
love
memory
love
errors
a
identical
identity
identifier
where
answer
being
boing
boot
reboot
border of body
body
being
begun
begin
win
border of being
actions
search
the answer of the
there is
there is something
bo boom
body password
the pa pa pathetic pancreas
give
to drive
drive
to me
to memo
rise
say
say hello
data
gi give me
the
this
mine
mineral
give me
you your
give me your data
give me yours
give me your data
begun begin wins
wine blood
everywhere
give
give your blood or
or go
golden
golden data
me
to go
pro protein protection aminoacid
to go
where is
this is
bilar
violence segment
segregation
memory
encode
where
where is everybody
where is nobody
is where
but
feet
free freedom
give me go go
data
going
fast
clouds of something
else than
be center in the center of being
the place
of the space
where
the end starts
contact skin
left arm conductor nerve sensor
supplementary information like
sex name date of birth place where you live
give me your data
access accede
the door is opened
muscular oxygen
try with functions in realtime
speak with someone from another space in realtime
to consult a realtime information
climate
the exact state
of such or such
a street
to buy
to choose
to access to all the information of the real world
to have virtual sex with
sex with data
with a child
a chicken
a dead terrorist
a screen
other functions
function
analyze function capacity
capacity
capacity
to modify
to reinventing inner information
analyzes of the capacity of modification of basic structures
the problem is not the information
it's the connection
it's the link
the bond
it is the access
it is the key
it is the code
try with utopian function
how to say
that which the nothingness understands
that which fear hides
in the promises of the future
in the frustration of the human
in the disappointment of humanity
to envy
the cadence of machines
their speed
their power
the infinite of the virtual
the utopia of the possibilities
the simplicity of the cables
the capacity of calculation
the efficiency of the systems
<keyword action="#end"> and to want </keyword>
</sequence>
<sequence ordre="variable" repetition="oui" lang="esp" action="">
como decir
lo que la nada entiende
lo que el miedo esconde
en las promesas del futuro
en la frustracion de lo humano
en la decepcion de la humanidad
envidiar
la cadencia de las maquinas
su velocidad
su potencia
lo infinito de lo virtual
la utopia de los possibles
la simplicidad de los cables
la capacidad de calculo
la eficacia de los sistemas
<keyword action="#end"> y querer </keyword>
</sequence>
<sequence ordre="variable" repetition="oui" lang="eng" action="">
try with upper motoneuron
functions functions
code variants
nerve motoneuron
con con con
connect me to this network
i'm just looking for the connection
a connection
a link
something that links me to something
something that tells me that i am not an isolated element
that i am not dead
to generate a new memory
to store it where i want to store it
to visit it when i want to visit it
isolated points in space
drops unable to be a sea
grains of sand that do not conceive of the desert
leaves that do not remember the tree from where they fell
i feed on an electrical juice
on an image that i generate
and that i can change with my fingers
searching fingers
research fingers
search
results
space where there are neither limits nor angles
where the horizon is nothing else than bits
images from a space more real than reality
try with identity function
accede to the infinite spaces
identifier required
something that identify me like i
identity is a movement
random combination
temporal accident
ephemeral system
to replace identity to identifier
liquid identities
composed by codes
codes codes codes
and some private data like
geolocalization
traced actions
autoproduced images
try with existential functions
to be innumerable parallel identities
from innumerable parallel worlds
interconnected parallel worlds
interconnected parallel identities
to be downloaded
to be installed
to be executed in the application of being
to be synchronized
to be shared
to mark an alien silence
to remain in the space of between
to fight in the body
to transcend the effect
the fault
the default
in effect
to effect
to activate
to create
to give to give to give to give
to give to give to give
to go for a walk in the internal desert
to return to the center
to scratch the bones
to join the form
to relead deviations of the brain
to integrate the knot
to undress the center
to be relocated in space and time
to be the extrinsic silence
to undress time
to relead the desert
to close the access
to dance
information in movement
the movement of information
the access code
the code
the access to movement
the body
the fear
the recollection
the naked center
the paralysis of the fear
to replace the fear code
the code of the abyss
to be downloaded
to be installed
to be executed in the application of being
to be synchronized
to be shared
to be
try with cognitive function
transparent margins
elastic borders
perpetual dissociation between container and content
if everything
if if
is possible
in the magnetic center of nothingness
abstract intimacy
of the immaterial universe
try with structural regulation
open body wifi connection
no data detected
link memory function
locate
locate data storage
cells or
shining electronics and very well organized cables
fragile
cells or machines
motivated by a flow of an electrical blood composed of zero and one
a binary logic
everything
everything can't be logic
everything can't be binary
but everything
what has happened and what happens
it's all written down somewhere
try with morphogenetics auto regulation
activation of nervous system
link nervous system to
cells
heard
genetics data bank
send emergency message to
send emergency message to
open body data bank
open cells code
open genetic code
</sequence>
</section>
<section id="8" action="">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="" lang="esp"> ocho </keyword>
<keyword action="#end" lang="eng"> search for sequences producing significant alignments in genetic database </keyword>
</sequence>
<sequence ordre="variable" repetition="oui" action="" lang="eng">
<keyword action="" lang="eng"> record </keyword>
<keyword action="" lang="eng"> stop </keyword>
<keyword action="" lang="eng"> save this memory in genetic database </keyword>
<keyword action="" lang="eng"> next </keyword>
<keyword action="" lang="eng"> record </keyword>
<keyword action="" lang="eng"> stop </keyword>
<keyword action="" lang="eng"> erase this memory </keyword>
</sequence>
</section>
</homeostasis>
/* Label showing the current section number (class added by SectionManager). */
.current-section {
font: bold 18;
}
/* One label per script line (class added by Line); no overrides yet. */
.text-line {
}
/* Title label of a Section widget. */
.section-title {
font: bold 18;
color: white;
background: #999999;
}
/* Container box of a Section widget. */
.section-body {
background: white;
}
/* Title label of a Sequence widget. */
.sequence-title {
font: bold 14;
}
/* Container box of a Sequence widget; no overrides yet. */
.sequence-body {
}
/* Message label of the ConfirmationBox. */
.confirm {
font: bold 14;
background: #ff9999;
}
/* Box built by XmlView.parse_xml that holds all Section widgets. */
.xmlview {
background: white;
}
#! /bin/bash
# Download the Kaldi online-nnet2 model files into ./asr/ and rewrite the
# ivector extractor config so that its paths point at ./asr/.
# wget -N only re-downloads files that are newer on the server.
set -e -u -o pipefail

BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5
MODEL=exp/nnet2_online/nnet_a_gpu_online
GRAPH=exp/tri5a

mkdir -p asr/ivector_extractor asr/conf

# The brace lists stay outside the quotes so that bash still expands them.
pushd asr/ivector_extractor
wget -N "$BASE_URL/$MODEL"/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}
popd

pushd asr/conf
wget -N "$BASE_URL/$MODEL"/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}
popd

pushd asr
wget -N "$BASE_URL/$GRAPH/graph/HCLG.fst"
wget -N "$BASE_URL/$GRAPH/graph/words.txt"
wget -N "$BASE_URL/$MODEL/final.mdl"
popd

# Replace everything between '=' and 'nnet_a_gpu_online/' with 'asr/'.
# -p already implies the read loop of -n; input comes from a redirect, not cat.
perl -pe 's/=.*nnet_a_gpu_online\//=asr\//' < asr/conf/ivector_extractor.conf > asr/conf/ivector_extractor.fixed.conf
import sys
def align(ref_words, hyp_words, sub_cost=None, ins_cost=None, del_cost=None):
    """Align a hypothesis to a reference with weighted edit distance.

    Args:
        ref_words: reference sequence (list of words, or a string for a
            character-level alignment).
        hyp_words: hypothesis sequence of the same kind.
        sub_cost, ins_cost, del_cost: optional cost tables indexed as
            `cost[ref_index][hyp_index]`. When omitted, a substitution costs
            1 and an insertion or deletion costs 0.75 inside the table (the
            first row and column use a cost of 1 per step).

    Returns:
        A tuple `(num_errors, num_ref, alignment, score)`. `alignment` is a
        list of `[ref_word, hyp_word]` pairs in reading order, with None on
        the reference side for an insertion and on the hypothesis side for a
        deletion. `score` is the dynamic-programming table, or an empty list
        when either input is empty.
    """
    num_ref = len(ref_words)
    num_hyp = len(hyp_words)
    if num_hyp == 0:
        # Every reference word is a deletion. The alignment used to come back
        # empty here, so callers lost the reference words.
        return num_ref, num_ref, [[word, None] for word in ref_words], []
    if num_ref == 0:
        # Every hypothesis word is an insertion. This used to report 0 errors.
        return num_hyp, 0, [[None, word] for word in hyp_words], []

    OK, SUB, DEL, INS = 0, 1, 2, 3
    score = [[0] * (num_hyp + 1) for _ in range(num_ref + 1)]
    backtrack = [[0] * (num_hyp + 1) for _ in range(num_ref + 1)]

    # First column: only deletions are possible.
    for i in range(num_ref + 1):
        if del_cost is None or i == 0:
            score[i][0] = i
        else:
            score[i][0] = score[i - 1][0] + del_cost[i - 1][0]
        backtrack[i][0] = DEL
    # First row: only insertions are possible.
    for j in range(num_hyp + 1):
        if ins_cost is None or j == 0:
            score[0][j] = j
        else:
            score[0][j] = score[0][j - 1] + ins_cost[0][j - 1]
        backtrack[0][j] = INS

    for i in range(1, num_ref + 1):
        for j in range(1, num_hyp + 1):
            sub_type = OK
            sub_value = score[i - 1][j - 1]
            if ref_words[i - 1] != hyp_words[j - 1]:
                if sub_cost is not None:
                    sub_value += sub_cost[i - 1][j - 1]
                else:
                    sub_value += 1
                sub_type = SUB
            if del_cost is not None:
                del_value = score[i - 1][j] + del_cost[i - 1][j - 1]
            else:
                del_value = score[i - 1][j] + 0.75
            if ins_cost is not None:
                ins_value = score[i][j - 1] + ins_cost[i - 1][j - 1]
            else:
                ins_value = score[i][j - 1] + 0.75
            # Tie-breaking is unchanged: match/substitution wins ties, and an
            # insertion wins a tie against a deletion.
            if sub_value <= del_value:
                if sub_value <= ins_value:
                    score[i][j] = sub_value
                    backtrack[i][j] = sub_type
                else:
                    score[i][j] = ins_value
                    backtrack[i][j] = INS
            elif del_value < ins_value:
                score[i][j] = del_value
                backtrack[i][j] = DEL
            else:
                score[i][j] = ins_value
                backtrack[i][j] = INS

    # Walk back from the bottom-right corner. Pairs are appended and reversed
    # once at the end instead of being inserted at index 0 (quadratic).
    alignment = []
    i = num_ref
    j = num_hyp
    num_errors = 0
    while i > 0 or j > 0:
        step = backtrack[i][j]
        if step == OK or step == SUB:
            if step == SUB:
                num_errors += 1
            alignment.append([ref_words[i - 1], hyp_words[j - 1]])
            i -= 1
            j -= 1
        elif step == INS:
            num_errors += 1
            alignment.append([None, hyp_words[j - 1]])
            j -= 1
        elif step == DEL:
            num_errors += 1
            alignment.append([ref_words[i - 1], None])
            i -= 1
    alignment.reverse()
    return num_errors, num_ref, alignment, score
def print_alignment(alignment):
    """Print an alignment as two column-aligned rows: reference, then hypothesis.

    A missing word (None) is shown as a run of '*' as long as its counterpart;
    otherwise the shorter word of a pair is padded with spaces on the right.
    """
    ref_row = []
    hyp_row = []
    for ref_word, hyp_word in alignment:
        if ref_word is None:
            ref_word = '*' * len(hyp_word)
        elif hyp_word is None:
            hyp_word = '*' * len(ref_word)
        else:
            width = max(len(ref_word), len(hyp_word))
            ref_word = ref_word.ljust(width)
            hyp_word = hyp_word.ljust(width)
        ref_row.append(ref_word)
        hyp_row.append(hyp_word)
    print(' '.join(ref_row))
    print(' '.join(hyp_row))
def wer(ref, hyp):
    """Return the number of errors found when aligning `hyp` to `ref`.

    This is the raw error count reported by `align`, not a rate.
    """
    return align(ref, hyp)[0]
if __name__ == '__main__':
    # Small character-level demo: align two strings and report the error rate.
    reference, hypothesis = "hello", "hollow"
    errors, length, pairs, _ = align(reference, hypothesis)
    print_alignment(pairs)
    print("error_rate: %s" % (float(errors) / length))
from gi.repository import GObject, Gtk, Pango
class ListView(Gtk.VBox):
    """Prototype list of sections shown in a single-column tree view.

    NOTE(review): neither `Section` nor `CellRendererButton` is defined or
    imported in this module as shown, so construction raises NameError unless
    they are provided elsewhere -- confirm.
    """

    def __init__(self):
        super(ListView, self).__init__()
        model = Gtk.ListStore(GObject.TYPE_PYOBJECT)
        for title in ('Section 1', 'Section 2', 'Section 3'):
            model.append([Section(title)])
        view = Gtk.TreeView(model)
        cell = CellRendererButton()
        view.append_column(Gtk.TreeViewColumn("Title", cell, text=0))
        self.pack_start(view, True, True, 0)
if __name__ == '__main__':
    # Stand-alone demo: show a ListView in its own window until it is closed.
    demo_window = Gtk.Window()
    demo_window.connect("destroy", Gtk.main_quit)
    demo_window.add(ListView())
    demo_window.show_all()
    Gtk.main()
main.py 0 → 100755
#!/usr/bin/env python2
import sys
import os
# Set to the directory that contains libgstkaldionline2.so. It is assigned
# before Gst.init() below -- presumably so that GStreamer can discover the
# plugin; TODO confirm.
os.environ['GST_PLUGIN_PATH'] = './asr/'
# Request the GTK theme named 'light' for this process.
os.environ['GTK_THEME'] = 'light'
# Import the GTK / GStreamer bindings. The GStreamer 1.0 API is selected
# before `Gst` is imported from gi.repository.
from threading import Thread
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst, Gtk, Gdk
# Set up GObject and GDK threading support and initialise GStreamer.
GObject.threads_init()
Gdk.threads_init()
Gst.init(None)
# Make sure Ctrl-C works: restore the default SIGINT disposition.
import signal
signal.signal(signal.SIGINT, signal.SIG_DFL)
# Import the application's own modules.
import confirm, asr, action, section, xmlview_widgets
import levenstein
class ScriptedASR(Gtk.Window):
    """Main application window.

    Stacks, from top to bottom: the section navigator, the XML script view,
    the confirmation box (hidden until needed), the action bar and the ASR
    transcript pane. The same confirmation box is shared by the section
    navigator and the action bar.
    """

    def __init__(self, xml_filename):
        """Build the window for the script stored in `xml_filename`."""
        super(ScriptedASR, self).__init__()
        self.connect("destroy", self.quit)
        self.set_default_size(800, 600)
        self.set_border_width(10)
        self.set_title('ScriptedASR [%s]' % xml_filename)

        vbox = Gtk.VBox()
        self.sections = section.SectionManager()
        vbox.pack_start(self.sections, False, True, 5)
        self.xmlview = xmlview_widgets.XmlView(xml_filename)
        vbox.pack_start(self.xmlview, True, True, 5)
        self.confirmer = confirm.ConfirmationBox()
        vbox.pack_start(self.confirmer, False, True, 5)
        self.actions = action.ActionView()
        vbox.pack_start(self.actions, False, True, 5)
        self.sections.set_confirmer(self.confirmer)
        self.actions.set_confirmer(self.confirmer)

        # transcript view
        self.asr = asr.ASR(self.hyp_changed)
        vbox.pack_start(self.asr, False, True, 5)

        self.add(vbox)
        self.show_all()
        # show_all() also reveals the confirmation box; hide it again until a
        # confirmation is requested.
        self.confirmer.hide()

        # load css style; `with` closes the file, which used to stay open
        style_provider = Gtk.CssProvider()
        with open('data/style.css', 'rb') as css_file:
            style_provider.load_from_data(css_file.read())
        Gtk.StyleContext.add_provider_for_screen(
            Gdk.Screen.get_default(), style_provider,
            Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)

    def hyp_changed(self, hyp):
        """ASR callback: print the hypothesis words with '[noise]' removed."""
        hyp = ' '.join(hyp).replace('[noise]', ' ').split()
        print('ASR: %s' % (hyp,))

    def quit(self, window):
        """Handler for the window's destroy signal: leave the GTK main loop."""
        Gtk.main_quit()
if __name__ == '__main__':
    # The script file defaults to the bundled one; the first command-line
    # argument overrides it.
    if len(sys.argv) > 1:
        xml_filename = sys.argv[1]
    else:
        xml_filename = 'data/homeostasis_sept2014.xml'
    app = ScriptedASR(xml_filename)
    Gtk.main()
from gi.repository import Gtk
class SectionManager(Gtk.HBox):
    """Previous/Next navigation bar that tracks the current section number.

    Section changes go through the attached confirmer when there is one;
    without a confirmer they are applied immediately.
    """

    def __init__(self):
        super(SectionManager, self).__init__()
        self.set_name('SessionManager')
        self.button_next = Gtk.Button('Next')
        self.button_prev = Gtk.Button('Previous')
        self.label = Gtk.Label()
        self.label.get_style_context().add_class('current-section')
        self.pack_start(self.button_prev, False, False, 5)
        self.pack_start(self.label, True, True, 5)
        self.pack_start(self.button_next, False, False, 5)
        self.set_section(1)
        self.confirmer = None
        self.button_next.connect(
            'clicked',
            lambda widget: self._request('Go to next section?', self.next_section))
        self.button_prev.connect(
            'clicked',
            lambda widget: self._request('Go to previous section?', self.previous_section))

    def _request(self, message, callback):
        """Run `callback` after confirmation, or at once without a confirmer.

        The handlers used to call `self.confirmer.confirm` unconditionally,
        which raised AttributeError while the confirmer was still None.
        """
        if self.confirmer:
            self.confirmer.confirm(message, 3, callback)
        else:
            callback()

    def set_section(self, section):
        """Make `section` (an integer) current and refresh the label."""
        self.section = section
        self.label.set_text('Current section: %d' % self.section)

    def get_view(self):
        """Return the widget to pack into a container (this box itself)."""
        # This used to return `self.hbox`, an attribute that is never set.
        return self

    def get_section(self):
        """Return the current section number."""
        return self.section

    def next_section(self):
        """Advance to the next section."""
        self.set_section(self.section + 1)

    def previous_section(self):
        """Go back to the previous section."""
        self.set_section(self.section - 1)

    def set_confirmer(self, confirmer):
        """Attach the object used to confirm section changes."""
        self.confirmer = confirmer
from gi.repository import GObject, Gtk, Pango
from xml.etree import ElementTree as ET
class Section:
    """Record for one script section: its name plus start and end markers."""

    def __init__(self, name, start, end):
        self.name, self.start, self.end = name, start, end
class Word:
    """Record for one word: its text plus start and end markers."""

    def __init__(self, text, start, end):
        self.text, self.start, self.end = text, start, end
class XmlView(Gtk.ScrolledWindow):
    """Scrollable text rendering of the XML script with section highlighting.

    Every <section> is written to one shared text buffer as a title followed
    by its sequences. The start and end of each section are remembered as
    text marks, so that `show_section` can highlight a whole section.
    """

    def __init__(self, filename):
        super(XmlView, self).__init__()
        self.sections = []
        self.words = []
        self.view = Gtk.TextView()
        self.view.set_editable(False)
        self.view.set_cursor_visible(False)
        self.buffer = self.view.get_buffer()
        self.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.ALWAYS)
        self.add_with_viewport(self.view)
        self.active_section = self.buffer.create_tag("active_section", background="red")
        self.section_title = self.buffer.create_tag(
            "section_title", scale=2, weight=Pango.Weight.BOLD,
            justification=Gtk.Justification.CENTER)
        self.subsection_title = self.buffer.create_tag(
            "subsection_title", scale=1.5, weight=Pango.Weight.BOLD,
            justification=Gtk.Justification.CENTER)
        self.parse_xml(filename)
        self.last_section = None
        # A file without any <section> used to raise IndexError here.
        if self.sections:
            self.show_section(0)

    def get_view(self):
        """Return the widget to pack into a container (this window itself)."""
        return self

    @staticmethod
    def _sequence_text(sequence):
        """Return the text of `sequence` with child element markup dropped.

        ElementTree reports missing text or tail as None; those count as
        empty strings here, instead of becoming the literal 'None' or raising
        TypeError during concatenation.
        """
        parts = [sequence.text or '']
        for node in sequence:
            parts.append(node.text or '')
            parts.append(node.tail or '')
        return ''.join(parts)

    def parse_sequence(self, sequence, name):
        """Append the title and the non-empty lines of one <sequence>."""
        self.buffer.insert_with_tags(self.buffer.get_end_iter(),
                                     'Sequence %s\n' % name, self.subsection_title)
        for line in self._sequence_text(sequence).split('\n'):
            line = line.strip()
            if line != '':
                self.buffer.insert(self.buffer.get_end_iter(), ' %s\n' % line)

    def parse_section(self, section):
        """Append one <section> and record marks at its start and end."""
        name = section.get('id')
        section_start = self.buffer.create_mark(
            'section-start %s' % name, self.buffer.get_end_iter(), True)
        self.buffer.insert_with_tags(self.buffer.get_end_iter(),
                                     'Section %s\n' % name, self.section_title)
        for num, sequence in enumerate(section.findall('./sequence'), 1):
            self.parse_sequence(sequence, name + '.' + str(num))
        section_end = self.buffer.create_mark(
            'section-end %s' % name, self.buffer.get_end_iter(), True)
        self.sections.append(Section(name, section_start, section_end))

    def parse_xml(self, filename):
        """Parse `filename` and render every <section> into the buffer.

        Returns an empty Gtk.TreeStore; the return value is kept only for
        compatibility, because nothing is ever added to it.
        """
        root = ET.parse(filename)
        treestore = Gtk.TreeStore(str)
        for section in root.findall(".//section"):
            self.parse_section(section)
        return treestore

    def show_section(self, section):
        """Highlight the section at index `section`, clearing the previous one."""
        if self.last_section is not None:
            previous = self.sections[self.last_section]
            self.buffer.remove_tag(self.active_section,
                                   self.buffer.get_iter_at_mark(previous.start),
                                   self.buffer.get_iter_at_mark(previous.end))
        self.last_section = section
        current = self.sections[section]
        self.buffer.apply_tag(self.active_section,
                              self.buffer.get_iter_at_mark(current.start),
                              self.buffer.get_iter_at_mark(current.end))
from gi.repository import GObject, Gtk, Pango, Gdk
from xml.etree import ElementTree as ET
class Section(Gtk.VBox):
    """Widget for one <section>: a title label followed by its sequences."""

    def __init__(self, section):
        super(Section, self).__init__()
        section_id = section.get('id')
        self.name = section_id
        self.get_style_context().add_class('section-body')

        self.label = Gtk.Label('Section %s' % section_id)
        self.label.get_style_context().add_class('section-title')
        self.pack_start(self.label, True, True, 5)

        # Sequences are named "<section id>.<1-based position>".
        self.sequences = []
        for num, sequence in enumerate(section.findall('./sequence'), 1):
            child = Sequence(sequence, section.get('id') + '.' + str(num))
            self.sequences.append(child)
            self.pack_start(child, True, True, 5)
class Sequence(Gtk.VBox):
    """Widget for one <sequence>: a title label followed by one Line per line.

    The text of the sequence is its own text plus the text and tail of each
    child element, so child markup is dropped and only the words remain.
    Blank lines are skipped.
    """

    def __init__(self, sequence, name):
        super(Sequence, self).__init__()
        self.name = name
        self.get_style_context().add_class('sequence-body')
        self.label = Gtk.Label('Sequence %s' % name)
        self.label.get_style_context().add_class('sequence-title')
        self.pack_start(self.label, True, True, 5)
        self.lines = []

        # ElementTree reports missing text or tail as None. Treat that as an
        # empty string: str(None) used to add a literal 'None' line, and
        # `text += None` raised TypeError.
        parts = [sequence.text or '']
        for node in sequence:
            parts.append(node.text or '')
            parts.append(node.tail or '')

        for line in ''.join(parts).split('\n'):
            line = line.strip()
            if line != '':
                self.lines.append(Line(line))
                self.pack_start(self.lines[-1], True, True, 5)
class Line(Gtk.Label):
    """Left-aligned label showing one line of script text."""

    def __init__(self, text):
        super(Line, self).__init__()
        # Keep the raw text; the label shows it with a leading space.
        self.text = text
        self.get_style_context().add_class('text-line')
        self.set_halign(Gtk.Align.START)
        self.set_text(' ' + text)
class Word:
    """Record for one word: its text plus start and end markers."""

    def __init__(self, text, start, end):
        self.text, self.start, self.end = text, start, end
class XmlView(Gtk.ScrolledWindow):
    """Scrollable column of Section widgets built from the XML script."""

    def __init__(self, filename):
        super(XmlView, self).__init__()
        self.sections = []
        self.words = []
        self.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.ALWAYS)
        content = self.parse_xml(filename)
        self.add_with_viewport(content)
        self.last_section = None
        self.show_section(0)

    def get_view(self):
        """Return the widget to pack into a container (this window itself)."""
        return self

    def parse_xml(self, filename):
        """Parse `filename`; return a VBox with one Section widget per <section>.

        `self.sections` is rebuilt from scratch on every call.
        """
        self.sections = []
        tree = ET.parse(filename)
        box = Gtk.VBox()
        box.get_style_context().add_class('xmlview')
        for node in tree.findall(".//section"):
            widget = Section(node)
            self.sections.append(widget)
            box.pack_start(widget, True, True, 5)
        return box

    def show_section(self, section):
        """Placeholder: highlighting a section is not implemented in this view."""
        pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment