Skip to content
Snippets Groups Projects
Commit 0cccbe51 authored by Franck Dary's avatar Franck Dary
Browse files

Working tokenizer for UD_any

parent 33d49eba
No related branches found
No related tags found
No related merge requests found
Name : Tokenizer Machine
Dicts : tokenizer.dicts
%CLASSIFIERS
strategy strategy.cla
tokenizer tokenizer.cla
signature signature.cla
%STATES
strategy strategy
tokenizer tokenizer
signature signature
%TRANSITIONS
strategy signature MOVE signature
strategy tokenizer MOVE tokenizer
signature strategy *
tokenizer strategy *
Name : Signature
Type : Information
Oracle : signature
Oracle Filename : ../../data/fP
Name : Strategy
Type : Information
Oracle : strategy_tokenizer
Oracle Filename : none
#Index Name ref/hyp dict Policy Must print?#
###################################################
0 ID hyp none Final 1
1 FORM hyp form Final 1
0 SGN hyp sgn Final 0
0 EOS hyp none Final 0
Name : Tokenizer
Type : Prediction
Oracle : tokenizer
Feature Model : tokenizer.fm
Action Set : ../../data/tokenizer.as
Topology : (500,RELU,0.3)
Dynamic : no
#########################################################################
Tokenizer_bool 02 Embeddings
Tokenizer_int 05 Embeddings
Tokenizer_letters 30 Embeddings
Tokenizer_form 30 Embeddings
Tokenizer_sgn 10 Embeddings
Tokenizer_actions 05 Embeddings
Tokenizer_entropy 05 Embeddings
# Features classiques
# FORM
b.0#FORM.fasttext
b.-1#FORM.fasttext
b.-2#FORM.fasttext
# SIGNATURES
b.-1#SGN
b.0#SGN
# UPPERCASE
b.0#FORM.U
# LENGTH
b.0#FORM.LEN
# SUFFIXES
b.0#FORM.PART.-4.-4
b.0#FORM.PART.-3.-3
b.0#FORM.PART.-2.-2
b.0#FORM.PART.-1.-1
b.0#FORM.PART.0.0
b.0#FORM.PART.1.1
b.0#FORM.PART.2.2
b.0#FORM.PART.3.3
# RAW INPUT
raw.-5
raw.-4
raw.-3
raw.-2
raw.-1
raw.0
raw.2
raw.3
raw.4
raw.5
raw.6
#Index Name ref/hyp dict Policy Must print?#
##################################################
0 ID hyp none FromZero 1
1 FORM hyp form FromZero 1
0 SGN hyp sgn FromZero 0
0 EOS hyp none FromZero 0
...@@ -39,10 +39,26 @@ fi ...@@ -39,10 +39,26 @@ fi
shift shift
shift shift
shift shift
# Look for the optional "--rawInput" flag among the remaining positional
# parameters. When it is present:
#   - RAWINPUT is set to the flag itself (it stays empty otherwise),
#   - the first occurrence of the flag is removed from the positional
#     parameters, so later uses of "$@" no longer see it.
RAWINPUT=""
# Copy the arguments one-to-one ("$@", not $*): `index` below counts entries
# of "$@", so `params` must hold exactly the same entries for the removal to
# hit the right element, even when an argument contains spaces or glob chars.
params=( "$@" )
index=0
for var in "$@"; do
  if [ "$var" = "--rawInput" ]; then
    RAWINPUT="$var"
    # Quote the subscript so the shell cannot treat it as a glob pattern.
    unset "params[$index]"
    set -- "${params[@]}"
    break
  fi
  index=$((index+1))
done
TRAIN=$(echo $UD_ROOT*$KEYWORD*/*train*\.conllu) TRAIN=$(echo $UD_ROOT*$KEYWORD*/*train*\.conllu)
DEV=$(echo $UD_ROOT*$KEYWORD*/*dev*\.conllu) DEV=$(echo $UD_ROOT*$KEYWORD*/*dev*\.conllu)
TEST=$(echo $UD_ROOT*$KEYWORD*/*test*\.conllu) TEST=$(echo $UD_ROOT*$KEYWORD*/*test*\.conllu)
TESTRAW=$(echo $UD_ROOT*$KEYWORD*/*test*\.txt)
if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST"; if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST";
then then
...@@ -76,8 +92,15 @@ macaon_train --tm machine.tm --bd train.bd --mcd $MCD -T $TRAIN --dev $DEV --exp ...@@ -76,8 +92,15 @@ macaon_train --tm machine.tm --bd train.bd --mcd $MCD -T $TRAIN --dev $DEV --exp
# Evaluation step: run the trained tool on the test set, complete the missing
# columns of its output, then score the result against the $TEST file.
EXPPATH=$LANGPATH/bin/$EXPNAME
TOOL=$LANGPATH/bin/maca_tm_$EXPNAME

# $MCD, $ADDMISSINGCOLUMNS and $EVALCONLL are defined outside this section;
# they are left unquoted so that whatever word splitting the rest of the
# script relies on is preserved.
if [ -z "$RAWINPUT" ]; then
  # Default mode: the tool reads the CoNLL-U test file.
  echo "Evaluation on file" $TEST ":"
  "$TOOL" "$TEST" $MCD --interactive 0 > "$EXPPATH/tmpOutTest.txt"
else
  # --rawInput mode: the tool reads the raw-text test file instead.
  # NOTE(review): the message still names $TEST although the tool is fed
  # $TESTRAW here; confirm which file is meant to be reported.
  echo "Evaluation on file" $TEST ":"
  "$TOOL" "$TESTRAW" $MCD --interactive 0 --rawInput > "$EXPPATH/tmpOutTest.txt"
fi

$ADDMISSINGCOLUMNS "$EXPPATH/tmpOutTest.txt" $MCD > "$EXPPATH/tmpOutTest.conllu"
$EVALCONLL "$TEST" "$EXPPATH/tmpOutTest.conllu" -v
...@@ -76,7 +76,6 @@ def main() : ...@@ -76,7 +76,6 @@ def main() :
print("WARNING : Abiguity detected in \'%s\'"%(word+" "+str(rules[word])), file=sys.stderr) print("WARNING : Abiguity detected in \'%s\'"%(word+" "+str(rules[word])), file=sys.stderr)
for rule in rules[word] : for rule in rules[word] :
print(prefix+word+rule) print(prefix+word+rule)
break
if __name__ == "__main__" : if __name__ == "__main__" :
main() main()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment