From ecc1b393f0db116b5fd69ffe4927cb3a13abb3f6 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Fri, 15 Apr 2022 16:44:46 +0200 Subject: [PATCH] Added new machines templates --- UD_any/evaluate.sh | 30 ++++++++++--- UD_any/launchBatches.py | 6 +-- UD_any/templates/tagparser_incr/machine.rm | 20 ++++----- .../tagparser_incr_nopretrained/machine.rm | 33 ++++++++++++++ UD_any/templates/tagparser_seq/machine.rm | 21 ++++----- .../tagparser_seq_nopretrained/machine.rm | 36 ++++++++++++++++ UD_any/templates/tokeparser_incr/machine.rm | 2 +- .../tokeparser_incr_nopretrained/machine.rm | 39 +++++++++++++++++ UD_any/templates/tokeparser_seq/machine.rm | 2 +- .../tokeparser_seq_nopretrained/machine.rm | 43 +++++++++++++++++++ 10 files changed, 200 insertions(+), 32 deletions(-) create mode 100644 UD_any/templates/tagparser_incr_nopretrained/machine.rm create mode 100644 UD_any/templates/tagparser_seq_nopretrained/machine.rm create mode 100644 UD_any/templates/tokeparser_incr_nopretrained/machine.rm create mode 100644 UD_any/templates/tokeparser_seq_nopretrained/machine.rm diff --git a/UD_any/evaluate.sh b/UD_any/evaluate.sh index 8b8bdda..7f39096 100755 --- a/UD_any/evaluate.sh +++ b/UD_any/evaluate.sh @@ -51,18 +51,34 @@ then print_usage_and_exit fi +MCD="ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL" +NO="" +for arg in "$@" +do + if [ "$NO" = "1" ] + then + MCD="$arg" + NO="" + fi + if [ "$arg" = "--mcd" ] + then + NO="1" + fi +done + EVALCONLL="../scripts/conll18_ud_eval.py" OUTPUT=$EXPPATH"/predicted_eval.tsv" -if [ "$MODE" = "tsv" ]; then -macaon decode --model $EXPPATH --inputTSV $REF $@ > $OUTPUT && $EVALCONLL $REF $OUTPUT || exit 1 -exit 0 +INPUT="$REF" +INPUTARG="--inputTSV" +if [ "$MODE" = "txt" ]; then + INPUT="$REFRAW" + INPUTARG="--inputTXT" fi -if [ "$MODE" = "txt" ]; then -macaon decode --model $EXPPATH --inputTXT $REFRAW $@ > $OUTPUT && $EVALCONLL $REF $OUTPUT || exit 1 -exit 0 +if [ ! -f "$OUTPUT" ]; then + macaon decode --model $EXPPATH $INPUTARG $INPUT $@ > $OUTPUT || exit 1 fi -print_usage_and_exit +$EVALCONLL --mcd $MCD $REF $OUTPUT || exit 1 diff --git a/UD_any/launchBatches.py b/UD_any/launchBatches.py index 3dd7ebb..157d5d7 100755 --- a/UD_any/launchBatches.py +++ b/UD_any/launchBatches.py @@ -26,10 +26,10 @@ def addNamesAndCommandsTrain(names, commands, mode, expName, arguments, seed, pr ############################################################################### ############################################################################### -def addNamesAndCommandsDecode(names, commands, mode, expName, arguments, pretrained) : +def addNamesAndCommandsDecode(names, commands, mode, expName, arguments) : names.append(expName) - commands.append("./evaluate.sh {} bin/{} pretrained={} --silent {}".format(mode, expName, pretrained,arguments)) + commands.append("./evaluate.sh {} bin/{} --silent {}".format(mode, expName, arguments)) ############################################################################### ############################################################################### @@ -64,7 +64,7 @@ if __name__ == "__main__" : prepareExperiment(xp['lang'],xp['template'],xp['expName']) addNamesAndCommandsTrain(names, commands, xp['mode'],xp['expName'],xp['arguments'],seed=100+i, pretrained=pretrained) else : - addNamesAndCommandsDecode(names, commands, xp['mode'],xp['expName'],xp['evalArguments'], pretrained=pretrained) + addNamesAndCommandsDecode(names, commands, xp['mode'],xp['expName'],xp['evalArguments']) launchSlurmArray(names, commands, name, device, nbHours, limit, nbCPU) ############################################################################### diff --git a/UD_any/templates/tagparser_incr/machine.rm b/UD_any/templates/tagparser_incr/machine.rm index a4e542a..14406ad 100644 --- a/UD_any/templates/tagparser_incr/machine.rm +++ b/UD_any/templates/tagparser_incr/machine.rm @@ -1,31 +1,31 @@ Name : Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine Classifier : tagparser { - Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} LossMultiplier : {} Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} - Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{300} Out{128} w2v{FORM,data/W2V/fasttext.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} - History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} - InputDropout : 0.3 - MLP : {1600 0.3 1600 0.3} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} End Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} Type : classification Loss : crossentropy } -Splitwords : data/splitwords.ts Predictions : UPOS FEATS LEMMA HEAD DEPREL EOS Strategy { Block : End{cannotMove} tagger morpho * 0 morpho lemmatizer_rules * 0 - lemmatizer_rules lemmatizer_case * 0 - lemmatizer_case parser * 0 + lemmatizer_rules parser * 0 parser segmenter eager_SHIFT 0 parser segmenter eager_RIGHT_rel 0 parser parser * 0 diff --git a/UD_any/templates/tagparser_incr_nopretrained/machine.rm b/UD_any/templates/tagparser_incr_nopretrained/machine.rm new file mode 100644 index 0000000..3b513b4 --- /dev/null +++ b/UD_any/templates/tagparser_incr_nopretrained/machine.rm @@ -0,0 +1,33 @@ +Name : Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tagparser +{ + Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{128} Out{128} w2v{FORM,data/FORM.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy +} +Predictions : UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tagger morpho * 0 + morpho lemmatizer_rules * 0 + lemmatizer_rules parser * 0 + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter tagger * 1 +} diff --git a/UD_any/templates/tagparser_seq/machine.rm b/UD_any/templates/tagparser_seq/machine.rm index 80d2729..a605f22 100644 --- a/UD_any/templates/tagparser_seq/machine.rm +++ b/UD_any/templates/tagparser_seq/machine.rm @@ -1,22 +1,24 @@ Name : Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine Classifier : tagparser { - Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} - LossMultiplier : {} + Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} - Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{300} Out{128} w2v{FORM,data/W2V/fasttext.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} - History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} - InputDropout : 0.3 - MLP : {1600 0.3 1600 0.3} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} End Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} Type : classification Loss : crossentropy } -Splitwords : data/splitwords.ts Predictions : UPOS FEATS LEMMA HEAD DEPREL EOS Strategy { @@ -25,8 +27,7 @@ Strategy Block : End{cannotMove} morpho morpho * 1 Block : End{cannotMove} - lemmatizer_rules lemmatizer_case * 0 - lemmatizer_case lemmatizer_rules * 1 + lemmatizer_rules lemmatizer_rules * 1 Block : End{cannotMove} parser segmenter eager_SHIFT 0 parser segmenter eager_RIGHT_rel 0 diff --git a/UD_any/templates/tagparser_seq_nopretrained/machine.rm b/UD_any/templates/tagparser_seq_nopretrained/machine.rm new file mode 100644 index 0000000..0691aa7 --- /dev/null +++ b/UD_any/templates/tagparser_seq_nopretrained/machine.rm @@ -0,0 +1,36 @@ +Name : Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tagparser +{ + Transitions : {tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{128} Out{128} w2v{FORM,data/FORM.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy +} +Predictions : UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho * 1 + Block : End{cannotMove} + lemmatizer_rules lemmatizer_rules * 1 + Block : End{cannotMove} + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter parser * 1 +} diff --git a/UD_any/templates/tokeparser_incr/machine.rm b/UD_any/templates/tokeparser_incr/machine.rm index ad72b25..f7ea26a 100644 --- a/UD_any/templates/tokeparser_incr/machine.rm +++ b/UD_any/templates/tokeparser_incr/machine.rm @@ -4,7 +4,7 @@ Classifier : tokeparser Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} LossMultiplier : {} Network type : Modular - Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} Concat{1 1 0.0 1} In{300} Out{64} w2v{FORM,data/W2V/fasttextcleanfiltered.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{300} Out{128} w2v{FORM,data/W2V/fasttext.w2v} Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} diff --git a/UD_any/templates/tokeparser_incr_nopretrained/machine.rm b/UD_any/templates/tokeparser_incr_nopretrained/machine.rm new file mode 100644 index 0000000..a4a37fc --- /dev/null +++ b/UD_any/templates/tokeparser_incr_nopretrained/machine.rm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tokeparser +{ + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{128} Out{128} w2v{FORM,data/FORM.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + SplitTrans : LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy +} +Splitwords : data/splitwords.ts +Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tokenizer tagger ENDWORD 0 + tokenizer tagger SPLIT 0 + tokenizer tokenizer * 0 + tagger morpho * 0 + morpho lemmatizer_rules * 0 + lemmatizer_rules parser * 0 + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter tokenizer * 1 +} diff --git a/UD_any/templates/tokeparser_seq/machine.rm b/UD_any/templates/tokeparser_seq/machine.rm index f63c06a..d3e8010 100644 --- a/UD_any/templates/tokeparser_seq/machine.rm +++ b/UD_any/templates/tokeparser_seq/machine.rm @@ -4,7 +4,7 @@ Classifier : tokeparser Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} LossMultiplier : {} Network type : Modular - Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} Concat{1 1 0.0 1} In{300} Out{64} w2v{FORM,data/W2V/fasttextcleanfiltered.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{300} Out{128} w2v{FORM,data/W2V/fasttext.w2v} Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} diff --git a/UD_any/templates/tokeparser_seq_nopretrained/machine.rm b/UD_any/templates/tokeparser_seq_nopretrained/machine.rm new file mode 100644 index 0000000..eee655b --- /dev/null +++ b/UD_any/templates/tokeparser_seq_nopretrained/machine.rm @@ -0,0 +1,43 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tokeparser +{ + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{FORM} LSTM{1 1 0.0 1} In{128} Out{128} w2v{FORM,data/FORM.w2v} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{128} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{128} Out{64} + HistoryMine : NbElem{4} LSTM{1 1 0 1} In{128} Out{64} + StateName : Out{64} + Distance : FromBuffer{} FromStack{0 1 2} ToBuffer{0} ToStack{} Threshold{15} LSTM{1 1 0.0 1} In{128} Out{64} + SplitTrans : LSTM{1 1 0.0 1} In{128} Out{64} + InputDropout : 0.5 + MLP : {3200 0.4 1600 0.4} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy +} +Splitwords : data/splitwords.ts +Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tokenizer tokenizer ENDWORD 1 + tokenizer tokenizer SPLIT 1 + tokenizer tokenizer * 0 + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho * 1 + Block : End{cannotMove} + lemmatizer_rules lemmatizer_rules * 1 + Block : End{cannotMove} + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter parser * 1 +} -- GitLab