From 225428d180f8ac93dc10c5b13934e2d5c8a7a888 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Thu, 10 Dec 2020 14:22:09 +0100
Subject: [PATCH] Added tokeparser machines with one classifier per task

---
 .../results_tokeparser_multi_10_12_20.txt     |  65 +++++++++
 .../tokeparser_base_multi/machine.rm          | 127 +++++++++++++++++
 .../tokeparser_incr_multi/machine.rm          | 127 +++++++++++++++++
 .../templates/tokeparser_seq_multi/machine.rm | 131 ++++++++++++++++++
 4 files changed, 450 insertions(+)
 create mode 100644 UD_any/results/results_tokeparser_multi_10_12_20.txt
 create mode 100644 UD_any/templates/tokeparser_base_multi/machine.rm
 create mode 100644 UD_any/templates/tokeparser_incr_multi/machine.rm
 create mode 100644 UD_any/templates/tokeparser_seq_multi/machine.rm

diff --git a/UD_any/results/results_tokeparser_multi_10_12_20.txt b/UD_any/results/results_tokeparser_multi_10_12_20.txt
new file mode 100644
index 0000000..8cce207
--- /dev/null
+++ b/UD_any/results/results_tokeparser_multi_10_12_20.txt
@@ -0,0 +1,65 @@
+Corpus          Metric     F1.score        Model
+--------------------------------------------------------------------------------
+UD_English-EWT  LAS        73.62[±0.06]%   tokeparser_incr_multi
+UD_English-EWT  LAS        74.01%          tokeparser_base_multi
+UD_English-EWT  LAS        76.35[±0.11]%   tokeparser_seq_multi
+
+UD_English-EWT  Lemmas     96.97%          tokeparser_seq_multi
+UD_English-EWT  Lemmas     97.03[±0.06]%   tokeparser_incr_multi
+UD_English-EWT  Lemmas     97.03[±0.10]%   tokeparser_base_multi
+
+UD_English-EWT  Sentences  69.57[±1.01]%   tokeparser_seq_multi
+UD_English-EWT  Sentences  73.29[±0.41]%   tokeparser_incr_multi
+UD_English-EWT  Sentences  73.94[±0.69]%   tokeparser_base_multi
+
+UD_English-EWT  Tokens     98.43[±0.01]%   tokeparser_seq_multi
+UD_English-EWT  Tokens     98.44[±0.02]%   tokeparser_incr_multi
+UD_English-EWT  Tokens     98.47[±0.09]%   tokeparser_base_multi
+
+UD_English-EWT  UAS        76.93[±0.09]%   tokeparser_incr_multi
+UD_English-EWT  UAS        77.34[±0.00]%   tokeparser_base_multi
+UD_English-EWT  UAS        79.66[±0.10]%   tokeparser_seq_multi
+
+UD_English-EWT  UFeats     95.41[±0.04]%   tokeparser_base_multi
+UD_English-EWT  UFeats     95.41[±0.10]%   tokeparser_incr_multi
+UD_English-EWT  UFeats     95.52[±0.04]%   tokeparser_seq_multi
+
+UD_English-EWT  UPOS       93.92[±0.12]%   tokeparser_base_multi
+UD_English-EWT  UPOS       93.97[±0.16]%   tokeparser_incr_multi
+UD_English-EWT  UPOS       94.32[±0.01]%   tokeparser_seq_multi
+
+UD_English-EWT  Words      98.43[±0.01]%   tokeparser_seq_multi
+UD_English-EWT  Words      98.44[±0.02]%   tokeparser_incr_multi
+UD_English-EWT  Words      98.47[±0.09]%   tokeparser_base_multi
+--------------------------------------------------------------------------------
+UD_French-GSD   LAS        83.59[±0.34]%   tokeparser_incr_multi
+UD_French-GSD   LAS        84.29[±0.10]%   tokeparser_base_multi
+UD_French-GSD   LAS        85.34[±0.10]%   tokeparser_seq_multi
+
+UD_French-GSD   Lemmas     97.61%          tokeparser_seq_multi
+UD_French-GSD   Lemmas     97.61[±0.06]%   tokeparser_incr_multi
+UD_French-GSD   Lemmas     97.62[±0.00]%   tokeparser_base_multi
+
+UD_French-GSD   Sentences  91.17[±1.03]%   tokeparser_base_multi
+UD_French-GSD   Sentences  92.41[±0.52]%   tokeparser_seq_multi
+UD_French-GSD   Sentences  92.84[±0.48]%   tokeparser_incr_multi
+
+UD_French-GSD   Tokens     99.52[±0.00]%   tokeparser_seq_multi
+UD_French-GSD   Tokens     99.52[±0.04]%   tokeparser_base_multi
+UD_French-GSD   Tokens     99.54[±0.06]%   tokeparser_incr_multi
+
+UD_French-GSD   UAS        86.44[±0.34]%   tokeparser_incr_multi
+UD_French-GSD   UAS        87.00[±0.01]%   tokeparser_base_multi
+UD_French-GSD   UAS        87.90[±0.04]%   tokeparser_seq_multi
+
+UD_French-GSD   UFeats     95.47[±0.12]%   tokeparser_incr_multi
+UD_French-GSD   UFeats     95.56[±0.01]%   tokeparser_base_multi
+UD_French-GSD   UFeats     95.82[±0.03]%   tokeparser_seq_multi
+
+UD_French-GSD   UPOS       96.81[±0.06]%   tokeparser_base_multi
+UD_French-GSD   UPOS       96.84[±0.02]%   tokeparser_incr_multi
+UD_French-GSD   UPOS       96.95[±0.03]%   tokeparser_seq_multi
+
+UD_French-GSD   Words      99.25[±0.02]%   tokeparser_seq_multi
+UD_French-GSD   Words      99.25[±0.04]%   tokeparser_base_multi
+UD_French-GSD   Words      99.30[±0.08]%   tokeparser_incr_multi
diff --git a/UD_any/templates/tokeparser_base_multi/machine.rm b/UD_any/templates/tokeparser_base_multi/machine.rm
new file mode 100644
index 0000000..142c979
--- /dev/null
+++ b/UD_any/templates/tokeparser_base_multi/machine.rm
@@ -0,0 +1,127 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine with one classifier for each state
+Classifier : tokenizer
+{
+ Transitions : {tokenizer,data/tokenizer.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0}
+ Context : Targets{b.-3 b.-2 b.-1} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : tagger
+{
+ Transitions : {tagger,data/tagger.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : morpho
+{
+ Transitions : {morpho,data/morpho_whole.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{ID UPOS FEATS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : lemmatizer
+{
+ Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{ID UPOS FEATS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : parser
+{
+ Transitions : {parser,data/parser_eager_rel_strict.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : segmenter
+{
+ Transitions : {segmenter,data/segmenter.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Splitwords : data/splitwords.ts
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
+Strategy
+{
+ Block : End{cannotMove}
+ tokenizer tagger ENDWORD 0
+ tokenizer tagger SPLIT 0
+ tokenizer tokenizer * 0
+ tagger morpho * 0
+ morpho lemmatizer_rules * 0
+ lemmatizer_rules lemmatizer_case * 0
+ lemmatizer_case parser * 0
+ parser segmenter eager_SHIFT 0
+ parser segmenter eager_RIGHT_rel 0
+ parser parser * 0
+ segmenter tokenizer * 1
+}
diff --git a/UD_any/templates/tokeparser_incr_multi/machine.rm b/UD_any/templates/tokeparser_incr_multi/machine.rm
new file mode 100644
index 0000000..2f5f928
--- /dev/null
+++ b/UD_any/templates/tokeparser_incr_multi/machine.rm
@@ -0,0 +1,127 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine with one classifier for each state
+Classifier : tokenizer
+{
+ Transitions : {tokenizer,data/tokenizer.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : tagger
+{
+ Transitions : {tagger,data/tagger.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : morpho
+{
+ Transitions : {morpho,data/morpho_whole.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : lemmatizer
+{
+ Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : parser
+{
+ Transitions : {parser,data/parser_eager_rel_strict.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : segmenter
+{
+ Transitions : {segmenter,data/segmenter.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Splitwords : data/splitwords.ts
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
+Strategy
+{
+ Block : End{cannotMove}
+ tokenizer tagger ENDWORD 0
+ tokenizer tagger SPLIT 0
+ tokenizer tokenizer * 0
+ tagger morpho * 0
+ morpho lemmatizer_rules * 0
+ lemmatizer_rules lemmatizer_case * 0
+ lemmatizer_case parser * 0
+ parser segmenter eager_SHIFT 0
+ parser segmenter eager_RIGHT_rel 0
+ parser parser * 0
+ segmenter tokenizer * 1
+}
diff --git a/UD_any/templates/tokeparser_seq_multi/machine.rm b/UD_any/templates/tokeparser_seq_multi/machine.rm
new file mode 100644
index 0000000..13f6e47
--- /dev/null
+++ b/UD_any/templates/tokeparser_seq_multi/machine.rm
@@ -0,0 +1,131 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine with one classifier for each state
+Classifier : tokenizer
+{
+ Transitions : {tokenizer,data/tokenizer.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0}
+ Context : Targets{b.-3 b.-2 b.-1} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : tagger
+{
+ Transitions : {tagger,data/tagger.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2} Columns{ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : morpho
+{
+ Transitions : {morpho,data/morpho_whole.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2} Columns{ID UPOS FEATS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : lemmatizer
+{
+ Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2} Columns{ID UPOS FEATS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : parser
+{
+ Transitions : {parser,data/parser_eager_rel_strict.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Classifier : segmenter
+{
+ Transitions : {segmenter,data/segmenter.ts}
+ LossMultiplier : {}
+ Network type : Modular
+ Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+ Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+ RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+ History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
+ InputDropout : 0.3
+ MLP : {1600 0.3 1600 0.3}
+ End
+ Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
+ Type : classification
+ Loss : crossentropy
+}
+Splitwords : data/splitwords.ts
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
+Strategy
+{
+ Block : End{cannotMove}
+ tokenizer tokenizer ENDWORD 1
+ tokenizer tokenizer SPLIT 1
+ tokenizer tokenizer * 0
+ Block : End{cannotMove}
+ tagger tagger * 1
+ Block : End{cannotMove}
+ morpho morpho * 1
+ Block : End{cannotMove}
+ lemmatizer_rules lemmatizer_case * 0
+ lemmatizer_case lemmatizer_rules * 1
+ Block : End{cannotMove}
+ parser segmenter eager_SHIFT 0
+ parser segmenter eager_RIGHT_rel 0
+ parser parser * 0
+ segmenter parser * 1
+}
--
GitLab