From c371b42359c25a7d47dc89324be171f7b4fae3e7 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 3 Jul 2020 17:32:16 +0200
Subject: [PATCH] updated machines

---
 UD_any/templates/tokeparser_base/machine.rm | 41 ++++++++++-----------
 UD_any/templates/tokeparser_incr/machine.rm | 40 +++++++++-----------
 UD_any/templates/tokeparser_seq/machine.rm  | 41 ++++++++++-----------
 3 files changed, 56 insertions(+), 66 deletions(-)

diff --git a/UD_any/templates/tokeparser_base/machine.rm b/UD_any/templates/tokeparser_base/machine.rm
index e4cd45b..3de9ff2 100644
--- a/UD_any/templates/tokeparser_base/machine.rm
+++ b/UD_any/templates/tokeparser_base/machine.rm
@@ -1,28 +1,23 @@
-Name : Tokenizer, Tagger, Morpho and Parser Machine
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine
 Classifier : tokeparser
 {
-  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
-  LossMultiplier : {segmenter,3.0}
-  Network type : Modular
-  StateName : Out{1024}
-  Context : Buffer{-3 -2 -1} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
-  Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{ID} NbElem{1} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FORM} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FEATS} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
-  History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
-  RawInput : Left{5} Right{5} LSTM{1 1 0.0 1} In{32} Out{32}
+  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
+  LossMultiplier : {}
+  Network type : Modular
+  Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+  Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+  Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+  History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
   SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
-  InputDropout : 0.5
-  MLP : {2048 0.3 2048 0.3}
-  End
-  Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
+  InputDropout : 0.3
+  MLP : {1600 0.3 1600 0.3}
+  End
+  Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
 }
 Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
 Strategy
 {
   Block : End{cannotMove}
@@ -32,8 +27,10 @@ Strategy
   Block : End{cannotMove}
     tagger tagger * 1
   Block : End{cannotMove}
-    morpho morpho NOTHING 1
-    morpho morpho * 0
+    morpho morpho * 1
+  Block : End{cannotMove}
+    lemmatizer_rules lemmatizer_case * 0
+    lemmatizer_case lemmatizer_rules * 1
   Block : End{cannotMove}
     parser segmenter eager_SHIFT 0
     parser segmenter eager_RIGHT_rel 0
diff --git a/UD_any/templates/tokeparser_incr/machine.rm b/UD_any/templates/tokeparser_incr/machine.rm
index 0afaf01..051f605 100644
--- a/UD_any/templates/tokeparser_incr/machine.rm
+++ b/UD_any/templates/tokeparser_incr/machine.rm
@@ -1,28 +1,23 @@
-Name : Tokenizer, Tagger, Morpho and Parser Machine
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine
 Classifier : tokeparser
 {
-  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
-  LossMultiplier : {segmenter,3.0}
-  Network type : Modular
-  StateName : Out{1024}
-  Context : Buffer{-3 -2 -1} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
-  Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{ID} NbElem{1} Buffer{-2 -1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FORM} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FEATS} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
-  History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
-  RawInput : Left{5} Right{5} LSTM{1 1 0.0 1} In{32} Out{32}
+  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
+  LossMultiplier : {}
+  Network type : Modular
+  Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+  Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+  Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+  History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
   SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
-  InputDropout : 0.5
-  MLP : {2048 0.3 2048 0.3}
-  End
-  Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
+  InputDropout : 0.3
+  MLP : {1600 0.3 1600 0.3}
+  End
+  Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
 }
 Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
 Strategy
 {
   Block : End{cannotMove}
@@ -30,8 +25,9 @@ Strategy
     tokenizer tagger SPLIT 0
     tokenizer tokenizer * 0
     tagger morpho * 0
-    morpho parser NOTHING 0
-    morpho morpho * 0
+    morpho lemmatizer_rules * 0
+    lemmatizer_rules lemmatizer_case * 0
+    lemmatizer_case parser * 0
     parser segmenter eager_SHIFT 0
     parser segmenter eager_RIGHT_rel 0
     parser parser * 0
diff --git a/UD_any/templates/tokeparser_seq/machine.rm b/UD_any/templates/tokeparser_seq/machine.rm
index 47fb4df..dd7708f 100644
--- a/UD_any/templates/tokeparser_seq/machine.rm
+++ b/UD_any/templates/tokeparser_seq/machine.rm
@@ -1,28 +1,23 @@
-Name : Tokenizer, Tagger, Morpho and Parser Machine
+Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine
 Classifier : tokeparser
 {
-  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
-  LossMultiplier : {segmenter,3.0}
-  Network type : Modular
-  StateName : Out{1024}
-  Context : Buffer{-3 -2 -1 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
-  Context : Buffer{-3 -2 -1 0 1 2} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{ID} NbElem{1} Buffer{-1 0 1} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{FEATS} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
-  Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
-  DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
-  History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
-  RawInput : Left{5} Right{5} LSTM{1 1 0.0 1} In{32} Out{32}
+  Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
+  LossMultiplier : {}
+  Network type : Modular
+  Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1}
+  Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
+  Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
+  RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32}
+  History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
   SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64}
-  InputDropout : 0.5
-  MLP : {2048 0.3 2048 0.3}
-  End
-  Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
+  InputDropout : 0.3
+  MLP : {1600 0.3 1600 0.3}
+  End
+  Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
 }
 Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS
+Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS
 Strategy
 {
   Block : End{cannotMove}
@@ -32,8 +27,10 @@ Strategy
   Block : End{cannotMove}
     tagger tagger * 1
   Block : End{cannotMove}
-    morpho morpho NOTHING 1
-    morpho morpho * 0
+    morpho morpho * 1
+  Block : End{cannotMove}
+    lemmatizer_rules lemmatizer_case * 0
+    lemmatizer_case lemmatizer_rules * 1
   Block : End{cannotMove}
     parser segmenter eager_SHIFT 0
     parser segmenter eager_RIGHT_rel 0
--
GitLab