From 3008ab4a353d6547bd17a3dc4a05dcc2e4d623ee Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Sat, 4 Jul 2020 16:33:17 +0200 Subject: [PATCH] Added machines --- .../templates/tokeparser_base_two/machine.rm | 52 +++++++++++++++++++ .../templates/tokeparser_incr_two/machine.rm | 48 +++++++++++++++++ .../templates/tokeparser_seq_two/machine.rm | 52 +++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 UD_any/templates/tokeparser_base_two/machine.rm create mode 100644 UD_any/templates/tokeparser_incr_two/machine.rm create mode 100644 UD_any/templates/tokeparser_seq_two/machine.rm diff --git a/UD_any/templates/tokeparser_base_two/machine.rm b/UD_any/templates/tokeparser_base_two/machine.rm new file mode 100644 index 0000000..7468c88 --- /dev/null +++ b/UD_any/templates/tokeparser_base_two/machine.rm @@ -0,0 +1,52 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tokelemmatizer +{ + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} 
+} +Classifier : parser +{ + Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} +} +Splitwords : data/splitwords.ts +Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tokenizer tokenizer ENDWORD 1 + tokenizer tokenizer SPLIT 1 + tokenizer tokenizer * 0 + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho * 1 + Block : End{cannotMove} + lemmatizer_rules lemmatizer_case * 0 + lemmatizer_case lemmatizer_rules * 1 + Block : End{cannotMove} + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter parser * 1 +} diff --git a/UD_any/templates/tokeparser_incr_two/machine.rm b/UD_any/templates/tokeparser_incr_two/machine.rm new file mode 100644 index 0000000..cfc2e58 --- /dev/null +++ b/UD_any/templates/tokeparser_incr_two/machine.rm @@ -0,0 +1,48 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer, Parser and Segmenter Machine +Classifier : tokelemmatizer +{ + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : 
Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} +} +Classifier : parser +{ + Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : Targets{b.-3 b.-2 b.-1 b.0 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} +} +Splitwords : data/splitwords.ts +Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tokenizer tagger ENDWORD 0 + tokenizer tagger SPLIT 0 + tokenizer tokenizer * 0 + tagger morpho * 0 + morpho lemmatizer_rules * 0 + lemmatizer_rules lemmatizer_case * 0 + lemmatizer_case parser * 0 + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter tokenizer * 1 +} diff --git a/UD_any/templates/tokeparser_seq_two/machine.rm b/UD_any/templates/tokeparser_seq_two/machine.rm new file mode 100644 index 0000000..3ab1eb2 --- /dev/null +++ b/UD_any/templates/tokeparser_seq_two/machine.rm @@ -0,0 +1,52 @@ +Name : Tokenizer, Tagger, Morpho, 
Lemmatizer, Parser and Segmenter Machine +Classifier : tokelemmatizer +{ + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_whole.ts lemmatizer_rules,data/lemmatizer_rules.ts lemmatizer_case,data/lemmatizer_case.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} +} +Classifier : parser +{ + Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} + Context : Targets{b.-3 b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.2.0 s.2.-1} Columns{EOS ID UPOS FEATS DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} +} +Splitwords : data/splitwords.ts +Predictions : ID FORM UPOS FEATS LEMMA HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tokenizer tokenizer ENDWORD 1 + tokenizer tokenizer SPLIT 1 + 
tokenizer tokenizer * 0 + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho * 1 + Block : End{cannotMove} + lemmatizer_rules lemmatizer_case * 0 + lemmatizer_case lemmatizer_rules * 1 + Block : End{cannotMove} + parser segmenter eager_SHIFT 0 + parser segmenter eager_RIGHT_rel 0 + parser parser * 0 + segmenter parser * 1 +} -- GitLab