From 393d5037e77d12c90dccbd513b501071e5e2448c Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 29 Apr 2020 15:07:12 +0200 Subject: [PATCH] Updated some machines for modular network --- UD_any/parser/machine.rm | 43 +++++++++++-------------------------- UD_any/tagger/machine.rm | 39 ++++++++------------------------- UD_any/tagmorpho/machine.rm | 19 ++++++++++++++++ UD_any/tokenizer/machine.rm | 41 ++++++++++------------------------- 4 files changed, 52 insertions(+), 90 deletions(-) create mode 100644 UD_any/tagmorpho/machine.rm diff --git a/UD_any/parser/machine.rm b/UD_any/parser/machine.rm index c5fb678..a6a12ac 100644 --- a/UD_any/parser/machine.rm +++ b/UD_any/parser/machine.rm @@ -1,36 +1,19 @@ Name : Parser Machine Classifier : parser { - Transitions : {parser,data/parser.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1 0 1 2} - Stack context : {2 1 0} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {2 1 0} - Focused columns : {ID EOS FORM FEATS DEPREL} - Max nb elements : {1 1 10 10 1} - Raw input left window : -1 - Raw input right window : -1 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {DEPREL} - Tree embedding buffer : {-1} - Tree embedding stack : {0 1 2} - Tree embedding nb : {6} - Tree embedding size : 128 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {parser,data/parser.ts} + Network type : Modular + Context : Unk{1} Buffer{-3 -2 -1 0 1 2} Stack{2 1 0} Columns{FORM UPOS} GRU{1 1 0.0 1} In{128} Out{512} + Focused : Column{ID} NbElem{1} Buffer{0} Stack{0} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{256} + DepthLayerTree : Columns{DEPREL} Buffer{-1} Stack{2 1 0} LayerSizes{6} GRU{1 1 0.0 1} In{128} Out{128} + InputDropout : 0.3 + MLP : {2048 0.3} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Predictions : HEAD DEPREL EOS Strategy : sequential diff --git a/UD_any/tagger/machine.rm b/UD_any/tagger/machine.rm index cbfd0b7..29f91ee 100644 --- a/UD_any/tagger/machine.rm +++ b/UD_any/tagger/machine.rm @@ -1,36 +1,15 @@ Name : Tagger Machine Classifier : tagger { - Transitions : {tagger,data/tagger.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1 0 1 2} - Stack context : {} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {} - Focused columns : {ID FORM} - Max nb elements : {1 10} - Raw input left window : -1 - Raw input right window : -1 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {} - Tree embedding buffer : {} - Tree embedding stack : {} - Tree embedding nb : {} - Tree embedding size : 0 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {tagger,data/tagger.ts} + Network type : Modular + Context : Unk{1} Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} GRU{1 1 0.0 1} In{128} Out{512} + Focused : Column{ID} NbElem{1} Buffer{-1 0} Stack{} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{} GRU{1 1 0.0 1} In{128} Out{256} + InputDropout : 0.3 + MLP : {2048 0.3} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Predictions : UPOS Strategy : sequential diff --git a/UD_any/tagmorpho/machine.rm b/UD_any/tagmorpho/machine.rm new file mode 100644 index 0000000..7e07d6f --- /dev/null +++ b/UD_any/tagmorpho/machine.rm @@ -0,0 +1,19 @@ +Name : Feats Machine +Classifier : morpho +{ + Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts} + Network type : Modular + Context : Unk{1} Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} LSTM{1 1 0.0 1} In{128} Out{512} + Focused : Column{ID} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0.0 1} In{128} Out{256} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{} LSTM{1 1 0.0 1} In{128} Out{256} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0} Stack{} LSTM{1 1 0.0 1} In{128} Out{256} + InputDropout : 0.3 + MLP : {2048 0.3} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} +} +Predictions : UPOS FEATS +Strategy : sequential + tagger morpho 1 + morpho morpho NOTHING 1 + morpho morpho 0 diff --git a/UD_any/tokenizer/machine.rm b/UD_any/tokenizer/machine.rm index bc622d2..1b26358 100644 --- a/UD_any/tokenizer/machine.rm +++ b/UD_any/tokenizer/machine.rm @@ -1,36 +1,17 @@ Name : Tokenizer Machine Classifier : tokenizer { - Transitions : {tokenizer,data/tokenizer.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1} - Stack context : {} - Columns : {FORM} - Focused buffer : {-1 0} - Focused stack : {} - Focused columns : {ID FORM} - Max nb elements : {1 10} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {} - Tree embedding buffer : {} - Tree embedding stack : {} - Tree embedding nb : {} - Tree embedding size : 0 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {tokenizer,data/tokenizer.ts} + Network type : Modular + Context : Unk{1} Buffer{-3 -2 -1} Stack{} Columns{FORM} GRU{1 1 0.0 1} In{128} Out{512} + Focused : Column{ID} NbElem{1} Buffer{-1 0} Stack{} GRU{1 1 0.0 1} In{128} Out{256} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{} GRU{1 1 0.0 1} In{128} Out{256} + RawInput : Left{5} Right{5} GRU{1 1 0.0 1} In{32} Out{32} + SplitTrans : GRU{1 1 0.0 1} In{128} Out{128} + InputDropout : 0.3 + MLP : {2048 0.3} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM EOS -- GitLab