diff --git a/UD_any/tokeparser_base/machine.rm b/UD_any/tokeparser_base/machine.rm index 9560d0bf5cdf163a26207f29cf89dffc0572eced..39ec5b5d0b3bf47acd3e0ad88f659447adda6b01 100644 --- a/UD_any/tokeparser_base/machine.rm +++ b/UD_any/tokeparser_base/machine.rm @@ -1,36 +1,22 @@ Name : Tokenizer, Tagger, Morpho and Parser Machine Classifier : tokeparser { - Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1} - Stack context : {2 1 0} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {2 1 0} - Focused columns : {ID EOS FORM FEATS DEPREL} - Max nb elements : {1 1 10 10 1} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {DEPREL} - Tree embedding buffer : {-1} - Tree embedding stack : {0 1 2} - Tree embedding nb : {6} - Tree embedding size : 128 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} + Network type : Modular + StateName : States{tokenizer tagger morpho parser} Out{128} + Context : Buffer{-3 -2 -1} Stack{2 1 0} Columns{FORM UPOS} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{ID} NbElem{1} Buffer{0} Stack{0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + RawInput : Left{5} Right{5} GRU{1 1 0.0 1} In{64} Out{64} + SplitTrans : GRU{1 1 0.0 1} In{128} Out{128} + DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{128} Out{128} + InputDropout : 0.4 + MLP : {2048 0.3 512 0.1} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS diff --git a/UD_any/tokeparser_incr/machine.rm b/UD_any/tokeparser_incr/machine.rm index fa36bfe1ea8359dade6ddf473483c5dcaf4f365f..4405538c6b8432f34fbd8aaa0d213715ef1f7383 100644 --- a/UD_any/tokeparser_incr/machine.rm +++ b/UD_any/tokeparser_incr/machine.rm @@ -1,36 +1,22 @@ Name : Tokenizer, Tagger, Morpho and Parser Machine Classifier : tokeparser { - Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1} - Stack context : {2 1 0} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {2 1 0} - Focused columns : {ID EOS FORM FEATS DEPREL} - Max nb elements : {1 1 10 10 1} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {DEPREL} - Tree embedding buffer : {-1} - Tree embedding stack : {0 1 2} - Tree embedding nb : {6} - Tree embedding size : 128 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} + Network type : Modular + StateName : States{tokenizer tagger morpho parser} Out{128} + Context : Buffer{-3 -2 -1} Stack{2 1 0} Columns{FORM UPOS} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{ID} NbElem{1} Buffer{0} Stack{0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FORM} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + RawInput : Left{5} Right{5} GRU{1 1 0.0 1} In{64} Out{64} + SplitTrans : GRU{1 1 0.0 1} In{128} Out{128} + DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{128} Out{128} + InputDropout : 0.4 + MLP : {2048 0.3 512 0.1} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS diff --git a/UD_any/tokeparser_seq/machine.rm b/UD_any/tokeparser_seq/machine.rm index a79fb52144488c19ebfd66840e90261bfc17a237..a7a9bea134856be7ba3e285eeae50dc779b32a98 100644 --- a/UD_any/tokeparser_seq/machine.rm +++ b/UD_any/tokeparser_seq/machine.rm @@ -1,36 +1,22 @@ Name : Tokenizer, Tagger, Morpho and Parser Machine Classifier : tokeparser { - Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1 1 2 3} - Stack context : {2 1 0} - Columns : {FORM UPOS} - Focused buffer : {-1 0 1 2} - Focused stack : {2 1 0} - Focused columns : {ID EOS FORM FEATS DEPREL} - Max nb elements : {1 1 10 10 1} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 128 - MLP : {2048 0.3} - Context LSTM size : 512 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {DEPREL} - Tree embedding buffer : {-1} - Tree embedding stack : {0 1 2} - Tree embedding nb : {6} - Tree embedding size : 128 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} + Transitions : {tokenizer,data/tokenizer.ts tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} + Network type : Modular + StateName : States{tokenizer tagger morpho parser} Out{128} + Context : Buffer{-3 -2 -1 1 2} Stack{2 1 0} Columns{FORM UPOS} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{ID} NbElem{1} Buffer{0} Stack{0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FORM} NbElem{10} Buffer{-1 0 1} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0 1} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0.0 1} In{128} Out{128} + RawInput : Left{5} Right{5} GRU{1 1 0.0 1} In{64} Out{64} + SplitTrans : GRU{1 1 0.0 1} In{128} Out{128} + DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{128} Out{128} + InputDropout : 0.4 + MLP : {2048 0.3 512 0.1} + End + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS