diff --git a/UD_any/morpho/machine.rm b/UD_any/morpho/machine.rm index 5ff55ba71ed460a06373763423f6cf4bd1f34052..eeaeaa5fe3fdb4ed2235fff8a492e1b45c3f2e60 100644 --- a/UD_any/morpho/machine.rm +++ b/UD_any/morpho/machine.rm @@ -1,9 +1,9 @@ Name : Feats Machine Classifier : morpho { - Transitions : {data/morpho_parts.ts} + Transitions : {morpho,data/morpho_parts.ts} Network type : LSTM - Unknown value threshold : -1 + Unknown value threshold : 1 Buffer context : {-3 -2 -1 0 1 2} Stack context : {} Columns : {FORM UPOS} @@ -13,15 +13,24 @@ Classifier : morpho Max nb elements : {1 10 10} Raw input left window : -1 Raw input right window : -1 - Embeddings size : 256 - MLP : {8192 0.3} - Context LSTM size : 1024 + Embeddings size : 128 + MLP : {2048 0.3} + Context LSTM size : 512 Focused LSTM size : 256 Rawinput LSTM size : 32 Split trans LSTM size : 256 - Num layers : 2 + Num layers : 3 BiLSTM : true - LSTM dropout : 0.3 + LSTM dropout : 0.1 + Total input dropout : 0.3 + Embeddings dropout : 0.3 + Dropout 2d : false + Tree embedding columns : {} + Tree embedding buffer : {} + Tree embedding stack : {} + Tree embedding nb : {} + Tree embedding size : 0 + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Predictions : FEATS Strategy : sequential diff --git a/UD_any/parser/machine.rm b/UD_any/parser/machine.rm index a7105a59100b7bd4a81e3f62880aae38a11f6ecb..c5fb678697662558d121dc5ced060663e9fd6d37 100644 --- a/UD_any/parser/machine.rm +++ b/UD_any/parser/machine.rm @@ -1,27 +1,36 @@ Name : Parser Machine Classifier : parser { - Transitions : {data/parser.ts} + Transitions : {parser,data/parser.ts} Network type : LSTM - Unknown value threshold : -1 + Unknown value threshold : 1 Buffer context : {-3 -2 -1 0 1 2} - Stack context : {} + Stack context : {2 1 0} Columns : {FORM UPOS} Focused buffer : {-1 0} - Focused stack : {-1 0} - Focused columns : {ID FORM FEATS} - Max nb elements : {1 10 10} + Focused stack : {2 1 0} + Focused columns : {ID EOS FORM FEATS DEPREL} + Max nb elements : {1 1 10 10 1} Raw input left window : -1 Raw input right window : -1 - Embeddings size : 256 - MLP : {8192 0.3} - Context LSTM size : 1024 + Embeddings size : 128 + MLP : {2048 0.3} + Context LSTM size : 512 Focused LSTM size : 256 Rawinput LSTM size : 32 Split trans LSTM size : 256 - Num layers : 2 + Num layers : 3 BiLSTM : true - LSTM dropout : 0.3 + LSTM dropout : 0.1 + Total input dropout : 0.3 + Embeddings dropout : 0.3 + Dropout 2d : false + Tree embedding columns : {DEPREL} + Tree embedding buffer : {-1} + Tree embedding stack : {0 1 2} + Tree embedding nb : {6} + Tree embedding size : 128 + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Predictions : HEAD DEPREL EOS Strategy : sequential diff --git a/UD_any/tagger/machine.rm b/UD_any/tagger/machine.rm index c54c5d81132286a6d4d87d3545bbaa7e6caa498d..cbfd0b71b6e31d52528a852051ecec21fe15789c 100644 --- a/UD_any/tagger/machine.rm +++ b/UD_any/tagger/machine.rm @@ -1,9 +1,9 @@ Name : Tagger Machine Classifier : tagger { - Transitions : {data/tagger.ts} + Transitions : {tagger,data/tagger.ts} Network type : LSTM - Unknown value threshold : -1 + Unknown value threshold : 1 Buffer context : {-3 -2 -1 0 1 2} Stack context : {} Columns : {FORM UPOS} @@ -13,15 +13,24 @@ Classifier : tagger Max nb elements : {1 10} Raw input left window : -1 Raw input right window : -1 - Embeddings size : 256 - MLP : {8192 0.3} - Context LSTM size : 1024 + Embeddings size : 128 + MLP : {2048 0.3} + Context LSTM size : 512 Focused LSTM size : 256 Rawinput LSTM size : 32 Split trans LSTM size : 256 - Num layers : 2 + Num layers : 3 BiLSTM : true - LSTM dropout : 0.3 + LSTM dropout : 0.1 + Total input dropout : 0.3 + Embeddings dropout : 0.3 + Dropout 2d : false + Tree embedding columns : {} + Tree embedding buffer : {} + Tree embedding stack : {} + Tree embedding nb : {} + Tree embedding size : 0 + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Predictions : UPOS Strategy : sequential diff --git a/UD_any/tagparser_incr/machine.rm b/UD_any/tagparser_incr/machine.rm deleted file mode 100644 index 334d08ca6bef1af0473708d0929730fde4d46d9f..0000000000000000000000000000000000000000 --- a/UD_any/tagparser_incr/machine.rm +++ /dev/null @@ -1,8 +0,0 @@ -Name : Tagger+Parser Machine -Classifier : any CNN(4,4,2) data/taggerparser.ts -Predictions : UPOS HEAD DEPREL EOS -Strategy : incremental - tagger parser 0 - parser tagger SHIFT 1 - parser tagger RIGHT 1 - parser parser 0 diff --git a/UD_any/tokemorpho_incr/machine.rm b/UD_any/tokemorpho_incr/machine.rm deleted file mode 100644 index 04abba11d6ae795742d0946b9349ea94aa330c85..0000000000000000000000000000000000000000 --- a/UD_any/tokemorpho_incr/machine.rm +++ /dev/null @@ -1,43 +0,0 @@ -Name : Tokenizer, Tagger and Morpho Machine -Classifier : tokemorpho -{ - Transitions : {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} - Network type : LSTM - Unknown value threshold : 1 - Buffer context : {-3 -2 -1} - Stack context : {} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {} - Focused columns : {ID FORM FEATS} - Max nb elements : {1 10 10} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 64 - MLP : {2048 0.3} - Context LSTM size : 1024 - Focused LSTM size : 256 - Rawinput LSTM size : 16 - Split trans LSTM size : 128 - Num layers : 3 - BiLSTM : true - LSTM dropout : 0.1 - Total input dropout : 0.3 - Embeddings dropout : 0.3 - Dropout 2d : false - Tree embedding columns : {} - Tree embedding buffer : {} - Tree embedding stack : {} - Tree embedding nb : {} - Tree embedding size : 0 - Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} -} -Splitwords : data/splitwords.ts -Predictions : ID FORM UPOS FEATS EOS -Strategy : incremental - tokenizer tagger ENDWORD 0 - tokenizer tagger SPLIT 0 - tokenizer tokenizer 0 - tagger morpho 0 - morpho tokenizer NOTHING 1 - morpho morpho 0 diff --git a/UD_any/tokemorpho_seq/machine.rm b/UD_any/tokemorpho_seq/machine.rm deleted file mode 100644 index 43fcf8bc88b073d09fc4799433e85f4b27408a55..0000000000000000000000000000000000000000 --- a/UD_any/tokemorpho_seq/machine.rm +++ /dev/null @@ -1,34 +0,0 @@ -Name : Tokenizer, Tagger and Morpho Machine -Classifier : tokemorpho -{ - Transitions : {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} - Network type : LSTM - Unknown value threshold : -1 - Buffer context : {-3 -2 -1 1 2} - Stack context : {} - Columns : {FORM UPOS} - Focused buffer : {-1 0} - Focused stack : {} - Focused columns : {ID FORM FEATS} - Max nb elements : {1 10 10} - Raw input left window : 5 - Raw input right window : 5 - Embeddings size : 256 - MLP : {8192 0.3} - Context LSTM size : 1024 - Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 2 - BiLSTM : true - LSTM dropout : 0.3 -} -Splitwords : data/splitwords.ts -Predictions : ID FORM UPOS FEATS EOS -Strategy : sequential - tokenizer tagger ENDWORD 1 - tokenizer tagger SPLIT 1 - tokenizer tagger 0 - tagger morpho 1 - morpho tokenizer NOTHING 1 - morpho tokenizer 0 diff --git a/UD_any/tokenizer/machine.rm b/UD_any/tokenizer/machine.rm index 9b1252e29109806c3a6919c89d2315aef49bbcc8..bc622d2c5fd99e706f7d30d776f98a65d5ed3d90 100644 --- a/UD_any/tokenizer/machine.rm +++ b/UD_any/tokenizer/machine.rm @@ -1,9 +1,9 @@ Name : Tokenizer Machine Classifier : tokenizer { - Transitions : {data/tokenizer.ts} + Transitions : {tokenizer,data/tokenizer.ts} Network type : LSTM - Unknown value threshold : -1 + Unknown value threshold : 1 Buffer context : {-3 -2 -1} Stack context : {} Columns : {FORM} @@ -13,15 +13,24 @@ Classifier : tokenizer Max nb elements : {1 10} Raw input left window : 5 Raw input right window : 5 - Embeddings size : 256 - MLP : {8192 0.3} - Context LSTM size : 1024 + Embeddings size : 128 + MLP : {2048 0.3} + Context LSTM size : 512 Focused LSTM size : 256 Rawinput LSTM size : 32 Split trans LSTM size : 256 - Num layers : 2 + Num layers : 3 BiLSTM : true - LSTM dropout : 0.3 + LSTM dropout : 0.1 + Total input dropout : 0.3 + Embeddings dropout : 0.3 + Dropout 2d : false + Tree embedding columns : {} + Tree embedding buffer : {} + Tree embedding stack : {} + Tree embedding nb : {} + Tree embedding size : 0 + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM EOS diff --git a/UD_any/toketagger_incr/machine.rm b/UD_any/toketagger_incr/machine.rm deleted file mode 100644 index eedf496f11763fda6400d3d92c2eff7dcf6b7301..0000000000000000000000000000000000000000 --- a/UD_any/toketagger_incr/machine.rm +++ /dev/null @@ -1,9 +0,0 @@ -Name : Tokenizer and Tagger Machine -Classifier : toketagger LSTM(-1,{-3,-2,-1},{},{FORM,UPOS},{-1,0},{},{ID,FORM},{1,10},5,5) {data/tokenizer.ts data/tagger.ts} -Splitwords : data/splitwords.ts -Predictions : ID FORM UPOS EOS -Strategy : incremental - tokenizer tagger ENDWORD 0 - tokenizer tagger SPLIT 0 - tokenizer tokenizer 0 - tagger tokenizer 1 diff --git a/UD_any/toketagger_seq/machine.rm b/UD_any/toketagger_seq/machine.rm deleted file mode 100644 index 92fd15365f9bbb5de6451378ca6b583f20c2b4b8..0000000000000000000000000000000000000000 --- a/UD_any/toketagger_seq/machine.rm +++ /dev/null @@ -1,9 +0,0 @@ -Name : Tokenizer and Tagger Machine -Classifier : toketagger LSTM(-1,{-3,-2,-1,0,1,2},{},{FORM,UPOS},{-1,0},{},{ID,FORM},{1,10},5,5) {data/tokenizer.ts data/tagger.ts} -Splitwords : data/splitwords.ts -Predictions : ID FORM UPOS EOS -Strategy : sequential - tokenizer tagger ENDWORD 1 - tokenizer tagger SPLIT 1 - tokenizer tagger 0 - tagger tokenizer 1