From 650be26d9637f37f25aec37ff4bd73472347e49d Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 8 Apr 2020 22:56:31 +0200 Subject: [PATCH] updated machines --- UD_any/morpho/machine.rm | 25 ++++++++++++++++++++++++- UD_any/parser/machine.rm | 25 ++++++++++++++++++++++++- UD_any/tagger/machine.rm | 25 ++++++++++++++++++++++++- UD_any/tokemorpho_incr/machine.rm | 25 ++++++++++++++++++++++++- UD_any/tokemorpho_seq/machine.rm | 25 ++++++++++++++++++++++++- UD_any/tokenizer/machine.rm | 25 ++++++++++++++++++++++++- 6 files changed, 144 insertions(+), 6 deletions(-) diff --git a/UD_any/morpho/machine.rm b/UD_any/morpho/machine.rm index 61f4a84..19000fa 100644 --- a/UD_any/morpho/machine.rm +++ b/UD_any/morpho/machine.rm @@ -1,5 +1,28 @@ Name : Feats Machine -Classifier : tokenizer LSTM(-1,{-3,-2,-1,0,1,2},{},{FORM,UPOS},{-1,0},{},{ID,FORM,FEATS},{1,10,10},-1,-1) {data/morpho_parts.ts} +Classifier : morpho +{ + Transitions : {data/morpho_parts.ts} + Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1 0 1 2} + Stack context : {} + Columns : {FORM UPOS} + Focused buffer : {-1 0} + Focused stack : {} + Focused columns : {ID FORM FEATS} + Max nb elements : {1 10 10} + Raw input left window : -1 + Raw input right window : -1 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Predictions : FEATS Strategy : sequential morpho morpho NOTHING 1 diff --git a/UD_any/parser/machine.rm b/UD_any/parser/machine.rm index 5d8e94c..a28b2ee 100644 --- a/UD_any/parser/machine.rm +++ b/UD_any/parser/machine.rm @@ -1,5 +1,28 @@ Name : Parser Machine -Classifier : tagger RLT(15,15,4) data/parser.ts +Classifier : parser +{ + Transitions : {data/parser.ts} + Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1 0 1 2} + Stack context : {} + Columns : {FORM 
UPOS} + Focused buffer : {-1 0} + Focused stack : {-1 0} + Focused columns : {ID FORM FEATS} + Max nb elements : {1 10 10} + Raw input left window : -1 + Raw input right window : -1 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Predictions : HEAD DEPREL EOS Strategy : sequential parser parser SHIFT 1 diff --git a/UD_any/tagger/machine.rm b/UD_any/tagger/machine.rm index ac8ee82..389942e 100644 --- a/UD_any/tagger/machine.rm +++ b/UD_any/tagger/machine.rm @@ -1,5 +1,28 @@ Name : Tagger Machine -Classifier : tagger LSTM(-1,{-3,-2,-1,0,1,2},{},{FORM,UPOS},{-1,0},{},{ID,FORM},{1,10},-1,-1) {data/tagger.ts} +Classifier : tagger +{ + Transitions : {data/tagger.ts} + Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1 0 1 2} + Stack context : {} + Columns : {FORM UPOS} + Focused buffer : {-1 0} + Focused stack : {} + Focused columns : {ID FORM} + Max nb elements : {1 10} + Raw input left window : -1 + Raw input right window : -1 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Predictions : UPOS Strategy : sequential tagger tagger 1 diff --git a/UD_any/tokemorpho_incr/machine.rm b/UD_any/tokemorpho_incr/machine.rm index 45e91a4..0c9e187 100644 --- a/UD_any/tokemorpho_incr/machine.rm +++ b/UD_any/tokemorpho_incr/machine.rm @@ -1,5 +1,28 @@ Name : Tokenizer, Tagger and Morpho Machine -Classifier : toketagger LSTM(-1,{-3,-2,-1},{},{FORM,UPOS},{-1,0},{},{ID,FORM,FEATS},{1,10,10},5,5) {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} +Classifier : tokemorpho +{ + Transitions : {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} + Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1} + Stack 
context : {} + Columns : {FORM UPOS} + Focused buffer : {-1 0} + Focused stack : {} + Focused columns : {ID FORM FEATS} + Max nb elements : {1 10 10} + Raw input left window : 5 + Raw input right window : 5 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS EOS Strategy : incremental diff --git a/UD_any/tokemorpho_seq/machine.rm b/UD_any/tokemorpho_seq/machine.rm index 654d519..13715ac 100644 --- a/UD_any/tokemorpho_seq/machine.rm +++ b/UD_any/tokemorpho_seq/machine.rm @@ -1,5 +1,28 @@ Name : Tokenizer, Tagger and Morpho Machine -Classifier : toketagger LSTM(-1,{-3,-2,-1,1,2},{},{FORM,UPOS},{-1,0},{},{ID,FORM,FEATS},{1,10,10},5,5) {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} +Classifier : tokemorpho +{ + Transitions : {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} + Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1 1 2} + Stack context : {} + Columns : {FORM UPOS} + Focused buffer : {-1 0} + Focused stack : {} + Focused columns : {ID FORM FEATS} + Max nb elements : {1 10 10} + Raw input left window : 5 + Raw input right window : 5 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS EOS Strategy : sequential diff --git a/UD_any/tokenizer/machine.rm b/UD_any/tokenizer/machine.rm index 91c0331..afadef5 100644 --- a/UD_any/tokenizer/machine.rm +++ b/UD_any/tokenizer/machine.rm @@ -1,5 +1,28 @@ Name : Tokenizer Machine -Classifier : tokenizer LSTM(-1,{-3,-2,-1},{},{FORM},{-1,0},{},{ID,FORM},{1,10},5,5) {data/tokenizer.ts} +Classifier : tokenizer +{ + Transitions : {data/tokenizer.ts} 
+ Network type : LSTM + Unknown value threshold : -1 + Buffer context : {-3 -2 -1} + Stack context : {} + Columns : {FORM} + Focused buffer : {-1 0} + Focused stack : {} + Focused columns : {ID FORM} + Max nb elements : {1 10} + Raw input left window : 5 + Raw input right window : 5 + Embeddings size : 256 + Hidden size : 8192 + Context LSTM size : 1024 + Focused LSTM size : 256 + Rawinput LSTM size : 32 + Split trans LSTM size : 256 + Num layers : 2 + BiLSTM : true + LSTM dropout : 0.3 +} Splitwords : data/splitwords.ts Predictions : ID FORM EOS Strategy : sequential -- GitLab