From 3fdd0bf7c762f0c1040dddbab82ba74242f3e2e6 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Wed, 22 Apr 2020 17:57:54 +0200
Subject: [PATCH] Update single-task machine configs and remove joint machines

---
 UD_any/morpho/machine.rm          | 23 ++++++++++++-----
 UD_any/parser/machine.rm          | 31 ++++++++++++++--------
 UD_any/tagger/machine.rm          | 23 ++++++++++++-----
 UD_any/tagparser_incr/machine.rm  |  8 ------
 UD_any/tokemorpho_incr/machine.rm | 43 -------------------------------
 UD_any/tokemorpho_seq/machine.rm  | 34 ------------------------
 UD_any/tokenizer/machine.rm       | 23 ++++++++++++-----
 UD_any/toketagger_incr/machine.rm |  9 -------
 UD_any/toketagger_seq/machine.rm  |  9 -------
 9 files changed, 68 insertions(+), 135 deletions(-)
 delete mode 100644 UD_any/tagparser_incr/machine.rm
 delete mode 100644 UD_any/tokemorpho_incr/machine.rm
 delete mode 100644 UD_any/tokemorpho_seq/machine.rm
 delete mode 100644 UD_any/toketagger_incr/machine.rm
 delete mode 100644 UD_any/toketagger_seq/machine.rm

diff --git a/UD_any/morpho/machine.rm b/UD_any/morpho/machine.rm
index 5ff55ba..eeaeaa5 100644
--- a/UD_any/morpho/machine.rm
+++ b/UD_any/morpho/machine.rm
@@ -1,9 +1,9 @@
 Name : Feats Machine
 Classifier : morpho
 {
-  Transitions :             {data/morpho_parts.ts}
+  Transitions :             {morpho,data/morpho_parts.ts}
   Network type :            LSTM
-  Unknown value threshold : -1
+  Unknown value threshold : 1
   Buffer context :          {-3 -2 -1 0 1 2}
   Stack context :           {}
   Columns :                 {FORM UPOS}
@@ -13,15 +13,24 @@ Classifier : morpho
   Max nb elements :         {1 10 10}
   Raw input left window :   -1
   Raw input right window :  -1
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
-  Context LSTM size :       1024
+  Embeddings size :         128
+  MLP :                     {2048 0.3}
+  Context LSTM size :       512
   Focused LSTM size :       256
   Rawinput LSTM size :      32
   Split trans LSTM size :   256
-  Num layers :              2
+  Num layers :              3
   BiLSTM :                  true
-  LSTM dropout :            0.3
+  LSTM dropout :            0.1
+  Total input dropout :     0.3
+  Embeddings dropout :      0.3
+  Dropout 2d :              false
+  Tree embedding columns :  {}
+  Tree embedding buffer :   {}
+  Tree embedding stack :    {}
+  Tree embedding nb :       {}
+  Tree embedding size :     0
+  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
 }
 Predictions : FEATS
 Strategy : sequential
diff --git a/UD_any/parser/machine.rm b/UD_any/parser/machine.rm
index a7105a5..c5fb678 100644
--- a/UD_any/parser/machine.rm
+++ b/UD_any/parser/machine.rm
@@ -1,27 +1,36 @@
 Name : Parser Machine
 Classifier : parser
 {
-  Transitions :             {data/parser.ts}
+  Transitions :             {parser,data/parser.ts}
   Network type :            LSTM
-  Unknown value threshold : -1
+  Unknown value threshold : 1
   Buffer context :          {-3 -2 -1 0 1 2}
-  Stack context :           {}
+  Stack context :           {2 1 0}
   Columns :                 {FORM UPOS}
   Focused buffer :          {-1 0}
-  Focused stack :           {-1 0}
-  Focused columns :         {ID FORM FEATS}
-  Max nb elements :         {1 10 10}
+  Focused stack :           {2 1 0}
+  Focused columns :         {ID EOS FORM FEATS DEPREL}
+  Max nb elements :         {1 1 10 10 1}
   Raw input left window :   -1
   Raw input right window :  -1
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
-  Context LSTM size :       1024
+  Embeddings size :         128
+  MLP :                     {2048 0.3}
+  Context LSTM size :       512
   Focused LSTM size :       256
   Rawinput LSTM size :      32
   Split trans LSTM size :   256
-  Num layers :              2
+  Num layers :              3
   BiLSTM :                  true
-  LSTM dropout :            0.3
+  LSTM dropout :            0.1
+  Total input dropout :     0.3
+  Embeddings dropout :      0.3
+  Dropout 2d :              false
+  Tree embedding columns :  {DEPREL}
+  Tree embedding buffer :   {-1}
+  Tree embedding stack :    {0 1 2}
+  Tree embedding nb :       {6}
+  Tree embedding size :     128
+  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
 }
 Predictions : HEAD DEPREL EOS
 Strategy : sequential
diff --git a/UD_any/tagger/machine.rm b/UD_any/tagger/machine.rm
index c54c5d8..cbfd0b7 100644
--- a/UD_any/tagger/machine.rm
+++ b/UD_any/tagger/machine.rm
@@ -1,9 +1,9 @@
 Name : Tagger Machine
 Classifier : tagger
 {
-  Transitions :             {data/tagger.ts}
+  Transitions :             {tagger,data/tagger.ts}
   Network type :            LSTM
-  Unknown value threshold : -1
+  Unknown value threshold : 1
   Buffer context :          {-3 -2 -1 0 1 2}
   Stack context :           {}
   Columns :                 {FORM UPOS}
@@ -13,15 +13,24 @@ Classifier : tagger
   Max nb elements :         {1 10}
   Raw input left window :   -1
   Raw input right window :  -1
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
-  Context LSTM size :       1024
+  Embeddings size :         128
+  MLP :                     {2048 0.3}
+  Context LSTM size :       512
   Focused LSTM size :       256
   Rawinput LSTM size :      32
   Split trans LSTM size :   256
-  Num layers :              2
+  Num layers :              3
   BiLSTM :                  true
-  LSTM dropout :            0.3
+  LSTM dropout :            0.1
+  Total input dropout :     0.3
+  Embeddings dropout :      0.3
+  Dropout 2d :              false
+  Tree embedding columns :  {}
+  Tree embedding buffer :   {}
+  Tree embedding stack :    {}
+  Tree embedding nb :       {}
+  Tree embedding size :     0
+  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
 }
 Predictions : UPOS
 Strategy : sequential
diff --git a/UD_any/tagparser_incr/machine.rm b/UD_any/tagparser_incr/machine.rm
deleted file mode 100644
index 334d08c..0000000
--- a/UD_any/tagparser_incr/machine.rm
+++ /dev/null
@@ -1,8 +0,0 @@
-Name : Tagger+Parser Machine
-Classifier : any CNN(4,4,2) data/taggerparser.ts
-Predictions : UPOS HEAD DEPREL EOS
-Strategy : incremental
-  tagger parser 0
-  parser tagger SHIFT 1
-  parser tagger RIGHT 1
-  parser parser 0
diff --git a/UD_any/tokemorpho_incr/machine.rm b/UD_any/tokemorpho_incr/machine.rm
deleted file mode 100644
index 04abba1..0000000
--- a/UD_any/tokemorpho_incr/machine.rm
+++ /dev/null
@@ -1,43 +0,0 @@
-Name : Tokenizer, Tagger and Morpho Machine
-Classifier : tokemorpho
-{
-  Transitions :             {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts}
-  Network type :            LSTM
-  Unknown value threshold : 1
-  Buffer context :          {-3 -2 -1}
-  Stack context :           {}
-  Columns :                 {FORM UPOS}
-  Focused buffer :          {-1 0}
-  Focused stack :           {}
-  Focused columns :         {ID FORM FEATS}
-  Max nb elements :         {1 10 10}
-  Raw input left window :   5
-  Raw input right window :  5
-  Embeddings size :         64
-  MLP :                     {2048 0.3}
-  Context LSTM size :       1024
-  Focused LSTM size :       256
-  Rawinput LSTM size :      16
-  Split trans LSTM size :   128
-  Num layers :              3
-  BiLSTM :                  true
-  LSTM dropout :            0.1
-  Total input dropout :     0.3
-  Embeddings dropout :      0.3
-  Dropout 2d :              false
-  Tree embedding columns :  {}
-  Tree embedding buffer :   {}
-  Tree embedding stack :    {}
-  Tree embedding nb :       {}
-  Tree embedding size :     0
-  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
-}
-Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS FEATS EOS
-Strategy : incremental
-  tokenizer tagger ENDWORD 0
-  tokenizer tagger SPLIT 0
-  tokenizer tokenizer 0
-  tagger morpho 0
-  morpho tokenizer NOTHING 1
-  morpho morpho 0
diff --git a/UD_any/tokemorpho_seq/machine.rm b/UD_any/tokemorpho_seq/machine.rm
deleted file mode 100644
index 43fcf8b..0000000
--- a/UD_any/tokemorpho_seq/machine.rm
+++ /dev/null
@@ -1,34 +0,0 @@
-Name : Tokenizer, Tagger and Morpho Machine
-Classifier : tokemorpho
-{
-  Transitions :             {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts}
-  Network type :            LSTM
-  Unknown value threshold : -1
-  Buffer context :          {-3 -2 -1 1 2}
-  Stack context :           {}
-  Columns :                 {FORM UPOS}
-  Focused buffer :          {-1 0}
-  Focused stack :           {}
-  Focused columns :         {ID FORM FEATS}
-  Max nb elements :         {1 10 10}
-  Raw input left window :   5
-  Raw input right window :  5
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
-  Context LSTM size :       1024
-  Focused LSTM size :       256
-  Rawinput LSTM size :      32
-  Split trans LSTM size :   256
-  Num layers :              2
-  BiLSTM :                  true
-  LSTM dropout :            0.3
-}
-Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS FEATS EOS
-Strategy : sequential
-  tokenizer tagger ENDWORD 1
-  tokenizer tagger SPLIT 1
-  tokenizer tagger 0
-  tagger morpho 1
-  morpho tokenizer NOTHING 1
-  morpho tokenizer 0
diff --git a/UD_any/tokenizer/machine.rm b/UD_any/tokenizer/machine.rm
index 9b1252e..bc622d2 100644
--- a/UD_any/tokenizer/machine.rm
+++ b/UD_any/tokenizer/machine.rm
@@ -1,9 +1,9 @@
 Name : Tokenizer Machine
 Classifier : tokenizer
 {
-  Transitions :             {data/tokenizer.ts}
+  Transitions :             {tokenizer,data/tokenizer.ts}
   Network type :            LSTM
-  Unknown value threshold : -1
+  Unknown value threshold : 1
   Buffer context :          {-3 -2 -1}
   Stack context :           {}
   Columns :                 {FORM}
@@ -13,15 +13,24 @@ Classifier : tokenizer
   Max nb elements :         {1 10}
   Raw input left window :   5
   Raw input right window :  5
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
-  Context LSTM size :       1024
+  Embeddings size :         128
+  MLP :                     {2048 0.3}
+  Context LSTM size :       512
   Focused LSTM size :       256
   Rawinput LSTM size :      32
   Split trans LSTM size :   256
-  Num layers :              2
+  Num layers :              3
   BiLSTM :                  true
-  LSTM dropout :            0.3
+  LSTM dropout :            0.1
+  Total input dropout :     0.3
+  Embeddings dropout :      0.3
+  Dropout 2d :              false
+  Tree embedding columns :  {}
+  Tree embedding buffer :   {}
+  Tree embedding stack :    {}
+  Tree embedding nb :       {}
+  Tree embedding size :     0
+  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
 }
 Splitwords : data/splitwords.ts
 Predictions : ID FORM EOS
diff --git a/UD_any/toketagger_incr/machine.rm b/UD_any/toketagger_incr/machine.rm
deleted file mode 100644
index eedf496..0000000
--- a/UD_any/toketagger_incr/machine.rm
+++ /dev/null
@@ -1,9 +0,0 @@
-Name : Tokenizer and Tagger Machine
-Classifier : toketagger LSTM(-1,{-3,-2,-1},{},{FORM,UPOS},{-1,0},{},{ID,FORM},{1,10},5,5) {data/tokenizer.ts data/tagger.ts}
-Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS EOS
-Strategy : incremental
-  tokenizer tagger ENDWORD 0
-  tokenizer tagger SPLIT 0
-  tokenizer tokenizer 0
-  tagger tokenizer 1
diff --git a/UD_any/toketagger_seq/machine.rm b/UD_any/toketagger_seq/machine.rm
deleted file mode 100644
index 92fd153..0000000
--- a/UD_any/toketagger_seq/machine.rm
+++ /dev/null
@@ -1,9 +0,0 @@
-Name : Tokenizer and Tagger Machine
-Classifier : toketagger LSTM(-1,{-3,-2,-1,0,1,2},{},{FORM,UPOS},{-1,0},{},{ID,FORM},{1,10},5,5) {data/tokenizer.ts data/tagger.ts}
-Splitwords : data/splitwords.ts
-Predictions : ID FORM UPOS EOS
-Strategy : sequential
-  tokenizer tagger ENDWORD 1
-  tokenizer tagger SPLIT 1
-  tokenizer tagger 0
-  tagger tokenizer 1
-- 
GitLab