From dc820bb342927cf5aaa0707dbd9127aa07df6127 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 10 Apr 2020 21:57:35 +0200
Subject: [PATCH] Add tokeparser_incr machine; keep full labels in getTransitionSets

---
 UD_any/data/getTransitionSets.py  |  2 +-
 UD_any/tokeparser_incr/machine.rm | 42 +++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 UD_any/tokeparser_incr/machine.rm

diff --git a/UD_any/data/getTransitionSets.py b/UD_any/data/getTransitionSets.py
index 6d06e7b..473ec73 100755
--- a/UD_any/data/getTransitionSets.py
+++ b/UD_any/data/getTransitionSets.py
@@ -97,7 +97,7 @@ if __name__ == "__main__" :
         striped = line.strip()
         if len(striped) == 0 or striped == "root" or striped == "_" :
           continue
-        label = striped.split(':')[0]
+        label = striped
         if label not in labels :
           labels.add(striped)
           labelsList.append(striped)
diff --git a/UD_any/tokeparser_incr/machine.rm b/UD_any/tokeparser_incr/machine.rm
new file mode 100644
index 0000000..e4edaf3
--- /dev/null
+++ b/UD_any/tokeparser_incr/machine.rm
@@ -0,0 +1,42 @@
+Name : Tokenizer, Tagger and Morpho Machine
+Classifier : tokemorpho
+{
+  Transitions :             {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts data/parser.ts}
+  Network type :            LSTM
+  Unknown value threshold : -1
+  Buffer context :          {-5 -4 -3 -2 -1}
+  Stack context :           {}
+  Columns :                 {FORM UPOS}
+  Focused buffer :          {-1 0}
+  Focused stack :           {}
+  Focused columns :         {ID FORM FEATS}
+  Max nb elements :         {1 10 10}
+  Raw input left window :   5
+  Raw input right window :  5
+  Embeddings size :         256
+  MLP :                     {2048 0.3 2048 0.3}
+  Context LSTM size :       512
+  Focused LSTM size :       256
+  Rawinput LSTM size :      64
+  Split trans LSTM size :   256
+  Num layers :              3
+  BiLSTM :                  true
+  LSTM dropout :            0.3
+  Tree embedding columns :  {DEPREL}
+  Tree embedding buffer :   {-1}
+  Tree embedding stack :    {0}
+  Tree embedding nb :       {5 10}
+  Tree embedding size :     128
+}
+Splitwords : data/splitwords.ts
+Predictions : ID FORM UPOS FEATS HEAD DEPREL EOS
+Strategy : incremental
+  tokenizer tagger ENDWORD 0
+  tokenizer tagger SPLIT 0
+  tokenizer tokenizer 0
+  tagger morpho 0
+  morpho parser NOTHING 0
+  morpho morpho 0
+  parser tokenizer SHIFT 1
+  parser tokenizer RIGHT 1
+  parser parser 0
-- 
GitLab