From 3e1ff307af70687a53feddcf494ff02d64dc376c Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Sat, 18 Apr 2020 13:47:04 +0200
Subject: [PATCH] added script to save machine descriptions

---
 UD_any/tokemorpho_incr/machine.rm | 23 ++++++++++++++++-------
 scripts/extractMachines.py        | 19 +++++++++++++++++++
 2 files changed, 35 insertions(+), 7 deletions(-)
 create mode 100755 scripts/extractMachines.py

diff --git a/UD_any/tokemorpho_incr/machine.rm b/UD_any/tokemorpho_incr/machine.rm
index 01d4377..04abba1 100644
--- a/UD_any/tokemorpho_incr/machine.rm
+++ b/UD_any/tokemorpho_incr/machine.rm
@@ -3,7 +3,7 @@ Classifier : tokemorpho
 {
   Transitions :             {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts}
   Network type :            LSTM
-  Unknown value threshold : -1
+  Unknown value threshold : 1
   Buffer context :          {-3 -2 -1}
   Stack context :           {}
   Columns :                 {FORM UPOS}
@@ -13,15 +13,24 @@ Classifier : tokemorpho
   Max nb elements :         {1 10 10}
   Raw input left window :   5
   Raw input right window :  5
-  Embeddings size :         256
-  MLP :                     {8192 0.3}
+  Embeddings size :         64
+  MLP :                     {2048 0.3}
   Context LSTM size :       1024
   Focused LSTM size :       256
-  Rawinput LSTM size :      32
-  Split trans LSTM size :   256
-  Num layers :              2
+  Rawinput LSTM size :      16
+  Split trans LSTM size :   128
+  Num layers :              3
   BiLSTM :                  true
-  LSTM dropout :            0.3
+  LSTM dropout :            0.1
+  Total input dropout :     0.3
+  Embeddings dropout :      0.3
+  Dropout 2d :              false
+  Tree embedding columns :  {}
+  Tree embedding buffer :   {}
+  Tree embedding stack :    {}
+  Tree embedding nb :       {}
+  Tree embedding size :     0
+  Optimizer :               Adam {0.0005 0.9 0.999 0.00000001 0.00001 true}
 }
 Splitwords : data/splitwords.ts
 Predictions : ID FORM UPOS FEATS EOS
diff --git a/scripts/extractMachines.py b/scripts/extractMachines.py
new file mode 100755
index 0000000..59bff73
--- /dev/null
+++ b/scripts/extractMachines.py
@@ -0,0 +1,19 @@
+#! /usr/bin/python3
+
+import sys
+import os
+import glob
+import shutil
+
+def printUsageAndExit() :  # Print the expected command line to stderr, then terminate with status 1.
+  print("USAGE : %s bin dest"%sys.argv[0], file=sys.stderr)
+  sys.exit(1)  # sys.exit is always available; the bare exit() helper only exists when the site module is loaded
+
+if __name__ == "__main__" :  # Copy every .rm file found under argv[1] (bin) into argv[2] (dest).
+  if len(sys.argv) != 3 :  # exactly two arguments are required : bin and dest
+    printUsageAndExit()
+  os.makedirs(sys.argv[2], exist_ok=True)  # make sure dest exists so the copies below do not fail on a missing directory
+  for filename in glob.iglob(sys.argv[1]+"/**", recursive=True) :  # walk bin recursively
+    if os.path.splitext(filename)[1] == ".rm" :
+      shutil.copy(filename, os.path.join(sys.argv[2], os.path.basename(os.path.dirname(filename))+".rm"))  # each copy is named after the directory that contains the source file
+
-- 
GitLab