From 3e1ff307af70687a53feddcf494ff02d64dc376c Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Sat, 18 Apr 2020 13:47:04 +0200 Subject: [PATCH] added script to save machine descriptions --- UD_any/tokemorpho_incr/machine.rm | 23 ++++++++++++++++------- scripts/extractMachines.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) create mode 100755 scripts/extractMachines.py diff --git a/UD_any/tokemorpho_incr/machine.rm b/UD_any/tokemorpho_incr/machine.rm index 01d4377..04abba1 100644 --- a/UD_any/tokemorpho_incr/machine.rm +++ b/UD_any/tokemorpho_incr/machine.rm @@ -3,7 +3,7 @@ Classifier : tokemorpho { Transitions : {data/tokenizer.ts data/tagger.ts data/morpho_parts.ts} Network type : LSTM - Unknown value threshold : -1 + Unknown value threshold : 1 Buffer context : {-3 -2 -1} Stack context : {} Columns : {FORM UPOS} @@ -13,15 +13,24 @@ Classifier : tokemorpho Max nb elements : {1 10 10} Raw input left window : 5 Raw input right window : 5 - Embeddings size : 256 - MLP : {8192 0.3} + Embeddings size : 64 + MLP : {2048 0.3} Context LSTM size : 1024 Focused LSTM size : 256 - Rawinput LSTM size : 32 - Split trans LSTM size : 256 - Num layers : 2 + Rawinput LSTM size : 16 + Split trans LSTM size : 128 + Num layers : 3 BiLSTM : true - LSTM dropout : 0.3 + LSTM dropout : 0.1 + Total input dropout : 0.3 + Embeddings dropout : 0.3 + Dropout 2d : false + Tree embedding columns : {} + Tree embedding buffer : {} + Tree embedding stack : {} + Tree embedding nb : {} + Tree embedding size : 0 + Optimizer : Adam {0.0005 0.9 0.999 0.00000001 0.00001 true} } Splitwords : data/splitwords.ts Predictions : ID FORM UPOS FEATS EOS diff --git a/scripts/extractMachines.py b/scripts/extractMachines.py new file mode 100755 index 0000000..59bff73 --- /dev/null +++ b/scripts/extractMachines.py @@ -0,0 +1,19 @@ +#! /usr/bin/python3 + +import sys +import os +import glob +import shutil + +def printUsageAndExit() : + print("USAGE : %s bin dest"%sys.argv[0], file=sys.stderr) + exit(1) + +if __name__ == "__main__" : + if len(sys.argv) != 3 : + printUsageAndExit() + + for filename in glob.iglob(sys.argv[1]+"/**", recursive=True) : + if os.path.splitext(filename)[1] == ".rm" : + shutil.copy(filename, sys.argv[2]+"/"+filename.split('/')[-2]+".rm") + -- GitLab