Skip to content
Snippets Groups Projects
Commit 2b062198 authored by Franck Dary
Browse files

If w2v is installed on the machine, the use of pretrained embeddings is now the default behaviour

parent b0feb4c0
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import sys import sys
import os import os
import subprocess import subprocess
from shutil import which
def printUsageAndExit() : def printUsageAndExit() :
print("USAGE : %s file.conllu embeddingsSize"%sys.argv[0], file=sys.stderr) print("USAGE : %s file.conllu embeddingsSize"%sys.argv[0], file=sys.stderr)
...@@ -17,6 +18,9 @@ if __name__ == "__main__" : ...@@ -17,6 +18,9 @@ if __name__ == "__main__" :
splited = os.path.splitext(pathToFile) splited = os.path.splitext(pathToFile)
target = splited[0] + ".w2v" target = splited[0] + ".w2v"
if which("w2v") is None :
exit(0)
p = subprocess.Popen("word2vec -cbow 0 -size %s -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 2 -binary 0 -iter 15 -min-count 2 -train %s -output %s"%(embeddingsSize, pathToFile, target), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) p = subprocess.Popen("word2vec -cbow 0 -size %s -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 2 -binary 0 -iter 15 -min-count 2 -train %s -output %s"%(embeddingsSize, pathToFile, target), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
p.wait() p.wait()
...@@ -44,6 +44,7 @@ DEVRAW=$(find $CORPUS -type f -name '*dev*.txt') ...@@ -44,6 +44,7 @@ DEVRAW=$(find $CORPUS -type f -name '*dev*.txt')
TEST=$(find $CORPUS -type f -name '*test*.conllu') TEST=$(find $CORPUS -type f -name '*test*.conllu')
TESTRAW=$(find $CORPUS -type f -name '*test*.txt') TESTRAW=$(find $CORPUS -type f -name '*test*.txt')
MCD=$(find $CORPUS -type f -name '*.mcd') MCD=$(find $CORPUS -type f -name '*.mcd')
W2V=$(find $CORPUS -type f -name '*.w2v')
if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST" || has_space "$MCD"; if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST" || has_space "$MCD";
then then
...@@ -78,13 +79,19 @@ fi ...@@ -78,13 +79,19 @@ fi
>&2 echo "Using MCD :" $MCD >&2 echo "Using MCD :" $MCD
# Pass pretrained embeddings to macaon only when $W2V names an existing
# regular file. The operand must be quoted: when find matched nothing, $W2V
# is empty and an unquoted `test -f $W2V` collapses to `test -f`, which is
# true (single non-empty argument), so --pretrainedEmbeddings would be passed
# without a file.
if test -f "$W2V";
then
>&2 echo "Using W2V :" $W2V
W2V="--pretrainedEmbeddings "$W2V
fi
if [ "$MODE" = "tsv" ]; then if [ "$MODE" = "tsv" ]; then
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --devTSV $DEV "$@" || exit 1 macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --devTSV $DEV $W2V "$@" || exit 1
exit 0 exit 0
fi fi
if [ "$MODE" = "txt" ]; then if [ "$MODE" = "txt" ]; then
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW "$@" || exit 1 macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW $W2V "$@" || exit 1
exit 0 exit 0
fi fi
......
0% Loading
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment