Skip to content
Snippets Groups Projects
Commit 2b062198 authored by Franck Dary
Browse files

If w2v is installed on the machine, the use of pretrained embeddings is now the default behaviour

parent b0feb4c0
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@
import sys
import os
import subprocess
from shutil import which
def printUsageAndExit() :
print("USAGE : %s file.conllu embeddingsSize"%sys.argv[0], file=sys.stderr)
......@@ -17,6 +18,9 @@ if __name__ == "__main__" :
splited = os.path.splitext(pathToFile)
target = splited[0] + ".w2v"
if which("w2v") is None :
exit(0)
p = subprocess.Popen("word2vec -cbow 0 -size %s -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 2 -binary 0 -iter 15 -min-count 2 -train %s -output %s"%(embeddingsSize, pathToFile, target), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
p.wait()
......@@ -44,6 +44,7 @@ DEVRAW=$(find $CORPUS -type f -name '*dev*.txt')
TEST=$(find $CORPUS -type f -name '*test*.conllu')
TESTRAW=$(find $CORPUS -type f -name '*test*.txt')
MCD=$(find $CORPUS -type f -name '*.mcd')
W2V=$(find $CORPUS -type f -name '*.w2v')
if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST" || has_space "$MCD";
then
......@@ -78,13 +79,19 @@ fi
>&2 echo "Using MCD :" $MCD
# Enable pretrained embeddings only when a .w2v file was actually found.
# "$W2V" must be quoted here: when find matched nothing, an unquoted empty
# $W2V reduces the check to `test -f`, which succeeds (a one-argument test
# only checks that the string "-f" is non-empty) and would hand macaon a
# dangling --pretrainedEmbeddings option with no path.
if test -f "$W2V";
then
  >&2 echo "Using W2V :" $W2V
  # Kept as a single string on purpose: it is expanded unquoted on the
  # macaon command lines so that it splits into the option and its path.
  W2V="--pretrainedEmbeddings "$W2V
fi
if [ "$MODE" = "tsv" ]; then
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --devTSV $DEV "$@" || exit 1
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --devTSV $DEV $W2V "$@" || exit 1
exit 0
fi
if [ "$MODE" = "txt" ]; then
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW "$@" || exit 1
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW $W2V "$@" || exit 1
exit 0
fi
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment