Skip to content
Snippets Groups Projects
Commit 18f1683c authored by Franck Dary
Browse files

Using w2v on output from udpipe horizontal output instead of on raw text

parent 26f25b22
No related branches found
No related tags found
No related merge requests found
......@@ -42,8 +42,8 @@ transitions: all_no_test.conllu
texts:
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
pretrain: texts
./pretrainEmbeddings.py train.txt 64 pretrained.w2v
pretrain:
./pretrainEmbeddings.py $(TRAIN_FILES) 64 pretrained.w2v
$(FPLM_FILENAME): all_no_test.conllu
$(SCRIPTS)/conllu2fplm.py $< > $@
......
......@@ -21,6 +21,25 @@ if __name__ == "__main__" :
if which("word2vec") is None :
exit(0)
p = subprocess.Popen("word2vec -cbow 0 -size %s -window 10 -negative 5 -hs 0 -sample 1e-1 -threads 2 -binary 0 -iter 15 -min-count 2 -train %s -output %s"%(embeddingsSize, pathToFile, target), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
p = subprocess.Popen("~/Documents/ud_pipe/src/udpipe --output=horizontal none %s > in.text"%pathToFile, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
p.wait()
args = [
"word2vec",
"-cbow 0",
"-size %s"%embeddingsSize,
"-window 10",
"-negative 5",
"-hs 0",
"-sample 1e-1",
"-threads 4",
"-binary 0",
"-iter 15",
"-min-count 2",
"-train in.text",
"-output %s"%target,
]
p = subprocess.Popen(" ".join(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
p.wait()
......@@ -40,7 +40,6 @@ DEV=$EXPPATH"/data/dev.conllu"
DEVRAW=$EXPPATH"/data/dev.txt"
TEST=$EXPPATH"/data/test.conllu"
TESTRAW=$EXPPATH"/data/test.txt"
W2V=$EXPPATH"/data/pretrained.w2v"
if test ! -f $TRAIN;
then
......@@ -67,19 +66,13 @@ if [ "$MODE" = "txt" ]; then
fi
fi
if test -f $W2V;
then
>&2 echo "Using W2V :" $W2V
W2V="--pretrainedEmbeddings "$W2V
fi
if [ "$MODE" = "tsv" ]; then
macaon train --model $EXPPATH --trainTSV $TRAIN --devTSV $DEV $W2V "$@" || exit 1
macaon train --model $EXPPATH --trainTSV $TRAIN --devTSV $DEV "$@" || exit 1
exit 0
fi
if [ "$MODE" = "txt" ]; then
macaon train --model $EXPPATH --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW $W2V "$@" || exit 1
macaon train --model $EXPPATH --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW "$@" || exit 1
exit 0
fi
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment