Patch summary (text before the "diff --git" line is ignored by git apply / patch):
  - stop the GloVe pipeline after vocab_count unless vocab.txt is non-empty;
  - copy out.txt to $4 instead of moving it;
  - if $4 is missing or empty afterwards, write the single line "0 $3" to it.
Note: the indentation of the MINCOUNT=10 and echo lines was reconstructed as a
tab; confirm it against the target file before applying.

diff --git a/UD_any/data/pretrainEmbeddings.sh b/UD_any/data/pretrainEmbeddings.sh
index b22d83db8e9d5558db315cb58f4e3d596eb887ef..2e1ada24ee0ff51fa343e4f8db5090f2d47f6601 100755
--- a/UD_any/data/pretrainEmbeddings.sh
+++ b/UD_any/data/pretrainEmbeddings.sh
@@ -13,17 +13,22 @@ if [ $2 == "LETTERS" ]; then
 	MINCOUNT=10
 fi
 
-
 cp -r $GLOVE .
 GLOVE="GloVe/"
 CURDIR="$(pwd)"
 cd $GLOVE && make clean && make && cd $CURDIR \
 && $HORIZONTAL $1 $2 > in.text \
-&& $GLOVE"build/vocab_count" -min-count $MINCOUNT < in.text > vocab.txt \
+&& $GLOVE"build/vocab_count" -min-count $MINCOUNT < in.text > vocab.txt
+
+[ -s vocab.txt ] \
 && $GLOVE"build/cooccur" -symmetric 0 -window-size 10 -vocab-file vocab.txt -memory 8.0 -overflow-file tempoverflow < in.text > cooccurrences.bin \
 && $GLOVE"build/shuffle" -memory 8.0 -seed 100 < cooccurrences.bin > cooccurrence.shuf.bin \
 && $GLOVE"build/glove" -iter 50 -save_gradsq 0 -write-header 1 -input-file cooccurrence.shuf.bin -vocab-file vocab.txt -save-file out -gradsq-file gradsq -vector-size $3 -seed 100 -threads 1 -alpha 0.75 -x-max 100.0 -eta 0.05 -binary 0 -model 1 \
-&& mv out.txt $4
+&& cp out.txt $4
+
+if [ ! -s $4 ] ; then
+	echo "0 $3" > $4
+fi
 
 rm in.text 2> /dev/null
 rm vocab.txt 2> /dev/null