diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile
index 926906cd7530888c29ec89a9203eb50533433b1f..ba996647fd7ab8eee980cea24703a76039743d66 100644
--- a/UD_any/data/Makefile
+++ b/UD_any/data/Makefile
@@ -12,7 +12,7 @@ TEST_FILES=$(shell find $(CORPUS) -type f -name '*test*.conllu')
 THRESHOLD=10
 FPLM_FILENAME=fplm
 
-all: tokenizer.ts segmenter.ts texts all_no_test.conllu columns
+all: tokenizer.ts segmenter.ts texts all_no_test.conllu columns pretrain # pretrain: word2vec embeddings of the training text
 	rm -f col_*\.txt
 	rm -f all_no_test.conllu
 
@@ -49,6 +49,20 @@ columns: all_no_test.conllu $(MCD)
 texts:
 	./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
 
+# Train 64-dimensional word2vec embeddings on every raw training text found
+# under $(CORPUS). pretrainEmbeddings.py takes exactly one input file, so it
+# is called once per file; no match or a failing call aborts the build.
+.PHONY: pretrain
+pretrain: texts
+	files="$$(find $(CORPUS) -type f -name '*train*.txt')"; \
+	if [ -z "$$files" ]; then \
+		echo "pretrain: no '*train*.txt' file found in $(CORPUS)" >&2; \
+		exit 1; \
+	fi; \
+	for f in $$files; do \
+		./pretrainEmbeddings.py "$$f" 64 || exit 1; \
+	done
+
 $(FPLM_FILENAME): all_no_test.conllu $(MCD)
 	$(SCRIPTS)/conllu2fplm.py $< $(MCD) > $@
 
diff --git a/UD_any/data/pretrainEmbeddings.py b/UD_any/data/pretrainEmbeddings.py
new file mode 100755
index 0000000000000000000000000000000000000000..d0a96b4e33d767c4627dea68d2f6ab4aceca441e
--- /dev/null
+++ b/UD_any/data/pretrainEmbeddings.py
@@ -0,0 +1,52 @@
+#! /usr/bin/env python3
+
+# Trains word2vec embeddings on a raw text file by running the external
+# `word2vec` program; the vectors are written next to the input file, with
+# its extension replaced by ".w2v".
+
+import sys
+import os
+import subprocess
+
+def printUsageAndExit() :
+  """Print the expected command line on stderr, then exit with status 1."""
+  print("USAGE : %s file.txt embeddingsSize"%sys.argv[0], file=sys.stderr)
+  sys.exit(1)
+
+if __name__ == "__main__" :
+  if len(sys.argv) != 3 :
+    printUsageAndExit()
+
+  pathToFile = sys.argv[1]
+  try :
+    embeddingsSize = int(sys.argv[2])
+  except ValueError :
+    # A non-numeric size is a usage error, not a traceback.
+    printUsageAndExit()
+  # Output path: the input path with its extension swapped for ".w2v".
+  target = os.path.splitext(pathToFile)[0] + ".w2v"
+
+  # Argument list instead of a shell string, so that paths containing spaces
+  # or shell metacharacters reach word2vec unchanged.
+  command = [
+    "word2vec", "-cbow", "0", "-size", str(embeddingsSize), "-window", "10",
+    "-negative", "5", "-hs", "0", "-sample", "1e-1", "-threads", "2",
+    "-binary", "0", "-iter", "15", "-min-count", "2",
+    "-train", pathToFile, "-output", target,
+  ]
+
+  # run() drains both pipes while waiting; Popen(...PIPE...).wait() can block
+  # forever once the child fills a pipe that nobody reads.
+  try :
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  except OSError as error :
+    print("ERROR : could not run word2vec : %s"%error, file=sys.stderr)
+    sys.exit(1)
+
+  # The output stays hidden on success, as before; on failure it is shown and
+  # the exit status is passed on so the calling build stops.
+  if result.returncode != 0 :
+    sys.stderr.buffer.write(result.stdout)
+    sys.stderr.buffer.write(result.stderr)
+    sys.exit(result.returncode)
+