diff --git a/scripts/filterEmbeddings.py b/scripts/filterEmbeddings.py
new file mode 100755
index 0000000000000000000000000000000000000000..83c138abf3070d2b71151e72d94ca81747fe8f61
--- /dev/null
+++ b/scripts/filterEmbeddings.py
@@ -0,0 +1,44 @@
+#! /usr/bin/env python3
+
+import sys
+
+################################################################################
+def printUsageAndExit() :
+  print("USAGE : %s embeddings.w2v vocabFile1.conllu vocabFile2.conllu..."
+    %sys.argv[0], file=sys.stderr)
+  exit(1)
+################################################################################
+
+################################################################################
+if __name__ == "__main__" :
+  if len(sys.argv) < 3 :
+    printUsageAndExit()
+
+  vocab = {}
+
+  for filename in sys.argv[2:] :
+    formIndex = None
+    for line in open(filename, "r") :
+      line = line.strip()
+      if len(line) == 0 :
+        continue
+      if "# global.columns =" in line :
+        formIndex = line.split('=')[-1].strip().split(' ').index("FORM")
+      if line[0] == '#' :
+        continue
+      word = line.split('\t')[formIndex]
+      vocab[word] = True
+    
+  print("Vocabulary size = %d words"%len(vocab), file=sys.stderr)
+  
+  for line in open(sys.argv[1]) :
+    line = line.strip()
+    splited = line.split(' ')
+    # Ignore optional w2v header
+    if len(splited) == 2 :
+      continue
+    word = splited[0]
+    if word in vocab :
+      print(line)
+################################################################################
+