diff --git a/scripts/cleanW2V.py b/scripts/cleanW2V.py
new file mode 100755
index 0000000000000000000000000000000000000000..9451e5cdf8c3b7e42d0526bc31ff378a7b661907
--- /dev/null
+++ b/scripts/cleanW2V.py
@@ -0,0 +1,44 @@
+#! /usr/bin/env python3
+
+import sys
+
+def printUsageAndExit() :
+  """Print the command-line usage on stderr and exit with status 1."""
+  sys.exit("USAGE: %s w2vFilename wordSizeLimit"%sys.argv[0])
+
+if __name__ == "__main__" :
+  # Copy the word2vec text file given as first argument to stdout, leaving out
+  # entries whose word is longer than wordSizeLimit and fixing the header count.
+  if len(sys.argv) != 3 :
+    printUsageAndExit()
+
+  w2vFilename = sys.argv[1]
+  wordSizeLimit = int(sys.argv[2]) # parsed once instead of once per line
+  embSize = None # unknown until the header has been read
+  lineNumber = 0 # stays 0 when the input file is empty
+  deleted = set() # 1-based numbers of the lines left out of the copy
+
+  # First pass: read the header and pick the lines to leave out.
+  with open(w2vFilename, "r") as w2vFile :
+    for lineNumber, line in enumerate(w2vFile, 1) :
+      splited = line.split()
+      if not splited :
+        deleted.add(lineNumber) # blank line: dropped instead of raising IndexError
+      elif embSize is None :
+        nbWords, embSize = int(splited[0]), int(splited[1]) # both must be integers
+        deleted.add(lineNumber) # the header is re-emitted below with the new count
+      elif len(splited)-1 != embSize :
+        # Malformed entries are only reported: they are still copied and counted.
+        print("ERROR: in line %d expected %d values got %d instead"%(lineNumber, embSize, len(splited)-1), file=sys.stderr)
+      elif len(splited[0]) > wordSizeLimit :
+        print("deleting line %d: word length = %d (%s...)"%(lineNumber, len(splited[0]),splited[0][:10]), file=sys.stderr)
+        deleted.add(lineNumber)
+
+  # 'deleted' already contains the header line, so no extra -1 is needed here.
+  print(lineNumber - len(deleted), embSize)
+  # Second pass: copy every remaining line unchanged.
+  with open(w2vFilename, "r") as w2vFile :
+    for lineNumber, line in enumerate(w2vFile, 1) :
+      if lineNumber not in deleted :
+        print(line, end="")
+