diff --git a/scripts/lefff2w2v.py b/scripts/lefff2w2v.py
index 6a1c46d11fd681a12e050a737b122af08f2483d1..53e0f93f853cfb8f5ba728517859f6c710d57f76 100755
--- a/scripts/lefff2w2v.py
+++ b/scripts/lefff2w2v.py
@@ -7,11 +7,15 @@
 # Example: ./lefff2w2v --conllu data/UD_French-GSD/*\.conllu
 # We can chose to output binary vector with the option --binary which is a threshold above which values will become 1.
 # We can ignore infrequent words in conllu by setting a threshold with --minfreq.
+# We can modulate the impact of the lefff with the parameter --lefffweight.
 
 import sys
 import argparse
 from readMCD import readMCD
 
+# List of UD POS tags : https://universaldependencies.org/u/pos/index.html
+allPos = ["adj", "adp", "adv", "aux", "cconj", "det", "intj", "noun", "num", "part", "pron", "propn", "punct", "sconj", "sym", "verb", "x"]
+
 # Convert lefff part of speech into UD UPOS.
 lefffPOS2UD = {
   "adj" : "adj",
@@ -78,8 +82,6 @@ if __name__ == "__main__" :
 
   # Dict with key=FORM and value= dict associationg pos with number of occ 
   form2pos = {}
-  # List of all pos (UD format) present in data
-  allPos = []
   # Associate each form with a counter, only for conllu files
   formCount = {}
 
@@ -92,9 +94,11 @@ if __name__ == "__main__" :
       # In lefff there might be spaces in forms. W2v format don't allow it. We replace space by dotted circle.
       form.replace(" ", "◌")
       if pos not in allPos :
-        allPos.append(pos)
+        print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
       if form not in form2pos :
         form2pos[form] = {}
+      if form not in formCount :
+        formCount[form] = 0
       if pos not in form2pos[form] :
         form2pos[form][pos] = args.lefffWeight
 
@@ -113,11 +117,14 @@ if __name__ == "__main__" :
           if len(line) == 0 or line[0] == "#" :
             continue
           splited = line.split("\t")
+          wordId = splited[conllMCD["ID"]].lower()
+          if "-" in wordId :
+            continue
           form = splited[conllMCD["FORM"]].lower()
           pos = splited[conllMCD["UPOS"]].lower()
           form.replace(" ", "◌")
           if pos not in allPos :
-            allPos.append(pos)
+            print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
           if form not in form2pos :
             form2pos[form] = {}
           if pos not in form2pos[form] :
@@ -129,6 +136,9 @@ if __name__ == "__main__" :
 
   outputLines = []
 
+  # To check if all pos are represented in our embeddings
+  usedPos = set()
+
   # Compute probability for each pos and form
   for form in form2pos :
     if args.minfreq is not None and formCount[form] < args.minfreq :
@@ -141,15 +151,18 @@ if __name__ == "__main__" :
       vec[allPos.index(pos)] = form2pos[form][pos] / totalOccs
     baseVec = vec.copy()
     for pos in form2pos[form] :
+      posIndex = allPos.index(pos)
       if args.binary is not None :
-        if vec[allPos.index(pos)] >= args.binary :
-          vec[allPos.index(pos)] = 1
+        if vec[posIndex] >= args.binary :
+          vec[posIndex] = 1
         else :
-          vec[allPos.index(pos)] = 0
+          vec[posIndex] = 0
+      if vec[posIndex] > 0.0 :
+        usedPos.add(posIndex)
       if args.binary is not None :
-        vec[allPos.index(pos)] = "%d"%vec[allPos.index(pos)]
+        vec[posIndex] = "%d"%vec[posIndex]
       else :
-        vec[allPos.index(pos)] = "%.2f"%vec[allPos.index(pos)]
+        vec[posIndex] = "%.2f"%vec[posIndex]
     if sum(map(float, vec)) == 0 :
       print("WARNING: word '%s' gets all 0. Original: '%s'"%(form, " ".join(map(str,baseVec))), file=sys.stderr)
     outputLines.append(form+" "+" ".join(vec))
@@ -159,3 +172,8 @@ if __name__ == "__main__" :
   outputLines.sort()
   print("\n".join(outputLines))
 
+  # Check unused pos
+  for posIndex in range(len(allPos)) :
+    if posIndex not in usedPos :
+      print("WARNING: unused POS '%s'"%allPos[posIndex], file=sys.stderr)
+