From 2f3c05378e414676783e8c10ca24e1c704b5bf68 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 20 May 2022 14:18:58 +0200
Subject: [PATCH] lefff2w2v.py: assign the result of form.replace() so spaces in forms are actually replaced

---
 scripts/lefff2w2v.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/lefff2w2v.py b/scripts/lefff2w2v.py
index 53e0f93..1aa5e78 100755
--- a/scripts/lefff2w2v.py
+++ b/scripts/lefff2w2v.py
@@ -92,7 +92,9 @@ if __name__ == "__main__" :
       form = splited[0].lower()
       pos = lefffPOS2UD[splited[1]]
       # In lefff there might be spaces in forms. W2v format don't allow it. We replace space by dotted circle.
-      form.replace(" ", "◌")
+      form = form.replace(" ", "◌")
+      if " " in form :
+        print("HERE '%s'"%form, file=sys.stderr)
       if pos not in allPos :
         print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
       if form not in form2pos :
@@ -122,7 +124,7 @@ if __name__ == "__main__" :
             continue
           form = splited[conllMCD["FORM"]].lower()
           pos = splited[conllMCD["UPOS"]].lower()
-          form.replace(" ", "◌")
+          form = form.replace(" ", "◌")
           if pos not in allPos :
             print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
           if form not in form2pos :
-- 
GitLab