Skip to content
Snippets Groups Projects
Commit 2f3c0537 authored by Franck Dary
Browse files

fixed script to generate embeddings from lexicon

parent 33789c0f
No related branches found
No related tags found
No related merge requests found
......@@ -92,7 +92,9 @@ if __name__ == "__main__" :
form = splited[0].lower()
pos = lefffPOS2UD[splited[1]]
# In lefff there might be spaces in forms. The w2v format doesn't allow them, so we replace each space with a dotted circle.
form.replace(" ", "")
form = form.replace(" ", "")
if " " in form :
print("HERE '%s'"%form, file=sys.stderr)
if pos not in allPos :
print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
if form not in form2pos :
......@@ -122,7 +124,7 @@ if __name__ == "__main__" :
continue
form = splited[conllMCD["FORM"]].lower()
pos = splited[conllMCD["UPOS"]].lower()
form.replace(" ", "")
form = form.replace(" ", "")
if pos not in allPos :
print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr)
if form not in form2pos :
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment