diff --git a/scripts/lefff2w2v.py b/scripts/lefff2w2v.py index 53e0f93f853cfb8f5ba728517859f6c710d57f76..1aa5e787c2a6c527d09fd4b64b89039d5dd7306a 100755 --- a/scripts/lefff2w2v.py +++ b/scripts/lefff2w2v.py @@ -92,7 +92,9 @@ if __name__ == "__main__" : form = splited[0].lower() pos = lefffPOS2UD[splited[1]] # In lefff there might be spaces in forms. W2v format don't allow it. We replace space by dotted circle. - form.replace(" ", "◌") + form = form.replace(" ", "◌") + if " " in form : + print("HERE '%s'"%form, file=sys.stderr) if pos not in allPos : print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr) if form not in form2pos : @@ -122,7 +124,7 @@ if __name__ == "__main__" : continue form = splited[conllMCD["FORM"]].lower() pos = splited[conllMCD["UPOS"]].lower() - form.replace(" ", "◌") + form = form.replace(" ", "◌") if pos not in allPos : print("ERROR: Unknown pos '%s' (check allPos in the script)"%pos, file=sys.stderr) if form not in form2pos :