Skip to content
Snippets Groups Projects
Commit 3de41e7a authored by Franck Dary
Browse files

conllu2horizontal: replace every number with 42 and every digit with 0

parent 42de4d6d
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,15 @@ ...@@ -3,6 +3,15 @@
import sys import sys
from readMCD import readMCD from readMCD import readMCD
def isNumber(s) :
    """Tell whether the string s looks like a number.

    s counts as a number when it contains at least one digit character and
    no alphabetic character. Any other characters (signs, decimal or
    thousands separators, punctuation) are ignored, so "3.14", "-7" and
    "1,000" are all numbers. The empty string is not a number.

    Args:
        s: the string to inspect.

    Returns:
        True if s has at least one digit and no letter, False otherwise.
    """
    hasDigit = False
    for c in s :
        # A single letter is enough to disqualify the whole string.
        if c.isalpha() :
            return False
        if c.isdigit() :
            hasDigit = True
    return hasDigit
def printUsageAndExit() : def printUsageAndExit() :
print("USAGE : %s file.conllu (columnName | LETTERS)"%sys.argv[0], file=sys.stderr) print("USAGE : %s file.conllu (columnName | LETTERS)"%sys.argv[0], file=sys.stderr)
sys.exit(1) sys.exit(1)
...@@ -36,7 +45,10 @@ if __name__ == "__main__" : ...@@ -36,7 +45,10 @@ if __name__ == "__main__" :
print("ERROR : column %s not found in line '%s'"%(index, line.strip())) print("ERROR : column %s not found in line '%s'"%(index, line.strip()))
exit(1) exit(1)
print(splited[index].replace(" ", ""), end=" ") value = splited[index].replace(" ", "")
if isNumber(splited[index].replace(" ", "").strip()) :
value = 42
print(value, end=" ")
else : else :
for line in open(sys.argv[1], "r") : for line in open(sys.argv[1], "r") :
if line.startswith("#") : if line.startswith("#") :
...@@ -45,6 +57,9 @@ if __name__ == "__main__" : ...@@ -45,6 +57,9 @@ if __name__ == "__main__" :
col2index, index2col = readMCD(splited[-1].strip()) col2index, index2col = readMCD(splited[-1].strip())
splited = line.split("text =") splited = line.split("text =")
if len(splited) > 1 : if len(splited) > 1 :
text = splited[-1].replace("\n", " ").replace(" ", "") text = list(splited[-1].replace("\n", " ").replace(" ", ""))
print(" ".join(list(text))) for elem in text :
if isNumber(str(elem)) :
elem = '0'
print(elem, end=" ")
print("")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment