NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. The 2-space indent width, the nesting
depths and the blank-line positions are assumptions; verify against the
repository (or apply with whitespace-tolerant options) before relying on it.
diff --git a/scripts/conllu2horizontal.py b/scripts/conllu2horizontal.py
index b0997c0250bcd4bdce0e64a134c1144eba8379b4..a6ef9c491b8b43512944dbf0878eba3bb45b0245 100755
--- a/scripts/conllu2horizontal.py
+++ b/scripts/conllu2horizontal.py
@@ -3,6 +3,15 @@
 import sys
 from readMCD import readMCD
 
+def isNumber(s) :
+  hasDigit = False
+  for c in s :
+    if c.isdigit() :
+      hasDigit = True
+    if c.isalpha() :
+      return False
+  return hasDigit
+
 def printUsageAndExit() :
   print("USAGE : %s file.conllu (columnName | LETTERS)"%sys.argv[0], file=sys.stderr)
   sys.exit(1)
@@ -36,7 +45,10 @@ if __name__ == "__main__" :
         print("ERROR : column %s not found in line '%s'"%(index, line.strip()))
         exit(1)
 
-      print(splited[index].replace(" ", "◌"), end=" ")
+      value = splited[index].replace(" ", "◌")
+      if isNumber(splited[index].replace(" ", "").strip()) :
+        value = 42
+      print(value, end=" ")
   else :
     for line in open(sys.argv[1], "r") :
       if line.startswith("#") :
@@ -45,6 +57,9 @@ if __name__ == "__main__" :
           col2index, index2col = readMCD(splited[-1].strip())
         splited = line.split("text =")
         if len(splited) > 1 :
-          text = splited[-1].replace("\n", " ").replace(" ", "◌")
-          print(" ".join(list(text)))
-
+          text = list(splited[-1].replace("\n", " ").replace(" ", "◌"))
+          for elem in text :
+            if isNumber(str(elem)) :
+              elem = '0'
+            print(elem, end=" ")
+          print("")