From 796a6a175587416e8c32dc719b5c4112e576d932 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Thu, 10 Oct 2019 14:01:06 +0200 Subject: [PATCH] Forced scripts output to utf8 --- tools/conll2text.py | 5 ++++- tools/conllu2fplm.py | 5 ++++- tools/conllu2splits.py | 5 ++++- tools/conlluShuffleAndMakeDev.py | 7 +++++-- tools/fplm2fP.py | 3 ++- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/conll2text.py b/tools/conll2text.py index 5334ee2..0bb7ec8 100755 --- a/tools/conll2text.py +++ b/tools/conll2text.py @@ -7,10 +7,13 @@ def printUsageAndExit() : exit(1) if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() - for line in open(sys.argv[1]) : + for line in open(sys.argv[1], encoding='utf-8') : if len(line.strip()) < 2 : continue diff --git a/tools/conllu2fplm.py b/tools/conllu2fplm.py index 300475f..a645e7a 100755 --- a/tools/conllu2fplm.py +++ b/tools/conllu2fplm.py @@ -21,13 +21,16 @@ def readMCD(mcdFilename) : return mcd if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() conllMCD = readMCD(sys.argv[2]) conllMCDr = {v: k for k, v in conllMCD.items()} - for line in open(sys.argv[1], "r") : + for line in open(sys.argv[1], "r", encoding="utf8") : if len(line.strip()) < 3 : continue if line.strip()[0] == '#' : diff --git a/tools/conllu2splits.py b/tools/conllu2splits.py index ce958cf..b7adba2 100755 --- a/tools/conllu2splits.py +++ b/tools/conllu2splits.py @@ -47,6 +47,9 @@ def computeRules(sentence) : rules[word[1]][rule] = 1 def main() : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() @@ -58,7 +61,7 @@ def main() : sentence = [] - for line in open(sys.argv[1], "r") : + for line in open(sys.argv[1], "r", encoding="utf8") : if len(line.strip()) < 2 or line[0] == '#' : if len(sentence) > 0 : computeRules(sentence) diff --git a/tools/conlluShuffleAndMakeDev.py b/tools/conlluShuffleAndMakeDev.py index 4918a3e..43fc42b 100755 --- a/tools/conlluShuffleAndMakeDev.py +++ b/tools/conlluShuffleAndMakeDev.py @@ -8,6 +8,9 @@ def printUsageAndExit() : exit(1) if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 and len(sys.argv) != 4 : printUsageAndExit() @@ -16,7 +19,7 @@ if __name__ == "__main__" : sentences = [] - for line in open(inputFile, "r") : + for line in open(inputFile, "r", encoding="utf8") : if len(line.strip()) < 3 : continue if line.strip().split('=')[0] == "# sent_id " : @@ -33,7 +36,7 @@ if __name__ == "__main__" : if len(sys.argv) == 3 : exit(0) - outputRest = open(sys.argv[3], "w") + outputRest = open(sys.argv[3], "w", encoding="utf8") for sentence in sentences[int(len(sentences)*float(ratio))+1:] : for word in sentence : print(word, file=outputRest) diff --git a/tools/fplm2fP.py b/tools/fplm2fP.py index 7137419..5a2c12e 100755 --- a/tools/fplm2fP.py +++ b/tools/fplm2fP.py @@ -2,6 +2,8 @@ import sys +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + def getLineAsList(inputFile) : line = inputFile.readline() @@ -13,7 +15,6 @@ def getLineAsList(inputFile) : return line -#fplm = open(sys.argv[1], "r", encoding="ISO-8859-1") fplm = open(sys.argv[1], "r", encoding="utf8") line = [] -- GitLab