diff --git a/tools/conll2text.py b/tools/conll2text.py index 5334ee20211daf8e04ca907c589ae9378f769b1d..0bb7ec8bf84d2cb30de70fc8e9eba439a3b50431 100755 --- a/tools/conll2text.py +++ b/tools/conll2text.py @@ -7,10 +7,13 @@ def printUsageAndExit() : exit(1) if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() - for line in open(sys.argv[1]) : + for line in open(sys.argv[1], encoding='utf-8') : if len(line.strip()) < 2 : continue diff --git a/tools/conllu2fplm.py b/tools/conllu2fplm.py index 300475ff4d105499cc132a96579469ac5f795775..a645e7af42e90e93551b4dfd756da7e3ed5758f1 100755 --- a/tools/conllu2fplm.py +++ b/tools/conllu2fplm.py @@ -21,13 +21,16 @@ def readMCD(mcdFilename) : return mcd if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() conllMCD = readMCD(sys.argv[2]) conllMCDr = {v: k for k, v in conllMCD.items()} - for line in open(sys.argv[1], "r") : + for line in open(sys.argv[1], "r", encoding="utf8") : if len(line.strip()) < 3 : continue if line.strip()[0] == '#' : diff --git a/tools/conllu2splits.py b/tools/conllu2splits.py index ce958cff38d664c50a71365911dd17c5373bcb9a..b7adba2a5100a9033ee4faf130a37c77ba124c02 100755 --- a/tools/conllu2splits.py +++ b/tools/conllu2splits.py @@ -47,6 +47,9 @@ def computeRules(sentence) : rules[word[1]][rule] = 1 def main() : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 : printUsageAndExit() @@ -58,7 +61,7 @@ def main() : sentence = [] - for line in open(sys.argv[1], "r") : + for line in open(sys.argv[1], "r", encoding="utf8") : if len(line.strip()) < 2 or line[0] == '#' : if len(sentence) > 0 : computeRules(sentence) diff --git a/tools/conlluShuffleAndMakeDev.py b/tools/conlluShuffleAndMakeDev.py index 4918a3ea2e349c3e0ba7334e91cd4409487d1733..43fc42b019ac3034bd3dca06ec134e2bafcb9281 100755 --- a/tools/conlluShuffleAndMakeDev.py +++ b/tools/conlluShuffleAndMakeDev.py @@ -8,6 +8,9 @@ def printUsageAndExit() : exit(1) if __name__ == "__main__" : + + sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + if len(sys.argv) != 3 and len(sys.argv) != 4 : printUsageAndExit() @@ -16,7 +19,7 @@ if __name__ == "__main__" : sentences = [] - for line in open(inputFile, "r") : + for line in open(inputFile, "r", encoding="utf8") : if len(line.strip()) < 3 : continue if line.strip().split('=')[0] == "# sent_id " : @@ -33,7 +36,7 @@ if __name__ == "__main__" : if len(sys.argv) == 3 : exit(0) - outputRest = open(sys.argv[3], "w") + outputRest = open(sys.argv[3], "w", encoding="utf8") for sentence in sentences[int(len(sentences)*float(ratio))+1:] : for word in sentence : print(word, file=outputRest) diff --git a/tools/fplm2fP.py b/tools/fplm2fP.py index 713741946c8b39c7df948d596402939890864f25..5a2c12e0a8e27c65fbdf49017f958bc1be86556e 100755 --- a/tools/fplm2fP.py +++ b/tools/fplm2fP.py @@ -2,6 +2,8 @@ import sys +sys.stdout = open(1, 'w', encoding='utf-8', closefd=False) + def getLineAsList(inputFile) : line = inputFile.readline() @@ -13,7 +15,6 @@ def getLineAsList(inputFile) : return line -#fplm = open(sys.argv[1], "r", encoding="ISO-8859-1") fplm = open(sys.argv[1], "r", encoding="utf8") line = []