From 796a6a175587416e8c32dc719b5c4112e576d932 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Thu, 10 Oct 2019 14:01:06 +0200
Subject: [PATCH] Force scripts' input and output to UTF-8

---
 tools/conll2text.py              | 5 ++++-
 tools/conllu2fplm.py             | 5 ++++-
 tools/conllu2splits.py           | 5 ++++-
 tools/conlluShuffleAndMakeDev.py | 7 +++++--
 tools/fplm2fP.py                 | 3 ++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/tools/conll2text.py b/tools/conll2text.py
index 5334ee2..0bb7ec8 100755
--- a/tools/conll2text.py
+++ b/tools/conll2text.py
@@ -7,10 +7,13 @@ def printUsageAndExit() :
   exit(1)
 
 if __name__ == "__main__" :
+
+  sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
+
   if len(sys.argv) != 3 :
     printUsageAndExit()
 
-  for line in open(sys.argv[1]) :
+  for line in open(sys.argv[1], encoding='utf-8') :
     if len(line.strip()) < 2 :
       continue
 
diff --git a/tools/conllu2fplm.py b/tools/conllu2fplm.py
index 300475f..a645e7a 100755
--- a/tools/conllu2fplm.py
+++ b/tools/conllu2fplm.py
@@ -21,13 +21,16 @@ def readMCD(mcdFilename) :
   return mcd
 
 if __name__ == "__main__" :
+
+  sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
+
   if len(sys.argv) != 3 :
     printUsageAndExit()
 
   conllMCD = readMCD(sys.argv[2])
   conllMCDr = {v: k for k, v in conllMCD.items()} 
 
-  for line in open(sys.argv[1], "r") :
+  for line in open(sys.argv[1], "r", encoding="utf8") :
     if len(line.strip()) < 3 :
       continue
     if line.strip()[0] == '#' :
diff --git a/tools/conllu2splits.py b/tools/conllu2splits.py
index ce958cf..b7adba2 100755
--- a/tools/conllu2splits.py
+++ b/tools/conllu2splits.py
@@ -47,6 +47,9 @@ def computeRules(sentence) :
         rules[word[1]][rule] = 1
 
 def main() :
+
+  sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
+
   if len(sys.argv) != 3 :
     printUsageAndExit()
 
@@ -58,7 +61,7 @@ def main() :
 
   sentence = []
 
-  for line in open(sys.argv[1], "r") :
+  for line in open(sys.argv[1], "r", encoding="utf8") :
     if len(line.strip()) < 2 or line[0] == '#' :
       if len(sentence) > 0 :
         computeRules(sentence)
diff --git a/tools/conlluShuffleAndMakeDev.py b/tools/conlluShuffleAndMakeDev.py
index 4918a3e..43fc42b 100755
--- a/tools/conlluShuffleAndMakeDev.py
+++ b/tools/conlluShuffleAndMakeDev.py
@@ -8,6 +8,9 @@ def printUsageAndExit() :
   exit(1)
 
 if __name__ == "__main__" :
+
+  sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
+
   if len(sys.argv) != 3 and len(sys.argv) != 4 :
     printUsageAndExit()
 
@@ -16,7 +19,7 @@ if __name__ == "__main__" :
 
   sentences = []
 
-  for line in open(inputFile, "r") :
+  for line in open(inputFile, "r", encoding="utf8") :
     if len(line.strip()) < 3 :
       continue
     if line.strip().split('=')[0] == "# sent_id " :
@@ -33,7 +36,7 @@ if __name__ == "__main__" :
   if len(sys.argv) == 3 :
     exit(0)
 
-  outputRest = open(sys.argv[3], "w")
+  outputRest = open(sys.argv[3], "w", encoding="utf8")
   for sentence in sentences[int(len(sentences)*float(ratio))+1:] :
     for word in sentence :
       print(word, file=outputRest)
diff --git a/tools/fplm2fP.py b/tools/fplm2fP.py
index 7137419..5a2c12e 100755
--- a/tools/fplm2fP.py
+++ b/tools/fplm2fP.py
@@ -2,6 +2,8 @@
 
 import sys
 
+sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
+
 def getLineAsList(inputFile) :
   line = inputFile.readline()
 
@@ -13,7 +15,6 @@ def getLineAsList(inputFile) :
 
   return line
 
-#fplm = open(sys.argv[1], "r", encoding="ISO-8859-1")
 fplm = open(sys.argv[1], "r", encoding="utf8")
 
 line = []
-- 
GitLab