Skip to content
Snippets Groups Projects
Commit 796a6a17 authored by Franck Dary
Browse files

Forced scripts output to utf8

parent c728e343
No related branches found
No related tags found
No related merge requests found
...@@ -7,10 +7,13 @@ def printUsageAndExit() : ...@@ -7,10 +7,13 @@ def printUsageAndExit() :
exit(1) exit(1)
if __name__ == "__main__" : if __name__ == "__main__" :
sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
if len(sys.argv) != 3 : if len(sys.argv) != 3 :
printUsageAndExit() printUsageAndExit()
for line in open(sys.argv[1]) : for line in open(sys.argv[1], encoding='utf-8') :
if len(line.strip()) < 2 : if len(line.strip()) < 2 :
continue continue
......
...@@ -21,13 +21,16 @@ def readMCD(mcdFilename) : ...@@ -21,13 +21,16 @@ def readMCD(mcdFilename) :
return mcd return mcd
if __name__ == "__main__" : if __name__ == "__main__" :
sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
if len(sys.argv) != 3 : if len(sys.argv) != 3 :
printUsageAndExit() printUsageAndExit()
conllMCD = readMCD(sys.argv[2]) conllMCD = readMCD(sys.argv[2])
conllMCDr = {v: k for k, v in conllMCD.items()} conllMCDr = {v: k for k, v in conllMCD.items()}
for line in open(sys.argv[1], "r") : for line in open(sys.argv[1], "r", encoding="utf8") :
if len(line.strip()) < 3 : if len(line.strip()) < 3 :
continue continue
if line.strip()[0] == '#' : if line.strip()[0] == '#' :
......
...@@ -47,6 +47,9 @@ def computeRules(sentence) : ...@@ -47,6 +47,9 @@ def computeRules(sentence) :
rules[word[1]][rule] = 1 rules[word[1]][rule] = 1
def main() : def main() :
sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
if len(sys.argv) != 3 : if len(sys.argv) != 3 :
printUsageAndExit() printUsageAndExit()
...@@ -58,7 +61,7 @@ def main() : ...@@ -58,7 +61,7 @@ def main() :
sentence = [] sentence = []
for line in open(sys.argv[1], "r") : for line in open(sys.argv[1], "r", encoding="utf8") :
if len(line.strip()) < 2 or line[0] == '#' : if len(line.strip()) < 2 or line[0] == '#' :
if len(sentence) > 0 : if len(sentence) > 0 :
computeRules(sentence) computeRules(sentence)
......
...@@ -8,6 +8,9 @@ def printUsageAndExit() : ...@@ -8,6 +8,9 @@ def printUsageAndExit() :
exit(1) exit(1)
if __name__ == "__main__" : if __name__ == "__main__" :
sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
if len(sys.argv) != 3 and len(sys.argv) != 4 : if len(sys.argv) != 3 and len(sys.argv) != 4 :
printUsageAndExit() printUsageAndExit()
...@@ -16,7 +19,7 @@ if __name__ == "__main__" : ...@@ -16,7 +19,7 @@ if __name__ == "__main__" :
sentences = [] sentences = []
for line in open(inputFile, "r") : for line in open(inputFile, "r", encoding="utf8") :
if len(line.strip()) < 3 : if len(line.strip()) < 3 :
continue continue
if line.strip().split('=')[0] == "# sent_id " : if line.strip().split('=')[0] == "# sent_id " :
...@@ -33,7 +36,7 @@ if __name__ == "__main__" : ...@@ -33,7 +36,7 @@ if __name__ == "__main__" :
if len(sys.argv) == 3 : if len(sys.argv) == 3 :
exit(0) exit(0)
outputRest = open(sys.argv[3], "w") outputRest = open(sys.argv[3], "w", encoding="utf8")
for sentence in sentences[int(len(sentences)*float(ratio))+1:] : for sentence in sentences[int(len(sentences)*float(ratio))+1:] :
for word in sentence : for word in sentence :
print(word, file=outputRest) print(word, file=outputRest)
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
import sys import sys
sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)
def getLineAsList(inputFile) : def getLineAsList(inputFile) :
line = inputFile.readline() line = inputFile.readline()
...@@ -13,7 +15,6 @@ def getLineAsList(inputFile) : ...@@ -13,7 +15,6 @@ def getLineAsList(inputFile) :
return line return line
#fplm = open(sys.argv[1], "r", encoding="ISO-8859-1")
fplm = open(sys.argv[1], "r", encoding="utf8") fplm = open(sys.argv[1], "r", encoding="utf8")
line = [] line = []
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment