Skip to content
Snippets Groups Projects
Commit 6f89810b authored by Franck Dary
Browse files

updated scripts

parent ff37857e
Branches
No related tags found
No related merge requests found
#! /usr/bin/python3
import sys
def printUsageAndExit() :
    """Print the command-line usage on stderr and terminate with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print("USAGE : %s file.tsv"%sys.argv[0], file=sys.stderr)
    # sys.exit is used rather than the bare exit() builtin: the latter is
    # injected by the `site` module for interactive use and is missing when
    # the interpreter is started without it (e.g. `python -S`).
    sys.exit(1)
if __name__ == "__main__" :
    # Script entry point: read the tab-separated file given as the only
    # argument and echo it on stdout with columns 6 and 7 overwritten so
    # that the first row of each group gets "0"/"root" and every following
    # row gets "1" in column 6.
    # NOTE(review): columns 6/7 are presumably the CoNLL-U HEAD and DEPREL
    # fields - confirm against the data this script is run on.
    if len(sys.argv) != 2 :
        printUsageAndExit()

    # 1-based position of the next data row inside the current group.
    curId = 1

    # The context manager guarantees the input file is closed, including
    # when a malformed row raises part-way through the loop.
    with open(sys.argv[1], 'r') as tsvFile :
        for line in tsvFile :
            # Lines shorter than 3 characters (newline included) and lines
            # starting with '#' are not data rows: they reset the position
            # counter and are passed through.
            if len(line) < 3 or line[0] == '#' :
                curId = 1
                if len(line) >= 3 :
                    # Comment line: echo verbatim (it keeps its own newline).
                    print(line, end="")
                else :
                    # Short line: emitted as an empty line.
                    print("")
                continue

            splited = line.strip().split('\t')
            # Rows with fewer than 8 columns raise IndexError here.
            if curId == 1 :
                splited[6] = "0"
                splited[7] = "root"
            else :
                splited[6] = "1"
            print('\t'.join(splited))
            curId += 1
...@@ -224,7 +224,7 @@ def load_conllu(file): ...@@ -224,7 +224,7 @@ def load_conllu(file):
# Read next token/word # Read next token/word
columns = line.split("\t") columns = line.split("\t")
if len(columns) != 10: if len(columns) < 10:
raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(_encode(line))) raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(_encode(line)))
# Skip empty nodes # Skip empty nodes
...@@ -253,7 +253,7 @@ def load_conllu(file): ...@@ -253,7 +253,7 @@ def load_conllu(file):
for _ in range(start, end + 1): for _ in range(start, end + 1):
word_line = _decode(file.readline().rstrip("\r\n")) word_line = _decode(file.readline().rstrip("\r\n"))
word_columns = word_line.split("\t") word_columns = word_line.split("\t")
if len(word_columns) != 10: if len(word_columns) < 10:
raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(_encode(word_line))) raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(_encode(word_line)))
ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True))
# Basic tokens/words # Basic tokens/words
......
...@@ -133,13 +133,14 @@ if __name__ == "__main__" : ...@@ -133,13 +133,14 @@ if __name__ == "__main__" :
for line in open(sys.argv[1], "r", encoding="utf8") : for line in open(sys.argv[1], "r", encoding="utf8") :
fileLineIndex += 1 fileLineIndex += 1
clean = line.strip() clean = line.strip()
if len(clean) == 0 :
if len(clean) < 3 :
if sentFirstLine == -1 :
exit(1)
checkSentence(sentFirstLine, sentence, conllMCD, conllMCDr) checkSentence(sentFirstLine, sentence, conllMCD, conllMCDr)
sentence = [] sentence = []
sentFirstLine = -1 sentFirstLine = -1
continue elif clean[0] != '#' :
if clean[0] == '#' :
continue
if sentFirstLine == -1 : if sentFirstLine == -1 :
sentFirstLine = fileLineIndex sentFirstLine = fileLineIndex
sentence.append(clean.split('\t')) sentence.append(clean.split('\t'))
......
#! /usr/bin/python3
import sys
def printUsageAndExit() :
    """Print the command-line usage on stderr and terminate with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print("USAGE : %s file.tsv column1 column2..."%sys.argv[0], file=sys.stderr)
    # sys.exit is used rather than the bare exit() builtin: the latter is
    # injected by the `site` module for interactive use and is missing when
    # the interpreter is started without it (e.g. `python -S`).
    sys.exit(1)
if __name__ == "__main__" :
    # Script entry point: read the tab-separated file given as first
    # argument and echo it on stdout with every column whose 0-based index
    # is listed in the remaining arguments replaced by "_".
    if len(sys.argv) < 2 :
        printUsageAndExit()

    # Parse the column indices once, before touching the file, so that a
    # non-integer argument is reported through the usage message instead of
    # a traceback in the middle of the output.
    try :
        columns = [int(arg) for arg in sys.argv[2:]]
    except ValueError :
        printUsageAndExit()

    # The context manager guarantees the input file is closed, including
    # when an error interrupts the loop.
    with open(sys.argv[1], 'r') as tsvFile :
        for line in tsvFile :
            # Lines shorter than 3 characters (newline included) and lines
            # starting with '#' are not data rows and are passed through.
            if len(line) < 3 or line[0] == '#' :
                if len(line) >= 3 :
                    # Comment line: echo verbatim (it keeps its own newline).
                    print(line, end="")
                else :
                    # Short line: emitted as an empty line.
                    print("")
                continue

            splited = line.strip().split('\t')
            for col in columns :
                # Indices outside the row (negative ones included) are
                # silently ignored.
                if 0 <= col < len(splited) :
                    splited[col] = "_"
            print('\t'.join(splited))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment