Skip to content
Snippets Groups Projects
Commit 7b65bf7f authored by Franck Dary
Browse files

Improved diverse scripts

parent b83580f0
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python3
import sys
def merge_lines(filenames):
    """Yield the lines of *filenames* merged into one stream.

    Each line is stripped of surrounding whitespace. A line made of exactly
    two whitespace-separated tokens is treated as a header: it is yielded
    the first time it is met (across all files) and dropped afterwards.
    Every other line is yielded prefixed with ``<prefix>_``, where the
    prefix is the file's base name up to its first dot.

    NOTE(review): the two-token header presumably is the "count dimension"
    first line of a word-embeddings file -- confirm against the callers.

    Args:
        filenames: iterable of paths to read, in order.

    Yields:
        The merged lines, without trailing newline.
    """
    had_first = False
    for filename in filenames:
        # "dir/en.vec" -> "en"
        prefix = filename.split('/')[-1].split('.')[0]
        # The context manager closes each input as soon as it is consumed.
        with open(filename, "r") as stream:
            for line in stream:
                line = line.strip()
                if len(line.split()) == 2:
                    if had_first:
                        continue
                    had_first = True
                    yield line
                else:
                    yield prefix + "_" + line


def main():
    """Print the merge of every file named on the command line."""
    for line in merge_lines(sys.argv[1:]):
        print(line)


if __name__ == "__main__":
    main()
#! /usr/bin/env python3
import sys
from readMCD import readMCD
def prefix_forms(filename, form_index):
    """Rewrite *filename* in place, prefixing one tab-separated column.

    Every line is stripped of surrounding whitespace. Empty lines and lines
    starting with ``#`` are written back unchanged. On all other lines the
    column at *form_index* gets ``<prefix>_`` prepended, where the prefix is
    the file's base name up to its first dot.

    Args:
        filename: path of the file to rewrite.
        form_index: zero-based index of the column to prefix.

    Raises:
        IndexError: if a data line has no column at *form_index*.
    """
    # "dir/fr.conllu" -> "fr"
    prefix = filename.split('/')[-1].split('.')[0]
    # Read everything first: the same path is reopened for writing below.
    with open(filename, "r") as source:
        lines = [line.strip() for line in source]
    with open(filename, "w") as out:
        for line in lines:
            if not line or line[0] == "#":
                print(line, file=out)
                continue
            columns = line.split('\t')
            columns[form_index] = prefix + "_" + columns[form_index]
            print("\t".join(columns), file=out)


def main():
    """Parse the command line and prefix the chosen column of every file."""
    if len(sys.argv) < 3:
        print("USAGE : %s FORMindex filename1 filename2..." % sys.argv[0], file=sys.stderr)
        # Stop here: without arguments sys.argv[1] below would raise.
        sys.exit(1)
    form_index = int(sys.argv[1])
    for filename in sys.argv[2:]:
        prefix_forms(filename, form_index)


if __name__ == "__main__":
    main()
......@@ -42,7 +42,7 @@ if __name__ == "__main__" :
splited[args.head] = int(splited[args.head])
sentence.append(splited)
eos = int(splited[args.eos])
eos = 0 if splited[args.eos] == "_" else int(splited[args.eos])
if eos == 1 :
sentenceID += 1
print("# sent_id = %d"%sentenceID)
......
#! /usr/bin/env python3
import sys
from readMCD import readMCD
def sentence_rows(words, col2index):
    """Build one row of column values per word of a sentence.

    Every column defaults to ``"_"``. ID is the 1-based position of the
    word and FORM the word itself. The first word gets HEAD ``"0"`` and
    DEPREL ``"root"``; every other word gets HEAD ``"1"`` (attached to the
    first word) and DEPREL ``"_"``.

    Args:
        words: list of the sentence's tokens.
        col2index: mapping from column name to column position; must have
            the keys "ID", "FORM", "HEAD" and "DEPREL".

    Returns:
        A list with one list of strings per word; empty if *words* is empty.
    """
    rows = []
    for position, word in enumerate(words):
        row = ["_" for _ in col2index]
        is_first = position == 0
        row[col2index["ID"]] = str(position + 1)
        row[col2index["FORM"]] = word
        row[col2index["HEAD"]] = "0" if is_first else "1"
        row[col2index["DEPREL"]] = "root" if is_first else "_"
        rows.append(row)
    return rows


def main():
    """Convert the text file named on the command line, one sentence per line."""
    if len(sys.argv) < 2:
        print("USAGE : %s filename" % sys.argv[0], file=sys.stderr)
        sys.exit(1)
    # NOTE(review): readMCD presumably returns (name -> index, index -> name)
    # mappings -- confirm against readMCD; only the first one is used here.
    col2index, _ = readMCD("ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC")
    print("# global.columns = %s" % (" ".join(col2index.keys())))
    # The context manager guarantees the input file is closed.
    with open(sys.argv[1], "r") as source:
        for line in source:
            line = line.strip()
            rows = sentence_rows(line.split(), col2index)
            print("# text = %s" % line)
            print("\n".join(["\t".join(row) for row in rows]))
            print("")


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment