Skip to content
Snippets Groups Projects
Commit 1e001f9b authored by Franck Dary's avatar Franck Dary
Browse files
parents 9d2a9871 93449389
No related branches found
No related tags found
No related merge requests found
......@@ -2,17 +2,27 @@
"""Merge several text embedding files into one stream written to stdout.

Lines made of exactly two tokens are treated as headers whose second token
is the embedding size; lines with more than two tokens are treated as
vectors. The output starts with a single "<vector count> <size>" header,
followed by every vector line prefixed with "<file prefix>_".
"""
import sys


def _filePrefix(filename):
    """Return the last path component of `filename`, cut at its first '.'."""
    return filename.split('/')[-1].split('.')[0]


def _scan(filenames):
    """Count the vector lines and read the embedding size of `filenames`.

    Returns a `(nbLines, embSize)` tuple; `embSize` is None when no header
    line was found. Prints an error to stderr and exits with status 1 as
    soon as two headers announce different sizes.
    """
    nbLines = 0
    embSize = None
    for filename in filenames:
        with open(filename, "r") as stream:
            for line in stream:
                splited = line.split()
                if len(splited) == 2:
                    size = int(splited[1])
                    if embSize is None:
                        embSize = size
                    elif embSize != size:
                        print("ERROR : incompatibles embedings sizes %d and %d"%(embSize, size), file=sys.stderr)
                        sys.exit(1)
                elif len(splited) > 2:
                    # Count only what the second pass prints, so that blank
                    # or one-token lines cannot inflate the header.
                    nbLines += 1
    return nbLines, embSize


def mergeEmbeddings(filenames):
    """Print the merged header, then every prefixed vector line.

    `filenames` is read twice (once to build the header, once to emit the
    vectors), so it must be a re-iterable sequence of paths.
    """
    nbLines, embSize = _scan(filenames)
    print(nbLines, embSize)

    for filename in filenames:
        prefix = _filePrefix(filename)
        with open(filename, "r") as stream:
            for line in stream:
                line = line.strip()
                if len(line.split()) > 2:
                    print(prefix+"_"+line)


if __name__ == "__main__":
    mergeEmbeddings(sys.argv[1:])
#! /usr/bin/env python3
"""Append a copy of one column to every token line of CoNLL-U files.

The files are rewritten in place: the new column name is added to the
"# global.columns =" line and each token line gets the value of the source
column appended as an extra tab-separated field.
"""
import sys


def copyColumn(rawLines, fromCol, toCol, parseColumns):
    """Return the lines of a file with column `fromCol` duplicated as `toCol`.

    rawLines     -- iterable of lines (surrounding whitespace is stripped).
    fromCol      -- name of the column whose value is copied.
    toCol        -- name appended to the "# global.columns =" line.
    parseColumns -- callable given the column list text (what follows the
                    last '=' of the header); must return a pair whose first
                    item maps a column name to its field index.

    Raises ValueError when a token line appears before any
    "# global.columns =" line, or when `fromCol` is not a known column.
    """
    conllMCD = None
    lines = []
    for line in rawLines:
        line = line.strip()
        if "# global.columns =" in line:
            line = line + " " + toCol
            conllMCD, _ = parseColumns(line.split('=')[-1].strip())
        # Blank lines and comments (header included) are copied as they are.
        if len(line) == 0 or line[0] == '#':
            lines.append(line)
            continue
        if conllMCD is None:
            raise ValueError("token line found before any '# global.columns =' line")
        try:
            index = conllMCD[fromCol]
        except KeyError:
            raise ValueError("unknown column '%s'" % fromCol) from None
        splited = line.split('\t')
        splited.append(splited[index])
        lines.append("\t".join(splited))
    return lines


def main(argv=None):
    """Rewrite each file named in `argv[3:]`, copying `argv[1]` to `argv[2]`.

    `argv` defaults to `sys.argv`. Exits with status 1 on a usage error or
    when a file cannot be converted; a failing file is left untouched
    because it is only rewritten once fully converted in memory.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        print("USAGE : %s fromColumn toColumn file1.conllu file2.conllu..."%argv[0], file=sys.stderr)
        sys.exit(1)

    # Project-local helper, imported here so that importing this module
    # does not require it.
    from readMCD import readMCD

    fromCol = argv[1]
    toCol = argv[2]

    for filename in argv[3:]:
        # Close the read handle before reopening the same path for writing.
        with open(filename, "r") as stream:
            try:
                lines = copyColumn(stream, fromCol, toCol, readMCD)
            except ValueError as error:
                print("ERROR : %s : %s" % (filename, error), file=sys.stderr)
                sys.exit(1)
        with open(filename, "w") as out:
            print("\n".join(lines), file=out)


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment