Skip to content
Snippets Groups Projects
Commit cb3187f5 authored by Franck Dary
Browse files

Added script mcf2conllu

parent 07738758
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python3
import argparse
import sys
################################################################################
def _finish_sentence(sentence, sentence_id, head, eos):
    """Yield the output lines for one buffered sentence.

    Args:
        sentence: List of rows (lists of column values). The value in the
            ``head`` column of every row must already be an ``int``.
            Rows are modified in place.
        sentence_id: 1-based number printed in the ``# sent_id`` comment.
        head: Index of the column holding the relative governor index.
        eos: Index of the end-of-sentence column, overwritten with the ID.

    Yields:
        The ``# sent_id`` comment, one tab-separated line per word, and a
        final empty string acting as the blank line between sentences.
    """
    yield "# sent_id = %d" % sentence_id
    for position, word in enumerate(sentence, start=1):
        # Recycling EOS column into ID column
        word[eos] = position
        # A relative head of 0 is left as 0; anything else is an offset
        # from the current word and becomes an absolute 1-based index.
        if word[head] != 0:
            word[head] += position
        yield '\t'.join(map(str, word))
    yield ""


def mcf_to_conllu(lines, head, eos, form=None, upos=None):
    """Convert MCF lines into the lines of a CoNLL-U style document.

    Empty lines and lines starting with ``#`` are skipped. The column
    layout is taken from the first data line and announced once through a
    ``# global.columns`` header. A sentence is emitted each time the value
    in the ``eos`` column equals 1.

    Words remaining after the last end-of-sentence marker are emitted as a
    final sentence instead of being silently dropped.

    Args:
        lines: Iterable of text lines (an open file works).
        head: Index of the column containing the governor relative index.
        eos: Index of the column containing end of sentence info.
        form: Optional index of the column containing FORM.
        upos: Optional index of the column containing UPOS.

    Yields:
        Output lines without trailing newline.

    Raises:
        ValueError: If a ``head`` or ``eos`` cell is not an integer.
        IndexError: If a column index is outside the columns of a line.
    """
    header_done = False
    sentence = []
    sentence_id = 0

    for raw_line in lines:
        line = raw_line.strip()
        if not line or line[0] == '#':
            continue

        columns = line.split('\t')

        if not header_done:
            # Unnamed columns are labelled with their 1-based position.
            names = [str(k) for k in range(1, len(columns) + 1)]
            names[head] = "HEAD"
            names[eos] = "ID"
            if form is not None:
                names[form] = "FORM"
            if upos is not None:
                names[upos] = "UPOS"
            yield "# global.columns = %s" % " ".join(names)
            header_done = True

        columns[head] = int(columns[head])
        sentence.append(columns)

        if int(columns[eos]) == 1:
            sentence_id += 1
            for out_line in _finish_sentence(sentence, sentence_id, head, eos):
                yield out_line
            sentence = []

    # Flush words that were never closed by an end-of-sentence marker so
    # that no input token is lost.
    if sentence:
        sentence_id += 1
        for out_line in _finish_sentence(sentence, sentence_id, head, eos):
            yield out_line


def main(argv=None):
    """Parse the command line and print the converted file to stdout.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=str,
        help="Input mcf file")
    parser.add_argument("head", type=int,
        help="Index of the column containing governor relative index.")
    parser.add_argument("eos", type=int,
        help="Index of the column containing end of sentence info.")
    parser.add_argument("--form", type=int, default=None,
        help="Index of the column containing FORM.")
    parser.add_argument("--upos", type=int, default=None,
        help="Index of the column containing UPOS.")
    args = parser.parse_args(argv)

    # The context manager guarantees the input file is closed, even if a
    # malformed line raises halfway through the conversion.
    with open(args.input, "r") as mcf_file:
        for out_line in mcf_to_conllu(mcf_file, args.head, args.eos,
                                      form=args.form, upos=args.upos):
            print(out_line)


if __name__ == "__main__" :
    main()
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment