def simplifyLabel(label):
    """Return the part of *label* that precedes its first ':'.

    The caller passes column 7 of a CoNLL-U token line and stores the
    result as the word's 'LABEL' feature, so a subtyped label such as
    'nmod:poss' is reduced to 'nmod'.

    Args:
        label: The label string to simplify.

    Returns:
        Everything before the first ':' in *label*. The whole string is
        returned when it contains no ':', and an empty string when it is
        empty or starts with ':'.
    """
    # partition() splits once on the first separator and returns the
    # original string as the head when the separator is absent.
    head, _, _ = label.partition(':')
    return head