From cb1e3c1f67f807d8dee01a20b6d5a9f81ccff52f Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Fri, 16 Oct 2020 11:00:34 +0200
Subject: [PATCH] =?UTF-8?q?modification=20de=20conll2mcf=20:=20les=20label?=
 =?UTF-8?q?s=20complexes=20sont=20transform=C3=A9s=20en=20labels=20simples?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: simplifyLabel was rewritten with str.partition and given a
docstring. The hunk headers and diffstat below were recomputed for that
change; the blob ids on the "index" line still describe the original patch.

 src/conll2mcf.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/conll2mcf.py b/src/conll2mcf.py
index 9dd3170..1b5d0c8 100644
--- a/src/conll2mcf.py
+++ b/src/conll2mcf.py
@@ -7,6 +7,14 @@ if len(sys.argv) < 3 :
     print('usage:', sys.argv[0], 'conllFile mcdFile')
     exit(1)
 
+def simplifyLabel(label):
+    """Return the part of *label* that precedes its first ':'.
+
+    A label containing no ':' is returned unchanged, so
+    'nsubj:pass' gives 'nsubj' and 'obj' gives 'obj'.
+    """
+    return label.partition(':')[0]
+
 conlluFilename = sys.argv[1]
 mcdFilename = sys.argv[2]
 
@@ -42,7 +50,8 @@ for ligne in conlluFile:
         w.setFeat('X1', tokens[4])
         w.setFeat('MORPHO', tokens[5])
         w.setFeat('GOV', int(tokens[6]) - index)
-        w.setFeat('LABEL', tokens[7])
+        label = simplifyLabel(tokens[7])
+        w.setFeat('LABEL', label)
         w.setFeat('X2', tokens[8])
         w.setFeat('X3', tokens[9])
         w.setFeat('EOS', '0')
-- 
GitLab