#!/usr/bin/env python3
# Origin: scripts/conll2text.py, added by commit
# 4524d1a4b23979930615a8edec5c106fc93cd893
# Author: Franck Dary <franck.dary@lis-lab.fr>
# Date: Fri, 6 Sep 2019 16:30:11 +0200
# Subject: Added script to get text from conll format
"""Extract the raw sentence text from a CoNLL-U file.

Every ``# text = ...`` sentence-level comment found in the input file is
written to standard output, each one followed by the delimiter given on the
command line.

Usage : conll2text.py file.conllu delimiterSymbol
"""

import sys


def printUsageAndExit():
    """Print the command-line usage and terminate with exit status 1."""
    # The usage goes to stderr so that it never gets mixed into the extracted
    # text when stdout is redirected or piped.
    print("Usage : %s file.conllu delimiterSymbol" % sys.argv[0],
          file=sys.stderr)
    sys.exit(1)


def extractText(line):
    """Return the sentence carried by a ``# text = ...`` comment line.

    Args:
        line: One raw line of a CoNLL-U file (a trailing newline is allowed).

    Returns:
        The sentence text, or ``None`` when ``line`` is not a ``text``
        comment (token line, blank line, or a comment with another key such
        as ``# sent_id = ...`` or ``# text_en = ...``).
    """
    stripped = line.strip()
    if not stripped.startswith("#"):
        return None

    # Comments have the shape "# key = value"; split on the first "=" only,
    # because the sentence itself may contain "=" characters.
    key, separator, value = stripped[1:].partition("=")
    if not separator or key.strip() != "text":
        return None

    # Drop only the single conventional space after "=", so any further
    # leading whitespace of the sentence is kept as written.
    if value.startswith(" "):
        value = value[1:]
    return value


def iterTexts(lines):
    """Yield the text of every ``# text = ...`` comment found in ``lines``.

    Args:
        lines: An iterable of raw CoNLL-U lines (e.g. an open text file).
    """
    for line in lines:
        text = extractText(line)
        if text is not None:
            yield text


def main():
    """Command-line entry point: ``conll2text.py file.conllu delimiterSymbol``."""
    if len(sys.argv) != 3:
        printUsageAndExit()

    path, delimiter = sys.argv[1], sys.argv[2]

    # CoNLL-U files are UTF-8 by specification; the context manager makes
    # sure the file is closed even if writing to stdout fails.
    with open(path, encoding="utf-8") as conllFile:
        for text in iterTexts(conllFile):
            # The delimiter is written after every sentence, the last one
            # included.
            print(text, end=delimiter)


if __name__ == "__main__":
    main()