#! /usr/bin/env python3
# Origin: patch "Added script to transform conllu into tikz figure"
# (commit 5b6f91944812a6a1259b5bd7a8de68de84981c5c, Franck Dary, 2021-05-28),
# which created scripts/conllu2tikz.py.
"""Turn the sentences of a CoNLL-U file into tikz-dependency LaTeX figures.

One ``figure`` environment is printed on stdout for every sentence of the
input file.  Column values are written verbatim: no LaTeX escaping is done.
"""

import argparse
import sys


class Node:
    """One displayed word of a sentence and its incoming dependency arc."""

    def __init__(self, wordId, name, gov, label, extra):
        self.wordId = wordId  # value of the id column, kept as a string
        self.name = name      # text shown for the word in the figure
        self.gov = gov        # governor index (int); 0 means no arc is drawn
        self.label = label    # label written on the incoming arc
        self.extra = extra    # values of the extra columns, in display order

    def __str__(self):
        return "({} {} {} {})".format(
            self.wordId, self.name, self.gov, self.label)


def _isRegularWord(wordId):
    """Return False for multiword ranges ("3-4") and empty nodes ("3.1")."""
    return '-' not in wordId and '.' not in wordId


def generateTikz(text, sentence, col2index, index2col, idCol, nodeCol,
                 govCol, labelCol, extraCols):
    """Print on stdout the tikz-dependency figure of one sentence.

    Args:
        text: caption of the figure.
        sentence: list of rows, each row being the list of column values
            of one line of the input file.
        col2index: mapping from column name to its position in a row.
        index2col: reverse mapping; unused, kept for interface stability.
        idCol: name of the column identifying the words.
        nodeCol: name of the column whose value is displayed for each word.
        govCol: name of the column holding the governor index.
        labelCol: name of the column holding the arc label.
        extraCols: names of additional columns, each displayed as one
            extra row of small caps under the words.

    Raises:
        KeyError: if a requested column name is not in ``col2index``.
        ValueError: if the governor of a displayed word is not an integer.
    """
    nodes = []
    for word in sentence:
        wordId = word[col2index[idCol]]
        # Multiword ranges and empty nodes are not drawn; they are skipped
        # before their governor column is converted to an int.
        if not _isRegularWord(wordId):
            continue
        nodes.append(Node(
            wordId,
            word[col2index[nodeCol]],
            int(word[col2index[govCol]]),
            word[col2index[labelCol]],
            [word[col2index[col]] for col in extraCols],
        ))

    # Raw strings keep the LaTeX backslashes literal without relying on
    # invalid escape sequences such as "\c" or "\d".
    cellSep = r" \& "
    rowEnd = r"\\"

    lines = [
        r"\begin{figure}",
        r"\centering",
        r"\begin{dependency}[edge style = {very thick}]",
        "",
        r"\begin{deptext}[column sep=0.2em]",
        cellSep.join(node.name for node in nodes) + rowEnd,
    ]
    for i in range(len(extraCols)):
        lines.append(
            cellSep.join(r"\tiny{\textsc{%s}}" % node.extra[i]
                         for node in nodes) + rowEnd)
    lines.append(r"\end{deptext}")
    lines.append("")

    for node in nodes:
        if node.gov != 0:
            lines.append(r"\depedge{%d}{%s}{%s}"
                         % (node.gov, node.wordId, node.label))

    lines.append(r"\end{dependency}")
    lines.append(r"\caption{``%s''}" % text)
    lines.append(r"\label{}")
    lines.append(r"\end{figure}")

    print("\n".join(lines))


def _fallbackText(sentence, col2index, idCol, nodeCol):
    """Build a caption from the displayed words when no text is available."""
    return " ".join(word[col2index[nodeCol]] for word in sentence
                    if _isRegularWord(word[col2index[idCol]]))


def _emitSentence(text, sentence, col2index, index2col, args, extraCols):
    """Print the figure of ``sentence``; do nothing for an empty sentence."""
    if not sentence:
        return
    if not text:
        text = _fallbackText(sentence, col2index, args.id, args.node)
    generateTikz(text, sentence, col2index, index2col,
                 args.id, args.node, args.gov, args.label, extraCols)


def main(argv=None):
    """Parse the command line and print one figure per input sentence.

    Args:
        argv: list of command line arguments; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=str,
                        help="Input conllu file")
    parser.add_argument("--id", default="ID",
                        help="Name of the column identifying nodes.")
    parser.add_argument("--node", default="FORM",
                        help="Name of the column giving nodes their names.")
    parser.add_argument("--gov", default="HEAD",
                        help="Name of the column containing nodes governor.")
    parser.add_argument("--label", default="DEPREL",
                        help="Name of the column containing arcs labels.")
    parser.add_argument("--extra", default=None,
                        help="Comma separated list of extra columns to show (ex. UPOS,FEATS).")

    args = parser.parse_args(argv)
    extraCols = args.extra.split(',') if args.extra is not None else []

    print("In Latex, add : \\usepackage{tikz-dependency}",
          file=sys.stderr, end="\n\n")

    # Project-local helper, imported here so that Node and generateTikz
    # stay importable when readMCD is not on the path.
    from readMCD import readMCD

    col2index, index2col = readMCD(
        "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC")

    sentence = []
    text = ""
    with open(args.input, "r", encoding="utf-8") as conllu:
        for line in conllu:
            line = line.strip()
            if "# global.columns =" in line:
                col2index, index2col = readMCD(line.split('=', 1)[1].strip())
                continue
            if "# text =" in line:
                # Split on the first '=' only: the sentence may contain '='.
                text = line.split('=', 1)[1].strip()
                continue
            if not line:
                _emitSentence(text, sentence, col2index, index2col,
                              args, extraCols)
                # Start afresh so the next sentence never inherits a caption.
                sentence = []
                text = ""
                continue
            if line[0] == '#':
                continue
            sentence.append(line.split('\t'))

    # The last sentence may not be followed by a blank line.
    _emitSentence(text, sentence, col2index, index2col, args, extraCols)


if __name__ == "__main__":
    main()