Skip to content
Snippets Groups Projects
Commit 5b6f9194 authored by Franck Dary
Browse files

Added script to transform conllu into tikz figure

parent 02686401
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python3
import argparse
import sys
from readMCD import readMCD
################################################################################
class Node :
    """A word of a sentence together with its incoming dependency arc.

    The constructor stores its arguments unchanged:
    wordId -- identifier of the word.
    name   -- text displayed for the word.
    gov    -- identifier of the governor of the word.
    label  -- label of the arc going from the governor to the word.
    extra  -- additional values attached to the word.
    """

    def __init__(self, wordId, name, gov, label, extra) :
        self.wordId = wordId
        self.name = name
        self.gov = gov
        self.label = label
        self.extra = extra

    def __str__(self) :
        # `extra` is not part of this representation.
        return "({} {} {} {})".format(self.wordId, self.name, self.gov, self.label)
################################################################################
################################################################################
def generateTikz(text, sentence, col2index, index2col, idCol, nodeCol, govCol, labelCol, extraCols) :
    """Print on stdout a LaTeX figure drawing `sentence` with tikz-dependency.

    text      -- caption of the figure, printed between LaTeX quotes.
    sentence  -- list of words, each word being a list of column values.
    col2index -- mapping from a column name to its index inside a word.
    index2col -- not used here; kept so that existing callers keep working.
    idCol     -- name of the column holding the identifier of each word.
    nodeCol   -- name of the column holding the text displayed for each word.
    govCol    -- name of the column holding the identifier of the governor.
    labelCol  -- name of the column holding the label of the incoming arc.
    extraCols -- names of the columns printed as extra rows under the words.

    Words whose identifier contains '-' or '.' are skipped. A word whose
    governor is 0 or is not an integer gets no incoming arc; in the second
    case a warning is written on stderr.
    """
    nodes = []
    for word in sentence :
        wordId = word[col2index[idCol]]
        if '-' in wordId : # Ignoring multiwords
            continue
        if '.' in wordId : # Ignoring empty nodes
            continue
        rawGov = word[col2index[govCol]]
        try :
            gov = int(rawGov)
        except ValueError :
            # An unspecified governor (e.g. "_") must not abort the whole
            # figure: the word is kept and simply receives no arc.
            print("Warning : word {} has a non numeric governor '{}', no arc drawn.".format(wordId, rawGov), file=sys.stderr)
            gov = 0
        name = word[col2index[nodeCol]]
        label = word[col2index[labelCol]]
        extra = [word[col2index[col]] for col in extraCols]
        nodes.append(Node(wordId, name, gov, label, extra))

    # Raw strings keep the LaTeX backslashes literal without relying on
    # Python leaving unknown escape sequences such as "\c" untouched.
    print(r"\begin{figure}")
    print(r"\centering")
    print(r"\begin{dependency}[edge style = {very thick}]")
    print()
    print(r"\begin{deptext}[column sep=0.2em]")
    print(r" \& ".join([node.name for node in nodes]) + r"\\")
    # One row per extra column, even when there is no word to display.
    for i in range(len(extraCols)) :
        print(r" \& ".join([r"\tiny{\textsc{%s}}" % node.extra[i] for node in nodes]) + r"\\")
    print(r"\end{deptext}")
    print()
    for node in nodes :
        if node.gov != 0 :
            print(r"\depedge{%d}{%s}{%s}" % (node.gov, node.wordId, node.label))
    print(r"\end{dependency}")
    print(r"\caption{``%s''}" % text)
    print(r"\label{}")
    print(r"\end{figure}")
################################################################################
################################################################################
def _flushSentence(text, sentence, col2index, index2col, args) :
    """Draw `sentence`, using its word forms as caption when `text` is empty."""
    if len(text) == 0 :
        text = " ".join([word[col2index[args.node]] for word in sentence])
    generateTikz(text, sentence, col2index, index2col, args.id, args.node, args.gov, args.label, args.extra)

# Entry point : read a conllu file and print one tikz figure per sentence.
if __name__ == "__main__" :
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=str,
        help="Input conllu file")
    parser.add_argument("--id", default="ID",
        help="Name of the column identifying nodes.")
    parser.add_argument("--node", default="FORM",
        help="Name of the column giving nodes their names.")
    parser.add_argument("--gov", default="HEAD",
        help="Name of the column containing nodes governor.")
    parser.add_argument("--label", default="DEPREL",
        help="Name of the column containing arcs labels.")
    parser.add_argument("--extra", default=None,
        help="Comma separated list of extra columns to show (ex. UPOS,FEATS).")
    args = parser.parse_args()
    args.extra = args.extra.split(',') if args.extra is not None else []

    # Sent to stderr so that stdout only contains the generated figures.
    print("In Latex, add : \\usepackage{tikz-dependency}", file=sys.stderr, end="\n\n")

    # Default column layout, replaced if the file declares its own.
    col2index, index2col = readMCD("ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC")

    columnsPrefix = "# global.columns ="
    textPrefix = "# text ="

    sentence = []
    text = ""
    with open(args.input, "r", encoding="utf-8") as inputFile :
        for line in inputFile :
            line = line.strip()
            if line.startswith(columnsPrefix) :
                col2index, index2col = readMCD(line[len(columnsPrefix):].strip())
                continue
            if line.startswith(textPrefix) :
                # Everything after the first '=' is kept, so a text that
                # itself contains '=' is not truncated.
                text = line[len(textPrefix):].strip()
                continue
            if len(line) == 0 :
                # A blank line ends the current sentence; repeated blank
                # lines must not produce empty figures.
                if len(sentence) > 0 :
                    _flushSentence(text, sentence, col2index, index2col, args)
                    sentence = []
                    # The caption belongs to the sentence just drawn only.
                    text = ""
                continue
            if line[0] == '#' :
                continue
            sentence.append(line.split('\t'))

    # The last sentence may not be followed by a blank line.
    if len(sentence) > 0 :
        _flushSentence(text, sentence, col2index, index2col, args)
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment