Skip to content
Snippets Groups Projects
Commit 74e65fd4 authored by Franck Dary's avatar Franck Dary
Browse files

conllu2latex is now able to produce feat function comparisons

parent 4e066375
No related branches found
No related tags found
No related merge requests found
......@@ -4,38 +4,16 @@ import argparse
import sys
from readMCD import readMCD
################################################################################
if __name__ == "__main__" :
arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2"
parser = argparse.ArgumentParser()
parser.add_argument("input", type=str,
help="Input conllu file")
parser.add_argument("id", type=str,
help="sent_id of the target sentence in the conllu file.")
parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS",
help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.")
parser.add_argument("--reduce", "-r", default=False, action="store_true",
help="Only keep values after '=' in cases like a=b.")
parser.add_argument("--incr", default=False, action="store_true",
help="Draw incremental processing paths.")
parser.add_argument("--seq", default=False, action="store_true",
help="Draw sequential processing paths.")
args = parser.parse_args()
args.paths = args.incr or args.seq
baseMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC"
col2index, index2col = readMCD(baseMCD)
columns = args.tapes.split(',')
sentence = []
################################################################################
def readInputFile(filename, mcd, tapes, sentId) :
text = ""
sentence = []
col2index, index2col = readMCD(mcd.replace(",", " "))
columns = tapes.split(',')
reading = False
for line in open(args.input, "r") :
for line in open(filename, "r") :
line = line.strip()
if len(line) == 0 :
if reading :
......@@ -48,7 +26,7 @@ if __name__ == "__main__" :
text = line.split('=')[-1].strip()
if "# sent_id =" in line :
curSent = line.split('=')[-1].strip()
if curSent == args.id :
if curSent == sentId :
reading = True
if line[0] == '#' :
continue
......@@ -59,6 +37,12 @@ if __name__ == "__main__" :
splited = line.split('\t')
sentence.append([splited[col2index[col]] for col in columns if col != "EOS"])
return sentence, text, columns
################################################################################
################################################################################
def getLayout(sentence, text) :
ranges = [[-1,-1] for _ in sentence]
curIndex = 0
......@@ -106,6 +90,13 @@ if __name__ == "__main__" :
else :
ranges[i+1][0] = ranges[i][1]+1
return sentence, ranges
################################################################################
################################################################################
def produceTabular(sentence, ranges, text, columns, nodes, reduce, breakSize, hsep=True) :
maxNbLetters = 45
parts = [[]]
......@@ -118,23 +109,11 @@ if __name__ == "__main__" :
partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))]
if args.paths :
print(r"""\makeatletter
\@ifundefined{tabnode}{%
\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}%
\newcounter{nodecount}%
}{}
\makeatother
\setcounter{nodecount}{0}""")
print(r"\tikzstyle{every picture}+=[remember picture,baseline]")
print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]")
print("\\begin{figure}")
print("\\tabcolsep=0.40mm")
colsep = "|" if hsep else ""
for partId in range(len(parts)) :
if partId != 0 :
print("\\vspace{7pt}\n")
print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*partSizes[partId])))
print("\\begin{tabular}{|l|%s|}"%(colsep.join(["c"]*partSizes[partId])))
print("\cline{1-%d}\n"%(partSizes[partId]+1))
for i in range(len(columns))[::-1] :
print("\\texttt{\\textbf{\\footnotesize{%s}}}"%columns[i].lower(), end=" &\n")
......@@ -147,55 +126,253 @@ if __name__ == "__main__" :
values = value.split('|')
for k in range(len(values)) :
values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if args.reduce else values[k])
values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if reduce else values[k])
if columns[i] not in ["FORM","LEMMA"] :
values[k] = "\\texttt{%s}"%(values[k].lower())
else :
values[k] = "\\texttt{%s}"%(values[k])
cellContent = "\\\\".join(values)
if args.paths :
print("\multicolumn{%d}{c|}{\makecell[cc]{\\tabnode{%s}}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "")
tcolsep = colsep if j != parts[partId][-1] else "|"
if nodes :
print("\multicolumn{%d}{c%s}{\makecell[cc]{\\tabnode{%s}}}"%(ranges[j][1]-ranges[j][0]+1, tcolsep, cellContent), end=" &\n" if j != parts[partId][-1] else "")
else :
print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "")
if args.paths and i != 0 :
print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%("0.1" if args.seq else "0.30")))
print("\multicolumn{%d}{c%s}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, tcolsep, cellContent), end=" &\n" if j != parts[partId][-1] else "")
if nodes and i != 0 :
print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%(breakSize)))
else :
print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1))
print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
print("\end{tabular}")
print("\caption{``%s''}"%text)
print("\label{fig:a}")
print("\end{tabular}", end="")
################################################################################
################################################################################
def drawArrows(firstIndex, nbLines, nbCols, isSeq) :
arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2"
seq = "color=blue"
incr = "color=blue"
print(r"\begin{tikzpicture}[overlay]")
for line in range(nbLines-1) :
for col in range(nbCols) :
curNode = firstIndex-1+line*nbCols+col
firstOfNextLine = firstIndex-1+(line+1)*nbCols
firstOfLine = curNode-col
curOfNextLine = firstOfNextLine+col
bottomNode = nbCols*(nbLines-2) + col+1
if isSeq :
if col in range(nbCols-1) :
print("\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2))
elif curNode+2-firstIndex in range(nbLines*(nbCols-1)) and line in range(nbLines-2) :
print("\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1))
else :
if line in range(nbLines-2) :
print("\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1))
if line == 0 and col != nbCols-1 :
print("\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(seq, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1))
print(r"\end{tikzpicture}")
################################################################################
################################################################################
def getNodes(firstNodeIndex, nbLines, nbCols) :
centerNode = firstNodeIndex + (nbLines//2) * nbCols + nbCols//2
centerLine = centerNode - nbCols//2
centerLineEnd = centerLine + nbCols-1
bottomLine = firstNodeIndex + (nbLines-1)*nbCols
bottomLineEnd = firstNodeIndex + nbLines*nbCols - 1
topCenter = firstNodeIndex + nbCols//2
return centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, topCenter
################################################################################
################################################################################
def drawCenterRect(centerNode) :
lineConf = "color=blue, dashed, opacity=0.4, line width=0.8mm"
center1 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode)
center2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1)
center3 = "($%s-(%d.north)+(%d.south)$)"%(center2, centerNode, centerNode)
center4 = "($%s-(%d.north)+(%d.south)$)"%(center1, centerNode, centerNode)
print("\draw[%s] %s -- %s -- %s -- %s -- cycle;"%(lineConf, center1, center2, center3, center4))
################################################################################
################################################################################
def getRectConf() :
return "color=blue, fill, opacity=0.2"
################################################################################
################################################################################
def drawTextAbove(node, txt) :
print(r"\node[align=center] at ($(%d.north)+(0,0.5)$) {\Large{%s}};"%(node,txt))
################################################################################
################################################################################
def drawRectanglePalo(firstNodeIndex, nbLines, nbCols) :
centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, _ = getNodes(firstNodeIndex, nbLines, nbCols)
pt1 = "(%d.north west)"%centerLine
pt2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode)
pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, centerNode, centerNode)
pt4 = "($%s-(%d.north)+(%d.south)$)"%("($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1), centerNode, centerNode)
pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd)
pt6 = "($%s-(%d)+(%d)-(%d.north)+(%d.south)$)"%(pt1, centerLine, bottomLine, centerLine, centerLine)
print(r"\begin{tikzpicture}[overlay]")
print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, pt6))
drawCenterRect(centerNode)
drawTextAbove(firstNodeIndex-1+nbCols//2, r"Passé-Bas (\palo)")
print(r"\end{tikzpicture}")
################################################################################
################################################################################
def drawRectanglePahi(firstNodeIndex, nbLines, nbCols) :
centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, topCenter = getNodes(firstNodeIndex, nbLines, nbCols)
pt1 = "($(%d.north west)-(%d.north)+(%d.north)$)"%(centerLine, centerLine, firstNodeIndex)
pt2 = "($%s-(%d.west)+($(%d.east)!0.5!(%d.west)$)$)"%(pt1, centerLine, centerNode-1, centerNode)
pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, firstNodeIndex, centerLine)
pt4 = "($%s-(%d.north)+(%d.south)$)"%("($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1), centerNode, centerNode)
pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd)
pt6 = "($(%d.north west)-(%d)+(%d)-(%d.north)+(%d.south)$)"%(centerLine, centerLine, bottomLine, centerLine, centerLine)
print(r"\begin{tikzpicture}[overlay]")
print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, pt6))
drawCenterRect(centerNode)
drawTextAbove(firstNodeIndex-1+nbCols//2, r"Passé-Haut (\pahi)")
print(r"\end{tikzpicture}")
################################################################################
################################################################################
def drawRectangleFulo(firstNodeIndex, nbLines, nbCols) :
centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, _ = getNodes(firstNodeIndex, nbLines, nbCols)
pt1 = "(%d.north west)"%centerLine
pt2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode)
pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, centerNode, centerNode)
pt4 = "(%d.south east)"%(centerLineEnd)
pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd)
pt6 = "($%s-(%d)+(%d)-(%d.north)+(%d.south)$)"%(pt1, centerLine, bottomLine, centerLine, centerLine)
print(r"\begin{tikzpicture}[overlay]")
print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, pt6))
drawCenterRect(centerNode)
drawTextAbove(firstNodeIndex-1+nbCols//2, r"Futur-Bas (\fulo)")
print(r"\end{tikzpicture}")
################################################################################
################################################################################
def drawSimpleTapes(sentence, ranges, text, columns, hsep) :
print(r"\begin{figure}")
print("\\tabcolsep=0.40mm")
produceTabular(sentence, ranges, text, columns, False, False, "0.1cm", hsep=hsep)
print("")
print(r"\caption{Caption.}")
print(r"\label{fig:a}")
print(r"\end{figure}")
################################################################################
################################################################################
def drawPaths(sentence, ranges, text, columns, hsep, isSeq) :
print(r"""\makeatletter
\@ifundefined{tabnode}{%
\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}%
\newcounter{nodecount}%
}{}
\makeatother
\setcounter{nodecount}{0}""")
print(r"\tikzstyle{every picture}+=[remember picture,baseline]")
print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]")
print(r"\begin{figure}")
print("\\tabcolsep=0.40mm")
produceTabular(sentence, ranges, text, columns, True, True, "0.1" if isSeq else "0.3", hsep=hsep)
print("")
drawArrows(1, len(sentence[0]), len(sentence), isSeq)
print(r"\caption{Caption.}")
print(r"\label{fig:a}")
print(r"\end{figure}")
################################################################################
################################################################################
def drawFeatures(sentence, ranges, text, columns) :
print(r"""\makeatletter
\@ifundefined{tabnode}{%
\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}%
\newcounter{nodecount}%
}{}
\makeatother
\setcounter{nodecount}{0}""")
print(r"\tikzstyle{every picture}+=[remember picture,baseline]")
print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]")
print(r"\begin{figure}")
print("\\tabcolsep=0.10mm")
print(r"\resizebox{\textwidth}{!}{")
produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="")
print(r"\quad", end="")
produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="")
print(r"\quad", end="")
produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="")
print("")
nbLines = len(sentence[0])
nbCols = len(sentence)
drawRectanglePalo(1, nbLines, nbCols)
drawRectangleFulo(nbLines*nbCols+1, nbLines, nbCols)
drawRectanglePahi(2*nbLines*nbCols+1, nbLines, nbCols)
print("}")
print(r"\caption{Caption.}")
print(r"\label{fig:a}")
print(r"\end{figure}")
################################################################################
################################################################################
if __name__ == "__main__" :
parser = argparse.ArgumentParser()
parser.add_argument("input", type=str,
help="Input conllu file")
parser.add_argument("id", type=str,
help="sent_id of the target sentence in the conllu file.")
parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS",
help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.")
parser.add_argument("--mcd", default="ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL,DEPS,MISC",
help="Comma separated list of column names of the input file.")
parser.add_argument("--incr", default=False, action="store_true",
help="Draw incremental processing paths.")
parser.add_argument("--seq", default=False, action="store_true",
help="Draw sequential processing paths.")
parser.add_argument("--nohsep", default=False, action="store_true",
help="Don't draw horizontal separators for columns.")
parser.add_argument("--features", default=False, action="store_true",
help="Compare 3 features modes.")
args = parser.parse_args()
args.paths = args.incr or args.seq
if args.incr + args.seq + args.features > 1 :
print("--incr --seq and --features are mutually exclusives", file=sys.stderr)
exit(1)
sentence, text, columns = readInputFile(args.input, args.mcd, args.tapes, args.id)
sentence, ranges = getLayout(sentence, text)
if args.paths :
seq = "color=blue"
incr = "color=blue"
print(r"\begin{tikzpicture}[overlay]")
if args.seq :
for line in range(len(sentence[0])-1) :
for col in range(len(sentence)) :
curNode = line*len(sentence)+col
firstOfNextLine = (line+1)*len(sentence)
firstOfLine = (line)*len(sentence)
curOfNextLine = firstOfNextLine+col
if col in range(len(sentence)-1) :
print("\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2))
elif curNode+2 in range(len(sentence[0]*(len(sentence)-1))) and line in range(len(sentence[0])-2) :
print("\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1))
elif args.incr :
for line in range(len(sentence[0])-1) :
for col in range(len(sentence)) :
curNode = line*len(sentence)+col
firstOfNextLine = (line+1)*len(sentence)
firstOfLine = (line)*len(sentence)
curOfNextLine = firstOfNextLine+col
bottomNode = (len(sentence[0])-2)*len(sentence) + col+1
if line in range(len(sentence[0])-2) :
print("\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1))
if line == 0 and col != len(sentence)-1 :
print("\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(seq, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1))
print(r"\end{tikzpicture}")
print("\end{figure}")
drawPaths(sentence, ranges, text, columns, not args.nohsep, args.seq)
elif args.features :
drawFeatures(sentence, ranges, text, columns)
else :
drawSimpleTapes(sentence, ranges, text, columns, not args.nohsep)
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment