# Patch for scripts/conllu2latex.py (unified git diff).
# In this copy the line breaks inside the hunks are collapsed into spaces;
# the hunk text below is reproduced as-is. Only this '#' preamble is new.
#
# What the visible hunks change:
#   * The body of the `if __name__ == "__main__"` script is split into
#     functions: readInputFile (returns sentence, text, columns), getLayout
#     (returns sentence, ranges), produceTabular, drawArrows, drawSimpleTapes,
#     drawPaths and drawFeatures.
#   * New helpers used by drawFeatures: getNodes, drawCenterRect, getRectConf,
#     drawTextAbove, drawRectanglePalo, drawRectanglePahi, drawRectangleFulo.
#   * Command line: --mcd, --nohsep and --features are added. The --reduce/-r
#     option of the removed parser is not present in the new parser;
#     produceTabular still takes `reduce`, which drawSimpleTapes passes as
#     False and drawPaths/drawFeatures pass as True.
#   * Giving more than one of --incr, --seq, --features prints a message on
#     stderr and exits with status 1.
#   * The figure caption is now the literal "Caption." instead of the
#     sentence text.
#
# NOTE(review), on the added Python code:
#   * Several non-raw string literals contain LaTeX backslash sequences
#     ("\draw", "\multicolumn", "\makecell", "\cline", "\end"). \d, \m, \c and
#     \e are not recognized Python escapes; recent Python versions warn about
#     them. Raw strings or doubled backslashes would avoid the warning.
#   * readInputFile iterates directly over open(filename, "r"); the file
#     object is never closed explicitly.
#   * In drawArrows, curNode/firstOfNextLine include the firstIndex offset but
#     bottomNode does not. The only call shown passes firstIndex=1 -- confirm
#     before calling it with another offset.
#   * drawSimpleTapes passes breakSize="0.1cm" while produceTabular formats it
#     as "[%scm]"; the value is only used when `nodes` is true, so it has no
#     effect on that call path as shown.
#   * drawFeatures passes hsep="" (a string); produceTabular only tests its
#     truthiness, so it acts like hsep=False.
#   * The \tabnode preamble is printed by both drawPaths and drawFeatures
#     with identical text.
#   * The argument check calls the builtin exit() rather than sys.exit(), and
#     its message reads "mutually exclusives".
diff --git a/scripts/conllu2latex.py b/scripts/conllu2latex.py index d0b60bc6a5868ecbdaea2a71f92b78d73ec55fc4..8d2c410bdb2ffb25eb7012ebe243d38a025bc4fe 100755 --- a/scripts/conllu2latex.py +++ b/scripts/conllu2latex.py @@ -4,38 +4,16 @@ import argparse import sys from readMCD import readMCD -################################################################################ -if __name__ == "__main__" : - arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2" - - parser = argparse.ArgumentParser() - parser.add_argument("input", type=str, - help="Input conllu file") - parser.add_argument("id", type=str, - help="sent_id of the target sentence in the conllu file.") - parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS", - help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.") - parser.add_argument("--reduce", "-r", default=False, action="store_true", - help="Only keep values after '=' in cases like a=b.") - parser.add_argument("--incr", default=False, action="store_true", - help="Draw incremental processing paths.") - parser.add_argument("--seq", default=False, action="store_true", - help="Draw sequential processing paths.") - - args = parser.parse_args() - args.paths = args.incr or args.seq - - baseMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC" - col2index, index2col = readMCD(baseMCD) - - columns = args.tapes.split(',') - - sentence = [] +################################################################################ +def readInputFile(filename, mcd, tapes, sentId) : text = "" + sentence = [] + col2index, index2col = readMCD(mcd.replace(",", " ")) + columns = tapes.split(',') reading = False - for line in open(args.input, "r") : + for line in open(filename, "r") : line = line.strip() if len(line) == 0 : if reading : @@ -48,7 +26,7 @@ if __name__ == "__main__" : text = line.split('=')[-1].strip() if "# sent_id =" in line : curSent = 
line.split('=')[-1].strip() - if curSent == args.id : + if curSent == sentId : reading = True if line[0] == '#' : continue @@ -59,6 +37,12 @@ if __name__ == "__main__" : splited = line.split('\t') sentence.append([splited[col2index[col]] for col in columns if col != "EOS"]) + return sentence, text, columns +################################################################################ + + +################################################################################ +def getLayout(sentence, text) : ranges = [[-1,-1] for _ in sentence] curIndex = 0 @@ -106,6 +90,13 @@ if __name__ == "__main__" : else : ranges[i+1][0] = ranges[i][1]+1 + return sentence, ranges +################################################################################ + + +################################################################################ +def produceTabular(sentence, ranges, text, columns, nodes, reduce, breakSize, hsep=True) : + maxNbLetters = 45 parts = [[]] @@ -118,23 +109,11 @@ if __name__ == "__main__" : partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))] - if args.paths : - print(r"""\makeatletter -\@ifundefined{tabnode}{% -\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}% -\newcounter{nodecount}% -}{} -\makeatother -\setcounter{nodecount}{0}""") - print(r"\tikzstyle{every picture}+=[remember picture,baseline]") - print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]") - - print("\\begin{figure}") - print("\\tabcolsep=0.40mm") + colsep = "|" if hsep else "" for partId in range(len(parts)) : if partId != 0 : print("\\vspace{7pt}\n") - print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*partSizes[partId]))) + print("\\begin{tabular}{|l|%s|}"%(colsep.join(["c"]*partSizes[partId]))) print("\cline{1-%d}\n"%(partSizes[partId]+1)) for i in range(len(columns))[::-1] : print("\\texttt{\\textbf{\\footnotesize{%s}}}"%columns[i].lower(), end=" &\n") @@ -147,55 
+126,253 @@ if __name__ == "__main__" : values = value.split('|') for k in range(len(values)) : - values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if args.reduce else values[k]) + values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if reduce else values[k]) if columns[i] not in ["FORM","LEMMA"] : values[k] = "\\texttt{%s}"%(values[k].lower()) else : values[k] = "\\texttt{%s}"%(values[k]) cellContent = "\\\\".join(values) - if args.paths : - print("\multicolumn{%d}{c|}{\makecell[cc]{\\tabnode{%s}}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "") + tcolsep = colsep if j != parts[partId][-1] else "|" + if nodes : + print("\multicolumn{%d}{c%s}{\makecell[cc]{\\tabnode{%s}}}"%(ranges[j][1]-ranges[j][0]+1, tcolsep, cellContent), end=" &\n" if j != parts[partId][-1] else "") else : - print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "") - if args.paths and i != 0 : - print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%("0.1" if args.seq else "0.30"))) + print("\multicolumn{%d}{c%s}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, tcolsep, cellContent), end=" &\n" if j != parts[partId][-1] else "") + if nodes and i != 0 : + print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%(breakSize))) else : print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1)) print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1)) - print("\end{tabular}") - print("\caption{``%s''}"%text) - print("\label{fig:a}") + print("\end{tabular}", end="") +################################################################################ + + +################################################################################ +def 
drawArrows(firstIndex, nbLines, nbCols, isSeq) : + arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2" + seq = "color=blue" + incr = "color=blue" + print(r"\begin{tikzpicture}[overlay]") + for line in range(nbLines-1) : + for col in range(nbCols) : + curNode = firstIndex-1+line*nbCols+col + firstOfNextLine = firstIndex-1+(line+1)*nbCols + firstOfLine = curNode-col + curOfNextLine = firstOfNextLine+col + bottomNode = nbCols*(nbLines-2) + col+1 + if isSeq : + if col in range(nbCols-1) : + print("\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2)) + elif curNode+2-firstIndex in range(nbLines*(nbCols-1)) and line in range(nbLines-2) : + print("\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1)) + else : + if line in range(nbLines-2) : + print("\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1)) + if line == 0 and col != nbCols-1 : + print("\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(seq, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1)) + print(r"\end{tikzpicture}") +################################################################################ + + +################################################################################ +def getNodes(firstNodeIndex, nbLines, nbCols) : + centerNode = firstNodeIndex + (nbLines//2) * nbCols + nbCols//2 + centerLine = centerNode - nbCols//2 + centerLineEnd = centerLine + nbCols-1 + bottomLine = firstNodeIndex + (nbLines-1)*nbCols + bottomLineEnd = firstNodeIndex + nbLines*nbCols - 1 + topCenter = firstNodeIndex + nbCols//2 + + return centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, topCenter +################################################################################ + + +################################################################################ +def 
drawCenterRect(centerNode) : + lineConf = "color=blue, dashed, opacity=0.4, line width=0.8mm" + + center1 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode) + center2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1) + center3 = "($%s-(%d.north)+(%d.south)$)"%(center2, centerNode, centerNode) + center4 = "($%s-(%d.north)+(%d.south)$)"%(center1, centerNode, centerNode) + + print("\draw[%s] %s -- %s -- %s -- %s -- cycle;"%(lineConf, center1, center2, center3, center4)) +################################################################################ + + +################################################################################ +def getRectConf() : + return "color=blue, fill, opacity=0.2" +################################################################################ + + +################################################################################ +def drawTextAbove(node, txt) : + print(r"\node[align=center] at ($(%d.north)+(0,0.5)$) {\Large{%s}};"%(node,txt)) +################################################################################ + + +################################################################################ +def drawRectanglePalo(firstNodeIndex, nbLines, nbCols) : + centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, _ = getNodes(firstNodeIndex, nbLines, nbCols) + pt1 = "(%d.north west)"%centerLine + pt2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode) + pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, centerNode, centerNode) + pt4 = "($%s-(%d.north)+(%d.south)$)"%("($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1), centerNode, centerNode) + pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd) + pt6 = "($%s-(%d)+(%d)-(%d.north)+(%d.south)$)"%(pt1, centerLine, bottomLine, centerLine, centerLine) + + print(r"\begin{tikzpicture}[overlay]") + print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, 
pt6)) + drawCenterRect(centerNode) + drawTextAbove(firstNodeIndex-1+nbCols//2, r"Passé-Bas (\palo)") + print(r"\end{tikzpicture}") +################################################################################ + + +################################################################################ +def drawRectanglePahi(firstNodeIndex, nbLines, nbCols) : + centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, topCenter = getNodes(firstNodeIndex, nbLines, nbCols) + pt1 = "($(%d.north west)-(%d.north)+(%d.north)$)"%(centerLine, centerLine, firstNodeIndex) + pt2 = "($%s-(%d.west)+($(%d.east)!0.5!(%d.west)$)$)"%(pt1, centerLine, centerNode-1, centerNode) + pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, firstNodeIndex, centerLine) + pt4 = "($%s-(%d.north)+(%d.south)$)"%("($(%d.north east)!0.5!(%d.north west)$)"%(centerNode, centerNode+1), centerNode, centerNode) + pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd) + pt6 = "($(%d.north west)-(%d)+(%d)-(%d.north)+(%d.south)$)"%(centerLine, centerLine, bottomLine, centerLine, centerLine) + + print(r"\begin{tikzpicture}[overlay]") + print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, pt6)) + drawCenterRect(centerNode) + drawTextAbove(firstNodeIndex-1+nbCols//2, r"Passé-Haut (\pahi)") + print(r"\end{tikzpicture}") +################################################################################ + + +################################################################################ +def drawRectangleFulo(firstNodeIndex, nbLines, nbCols) : + centerNode, centerLine, centerLineEnd, bottomLine, bottomLineEnd, _ = getNodes(firstNodeIndex, nbLines, nbCols) + pt1 = "(%d.north west)"%centerLine + pt2 = "($(%d.north east)!0.5!(%d.north west)$)"%(centerNode-1, centerNode) + pt3 = "($%s-(%d.north)+(%d.south)$)"%(pt2, centerNode, centerNode) + pt4 = "(%d.south east)"%(centerLineEnd) + pt5 = "($%s-(%d)+(%d)$)"%(pt4, centerLineEnd, bottomLineEnd) + pt6 = 
"($%s-(%d)+(%d)-(%d.north)+(%d.south)$)"%(pt1, centerLine, bottomLine, centerLine, centerLine) + + print(r"\begin{tikzpicture}[overlay]") + print("\draw[%s] %s -- %s -- %s -- %s -- %s -- %s -- cycle;"%(getRectConf(), pt1, pt2, pt3, pt4, pt5, pt6)) + drawCenterRect(centerNode) + drawTextAbove(firstNodeIndex-1+nbCols//2, r"Futur-Bas (\fulo)") + print(r"\end{tikzpicture}") +################################################################################ + + +################################################################################ +def drawSimpleTapes(sentence, ranges, text, columns, hsep) : + print(r"\begin{figure}") + print("\\tabcolsep=0.40mm") + produceTabular(sentence, ranges, text, columns, False, False, "0.1cm", hsep=hsep) + print("") + + print(r"\caption{Caption.}") + print(r"\label{fig:a}") + print(r"\end{figure}") +################################################################################ + + +################################################################################ +def drawPaths(sentence, ranges, text, columns, hsep, isSeq) : + print(r"""\makeatletter +\@ifundefined{tabnode}{% +\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}% +\newcounter{nodecount}% +}{} +\makeatother +\setcounter{nodecount}{0}""") + print(r"\tikzstyle{every picture}+=[remember picture,baseline]") + print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]") + + print(r"\begin{figure}") + print("\\tabcolsep=0.40mm") + produceTabular(sentence, ranges, text, columns, True, True, "0.1" if isSeq else "0.3", hsep=hsep) + print("") + + drawArrows(1, len(sentence[0]), len(sentence), isSeq) + + print(r"\caption{Caption.}") + print(r"\label{fig:a}") + print(r"\end{figure}") +################################################################################ + + +################################################################################ +def drawFeatures(sentence, ranges, text, columns) : + 
print(r"""\makeatletter +\@ifundefined{tabnode}{% +\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}% +\newcounter{nodecount}% +}{} +\makeatother +\setcounter{nodecount}{0}""") + print(r"\tikzstyle{every picture}+=[remember picture,baseline]") + print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]") + + print(r"\begin{figure}") + print("\\tabcolsep=0.10mm") + print(r"\resizebox{\textwidth}{!}{") + produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="") + print(r"\quad", end="") + produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="") + print(r"\quad", end="") + produceTabular(sentence, ranges, text, columns, True, True, "0.1", hsep="") + print("") + nbLines = len(sentence[0]) + nbCols = len(sentence) + drawRectanglePalo(1, nbLines, nbCols) + drawRectangleFulo(nbLines*nbCols+1, nbLines, nbCols) + drawRectanglePahi(2*nbLines*nbCols+1, nbLines, nbCols) + print("}") + + print(r"\caption{Caption.}") + print(r"\label{fig:a}") + print(r"\end{figure}") +################################################################################ + +################################################################################ +if __name__ == "__main__" : + parser = argparse.ArgumentParser() + parser.add_argument("input", type=str, + help="Input conllu file") + parser.add_argument("id", type=str, + help="sent_id of the target sentence in the conllu file.") + parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS", + help="Comma separated list of column names that will be the rows of the table. ID should be the first. 
FORM should be second.") + parser.add_argument("--mcd", default="ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL,DEPS,MISC", + help="Comma separated list of column names of the input file.") + parser.add_argument("--incr", default=False, action="store_true", + help="Draw incremental processing paths.") + parser.add_argument("--seq", default=False, action="store_true", + help="Draw sequential processing paths.") + parser.add_argument("--nohsep", default=False, action="store_true", + help="Don't draw horizontal separators for columns.") + parser.add_argument("--features", default=False, action="store_true", + help="Compare 3 features modes.") + + args = parser.parse_args() + + args.paths = args.incr or args.seq + + if args.incr + args.seq + args.features > 1 : + print("--incr --seq and --features are mutually exclusives", file=sys.stderr) + exit(1) + + sentence, text, columns = readInputFile(args.input, args.mcd, args.tapes, args.id) + sentence, ranges = getLayout(sentence, text) if args.paths : - seq = "color=blue" - incr = "color=blue" - print(r"\begin{tikzpicture}[overlay]") - if args.seq : - for line in range(len(sentence[0])-1) : - for col in range(len(sentence)) : - curNode = line*len(sentence)+col - firstOfNextLine = (line+1)*len(sentence) - firstOfLine = (line)*len(sentence) - curOfNextLine = firstOfNextLine+col - if col in range(len(sentence)-1) : - print("\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2)) - elif curNode+2 in range(len(sentence[0]*(len(sentence)-1))) and line in range(len(sentence[0])-2) : - print("\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1)) - elif args.incr : - for line in range(len(sentence[0])-1) : - for col in range(len(sentence)) : - curNode = line*len(sentence)+col - firstOfNextLine = (line+1)*len(sentence) - firstOfLine = (line)*len(sentence) - curOfNextLine = firstOfNextLine+col - bottomNode = (len(sentence[0])-2)*len(sentence) + col+1 - if line in 
range(len(sentence[0])-2) : - print("\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1)) - if line == 0 and col != len(sentence)-1 : - print("\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(seq, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1)) - print(r"\end{tikzpicture}") - - print("\end{figure}") + drawPaths(sentence, ranges, text, columns, not args.nohsep, args.seq) + elif args.features : + drawFeatures(sentence, ranges, text, columns) + else : + drawSimpleTapes(sentence, ranges, text, columns, not args.nohsep) ################################################################################