diff --git a/scripts/conllu2latex.py b/scripts/conllu2latex.py index dcd3f0ec6379d9c3a38d7f7449ba1ae9ec3fcc72..d0b60bc6a5868ecbdaea2a71f92b78d73ec55fc4 100755 --- a/scripts/conllu2latex.py +++ b/scripts/conllu2latex.py @@ -6,6 +6,8 @@ from readMCD import readMCD ################################################################################ if __name__ == "__main__" : + arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2" + parser = argparse.ArgumentParser() parser.add_argument("input", type=str, help="Input conllu file") @@ -13,9 +15,17 @@ if __name__ == "__main__" : help="sent_id of the target sentence in the conllu file.") parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS", help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.") + parser.add_argument("--reduce", "-r", default=False, action="store_true", + help="Only keep values after '=' in cases like a=b.") + parser.add_argument("--incr", default=False, action="store_true", + help="Draw incremental processing paths.") + parser.add_argument("--seq", default=False, action="store_true", + help="Draw sequential processing paths.") args = parser.parse_args() + args.paths = args.incr or args.seq + baseMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC" col2index, index2col = readMCD(baseMCD) @@ -108,6 +118,17 @@ if __name__ == "__main__" : partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))] + if args.paths : + print(r"""\makeatletter +\@ifundefined{tabnode}{% +\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}% +\newcounter{nodecount}% +}{} +\makeatother +\setcounter{nodecount}{0}""") + print(r"\tikzstyle{every picture}+=[remember picture,baseline]") + print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]") + print("\\begin{figure}") print("\\tabcolsep=0.40mm") for partId in range(len(parts)) : @@ -126,19 +147,55 @@ if __name__ == "__main__" : values = value.split('|') for k in range(len(values)) : - values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k]) + values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if args.reduce else values[k]) if columns[i] not in ["FORM","LEMMA"] : values[k] = "\\texttt{%s}"%(values[k].lower()) else : values[k] = "\\texttt{%s}"%(values[k]) cellContent = "\\\\".join(values) - print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "") - print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1)) + if args.paths : + print("\multicolumn{%d}{c|}{\makecell[cc]{\\tabnode{%s}}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "") + else : + print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "") + if args.paths and i != 0 : + print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%("0.1" if args.seq else "0.30"))) + else : + print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1)) print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1)) print("\end{tabular}") print("\caption{``%s''}"%text) print("\label{fig:a}") + + if args.paths : + seq = "color=blue" + incr = "color=blue" + print(r"\begin{tikzpicture}[overlay]") + if args.seq : + for line in range(len(sentence[0])-1) : + for col in range(len(sentence)) : + curNode = line*len(sentence)+col + firstOfNextLine = (line+1)*len(sentence) + firstOfLine = (line)*len(sentence) + curOfNextLine = firstOfNextLine+col + if col in range(len(sentence)-1) : + print("\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2)) + elif curNode+2 in range(len(sentence[0]*(len(sentence)-1))) and line in range(len(sentence[0])-2) : + print("\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1)) + elif args.incr : + for line in range(len(sentence[0])-1) : + for col in range(len(sentence)) : + curNode = line*len(sentence)+col + firstOfNextLine = (line+1)*len(sentence) + firstOfLine = (line)*len(sentence) + curOfNextLine = firstOfNextLine+col + bottomNode = (len(sentence[0])-2)*len(sentence) + col+1 + if line in range(len(sentence[0])-2) : + print("\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1)) + if line == 0 and col != len(sentence)-1 : + print("\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(seq, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1)) + print(r"\end{tikzpicture}") + print("\end{figure}") ################################################################################