From 4e066375e6177925aa40c10a1722a0d7f6978780 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 25 Feb 2022 15:49:51 +0100
Subject: [PATCH] Added incr and seq paths to conllu2latex

---
 scripts/conllu2latex.py | 63 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/scripts/conllu2latex.py b/scripts/conllu2latex.py
index dcd3f0e..d0b60bc 100755
--- a/scripts/conllu2latex.py
+++ b/scripts/conllu2latex.py
@@ -6,6 +6,8 @@ from readMCD import readMCD
 
 ################################################################################
 if __name__ == "__main__" :
+  arrowConf = "-{.latex[scale=0.2]}, line width=0.70mm, opacity=0.2"
+
   parser = argparse.ArgumentParser()
   parser.add_argument("input", type=str,
     help="Input conllu file")
@@ -13,9 +15,17 @@ if __name__ == "__main__" :
     help="sent_id of the target sentence in the conllu file.")
   parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS",
     help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.")
+  parser.add_argument("--reduce", "-r", default=False, action="store_true",
+    help="Only keep values after '=' in cases like a=b.")
+  parser.add_argument("--incr", default=False, action="store_true",
+    help="Draw incremental processing paths.")
+  parser.add_argument("--seq", default=False, action="store_true",
+    help="Draw sequential processing paths.")
 
   args = parser.parse_args()
 
+  args.paths = args.incr or args.seq
+
   baseMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC"
   col2index, index2col = readMCD(baseMCD)
 
@@ -108,6 +118,17 @@ if __name__ == "__main__" :
 
   partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))]
 
+  if args.paths :
+    print(r"""\makeatletter
+\@ifundefined{tabnode}{%
+\newcommand\tabnode[1]{\addtocounter{nodecount}{1} \tikz \node[minimum height=0.5cm] (\arabic{nodecount}) {#1};}%
+\newcounter{nodecount}%
+}{}
+\makeatother
+\setcounter{nodecount}{0}""")
+    print(r"\tikzstyle{every picture}+=[remember picture,baseline]")
+    print(r"\tikzstyle{every node}+=[inner sep=0pt,anchor=base]")
+
   print("\\begin{figure}")
   print("\\tabcolsep=0.40mm")
   for partId in range(len(parts)) :
@@ -126,19 +147,55 @@ if __name__ == "__main__" :
         values = value.split('|')
 
         for k in range(len(values)) :
-          values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k])
+          values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k].split("=")[-1] if args.reduce else values[k])
           if columns[i] not in ["FORM","LEMMA"] :
             values[k] = "\\texttt{%s}"%(values[k].lower())
           else :
             values[k] = "\\texttt{%s}"%(values[k])
         cellContent = "\\\\".join(values)
-        print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "")
-      print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1))
+        # With paths enabled the cell content is wrapped in \tabnode{} so arrows can later be attached to it.
+        cell = "\\tabnode{%s}"%cellContent if args.paths else cellContent
+        print("\\multicolumn{%d}{c|}{\\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cell), end=" &\n" if j != parts[partId][-1] else "")
+      # In path mode, rows other than i == 0 end with a vertical skip instead of a \cline rule.
+      if args.paths and i != 0 :
+        print("\\\\%s\n"%("[-0.1cm]" if i == 1 else "[%scm]"%("0.1" if args.seq else "0.30")))
+      else :
+        print("\\\\ \\cline{1-%d}\n"%(partSizes[partId]+1))
   
     print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
     print("\end{tabular}")
   print("\caption{``%s''}"%text)
   print("\label{fig:a}")
+
+  if args.paths :
+    # Node k is the k-th \tabnode printed; the index arithmetic below assumes row-by-row emission (TODO confirm).
+    seq = "color=blue"
+    incr = "color=blue"
+    print(r"\begin{tikzpicture}[overlay]")
+    if args.seq :
+      for line in range(len(sentence[0])-1) :
+        for col in range(len(sentence)) :
+          curNode = line*len(sentence)+col
+          firstOfLine = (line)*len(sentence)
+          curOfNextLine = (line+1)*len(sentence)+col
+          # Link each node to its right neighbour; at the last column, path from the node below via the .south anchors of this row.
+          if col in range(len(sentence)-1) :
+            print("\\draw [%s, %s] (%d) -- (%d);"%(seq, arrowConf, curNode+1, curNode+2))
+          elif curNode+2 in range(len(sentence[0]*(len(sentence)-1))) and line in range(len(sentence[0])-2) :
+            print("\\draw[%s, %s] (%d) -- (%d.south) -- (%d.south);"%(seq, arrowConf, curOfNextLine+1, curNode+1, firstOfLine+1))
+    elif args.incr :
+      for line in range(len(sentence[0])-1) :
+        for col in range(len(sentence)) :
+          curNode = line*len(sentence)+col
+          curOfNextLine = (line+1)*len(sentence)+col
+          bottomNode = (len(sentence[0])-2)*len(sentence) + col+1
+          # Link the node below to this one; on line 0 also add a detour path ending at the .west anchor of node bottomNode+1.
+          if line in range(len(sentence[0])-2) :
+            print("\\draw [%s, %s] (%d) -- (%d);"%(incr, arrowConf, curOfNextLine+1, curNode+1))
+          if line == 0 and col != len(sentence)-1 :
+            print("\\draw[%s, %s] (%d) -- ($(%d.east)!0.5!(%d.west)$) -- ($(%d.east)!0.5!(%d.west)-(%d)+(%d)+(0,0.5)$) -- (%d.west);"%(incr, arrowConf, curNode+1, curNode+1, curNode+2, curNode+1, curNode+2, curNode+1, bottomNode, bottomNode+1))
+    print(r"\end{tikzpicture}")
+
   print("\end{figure}")
 ################################################################################
 
-- 
GitLab