From f716d0dfed17c9a734dfdba5b6a6c6661c7ab283 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 2 Jul 2021 16:15:37 +0200
Subject: [PATCH] In conllu2latex.py: added EOS and FEATS tapes

---
 scripts/conllu2latex.py | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/scripts/conllu2latex.py b/scripts/conllu2latex.py
index 2967aeb..dcd3f0e 100755
--- a/scripts/conllu2latex.py
+++ b/scripts/conllu2latex.py
@@ -11,7 +11,7 @@ if __name__ == "__main__" :
     help="Input conllu file")
   parser.add_argument("id", type=str,
     help="sent_id of the target sentence in the conllu file.")
-  parser.add_argument("--tapes", default="ID,FORM,UPOS,LEMMA,HEAD,DEPREL",
+  parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS",
     help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.")
 
   args = parser.parse_args()
@@ -47,7 +47,7 @@ if __name__ == "__main__" :
       continue
 
     splited = line.split('\t')
-    sentence.append([splited[col2index[col]] for col in columns])
+    sentence.append([splited[col2index[col]] for col in columns if col != "EOS"])
 
   ranges = [[-1,-1] for _ in sentence]
 
@@ -108,30 +108,37 @@ if __name__ == "__main__" :
 
   partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))]
 
-  print("\\begin{figure}[t]")
-  print("\centering")
-  print("\\footnotesize")
+  print("\\begin{figure}")
   print("\\tabcolsep=0.40mm")
-  print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*max(partSizes))))
   for partId in range(len(parts)) :
     if partId != 0 :
-      print("\multicolumn{0}{c}{}\\\\")
+      print("\\vspace{7pt}\n")
+    print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*partSizes[partId])))
     print("\cline{1-%d}\n"%(partSizes[partId]+1))
     for i in range(len(columns))[::-1] :
-      print("\\texttt{\\textbf{%s}}"%columns[i].lower(), end=" &\n")
+      print("\\texttt{\\textbf{\\footnotesize{%s}}}"%columns[i].lower(), end=" &\n")
       for j in parts[partId] :
-        value = sentence[j][i]
-        if columns[i] not in ["FORM","LEMMA"] :
-          value = "\\texttt{%s}"%(value.lower())
+        if columns[i] == "EOS" :
+          value = "yes" if j == parts[partId][-1] and partId == len(parts)-1 else "no"
         else :
-          value = "\\texttt{%s}"%(value)
-        print("\multicolumn{%d}{c|}{%s}"%(ranges[j][1]-ranges[j][0]+1, value), end=" &\n" if j != parts[partId][-1] else "")
+          value = sentence[j][i]
+        value = value.replace('_','\_')
+        values = value.split('|')
+
+        for k in range(len(values)) :
+          values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k])
+          if columns[i] not in ["FORM","LEMMA"] :
+            values[k] = "\\texttt{%s}"%(values[k].lower())
+          else :
+            values[k] = "\\texttt{%s}"%(values[k])
+        cellContent = "\\\\".join(values)
+        print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "")
       print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1))
   
-    print("\\texttt{\\textbf{input}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{%s}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
-  print("\end{tabular}")
-  print("\label{fig:a}")
+    print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
+    print("\end{tabular}")
   print("\caption{``%s''}"%text)
+  print("\label{fig:a}")
   print("\end{figure}")
 ################################################################################
 
-- 
GitLab