Skip to content
Snippets Groups Projects
Commit f716d0df authored by Franck Dary
Browse files

In conllu2latex.py : added eos, feats

parent 55dcb353
No related branches found
No related tags found
No related merge requests found
......@@ -11,7 +11,7 @@ if __name__ == "__main__" :
help="Input conllu file")
parser.add_argument("id", type=str,
help="sent_id of the target sentence in the conllu file.")
parser.add_argument("--tapes", default="ID,FORM,UPOS,LEMMA,HEAD,DEPREL",
parser.add_argument("--tapes", default="ID,FORM,UPOS,FEATS,LEMMA,HEAD,DEPREL,EOS",
help="Comma separated list of column names that will be the rows of the table. ID should be the first. FORM should be second.")
args = parser.parse_args()
......@@ -47,7 +47,7 @@ if __name__ == "__main__" :
continue
splited = line.split('\t')
sentence.append([splited[col2index[col]] for col in columns])
sentence.append([splited[col2index[col]] for col in columns if col != "EOS"])
ranges = [[-1,-1] for _ in sentence]
......@@ -108,30 +108,37 @@ if __name__ == "__main__" :
partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))]
print("\\begin{figure}[t]")
print("\centering")
print("\\footnotesize")
print("\\begin{figure}")
print("\\tabcolsep=0.40mm")
print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*max(partSizes))))
for partId in range(len(parts)) :
if partId != 0 :
print("\multicolumn{0}{c}{}\\\\")
print("\\vspace{7pt}\n")
print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*partSizes[partId])))
print("\cline{1-%d}\n"%(partSizes[partId]+1))
for i in range(len(columns))[::-1] :
print("\\texttt{\\textbf{%s}}"%columns[i].lower(), end=" &\n")
print("\\texttt{\\textbf{\\footnotesize{%s}}}"%columns[i].lower(), end=" &\n")
for j in parts[partId] :
value = sentence[j][i]
if columns[i] not in ["FORM","LEMMA"] :
value = "\\texttt{%s}"%(value.lower())
if columns[i] == "EOS" :
value = "yes" if j == parts[partId][-1] and partId == len(parts)-1 else "no"
else :
value = "\\texttt{%s}"%(value)
print("\multicolumn{%d}{c|}{%s}"%(ranges[j][1]-ranges[j][0]+1, value), end=" &\n" if j != parts[partId][-1] else "")
value = sentence[j][i]
value = value.replace('_','\_')
values = value.split('|')
for k in range(len(values)) :
values[k] = "\\%s{%s}"%("scriptsize" if '|' in value else "footnotesize", values[k])
if columns[i] not in ["FORM","LEMMA"] :
values[k] = "\\texttt{%s}"%(values[k].lower())
else :
values[k] = "\\texttt{%s}"%(values[k])
cellContent = "\\\\".join(values)
print("\multicolumn{%d}{c|}{\makecell[cc]{%s}}"%(ranges[j][1]-ranges[j][0]+1, cellContent), end=" &\n" if j != parts[partId][-1] else "")
print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1))
print("\\texttt{\\textbf{input}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{%s}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
print("\end{tabular}")
print("\label{fig:a}")
print("\\texttt{\\textbf{\\footnotesize{input}}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{\\footnotesize{%s}}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1))
print("\end{tabular}")
print("\caption{``%s''}"%text)
print("\label{fig:a}")
print("\end{figure}")
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment