diff --git a/scripts/conllu2latex.py b/scripts/conllu2latex.py index e93398cd475d9606e3701c25074e5e856354bf0d..89a7ebaeb131cbbb3612612b44c7b68082700820 100755 --- a/scripts/conllu2latex.py +++ b/scripts/conllu2latex.py @@ -90,29 +90,44 @@ if __name__ == "__main__" : i = j for i in range(len(ranges)-1) : - ranges[i][1] = ranges[i+1][0]-1 + if ranges[i][1] != ranges[i+1][0]-1 : + if ranges[i][1]-ranges[i][0] <= ranges[i+1][1]-ranges[i+1][0] : + ranges[i][1] = ranges[i+1][0]-1 + else : + ranges[i+1][0] = ranges[i][1]+1 - print([text[r[0]:r[1]+1] for r in ranges]) + maxNbLetters = 45 - print("\\newcolumntype{x}[0]{>{\centering\\arraybackslash}m{2.2mm}}") + parts = [[]] + first = 0 + for i in range(len(ranges)) : + if ranges[i][1]-first > maxNbLetters : + parts.append([]) + first = ranges[i][0] + parts[-1].append(i) + + partSizes = [-ranges[parts[partId][0]][0]+ranges[parts[partId][-1]][1]+1 for partId in range(len(parts))] + + #print("\\newcolumntype{x}[0]{>{\centering\\arraybackslash}m{2.2mm}}") print("\\begin{table}[t]") print("\centering") print("\\footnotesize") - print("\\tabcolsep=0.45mm") - print("\\begin{tabular}{|l|%s|}"%("|".join('x'*len(text)))) - print("\hline\n") + print("\\tabcolsep=0.40mm") + print("\\begin{tabular}{|l|%s|}"%("|".join(["c"]*max(partSizes)))) + for partId in range(len(parts)) : + print("\cline{1-%d}\n"%(partSizes[partId]+1)) + for i in range(len(columns))[::-1] : + print("\\texttt{\\textbf{%s}}"%columns[i].lower(), end=" &\n") + for j in parts[partId] : + value = sentence[j][i] + if columns[i] not in ["FORM","LEMMA"] : + value = "\\texttt{%s}"%(value.lower()) + else : + value = "\\texttt{%s}"%(value) + print("\multicolumn{%d}{c|}{%s}"%(ranges[j][1]-ranges[j][0]+1, value), end=" &\n" if j != parts[partId][-1] else "") + print("\\\\ \cline{1-%d}\n"%(partSizes[partId]+1)) - for i in range(len(columns))[::-1] : - print("\\texttt{\\textsc{%s}}"%columns[i].lower(), end=" &\n") - for j in range(len(sentence)) : - value = sentence[j][i] - if columns[i] not in ["FORM","LEMMA"] : - value = "\\textsc{%s}"%(value.lower()) - print("\multicolumn{%d}{c|}{%s}"%(ranges[j][1]-ranges[j][0]+1, value), end=" &\n" if j != len(sentence)-1 else "") - print("\\\\ \hline\n") - - print("\\texttt{\\textsc{input}} & %s\\\\ \hline"%" & ".join(text)) - + print("\\texttt{\\textbf{input}} & %s\\\\ \cline{1-%d}"%(" & ".join(["\\texttt{%s}"%c for c in text[ranges[parts[partId][0]][0]:ranges[parts[partId][-1]][1]+1]]), partSizes[partId]+1)) print("\end{tabular}") print("\end{table}") ################################################################################