Skip to content
Snippets Groups Projects
Commit 5867c799 authored by Carlos Ramisch
Browse files

Minimal update in Sequoia simplification script

parent c35044ad
No related branches found
No related tags found
No related merge requests found
......@@ -6,8 +6,8 @@ from transformers import AutoModel, AutoTokenizer
name = 'almanach/camembert-base'
#sent = "Des poids lourds et engins en feu \
# dans une entreprise en Vendée ."
#sent = "La gare routière attend toujours ses illuminations ."
sent = "Quelle surprise ! Arturo a la covid"
sent = "La gare routière attend toujours ses illuminations ."
#sent = "Quelle surprise ! Arturo a la covid"
tok = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)
......
# global.columns = ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC
# text = Les petits ruisseaux font les grandes rivières.
1 Les le DET _ Definite=Def|Number=Plur|PronType=Art 3 det _ _
2 petits petit ADJ _ Gender=Masc|Number=Plur 3 amod _ _
......
......@@ -538,8 +538,8 @@ class TransBasedConfig(object):
`next_act` is a string among "SHIFT", "RIGHT-ARC-X" or "LEFT-ARC-X" where
"X" is the name of any valid syntactic relation label (deprel).
Returns a new syntactic relation added by the action, or None for "SHIFT"
Returned relation is a triple (mod, head, deprel) with modifier, head, and
deprel label if `add_deprel=True` (default), or a pair (mod, head) if
Returned relation is a triple (dep, head, deprel) with dependent, head, and
deprel label if `add_deprel=True` (default), or a pair (dep, head) if
`add_deprel=False`.
"""
if next_act == "SHIFT":
......
......@@ -171,7 +171,7 @@ with open(sys.argv[1], "r", encoding="UTF=8") as f:
print(sent.serialize(), end="")
else:
np_counter += 1
np_ids.append(sent.metadata["sent_id"])
np_ids.append((sent.metadata["sent_id"],len(sent)))
print( "{} range tokens removed.\n".format(range_counter), file=sys.stderr)
......@@ -182,4 +182,4 @@ print( "{} supersense tags modified (complex operators).\n".format(mod_ssense_co
#print( "{} subrelations removed from deprel.".format(subrel_counter), file=sys.stderr)
print( "{} non-projective sentences removed:".format(np_counter), file=sys.stderr)
print(", ".join(np_ids), file=sys.stderr)
print("\n".join([f"{np_id} -> {lgth}" for (np_id, lgth) in np_ids]), file=sys.stderr)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment