Skip to content
Snippets Groups Projects
Commit 5867c799 authored by Carlos Ramisch
Browse files

Minimal update in Sequoia simplification script

parent c35044ad
No related branches found
No related tags found
No related merge requests found
...@@ -6,8 +6,8 @@ from transformers import AutoModel, AutoTokenizer ...@@ -6,8 +6,8 @@ from transformers import AutoModel, AutoTokenizer
name = 'almanach/camembert-base' name = 'almanach/camembert-base'
#sent = "Des poids lourds et engins en feu \ #sent = "Des poids lourds et engins en feu \
# dans une entreprise en Vendée ." # dans une entreprise en Vendée ."
#sent = "La gare routière attend toujours ses illuminations ." sent = "La gare routière attend toujours ses illuminations ."
sent = "Quelle surprise ! Arturo a la covid" #sent = "Quelle surprise ! Arturo a la covid"
tok = AutoTokenizer.from_pretrained(name) tok = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name) model = AutoModel.from_pretrained(name)
......
# global.columns = ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC
# text = Les petits ruisseaux font les grandes rivières. # text = Les petits ruisseaux font les grandes rivières.
1 Les le DET _ Definite=Def|Number=Plur|PronType=Art 3 det _ _ 1 Les le DET _ Definite=Def|Number=Plur|PronType=Art 3 det _ _
2 petits petit ADJ _ Gender=Masc|Number=Plur 3 amod _ _ 2 petits petit ADJ _ Gender=Masc|Number=Plur 3 amod _ _
......
...@@ -538,8 +538,8 @@ class TransBasedConfig(object): ...@@ -538,8 +538,8 @@ class TransBasedConfig(object):
`next_act` is a string among "SHIFT", "RIGHT-ARC-X" or "LEFT-ARC-X" where `next_act` is a string among "SHIFT", "RIGHT-ARC-X" or "LEFT-ARC-X" where
"X" is the name of any valid syntactic relation label (deprel). "X" is the name of any valid syntactic relation label (deprel).
Returns a new syntactic relation added by the action, or None for "SHIFT" Returns a new syntactic relation added by the action, or None for "SHIFT"
Returned relation is a triple (mod, head, deprel) with modifier, head, and Returned relation is a triple (dep, head, deprel) with dependent, head, and
deprel label if `add_deprel=True` (default), or a pair (mod, head) if deprel label if `add_deprel=True` (default), or a pair (dep, head) if
`add_deprel=False`. `add_deprel=False`.
""" """
if next_act == "SHIFT": if next_act == "SHIFT":
......
...@@ -171,7 +171,7 @@ with open(sys.argv[1], "r", encoding="UTF=8") as f: ...@@ -171,7 +171,7 @@ with open(sys.argv[1], "r", encoding="UTF=8") as f:
print(sent.serialize(), end="") print(sent.serialize(), end="")
else: else:
np_counter += 1 np_counter += 1
np_ids.append(sent.metadata["sent_id"]) np_ids.append((sent.metadata["sent_id"],len(sent)))
print( "{} range tokens removed.\n".format(range_counter), file=sys.stderr) print( "{} range tokens removed.\n".format(range_counter), file=sys.stderr)
...@@ -182,4 +182,4 @@ print( "{} supersense tags modified (complex operators).\n".format(mod_ssense_co ...@@ -182,4 +182,4 @@ print( "{} supersense tags modified (complex operators).\n".format(mod_ssense_co
#print( "{} subrelations removed from deprel.".format(subrel_counter), file=sys.stderr) #print( "{} subrelations removed from deprel.".format(subrel_counter), file=sys.stderr)
print( "{} non-projective sentences removed:".format(np_counter), file=sys.stderr) print( "{} non-projective sentences removed:".format(np_counter), file=sys.stderr)
print(", ".join(np_ids), file=sys.stderr) print("\n".join([f"{np_id} -> {lgth}" for (np_id, lgth) in np_ids]), file=sys.stderr)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment