Skip to content
Snippets Groups Projects
Commit 4c08f1d3 authored by Carlos Ramisch's avatar Carlos Ramisch
Browse files

Add tiny example .conllu file and add functions to print oracle in TransBasedSent

parent 7257120d
No related branches found
No related tags found
No related merge requests found
......@@ -332,18 +332,40 @@ class TransBasedSent(object):
"""
###############################
def __init__(self, sent, actions_only=False):
    """
    Store the sentence and the printing mode used by `__str__`.

    `sent`: A `TokenList` as retrieved by the `conllu` library or `readConllu()`
    `actions_only`: affects the way the `__str__` function prints this object;
      defaults to False so that existing one-argument calls keep working
    """
    self.sent = sent
    self.actions_only = actions_only
###############################
def __str__(self):
    """
    Sequence of configs and arc-hybrid actions corresponding to the sentence.

    By default, returns one "config -> action" line per oracle step, joined
    by newlines. If `self.actions_only=True`, returns only the actions,
    separated by single spaces.
    """
    oracle = self.get_configs_oracle()
    if self.actions_only:
        return " ".join(action for _, action in oracle)
    # Each step is appended exactly once; nothing is added after the loop,
    # so an empty oracle simply yields an empty string.
    return "\n".join("{} -> {}".format(str(config), action)
                     for config, action in oracle)
###############################
def get_configs_oracle(self):
"""
Generator of oracle arc-hybrid configurations based on gold parsing tree.
Yields triples (stack, buffer, action) where action is a string among:
Yields pairs (`TransBasedConfig`, action) where action is a string among:
- "SHIFT" -> pop buffer into stack
- "LEFT-ARC-X" -> relation "X" from buffer head to stack head, pop stack
- "RIGHT-ARC-X" -> relation "X" from stack head to stack second, pop stack
......@@ -402,6 +424,15 @@ class TransBasedConfig(object):
###############################
def __str__(self):
    """
    Generate a string with explicit buffer and stack words.

    Returns "<stack>, <buffer>", where each list shows the `form` of the
    token at every 1-based index. Index 0 is shown as the literal `0`
    (presumably the artificial root -- confirm against the oracle code).
    """
    def word(i):
        # Index 0 must not be looked up: `self.sent[0 - 1]` would silently
        # wrap around to the last word of the sentence.
        return self.sent[i - 1]['form'] if i else 0

    # Render the buffer from its actual content instead of assuming that
    # it always ends with 0; output is identical whenever it does.
    return "{}, {}".format([word(i) for i in self.stack],
                           [word(i) for i in self.buff])
###############################
def is_final(self):
"""
Returns True if configuration is final, False else.
......
......@@ -7,6 +7,7 @@ We obtained the file `trunk/sequoia-ud.parseme.frsemcor` from commit number `ea7
The file is the result of the conversion from Sequoia's source as described in the [documentation](https://deep-sequoia.inria.fr/process/)
We keep the original file in the `src` folder to make command-line completion faster.
The file `tiny.conllu` was manually extracted and simplified; it is used in parsing exercises.
### Simplification
......
# global.columns = ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC PARSEME:MWE FRSEMCOR:NOUN PARSEME:NE
# sent_id = annodis.er_00192
# text = La gare routière attend toujours ses illuminations.
1 La le DET _ Definite=Def|Gender=Fem|Number=Sing|PronType=Art 2 det _ _ * * *
2 gare gare NOUN _ Gender=Fem|Number=Sing 4 nsubj _ _ 1:_|MWE|SYNT * *
3 routière routier ADJ _ Gender=Fem|Number=Sing 2 amod _ _ 1 * *
4 attend attendre VERB _ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root _ _ * * *
5 toujours toujours ADV _ _ 4 advmod _ _ * * *
6 ses son DET _ Number=Plur|Poss=Yes 7 det _ _ * * *
7 illuminations illumination NOUN _ Gender=Fem|Number=Plur 4 obj _ _ * Artifact *
8 . . PUNCT _ _ 4 punct _ _ * * *
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment