Commit be7d96ae authored by Alexis Nasr's avatar Alexis Nasr
Browse files

modified sentence segmentation by the parser. The sentence segmentation is now...

Modified sentence segmentation by the parser. Sentence segmentation is now triggered by a root action.
parent 0705ed04
......@@ -55,6 +55,12 @@ void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_read_sentence(word_buffer *bw);
word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct);
int word_buffer_locate_token_with_offset(word_buffer *wb, int offset);
word *word_buffer_get_rightmost_child(word_buffer *wb, word *gov);
word *word_buffer_get_rightmost_descendent(word_buffer *wb, word *root);
/*
int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb);
......
......@@ -190,3 +190,4 @@ void word_sprint_col_n(char *s, word *w, int n)
}
s[j] = '\0';
}
......@@ -187,6 +187,37 @@ int word_buffer_locate_token_with_offset(word_buffer *wb, int offset)
}
/*
 * Finds the rightmost child of gov among the words already read.
 *
 * Scans the positions from the index of gov up to the index of bm1 (the word
 * preceding the current word of the buffer) and remembers the last position
 * whose governor index equals the index of gov.
 *
 * wb  : word buffer to search in
 * gov : candidate governor
 *
 * Returns the word stored at the position of the rightmost child found, or
 * the word stored at the index of gov when no child was found (including the
 * case where the buffer has no bm1 word).
 *
 * NOTE(review): word indices are used directly as positions for
 * word_buffer_get_word_n, as in the original code -- confirm that both
 * numberings coincide.
 */
word *word_buffer_get_rightmost_child(word_buffer *wb, word *gov)
{
  word *bm1 = word_buffer_bm1(wb);
  int gov_index = word_get_index(gov);
  int rightmost_child_index = gov_index;

  /* bm1 can be missing (movement_root guards against it as well); in that
     case there is nothing to scan and gov's own position is returned */
  if(bm1){
    /* both bounds are loop invariants: read them once */
    int last_index = word_get_index(bm1);

    for(int current_index = gov_index; current_index <= last_index; current_index++){
      if(word_get_gov_index(word_buffer_get_word_n(wb, current_index)) == gov_index)
        rightmost_child_index = current_index;
    }
  }
  return word_buffer_get_word_n(wb, rightmost_child_index);
}
/*
 * Walks down the chain of rightmost children starting at root.
 *
 * At every step the rightmost child of the current word is looked up with
 * word_buffer_get_rightmost_child; the walk continues as long as that child
 * has a strictly larger index than the current word.
 *
 * wb   : word buffer holding the words
 * root : word the walk starts from
 *
 * Returns the last word reached (root itself when its rightmost child does
 * not have a larger index).
 */
word *word_buffer_get_rightmost_descendent(word_buffer *wb, word *root)
{
  word *current = root;
  word *candidate = word_buffer_get_rightmost_child(wb, current);

  /* step to the child while it lies further right than the current word */
  while(word_get_index(candidate) > word_get_index(current)){
    current = candidate;
    candidate = word_buffer_get_rightmost_child(wb, current);
  }
  return current;
}
/*int word_buffer_end(word_buffer *wb)
{
return (wb->current_index >= wb->nbelem)? 1 : 0;
......
......@@ -92,9 +92,6 @@ void generate_training_file(FILE *output_file, context *ctx)
switch(mvt_type){
case MVT_PARSER_EOS :
movement_parser_eos(c);
sentence_nb++;
if((sentence_nb % 100) == 0)
fprintf(stderr, "\rsentence %d", sentence_nb);
/* if(word_buffer_is_last(ref)) */
break;
case MVT_PARSER_LEFT :
......@@ -108,6 +105,9 @@ void generate_training_file(FILE *output_file, context *ctx)
movement_parser_reduce(c);
break;
case MVT_PARSER_ROOT :
sentence_nb++;
if((sentence_nb % 100) == 0)
fprintf(stderr, "\rsentence %d", sentence_nb);
movement_parser_root(c, root_label);
break;
case MVT_PARSER_SHIFT :
......
......@@ -20,7 +20,7 @@ int movement_left_arc(config *c, int movement_code, int label)
word_set_gov(dep, dist);
word_set_label(dep, label);
gov->lspan = dep->lspan;
// gov->lspan = dep->lspan;
config_push_mvt(c, movement_code, gov, dep);
......@@ -57,7 +57,7 @@ int movement_right_arc(config *c, int movement_code, int label)
word_set_gov(dep, dist);
word_set_label(dep, label);
gov->rspan = dep->rspan;
// gov->rspan = dep->rspan;
stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
......@@ -139,6 +139,30 @@ int movement_root(config *c, int movement_code, int root_code)
word_set_label(s0, root_code);
s0->is_root = 1;
/* the word bm1 (preceding the current word in the buffer) is marked as end of sentence */
word *bm1 = word_buffer_bm1(config_get_buffer(c));
if(bm1)
word_set_sent_seg(bm1, 1);
/* word *rd = word_buffer_get_rightmost_descendent(config_get_buffer(c), s0);
if(rd)
word_set_sent_seg(rd, 1); */
stack_pop(config_get_stack(c));
config_push_mvt(c, movement_code, s0, NULL);
return 1;
}
int movement_root_old(config *c, int movement_code, int root_code)
{
if(stack_is_empty(config_get_stack(c))) return 0;
word *s0 = stack_top(config_get_stack(c));
word_set_gov(s0, 0);
word_set_label(s0, root_code);
s0->is_root = 1;
/*---- ajout 19/12/17 ----------*/
// word_set_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0->rspan), 1);
/*-----------------------------*/
......
......@@ -69,12 +69,12 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label)
}
/* word on the top of the stack is an end of sentence marker */
if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1)
/* if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1)
// && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)
&& check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
){
return MVT_PARSER_EOS;
}
}*/
/* LEFT ARC b0 is the governor and s0 the dependent */
if(s0_gov_index == b0_index){
......
......@@ -134,6 +134,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
int argmax1, argmax2;
float max1, max2;
int index;
word *root;
root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
if(root_label == -1) root_label = 0;
......@@ -212,9 +213,15 @@ void simple_decoder_parser_arc_eager(context *ctx)
result = movement_parser_reduce(c);
break;
case MVT_PARSER_ROOT:
root = stack_top(config_get_stack(c));
result = movement_parser_root(c, root_label);
/* while(!stack_is_empty(config_get_stack(c)))
movement_parser_root(c, root_label);*/
while(!stack_is_empty(config_get_stack(c))){
/* word *dep = stack_pop(config_get_stack(c));
int dist = (word_get_index(root)) - (word_get_index(dep));
word_set_gov(dep, dist);
word_set_label(dep, 0);*/
movement_parser_root(c, root_label);
}
break;
case MVT_PARSER_EOS:
result = movement_parser_eos(c);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment