diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h
index 184608f1fa06277abe805ae448628e396a16f8fd..73f191638e98e2bd889b27b4303a2b1345c7f581 100644
--- a/maca_common/include/word_buffer.h
+++ b/maca_common/include/word_buffer.h
@@ -55,6 +55,14 @@ void word_buffer_print_compact(FILE *f, word_buffer *wb);
 int word_buffer_read_sentence(word_buffer *bw);
 word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct);
 int word_buffer_locate_token_with_offset(word_buffer *wb, int offset);
+
+/* rightmost word between gov and bm1 whose governor is gov (gov itself if none) */
+word *word_buffer_get_rightmost_child(word_buffer *wb, word *gov);
+/* last word reached when following rightmost children, starting from root */
+word *word_buffer_get_rightmost_descendent(word_buffer *wb, word *root);
+
+
+
 /*
 int word_buffer_is_empty(word_buffer *wb);
 int word_buffer_is_last(word_buffer *wb);
diff --git a/maca_common/src/word.c b/maca_common/src/word.c
index 33f27da9cc0865cfa55130e4dd1b5e2964b496a3..e62d2f6a3f1811b9704081219ccf201d7603b623 100644
--- a/maca_common/src/word.c
+++ b/maca_common/src/word.c
@@ -190,3 +190,4 @@ void word_sprint_col_n(char *s, word *w, int n)
   }
   s[j] = '\0';
 }
+
diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c
index 7e52dd3c1227ef1c0e955bb7383d0c4a97bfc4f3..715729cd936bdafc98ad4466d0ee5ceaee6d9e0e 100644
--- a/maca_common/src/word_buffer.c
+++ b/maca_common/src/word_buffer.c
@@ -187,6 +187,55 @@ int word_buffer_locate_token_with_offset(word_buffer *wb, int offset)
 }
 
 
+/* Returns the rightmost word, among the words located between gov and bm1
+   (the word preceding the current word of the buffer), whose governor is gov.
+   Returns gov itself when the buffer has no bm1 and the word stored at the
+   index of gov when no child is found; returns NULL when wb or gov is NULL. */
+word *word_buffer_get_rightmost_child(word_buffer *wb, word *gov)
+{
+  if(wb == NULL || gov == NULL) return NULL;
+
+  /* bm1 may be NULL (movement_root() tests it before use): nothing to scan */
+  word *bm1 = word_buffer_bm1(wb);
+  if(bm1 == NULL) return gov;
+
+  /* both bounds are loop invariants, they are computed only once */
+  int gov_index = word_get_index(gov);
+  int last_index = word_get_index(bm1);
+  int rightmost_child_index = gov_index;
+
+  for(int current_index = gov_index; current_index <= last_index; current_index++){
+    word *w = word_buffer_get_word_n(wb, current_index);
+    if(w && (word_get_gov_index(w) == gov_index))
+      rightmost_child_index = current_index;
+  }
+  return word_buffer_get_word_n(wb, rightmost_child_index);
+}
+
+
+/* Follows the chain of rightmost children starting at root and returns the
+   last word reached (root itself when it has no child to its right).
+   Returns NULL when wb or root is NULL. */
+word *word_buffer_get_rightmost_descendent(word_buffer *wb, word *root)
+{
+  if(wb == NULL || root == NULL) return NULL;
+
+  word *rightmost_descendent = root;
+
+  for(;;){
+    word *rightmost_child = word_buffer_get_rightmost_child(wb, rightmost_descendent);
+    /* stop as soon as no child lies strictly to the right of the current word */
+    if((rightmost_child == NULL)
+       || (word_get_index(rightmost_child) <= word_get_index(rightmost_descendent)))
+      break;
+    rightmost_descendent = rightmost_child;
+  }
+
+  return rightmost_descendent;
+}
+
+
+
 /*int word_buffer_end(word_buffer *wb)
 {
   return (wb->current_index >= wb->nbelem)? 1 : 0;
diff --git a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c
index eeb228824e1275480e4e749b8699ed3800c74a79..e9111f910c33ed2a451821dceb7878f71ea0e410 100644
--- a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c
+++ b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c
@@ -92,9 +92,6 @@ void generate_training_file(FILE *output_file, context *ctx)
       switch(mvt_type){
       case MVT_PARSER_EOS :
         movement_parser_eos(c);
-        sentence_nb++;
-        if((sentence_nb % 100) == 0)
-          fprintf(stderr, "\rsentence %d", sentence_nb);
         /* if(word_buffer_is_last(ref)) */
         break;
       case MVT_PARSER_LEFT :
@@ -108,6 +105,9 @@ void generate_training_file(FILE *output_file, context *ctx)
         movement_parser_reduce(c);
         break;
       case MVT_PARSER_ROOT :
+        sentence_nb++;
+        if((sentence_nb % 100) == 0)
+          fprintf(stderr, "\rsentence %d", sentence_nb);
         movement_parser_root(c, root_label);
         break;
       case MVT_PARSER_SHIFT :
diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c
index 89853eb2c9b0bb9485e43575470efc9e033dc6fd..174302d5d0dede65e994f489c9e9ad913a75c0fb 100644
--- a/maca_trans_parser/src/movements.c
+++ b/maca_trans_parser/src/movements.c
@@ -20,7 +20,7 @@ int movement_left_arc(config *c, int movement_code, int label)
   word_set_gov(dep, dist);
   word_set_label(dep, label);
 
-  gov->lspan = dep->lspan;
+  // gov->lspan = dep->lspan;
 
   config_push_mvt(c, movement_code, gov, dep);
 
@@ -57,7 +57,7 @@ int movement_right_arc(config *c, int movement_code, int label)
   word_set_gov(dep, dist);
   word_set_label(dep, label);
 
-  gov->rspan = dep->rspan;
+  // gov->rspan = dep->rspan;
 
   stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
 
@@ -139,6 +139,30 @@ int movement_root(config *c, int movement_code, int root_code)
   word_set_label(s0, root_code);
   s0->is_root = 1;
 
+  /* the word bm1 (preceding the current word in the buffer) is marked as end of sentence */
+  word *bm1 = word_buffer_bm1(config_get_buffer(c));
+  if(bm1)
+    word_set_sent_seg(bm1, 1);
+
+
+  /* word *rd = word_buffer_get_rightmost_descendent(config_get_buffer(c), s0);
+  if(rd)
+  word_set_sent_seg(rd, 1); */
+
+  stack_pop(config_get_stack(c));
+
+  config_push_mvt(c, movement_code, s0, NULL);
+  return 1;
+}
+
+int movement_root_old(config *c, int movement_code, int root_code)
+{
+  if(stack_is_empty(config_get_stack(c))) return 0;
+  word *s0 = stack_top(config_get_stack(c));
+  word_set_gov(s0, 0);
+  word_set_label(s0, root_code);
+  s0->is_root = 1;
+
   /*---- ajout 19/12/17 ----------*/
   // word_set_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0->rspan), 1);
   /*-----------------------------*/
diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.c b/maca_trans_parser/src/oracle_parser_arc_eager.c
index 0bc5fe0bbf5c8350bd41978fe61f6bc73e2c3a7f..241deb902e66c08bd1bfca1257640b1cdb3b0922 100644
--- a/maca_trans_parser/src/oracle_parser_arc_eager.c
+++ b/maca_trans_parser/src/oracle_parser_arc_eager.c
@@ -69,12 +69,12 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label)
   }
 
   /* word on the top of the stack is an end of sentence marker */
-  if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1)
+  /* if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1)
      // && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)
      && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
      ){
     return MVT_PARSER_EOS;
-  }
+  }*/
 
   /* LEFT ARC b0 is the governor and s0 the dependent */
   if(s0_gov_index == b0_index){
diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
index a36bc54f3b1e9d335df01bbf6f540ed6fcfd58da..2ee6b8af3215c0109527fe39278252367375affd 100644
--- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
+++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
@@ -134,6 +134,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
   int argmax1, argmax2;
   float max1, max2;
   int index;
+  word *root;
 
   root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
   if(root_label == -1) root_label = 0;
@@ -212,9 +213,15 @@ void simple_decoder_parser_arc_eager(context *ctx)
         result = movement_parser_reduce(c);
         break;
       case MVT_PARSER_ROOT:
+        root = stack_top(config_get_stack(c));
         result = movement_parser_root(c, root_label);
-        /* while(!stack_is_empty(config_get_stack(c)))
-           movement_parser_root(c, root_label);*/
+        while(!stack_is_empty(config_get_stack(c))){
+          /* word *dep = stack_pop(config_get_stack(c));
+             int dist = (word_get_index(root)) - (word_get_index(dep));
+             word_set_gov(dep, dist);
+             word_set_label(dep, 0);*/
+          movement_parser_root(c, root_label);
+        }
         break;
       case MVT_PARSER_EOS:
         result = movement_parser_eos(c);