diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index a2e358b2cae411be0b609b734f4f28959967d690..f24ab4cee929fe99b93529b94032c1c9ddf25cfa 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -44,6 +44,7 @@ int word_buffer_read_next_word(word_buffer *wb); int word_buffer_move_right(word_buffer *wb); int word_buffer_move_left(word_buffer *wb); void word_buffer_print(FILE *f, word_buffer *wb); +void word_buffer_print_compact(FILE *f, word_buffer *wb); int word_buffer_is_empty(word_buffer *wb); int word_buffer_is_last(word_buffer *wb); int word_buffer_end(word_buffer *wb); diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c index 5f068e21995509830abfc1c1fb5a2bf25279012f..64be8859569b5af09667538c9a46239ad8d7511c 100644 --- a/maca_common/src/mcd.c +++ b/maca_common/src/mcd.c @@ -56,7 +56,7 @@ mcd *mcd_copy(mcd *m) mcd *copy = mcd_new(m->nb_col); for(i=0; i < MCD_WF_NB; i++) - copy->wf2col[i] = m->wf2col[i] = -1; + copy->wf2col[i] = m->wf2col[i]; for(i=0; i < m->nb_col; i++){ copy->representation[i] = m->representation[i]; diff --git a/maca_common/src/word.c b/maca_common/src/word.c index d4b25c03d074649e499d1c6aa6cf19eb632c4f3e..be7d2f8d26f9763fb82cc89df9fd9412a79d89f6 100644 --- a/maca_common/src/word.c +++ b/maca_common/src/word.c @@ -145,8 +145,8 @@ int word_get_gov_relative_index(word *w) int index; if(word_get_gov(w) == 0) return -1; - printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w)); - printf("gov = %d\n", word_get_gov(w)); + /* printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w)); + printf("gov = %d\n", word_get_gov(w)); */ index = (word_get_relative_index(w)) + (word_get_gov(w)); /* printf("index = %d\n", index); */ diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index 06d6b8d1e5fec585ed4a7639cdedc6551fea0f53..aabe1876898902cf68621439b6b14c3ef0173a31 100644 --- a/maca_common/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -45,6 +45,26 @@ void word_buffer_print(FILE *f, word_buffer *wb) if(w){ fprintf(f, "[ 3] "); word_print(f, w); fprintf(f, "\n");} } +void word_buffer_print_compact(FILE *f, word_buffer *wb) +{ + word *w; + w = word_buffer_bm3(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + w = word_buffer_bm2(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + w = word_buffer_bm1(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + w = word_buffer_b0(wb); + if(w){ fprintf(f, "[%d:%s] ", word_get_relative_index(w), w->form);} + w = word_buffer_b1(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + w = word_buffer_b2(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + w = word_buffer_b3(wb); + if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);} + fprintf(f, "\n"); +} + void word_buffer_free(word_buffer *wb) { int i; @@ -121,7 +141,7 @@ int word_buffer_move_left(word_buffer *wb) int word_buffer_end(word_buffer *wb) { - return (wb->current_index == wb->nbelem)? 1 : 0; + return (wb->current_index >= wb->nbelem)? 1 : 0; } int word_buffer_is_last(word_buffer *wb) diff --git a/maca_trans_parser/src/config.c b/maca_trans_parser/src/config.c index 43c4cdb2227a0853ac28edd9da5575cadb421f67..8e809a8b0b6b6ac1d85a232f97b6abb8a6245b42 100644 --- a/maca_trans_parser/src/config.c +++ b/maca_trans_parser/src/config.c @@ -106,14 +106,14 @@ void config_print(FILE *f, config *c) word *s0 = NULL; if(c){ if(!stack_is_empty(c->st)) - s0 = stack_elt_n(c->st, 0); + s0 = stack_elt_n(c->st, 0); b0 = word_buffer_b0(c->bf); - if(s0) { printf("s0 = "); word_print2(stdout, s0);} - if(b0) { printf("b0 = "); word_print2(stdout, b0);} + /* if(s0) { printf("s0 = "); word_print2(stdout, s0);} */ + /* if(b0) { printf("b0 = "); word_print2(stdout, b0);} */ stack_print(f, c->st); fprintf(f, "\n"); - word_buffer_print(f, c->bf); + word_buffer_print_compact(f, c->bf); } } diff --git a/maca_trans_parser/src/maca_trans_parser_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_mcf2cff.c index 64cd4dfbc8bbfe9b2b365a019783d2413ca9061a..645307c5596c17747d6d2bd7574f1cdd9279a4a4 100644 --- a/maca_trans_parser/src/maca_trans_parser_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_mcf2cff.c @@ -51,9 +51,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx) feat_vec *fv = feat_vec_new(feature_types_nb); int sentence_nb = 0; int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label); + int eos_label = dico_string2int(ctx->dico_labels, "eos"); word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct); FILE *mcf_file = myfopen(ctx->input_filename, "r"); + /* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */ mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL); @@ -61,10 +63,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx) c = config_initial(mcf_file, mcd_struct_hyp, 5); while(!word_buffer_end(ref)){ - printf("************ REF ************\n"); + /* printf("************ REF ************\n"); word_buffer_print(stdout, ref); - printf("*****************************\n"); + printf("*****************************\n");*/ + printf("*****************************\n"); config_print(stdout,c); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); @@ -74,32 +77,50 @@ void generate_training_file_stream(FILE *output_file, context *ctx) mvt_type = movement_type(mvt_code); mvt_label = movement_label(mvt_code); - - /* printf("mvt code = %d\n", mvt_code); */ + + /* printf("mvt code = %d\n", mvt_code); + printf("mvt type = %d\n", mvt_type); */ movement_print(stdout, mvt_code, ctx->dico_labels); fprintf(output_file, "%d", mvt_code); feat_vec_print(output_file, fv); - - if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */ - - /* create the root arc */ - movement_right_arc(c, mvt_label, 0); - - /* pop root from stack */ - stack_pop(config_get_stack(c)); - - /* printf("sentence complete config : "); - config_print(stdout,c); */ - } + if(mvt_type == MVT_LEFT){ movement_left_arc(c, mvt_label, 0); continue; } + if(mvt_type == MVT_RIGHT){ movement_right_arc(c, mvt_label, 0); word_buffer_move_right(ref); + + if((mvt_label == eos_label)){ /* sentence is complete */ + printf("sentence complete\n"); + + while(movement_reduce(c,0)){ + printf("reduce\n"); + printf("*****************************\n"); + config_print(stdout,c); + + } + + /* pop eos from stack */ + /* stack_pop(config_get_stack(c)); */ + /* pop root from stack */ + /* stack_pop(config_get_stack(c)); */ + + config_print(stdout,c); + + printf("ref current index = %d\n", word_buffer_get_current_index(ref)); + if(word_buffer_is_last(ref)){ + printf("it is the end\n"); + break; + } + + /* change index of dummy word */ + word_set_relative_index(stack_top(config_get_stack(c)), word_get_relative_index(word_buffer_b0(config_get_buffer(c))) - 1); + } continue; } if(mvt_type == MVT_REDUCE){ diff --git a/maca_trans_parser/src/movement_parser_arc_eager.c b/maca_trans_parser/src/movement_parser_arc_eager.c index c1f8a207bd46315ccc791aa672a7b93c62e541fc..56beba1d06b79bd3b91f19a8cbfa3125b630e9b1 100644 --- a/maca_trans_parser/src/movement_parser_arc_eager.c +++ b/maca_trans_parser/src/movement_parser_arc_eager.c @@ -21,7 +21,7 @@ int movement_type(int mvt) if(mvt == 0) return MVT_SHIFT; /* 0 is the code of shift */ if(mvt == 1) return MVT_REDUCE; /* 1 is the code of reduce */ if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */ - return MVT_LEFT; /* odd movements are right movements */ + return MVT_RIGHT; /* odd movements are right movements */ } int movement_label(int mvt) @@ -40,15 +40,21 @@ int movement_left_arc(config *c, int label, float score) if(word_buffer_is_empty(c->bf)) return 0; /* word on top of the stack should not have a governor */ - - printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st))); + /* printf("index word top of stack = %d\n", word_get_relative_index(stack_top(c->st))); */ + /* printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st))); */ if(word_get_gov_relative_index(stack_top(c->st)) != -1) return 0; + word *gov = word_buffer_b0(c->bf); + word *dep = stack_top(c->st); + int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep)); + + printf("create left arc %d <- %d dist = %d\n", word_get_relative_index(dep), word_get_relative_index(gov), dist); + /* create a new dependency */ - word_set_gov(stack_top(c->st), word_get_relative_index(word_buffer_b0(c->bf))); - word_set_label(stack_top(c->st), label); + word_set_gov(dep, dist); + word_set_label(dep, label); /* depset_add(c->ds, word_buffer_b0(c->bf), label, stack_top(c->st)); */ stack_pop(c->st); @@ -62,12 +68,17 @@ int movement_right_arc(config *c, int label, float score) if(stack_is_empty(c->st)) return 0; if(word_buffer_is_empty(c->bf)) return 0; + word *gov = stack_top(c->st); + word *dep = word_buffer_b0(c->bf); + int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep)); + + printf("create right arc %d -> %d dist = %d\n", word_get_relative_index(gov), word_get_relative_index(dep), dist); + /* create a new dependency */ - word_set_gov(word_buffer_b0(c->bf), word_get_relative_index(stack_top(c->st))); - word_set_label(word_buffer_b0(c->bf), label); + word_set_gov(dep, dist); + word_set_label(dep, label); - /* depset_add(c->ds, stack_top(c->st), label, word_buffer_b0(c->bf)); */ stack_push(c->st, word_buffer_b0(c->bf)); word_buffer_move_right(c->bf); @@ -91,7 +102,7 @@ int movement_shift(config *c, int stream, float score) int movement_reduce(config *c, float score) { if(stack_is_empty(c->st)) return 0; - if(word_get_gov(stack_top(c->st)) == -1) return 0; /* word on top of stack does not have a governor */ + if(word_get_gov(stack_top(c->st)) == 0) return 0; /* word on top of stack does not have a governor */ stack_pop(c->st); return 1; } diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.c b/maca_trans_parser/src/oracle_parser_arc_eager.c index f719363d27f95d661d1ed3efb2e26a18878e2e6e..a24442bf116d7df7ed426a1f45ae4a091ae565d6 100644 --- a/maca_trans_parser/src/oracle_parser_arc_eager.c +++ b/maca_trans_parser/src/oracle_parser_arc_eager.c @@ -7,12 +7,18 @@ int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int word_index) { int dep; + int gov_ref; + int gov_hyp; for(dep=1; dep < ref->nbelem; dep++){ - if(word_get_gov_relative_index(word_buffer_get_word_n(ref, dep)) == word_index){ /* found a dependent of word in ref */ + gov_ref = word_get_gov_relative_index(word_buffer_get_word_n(ref, dep)); + if(gov_ref == word_index){ /* found a dependent of word in ref */ /* look for a dependency in hyp such that its dependent is dep */ - - if(word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep)) != word_index) return 0; + printf("found a dep of word %d in ref, it is %d\n", word_index, dep); + gov_hyp = word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep)); + + printf("gov of %d in hyp is %d\n", dep,gov_hyp); + if(gov_hyp != gov_ref) return 0; /* if((dep >= c->ds->length) || (c->ds->array[dep].gov == NULL) @@ -29,38 +35,42 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref) word *s0; /* word on top of stack */ word *b0; /* next word in the bufer */ int s0_index, b0_index; + int s0_gov_index, b0_gov_index; if(!stack_is_empty(c->st) && !word_buffer_is_empty(c->bf)){ s0 = stack_top(c->st); s0_index = word_get_relative_index(s0); + s0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)); b0 = word_buffer_b0(c->bf); b0_index = word_get_relative_index(b0); + b0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)); - printf("s0_index = %d b0_index = %d\n", s0_index, b0_index); - printf("dans ref gov de s0 (%d) = %d\n", s0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index))); - printf("dans ref gov de b0 (%d) = %d\n", b0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index))); + /* printf("s0_index = %d b0_index = %d\n", s0_index, b0_index); + printf("dans ref gov de s0 (%d) = %d\n", s0_index, s0_gov_index); + printf("dans ref gov de b0 (%d) = %d\n", b0_index, b0_gov_index);*/ /* LEFT ARC b0 is the governor and s0 the dependent */ - if(word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)) == b0_index){ + if(s0_gov_index == b0_index){ - printf("oracle says left\n"); + /* printf("oracle says left\n"); */ return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); } /* RIGHT ARC s0 is the governor and b0 the dependent */ - if((word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)) == s0_index)) + if(b0_gov_index == s0_index){ + /* printf("oracle says right\n"); */ return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); - + } /* REDUCE */ - if((stack_height(c->st) > 2) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)) - - /* if(word_get_gov_relative_index(stack_top(c->st)) != -1) */ - return MVT_REDUCE; - - /* if(check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)) - return MVT_REDUCE;*/ + printf("all dep in ref are in hyp = %d\n", check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)); + if((stack_height(c->st) > 2) + && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) + && (word_get_gov(stack_top(c->st)) != 0)) /* word on top of the stack has a goveror */ + { + return MVT_REDUCE; + } /* SHIFT */ return MVT_SHIFT;