diff --git a/maca_tools/src/mcf2conll.c b/maca_tools/src/mcf2conll.c index 49f3e34f9b431acdc5dee6a7205d429680040fe9..5bd2c4465a52d9b85a743ca5bb0c31c190b1671b 100644 --- a/maca_tools/src/mcf2conll.c +++ b/maca_tools/src/mcf2conll.c @@ -181,8 +181,8 @@ int main(int argc, char *argv[]) fprintf(output_file, "_"); fprintf(output_file, "\t"); - fprintf(output_file, "\t_\t\n"); - + /* fprintf(output_file, "\t_\t\n"); */ + fprintf(output_file, "\n"); if((sent_seg_col) && (word_get_sent_seg(w))){ fprintf(output_file, "\n"); index = 0; diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index a9b9d4382b9f5480d9f0875d56726f21a35a4870..f8fca20f3c9607adc211bc775c5de6488df64e80 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -269,11 +269,7 @@ context *context_read_options(int argc, char *argv[]) if(ctx->mcd_filename) ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); else - if(ctx->conll) - ctx->mcd_struct = mcd_build_conll07(); - else - ctx->mcd_struct = mcd_build_wplgfs(); - /* ctx->mcd_struct = mcd_build_ifpls(); */ + ctx->mcd_struct = mcd_build_wplgfs(); return ctx; } diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c index 77fb88b89d8959f68864817d91535c99fb3428fd..53ba95d2916cad21c2cd9ff88d541b010bf3b4fe 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c @@ -10,7 +10,7 @@ #include"feature_table.h" #include"dico.h" -void print_word_buffer(config *c, dico *dico_labels) +void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct) { int i; word *dep; @@ -25,10 +25,96 @@ void print_word_buffer(config *c, dico *dico_labels) printf("%s\t", label) ; else printf("_\t"); - if(word_get_sent_seg(dep) == 1) - printf("1\n") ; - else - printf("0\n"); + if(mcd_get_sent_seg_col(mcd_struct) == -1){ + if(word_get_sent_seg(dep) == 1) + printf("1") ; + else + printf("0"); + } + printf("\n"); + + } +} + + + +void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) +{ + int i; + word *w; + char *label; + char *buffer = NULL; + char *token = NULL; + int col_nb = 0; + + + for(i=0; i < config_get_buffer(c)->nbelem; i++){ + w = word_buffer_get_word_n(config_get_buffer(c), i); + + if((mcd_get_gov_col(mcd_struct) == -1) + && (mcd_get_label_col(mcd_struct) == -1) + && (mcd_get_sent_seg_col(mcd_struct) == -1)){ + printf("%s\t", word_get_input(w)); + printf("%d\t", word_get_gov(w)); + label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("%s\t", label) ; + else + printf("_\t"); + if(word_get_sent_seg(w) == 1) + printf("1\n") ; + else + printf("0\n"); + } + else{ + buffer = strdup(w->input); + token = strtok(buffer, "\t"); + col_nb = 0; + while(token){ + if(col_nb != 0) printf("\t"); + if(col_nb == mcd_get_gov_col(mcd_struct)){ + printf("%d", word_get_gov(w)); + } + else + if(col_nb == mcd_get_label_col(mcd_struct)){ + label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("%s", label) ; + else + printf("_"); + } + else + if(col_nb == mcd_get_sent_seg_col(mcd_struct)){ + if(word_get_sent_seg(w) == 1) + printf("1") ; + else + printf("0"); + } + else{ + word_print_col_n(stdout, w, col_nb); + } + col_nb++; + token = strtok(NULL, "\t"); + } + if((col_nb <= mcd_get_gov_col(mcd_struct)) || (mcd_get_gov_col(mcd_struct) == -1)){ + printf("\t%d", word_get_gov(w)); + } + if((col_nb <= mcd_get_label_col(mcd_struct)) || (mcd_get_label_col(mcd_struct) == -1)){ + label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("\t%s", label) ; + else + printf("\t_"); + } + if((col_nb <= mcd_get_sent_seg_col(mcd_struct)) || (mcd_get_sent_seg_col(mcd_struct) == -1)){ + if(word_get_sent_seg(w) == 1) + printf("\t1") ; + else + printf("\t0"); + } + printf("\n"); + free(buffer); + } } } @@ -55,69 +141,82 @@ void simple_decoder_parser_arc_eager(context *ctx) c = config_new(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ - config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); - mvt_code = feature_table_argmax(fv, ft, &max); - mvt_type = movement_parser_type(mvt_code); - mvt_label = movement_parser_label(mvt_code); - if(ctx->debug_mode){ fprintf(stdout, "***********************************\n"); config_print(stdout, c); - - vcode *vcode_array = feature_table_get_vcode_array(fv, ft); - - for(int i=0; i < 5; i++){ - printf("%d\t", i); - movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); - printf("\t%.4f\n", vcode_array[i].score); - } - free(vcode_array); - } - - if(ctx->trace_mode){ - index = word_get_index(word_buffer_b0(config_get_buffer(c))); - fprintf(stdout, "%d\t", index); + } + /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ + /* which means that the top of the stack got its eos status from input */ + if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){ + word_set_sent_seg(stack_top(config_get_stack(c)), -1); - stack_print(stdout, c->st); - fprintf(stdout, "\t"); + movement_parser_eos(c); + while(movement_parser_reduce(c)); + while(movement_parser_root(c, root_label)); - movement_parser_print(stdout, mvt_code, ctx->dico_labels); - fprintf(stdout, "\t"); - feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); - printf("%f\n", max1 - max2); - + /* mvt_code = MVT_PARSER_EOS; */ + if(ctx->debug_mode) printf("force EOS\n"); } - - result = 0; - switch(mvt_type){ - case MVT_PARSER_LEFT : - result = movement_parser_left_arc(c, mvt_label); - break; - case MVT_PARSER_RIGHT: - result = movement_parser_right_arc(c, mvt_label); - break; - case MVT_PARSER_REDUCE: - result = movement_parser_reduce(c); - break; - case MVT_PARSER_ROOT: - result = movement_parser_root(c, root_label); - break; - case MVT_PARSER_EOS: - result = movement_parser_eos(c); - break; - case MVT_PARSER_SHIFT: - result = movement_parser_shift(c); - } - - if(result == 0){ + else{ + config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); + mvt_code = feature_table_argmax(fv, ft, &max); if(ctx->debug_mode){ - fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + vcode *vcode_array = feature_table_get_vcode_array(fv, ft); + for(int i=0; i < 3; i++){ + printf("%d\t", i); + movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); + printf("\t%.4f\n", vcode_array[i].score); + } + free(vcode_array); + } + + if(ctx->trace_mode){ + index = word_get_index(word_buffer_b0(config_get_buffer(c))); + fprintf(stdout, "%d\t", index); + + stack_print(stdout, c->st); + fprintf(stdout, "\t"); + + movement_parser_print(stdout, mvt_code, ctx->dico_labels); + fprintf(stdout, "\t"); + feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); + printf("%f\n", max1 - max2); + + } + + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); + + result = 0; + switch(mvt_type){ + case MVT_PARSER_LEFT : + result = movement_parser_left_arc(c, mvt_label); + break; + case MVT_PARSER_RIGHT: + result = movement_parser_right_arc(c, mvt_label); + break; + case MVT_PARSER_REDUCE: + result = movement_parser_reduce(c); + break; + case MVT_PARSER_ROOT: + result = movement_parser_root(c, root_label); + break; + case MVT_PARSER_EOS: + result = movement_parser_eos(c); + break; + case MVT_PARSER_SHIFT: + result = movement_parser_shift(c); + } + + if(result == 0){ + if(ctx->debug_mode){ + fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + } + movement_parser_shift(c); } - movement_parser_shift(c); } } - - /* horrible hack */ + /* horrible hack: force the remaining element in the stack (if any) to be the root */ if(stack_nbelem(config_get_stack(c)) && (stack_top(config_get_stack(c)) == NULL)) stack_pop(config_get_stack(c)); @@ -126,7 +225,7 @@ void simple_decoder_parser_arc_eager(context *ctx) /* end of horrible hack */ if(!ctx->trace_mode) - print_word_buffer(c, ctx->dico_labels); + print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct); config_free(c); feat_vec_free(fv);