From bc98dfbd81f480d595bbb6410119b9c558502435 Mon Sep 17 00:00:00 2001 From: Mathux <mathis.petrovich@gmail.com> Date: Mon, 26 Jun 2017 15:38:44 +0200 Subject: [PATCH] error on beginning --- ...error_predictor_parser_arc_eager_mcf2cff.c | 137 ++++++++++++------ .../simple_decoder_tagger_error_predictor.c | 98 +++++-------- 2 files changed, 131 insertions(+), 104 deletions(-) diff --git a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c index 0abe7af..5f20451 100644 --- a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c @@ -52,6 +52,70 @@ int config_is_equal_parser(config *c1, config *c2, int co1, int co2) // 3 for bm return ((co1==co2) ? 0 : 1); } +int testoracle = 0; +int testpred = 0; + +int test_or() { + //printf("Oracle : %d\n",testoracle++); + return 0; +} + +int test_pred() { + //printf("Pred : %d\n",testpred++); + return 0; +} + +void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label_oracle, config *config_oracle, word_buffer *ref_oracle, int root_label_oracle, context *ctx, feat_vec *fv_oracle, int *sentence_nb) +{ + if (!word_buffer_end(ref_oracle) && (*sentence_nb < ctx->sent_nb)) { + + *mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle); + *mvt_type_oracle = movement_parser_type(*mvt_code_oracle); + *mvt_label_oracle = movement_parser_label(*mvt_code_oracle); + + test_or(); + + config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE); + + switch(*mvt_type_oracle){ + case MVT_PARSER_EOS : + movement_parser_eos(config_oracle); + (*sentence_nb)++; + if((*sentence_nb % 100) == 0) + fprintf(stderr, "\rsentence %d", *sentence_nb); + break; + case MVT_PARSER_LEFT : + movement_parser_left_arc(config_oracle, *mvt_label_oracle); + break; + case MVT_PARSER_RIGHT : + movement_parser_right_arc(config_oracle, *mvt_label_oracle); + word_buffer_move_right(ref_oracle); + break; + case MVT_PARSER_REDUCE : + movement_parser_reduce(config_oracle); + break; + case MVT_PARSER_ROOT : + movement_parser_root(config_oracle, root_label_oracle); + break; + case MVT_PARSER_SHIFT : + movement_parser_shift(config_oracle); + word_buffer_move_right(ref_oracle); + break; + } + } + else { + //printf("Oracle finishes its job\n"); + } +} +void print_cff(context *ctx, FILE *output_file, config *config_oracle, config *config_predicted, int mvt_code_oracle, int mvt_code_predicted, feat_vec *fv_predicted) +{ + if(!ctx->debug_mode || output_file!=stdout) { + fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted)))); + feat_vec_print(output_file, fv_predicted); + } +} + + void generate_training_file_error(FILE *output_file, context *ctx) { // oracle @@ -100,40 +164,11 @@ void generate_training_file_error(FILE *output_file, context *ctx) config_oracle = config_new(mcf_file_oracle, mcd_struct_hyp, 5); while((!word_buffer_end(ref_oracle) && (sentence_nb < ctx->sent_nb)) || !config_is_terminal(config_predicted)){ - - mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle); - mvt_type_oracle = movement_parser_type(mvt_code_oracle); - mvt_label_oracle = movement_parser_label(mvt_code_oracle); + //oracle + + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb); - config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE); - - switch(mvt_type_oracle){ - case MVT_PARSER_EOS : - movement_parser_eos(config_oracle); - sentence_nb++; - if((sentence_nb % 100) == 0) - fprintf(stderr, "\rsentence %d", sentence_nb); - break; - case MVT_PARSER_LEFT : - movement_parser_left_arc(config_oracle, mvt_label_oracle); - break; - case MVT_PARSER_RIGHT : - movement_parser_right_arc(config_oracle, mvt_label_oracle); - word_buffer_move_right(ref_oracle); - break; - case MVT_PARSER_REDUCE : - movement_parser_reduce(config_oracle); - break; - case MVT_PARSER_ROOT : - movement_parser_root(config_oracle, root_label_oracle); - break; - case MVT_PARSER_SHIFT : - movement_parser_shift(config_oracle); - word_buffer_move_right(ref_oracle); - break; - } - // predicted /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ @@ -143,8 +178,18 @@ void generate_training_file_error(FILE *output_file, context *ctx) if((word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(config_predicted))) != MVT_PARSER_EOS)){ word_set_sent_seg(stack_top(config_get_stack(config_predicted)), -1); movement_parser_eos(config_predicted); - while(movement_parser_reduce(config_predicted)); - while(movement_parser_root(config_predicted, root_label_predicted)); + test_pred(); + while(movement_parser_reduce(config_predicted)) { + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb); + test_pred(); + //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted); + + } + while(movement_parser_root(config_predicted, root_label_predicted)) { + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb); + test_pred(); + //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted); + } } /* normal behaviour, ask classifier what is the next movement to do and do it */ @@ -199,19 +244,27 @@ void generate_training_file_error(FILE *output_file, context *ctx) if(result == 0){ result = movement_parser_shift(config_predicted); if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */ - while(!stack_is_empty(config_get_stack(config_predicted))) + if (!stack_is_empty(config_get_stack(config_predicted))) { + movement_parser_root(config_predicted, root_label_predicted); + test_pred(); + } + while(!stack_is_empty(config_get_stack(config_predicted))) { + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb); movement_parser_root(config_predicted, root_label_predicted); + test_pred(); + } + } + else { + test_pred(); } } + else { + test_pred(); + } } - - if(!ctx->debug_mode || output_file!=stdout) { - fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted)))); - feat_vec_print(output_file, fv_predicted); - } - - + print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted); } + fprintf(stdout,"\n"); /* config_free(c); feat_vec_free(fv); diff --git a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c index bc864f3..1e98a89 100644 --- a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c +++ b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c @@ -132,6 +132,8 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena char *impr[300000]; int nb = 0; + + int no_back = 0; c = config_new(f, ctx->mcd_struct, 5); @@ -189,6 +191,25 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena break; } } + else if(ctx->trace_mode) { + switch (error_detect) { + case 0 : // No errors detected + sprintf(impr[nb]+strlen(impr[nb]),"\ttrue\t0"); + break; + + case 1 : + sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t1"); + break; + + case 2 : + sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t2"); + break; + + case 3 : + sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t3"); + break; + } + } sprintf(impr[nb]+strlen(impr[nb]),"\n"); nb +=1; @@ -203,7 +224,7 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena } free(vcode_array_err); - if (error_detect == 3){// && (vcode_array_err[0].score-vcode_array_err[1].score)>2.5) { + if (error_detect == 3 && !ctx->trace_mode && !no_back && word_buffer_bm1(c->bf) && word_buffer_bm2(c->bf)){// && (vcode_array_err[0].score-vcode_array_err[1].score)>2.5) { backward(c); backward(c); nb -= 3; @@ -230,8 +251,8 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena free(vcode_array); if(ctx->debug_mode){ - fprintf(stderr, "***********************************\n"); - config_print(stderr, c); + fprintf(stdout, "***********************************\n"); + config_print(stdout, c); } if(ctx->debug_mode){ @@ -241,78 +262,31 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score); } free(vcode_arraye); - fprintf(stderr, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag)); + fprintf(stdout, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag)); } - if(postag==postag_err) - { - printf("ERROR PREDICTOR, NO CHOICE LEFT\n"); - exit(1); - } - word_set_pos(word_buffer_b0(c->bf), postag); - string_print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag,&impr[nb]); - if(ctx->debug_mode) - sprintf(impr[nb]+strlen(impr[nb]),"\t✐\t_\n"); - else - sprintf(impr[nb]+strlen(impr[nb]),"\n"); - nb += 1; - } - - else if (error_detect == 2 && ctx->force) { - backward(c); - nb -= 2; - - if(ctx->f2p) - add_signature_to_words_in_word_buffer(c->bf, ctx->f2p); - - postag_err = word_get_pos(word_buffer_b0(c->bf)); - - postag = postag_err; - config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); - - vcode *vcode_array = feature_table_get_vcode_array(fv, ft); - - int debug_choice; - for(int i=0; i < ft->classes_nb-1; i++){ - if (postag_err == vcode_array[i].class_code) { - postag = vcode_array[i+1].class_code; - debug_choice = i+1; - break; + if(postag==postag_err){ + if(ctx->debug_mode){ + postag = feature_table_argmax(fv, ft, &max); + fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : %s\n", dico_int2string(dico_pos, postag)); } + no_back = 1; + printf("test\n"); } - - free(vcode_array); - if(ctx->debug_mode){ - fprintf(stderr, "***********************************\n"); - config_print(stderr, c); - } - - if(ctx->debug_mode){ - vcode *vcode_arraye = feature_table_get_vcode_array(fv, ft); - for(int i=debug_choice-1; i < debug_choice+2; i++){//postag_err+3; i++){ - fprintf(stdout, "%d\t", i); - fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score); - } - free(vcode_arraye); - fprintf(stderr, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag)); - } - - if(postag==postag_err) - { - printf("ERROR PREDICTOR, NO CHOICE LEFT\n"); - exit(1); - } word_set_pos(word_buffer_b0(c->bf), postag); string_print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag,&impr[nb]); if(ctx->debug_mode) - sprintf(impr[nb]+strlen(impr[nb]),"\t⚠\t_\n"); + sprintf(impr[nb]+strlen(impr[nb]),"\t✐\t_\n"); else sprintf(impr[nb]+strlen(impr[nb]),"\n"); nb += 1; } + + else if(error_detect == 3 && !ctx->trace_mode && no_back) + no_back = 0; + word_buffer_move_right(c->bf); - } for (int i = 0; i < nb; i++) { printf("%s",impr[i]); -- GitLab