diff --git a/maca_trans_parser/src/maca_error_predictor_parser.c b/maca_trans_parser/src/maca_error_predictor_parser.c index ac6cae4162c454637cc803981fa0daf3a0587020..a9ce9dd6986f5a78ebb8d4f6b7c3ba80e00340d7 100644 --- a/maca_trans_parser/src/maca_error_predictor_parser.c +++ b/maca_trans_parser/src/maca_error_predictor_parser.c @@ -51,7 +51,7 @@ void decode_parser_set_linguistic_resources_filenames(context *ctx) if(!ctx->vocabs_filename){ strcpy(absolute_filename, ctx->maca_data_path); - strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); + strcat(absolute_filename, DEFAULT_VOCABS_PARSER_ERROR_PREDICTOR_FILENAME); ctx->vocabs_filename = strdup(absolute_filename); } @@ -61,12 +61,6 @@ void decode_parser_set_linguistic_resources_filenames(context *ctx) ctx->l_rules_filename = strdup(absolute_filename); } - if(!ctx->fann_filename){ - strcpy(absolute_filename, ctx->maca_data_path); - strcat(absolute_filename, DEFAULT_VOCABS_PARSER_ERROR_PREDICTOR_FILENAME); - ctx->fann_filename = strdup(absolute_filename); - } - if(!ctx->features_model_filename){ strcpy(absolute_filename, ctx->maca_data_path); strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); @@ -86,23 +80,19 @@ int main(int argc, char *argv[]) { context *ctx = context_read_options(argc, argv); decode_parser_check_options(ctx); - decode_parser_set_linguistic_resources_filenames(ctx); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - ctx->vocabs_error = dico_vec_read(ctx->fann_filename, ctx->hash_ratio); ctx->features_model_error = feat_model_read(ctx->l_rules_filename, feat_lib_build(), ctx->verbose); - ctx->mcd_struct_error = mcd_copy(ctx->mcd_struct); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); - mcd_link_to_dico(ctx->mcd_struct_error, ctx->vocabs_error, ctx->verbose); ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); - ctx->d_perceptron_features_error = dico_vec_get_dico(ctx->vocabs_error, (char *)"d_perceptron_features"); + ctx->d_perceptron_features_error = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features_error"); ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); diff --git a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c index 2a90b3ce84f1561374bcd5102a338e33e9a9660d..df46d044f2052b9f9aea8ad974e7b27ae7d16ea6 100644 --- a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c @@ -50,15 +50,26 @@ void maca_error_predictor_parser_mcf2cff_check_options(context *ctx) int config_is_equal_parser(config *c1, config *c2, int co1, int co2) {// (c1->history)->size > 0 && (c1->history)->size > 0 && ( /*printf("COracle : \n"); - mvt_stack_print(stdout,c1->history); - printf("CPredicted : \n"); - mvt_stack_print(stdout,c2->history);*/ + mvt_stack_print(stdout,c1->history); + printf("CDecoder : \n"); + mvt_stack_print(stdout,c2->history);*/ //printf("Or : %d\nPred : %d\n",mvt_stack_0(c1->history)->type,mvt_stack_0(c2->history)->type); - - if (mvt_stack_0(c1->history)->type != mvt_stack_0(c2->history)->type) - return 1; - else - return 0; + + if (mvt_stack_0(c1->history) && mvt_stack_0(c2->history) && mvt_stack_0(c1->history)->type != mvt_stack_0(c2->history)->type) + return 1; + else + return 0; + /* + int result = 0; + + if (mvt_stack_2(c1->history) && mvt_stack_2(c2->history) && mvt_stack_2(c1->history)->type != mvt_stack_2(c2->history)->type) + result = result *10 + 3; + if (mvt_stack_1(c1->history) && mvt_stack_1(c2->history) && mvt_stack_1(c1->history)->type != mvt_stack_1(c2->history)->type) + result = result *10 + 2; + if (mvt_stack_0(c1->history) && mvt_stack_0(c2->history) && mvt_stack_0(c1->history)->type != mvt_stack_0(c2->history)->type) + result = result *10 + 1; + return result; + */ } int testoracle = 0; @@ -74,12 +85,9 @@ int test_pred() { return 0; } -void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label_oracle, config *config_oracle, word_buffer *ref_oracle, int root_label_oracle, context *ctx, feat_vec *fv_decoder, int *sentence_nb) +void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label_oracle, config *config_oracle, word_buffer *ref_oracle, int root_label_oracle, context *ctx, int *sentence_nb) { if (!word_buffer_end(ref_oracle) && (*sentence_nb < ctx->sent_nb)) { - - // decoder here before movement - config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_decoder, LOOKUP_MODE); *mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle); *mvt_type_oracle = movement_parser_type(*mvt_code_oracle); @@ -87,6 +95,13 @@ void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label test_or(); + if(ctx->debug_mode){ + printf("Oracle : "); + movement_parser_print(stdout, *mvt_code_oracle, ctx->dico_labels); + printf("\n"); + config_print(stdout,config_oracle); + } + switch(*mvt_type_oracle){ case MVT_PARSER_EOS : movement_parser_eos(config_oracle); @@ -117,10 +132,10 @@ void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label //printf("Oracle finishes its job\n"); } } -void print_cff(context *ctx, FILE *output_file, config *config_oracle, config *config_predicted, int mvt_code_oracle, int mvt_code_predicted, feat_vec *fv_error) +void print_cff(context *ctx, FILE *output_file, config *config_oracle, config *config_decoder, int mvt_code_oracle, int mvt_code_decoder, feat_vec *fv_error) { if(!ctx->debug_mode || output_file!=stdout) { - fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted)))); + fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder)))); feat_vec_print(output_file, fv_error); } } @@ -133,29 +148,31 @@ void generate_training_file_error(FILE *output_file, context *ctx) int mvt_code_oracle; char mvt_type_oracle; int mvt_label_oracle; - feat_vec *fv_decoder = feat_vec_new(feature_types_nb); int sentence_nb = 0; int root_label_oracle = dico_string2int(ctx->dico_labels, (char *) ctx->root_label); word_buffer *ref_oracle = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct); FILE *mcf_file_oracle = myfopen(ctx->input_filename, "r"); - // prediction - config *config_predicted; - feat_vec *fv_error = feat_vec_new(feature_types_nb); - FILE *mcf_file_predicted = myfopen(ctx->input_filename, "r"); + // decoder + config *config_decoder; + FILE *mcf_file_decoder = myfopen(ctx->input_filename, "r"); feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); - int root_label_predicted; - int mvt_code_predicted; - int mvt_type_predicted; - int mvt_label_predicted; + int root_label_decoder; + int mvt_code_decoder; + int mvt_type_decoder; + int mvt_label_decoder; float max; + feat_vec *fv_decoder = feat_vec_new(feature_types_nb); int result; int argmax1, argmax2; float max1, max2; //int index; + + //error training + feat_vec *fv_error = feat_vec_new(feature_types_nb); + //dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); - //dico *dico_pos_error = dico_vec_get_dico(ctx->vocabs_error, (char *)"POS"); /* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */ @@ -167,108 +184,133 @@ void generate_training_file_error(FILE *output_file, context *ctx) mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_SENT_SEG); - root_label_predicted = dico_string2int(ctx->dico_labels, ctx->root_label); - if(root_label_predicted == -1) root_label_predicted = 0; - - config_predicted = config_new(mcf_file_predicted, ctx->mcd_struct_error, 5); config_oracle = config_new(mcf_file_oracle, mcd_struct_hyp, 5); - while((!word_buffer_end(ref_oracle) && (sentence_nb < ctx->sent_nb)) || !config_is_terminal(config_predicted)){ + root_label_decoder = dico_string2int(ctx->dico_labels, ctx->root_label); + if(root_label_decoder == -1) root_label_decoder = 0; + + mcd *mcd_struct_decoder = mcd_copy(mcd_struct_hyp); + mcd_remove_wf_column(mcd_struct_decoder, MCD_WF_FEATS); + + config_decoder = config_new(mcf_file_decoder, mcd_struct_decoder, 5); + + while((!word_buffer_end(ref_oracle) && (sentence_nb < ctx->sent_nb)) || !config_is_terminal(config_decoder)){ //oracle - oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_decoder, &sentence_nb); - - // predicted + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx, &sentence_nb); - /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ - /* which means that the top of the stack got its eos status from input */ - /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */ - - if((word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(config_predicted))) != MVT_PARSER_EOS)){ - word_set_sent_seg(stack_top(config_get_stack(config_predicted)), -1); - movement_parser_eos(config_predicted); - test_pred(); - while(movement_parser_reduce(config_predicted)) { - oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_decoder, &sentence_nb); - test_pred(); - //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_error); + // decoder + + config2feat_vec_cff(ctx->features_model, config_decoder, ctx->d_perceptron_features, fv_decoder, LOOKUP_MODE); + + mvt_code_decoder = feature_table_argmax(fv_decoder, ft, &max); + mvt_type_decoder = movement_parser_type(mvt_code_decoder); + mvt_label_decoder = movement_parser_label(mvt_code_decoder); + + /* forced EOS, oracle detect EOS */ + + if(mvt_type_oracle == MVT_PARSER_EOS) { + if(ctx->debug_mode) printf("EOS FORCED\n"); + mvt_type_decoder = MVT_PARSER_EOS; + movement_parser_eos(config_decoder); + while(movement_parser_reduce(config_decoder)) { + //oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx, &sentence_nb); + //test_pred(); + //print_cff(ctx, output_file, config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder, fv_error); } - while(movement_parser_root(config_predicted, root_label_predicted)) { - oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_decoder, &sentence_nb); - test_pred(); - //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_error); + movement_parser_reduce(config_oracle); + + while(movement_parser_root(config_decoder, root_label_decoder)) { + //oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx, &sentence_nb); + //test_pred(); + //print_cff(ctx, output_file, config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder, fv_error); } - } + movement_parser_root(config_oracle, root_label_oracle); + if(ctx->debug_mode) printf("b0? = %d / %d\n", (word_buffer_b0(config_oracle->bf))->index, (word_buffer_b0(config_decoder->bf))->index); - /* normal behaviour, ask classifier what is the next movement to do and do it */ - else{ + while((word_buffer_b0(config_decoder->bf))->index < (word_buffer_b0(config_oracle->bf))->index) + word_buffer_move_right(config_decoder->bf); + while((word_buffer_b0(config_decoder->bf))->index > (word_buffer_b0(config_oracle->bf))->index) + word_buffer_move_left(config_decoder->bf); - mvt_code_predicted = feature_table_argmax(fv_decoder, ft, &max); - mvt_type_predicted = movement_parser_type(mvt_code_predicted); - mvt_label_predicted = movement_parser_label(mvt_code_predicted); + if(ctx->debug_mode) printf("b0? = %d / %d\n", (word_buffer_b0(config_oracle->bf))->index, (word_buffer_b0(config_decoder->bf))->index); + continue; + } - //printf("DEBUGGG, label predicted : %d\n",mvt_label_predicted); - - if((mvt_type_predicted == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 0)){ - feature_table_argmax_1_2(fv_decoder, ft, &argmax1, &max1, &argmax2, &max2); - mvt_code_predicted = argmax2; - mvt_type_predicted = movement_parser_type(mvt_code_predicted); - mvt_label_predicted = movement_parser_label(mvt_code_predicted); - } + if(mvt_type_decoder == MVT_PARSER_EOS && mvt_type_oracle != MVT_PARSER_EOS) { + feature_table_argmax_1_2(fv_decoder, ft, &argmax1, &max1, &argmax2, &max2); + mvt_code_decoder = argmax2; + mvt_type_decoder = movement_parser_type(mvt_code_decoder); + mvt_label_decoder = movement_parser_label(mvt_code_decoder); + } - if(ctx->debug_mode){ - printf("Oracle : "); - movement_parser_print(stdout, mvt_code_oracle, ctx->dico_labels); - printf("\nPredicted : "); - movement_parser_print(stdout, mvt_code_predicted, ctx->dico_labels); - printf("\n"); - config_print(stdout,config_predicted); - if (mvt_code_oracle!=mvt_code_predicted) - fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n"); - else - fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n"); + if(ctx->debug_mode){ + vcode *vcode_array = feature_table_get_vcode_array(fv_decoder, ft); + for(int i=0; i < 3; i++){ + printf("%d\t", i); + movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); + printf("\t%.4f\n", vcode_array[i].score); } + free(vcode_array); + } - result = 0; - switch(mvt_type_predicted){ - case MVT_PARSER_LEFT : - result = movement_parser_left_arc(config_predicted, mvt_label_predicted); - break; - case MVT_PARSER_RIGHT: - result = movement_parser_right_arc(config_predicted, mvt_label_predicted); - break; - case MVT_PARSER_REDUCE: - result = movement_parser_reduce(config_predicted); - break; - case MVT_PARSER_ROOT: - result = movement_parser_root(config_predicted, root_label_predicted); - break; - case MVT_PARSER_EOS: - result = movement_parser_eos(config_predicted); - break; - case MVT_PARSER_SHIFT: - result = movement_parser_shift(config_predicted); - break; - } + //printf("DEBUGGG, label decoder : %d\n",mvt_label_decoder); + + /* if((mvt_type_decoder == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(config_decoder))) == 0)){ + feature_table_argmax_1_2(fv_decoder, ft, &argmax1, &max1, &argmax2, &max2); + mvt_code_decoder = argmax2; + mvt_type_decoder = movement_parser_type(mvt_code_decoder); + mvt_label_decoder = movement_parser_label(mvt_code_decoder); + }*/ + + if(ctx->debug_mode){ + printf("Decoder : "); + movement_parser_print(stdout, mvt_code_decoder, ctx->dico_labels); + printf("\n"); + config_print(stdout,config_decoder); + if (mvt_code_oracle!=mvt_code_decoder) + fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n"); + else + fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n"); + } + + result = 0; + switch(mvt_type_decoder){ + case MVT_PARSER_LEFT : + result = movement_parser_left_arc(config_decoder, mvt_label_decoder); + break; + case MVT_PARSER_RIGHT: + result = movement_parser_right_arc(config_decoder, mvt_label_decoder); + break; + case MVT_PARSER_REDUCE: + result = movement_parser_reduce(config_decoder); + break; + case MVT_PARSER_ROOT: + result = movement_parser_root(config_decoder, root_label_decoder); + break; + case MVT_PARSER_EOS: + result = movement_parser_eos(config_decoder); + break; + case MVT_PARSER_SHIFT: + result = movement_parser_shift(config_decoder); + break; + } - if(result == 0){ - if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); - result = movement_parser_shift(config_predicted); - if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */ if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n"); - if (!stack_is_empty(config_get_stack(config_predicted))) { - movement_parser_root(config_predicted, root_label_predicted); - test_pred(); - } - while(!stack_is_empty(config_get_stack(config_predicted))) { - oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_decoder, &sentence_nb); - movement_parser_root(config_predicted, root_label_predicted); - test_pred(); - } + if(result == 0){ + if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + result = movement_parser_shift(config_decoder); + if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */ + if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n"); + if (!stack_is_empty(config_get_stack(config_decoder))) { + movement_parser_root(config_decoder, root_label_decoder); + test_pred(); } - else { + while(!stack_is_empty(config_get_stack(config_decoder))) { + oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx, &sentence_nb); + movement_parser_root(config_decoder, root_label_decoder); test_pred(); } } @@ -276,19 +318,23 @@ void generate_training_file_error(FILE *output_file, context *ctx) test_pred(); } } + else { + test_pred(); + } + //error training - config2feat_vec_cff(ctx->features_model_error, config_predicted, ctx->d_perceptron_features_error, fv_error, TRAIN_MODE); - print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_error); + config2feat_vec_cff(ctx->features_model_error, config_decoder, ctx->d_perceptron_features_error, fv_error, TRAIN_MODE); + print_cff(ctx, output_file, config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder, fv_error); } fprintf(stdout,"\n"); config_free(config_oracle); - config_free(config_predicted); + config_free(config_decoder); feat_vec_free(fv_decoder); feat_vec_free(fv_error); feature_table_free(ft); fclose(mcf_file_oracle); - fclose(mcf_file_predicted); + fclose(mcf_file_decoder); } @@ -314,14 +360,6 @@ void error_parser_set_linguistic_resources_filename(context *ctx) strcat(absolute_filename, DEFAULT_MCF_DEV); ctx->input_filename = strdup(absolute_filename); } - - if(!ctx->mcd_filename) { - ctx->mcd_struct = mcd_build_wpmlgfs(); - } - - if(!ctx->cff_filename){ - //printf("cff -> stdout\n") - } if(!ctx->features_model_filename){ strcpy(absolute_filename, ctx->maca_data_path); @@ -329,12 +367,6 @@ void error_parser_set_linguistic_resources_filename(context *ctx) ctx->features_model_filename = strdup(absolute_filename); } - if(!ctx->f2p_filename){ - strcpy(absolute_filename, ctx->maca_data_path); - strcat(absolute_filename, DEFAULT_F2P_FILENAME); - ctx->f2p_filename = strdup(absolute_filename); - } - if(ctx->verbose){ fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); @@ -351,51 +383,45 @@ int main(int argc, char *argv[]) FILE *output_file; ctx = context_read_options(argc, argv); - //error_parser_set_linguistic_resources_filename(ctx); - ctx->f2p = form2pos_read(ctx->f2p_filename); - maca_error_predictor_parser_mcf2cff_check_options(ctx); - - ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); - ctx->mcd_struct_error = mcd_read(ctx->l_rules_filename, ctx->verbose); - - //error - mcd_extract_dico_from_corpus(ctx->mcd_struct_error, ctx->input_filename); - ctx->vocabs_error = mcd_build_dico_vec(ctx->mcd_struct_error); - - //parser - ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); + error_parser_set_linguistic_resources_filename(ctx); + maca_error_predictor_parser_mcf2cff_check_options(ctx); + //mcd_build_wpmlgfs(); + ctx->mcd_struct = mcd_build_wpmlgfs();//mcd_read(ctx->mcd_filename, ctx->verbose); + //ctx->mcd_struct_error = mcd_build_wpmlgfs();//mcd_read(ctx->l_rules_filename, ctx->verbose); + //error - ctx->d_perceptron_features_error = dico_new((char *)"d_perceptron_features", 10000000); ctx->features_model_error = feat_model_read(ctx->fann_filename, feat_lib_build(), ctx->verbose); - - //parser - ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); + //mcd_extract_dico_from_corpus(ctx->mcd_struct_error, ctx->input_filename); + //ctx->vocabs_error = mcd_build_dico_vec(ctx->mcd_struct_error); + ctx->d_perceptron_features_error = dico_new((char *)"d_perceptron_features_error", 10000000); + //oracle + decoder ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); + ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); - ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); - + ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); if(ctx->dico_labels == NULL){ fprintf(stderr, "cannot find label names\n"); return 1; } ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; - - feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); + ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); + /* add the feature dictionnary to the dico vector */ - dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); - dico_vec_add(ctx->vocabs_error, ctx->d_perceptron_features_error); + //dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); + /* open output file */ output_file = (ctx->cff_filename) ? myfopen_no_exit(ctx->cff_filename, "w") : stdout; generate_training_file_error(output_file, ctx); - - dico_vec_print(ctx->dnn_model_filename, ctx->vocabs_error); + + dico_vec_add(ctx->vocabs, ctx->d_perceptron_features_error); + dico_vec_print(ctx->dnn_model_filename, ctx->vocabs); if(ctx->cff_filename) fclose(output_file); diff --git a/maca_trans_parser/src/maca_error_predictor_tagger.c b/maca_trans_parser/src/maca_error_predictor_tagger.c index c43344722aac451ca4ec143cd0b363d087986f0a..573cec68a0d7330ee8efbd4a1b80b9e056f60532 100644 --- a/maca_trans_parser/src/maca_error_predictor_tagger.c +++ b/maca_trans_parser/src/maca_error_predictor_tagger.c @@ -51,7 +51,7 @@ void decode_tagger_set_linguistic_resources_filenames(context *ctx) if(!ctx->vocabs_filename){ strcpy(absolute_filename, ctx->maca_data_path); - strcat(absolute_filename, DEFAULT_VOCABS_TAGGER_FILENAME); + strcat(absolute_filename, DEFAULT_VOCABS_TAGGER_ERROR_PREDICTOR_FILENAME); ctx->vocabs_filename = strdup(absolute_filename); } @@ -105,20 +105,18 @@ int main(int argc, char *argv[]) ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - ctx->vocabs_error = dico_vec_read(ctx->fann_filename, ctx->hash_ratio); ctx->features_model_error = feat_model_read(ctx->l_rules_filename, feat_lib_build(), ctx->verbose); //ctx->mcd_struct = mcd_read("/home/mathis/maca_data2/fr/bin/maca_trans_tagger.mcd",ctx->verbose); //ctx->mcd_struct_error = mcd_read("/home/mathis/maca_data2/fr/bin/maca_trans_tagger.mcd",ctx->verbose); - ctx->mcd_struct_error = mcd_build_wpmlgfs(); + //ctx->mcd_struct_error = mcd_build_wpmlgfs(); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); - mcd_link_to_dico(ctx->mcd_struct_error, ctx->vocabs_error, ctx->verbose); ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); - ctx->d_perceptron_features_error = dico_vec_get_dico(ctx->vocabs_error, (char *)"d_perceptron_features"); + ctx->d_perceptron_features_error = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features_error"); char perc_error_filename[500]; strcpy(perc_error_filename, ctx->maca_data_path); diff --git a/maca_trans_parser/src/maca_error_predictor_tagger_mcf2cff.c b/maca_trans_parser/src/maca_error_predictor_tagger_mcf2cff.c index 00d553a99f908d24aca820ecef2b56aa33d24ddc..95731889c3e253e48f12b36c9292cd14cbd70d34 100644 --- a/maca_trans_parser/src/maca_error_predictor_tagger_mcf2cff.c +++ b/maca_trans_parser/src/maca_error_predictor_tagger_mcf2cff.c @@ -113,9 +113,13 @@ void generate_error_train(FILE *output_file, context *ctx) int postag_predicted; float max; dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); - + + + mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct); + //mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_POS); + config_oracle = config_new(mcf_file_oracle, ctx->mcd_struct, 5); - config_predicted = config_new(mcf_file_predicted, ctx->mcd_struct, 5); + config_predicted = config_new(mcf_file_predicted, mcd_struct_hyp, 5); while(!config_is_terminal(config_oracle)){ if(ctx->f2p){ @@ -124,11 +128,11 @@ void generate_error_train(FILE *output_file, context *ctx) } // oracle - config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_decoder, LOOKUP_MODE); + config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv_decoder, LOOKUP_MODE); postag_oracle = oracle_tagger(config_oracle); if(ctx->debug_mode){ - printf("Oracle : "); + printf("Oracle : "); print_word_simple(word_buffer_b0(config_oracle->bf), ctx->mcd_struct, dico_pos, postag_oracle); } @@ -238,35 +242,28 @@ int main(int argc, char *argv[]) FILE *output_file; ctx = context_read_options(argc, argv); - error_tagger_set_linguistic_resources_filename(ctx); + // error_tagger_set_linguistic_resources_filename(ctx); ctx->f2p = form2pos_read(ctx->f2p_filename); maca_error_predictor_check_options(ctx); ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); - ctx->mcd_struct_error = mcd_read(ctx->mcd_filename, ctx->verbose);//mcd_read(ctx->l_rules_filename, ctx->verbose); //error - mcd_extract_dico_from_corpus(ctx->mcd_struct_error, ctx->input_filename); - ctx->vocabs_error = mcd_build_dico_vec(ctx->mcd_struct_error); + //mcd_extract_dico_from_corpus(ctx->mcd_struct_error, ctx->input_filename); + //ctx->vocabs_error = mcd_build_dico_vec(ctx->mcd_struct_error); //tagger ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); - feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); - //error - ctx->d_perceptron_features_error = dico_new((char *)"d_perceptron_features", 10000000); + ctx->d_perceptron_features_error = dico_new((char *)"d_perceptron_features_error", 10000000); ctx->features_model_error = feat_model_read(ctx->fann_filename, feat_lib_build(), ctx->verbose); //tagger ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); - - /* add the feature dictionnary to the dico vector */ - dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); - dico_vec_add(ctx->vocabs_error, ctx->d_perceptron_features_error); - + /* open output file */ if(ctx->cff_filename) output_file = myfopen(ctx->cff_filename, "w"); @@ -274,8 +271,11 @@ int main(int argc, char *argv[]) output_file = stdout; generate_error_train(output_file,ctx); - - dico_vec_print(ctx->dnn_model_filename, ctx->vocabs_error); + + /* add the feature dictionnary to the dico vector */ + //dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); + dico_vec_add(ctx->vocabs, ctx->d_perceptron_features_error); + dico_vec_print(ctx->dnn_model_filename, ctx->vocabs); if(ctx->cff_filename) fclose(output_file); diff --git a/maca_trans_parser/src/movement_parser_arc_eager.c b/maca_trans_parser/src/movement_parser_arc_eager.c index baa4c62e8f1684b296df6dde3ca69908c54f833e..63c23aeb8e3328d6227a0a086d850002d203613c 100644 --- a/maca_trans_parser/src/movement_parser_arc_eager.c +++ b/maca_trans_parser/src/movement_parser_arc_eager.c @@ -103,7 +103,7 @@ int movement_parser_root_undo(config *c) int movement_parser_undo(config *c) { int result; - int mvt_type = mvt_get_type(mvt_stack_top(config_get_history(c))); + int mvt_type = movement_parser_type(mvt_get_type(mvt_stack_top(config_get_history(c)))); switch(mvt_type){ case MVT_PARSER_LEFT : result = movement_parser_left_arc_undo(c); @@ -122,6 +122,9 @@ int movement_parser_undo(config *c) break; case MVT_PARSER_SHIFT: result = movement_parser_shift_undo(c); + break; + default : + printf("type pas reconnu :/ \n"); } return result; } diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c index 2a01f10c9b7f4a503902cf3cc574db5b245e3595..78a5d3505d4ba9ba89fdbcf4179c2d2176dbb725 100644 --- a/maca_trans_parser/src/movements.c +++ b/maca_trans_parser/src/movements.c @@ -37,7 +37,7 @@ int movement_left_arc(config *c, int movement_code, int label) /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ //printf("BEGINNING 2\n"); /* word on top of the stack should not have a governor */ // com here bug ? - //if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; + if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; //printf("BEGINNING 3\n"); word *gov = word_buffer_b0(config_get_buffer(c)); word *dep = stack_top(config_get_stack(c)); diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c index 9adb4e659063422db8de1316a441305c7c15eb6f..75d14de0f40c0756f2100fcbf704be4cee8e8a68 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c @@ -93,30 +93,27 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_error_filename) { + config *c = NULL; FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; - feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); - feature_table *ft_error = feature_table_load(perc_error_filename, ctx->verbose); - feat_vec *fv = feat_vec_new(feature_types_nb); - feat_vec *fv_error = feat_vec_new(feature_types_nb); int root_label; int mvt_code; int mvt_type; int mvt_label; - float max; - float max_err; - - int error_detect; - - config *c = NULL; int result; - int argmax1, argmax2; float max1, max2; int index; + + int error_detect = 0; + int err_mvt_code = 0; + feature_table *ft_error = feature_table_load(perc_error_filename, ctx->verbose); + float max_err; + feat_vec *fv_error = feat_vec_new(feature_types_nb); + int no_back = 0; root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; @@ -164,17 +161,16 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er config2feat_vec_cff(ctx->features_model_error, c, ctx->d_perceptron_features_error, fv_error, LOOKUP_MODE); error_detect = feature_table_argmax(fv_error, ft_error, &max_err); + vcode *vcode_array_err = feature_table_get_vcode_array(fv_error, ft_error); if(ctx->debug_mode){ fprintf(stdout, " ***Error detection***\n"); - vcode *vcode_array_err = feature_table_get_vcode_array(fv_error, ft_error); for(int i=0; i < 2; i++){ fprintf(stdout, " %d\t", i); fprintf(stdout, "%d\t%.4f\n", vcode_array_err[i].class_code, vcode_array_err[i].score); } - free(vcode_array_err); } - if((mvt_type == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(c))) == 0)){ + /* if((mvt_type == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(c))) == 0)){ if(ctx->verbose) fprintf(stderr, "the classifier did predict EOS but this is not the case\n"); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); @@ -182,8 +178,56 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er mvt_type = movement_parser_type(mvt_code); mvt_label = movement_parser_label(mvt_code); + }*/ + + // If there is an error : + float scoreError = vcode_array_err[0].score; + free(vcode_array_err); + + if(error_detect == 1 && scoreError >= 10 && !ctx->trace_mode && mvt_stack_0(c->history) && ctx->force && c->bf->current_index < c->bf->nbelem -1/*its to avoid problems with EOS, PPT :-> get_pos(b0) == ponct*/) { + if(no_back) + no_back = 0; + else { + err_mvt_code = mvt_get_type(mvt_stack_top(config_get_history(c))); // issue + movement_parser_undo(c); + config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); + vcode *vcode_array = feature_table_get_vcode_array(fv, ft); + + for(int i=0; i < ft->classes_nb-1; i++){ + if (err_mvt_code == vcode_array[i].class_code) { + mvt_code = vcode_array[i+1].class_code; + break; + } + } + if(err_mvt_code == mvt_code){ + mvt_code = feature_table_argmax(fv, ft, &max); + if(ctx->debug_mode){ + fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : "); + movement_parser_print(stdout, mvt_code, ctx->dico_labels); + fprintf(stdout,"\n"); + + } + no_back = 1; + + } + + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); + + if(ctx->debug_mode){ + fprintf(stdout, "***********************************\n"); + config_print(stdout, c); + fprintf(stdout,"Old : "); + movement_parser_print(stdout, err_mvt_code, ctx->dico_labels); + fprintf(stdout, ", New : "); + movement_parser_print(stdout, mvt_code, ctx->dico_labels); + fprintf(stdout, "\n"); + } + + } } + // normal case : result = 0; switch(mvt_type){ case MVT_PARSER_LEFT : @@ -213,6 +257,7 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er while(!stack_is_empty(config_get_stack(c))) movement_parser_root(c, root_label); } + } } } diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index 7e3811d9985c65aed3300dc1ab6b713fff58fc75..1859af00b1e783ab1976f29769b1ead451d02614 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -102,7 +102,7 @@ void simple_decoder_tagger(context *ctx) add_signature_to_words_in_word_buffer(c->bf, ctx->f2p); b0 = word_buffer_b0(c->bf); - postag = word_get_pos(b0); + postag = -1;//word_get_pos(b0); if(ctx->debug_mode){ fprintf(stderr, "***********************************\n"); diff --git a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c index 4d96a0909ddb4916c10727c2ef24e40d9bbafa96..16b26bd282dff1f6f868ee92fa2d0e3d77315b2c 100644 --- a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c +++ b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c @@ -225,7 +225,7 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena float scoreError = vcode_array_err[0].score; free(vcode_array_err); - if (error_detect == 1 && !ctx->trace_mode && scoreError > 0.28 && !no_back && word_buffer_bm1(c->bf) && word_buffer_bm2(c->bf)){ + if (error_detect == 1 && !ctx->trace_mode /*&& scoreError > 0.28*/ && !no_back && word_buffer_bm1(c->bf) && word_buffer_bm2(c->bf)){ //backward(c); backward(c); nb -= 2; @@ -267,8 +267,8 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena } if(postag==postag_err){ + postag = feature_table_argmax(fv, ft, &max); if(ctx->debug_mode){ - postag = feature_table_argmax(fv, ft, &max); fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : %s\n", dico_int2string(dico_pos, postag)); } no_back = 1;