diff --git a/donne/Makefile b/donne/Makefile index 4e1bc1bf2a0637e03361606ddd992ba2c18db5fd..5c223d818cbf3dc645f6ff1fe56227855424da85 100644 --- a/donne/Makefile +++ b/donne/Makefile @@ -2,14 +2,8 @@ all: ocamlbuild -use-ocamlfind recuptest.native mv recuptest.native recuptest - ocamlbuild -use-ocamlfind rapp.native - mv rapp.native rapp - - ocamlbuild -use-ocamlfind ref.native - mv ref.native ref - - ocamlbuild -use-ocamlfind pred.native - mv pred.native pred + ocamlbuild -use-ocamlfind ref2.native + mv ref2.native ref2 ocamlbuild -use-ocamlfind calcul.native mv calcul.native calcul diff --git a/donne/calcul.ml b/donne/calcul.ml index 4df14c8f63bfc120b4981945fd33bdf16ba6f98a..24149987a04f31914bedc11f2168ef7f33ee1aed 100644 --- a/donne/calcul.ml +++ b/donne/calcul.ml @@ -1,6 +1,7 @@ exception Finish -exception Not_a_check - +exception Normal + + let arrayt = Array.make_matrix 4 3 0 let readline () = @@ -14,12 +15,21 @@ let rap a b = 100. *. (af /. bf) let _ = + let arg = ref 0 in try while true do readline () done with | Finish | End_of_file -> + try + arg := int_of_string (Sys.argv.(1)); + if (!arg = 2) then + let _ = Printf.printf "P0=%.1f%%, P1=%.1f%%\n" (rap (arrayt.(0).(2)) (arrayt.(0).(1))) (rap (arrayt.(1).(2)) (arrayt.(1).(1))) in + Printf.printf "R0=%.1f%%, R1=%.1f%%\n" (rap (arrayt.(0).(2)) (arrayt.(0).(0))) (rap (arrayt.(1).(2)) (arrayt.(1).(0))) + else raise Normal + with + | _ -> Printf.printf "P0=%.1f%%, P1=%.1f%%, P2=%.1f%%, P3=%.1f%%\n" (rap (arrayt.(0).(2)) (arrayt.(0).(1))) (rap (arrayt.(1).(2)) (arrayt.(1).(1))) (rap (arrayt.(2).(2)) (arrayt.(2).(1))) (rap (arrayt.(3).(2)) (arrayt.(3).(1))); Printf.printf "R0=%.1f%%, R1=%.1f%%, R2=%.1f%%, R3=%.1f%%\n" (rap (arrayt.(0).(2)) (arrayt.(0).(0))) (rap (arrayt.(1).(2)) (arrayt.(1).(0))) (rap (arrayt.(2).(2)) (arrayt.(2).(0))) (rap (arrayt.(3).(2)) (arrayt.(3).(0))) diff --git a/donne/recuptest.ml b/donne/recuptest.ml index 
e7e5021f05f8f230754fa54140d1feea6f3837e5..2fec73dd2bcb5ea3860bfab02917903735c491c6 100644 --- a/donne/recuptest.ml +++ b/donne/recuptest.ml @@ -1,10 +1,10 @@ exception Finish let readline () = - let a,b,c,d,e,ver = Scanf.scanf "%s %s %s %s %s\n" (fun a b c d e -> (a,b,c,d,e,b=e)) in + let a,cla,re = Scanf.scanf "%s %s %d %s\n" (fun a b c d -> (a,c,b=d)) in if a="" then raise Finish else - Printf.printf "%s %s %b\n" c d ver + Printf.printf "%b %d\n" re cla let _ = try diff --git a/donne/ref2.ml b/donne/ref2.ml new file mode 100644 index 0000000000000000000000000000000000000000..281b239cecea62124dcdd420966d081515ba20b1 --- /dev/null +++ b/donne/ref2.ml @@ -0,0 +1,15 @@ +let enCours = ref (true,0) + +let rec readline () = + let reB,_ = !enCours in + enCours := Scanf.scanf "%b %d\n" (fun re cla -> (re,cla)); + let _,cla = !enCours in + if not reB then Printf.printf "%d %d\n" 1 cla + else Printf.printf "%d %d\n" 0 cla; + readline() + +let _ = + try + readline () + with + | End_of_file -> () diff --git a/donne/testError/analyse.ml b/donne/testError/analyse.ml index ddfc0d76b744fb3f554976bd429780b7123fb94e..b061758bf51630f4501c10973670e3b4b219edf1 100644 --- a/donne/testError/analyse.ml +++ b/donne/testError/analyse.ml @@ -51,9 +51,10 @@ let _ = else begin printf "*******Score*******\n"; - printf "Tagger\tvrai = %d\tfaux = %d\t te = %.2f%%\n" !okTag !pokTag (tot pokTag okTag); - printf "Error\tvrai = %d\tfaux = %d\t te = %.2f%%\n" !okError !pokError (tot pokError okError); + printf "Tagger\tvrai = %d\tfaux = %d\tte = %.2f%%\ttr = %.2f%%\n" !okTag !pokTag (tot pokTag okTag) (100. -. tot pokTag okTag); + printf "Error\tvrai = %d\tfaux = %d\tte = %.2f%%\ttr = %.2f%%\n" !okError !pokError (tot pokError okError) (100. -. 
tot pokError okError); printf "Error fait %d de différent et mieux que Tag\n" !diffEj; - printf "Tag fait %d de différent et mieux que Error\n" !diffTj + printf "Tag fait %d de différent et mieux que Error\n" !diffTj; + printf "Réduction du taux d'échec : %.2f%%\n" (100.*.((tot pokTag okTag)-.(tot pokError okError))/.(tot pokTag okTag)) end diff --git a/donne/testError/script.sh b/donne/testError/script.sh index e49c51cc69da2121406f9862235fc40e3b7957c0..8a5e7af99638845b937f5054d440af9d010b97b8 100755 --- a/donne/testError/script.sh +++ b/donne/testError/script.sh @@ -1,11 +1,26 @@ -cut -f1 ~/maca_data2/fr/data/treebank/$1.mcf > ./data/$1.input -cat ./data/$1.input | maca_error_predictor_tagger | cut -f2 > ./data/$1_error.output -cat ./data/$1.input | maca_trans_tagger | cut -f2 > ./data/$1_tag.output -cut -f2 ~/maca_data2/fr/data/treebank/$1.mcf > ./data/$1_ref.output -paste ./data/$1_ref.output ./data/$1_tag.output ./data/$1_error.output > ./data/$1.data -cat ./data/$1.data | ./analyse $2 -paste data/$1.input data/$1_ref.output ./data/$1_tag.output ./data/$1_error.output > ./data/$1.anfm - -#cut -f2 ~/maca_data2/fr/data/treebank/$1.mcf > $1.correct -#paste ./data/pos_$1.pred ./data/pos_$1.correct > ./data/pos_$1.ok -#cat ./data/pos_$1.ok | ./recuptest | ./ref | ./pred | ./calcul +# Usage: ./script.sh [NAME] -- NAME is the treebank file stem (NAME.mcf); defaults to new_dev when omitted. +if [ -z "$1" ] +then + name=new_dev +else + name=$1 +fi +echo "=> Input : $name.mcf" +cut -f1 ~/maca_data2/fr/data/treebank/"$name".mcf > ./data/"$name".input + +echo "=> Ref" +cut -f2 ~/maca_data2/fr/data/treebank/"$name".mcf > ./data/"$name"_ref.output + +echo "=> Tagger" +cat ./data/"$name".input | maca_trans_tagger | cut -f2 > ./data/"$name"_tag.output + +echo "=> Error predictor" +cat ./data/"$name".input | maca_error_predictor_tagger | cut -f2 > ./data/"$name"_error.output +echo "=> Results" +paste ./data/"$name"_ref.output ./data/"$name"_tag.output ./data/"$name"_error.output > ./data/"$name".data +cat ./data/"$name".data | ./analyse +#paste data/"$name".input data/"$name"_ref.output
./data/"$name"_tag.output ./data/"$name"_error.output > ./data/"$name".anfm + +#cut -f2 ~/maca_data2/fr/data/treebank/"$name".mcf > "$name".correct +#paste ./data/pos_"$name".pred ./data/pos_"$name".correct > ./data/pos_"$name".ok +#cat ./data/pos_"$name".ok | ./recuptest | ./ref | ./pred | ./calcul diff --git a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c index df46d044f2052b9f9aea8ad974e7b27ae7d16ea6..0547dc325dc9885c12042350491a9dbafc32f2df 100644 --- a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c @@ -102,6 +102,14 @@ void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label config_print(stdout,config_oracle); } + if(ctx->force) { + movement_parser_print(stdout, *mvt_code_oracle, ctx->dico_labels); + int mvt_code = *mvt_code_oracle; + if(mvt_code == 0 || mvt_code == 1 || mvt_code == 2 || mvt_code == 3) {//SHIFT REDUCE ROOT OR EOS + fprintf(stdout," _"); + } + fprintf(stdout,"\n"); + } switch(*mvt_type_oracle){ case MVT_PARSER_EOS : movement_parser_eos(config_oracle); @@ -134,7 +142,7 @@ void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label } void print_cff(context *ctx, FILE *output_file, config *config_oracle, config *config_decoder, int mvt_code_oracle, int mvt_code_decoder, feat_vec *fv_error) { - if(!ctx->debug_mode || output_file!=stdout) { + if((!ctx->debug_mode || output_file!=stdout) && !ctx->force) { fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder)))); feat_vec_print(output_file, fv_error); } @@ -324,9 +332,12 @@ void generate_training_file_error(FILE *output_file, context *ctx) //error training config2feat_vec_cff(ctx->features_model_error, config_decoder, ctx->d_perceptron_features_error, fv_error, TRAIN_MODE); - print_cff(ctx,
output_file, config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder, fv_error); + if(!ctx->force) + print_cff(ctx, output_file, config_oracle, config_decoder, mvt_code_oracle, mvt_code_decoder, fv_error); } - fprintf(stdout,"\n"); + if(!ctx->force) + fprintf(stdout,"\n"); + config_free(config_oracle); config_free(config_decoder); feat_vec_free(fv_decoder); diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c index 78a5d3505d4ba9ba89fdbcf4179c2d2176dbb725..623461da460f2d0431e557ce509a68896710dbf0 100644 --- a/maca_trans_parser/src/movements.c +++ b/maca_trans_parser/src/movements.c @@ -32,13 +32,12 @@ int movement_eos_undo(config *c) int movement_left_arc(config *c, int movement_code, int label) { - //printf("BEGINNING 1\n"); if(stack_is_empty(config_get_stack(c))) return 0; /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ - //printf("BEGINNING 2\n"); + /* word on top of the stack should not have a governor */ // com here bug ? - if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; - //printf("BEGINNING 3\n"); + if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; + word *gov = word_buffer_b0(config_get_buffer(c)); word *dep = stack_top(config_get_stack(c)); int dist = (word_get_index(gov)) - (word_get_index(dep)); diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c index 75d14de0f40c0756f2100fcbf704be4cee8e8a68..f1811f9fa396f8ea0a3a92f0adb6377c8727abdc 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager_error_predictor.c @@ -91,6 +91,18 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) } } +void print_movement(context *ctx, int mvt_code, int err_detect) +{ + if(ctx->trace_mode){ + movement_parser_print(stdout, mvt_code, ctx->dico_labels); + 
if(mvt_code == 0 || mvt_code == 1 || mvt_code == 2 || mvt_code == 3) {//SHIFT REDUCE ROOT OR EOS + fprintf(stdout," _"); + } + fprintf(stdout, "\t%d\n", err_detect); + } + +} + void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_error_filename) { config *c = NULL; @@ -114,6 +126,7 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er float max_err; feat_vec *fv_error = feat_vec_new(feature_types_nb); int no_back = 0; + int mvt_code_pred_back = 0; // for backtracking because sometimes / the movement is changed into a Shift root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; @@ -124,7 +137,7 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ /* which means that the top of the stack got its eos status from input */ /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */ - + /* if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){ word_set_sent_seg(stack_top(config_get_stack(c)), -1); movement_parser_eos(c); @@ -132,12 +145,13 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er while(movement_parser_root(c, root_label)); if(ctx->debug_mode) printf("force EOS\n"); } - + */ + if(0) {} /* normal behaviour, ask classifier what is the next movement to do and do it */ else{ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); - + if(ctx->debug_mode){ fprintf(stdout, "***********************************\n"); config_print(stdout, c); @@ -183,40 +197,56 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er // If there is an error : float 
scoreError = vcode_array_err[0].score; free(vcode_array_err); + + int do_backtrack; + + int smin = 0;//50 - if(error_detect == 1 && scoreError >= 10 && !ctx->trace_mode && mvt_stack_0(c->history) && ctx->force && c->bf->current_index < c->bf->nbelem -1/*its to avoid problems with EOS, PPT :-> get_pos(b0) == ponct*/) { + do_backtrack = error_detect == 1 && word_get_pos((word_buffer_b2(c->bf))) != 13 && word_get_pos((word_buffer_b1(c->bf))) != 13 && word_get_pos((word_buffer_bm1(c->bf))) != 13 && word_get_pos((word_buffer_b0(c->bf))) != 13 && scoreError >= smin && mvt_stack_0(c->history) && ctx->force && c->bf->current_index < c->bf->nbelem -1/*its to avoid problems with EOS, PPT :-> get_pos(b0) == ponct*/; + + if(do_backtrack && !ctx->trace_mode) { if(no_back) no_back = 0; else { - err_mvt_code = mvt_get_type(mvt_stack_top(config_get_history(c))); // issue + err_mvt_code = mvt_code_pred_back; //mvt_get_type(mvt_stack_top(config_get_history(c))); // issue movement_parser_undo(c); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); vcode *vcode_array = feature_table_get_vcode_array(fv, ft); - for(int i=0; i < ft->classes_nb-1; i++){ + int debug_choice = -1; + + for(int i=0; i < 1/*ft->classes_nb-1*/; i++){ if (err_mvt_code == vcode_array[i].class_code) { mvt_code = vcode_array[i+1].class_code; + debug_choice = i+1; break; } - } - if(err_mvt_code == mvt_code){ - mvt_code = feature_table_argmax(fv, ft, &max); + } + if (debug_choice == -1) { + debug_choice = 1; + no_back = 1; + mvt_code = vcode_array[0].class_code; if(ctx->debug_mode){ - fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : "); - movement_parser_print(stdout, mvt_code, ctx->dico_labels); - fprintf(stdout,"\n"); - + fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice \n"); } - no_back = 1; - } - + if(ctx->debug_mode){ + fprintf(stdout, "***********************************\n"); + fprintf(stdout, " Correction :\n"); + 
config_print(stdout, c); + vcode *vcode_arraye = feature_table_get_vcode_array(fv, ft); + for(int i=debug_choice-1; i < debug_choice+2; i++){//postag_err+3; i++){ + fprintf(stdout, "%d\t", i); + movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); + printf("\t%.4f\n", vcode_array[i].score); + } + free(vcode_arraye); + } + mvt_type = movement_parser_type(mvt_code); mvt_label = movement_parser_label(mvt_code); if(ctx->debug_mode){ - fprintf(stdout, "***********************************\n"); - config_print(stdout, c); fprintf(stdout,"Old : "); movement_parser_print(stdout, err_mvt_code, ctx->dico_labels); fprintf(stdout, ", New : "); @@ -226,7 +256,10 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er } } - + + if (ctx->trace_mode) { + print_movement(ctx, mvt_code, do_backtrack); + } // normal case : result = 0; switch(mvt_type){ @@ -248,6 +281,8 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er case MVT_PARSER_SHIFT: result = movement_parser_shift(c); } + + mvt_code_pred_back = mvt_code; if(result == 0){ if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); @@ -260,7 +295,7 @@ void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_er } } - } +} if(!ctx->trace_mode) print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct); diff --git a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c index 16b26bd282dff1f6f868ee92fa2d0e3d77315b2c..5f71f8cb276ffc82a5a58721cff5b75b70d9622f 100644 --- a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c +++ b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c @@ -194,19 +194,19 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena else if(ctx->trace_mode) { switch (error_detect) { case 0 : // No errors detected - 
sprintf(impr[nb]+strlen(impr[nb]),"\ttrue\t0"); + sprintf(impr[nb]+strlen(impr[nb]),"\t0"); break; case 1 : - sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t1"); + sprintf(impr[nb]+strlen(impr[nb]),"\t1"); break; case 2 : - sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t2"); + sprintf(impr[nb]+strlen(impr[nb]),"\t2"); break; case 3 : - sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t3"); + sprintf(impr[nb]+strlen(impr[nb]),"\t3"); break; } } @@ -263,7 +263,7 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score); } free(vcode_arraye); - fprintf(stdout, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag)); + fprintf(stdout, "Old pos : %s, New : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag)); } if(postag==postag_err){ @@ -272,7 +272,6 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : %s\n", dico_int2string(dico_pos, postag)); } no_back = 1; - printf("test\n"); } word_set_pos(word_buffer_b0(c->bf), postag);