From bc98dfbd81f480d595bbb6410119b9c558502435 Mon Sep 17 00:00:00 2001
From: Mathux <mathis.petrovich@gmail.com>
Date: Mon, 26 Jun 2017 15:38:44 +0200
Subject: [PATCH] error on beginning

---
 ...error_predictor_parser_arc_eager_mcf2cff.c | 137 ++++++++++++------
 .../simple_decoder_tagger_error_predictor.c   |  98 +++++--------
 2 files changed, 131 insertions(+), 104 deletions(-)

diff --git a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c
index 0abe7af..5f20451 100644
--- a/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c
+++ b/maca_trans_parser/src/maca_error_predictor_parser_arc_eager_mcf2cff.c
@@ -52,6 +52,70 @@ int config_is_equal_parser(config *c1, config *c2, int co1, int co2) // 3 for bm
   return ((co1==co2) ? 0 : 1);
 }
 
+int testoracle = 0; /* oracle step counter; in the code shown it is only referenced by the commented-out trace in test_or() */
+int testpred = 0; /* predicted step counter; in the code shown it is only referenced by the commented-out trace in test_pred() */
+
+int test_or(void) { /* oracle-side debug hook: does nothing and returns 0 unless the trace below is re-enabled */
+  //printf("Oracle : %d\n",testoracle++);
+  return 0;
+}
+
+int test_pred(void) { /* predicted-side debug hook: does nothing and returns 0 unless the trace below is re-enabled */
+  //printf("Pred   : %d\n",testpred++);
+  return 0;
+}
+
+/* Play one oracle transition, or do nothing once word_buffer_end(ref_oracle) is
+ * true or *sentence_nb has reached ctx->sent_nb.  The movement code, type and
+ * label are stored through the three mvt_* pointers, and config2feat_vec_cff()
+ * is called on config_oracle / fv_oracle before the movement is applied.  SHIFT
+ * and RIGHT also move ref_oracle right; EOS increments *sentence_nb. */
+void oracle_movement(int *mvt_code_oracle, char *mvt_type_oracle, int *mvt_label_oracle, config *config_oracle, word_buffer *ref_oracle, int root_label_oracle, context *ctx, feat_vec *fv_oracle, int *sentence_nb)
+{
+  if (word_buffer_end(ref_oracle) || (*sentence_nb >= ctx->sent_nb))
+    return; /* the oracle has finished its job */
+
+  *mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle);
+  *mvt_type_oracle = movement_parser_type(*mvt_code_oracle);
+  *mvt_label_oracle = movement_parser_label(*mvt_code_oracle);
+  test_or();
+  config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE);
+
+  switch (*mvt_type_oracle) {
+  case MVT_PARSER_SHIFT:
+    movement_parser_shift(config_oracle);
+    word_buffer_move_right(ref_oracle);
+    break;
+  case MVT_PARSER_RIGHT:
+    movement_parser_right_arc(config_oracle, *mvt_label_oracle);
+    word_buffer_move_right(ref_oracle);
+    break;
+  case MVT_PARSER_LEFT:
+    movement_parser_left_arc(config_oracle, *mvt_label_oracle);
+    break;
+  case MVT_PARSER_REDUCE:
+    movement_parser_reduce(config_oracle);
+    break;
+  case MVT_PARSER_ROOT:
+    movement_parser_root(config_oracle, root_label_oracle);
+    break;
+  case MVT_PARSER_EOS:
+    movement_parser_eos(config_oracle);
+    (*sentence_nb)++;
+    if ((*sentence_nb % 100) == 0) /* progress report every 100 sentences */
+      fprintf(stderr, "\rsentence %d", *sentence_nb);
+    break;
+  }
+}
+/* Writes the integer returned by config_is_equal_parser() for the oracle/predicted pair, then feat_vec_print()s fv_predicted, to output_file. */
+void print_cff(context *ctx, FILE *output_file, config *config_oracle, config *config_predicted, int mvt_code_oracle, int mvt_code_predicted, feat_vec *fv_predicted)
+{
+  if (ctx->debug_mode && output_file == stdout) return; /* nothing is written when debugging to stdout */
+  fprintf(output_file, "%d", config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted));
+  feat_vec_print(output_file, fv_predicted);
+}
+
+
 void generate_training_file_error(FILE *output_file, context *ctx)
 {
   // oracle
@@ -100,40 +164,11 @@ void generate_training_file_error(FILE *output_file, context *ctx)
   config_oracle = config_new(mcf_file_oracle, mcd_struct_hyp, 5);
 
   while((!word_buffer_end(ref_oracle) && (sentence_nb < ctx->sent_nb)) || !config_is_terminal(config_predicted)){
-    
-    mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle);
-    mvt_type_oracle = movement_parser_type(mvt_code_oracle);
-    mvt_label_oracle = movement_parser_label(mvt_code_oracle);
 
+    //oracle
+    
+    oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb);
     
-    config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE);
-
-    switch(mvt_type_oracle){
-    case MVT_PARSER_EOS :
-      movement_parser_eos(config_oracle);
-      sentence_nb++;
-      if((sentence_nb % 100) == 0)
-        fprintf(stderr, "\rsentence %d", sentence_nb);
-      break;
-    case MVT_PARSER_LEFT :
-      movement_parser_left_arc(config_oracle, mvt_label_oracle);
-      break;    
-    case MVT_PARSER_RIGHT :
-      movement_parser_right_arc(config_oracle, mvt_label_oracle);
-      word_buffer_move_right(ref_oracle);
-      break;    
-    case MVT_PARSER_REDUCE :
-      movement_parser_reduce(config_oracle);
-      break;   
-    case MVT_PARSER_ROOT :
-      movement_parser_root(config_oracle, root_label_oracle);
-      break;
-    case MVT_PARSER_SHIFT :
-      movement_parser_shift(config_oracle);
-      word_buffer_move_right(ref_oracle);
-      break;
-    }
-
     // predicted
 
     /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
@@ -143,8 +178,18 @@ void generate_training_file_error(FILE *output_file, context *ctx)
     if((word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(config_predicted))) != MVT_PARSER_EOS)){
       word_set_sent_seg(stack_top(config_get_stack(config_predicted)), -1);
       movement_parser_eos(config_predicted);
-      while(movement_parser_reduce(config_predicted));
-      while(movement_parser_root(config_predicted, root_label_predicted));
+      test_pred();
+      while(movement_parser_reduce(config_predicted)) {
+        oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb);
+        test_pred();
+        //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted);
+
+      }
+      while(movement_parser_root(config_predicted, root_label_predicted)) {
+        oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb);
+        test_pred();
+        //print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted);
+      }
     }
 
     /* normal behaviour, ask classifier what is the next movement to do and do it */
@@ -199,19 +244,27 @@ void generate_training_file_error(FILE *output_file, context *ctx)
       if(result == 0){
         result = movement_parser_shift(config_predicted);
         if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */
-          while(!stack_is_empty(config_get_stack(config_predicted)))
+          if (!stack_is_empty(config_get_stack(config_predicted))) {
+            movement_parser_root(config_predicted, root_label_predicted);
+            test_pred();
+          }
+          while(!stack_is_empty(config_get_stack(config_predicted))) {
+            oracle_movement(&mvt_code_oracle, &mvt_type_oracle, &mvt_label_oracle, config_oracle, ref_oracle, root_label_oracle, ctx,fv_oracle, &sentence_nb);
             movement_parser_root(config_predicted, root_label_predicted);
+            test_pred();
+          }
+        }
+        else {
+          test_pred();
         }
       }
+      else {
+        test_pred();
+      }
     }
-
-    if(!ctx->debug_mode || output_file!=stdout) {
-      fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted))));
-      feat_vec_print(output_file, fv_predicted);
-    }
-
-    
+    print_cff(ctx, output_file, config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted, fv_predicted); 
   }
+  fprintf(stdout,"\n");
   /*
     config_free(c); 
     feat_vec_free(fv);
diff --git a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c
index bc864f3..1e98a89 100644
--- a/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c
+++ b/maca_trans_parser/src/simple_decoder_tagger_error_predictor.c
@@ -132,6 +132,8 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
 
   char *impr[300000];
   int nb = 0;
+
+  int no_back = 0;
   
   c = config_new(f, ctx->mcd_struct, 5); 
 
@@ -189,6 +191,25 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
         break;
       }
     }
+    else if(ctx->trace_mode) {
+      switch (error_detect) {
+      case 0 : // No errors detected 
+        sprintf(impr[nb]+strlen(impr[nb]),"\ttrue\t0");
+        break;
+      
+      case 1 :
+        sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t1");
+        break;
+
+      case 2 :
+        sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t2");
+        break;
+
+      case 3 :
+        sprintf(impr[nb]+strlen(impr[nb]),"\tfalse\t3");
+        break;
+      }
+    }
     sprintf(impr[nb]+strlen(impr[nb]),"\n");
     nb +=1;
     
@@ -203,7 +224,7 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
     }
     free(vcode_array_err);
     
-    if (error_detect == 3){// && (vcode_array_err[0].score-vcode_array_err[1].score)>2.5) {
+    if (error_detect == 3 && !ctx->trace_mode && !no_back && word_buffer_bm1(c->bf) && word_buffer_bm2(c->bf)){// && (vcode_array_err[0].score-vcode_array_err[1].score)>2.5) {
       backward(c);
       backward(c);
       nb -= 3;
@@ -230,8 +251,8 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
       free(vcode_array);
       
       if(ctx->debug_mode){
-        fprintf(stderr, "***********************************\n");
-        config_print(stderr, c);
+        fprintf(stdout, "***********************************\n");
+        config_print(stdout, c);
       }
       
       if(ctx->debug_mode){
@@ -241,78 +262,31 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
           fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score);
         }
         free(vcode_arraye);
-        fprintf(stderr, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag));
+        fprintf(stdout, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag));
       }
             
-      if(postag==postag_err)
-        {
-          printf("ERROR PREDICTOR, NO CHOICE LEFT\n");
-          exit(1);
-        }
-      word_set_pos(word_buffer_b0(c->bf), postag);
-      string_print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag,&impr[nb]);
-      if(ctx->debug_mode)
-        sprintf(impr[nb]+strlen(impr[nb]),"\t✐\t_\n");
-      else
-        sprintf(impr[nb]+strlen(impr[nb]),"\n");
-      nb += 1;
-    }
-    
-    else if (error_detect == 2 && ctx->force) {
-      backward(c);
-      nb -= 2;
-
-      if(ctx->f2p)
-        add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);
-          
-      postag_err = word_get_pos(word_buffer_b0(c->bf));
-
-      postag = postag_err;
-      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
-
-      vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
-      
-      int debug_choice;
-      for(int i=0; i < ft->classes_nb-1; i++){
-        if (postag_err == vcode_array[i].class_code) {
-          postag = vcode_array[i+1].class_code;
-          debug_choice = i+1;
-          break;
+      if(postag==postag_err){
+        if(ctx->debug_mode){
+          postag = feature_table_argmax(fv, ft, &max);
+          fprintf(stdout, "ERROR PREDICTOR, NO CHOICE LEFT, take the first choice : %s\n", dico_int2string(dico_pos, postag));
         }
+        no_back = 1;
+        printf("test\n");
       }
-
-      free(vcode_array);
       
-      if(ctx->debug_mode){
-        fprintf(stderr, "***********************************\n");
-        config_print(stderr, c);
-      }
-      
-      if(ctx->debug_mode){
-        vcode *vcode_arraye = feature_table_get_vcode_array(fv, ft);
-        for(int i=debug_choice-1; i < debug_choice+2; i++){//postag_err+3; i++){
-          fprintf(stdout, "%d\t", i);
-          fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score);
-        }
-        free(vcode_arraye);
-        fprintf(stderr, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag));
-      }
-            
-      if(postag==postag_err)
-        {
-          printf("ERROR PREDICTOR, NO CHOICE LEFT\n");
-          exit(1);
-        }
       word_set_pos(word_buffer_b0(c->bf), postag);
       string_print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag,&impr[nb]);
       if(ctx->debug_mode)
-        sprintf(impr[nb]+strlen(impr[nb]),"\t⚠\t_\n");
+        sprintf(impr[nb]+strlen(impr[nb]),"\t✐\t_\n");
       else
         sprintf(impr[nb]+strlen(impr[nb]),"\n");
       nb += 1;
     }
+    
+    else if(error_detect == 3 && !ctx->trace_mode && no_back)
+      no_back = 0;
+    
     word_buffer_move_right(c->bf);
-
   }
   for (int i = 0; i < nb; i++) {
     printf("%s",impr[i]);
-- 
GitLab