From aeb14ceb0fdf3b6ef06d550b9d516030c2663947 Mon Sep 17 00:00:00 2001
From: Silvio Ricardo Cordeiro <silvioricardoc@gmail.com>
Date: Tue, 21 Feb 2017 17:45:07 +0100
Subject: [PATCH] Add option --n_extracols to maca_trans_parser

---
 maca_common/include/mcd.h                     |  1 +
 maca_common/include/word.h                    |  1 +
 maca_common/src/word.c                        | 23 +++++++++++++++++++
 maca_trans_parser/src/context.c               |  9 +++++++-
 maca_trans_parser/src/context.h               |  1 +
 .../src/simple_decoder_parser_arc_eager.c     | 23 +++++++++++++++----
 6 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/maca_common/include/mcd.h b/maca_common/include/mcd.h
index fe4eecf..4963127 100644
--- a/maca_common/include/mcd.h
+++ b/maca_common/include/mcd.h
@@ -63,6 +63,7 @@
 #define mcd_get_label_col(m)    (m)->wf2col[MCD_WF_LABEL]
 #define mcd_get_stag_col(m)     (m)->wf2col[MCD_WF_STAG]
 #define mcd_get_sent_seg_col(m) (m)->wf2col[MCD_WF_SENT_SEG]
+#define mcd_get_letter_col(m,L) (m)->wf2col[MCD_WF_A+(L)]
 #define mcd_get_a_col(m)        (m)->wf2col[MCD_WF_A]
 #define mcd_get_b_col(m)        (m)->wf2col[MCD_WF_B]
 #define mcd_get_c_col(m)        (m)->wf2col[MCD_WF_C]
diff --git a/maca_common/include/word.h b/maca_common/include/word.h
index 30074b7..da28551 100644
--- a/maca_common/include/word.h
+++ b/maca_common/include/word.h
@@ -26,6 +26,7 @@ typedef struct _word {
 #define word_get_label(w)          (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
 #define word_get_stag(w)           (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
 #define word_get_sent_seg(w)       (((w) == NULL) ?  0 : (w)->wf_array[MCD_WF_SENT_SEG])
+#define word_get_letterfeat(w, L)  (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A+(L)])
 #define word_get_A(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
 #define word_get_B(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
 #define word_get_C(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
diff --git a/maca_common/src/word.c b/maca_common/src/word.c
index b642093..952fe06 100644
--- a/maca_common/src/word.c
+++ b/maca_common/src/word.c
@@ -134,6 +134,29 @@ void word_print(FILE *f, word *w)
   fprintf(f, "%s", w->input);
 }
 
+/* Debug aid: print every feature of w that mcd_struct maps to a column, one per line. */
+/* NOTE(review): this patch adds no word_debug prototype to word.h; callers need one. */
+void word_debug(FILE *f, word *w, mcd *mcd_struct) {
+    int wf;
+    if (w == NULL || mcd_struct == NULL) { fprintf(f, "WORD: NULL\n"); return; }
+    fprintf(f, "WORD:\n");
+    // wf2col maps a feature index to a column; wf_str and dico_array are indexed by column
+    for (wf=0; wf < MCD_WF_NB; wf++) {
+        int wf_int = w->wf_array[wf];
+        int col = mcd_struct->wf2col[wf];  // -1 when this feature has no column
+        if (col == -1) continue;
+        const char *colname = mcd_struct->wf_str[col];
+        dico *d = mcd_struct->dico_array[col];
+        const char *wf_string = (d == NULL) ? NULL : dico_int2string(d, wf_int);
+        if (d == NULL)  // no dictionary for this column: only the raw integer is available
+            fprintf(f, "  %s: %d\n", colname, wf_int);
+        else if (wf_string == NULL)  // the dictionary has no string for this integer
+            fprintf(f, "  %s: NULL (%d)\n", colname, wf_int);
+        else
+            fprintf(f, "  %s: \"%s\" (%d)\n", colname, wf_string, wf_int);
+    }
+}
+
 int word_is_eos(word *w, mcd *mcd_struct)
 {
   if(w == NULL) return 0;
diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c
index 72c2d61..c871bc7 100644
--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -105,6 +105,7 @@ void context_general_help_message(context *ctx)
     fprintf(stderr, "\t-r --hratio    <float>    : set the occupation ratio of hash tables (default is 0.5)\n");
     fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n");
     fprintf(stderr, "\t-L --language    <str>    : identifier of the language to use (default is fr)\n");
+    fprintf(stderr, "\t-N --n_extracols  <int>   : number of extra columns to print [e.g. 3 for columns A,B,C] (default is 0)\n");
 }
 
 void context_model_help_message(context *ctx){
@@ -176,7 +177,7 @@ context *context_read_options(int argc, char *argv[])
 
   ctx->program_name = strdup(argv[0]);
 
-  static struct option long_options[22] =
+  static struct option long_options[23] =
     {
       {"help",                no_argument,       0, 'h'},
       {"verbose",             no_argument,       0, 'v'},
@@ -197,6 +198,7 @@ context *context_read_options(int argc, char *argv[])
       {"vocabs",              required_argument, 0, 'V'}, 
       {"language",            required_argument, 0, 'L'},
       {"maca_data_path",      required_argument, 0, 'D'},
+      {"n_extracols",         required_argument, 0, 'N'},
       {"root_label",          required_argument, 0, 'R'},
       {"f2p",                 required_argument, 0, 'P'},
       {"traces",              required_argument, 0, 'T'}
@@ -266,6 +268,11 @@ context *context_read_options(int argc, char *argv[])
 	if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") )
   	ctx->language = strdup(optarg);
 	break;
+      case 'N':
+	ctx->n_extracols = atoi(optarg);
+	if (ctx->n_extracols < 0 || ctx->n_extracols > 26)
+		ctx->n_extracols = 0;
+	break;
       case 'D':
 	ctx->maca_data_path = strdup(optarg);
 	break;
diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h
index 932e671..c743a65 100644
--- a/maca_trans_parser/src/context.h
+++ b/maca_trans_parser/src/context.h
@@ -46,6 +46,7 @@ typedef struct {
   int feature_cutoff;
   int mode;
   int sent_nb;
+  int n_extracols;
   float hash_ratio;
   int beam_width;
   int mvt_nb;
diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
index 7c44a6b..66260a3 100644
--- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
+++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
@@ -36,7 +36,7 @@ void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
   }
 }
 
-void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
+void print_word_buffer(config *c, context *ctx, dico *dico_labels, mcd *mcd_struct)
 {
   int i;
   word *w;
@@ -60,9 +60,9 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
       else
 	printf("_\t");
       if(word_get_sent_seg(w) == 1)
-	printf("1\n") ;
+	printf("1");
       else
-	printf("0\n");
+	printf("0");
     }
     else{
       buffer = strdup(w->input);
@@ -110,9 +110,22 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
 	else
 	  printf("\t0");
       }
-      printf("\n");
       free(buffer);
     }
+
+    /* Extra columns A, B, C...: print the symbol when known, else the raw int. */
+    int j;
+    for (j=0; j < ctx->n_extracols; j++) {
+        int int_feature = word_get_letterfeat(w, j);
+        int col = mcd_get_letter_col(mcd_struct, j);
+        dico *d = (col == -1) ? NULL : mcd_struct->dico_array[col];
+        const char *str_feature = NULL;
+        if (d != NULL && int_feature >= 0)
+            str_feature = dico_int2string(d, int_feature);
+        if (str_feature == NULL) printf("\t%d", int_feature);  /* never pass NULL to %s */
+        else printf("\t%s", str_feature);
+    }
+    printf("\n");
   }
 }
 
@@ -221,7 +234,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
   }
   
   if(!ctx->trace_mode)
-    print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);
+    print_word_buffer(c, ctx, ctx->dico_labels, ctx->mcd_struct);
   
   config_free(c); 
   feat_vec_free(fv);
-- 
GitLab