From ebabce6be0cb28ffd0cdf50093c4b4d3d7cdae8a Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Tue, 6 Dec 2016 10:10:07 -0500
Subject: [PATCH] added maca_trans_tagger_bt, an (experimental) tagger with a
 (very) limited form of backtracking; modified some help messages

---
 maca_common/include/word_buffer.h            |   5 -
 maca_trans_parser/src/context.c              |   3 +
 maca_trans_parser/src/context.h              |   3 +
 maca_trans_parser/src/maca_trans_parser.c    |   9 +-
 maca_trans_parser/src/maca_trans_tagger_bt.c | 103 +++++++++++++++++++
 perceptron/lib/include/feat_vec.h            |  14 ++-
 perceptron/lib/include/feature_table.h       |  20 ++--
 perceptron/lib/src/feature_table.c           |   2 -
 8 files changed, 130 insertions(+), 29 deletions(-)
 create mode 100644 maca_trans_parser/src/maca_trans_tagger_bt.c

diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h
index 91b216d..0c30a9a 100644
--- a/maca_common/include/word_buffer.h
+++ b/maca_common/include/word_buffer.h
@@ -29,11 +29,6 @@
 #define word_buffer_is_last(wb)  (((wb)->current_index == (wb)->nbelem - 1)? 1 : 0)
 #define word_buffer_is_empty(wb) (((wb)->nbelem == 0)? 1 : 0)
 
-
-
-
-
-
 typedef struct {
   int size;           /* size of the array used to store words */
   int nbelem;         /* number of words in the buffer */
diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c
index 85acc24..72c2d61 100644
--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -164,6 +164,9 @@ void context_f2p_filename_help_message(context *ctx){
 void context_trace_mode_help_message(context *ctx){
   fprintf(stderr, "\t-T --traces            : activate trace mode (default is false)\n");
 }
+/* Print the usage line for the -d/--debug switch on stderr; ctx is not read. */
+void context_debug_help_message(context *ctx)
+{ fputs("\t-d --debug            : activate debug mode (default is false)\n", stderr); }
 
 context *context_read_options(int argc, char *argv[])
 {
diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h
index 611dd10..932e671 100644
--- a/maca_trans_parser/src/context.h
+++ b/maca_trans_parser/src/context.h
@@ -102,4 +102,7 @@ void context_ifpls_help_message(context *ctx);
 void context_input_help_message(context *ctx);
 void context_root_label_help_message(context *ctx);
 
+void context_debug_help_message(context *ctx);
+
+
 #endif
diff --git a/maca_trans_parser/src/maca_trans_parser.c b/maca_trans_parser/src/maca_trans_parser.c
index 3ecbcd4..fa44e9b 100644
--- a/maca_trans_parser/src/maca_trans_parser.c
+++ b/maca_trans_parser/src/maca_trans_parser.c
@@ -13,11 +13,12 @@
 /*#include"dnn_decoder.h"*/
 #include"config2feat_vec.h"
 
-void decode_help_message(context *ctx)
+void maca_trans_parser_help_message(context *ctx)
 {
   context_general_help_message(ctx);
   /* context_beam_help_message(ctx); */
   /* context_conll_help_message(ctx); */
+  context_debug_help_message(ctx);
   fprintf(stderr, "INPUT\n");
   context_input_help_message(ctx);
   context_mcd_help_message(ctx);
@@ -27,7 +28,7 @@ void decode_help_message(context *ctx)
   context_root_label_help_message(ctx);
 }
 
-void decode_check_options(context *ctx){
+void maca_trans_parser_check_options(context *ctx){
   if(ctx->help
      /*!ctx->conll_filename*/
      /*     || !ctx->perc_model_filename
@@ -35,7 +36,7 @@ void decode_check_options(context *ctx){
      || !ctx->vocabs_filename
      || !ctx->features_model_filename*/
      ){
-    decode_help_message(ctx);
+    maca_trans_parser_help_message(ctx);
     exit(1);
   }
 }
@@ -83,7 +84,7 @@ int main(int argc, char *argv[])
   context *ctx;
 
   ctx = context_read_options(argc, argv);
-  decode_check_options(ctx);
+  maca_trans_parser_check_options(ctx);
 
   set_linguistic_resources_filenames_parser(ctx);
   ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
diff --git a/maca_trans_parser/src/maca_trans_tagger_bt.c b/maca_trans_parser/src/maca_trans_tagger_bt.c
new file mode 100644
index 0000000..778c634
--- /dev/null
+++ b/maca_trans_parser/src/maca_trans_tagger_bt.c
@@ -0,0 +1,103 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<unistd.h>
+#include<getopt.h>
+#include"context.h"
+#include"feat_fct.h"
+#include"feature_table.h"
+#include"dico.h"
+#include"beam.h"
+#include"form2pos.h"
+#include"simple_decoder_tagger.h"
+/*#include"dnn_decoder.h"*/
+#include"config2feat_vec.h"
+
+/* Print the tagger usage text: general, beam and conll help, the "INPUT" heading (stderr), then the input-resource help. */
+void decode_tagger_help_message(context *ctx){
+  context_general_help_message(ctx);
+  context_beam_help_message(ctx);
+  context_conll_help_message(ctx);
+  fputs("INPUT\n", stderr);
+  context_input_help_message(ctx);
+  context_mcd_help_message(ctx);
+  context_model_help_message(ctx);
+  context_vocabs_help_message(ctx);
+  context_features_model_help_message(ctx);
+  context_f2p_filename_help_message(ctx);
+}
+
+/* Print the usage text and exit with status 1 when ctx->help is set.
+ * Nothing else is validated: the tests on conll_filename,
+ * perc_model_filename, mcd_filename, vocabs_filename and
+ * features_model_filename that sat in this condition were commented out. */
+void decode_tagger_check_options(context *ctx)
+{
+  if(!ctx->help)
+    return;
+
+  decode_tagger_help_message(ctx);
+  exit(1);
+}
+
+/* Return a newly allocated copy of ctx->maca_data_path followed by name.
+ * The path is built with a bounded snprintf; if it does not fit in the
+ * buffer the program stops instead of writing past the end of it. */
+static char *decode_tagger_default_filename(context *ctx, const char *name)
+{
+  char absolute_filename[500];
+  int len = snprintf(absolute_filename, sizeof absolute_filename, "%s%s", ctx->maca_data_path, name);
+  if(len < 0 || (size_t)len >= sizeof absolute_filename){
+    fprintf(stderr, "ERROR: path too long: %s%s\n", ctx->maca_data_path, name);
+    exit(1);
+  }
+  return strdup(absolute_filename);
+}
+
+/* Give every resource filename that is still unset its default location
+ * under ctx->maca_data_path. The form2pos table is read here only when the
+ * default f2p filename is installed. mcd_filename receives no default and
+ * may therefore still be NULL when the verbose summary is printed. */
+void decode_tagger_set_linguistic_resources_filenames(context *ctx)
+{
+  if(!ctx->perc_model_filename)
+    ctx->perc_model_filename = decode_tagger_default_filename(ctx, DEFAULT_MODEL_TAGGER_FILENAME);
+
+  if(!ctx->vocabs_filename)
+    ctx->vocabs_filename = decode_tagger_default_filename(ctx, DEFAULT_VOCABS_TAGGER_FILENAME);
+
+  if(!ctx->features_model_filename)
+    ctx->features_model_filename = decode_tagger_default_filename(ctx, DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);
+
+  if(!ctx->f2p_filename){
+    ctx->f2p_filename = decode_tagger_default_filename(ctx, DEFAULT_F2P_FILENAME);
+    ctx->f2p = form2pos_read(ctx->f2p_filename);
+  }
+
+  if(ctx->verbose){
+    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
+    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
+    /* passing NULL for %s is undefined behaviour, so substitute a marker */
+    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
+    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
+    fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
+  }
+}
+
+
+/* Entry point: read the options, resolve resource filenames, load the feature model and vocabularies, run the decoder. */
+int main(int argc, char *argv[])
+{
+  context *ctx;
+
+  ctx = context_read_options(argc, argv);
+  decode_tagger_check_options(ctx);
+  decode_tagger_set_linguistic_resources_filenames(ctx);
+  ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
+  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
+  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
+  simple_decoder_tagger2(ctx);
+  context_free(ctx);
+  return EXIT_SUCCESS;
+}
+
diff --git a/perceptron/lib/include/feat_vec.h b/perceptron/lib/include/feat_vec.h
index 4116d76..e1dfe91 100644
--- a/perceptron/lib/include/feat_vec.h
+++ b/perceptron/lib/include/feat_vec.h
@@ -13,16 +13,14 @@ typedef struct {
 /*#include "word_emb.h"*/
 #include "mcd.h"
 
-
-void feat_vec_concat(feat_vec *fv1, feat_vec *fv2);
+void      feat_vec_concat(feat_vec *fv1, feat_vec *fv2);
 feat_vec *feat_vec_copy(feat_vec *fv);
-
 feat_vec *feat_vec_new(int size);
-void feat_vec_free(feat_vec *fv);
-int feat_vec_add(feat_vec *fv, int feat);
-void feat_vec_empty(feat_vec *fv);
-void feat_vec_print_string(feat_vec *fv, dico *dico_features);
-void feat_vec_print(FILE *f, feat_vec *fv);
+void      feat_vec_free(feat_vec *fv);
+int       feat_vec_add(feat_vec *fv, int feat);
+void      feat_vec_empty(feat_vec *fv);
+void      feat_vec_print_string(feat_vec *fv, dico *dico_features);
+void      feat_vec_print(FILE *f, feat_vec *fv);
 /* void feat_vec_print_dnn(FILE *f, feat_vec *fv, feat_model *fm, mcd *m);  */
 /* void feat_vec_fill_input_array_dnn(fann_type *input_array, feat_vec *fv, feat_model *fm, mcd *m); */
 /* void feat_vec_fill_input_array_dnn(float *input_array, feat_vec *fv, feat_model *fm, mcd *m); */
diff --git a/perceptron/lib/include/feature_table.h b/perceptron/lib/include/feature_table.h
index ef1bbe6..97abb90 100644
--- a/perceptron/lib/include/feature_table.h
+++ b/perceptron/lib/include/feature_table.h
@@ -17,15 +17,15 @@ typedef struct {
 } vcode;
 
 feature_table *feature_table_load(char *filename, int verbose);
-void feature_table_dump(char *filename, feature_table *ft);
+void           feature_table_dump(char *filename, feature_table *ft);
 feature_table *feature_table_new(int features_nb, int classes_nb);
-void feature_table_print(char *filename, feature_table *ft);
-void feature_table_print_verbose(char *filename, feature_table *ft, dico *dico_features, dico *dico_classes);
-int feature_table_argmax(feat_vec *fv, feature_table *ft, float *max);
-float feature_table_entropy(feat_vec *fv, feature_table *ft);
-float feature_table_diff_scores(feat_vec *fv, feature_table *ft);
-float feature_table_argmax_1_2(feat_vec *fv, feature_table *ft, int *argmax1, float *max1, int *argmax2, float *max2);
-void feature_table_free(feature_table *ft);
-void feature_table_scores(feat_vec *fv, feature_table *ft, float *classes_score);
-vcode* feature_table_get_vcode_array(feat_vec *fv, feature_table* ft);
+void           feature_table_print(char *filename, feature_table *ft);
+void           feature_table_print_verbose(char *filename, feature_table *ft, dico *dico_features, dico *dico_classes);
+int            feature_table_argmax(feat_vec *fv, feature_table *ft, float *max);
+float          feature_table_entropy(feat_vec *fv, feature_table *ft);
+float          feature_table_diff_scores(feat_vec *fv, feature_table *ft);
+float          feature_table_argmax_1_2(feat_vec *fv, feature_table *ft, int *argmax1, float *max1, int *argmax2, float *max2);
+void           feature_table_free(feature_table *ft);
+void           feature_table_scores(feat_vec *fv, feature_table *ft, float *classes_score);
+vcode         *feature_table_get_vcode_array(feat_vec *fv, feature_table* ft);
 #endif
diff --git a/perceptron/lib/src/feature_table.c b/perceptron/lib/src/feature_table.c
index 5c2384a..248a1b3 100644
--- a/perceptron/lib/src/feature_table.c
+++ b/perceptron/lib/src/feature_table.c
@@ -8,8 +8,6 @@
 feature_table *feature_table_load(char *filename, int verbose)
 {
   int i;
-
-
   feature_table *ft = NULL;
   int features_nb;
   int classes_nb;
-- 
GitLab