From bbb0919cadd8e77ad9caf6639495879d75f3ef1f Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Tue, 12 Jul 2016 14:58:38 -0400
Subject: [PATCH] code refactoring

---
 INSTALL                                       |  3 ++
 maca_common/include/util.h                    |  1 +
 maca_common/src/form2pos.c                    |  4 +-
 maca_common/src/util.c                        |  9 +++++
 maca_lemmatizer/src/maca_lemmatizer.c         |  2 +-
 maca_trans_parser/src/context.c               | 37 +++++++++----------
 maca_trans_parser/src/context.h               |  7 ++++
 maca_trans_parser/src/decode.c                |  3 ++
 maca_trans_parser/src/decode_tagger.c         |  3 +-
 maca_trans_parser/src/depset.c                |  3 +-
 .../src/maca_trans_parser_conll2cff.c         |  4 ++
 .../src/maca_trans_parser_conll2cff_tagger.c  |  6 +--
 maca_trans_parser/src/queue.c                 |  2 +-
 maca_trans_parser/src/simple_decoder_tagger.c | 25 ++-----------
 maca_trans_parser/src/word.c                  |  1 +
 15 files changed, 62 insertions(+), 48 deletions(-)

diff --git a/INSTALL b/INSTALL
index 8306709..b6e0cf2 100644
--- a/INSTALL
+++ b/INSTALL
@@ -10,6 +10,9 @@ The basic procedure to build and install macaon from sources is the following.
 - Launch the cmake command:
     cmake ..
 
+  If you want to compile macaon with debugging options, type:
+    cmake -DCMAKE_BUILD_TYPE=Debug ..
+
   If you want to install macaon locally, you can specify the install path with :
     cmake -DCMAKE_INSTALL_PREFIX:PATH=/absolute/path/to/macaon_install_dir
 
diff --git a/maca_common/include/util.h b/maca_common/include/util.h
index 1700e95..26c0952 100644
--- a/maca_common/include/util.h
+++ b/maca_common/include/util.h
@@ -5,4 +5,5 @@
 void myfree(void *ptr);
 void *memalloc(size_t s);
 FILE *myfopen(const char *path, const char *mode);
+FILE *myfopen_no_exit(const char *path, const char *mode);
 #endif
diff --git a/maca_common/src/form2pos.c b/maca_common/src/form2pos.c
index 610ccf4..1a98ad3 100644
--- a/maca_common/src/form2pos.c
+++ b/maca_common/src/form2pos.c
@@ -31,7 +31,7 @@ void form2pos_free(form2pos *f2p)
 
 form2pos *form2pos_read(char *filename)
 {
-  FILE *f = myfopen(filename, "r");
+  FILE *f = myfopen_no_exit(filename, "r");
   int nbelem;
   int pos_nb;
   char pos_list[10000];
@@ -39,6 +39,8 @@ form2pos *form2pos_read(char *filename)
   char signature[200];
   form2pos *f2p = NULL;
 
+  if(f == NULL) return NULL;
+
   /* read number of forms */
   fscanf(f, "%d\n", &nbelem);
   
diff --git a/maca_common/src/util.c b/maca_common/src/util.c
index 84a1ba7..4ff0352 100644
--- a/maca_common/src/util.c
+++ b/maca_common/src/util.c
@@ -25,3 +25,12 @@ FILE *myfopen(const char *path, const char *mode)
   }
   return f;
 }
+
+FILE *myfopen_no_exit(const char *path, const char *mode) /* fopen() wrapper: returns the stream, or NULL if the file cannot be opened */
+{
+  FILE *f = fopen(path, mode);
+  if(f == NULL){
+    fprintf(stderr, "cannot open file %s\n", path); /* report on stderr only; the NULL return is left for the caller to handle */
+  }
+  return f;
+}
diff --git a/maca_lemmatizer/src/maca_lemmatizer.c b/maca_lemmatizer/src/maca_lemmatizer.c
index d705599..b748e7b 100644
--- a/maca_lemmatizer/src/maca_lemmatizer.c
+++ b/maca_lemmatizer/src/maca_lemmatizer.c
@@ -123,7 +123,7 @@ int main(int argc, char *argv[])
   /* look for a valid word */
   while(fgets(buffer, 10000, f)){
     if(feof(f)) return 0; /* no more words to read */
-    if((buffer[0] == '\n') || (buffer[0] == ' ')){
+    if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
       printf("\n");
       continue;
     }
diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c
index 76dff3d..4300e4a 100644
--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -7,9 +7,6 @@
 #include "context.h"
 #include "util.h"
 
-
-void context_set_linguistic_resources_filenames(context *ctx);
-
 void context_free(context *ctx)
 {
   if(ctx->program_name)            free(ctx->program_name);
@@ -306,13 +303,8 @@ context *context_read_options(int argc, char *argv[])
       }
   }
 
-  context_set_linguistic_resources_filenames(ctx);
 
 
-  if(ctx->features_model_filename){
-    ctx->features_model = feat_model_read(ctx->features_model_filename);
-  }
-  
   /*  if(ctx->mcd_filename && ctx->conll_filename){
     ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
     ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
@@ -341,7 +333,7 @@ context *context_read_options(int argc, char *argv[])
   return ctx;
 }
 
-void context_set_linguistic_resources_filenames(context *ctx)
+void context_set_linguistic_resources_filenames_parser(context *ctx)
 {
   char absolute_path[500];
   char absolute_filename[500];
@@ -382,11 +374,10 @@ void context_set_linguistic_resources_filenames(context *ctx)
     ctx->features_model_filename = strdup(absolute_filename);
   }
 
-  /*  fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
-  fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
-  fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
-  fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
-  
+  fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
+  fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
+  fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
+  fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
 }
 
 void context_set_linguistic_resources_filenames_tagger(context *ctx)
@@ -405,7 +396,7 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
   strcat(absolute_path, ctx->language);
   strcat(absolute_path, "/bin/");
 
-
+  
   if(!ctx->perc_model_filename){
     strcpy(absolute_filename, absolute_path);
     strcat(absolute_filename, DEFAULT_MODEL_TAGGER_FILENAME);
@@ -430,9 +421,17 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
     ctx->features_model_filename = strdup(absolute_filename);
   }
 
-  /*  fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
-  fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
-  fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
-  fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
+  if(!ctx->f2p_filename){
+    strcpy(absolute_filename, absolute_path);
+    strcat(absolute_filename, DEFAULT_F2P_FILENAME);
+    ctx->f2p_filename = strdup(absolute_filename);
+    ctx->f2p = form2pos_read(ctx->f2p_filename);
+  }
+
+  fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
+  fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
+  fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
+  fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
+  fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
   
 }
diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h
index 83d859b..ff17413 100644
--- a/maca_trans_parser/src/context.h
+++ b/maca_trans_parser/src/context.h
@@ -14,6 +14,7 @@
 #define DEFAULT_FEATURES_MODEL_TAGGER_FILENAME "maca_trans_tagger.fm" 
 #define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab" 
 #define DEFAULT_MODEL_TAGGER_FILENAME  "maca_trans_tagger.model" 
+#define DEFAULT_F2P_FILENAME "fP" 
 
 #include "dico_vec.h"
 #include "feat_model.h"
@@ -92,4 +93,10 @@ void context_maca_data_path_help_message(context *ctx);
 void context_f2p_filename_help_message(context *ctx);
 
 
+void context_set_linguistic_resources_filenames_tagger(context *ctx);
+void context_set_linguistic_resources_filenames_parser(context *ctx);
+
+
+
+
 #endif
diff --git a/maca_trans_parser/src/decode.c b/maca_trans_parser/src/decode.c
index fcd0e66..aabe26a 100644
--- a/maca_trans_parser/src/decode.c
+++ b/maca_trans_parser/src/decode.c
@@ -53,6 +53,9 @@ int main(int argc, char *argv[])
   ctx = context_read_options(argc, argv);
   decode_check_options(ctx);
 
+  context_set_linguistic_resources_filenames_parser(ctx);
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
   ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
   mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
 
diff --git a/maca_trans_parser/src/decode_tagger.c b/maca_trans_parser/src/decode_tagger.c
index f2d92c8..722cdd7 100644
--- a/maca_trans_parser/src/decode_tagger.c
+++ b/maca_trans_parser/src/decode_tagger.c
@@ -46,11 +46,12 @@ int main(int argc, char *argv[])
 {
   FILE *conll_file = NULL;
   context *ctx;
-  /* struct fann *ann; */
 
   ctx = context_read_options(argc, argv);
   decode_check_options(ctx);
 
+  context_set_linguistic_resources_filenames_tagger(ctx);
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
   ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
   mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
   
diff --git a/maca_trans_parser/src/depset.c b/maca_trans_parser/src/depset.c
index 4f7b8a5..a71bba8 100644
--- a/maca_trans_parser/src/depset.c
+++ b/maca_trans_parser/src/depset.c
@@ -77,7 +77,8 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)
 
   for(i=1; i < d->length; i++){
     if((d->array[i].gov) && (d->array[i].dep)){
-      fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label));
+      /* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label ));*/
+      fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
     }
   }  
   fprintf(f, "\n");
diff --git a/maca_trans_parser/src/maca_trans_parser_conll2cff.c b/maca_trans_parser/src/maca_trans_parser_conll2cff.c
index 8193933..d42ad86 100644
--- a/maca_trans_parser/src/maca_trans_parser_conll2cff.c
+++ b/maca_trans_parser/src/maca_trans_parser_conll2cff.c
@@ -169,6 +169,10 @@ int main(int argc, char *argv[])
   
   ctx = context_read_options(argc, argv);
   maca_trans_parser_conll2cff_check_options(ctx);
+
+
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
   
   if(ctx->mode == TRAIN_MODE){
     mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
diff --git a/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c b/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
index cfd7965..85f0f4e 100644
--- a/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
+++ b/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
@@ -82,7 +82,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
       fprintf(output_file, "%d", postag);
       feat_vec_print(output_file, fv);
       
-      
       if(postag != -1)
 	movement_tagger(c, postag, 0, 1);
     }
@@ -111,7 +110,6 @@ void generate_training_file_buffer(FILE *output_file, context *ctx)
     if(ctx->f2p)
       add_signature_to_words_in_queue(c->bf, ctx->f2p);
 
-
     while(!config_is_terminal(c)){
       /* config_print(stdout, c);  */
       config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); 
@@ -136,6 +134,9 @@ int main(int argc, char *argv[])
   ctx = context_read_options(argc, argv);
   maca_trans_parser_conll2cff_check_options(ctx);
   
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
+
   if(ctx->mode == TRAIN_MODE){
     mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
     ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
@@ -146,7 +147,6 @@ int main(int argc, char *argv[])
   }
     
   feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
-
   
   /* in train mode create feature dictionnary for perceptron */
   if(ctx->mode == TRAIN_MODE)
diff --git a/maca_trans_parser/src/queue.c b/maca_trans_parser/src/queue.c
index 3b268ef..9d1315f 100644
--- a/maca_trans_parser/src/queue.c
+++ b/maca_trans_parser/src/queue.c
@@ -22,7 +22,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
   while(fgets(buffer, 10000, f)){
     if(feof(f)) break;
      /* fprintf(stderr, "%s", buffer);   */
-    if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence */
+    if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
     w = word_parse_buffer(buffer, mcd_struct);
     if(word_get_index(w) == -1){
       w->feat_array[FEAT_TYPE_INDEX] = index++; 
diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c
index 6eb1cce..d312fdd 100644
--- a/maca_trans_parser/src/simple_decoder_tagger.c
+++ b/maca_trans_parser/src/simple_decoder_tagger.c
@@ -21,25 +21,17 @@ void add_signature_to_words_in_queue(queue *bf, form2pos *f2p)
   }
 }
 
-
 void simple_decoder_buffer(context *ctx)
 {
-  FILE *f = NULL; 
   dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
   feature_table *ft =  feature_table_load(ctx->perc_model_filename);
-  config *c = NULL;
   int postag;
   feat_vec *fv = feat_vec_new(feature_types_nb);
   float max;
   int i;
-  word *w;
-
-  if(ctx->conll_filename)
-    f= myfopen(ctx->conll_filename, "r");
-  else
-    f= stdin;
-
-  c = config_initial(f, ctx->mcd_struct, 1000, 0);
+  word *w = NULL;
+  FILE *f = (ctx->conll_filename)? myfopen(ctx->conll_filename, "r") : stdin;
+  config *c = config_initial(f, ctx->mcd_struct, 1000, 0);
 
   /* read a sentence and put it in the buffer */
   while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){
@@ -59,6 +51,7 @@ void simple_decoder_buffer(context *ctx)
       w = stack_elt_n(c->st, i);
       printf("%s\t%s\n", w->input, dico_int2string(dico_pos, word_get_pos(w)));
     }
+    printf("\n");
 
     /* config_free(c);  */
     c = config_initial(f, ctx->mcd_struct, 1000, 0);
@@ -74,7 +67,6 @@ void simple_decoder_stream(context *ctx)
   feat_vec *fv = feat_vec_new(feature_types_nb);
   FILE *f = NULL; 
 
-
   /* when in stream mode, force to renumber the tokens (ugly !) */
   ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
   
@@ -82,23 +74,14 @@ void simple_decoder_stream(context *ctx)
   while(!config_is_terminal(c)){
     config_print(stdout, c);
     config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
-
-
   }
-
   /* config_print(stdout, c);  */
-  
   /* config_free(c); */
-
 }
 
 
 void simple_decoder_tagger(context *ctx)
-/* (FILE *f, mcd *mcd_struct, dico *d_perceptron_features, dico *dico_pos, feature_table *ft, feat_model  *fm, int verbose, int stream_mode)*/
 {
-
-  /*conll_file, ctx->mcd_struct, ctx->d_perceptron_features, dico_pos, ft, ctx->features_model, ctx->verbose, ctx->stream_mode);*/
-
   ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
   
   if(ctx->stream_mode)
diff --git a/maca_trans_parser/src/word.c b/maca_trans_parser/src/word.c
index d2a28f7..d163f13 100644
--- a/maca_trans_parser/src/word.c
+++ b/maca_trans_parser/src/word.c
@@ -31,6 +31,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
   while(fgets(buffer, 10000, f)){
     if(feof(f)) return NULL; /* no more words to read */
     if((buffer[0] != '\n') && (buffer[0] != ' ')){
+      /* printf("word = %s\n", buffer); */
       return word_parse_buffer(buffer, mcd_struct);
     }
   }
-- 
GitLab