code refactoring

bbb0919c · Alexis Nasr · a4db64a2 · bbb0919c · bbb0919c · bbb0919c
Commit bbb0919c authored Jul 12, 2016 by Alexis Nasr
--- a/INSTALL
+++ b/INSTALL
@@ -10,6 +10,9 @@ The basic procedure to build and install macaon from sources is the following.
 - Launch the cmake command:
    cmake ..

+  If you want to compile macaon with debugging options, type:
+    cmake -DCMAKE_BUILD_TYPE=Debug ..
+
  If you want to install macaon locally, you can specify the install path with :
    cmake -DCMAKE_INSTALL_PREFIX:PATH=/absolute/path/to/macaon_install_dir


--- a/maca_common/include/util.h
+++ b/maca_common/include/util.h
@@ -5,4 +5,5 @@
 void myfree(void *ptr);
 void *memalloc(size_t s);
 FILE *myfopen(const char *path, const char *mode);
+FILE *myfopen_no_exit(const char *path, const char *mode);
 #endif
--- a/maca_common/src/form2pos.c
+++ b/maca_common/src/form2pos.c
@@ -31,7 +31,7 @@ void form2pos_free(form2pos *f2p)

 form2pos *form2pos_read(char *filename)
 {
-  FILE *f = myfopen(filename, "r");
+  FILE *f = myfopen_no_exit(filename, "r");
  int nbelem;
  int pos_nb;
  char pos_list[10000];
@@ -39,6 +39,8 @@ form2pos *form2pos_read(char *filename)
  char signature[200];
  form2pos *f2p = NULL;

+  if(f == NULL) return NULL;
+
  /* read number of forms */
  fscanf(f, "%d\n", &nbelem);
  

--- a/maca_common/src/util.c
+++ b/maca_common/src/util.c
@@ -25,3 +25,12 @@ FILE *myfopen(const char *path, const char *mode)
  }
  return f;
 }
+
+FILE *myfopen_no_exit(const char *path, const char *mode) /* fopen() wrapper: logs a failed open to stderr and hands the (possibly NULL) stream back to the caller */
+{
+  FILE *f = fopen(path, mode); /* path and mode are passed to fopen() unchanged */
+  if(f == NULL){
+    fprintf(stderr, "cannot open file %s\n", path); /* report only; no exit or abort here, unlike what the name suggests myfopen does (TODO confirm against myfopen) */
+  }
+  return f; /* NULL when the open failed, so callers must check before using the stream */
+}
--- a/maca_lemmatizer/src/maca_lemmatizer.c
+++ b/maca_lemmatizer/src/maca_lemmatizer.c
@@ -123,7 +123,7 @@ int main(int argc, char *argv[])
  /* look for a valid word */
  while(fgets(buffer, 10000, f)){
    if(feof(f)) return 0; /* no more words to read */
-    if((buffer[0] == '\n') || (buffer[0] == ' ')){
+    if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
      printf("\n");
      continue;
    }

--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -7,9 +7,6 @@
 #include "context.h"
 #include "util.h"

-
-void context_set_linguistic_resources_filenames(context *ctx);
-
 void context_free(context *ctx)
 {
  if(ctx->program_name)            free(ctx->program_name);
@@ -306,13 +303,8 @@ context *context_read_options(int argc, char *argv[])
      }
  }

-  context_set_linguistic_resources_filenames(ctx);


-  if(ctx->features_model_filename){
-    ctx->features_model = feat_model_read(ctx->features_model_filename);
-  }
-  
  /*  if(ctx->mcd_filename && ctx->conll_filename){
    ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
    ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
@@ -341,7 +333,7 @@ context *context_read_options(int argc, char *argv[])
  return ctx;
 }

-void context_set_linguistic_resources_filenames(context *ctx)
+void context_set_linguistic_resources_filenames_parser(context *ctx)
 {
  char absolute_path[500];
  char absolute_filename[500];
@@ -382,11 +374,10 @@ void context_set_linguistic_resources_filenames(context *ctx)
    ctx->features_model_filename = strdup(absolute_filename);
  }

-  /*  fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
-  fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
-  fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
-  fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
-  
+  fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
+  fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
+  fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
+  fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
 }

 void context_set_linguistic_resources_filenames_tagger(context *ctx)
@@ -430,9 +421,17 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
    ctx->features_model_filename = strdup(absolute_filename);
  }

-  /*  fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
-  fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
-  fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
-  fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
+  if(!ctx->f2p_filename){
+    strcpy(absolute_filename, absolute_path);
+    strcat(absolute_filename, DEFAULT_F2P_FILENAME);
+    ctx->f2p_filename = strdup(absolute_filename);
+    ctx->f2p = form2pos_read(ctx->f2p_filename);
+  }
+
+  fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
+  fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
+  fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
+  fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
+  fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
  
 }
--- a/maca_trans_parser/src/context.h
+++ b/maca_trans_parser/src/context.h
@@ -14,6 +14,7 @@
 #define DEFAULT_FEATURES_MODEL_TAGGER_FILENAME "maca_trans_tagger.fm" 
 #define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab" 
 #define DEFAULT_MODEL_TAGGER_FILENAME  "maca_trans_tagger.model" 
+#define DEFAULT_F2P_FILENAME "fP" 

 #include "dico_vec.h"
 #include "feat_model.h"
@@ -92,4 +93,10 @@ void context_maca_data_path_help_message(context *ctx);
 void context_f2p_filename_help_message(context *ctx);


+void context_set_linguistic_resources_filenames_tagger(context *ctx);
+void context_set_linguistic_resources_filenames_parser(context *ctx);
+
+
+
+
 #endif
--- a/maca_trans_parser/src/decode.c
+++ b/maca_trans_parser/src/decode.c
@@ -53,6 +53,9 @@ int main(int argc, char *argv[])
  ctx = context_read_options(argc, argv);
  decode_check_options(ctx);

+  context_set_linguistic_resources_filenames_parser(ctx);
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);


--- a/maca_trans_parser/src/decode_tagger.c
+++ b/maca_trans_parser/src/decode_tagger.c
@@ -46,11 +46,12 @@ int main(int argc, char *argv[])
 {
  FILE *conll_file = NULL;
  context *ctx;
-  /* struct fann *ann; */

  ctx = context_read_options(argc, argv);
  decode_check_options(ctx);

+  context_set_linguistic_resources_filenames_tagger(ctx);
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
  

--- a/maca_trans_parser/src/depset.c
+++ b/maca_trans_parser/src/depset.c
@@ -77,7 +77,8 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)

  for(i=1; i < d->length; i++){
    if((d->array[i].gov) && (d->array[i].dep)){
-      fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label));
+      /* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label ));*/
+      fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
    }
  }  
  fprintf(f, "\n");

--- a/maca_trans_parser/src/maca_trans_parser_conll2cff.c
+++ b/maca_trans_parser/src/maca_trans_parser_conll2cff.c
@@ -170,6 +170,10 @@ int main(int argc, char *argv[])
  ctx = context_read_options(argc, argv);
  maca_trans_parser_conll2cff_check_options(ctx);

+
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
+  
  if(ctx->mode == TRAIN_MODE){
    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
    ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);

--- a/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
+++ b/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
@@ -82,7 +82,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
      fprintf(output_file, "%d", postag);
      feat_vec_print(output_file, fv);
      
-      
      if(postag != -1)
 	movement_tagger(c, postag, 0, 1);
    }
@@ -111,7 +110,6 @@ void generate_training_file_buffer(FILE *output_file, context *ctx)
    if(ctx->f2p)
      add_signature_to_words_in_queue(c->bf, ctx->f2p);

-
    while(!config_is_terminal(c)){
      /* config_print(stdout, c);  */
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); 
@@ -136,6 +134,9 @@ int main(int argc, char *argv[])
  ctx = context_read_options(argc, argv);
  maca_trans_parser_conll2cff_check_options(ctx);
  
+  ctx->features_model = feat_model_read(ctx->features_model_filename);
+
+
  if(ctx->mode == TRAIN_MODE){
    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
    ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
@@ -147,7 +148,6 @@ int main(int argc, char *argv[])
    
  feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
  
-  
  /* in train mode create feature dictionnary for perceptron */
  if(ctx->mode == TRAIN_MODE)
    ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);

--- a/maca_trans_parser/src/queue.c
+++ b/maca_trans_parser/src/queue.c
@@ -22,7 +22,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
  while(fgets(buffer, 10000, f)){
    if(feof(f)) break;
     /* fprintf(stderr, "%s", buffer);   */
-    if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence */
+    if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
    w = word_parse_buffer(buffer, mcd_struct);
    if(word_get_index(w) == -1){
      w->feat_array[FEAT_TYPE_INDEX] = index++; 

--- a/maca_trans_parser/src/simple_decoder_tagger.c
+++ b/maca_trans_parser/src/simple_decoder_tagger.c
@@ -21,25 +21,17 @@ void add_signature_to_words_in_queue(queue *bf, form2pos *f2p)
  }
 }

-
 void simple_decoder_buffer(context *ctx)
 {
-  FILE *f = NULL; 
  dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
  feature_table *ft =  feature_table_load(ctx->perc_model_filename);
-  config *c = NULL;
  int postag;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  float max;
  int i;
-  word *w;
-
-  if(ctx->conll_filename)
-    f= myfopen(ctx->conll_filename, "r");
-  else
-    f= stdin;
-
-  c = config_initial(f, ctx->mcd_struct, 1000, 0);
+  word *w = NULL;
+  FILE *f = (ctx->conll_filename)? myfopen(ctx->conll_filename, "r") : stdin;
+  config *c = config_initial(f, ctx->mcd_struct, 1000, 0);

  /* read a sentence and put it in the buffer */
  while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){
@@ -59,6 +51,7 @@ void simple_decoder_buffer(context *ctx)
      w = stack_elt_n(c->st, i);
      printf("%s\t%s\n", w->input, dico_int2string(dico_pos, word_get_pos(w)));
    }
+    printf("\n");

    /* config_free(c);  */
    c = config_initial(f, ctx->mcd_struct, 1000, 0);
@@ -74,7 +67,6 @@ void simple_decoder_stream(context *ctx)
  feat_vec *fv = feat_vec_new(feature_types_nb);
  FILE *f = NULL; 

-
  /* when in stream mode, force to renumber the tokens (ugly !) */
  ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
  
@@ -82,23 +74,14 @@ void simple_decoder_stream(context *ctx)
  while(!config_is_terminal(c)){
    config_print(stdout, c);
    config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
-
-
  }
-
  /* config_print(stdout, c);  */
-  
  /* config_free(c); */
-
 }


 void simple_decoder_tagger(context *ctx)
-/* (FILE *f, mcd *mcd_struct, dico *d_perceptron_features, dico *dico_pos, feature_table *ft, feat_model  *fm, int verbose, int stream_mode)*/
 {
-
-  /*conll_file, ctx->mcd_struct, ctx->d_perceptron_features, dico_pos, ft, ctx->features_model, ctx->verbose, ctx->stream_mode);*/
-
  ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
  
  if(ctx->stream_mode)

--- a/maca_trans_parser/src/word.c
+++ b/maca_trans_parser/src/word.c
@@ -31,6 +31,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
  while(fgets(buffer, 10000, f)){
    if(feof(f)) return NULL; /* no more words to read */
    if((buffer[0] != '\n') && (buffer[0] != ' ')){
+      /* printf("word = %s\n", buffer); */
      return word_parse_buffer(buffer, mcd_struct);
    }
  }