From 568cf30a95f4e02efeefe9dd272aceebf355245d Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Sat, 29 Jul 2017 11:24:59 +0200
Subject: [PATCH] modified maca_trans_lemmatizer so that it works when no
 morphological rules are supplied, in which case it only uses the exception
 lexicon

---
 maca_trans_parser/src/maca_trans_lemmatizer.c | 75 +++++++++++--------
 .../src/maca_trans_lemmatizer_mcf2cff.c       | 25 ++++---
 perceptron/exec/perceptron_train.c            | 12 +--
 perceptron/lib/src/cf_file.c                  |  3 +
 4 files changed, 68 insertions(+), 47 deletions(-)

diff --git a/maca_trans_parser/src/maca_trans_lemmatizer.c b/maca_trans_parser/src/maca_trans_lemmatizer.c
index 8970bf5..645be94 100644
--- a/maca_trans_parser/src/maca_trans_lemmatizer.c
+++ b/maca_trans_parser/src/maca_trans_lemmatizer.c
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
   int l_rule_code;
   char *l_rule;
   float max;
-
+  feature_table *ft = NULL;
   
   maca_lemmatizer_check_options(ctx);
   maca_lemmatizer_set_linguistic_resources_filenames(ctx);
@@ -131,8 +131,17 @@ int main(int argc, char *argv[])
   ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
   mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
 
-  ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
-  feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
+  if(d_l_rules->nbelem){
+    ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
+    ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
+  }
+  else{
+    if(ctx->verbose)
+      fprintf(stderr, "no morphological rules loaded\n");
+    ctx->d_perceptron_features = NULL;
+    ft = NULL;
+  }
+
   c = config_new(f, ctx->mcd_struct, 5); 
   
   while(!config_is_terminal(c)){
@@ -153,37 +162,41 @@ int main(int argc, char *argv[])
       }
     // if lemma is not found in exception file, predict an l_rule 
       else{
-	config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
-	//	feat_vec_print_string(fv, ctx->d_perceptron_features);
-	//	feat_vec_print(stdout, fv);
-	
-	vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
-	if(ctx->debug_mode){
-	  for(int i=0; i < 10; i++){
+	if(ft == NULL){ /* no rule model just print the form as a lemma */
+	  print_word(b0, ctx->mcd_struct, form);
+	}
+	else{
+	  config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
+	  //	feat_vec_print_string(fv, ctx->d_perceptron_features);
+	  //	feat_vec_print(stdout, fv);
+	  
+	  vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
+	  if(ctx->debug_mode){
+	    for(int i=0; i < 10; i++){
+	      l_rule = dico_int2string(d_l_rules, vcode_array[i].class_code);
+	      fprintf(stderr, "%d", i);
+	      if(l_rule_is_applicable(form, l_rule)) fprintf(stderr, "*");
+	      fprintf(stderr, "\t%s\t%.4f\n", l_rule, vcode_array[i].score);
+	    }
+	  }
+	  int i;
+	  for(i=0; i < 10; i++){
 	    l_rule = dico_int2string(d_l_rules, vcode_array[i].class_code);
-	    fprintf(stderr, "%d", i);
-	    if(l_rule_is_applicable(form, l_rule)) fprintf(stderr, "*");
-	    fprintf(stderr, "\t%s\t%.4f\n", l_rule, vcode_array[i].score);
+	    if(l_rule_is_applicable(form, l_rule)){
+	      char *transformed_lemma = apply_l_rule(form, l_rule);
+	      //	printf("transformed_lemma = %s\n", transformed_lemma);
+	      //	    print_word(b0, ctx->mcd_struct, to_lower_string(transformed_lemma));
+	      print_word(b0, ctx->mcd_struct, transformed_lemma);
+	      free(transformed_lemma);
+	      break;
+	    }
 	  }
-	}
-	int i;
-	for(i=0; i < 10; i++){
-	  l_rule = dico_int2string(d_l_rules, vcode_array[i].class_code);
-	  if(l_rule_is_applicable(form, l_rule)){
-	    char *transformed_lemma = apply_l_rule(form, l_rule);
-	    //	printf("transformed_lemma = %s\n", transformed_lemma);
-	    //	    print_word(b0, ctx->mcd_struct, to_lower_string(transformed_lemma));
-	    print_word(b0, ctx->mcd_struct, transformed_lemma);
-	    free(transformed_lemma);
-	    break;
+	  /* no rule applied */
+	  if(i == 10){
+	    print_word(b0, ctx->mcd_struct, form);
 	  }
+	  free(vcode_array);
 	}
-	/* no rule applied */
-	if(i == 10){
-	  print_word(b0, ctx->mcd_struct, form);
-	}
-	
-	free(vcode_array);
       }
     }
     word_buffer_move_right(c->bf);
@@ -192,7 +205,7 @@ int main(int argc, char *argv[])
   if (ctx->input_filename) fclose(f);
   context_free(ctx);
   fplm_free(exceptions);
-  feature_table_free(ft);
+  if(ft) feature_table_free(ft);
   return 0;
 }
 
diff --git a/maca_trans_parser/src/maca_trans_lemmatizer_mcf2cff.c b/maca_trans_parser/src/maca_trans_lemmatizer_mcf2cff.c
index 8458683..1b4dd5f 100644
--- a/maca_trans_parser/src/maca_trans_lemmatizer_mcf2cff.c
+++ b/maca_trans_parser/src/maca_trans_lemmatizer_mcf2cff.c
@@ -171,15 +171,17 @@ int main(int argc, char *argv[])
   fplm_struct *exceptions;
   ctx = context_read_options(argc, argv);
 
-
-
-
-
   //  decode_lemmatizer_set_linguistic_resources_filenames(ctx);
   maca_trans_lemmatizer_mcf2cff_check_options(ctx);
   exceptions = fplm_load_file(ctx->fplm_filename, ctx->verbose);
   d_l_rules = dico_read(ctx->l_rules_filename, 0.5);
 
+  if(d_l_rules->nbelem == 0){
+    /* do not produce cff file when the rule file is empty */
+    /*    exit(1);*/
+  }
+
+  
   ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
 
   if(ctx->mode == TRAIN_MODE){
@@ -203,22 +205,25 @@ int main(int argc, char *argv[])
   /* add the feature dictionnary to the dico vector */
   dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);
   
+
   /* open output file */
   if(ctx->cff_filename)
     output_file = myfopen(ctx->cff_filename, "w");
   else
     output_file = stdout;
   
-  generate_training_file(output_file, ctx, d_l_rules, exceptions);
-    
+  if(d_l_rules->nbelem)
+    generate_training_file(output_file, ctx, d_l_rules, exceptions);
+  
+  
+  if(ctx->cff_filename)
+    fclose(output_file);
+  
   if(ctx->mode == TRAIN_MODE){
     /* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */
     dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
-    
   }
-  
-  if(ctx->cff_filename)
-    fclose(output_file);
+
   context_free(ctx);
   return 0;
 }
diff --git a/perceptron/exec/perceptron_train.c b/perceptron/exec/perceptron_train.c
index ba5b918..d4db2e9 100644
--- a/perceptron/exec/perceptron_train.c
+++ b/perceptron/exec/perceptron_train.c
@@ -35,12 +35,12 @@ int main(int argc, char *argv[])
   train_check_options(ctx);
 
   look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class);
-
-  ft = feature_table_new(nb_feat, nb_class);
-  fprintf(stderr, "table allocated (%d x %d)\n", nb_feat, nb_class); 
-  perceptron_avg(ctx->cff_filename, ft, ctx->iteration_nb);
-  feature_table_dump(ctx->perc_model_filename, ft);
-  
+  if(nb_class > 1){
+    ft = feature_table_new(nb_feat, nb_class);
+    fprintf(stderr, "table allocated (%d x %d)\n", nb_feat, nb_class); 
+    perceptron_avg(ctx->cff_filename, ft, ctx->iteration_nb);
+    feature_table_dump(ctx->perc_model_filename, ft);
+  }
   perceptron_context_free(ctx);
 
   return 0;
diff --git a/perceptron/lib/src/cf_file.c b/perceptron/lib/src/cf_file.c
index 0114a3d..908d405 100644
--- a/perceptron/lib/src/cf_file.c
+++ b/perceptron/lib/src/cf_file.c
@@ -56,8 +56,11 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int
   FILE *f = fopen(filename, "r");
   char *token;
   int nb;
+
   *max_feat = 0;
   *max_class = 0;
+  if(f == NULL)
+    return;
   while(fgets(buffer, 10000, f)){
     buffer[strlen(buffer) - 1] = '\0';
     token = strtok(buffer, "\t");
-- 
GitLab