From 723cafa949d379217fe0e167409dbc7a3445d5bb Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Thu, 8 Feb 2018 10:04:38 +0100
Subject: [PATCH] fixed a bug in cff2fann

---
 maca_tools/src/mcf2json.c        | 81 +++++++++++++++++++++-----------
 maca_trans_parser/src/cff2fann.c | 11 ++---
 2 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c
index bfca900..2cf60bf 100644
--- a/maca_tools/src/mcf2json.c
+++ b/maca_tools/src/mcf2json.c
@@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int
   fprintf(output_file," ],\n");
 }
 
+/* Write the input field of each word in [index_first_word, index_last_word] to output_file, one per line. */
+void mcf_print_sentence(FILE *output_file, word_buffer *wb, int index_first_word, int index_last_word)
+{
+  int index;
+  if(!output_file) return; /* caller passes the result of myfopen_no_exit unchecked; presumably NULL on failure */
+  for(index = index_first_word; index <= index_last_word; index++){
+    word *w = word_buffer_get_word_n(wb, index);
+    if(w && w->input) fprintf(output_file, "%s\n", w->input); /* skip missing words rather than dereference NULL */
+  }
+}
+
 
 void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word)
 {
@@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
 
 int main(int argc, char *argv[])
 {
-  FILE *output_file = NULL;
+  FILE *json_output_file = NULL;
+  FILE *mcf_output_file = NULL;
   context *ctx = mcf2json_context_read_options(argc, argv);
   word_buffer *wb = NULL;
   word *w = NULL;
@@ -337,9 +349,10 @@ int main(int argc, char *argv[])
   char current_file[1000];
   char previous_directory[1000];
   char previous_file[1000];
-  char filename_for_header[1000];
+  char json_output_filename_for_header[1000];
   char *root_directory = NULL;
-  char destination_file[1000];
+  char json_output_filename[1000];
+  char mcf_output_filename[1000];
   char destination_dir[1000];
   struct stat st = {0};
   
@@ -369,21 +382,31 @@ int main(int argc, char *argv[])
 	}
       }
       if(strcmp(current_file, previous_file)){
-	strcpy(destination_file, destination_dir);
-	strcat(destination_file, "/");
-	strcat(destination_file, current_file);
-	strcat(destination_file, ".json");
-	fprintf(stderr, "creating file %s\n", destination_file);
-	if(output_file){
-	  print_footer(output_file);
-	  fclose(output_file);
+	strcpy(json_output_filename, destination_dir);
+	strcat(json_output_filename, "/");
+	strcat(json_output_filename, current_file);
+	strcat(json_output_filename, ".json");
+	fprintf(stderr, "creating file %s\n", json_output_filename);
+
+	strcpy(mcf_output_filename, destination_dir);
+	strcat(mcf_output_filename, "/");
+	strcat(mcf_output_filename, current_file);
+	strcat(mcf_output_filename, ".mcf");
+	fprintf(stderr, "creating file %s\n", mcf_output_filename);
+
+	
+	if(json_output_file){
+	  print_footer(json_output_file);
+	  fclose(json_output_file);
+	  fclose(mcf_output_file);
 	}
-	output_file = myfopen_no_exit(destination_file, "w");
-	strcpy(filename_for_header, current_directory);
-	strcat(filename_for_header, "/");
-	strcat(filename_for_header, current_file);
-	strcat(filename_for_header, ".json");
-	print_header(output_file, ctx->mcd_struct, filename_for_header);
+	mcf_output_file = myfopen_no_exit(mcf_output_filename, "w");
+	json_output_file = myfopen_no_exit(json_output_filename, "w");
+	strcpy(json_output_filename_for_header, current_directory);
+	strcat(json_output_filename_for_header, "/");
+	strcat(json_output_filename_for_header, current_file);
+	strcat(json_output_filename_for_header, ".json");
+	print_header(json_output_file, ctx->mcd_struct, json_output_filename_for_header);
 	first_sentence = 1;
       }
       if(new_sentence){
@@ -398,20 +421,22 @@ int main(int argc, char *argv[])
 	if(first_sentence == 1)
 	  first_sentence = 0;
 	else
-	  fprintf(output_file, ",");
-	fprintf(output_file, "\n");
-	print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+	  fprintf(json_output_file, ",");
+	fprintf(json_output_file, "\n");
+	print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
+	mcf_print_sentence(mcf_output_file, wb, index_first_word, index_last_word);
       }
       strcpy(previous_file, current_file);
       strcpy(previous_directory, current_directory);
     } while(word_buffer_move_right(wb));
-    print_footer(output_file);
-    fclose(output_file);
+    print_footer(json_output_file);
+    fclose(json_output_file);
+    fclose(mcf_output_file);
   }
 
   else{ //ctx->root_dir is NULL dump everything to stdout
-      output_file = stdout;
-      print_header(output_file, ctx->mcd_struct, "");
+      json_output_file = stdout;
+      print_header(json_output_file, ctx->mcd_struct, "");
       do{
 	w = word_buffer_b0(wb);
 	if(new_sentence){
@@ -426,12 +451,12 @@ int main(int argc, char *argv[])
 	  if(first_sentence == 1)
 	    first_sentence = 0;
 	  else
-	    fprintf(output_file, ",");
-	  fprintf(output_file, "\n");
-	  print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+	    fprintf(json_output_file, ",");
+	  fprintf(json_output_file, "\n");
+	  print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
 	}
       } while(word_buffer_move_right(wb));
-      print_footer(output_file);
+      print_footer(json_output_file);
   }
   
   mcf2json_context_free(ctx);
diff --git a/maca_trans_parser/src/cff2fann.c b/maca_trans_parser/src/cff2fann.c
index 1aaffd1..e29c4e3 100644
--- a/maca_trans_parser/src/cff2fann.c
+++ b/maca_trans_parser/src/cff2fann.c
@@ -119,13 +119,13 @@ void cff2fann(context *ctx)
   mcd *m = ctx->mcd_struct;
   FILE *f = myfopen(ctx->cff_filename, "r");
   int val;
-  dico *vocab;
+  dico *d_perceptron_features;
   char feature_type[64];
   int feature_valindex;
   int count = 0;
   char *feat_str = NULL;
   
-  vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
+  d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
 
   /* printf("%d %d\n", 1, ctx->features_model->nbelem); */
 
@@ -146,7 +146,7 @@ void cff2fann(context *ctx)
         /* printf("\n"); */
 	printf("%d", val);
       } else {
-	feat_str = dico_int2string(vocab, val);
+	feat_str = dico_int2string(d_perceptron_features, val);
 	if(feat_str){
 	  /* printf("feat str = %s\n", feat_str); */
 	  sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
@@ -158,15 +158,14 @@ void cff2fann(context *ctx)
 	
 	  /* printf("representation = %d\n", m->representation[mcd_col]); */
 	  if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
-	    fprintf(stderr, "it is an embedding val = %d, file = %s\n", feature_valindex, m->filename[mcd_col]);
+	    //	    fprintf(stderr, "it is an embedding val = %d, string = %s we_index = %d file = %s\n", feature_valindex, form, we_index, m->filename[mcd_col]);
 
-	    //int word_emb_get_code(word_emb *we, char *word)
 	    /* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
 	    /* printf("\n"); */
 	    printf("\t%d", feature_valindex);
 	    
 	  } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
-	    /* printf("it is a vocab\n"); */
+	    /* printf("it is a vocab\n"); */
 	    /* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem);  */
 	    /* printf("\n"); */
 	    printf("\t%d", feature_valindex);
-- 
GitLab