fixed a bug in cff2fann

723cafa9 · Alexis Nasr · 68ee1829 · 723cafa9 · 723cafa9
Commit 723cafa9 authored 7 years ago by Alexis Nasr
--- a/maca_tools/src/mcf2json.c
+++ b/maca_tools/src/mcf2json.c
@@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int
  fprintf(output_file," ],\n");
 }
+void mcf_print_sentence(FILE *output_file, word_buffer *wb, int index_first_word, int index_last_word)
+{ /* Writes the input field of every word from index_first_word to index_last_word to output_file, one word per line. */
+  int index; /* index handed to word_buffer_get_word_n() on each iteration */
+  word *w = NULL;
+  for(index = index_first_word; index <= index_last_word; index++){ /* both bounds are inclusive */
+    w = word_buffer_get_word_n(wb, index);
+    fprintf(output_file, "%s\n", w->input); /* NOTE(review): w is dereferenced unchecked; assumes word_buffer_get_word_n() never returns NULL for this range -- confirm */
+  }
+}
 void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word)
 {
@@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
 int main(int argc, char *argv[])
 {
-  FILE *output_file = NULL;
+  FILE *json_output_file = NULL;
+  FILE *mcf_output_file = NULL;
  context *ctx = mcf2json_context_read_options(argc, argv);
  word_buffer *wb = NULL;
  word *w = NULL;
@@ -337,9 +349,10 @@ int main(int argc, char *argv[])
  char current_file[1000];
  char previous_directory[1000];
  char previous_file[1000];
-  char filename_for_header[1000];
+  char json_output_filename_for_header[1000];
  char *root_directory = NULL;
-  char destination_file[1000];
+  char json_output_filename[1000];
+  char mcf_output_filename[1000];
  char destination_dir[1000];
  struct stat st = {0};
@@ -369,21 +382,31 @@ int main(int argc, char *argv[])
 	}
      }
      if(strcmp(current_file, previous_file)){
-	strcpy(destination_file, destination_dir);
+	strcpy(json_output_filename, destination_dir);
-	strcat(destination_file, "/");
+	strcat(json_output_filename, "/");
-	strcat(destination_file, current_file);
+	strcat(json_output_filename, current_file);
-	strcat(destination_file, ".json");
+	strcat(json_output_filename, ".json");
-	fprintf(stderr, "creating file %s\n", destination_file);
+	fprintf(stderr, "creating file %s\n", json_output_filename);
-	if(output_file){
-	  print_footer(output_file);
+	strcpy(mcf_output_filename, destination_dir);
-	  fclose(output_file);
+	strcat(mcf_output_filename, "/");
-	}
+	strcat(mcf_output_filename, current_file);
-	output_file = myfopen_no_exit(destination_file, "w");
+	strcat(mcf_output_filename, ".mcf");
-	strcpy(filename_for_header, current_directory);
+	fprintf(stderr, "creating file %s\n", mcf_output_filename);
-	strcat(filename_for_header, "/");
-	strcat(filename_for_header, current_file);
-	strcat(filename_for_header, ".json");
+	if(json_output_file){
-	print_header(output_file, ctx->mcd_struct, filename_for_header);
+	  print_footer(json_output_file);
+	  fclose(json_output_file);
+	  fclose(mcf_output_file);
+	}
+	mcf_output_file = myfopen_no_exit(mcf_output_filename, "w");
+	json_output_file = myfopen_no_exit(json_output_filename, "w");
+	strcpy(json_output_filename_for_header, current_directory);
+	strcat(json_output_filename_for_header, "/");
+	strcat(json_output_filename_for_header, current_file);
+	strcat(json_output_filename_for_header, ".json");
+	print_header(json_output_file, ctx->mcd_struct, json_output_filename_for_header);
 	first_sentence = 1;
      }
      if(new_sentence){
@@ -398,20 +421,22 @@ int main(int argc, char *argv[])
 	if(first_sentence == 1)
 	  first_sentence = 0;
 	else
-	  fprintf(output_file, ",");
+	  fprintf(json_output_file, ",");
-	fprintf(output_file, "\n");
+	fprintf(json_output_file, "\n");
-	print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+	print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
+	mcf_print_sentence(mcf_output_file, wb, index_first_word, index_last_word);
      }
      strcpy(previous_file, current_file);
      strcpy(previous_directory, current_directory);
    } while(word_buffer_move_right(wb));
-    print_footer(output_file);
+    print_footer(json_output_file);
-    fclose(output_file);
+    fclose(json_output_file);
+    fclose(mcf_output_file);
  }
  else{ //ctx->root_dir is NULL dump everything to stdout
-      output_file = stdout;
+      json_output_file = stdout;
-      print_header(output_file, ctx->mcd_struct, "");
+      print_header(json_output_file, ctx->mcd_struct, "");
      do{
 	w = word_buffer_b0(wb);
 	if(new_sentence){
@@ -426,12 +451,12 @@ int main(int argc, char *argv[])
 	  if(first_sentence == 1)
 	    first_sentence = 0;
 	  else
-	    fprintf(output_file, ",");
+	    fprintf(json_output_file, ",");
-	  fprintf(output_file, "\n");
+	  fprintf(json_output_file, "\n");
-	  print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+	  print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
 	}
      } while(word_buffer_move_right(wb));
-      print_footer(output_file);
+      print_footer(json_output_file);
  }
  mcf2json_context_free(ctx);

--- a/maca_trans_parser/src/cff2fann.c
+++ b/maca_trans_parser/src/cff2fann.c
@@ -119,13 +119,13 @@ void cff2fann(context *ctx)
  mcd *m = ctx->mcd_struct;
  FILE *f = myfopen(ctx->cff_filename, "r");
  int val;
-  dico *vocab;
+  dico *d_perceptron_features;
  char feature_type[64];
  int feature_valindex;
  int count = 0;
  char *feat_str = NULL;
-  vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
+  d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
  /* printf("%d %d\n", 1, ctx->features_model->nbelem); */
@@ -146,7 +146,7 @@ void cff2fann(context *ctx)
        /* printf("\n"); */
 	printf("%d", val);
      } else {
-	feat_str = dico_int2string(vocab, val);
+	feat_str = dico_int2string(d_perceptron_features, val);
 	if(feat_str){
 	  /* printf("feat str = %s\n", feat_str); */
 	  sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
@@ -158,15 +158,14 @@ void cff2fann(context *ctx)
 	  /* printf("representation = %d\n", m->representation[mcd_col]); */
 	  if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
-	    fprintf(stderr, "it is an embedding val = %d, file = %s\n", feature_valindex, m->filename[mcd_col]);
+	    //	    fprintf(stderr, "it is an embedding val = %d, string = %s we_index = %d file = %s\n", feature_valindex, form, we_index, m->filename[mcd_col]);
-	    //int word_emb_get_code(word_emb *we, char *word)
 	    /* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
 	    /* printf("\n"); */
 	    printf("\t%d", feature_valindex);
 	  } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
-	    /* printf("it is a vocab\n"); */
+	    /* printf("it is a d_perceptron_features\n"); */
 	    /* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem);  */
 	    /* printf("\n"); */
 	    printf("\t%d", feature_valindex);