From 723cafa949d379217fe0e167409dbc7a3445d5bb Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Thu, 8 Feb 2018 10:04:38 +0100 Subject: [PATCH] fixed a bug in cff2fann --- maca_tools/src/mcf2json.c | 81 +++++++++++++++++++++----------- maca_trans_parser/src/cff2fann.c | 11 ++--- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c index bfca900..2cf60bf 100644 --- a/maca_tools/src/mcf2json.c +++ b/maca_tools/src/mcf2json.c @@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int fprintf(output_file," ],\n"); } +void mcf_print_sentence(FILE *output_file, word_buffer *wb, int index_first_word, int index_last_word) +{ + int index; + word *w = NULL; + + for(index = index_first_word; index <= index_last_word; index++){ + w = word_buffer_get_word_n(wb, index); + fprintf(output_file, "%s\n", w->input); + } +} + void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word) { @@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind int main(int argc, char *argv[]) { - FILE *output_file = NULL; + FILE *json_output_file = NULL; + FILE *mcf_output_file = NULL; context *ctx = mcf2json_context_read_options(argc, argv); word_buffer *wb = NULL; word *w = NULL; @@ -337,9 +349,10 @@ int main(int argc, char *argv[]) char current_file[1000]; char previous_directory[1000]; char previous_file[1000]; - char filename_for_header[1000]; + char json_output_filename_for_header[1000]; char *root_directory = NULL; - char destination_file[1000]; + char json_output_filename[1000]; + char mcf_output_filename[1000]; char destination_dir[1000]; struct stat st = {0}; @@ -369,21 +382,31 @@ int main(int argc, char *argv[]) } } if(strcmp(current_file, previous_file)){ - strcpy(destination_file, destination_dir); - strcat(destination_file, "/"); - strcat(destination_file, current_file); - strcat(destination_file, ".json"); - fprintf(stderr, "creating file %s\n", destination_file); - if(output_file){ - print_footer(output_file); - fclose(output_file); + strcpy(json_output_filename, destination_dir); + strcat(json_output_filename, "/"); + strcat(json_output_filename, current_file); + strcat(json_output_filename, ".json"); + fprintf(stderr, "creating file %s\n", json_output_filename); + + strcpy(mcf_output_filename, destination_dir); + strcat(mcf_output_filename, "/"); + strcat(mcf_output_filename, current_file); + strcat(mcf_output_filename, ".mcf"); + fprintf(stderr, "creating file %s\n", mcf_output_filename); + + + if(json_output_file){ + print_footer(json_output_file); + fclose(json_output_file); + fclose(mcf_output_file); } - output_file = myfopen_no_exit(destination_file, "w"); - strcpy(filename_for_header, current_directory); - strcat(filename_for_header, "/"); - strcat(filename_for_header, current_file); - strcat(filename_for_header, ".json"); - print_header(output_file, ctx->mcd_struct, filename_for_header); + mcf_output_file = myfopen_no_exit(mcf_output_filename, "w"); + json_output_file = myfopen_no_exit(json_output_filename, "w"); + strcpy(json_output_filename_for_header, current_directory); + strcat(json_output_filename_for_header, "/"); + strcat(json_output_filename_for_header, current_file); + strcat(json_output_filename_for_header, ".json"); + print_header(json_output_file, ctx->mcd_struct, json_output_filename_for_header); first_sentence = 1; } if(new_sentence){ @@ -398,20 +421,22 @@ int main(int argc, char *argv[]) if(first_sentence == 1) first_sentence = 0; else - fprintf(output_file, ","); - fprintf(output_file, "\n"); - print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word); + fprintf(json_output_file, ","); + fprintf(json_output_file, "\n"); + print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word); + mcf_print_sentence(mcf_output_file, wb, index_first_word, index_last_word); } strcpy(previous_file, current_file); strcpy(previous_directory, current_directory); } while(word_buffer_move_right(wb)); - print_footer(output_file); - fclose(output_file); + print_footer(json_output_file); + fclose(json_output_file); + fclose(mcf_output_file); } else{ //ctx->root_dir is NULL dump everything to stdout - output_file = stdout; - print_header(output_file, ctx->mcd_struct, ""); + json_output_file = stdout; + print_header(json_output_file, ctx->mcd_struct, ""); do{ w = word_buffer_b0(wb); if(new_sentence){ @@ -426,12 +451,12 @@ int main(int argc, char *argv[]) if(first_sentence == 1) first_sentence = 0; else - fprintf(output_file, ","); - fprintf(output_file, "\n"); - print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word); + fprintf(json_output_file, ","); + fprintf(json_output_file, "\n"); + print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word); } } while(word_buffer_move_right(wb)); - print_footer(output_file); + print_footer(json_output_file); } mcf2json_context_free(ctx); diff --git a/maca_trans_parser/src/cff2fann.c b/maca_trans_parser/src/cff2fann.c index 1aaffd1..e29c4e3 100644 --- a/maca_trans_parser/src/cff2fann.c +++ b/maca_trans_parser/src/cff2fann.c @@ -119,13 +119,13 @@ void cff2fann(context *ctx) mcd *m = ctx->mcd_struct; FILE *f = myfopen(ctx->cff_filename, "r"); int val; - dico *vocab; + dico *d_perceptron_features; char feature_type[64]; int feature_valindex; int count = 0; char *feat_str = NULL; - vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); + d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); /* printf("%d %d\n", 1, ctx->features_model->nbelem); */ @@ -146,7 +146,7 @@ void cff2fann(context *ctx) /* printf("\n"); */ printf("%d", val); } else { - feat_str = dico_int2string(vocab, val); + feat_str = dico_int2string(d_perceptron_features, val); if(feat_str){ /* printf("feat str = %s\n", feat_str); */ sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex); @@ -158,15 +158,14 @@ void cff2fann(context *ctx) /* printf("representation = %d\n", m->representation[mcd_col]); */ if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ - fprintf(stderr, "it is an embedding val = %d, file = %s\n", feature_valindex, m->filename[mcd_col]); + // fprintf(stderr, "it is an embedding val = %d, string = %s we_index = %d file = %s\n", feature_valindex, form, m->filename[mcd_col], we_index); - //int word_emb_get_code(word_emb *we, char *word) /* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */ /* printf("\n"); */ printf("\t%d", feature_valindex); } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){ - /* printf("it is a vocab\n"); */ + /* printf("it is a d_perceptron_features\n"); */ /* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */ /* printf("\n"); */ printf("\t%d", feature_valindex); -- GitLab