Commit 723cafa9 authored by Alexis Nasr
Browse files

fixed a bug in cff2fann

parent 68ee1829
......@@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int
fprintf(output_file," ],\n");
}
/*
 * Write the words of one sentence to output_file, one word per line.
 *
 * For every index in [index_first_word, index_last_word] (both bounds
 * inclusive) the word is fetched from wb with word_buffer_get_word_n() and
 * its `input` string is printed followed by a newline.
 *
 * output_file       stream to write to; nothing is written when it is NULL
 * wb                word buffer holding the sentence; nothing is written
 *                   when it is NULL
 * index_first_word  index in wb of the first word of the sentence
 * index_last_word   index in wb of the last word of the sentence; when it is
 *                   smaller than index_first_word nothing is written
 *
 * Words that cannot be fetched, or that carry no input string, are skipped
 * instead of being dereferenced.
 */
void mcf_print_sentence(FILE *output_file, word_buffer *wb, int index_first_word, int index_last_word)
{
    int index;

    /* The caller obtains the stream from myfopen_no_exit() and does not
       check it, so a failed open may reach this point (assumption based on
       the helper's name -- confirm against its definition). */
    if(output_file == NULL || wb == NULL){
        return;
    }

    for(index = index_first_word; index <= index_last_word; index++){
        word *w = word_buffer_get_word_n(wb, index);

        /* NOTE(review): word_buffer_get_word_n() presumably returns NULL for
           an index outside the buffer; passing a null pointer to "%s" is
           undefined behavior, so both cases are skipped. */
        if(w == NULL || w->input == NULL){
            continue;
        }
        fprintf(output_file, "%s\n", w->input);
    }
}
void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word)
{
......@@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
int main(int argc, char *argv[])
{
FILE *output_file = NULL;
FILE *json_output_file = NULL;
FILE *mcf_output_file = NULL;
context *ctx = mcf2json_context_read_options(argc, argv);
word_buffer *wb = NULL;
word *w = NULL;
......@@ -337,9 +349,10 @@ int main(int argc, char *argv[])
char current_file[1000];
char previous_directory[1000];
char previous_file[1000];
char filename_for_header[1000];
char json_output_filename_for_header[1000];
char *root_directory = NULL;
char destination_file[1000];
char json_output_filename[1000];
char mcf_output_filename[1000];
char destination_dir[1000];
struct stat st = {0};
......@@ -369,21 +382,31 @@ int main(int argc, char *argv[])
}
}
if(strcmp(current_file, previous_file)){
strcpy(destination_file, destination_dir);
strcat(destination_file, "/");
strcat(destination_file, current_file);
strcat(destination_file, ".json");
fprintf(stderr, "creating file %s\n", destination_file);
if(output_file){
print_footer(output_file);
fclose(output_file);
strcpy(json_output_filename, destination_dir);
strcat(json_output_filename, "/");
strcat(json_output_filename, current_file);
strcat(json_output_filename, ".json");
fprintf(stderr, "creating file %s\n", json_output_filename);
strcpy(mcf_output_filename, destination_dir);
strcat(mcf_output_filename, "/");
strcat(mcf_output_filename, current_file);
strcat(mcf_output_filename, ".mcf");
fprintf(stderr, "creating file %s\n", mcf_output_filename);
if(json_output_file){
print_footer(json_output_file);
fclose(json_output_file);
fclose(mcf_output_file);
}
output_file = myfopen_no_exit(destination_file, "w");
strcpy(filename_for_header, current_directory);
strcat(filename_for_header, "/");
strcat(filename_for_header, current_file);
strcat(filename_for_header, ".json");
print_header(output_file, ctx->mcd_struct, filename_for_header);
mcf_output_file = myfopen_no_exit(mcf_output_filename, "w");
json_output_file = myfopen_no_exit(json_output_filename, "w");
strcpy(json_output_filename_for_header, current_directory);
strcat(json_output_filename_for_header, "/");
strcat(json_output_filename_for_header, current_file);
strcat(json_output_filename_for_header, ".json");
print_header(json_output_file, ctx->mcd_struct, json_output_filename_for_header);
first_sentence = 1;
}
if(new_sentence){
......@@ -398,20 +421,22 @@ int main(int argc, char *argv[])
if(first_sentence == 1)
first_sentence = 0;
else
fprintf(output_file, ",");
fprintf(output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
fprintf(json_output_file, ",");
fprintf(json_output_file, "\n");
print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
mcf_print_sentence(mcf_output_file, wb, index_first_word, index_last_word);
}
strcpy(previous_file, current_file);
strcpy(previous_directory, current_directory);
} while(word_buffer_move_right(wb));
print_footer(output_file);
fclose(output_file);
print_footer(json_output_file);
fclose(json_output_file);
fclose(mcf_output_file);
}
else{ //ctx->root_dir is NULL dump everything to stdout
output_file = stdout;
print_header(output_file, ctx->mcd_struct, "");
json_output_file = stdout;
print_header(json_output_file, ctx->mcd_struct, "");
do{
w = word_buffer_b0(wb);
if(new_sentence){
......@@ -426,12 +451,12 @@ int main(int argc, char *argv[])
if(first_sentence == 1)
first_sentence = 0;
else
fprintf(output_file, ",");
fprintf(output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
fprintf(json_output_file, ",");
fprintf(json_output_file, "\n");
print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
}
} while(word_buffer_move_right(wb));
print_footer(output_file);
print_footer(json_output_file);
}
mcf2json_context_free(ctx);
......
......@@ -119,13 +119,13 @@ void cff2fann(context *ctx)
mcd *m = ctx->mcd_struct;
FILE *f = myfopen(ctx->cff_filename, "r");
int val;
dico *vocab;
dico *d_perceptron_features;
char feature_type[64];
int feature_valindex;
int count = 0;
char *feat_str = NULL;
vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
/* printf("%d %d\n", 1, ctx->features_model->nbelem); */
......@@ -146,7 +146,7 @@ void cff2fann(context *ctx)
/* printf("\n"); */
printf("%d", val);
} else {
feat_str = dico_int2string(vocab, val);
feat_str = dico_int2string(d_perceptron_features, val);
if(feat_str){
/* printf("feat str = %s\n", feat_str); */
sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
......@@ -158,15 +158,14 @@ void cff2fann(context *ctx)
/* printf("representation = %d\n", m->representation[mcd_col]); */
if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
fprintf(stderr, "it is an embedding val = %d, file = %s\n", feature_valindex, m->filename[mcd_col]);
// fprintf(stderr, "it is an embedding val = %d, string = %s we_index = %d file = %s\n", feature_valindex, form, m->filename[mcd_col], we_index);
//int word_emb_get_code(word_emb *we, char *word)
/* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
/* printf("\n"); */
printf("\t%d", feature_valindex);
} else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
/* printf("it is a vocab\n"); */
/* printf("it is a d_perceptron_features\n"); */
/* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */
/* printf("\n"); */
printf("\t%d", feature_valindex);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment