Skip to content
Snippets Groups Projects
Commit 723cafa9 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

fixed a bug in cff2fann

parent 68ee1829
No related branches found
No related tags found
No related merge requests found
...@@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int ...@@ -311,6 +311,17 @@ void print_tokens(FILE *output_file, word_buffer *wb, int index_first_word, int
fprintf(output_file," ],\n"); fprintf(output_file," ],\n");
} }
/*
 * Write the `input` string of every word of a sentence to output_file,
 * one string per line (each followed by '\n').
 *
 * output_file      : destination stream; nothing is written when it is NULL
 * wb               : word buffer the words are fetched from
 * index_first_word : index in wb of the first word to print (inclusive)
 * index_last_word  : index in wb of the last word to print (inclusive)
 *
 * When index_first_word > index_last_word the loop body never runs and
 * nothing is written.
 */
void mcf_print_sentence(FILE *output_file, word_buffer *wb, int index_first_word, int index_last_word)
{
  int index;
  word *w = NULL;

  /* The caller obtains the stream from myfopen_no_exit(); presumably that
     can yield NULL on failure (TODO confirm), so do not write through it. */
  if(output_file == NULL || wb == NULL)
    return;

  for(index = index_first_word; index <= index_last_word; index++){
    w = word_buffer_get_word_n(wb, index);
    /* word_buffer_get_word_n() is defined elsewhere; guard against a NULL
       word, and against a NULL input string, since passing a null pointer
       to "%s" is undefined behavior. Such entries are skipped. */
    if(w == NULL || w->input == NULL)
      continue;
    fprintf(output_file, "%s\n", w->input);
  }
}
void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word) void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int index_first_word, int index_last_word)
{ {
...@@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind ...@@ -324,7 +335,8 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
FILE *output_file = NULL; FILE *json_output_file = NULL;
FILE *mcf_output_file = NULL;
context *ctx = mcf2json_context_read_options(argc, argv); context *ctx = mcf2json_context_read_options(argc, argv);
word_buffer *wb = NULL; word_buffer *wb = NULL;
word *w = NULL; word *w = NULL;
...@@ -337,9 +349,10 @@ int main(int argc, char *argv[]) ...@@ -337,9 +349,10 @@ int main(int argc, char *argv[])
char current_file[1000]; char current_file[1000];
char previous_directory[1000]; char previous_directory[1000];
char previous_file[1000]; char previous_file[1000];
char filename_for_header[1000]; char json_output_filename_for_header[1000];
char *root_directory = NULL; char *root_directory = NULL;
char destination_file[1000]; char json_output_filename[1000];
char mcf_output_filename[1000];
char destination_dir[1000]; char destination_dir[1000];
struct stat st = {0}; struct stat st = {0};
...@@ -369,21 +382,31 @@ int main(int argc, char *argv[]) ...@@ -369,21 +382,31 @@ int main(int argc, char *argv[])
} }
} }
if(strcmp(current_file, previous_file)){ if(strcmp(current_file, previous_file)){
strcpy(destination_file, destination_dir); strcpy(json_output_filename, destination_dir);
strcat(destination_file, "/"); strcat(json_output_filename, "/");
strcat(destination_file, current_file); strcat(json_output_filename, current_file);
strcat(destination_file, ".json"); strcat(json_output_filename, ".json");
fprintf(stderr, "creating file %s\n", destination_file); fprintf(stderr, "creating file %s\n", json_output_filename);
if(output_file){
print_footer(output_file); strcpy(mcf_output_filename, destination_dir);
fclose(output_file); strcat(mcf_output_filename, "/");
} strcat(mcf_output_filename, current_file);
output_file = myfopen_no_exit(destination_file, "w"); strcat(mcf_output_filename, ".mcf");
strcpy(filename_for_header, current_directory); fprintf(stderr, "creating file %s\n", mcf_output_filename);
strcat(filename_for_header, "/");
strcat(filename_for_header, current_file);
strcat(filename_for_header, ".json"); if(json_output_file){
print_header(output_file, ctx->mcd_struct, filename_for_header); print_footer(json_output_file);
fclose(json_output_file);
fclose(mcf_output_file);
}
mcf_output_file = myfopen_no_exit(mcf_output_filename, "w");
json_output_file = myfopen_no_exit(json_output_filename, "w");
strcpy(json_output_filename_for_header, current_directory);
strcat(json_output_filename_for_header, "/");
strcat(json_output_filename_for_header, current_file);
strcat(json_output_filename_for_header, ".json");
print_header(json_output_file, ctx->mcd_struct, json_output_filename_for_header);
first_sentence = 1; first_sentence = 1;
} }
if(new_sentence){ if(new_sentence){
...@@ -398,20 +421,22 @@ int main(int argc, char *argv[]) ...@@ -398,20 +421,22 @@ int main(int argc, char *argv[])
if(first_sentence == 1) if(first_sentence == 1)
first_sentence = 0; first_sentence = 0;
else else
fprintf(output_file, ","); fprintf(json_output_file, ",");
fprintf(output_file, "\n"); fprintf(json_output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word); print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
mcf_print_sentence(mcf_output_file, wb, index_first_word, index_last_word);
} }
strcpy(previous_file, current_file); strcpy(previous_file, current_file);
strcpy(previous_directory, current_directory); strcpy(previous_directory, current_directory);
} while(word_buffer_move_right(wb)); } while(word_buffer_move_right(wb));
print_footer(output_file); print_footer(json_output_file);
fclose(output_file); fclose(json_output_file);
fclose(mcf_output_file);
} }
else{ //ctx->root_dir is NULL dump everything to stdout else{ //ctx->root_dir is NULL dump everything to stdout
output_file = stdout; json_output_file = stdout;
print_header(output_file, ctx->mcd_struct, ""); print_header(json_output_file, ctx->mcd_struct, "");
do{ do{
w = word_buffer_b0(wb); w = word_buffer_b0(wb);
if(new_sentence){ if(new_sentence){
...@@ -426,12 +451,12 @@ int main(int argc, char *argv[]) ...@@ -426,12 +451,12 @@ int main(int argc, char *argv[])
if(first_sentence == 1) if(first_sentence == 1)
first_sentence = 0; first_sentence = 0;
else else
fprintf(output_file, ","); fprintf(json_output_file, ",");
fprintf(output_file, "\n"); fprintf(json_output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word); print_sentence(json_output_file, sentence_nb, wb, index_first_word, index_last_word);
} }
} while(word_buffer_move_right(wb)); } while(word_buffer_move_right(wb));
print_footer(output_file); print_footer(json_output_file);
} }
mcf2json_context_free(ctx); mcf2json_context_free(ctx);
......
...@@ -119,13 +119,13 @@ void cff2fann(context *ctx) ...@@ -119,13 +119,13 @@ void cff2fann(context *ctx)
mcd *m = ctx->mcd_struct; mcd *m = ctx->mcd_struct;
FILE *f = myfopen(ctx->cff_filename, "r"); FILE *f = myfopen(ctx->cff_filename, "r");
int val; int val;
dico *vocab; dico *d_perceptron_features;
char feature_type[64]; char feature_type[64];
int feature_valindex; int feature_valindex;
int count = 0; int count = 0;
char *feat_str = NULL; char *feat_str = NULL;
vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
/* printf("%d %d\n", 1, ctx->features_model->nbelem); */ /* printf("%d %d\n", 1, ctx->features_model->nbelem); */
...@@ -146,7 +146,7 @@ void cff2fann(context *ctx) ...@@ -146,7 +146,7 @@ void cff2fann(context *ctx)
/* printf("\n"); */ /* printf("\n"); */
printf("%d", val); printf("%d", val);
} else { } else {
feat_str = dico_int2string(vocab, val); feat_str = dico_int2string(d_perceptron_features, val);
if(feat_str){ if(feat_str){
/* printf("feat str = %s\n", feat_str); */ /* printf("feat str = %s\n", feat_str); */
sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex); sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
...@@ -158,15 +158,14 @@ void cff2fann(context *ctx) ...@@ -158,15 +158,14 @@ void cff2fann(context *ctx)
/* printf("representation = %d\n", m->representation[mcd_col]); */ /* printf("representation = %d\n", m->representation[mcd_col]); */
if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
fprintf(stderr, "it is an embedding val = %d, file = %s\n", feature_valindex, m->filename[mcd_col]); // fprintf(stderr, "it is an embedding val = %d, string = %s we_index = %d file = %s\n", feature_valindex, form, m->filename[mcd_col], we_index);
//int word_emb_get_code(word_emb *we, char *word)
/* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */ /* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
/* printf("\n"); */ /* printf("\n"); */
printf("\t%d", feature_valindex); printf("\t%d", feature_valindex);
} else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){ } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
/* printf("it is a vocab\n"); */ /* printf("it is a d_perceptron_features\n"); */
/* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */ /* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */
/* printf("\n"); */ /* printf("\n"); */
printf("\t%d", feature_valindex); printf("\t%d", feature_valindex);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment