diff --git a/maca_trans_parser/src/cff2fann.c b/maca_trans_parser/src/cff2fann.c index b9922662a9285ab52297d2e5e97b33d14b44822f..38336968a1d63fd7d81c14fb3e5b8076dfb1e78f 100644 --- a/maca_trans_parser/src/cff2fann.c +++ b/maca_trans_parser/src/cff2fann.c @@ -6,6 +6,7 @@ #include"context.h" #include"util.h" #include"cf_file.h" +#include"feat_lib.h" void cff2fann_help_message(context *ctx) @@ -45,6 +46,7 @@ void one_hot_print(FILE *f, int val, int dim) fprintf(f, "%d ", (i == val)? 1 : 0); } + void cff2fann(context *ctx) { char buffer[10000]; @@ -55,40 +57,49 @@ void cff2fann(context *ctx) FILE *f = myfopen(ctx->input_filename, "r"); int val; dico *vocab; + char feature_type[64]; + int feature_valindex; + int count = 0; + + vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features"); + + printf("%d %d\n", 1, ctx->features_model->nbelem); while(fgets(buffer, 10000, f)){ /* printf("%s", buffer); */ /* printf("\n"); */ token = strtok(buffer, "\t"); col_nb = 0; + if (count % 100 == 0) + fprintf(stderr, "%d\r", count); while(token){ /* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */ val = atoi(token); - vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features"); - printf("!!! %s\n", dico_int2string(vocab, val)); if(col_nb == 0){ - one_hot_print(stderr, val, ctx->mvt_nb); + one_hot_print(stdout, val, ctx->mvt_nb); printf("\n"); - } - else{ - feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1); - /* printf("feat_type = %d\n", feat_type); */ - int mcd_col = m->wf2col[feat_type]; - /* printf("representation = %d\n", m->representation[mcd_col]); */ - if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ - /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */ - word_emb_print(stderr, m->word_emb_array[mcd_col], val); - printf("\n"); - } - if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){ - /* printf("it is a vocab\n"); */ - one_hot_print(stderr, val, m->dico_array[mcd_col]->nbelem); - printf("\n"); - } + } else { + sscanf(dico_int2string(vocab, val), "%[^==]==%d", feature_type, &feature_valindex); + feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1); + /* printf("feat_type = %d\n", feat_type); */ + int mcd_col = m->wf2col[feat_type]; + /* printf("representation = %d\n", m->representation[mcd_col]); */ + if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ + /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */ + word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); + printf("\n"); + } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){ + /* printf("it is a vocab\n"); */ + one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); + printf("\n"); + } else { + printf("%d\n", feature_valindex); + } } col_nb++; token = strtok(NULL , "\t"); } + count++; } } @@ -110,8 +121,6 @@ int main(int argc, char *argv[]) mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, 1); - printf("Coucou\n"); - cff2fann(ctx); return 0; }