/*
 * cff2fann.c
 * Author: Jeremy Auguste
 */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"util.h"
#include"cf_file.h"
/*
 * Print the usage text of the cff2fann tool.
 *
 * The option descriptions come from the context_*_help_message helpers,
 * called in a fixed order; the section titles in between are written
 * directly to stderr.
 *
 * ctx: option context handed to every helper.
 */
void cff2fann_help_message(context *ctx)
{
  /* Options that are listed before any section title. */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);

  /* INPUT section, with its TEST MODE sub-section. */
  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);
  fputs("IN TEST MODE\n", stderr);
  context_vocabs_help_message(ctx);

  /* OUTPUT section, with its TRAIN MODE sub-section. */
  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);
  fputs("IN TRAIN MODE\n", stderr);
  context_vocabs_help_message(ctx);
}
/*
 * Validate the options stored in ctx.
 *
 * Returns normally when an input file name is set and help was not
 * requested. Otherwise prints the usage text and terminates the process
 * with exit status 1.
 *
 * Only the input file name and the help flag are tested here; no check is
 * made on the mcd file name or on the cff/fann file names.
 */
void cff2fann_check_options(context *ctx)
{
  int options_ok = ctx->input_filename && !ctx->help;

  if(options_ok)
    return;

  cff2fann_help_message(ctx);
  exit(1);
}
/*
 * Write a one-hot encoding of val over dim positions to f.
 *
 * Exactly dim fields are written, each followed by one space: "1 " at
 * position val and "0 " at every other position. No newline is written.
 * When val is outside [0, dim) every field is "0 "; when dim <= 0 nothing
 * is written.
 *
 * f:   output stream.
 * val: index of the position set to 1.
 * dim: number of positions to write.
 */
void one_hot_print(FILE *f, int val, int dim)
{
  int pos = 0;

  while(pos < dim){
    fputs((pos == val) ? "1 " : "0 ", f);
    pos++;
  }
}
/*
 * Read the tab-separated file ctx->input_filename and print an expanded
 * representation of every column of every line.
 *
 * Each token is converted to an integer with atoi. For every token, the
 * matching string of the "d_perceptron_features" dictionary is printed on
 * stdout, prefixed with "!!! ". Then:
 *   - column 0 is printed as a one-hot vector of size ctx->mvt_nb;
 *   - column k > 0 is resolved to a feature type through
 *     ctx->features_model (feature k - 1), then to an mcd column through
 *     wf2col, and printed either as a word embedding or as a one-hot vector
 *     over that column's dictionary, depending on the column's
 *     representation. Columns with any other representation print nothing
 *     more.
 *
 * The input file is closed before returning (the previous version leaked
 * the stream).
 *
 * NOTE(review): the vectors are written to stderr while the "!!!" lines and
 * the newlines that follow each vector go to stdout. This is preserved as
 * found; confirm whether the split is intentional.
 * NOTE(review): a line longer than the buffer is split by fgets and its
 * remainder is processed as a new line, which restarts column numbering.
 * NOTE(review): myfopen is assumed to deal with open failures itself; its
 * result is not checked here. TODO confirm.
 *
 * ctx: context providing input_filename, mcd_struct, vocabs, features_model
 *      and mvt_nb.
 */
void cff2fann(context *ctx)
{
  char buffer[10000];
  mcd *m = ctx->mcd_struct;
  FILE *f = myfopen(ctx->input_filename, "r");
  /* The dictionary is the same for every token: look it up only once. */
  dico *vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features");

  while(fgets(buffer, sizeof buffer, f)){
    int col_nb = 0;
    char *token;

    /* Only tabs separate tokens; atoi ignores the trailing newline that
       fgets leaves on the last token of a line. */
    for(token = strtok(buffer, "\t"); token; token = strtok(NULL, "\t"), col_nb++){
      int val = atoi(token);

      printf("!!! %s\n", dico_int2string(vocab, val));

      if(col_nb == 0){
        /* The first column is encoded over the ctx->mvt_nb classes. */
        one_hot_print(stderr, val, ctx->mvt_nb);
        printf("\n");
      }
      else{
        /* Column k describes feature k - 1 of the features model. */
        int feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
        int mcd_col = m->wf2col[feat_type];

        if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
          word_emb_print(stderr, m->word_emb_array[mcd_col], val);
          printf("\n");
        }
        if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
          one_hot_print(stderr, val, m->dico_array[mcd_col]->nbelem);
          printf("\n");
        }
      }
    }
  }

  /* Release the input stream. */
  fclose(f);
}
/*
 * Program entry point.
 *
 * Reads and validates the command-line options, loads the vocabularies and
 * the features model, asks look_for_number_of_features_and_classes for the
 * feature and class counts of the input file, stores the class count in
 * ctx->mvt_nb, links the mcd structure to the vocabularies, and finally
 * runs cff2fann on the context.
 *
 * Returns 0. Option validation may terminate the process earlier.
 */
int main(int argc, char *argv[])
{
  int feature_count; /* filled in below but not used afterwards */
  int class_count;
  context *ctx = context_read_options(argc, argv);

  cff2fann_check_options(ctx);

  /* Load the resources named by the options. */
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);

  /* The class count is used as the size of the first column's one-hot vector. */
  look_for_number_of_features_and_classes(ctx->input_filename, &feature_count, &class_count);
  ctx->mvt_nb = class_count;

  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, 1);

  printf("Coucou\n");
  cff2fann(ctx);

  return 0;
}