Skip to content
Snippets Groups Projects
Select Git revision
  • 7aed132388c3435d809e2ae6b54b234bec20c10d
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

cff2fann.c

Blame
  • cff2fann.c 2.84 KiB
    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include<unistd.h>
    #include<getopt.h>
    #include"context.h"
    #include"util.h"
    #include"cf_file.h"
    
    
    /*
     * Print the usage text of the cff2fann tool.
     *
     * The generic help sections of the context (general, mode, sentence number)
     * come first, followed by an INPUT section and an OUTPUT section. Section
     * headings are written to stderr; the body of every section is delegated to
     * the matching context_*_help_message() helper.
     */
    void cff2fann_help_message(context *ctx)
    {
      /* options shared by every tool built on the context */
      context_general_help_message(ctx);
      context_mode_help_message(ctx);
      context_sent_nb_help_message(ctx);

      /* input-side options */
      fputs("INPUT\n", stderr);
      context_conll_help_message(ctx);
      fputs("IN TEST MODE\n", stderr);
      context_vocabs_help_message(ctx);

      /* output-side options */
      fputs("OUTPUT\n", stderr);
      context_cff_help_message(ctx);
      fputs("IN TRAIN MODE\n", stderr);
      context_vocabs_help_message(ctx);
    }
    
    /*
     * Validate the command-line options stored in ctx.
     *
     * The options are accepted only when an input filename was given and help
     * was not requested; otherwise the usage text is printed and the process
     * exits with status 1. Checks on the mcd, cff and fann filenames are
     * currently disabled.
     */
    void cff2fann_check_options(context *ctx)
    {
      int options_ok = ctx->input_filename && !ctx->help;

      if(options_ok)
        return;

      cff2fann_help_message(ctx);
      exit(1);
    }
    
    /*
     * Write a one-hot vector of length dim to stream f.
     *
     * Each component is printed as "1 " at position val and "0 " everywhere
     * else (every digit is followed by a single space, including the last).
     * When val is outside [0, dim) the vector is all zeros; when dim <= 0
     * nothing is written.
     */
    void one_hot_print(FILE *f, int val, int dim)
    {
      int pos = 0;

      while(pos < dim){
        int is_hot = (pos == val);
        fprintf(f, "%d ", is_hot);
        pos++;
      }
    }
    
    /*
     * Read the tab-separated file ctx->input_filename line by line and print a
     * representation of the integer found in every column.
     *
     * For each token:
     *   - a trace line "!!! <string>" is written to stdout, where <string> is
     *     the entry of the "d_perceptron_features" dictionary for that value;
     *   - column 0 is printed to stderr as a one-hot vector of size
     *     ctx->mvt_nb;
     *   - any other column n is mapped to the type of feature n-1 in
     *     ctx->features_model, then to an mcd column through wf2col. If that
     *     column is represented as an embedding, the embedding of the value is
     *     printed to stderr with word_emb_print(); if it is represented as a
     *     vocabulary, a one-hot vector sized by the column's dictionary is
     *     printed to stderr.
     * The newline that follows each vector is written to stdout.
     *
     * The input stream is closed before returning.
     */
    void cff2fann(context *ctx)
    {
      char buffer[10000];
      char *token;
      int col_nb;
      int feat_type;
      int mcd_col;
      int val;
      mcd *m = ctx->mcd_struct;
      dico *vocab;
      FILE *f = myfopen(ctx->input_filename, "r");

      /* NOTE(review): myfopen's failure behaviour is not visible from here;
         guard so that a NULL stream is never handed to fgets()/fclose(). */
      if(f == NULL){
        fprintf(stderr, "cannot open file %s\n", ctx->input_filename);
        exit(1);
      }

      /* sizeof keeps the read limit tied to the buffer declaration */
      while(fgets(buffer, sizeof buffer, f)){
        token = strtok(buffer, "\t");
        col_nb = 0;
        while(token){
          val = atoi(token);
          vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features");
          printf("!!! %s\n", dico_int2string(vocab, val));
          if(col_nb == 0){
            one_hot_print(stderr, val, ctx->mvt_nb);
            printf("\n");
          }
          else{
            /* columns 1..n correspond to features 0..n-1 of the model */
            feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
            mcd_col = m->wf2col[feat_type];
            if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
              word_emb_print(stderr, m->word_emb_array[mcd_col], val);
              printf("\n");
            }
            if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
              one_hot_print(stderr, val, m->dico_array[mcd_col]->nbelem);
              printf("\n");
            }
          }
          col_nb++;
          token = strtok(NULL, "\t");
        }
      }

      /* the original never released the stream */
      fclose(f);
    }
    
    /*
     * Program entry point.
     *
     * Builds the context from the command line, validates it, loads the
     * vocabularies and the feature model, reads the number of features and
     * classes from the input file (the class count is stored in ctx->mvt_nb),
     * links the mcd structure to the vocabularies and finally runs cff2fann().
     */
    int main(int argc, char *argv[])
    {
      int feature_count;
      int class_count;
      context *ctx = context_read_options(argc, argv);

      cff2fann_check_options(ctx);

      /* resources named on the command line */
      ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
      ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);

      /* only the class count is kept; it sizes the one-hot vector of column 0 */
      look_for_number_of_features_and_classes(ctx->input_filename, &feature_count, &class_count);
      ctx->mvt_nb = class_count;

      mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, 1);

      fputs("Coucou\n", stdout);

      cff2fann(ctx);
      return 0;
    }