Working version of cff2fann

22a16f02 · Jeremy Auguste · 173c0056 · 22a16f02
Commit 22a16f02 authored Jan 11, 2017 by Jeremy Auguste
--- a/maca_trans_parser/src/cff2fann.c
+++ b/maca_trans_parser/src/cff2fann.c
@@ -6,6 +6,7 @@
 #include"context.h"
 #include"util.h"
 #include"cf_file.h"
+#include"feat_lib.h"
 void cff2fann_help_message(context *ctx)
@@ -45,6 +46,7 @@ void one_hot_print(FILE *f, int val, int dim)
    fprintf(f, "%d ", (i == val)? 1  : 0);
 }
 void cff2fann(context *ctx)
 {
  char buffer[10000];
@@ -55,40 +57,49 @@ void cff2fann(context *ctx)
  FILE *f = myfopen(ctx->input_filename, "r");
  int val;
  dico *vocab;
+  char feature_type[64];
+  int feature_valindex;
+  int count = 0;
+  vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features");
+  printf("%d %d\n", 1, ctx->features_model->nbelem);
  while(fgets(buffer, 10000, f)){
    /* printf("%s", buffer); */
    /* printf("\n"); */
    token = strtok(buffer, "\t");
    col_nb = 0;
+    if (count % 100 == 0)
+      fprintf(stderr, "%d\r", count);
    while(token){
      /* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */
      val = atoi(token);
-      vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features");
-      printf("!!! %s\n", dico_int2string(vocab, val));
      if(col_nb == 0){
-        one_hot_print(stderr, val, ctx->mvt_nb); 
+        one_hot_print(stdout, val, ctx->mvt_nb); 
        printf("\n");
-      }
+      } else {
-      else{
+        sscanf(dico_int2string(vocab, val), "%[^==]==%d", feature_type, &feature_valindex);
        feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
        /* printf("feat_type = %d\n", feat_type); */
        int mcd_col = m->wf2col[feat_type];
        /* printf("representation = %d\n", m->representation[mcd_col]); */
        if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
          /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
-	  word_emb_print(stderr, m->word_emb_array[mcd_col], val);
+          word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex);
          printf("\n");
-	}
+        } else if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
-	if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){
          /* printf("it is a vocab\n"); */
-	  one_hot_print(stderr, val, m->dico_array[mcd_col]->nbelem); 
+          one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); 
          printf("\n");
+        } else {
+          printf("%d\n", feature_valindex);
        }
      }
      col_nb++;
      token = strtok(NULL , "\t");
    }
+    count++;
  }
 }
@@ -110,8 +121,6 @@ int main(int argc, char *argv[])
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, 1);
-  printf("Coucou\n");
  cff2fann(ctx);
  return 0;
 }