#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"movement_parser.h"
#include"oracle_parser.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"

/*
 * Print the usage text of the conll2cff tool.
 *
 * The option descriptions themselves come from the context_*_help_message
 * helpers; this function only orders them and writes the section titles
 * to stderr.  The vocabulary option is listed twice on purpose: under
 * INPUT for test mode and under OUTPUT for train mode.
 */
void maca_trans_parser_conll2cff_help_message(context *ctx)
{
  /* options shared by every mode */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);

  /* what the program reads */
  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);
  fputs("IN TEST MODE\n", stderr);
  context_vocabs_help_message(ctx);

  /* what the program writes */
  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);
  fputs("IN TRAIN MODE\n", stderr);
  context_vocabs_help_message(ctx);
}

/*
 * Validate the command line options stored in ctx.
 *
 * Returns normally when an input file name was given and help was not
 * requested.  Otherwise prints the usage message and terminates the
 * process with exit status 1.
 *
 * The mcd and output file names are intentionally not checked here.
 */
void maca_trans_parser_conll2cff_check_options(context *ctx)
{
  /* nothing to report: an input file is set and no help was asked for */
  if(ctx->input_filename && !ctx->help)
    return;

  maca_trans_parser_conll2cff_help_message(ctx);
  exit(1);
}

/*
 * Generate the training file in stream mode.
 *
 * The input file (ctx->input_filename) is opened twice: one handle feeds
 * the parser configuration, the other one is read sentence by sentence to
 * get the reference given to the oracle.  For every configuration visited,
 * one line is written to output_file: the movement code returned by the
 * oracle followed by the feature vector of the configuration.
 *
 * A single configuration is used for the whole file (created with a
 * look-ahead argument of 5).  When the oracle returns a right arc labelled
 * with the root label, the sentence is considered complete: the root arc is
 * created, one more word is shifted, the dependency set is replaced by an
 * empty one and the words left in the buffer are renumbered.
 *
 * At most ctx->sent_nb sentences are processed.
 *
 * output_file : stream the examples are written to (not closed here)
 * ctx         : program context (feature model, dictionaries, options)
 *
 * NOTE(review): fv is still not released here; free it with the feat_vec
 * destructor if the feat_vec API provides one.
 */
void generate_training_file_stream(FILE *output_file, context *ctx)
{
  config *c;
  int mvt_code;
  char mvt_type;
  int mvt_label;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  sentence *ref = NULL;
  int sentence_nb = 0;
  int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
  FILE *conll_file = myfopen(ctx->input_filename, "r");
  FILE *conll_file_ref = myfopen(ctx->input_filename, "r");

  c = config_initial(conll_file, ctx->mcd_struct, 5);

  /* test the sentence limit first, so that no reference sentence is read
     (and lost) once the limit has been reached */
  while((sentence_nb < ctx->sent_nb) && (ref = sentence_read(conll_file_ref, ctx->mcd_struct))){
    while(1){
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);

      mvt_code = oracle_parser(c, ref);

      mvt_type = movement_type(mvt_code);
      mvt_label = movement_label(mvt_code);

      /* one example per configuration: movement code, then features */
      fprintf(output_file, "%d", mvt_code);
      feat_vec_print(output_file, fv);

      /* nothing left in the buffer: stop working on this reference */
      if(queue_is_empty(c->bf)) break;

      if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){       /* sentence is complete */

        /* create the root arc */
        movement_right_arc(c, mvt_label, 0);

        /* shift dummy word in stack */
        movement_shift(c, 1, 0);

        /* empty depset */
        depset_free(c->ds);
        c->ds = depset_new();
        sentence_nb++;

        c->current_index = queue_renumber_words(c->bf);

        break;
      }

      if(mvt_type == MVT_LEFT){
        movement_left_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_RIGHT){
        movement_right_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_SHIFT){
        movement_shift(c, 1, 0);
        continue;
      }

      /* no movement was applied: the configuration would never change and
         this loop would write the same line forever */
      fprintf(stderr, "unknown movement type %d (movement code %d), aborting\n", mvt_type, mvt_code);
      exit(1);
    }

    /* the reference is released whichever way the inner loop was left */
    sentence_free(ref);
  }

  config_free(c);
  fclose(conll_file);
  fclose(conll_file_ref);
}

/*
 * Generate the training file in buffer mode.
 *
 * The input file (ctx->input_filename) is opened twice: one handle fills
 * the buffer of the parser configuration, one sentence at a time, the other
 * one provides the reference sentence given to the oracle.  A fresh
 * configuration is used for every sentence.  For every non terminal
 * configuration, one line is written to output_file: the movement code
 * returned by the oracle followed by the feature vector of the
 * configuration; the movement is then applied to the configuration.
 *
 * At most ctx->sent_nb sentences are processed.
 *
 * output_file : stream the examples are written to (not closed here)
 * ctx         : program context (feature model, dictionaries, options)
 *
 * NOTE(review): fv is still not released here; free it with the feat_vec
 * destructor if the feat_vec API provides one.
 */
void generate_training_file_buffer(FILE *output_file, context *ctx)
{
  config *c;
  int mvt_code;
  char mvt_type;
  int mvt_label;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  sentence *ref = NULL;
  int sentence_nb = 0;
  FILE *conll_file = myfopen(ctx->input_filename, "r");
  FILE *conll_file_ref = myfopen(ctx->input_filename, "r");

  c = config_initial(conll_file, ctx->mcd_struct, 0);

  /* test the sentence limit first, so that no reference sentence is read
     (and lost) once the limit has been reached */
  while((sentence_nb < ctx->sent_nb) && (ref = sentence_read(conll_file_ref, ctx->mcd_struct))){
    queue_read_sentence(c->bf, conll_file, ctx->mcd_struct);
    while(!config_is_terminal(c)){
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);

      mvt_code = oracle_parser(c, ref);

      mvt_type = movement_type(mvt_code);
      mvt_label = movement_label(mvt_code);

      /* one example per configuration: movement code, then features */
      fprintf(output_file, "%d", mvt_code);
      feat_vec_print(output_file, fv);

      if(mvt_type == MVT_LEFT){
        movement_left_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_RIGHT){
        movement_right_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_SHIFT){
        movement_shift(c, 0, 0);
        continue;
      }

      /* no movement was applied: the configuration would never change and
         this loop would write the same line forever */
      fprintf(stderr, "unknown movement type %d (movement code %d), aborting\n", mvt_type, mvt_code);
      exit(1);
    }

    /* the reference is only needed by the oracle for this sentence */
    sentence_free(ref);

    /* start the next sentence from a fresh configuration */
    config_free(c);
    c = config_initial(conll_file, ctx->mcd_struct, 0);
    sentence_nb++;
  }

  /* release the configuration created for a sentence that never came */
  config_free(c);
  fclose(conll_file);
  fclose(conll_file_ref);
}

/*
 * Entry point of the conll2cff tool.
 *
 * Steps:
 *   1. read and check the command line options
 *   2. read the feature model
 *   3. train mode: extract the dictionaries from the corpus
 *      test mode : read the dictionaries from ctx->vocabs_filename
 *   4. compute the number of movements from the size of the "LABEL"
 *      dictionary (two movements per label, plus one)
 *   5. create (train mode) or look up (test mode) the dictionary of
 *      perceptron features
 *   6. generate the output, in stream or buffer mode
 *   7. train mode: write the dictionaries to ctx->vocabs_filename
 *
 * Returns 0 on success and 1 when a dictionary is missing or when the
 * output file could not be closed properly.
 */
int main(int argc, char *argv[])
{
  context *ctx;
  FILE *output_file;
  int status = 0;

  ctx = context_read_options(argc, argv);
  maca_trans_parser_conll2cff_check_options(ctx);

  ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);

  if(ctx->mode == TRAIN_MODE){
    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
    ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
  }
  else if(ctx->mode == TEST_MODE){
    ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
    mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
  }

  ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");

  if(ctx->dico_labels == NULL){
    fprintf(stderr, "cannot find label names\n");
    return 1;
  }
  ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;

  feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);

  /* in train mode create feature dictionary for perceptron */
  if(ctx->mode == TRAIN_MODE)
    ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);

  /* in test mode read feature dictionary for perceptron */
  if(ctx->mode == TEST_MODE){
    ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
    /* same treatment as a missing label dictionary */
    if(ctx->d_perceptron_features == NULL){
      fprintf(stderr, "cannot find perceptron features dictionary\n");
      return 1;
    }
  }

  /* add the feature dictionary to the dico vector; in test mode it has just
     been fetched from that vector, so it is not added a second time */
  if(ctx->mode != TEST_MODE)
    dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);

  /* open output file */
  if(ctx->cff_filename)
    output_file = myfopen(ctx->cff_filename, "w");
  else
    output_file = stdout;

  if(ctx->stream_mode)
    generate_training_file_stream(output_file, ctx);
  else
    generate_training_file_buffer(output_file, ctx);

  if(ctx->mode == TRAIN_MODE){
    dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
  }

  /* fclose flushes the buffered output: a failure here means that the
     file is incomplete */
  if(ctx->cff_filename){
    if(fclose(output_file) != 0){
      fprintf(stderr, "error while closing file %s\n", ctx->cff_filename);
      status = 1;
    }
  }
  context_free(ctx);
  return status;
}