/*******************************************************************************
    Copyright (C) 2010 by Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
                      and Joseph Le Roux <joseph.le.roux@gmail.com>
    conll_lib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    conll_lib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with conll_lib. If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"conll_lib.h"

/* Forward declaration: parse_line() is defined further down in this file and
   is called by load_sentence() before that definition. */
int parse_line(FILE *f, sentence *s);

/*
 * Renumber the words of a sentence according to their position.
 *
 * Every word in [0, s->l) receives its index in s->words as id.  The head
 * of each word is then refreshed from its mother link: the mother's (new)
 * id, or 0 when the word has no mother.
 *
 * Every slot in [0, s->l) is dereferenced without a NULL check.
 */
void renumber_sentence(sentence *s)
{
  int pos;

  /* ids must all be final before any head is derived from them */
  for(pos = 0; pos < s->l; pos++)
    s->words[pos]->id = pos;

  for(pos = 0; pos < s->l; pos++){
    word *cur = s->words[pos];

    if(cur->mother == NULL)
      cur->head = 0;
    else
      cur->head = cur->mother->id;
  }
}


/*
 * Empty a sentence so that it can be reused.
 *
 * Every word stored in [0, s->l) is freed and its slot set to NULL, then a
 * fresh artificial ROOT word is installed at index 0 and the length is set
 * to 1.
 */
void reset_sentence(sentence *s)
{
  int k = 0;

  while(k < s->l){
    free(s->words[k]);   /* free(NULL) is a no-op, empty slots are fine */
    s->words[k] = NULL;
    k++;
  }

  s->l = 1;
  s->words[0] = allocate_word(0, "ROOT", "ROOT", "ROOT", "ROOT", "ROOT", -1, "ROOT");
}

/*
 * Release a sentence structure.  Passing NULL is allowed and does nothing.
 *
 * Only the sentence itself is released; the words it references are left
 * allocated.
 * NOTE(review): the per-word free() was disabled in the original code, so
 * the words are leaked unless the caller releases them (e.g. through
 * reset_sentence()).  Confirm who owns the words before re-enabling it.
 */
void free_sentence(sentence *s)
{
  free(s);   /* free(NULL) is a no-op */
}

/*
 * Duplicate a word.
 *
 * The id, the string fields and the head index of w are passed on to
 * allocate_word(); the tree links (mother, daughters) of w are not part of
 * what is handed over.  The caller owns the returned word.
 */
word *copy_word(word *w){
  word *clone;

  clone = allocate_word(w->id,
                        w->form,
                        w->lemma,
                        w->cpostag,
                        w->postag,
                        w->feats,
                        w->head,
                        w->deprel);
  return clone;
}

/* Copy src into the fixed-size field dst (capacity cap bytes), truncating if
   needed and always NUL-terminating.  A NULL src is stored as "". */
static void set_word_field(char *dst, size_t cap, const char *src)
{
  snprintf(dst, cap, "%s", src ? src : "");
}

/*
 * Allocate a word and fill it from the given values.
 *
 * id, head : numeric identifier and head index of the word
 * form, lemma, cpostag, postag, feats, deprel : strings copied into the
 *   word's own fixed-size fields; values that do not fit are truncated
 *   instead of overflowing the field.
 *
 * The new word has no mother and no daughters.  The caller owns it and
 * releases it with free().  On allocation failure a message is printed on
 * stderr and the program exits, as allocate_sentence() does.
 */
word *allocate_word(unsigned id, char *form, char *lemma, char *cpostag, char *postag, char *feats, unsigned head, char *deprel)
{
  word *w = malloc(sizeof *w);

  if(w == NULL){
    fprintf(stderr, "cannot allocate word\n");
    exit(1);
  }

  w->id = id;
  /* the string members are arrays inside the struct, so sizeof gives
     their real capacity */
  set_word_field(w->form, sizeof w->form, form);
  set_word_field(w->lemma, sizeof w->lemma, lemma);
  set_word_field(w->cpostag, sizeof w->cpostag, cpostag);
  set_word_field(w->postag, sizeof w->postag, postag);
  set_word_field(w->feats, sizeof w->feats, feats);
  w->head = head;
  set_word_field(w->deprel, sizeof w->deprel, deprel);
  w->mother = NULL;
  w->daughters_nb = 0;
  return w;
}

sentence *allocate_sentence(void)
{
  sentence *s;
  int i;

  s = malloc(sizeof(sentence));
  if(s == NULL){
    fprintf(stderr, "cannot allocate sentence\n");
    exit(1);
  }

  s->num = INCORRECT_SENTENCE_NUM_VALUE;
  s->l = 0;
  for(i=0; i < MAX_WORDS_IN_SENTENCE; i++){
    s->words[i] = NULL;
  }
  return s;
}

/*
 * Read the next sentence from f into s and build its dependency tree.
 *
 * s is reset first (previous words are freed, ROOT is installed at index 0),
 * then lines are read with parse_line() until it reports a blank line or the
 * end of the input.  Each word whose head index designates a word of the
 * sentence is attached to it with add_daughter(); any other word ends up
 * with no mother.
 *
 * Returns 0 when the end-of-file indicator of f is already set, 1 otherwise.
 * Since reset_sentence() always leaves ROOT in place, a read that yields no
 * word still returns 1 with a ROOT-only sentence (s->l == 1).
 */
int load_sentence(FILE *f, sentence *s)
{
  int i;

  if(feof(f)) return 0;

  reset_sentence(s);
  /* the words of the previous sentence were just freed: do not keep a
     dangling root pointer if this sentence has no word with head 0 */
  s->root = NULL;

  /* read lines until the blank separator line or the end of the input */
  while(parse_line(f, s))
    ;

  /* defensive: cannot happen while reset_sentence() installs ROOT */
  if(s->l == 0) return 0;

  /* build the tree structure */
  s->words[0]->mother = NULL;
  for(i=1; i < s->l; ++i){
    word *w = s->words[i];

    /* valid heads are indices of existing words, i.e. 0 .. s->l - 1 */
    if((w->head >= 0) && (w->head < s->l)){
      add_daughter(w, s->words[w->head]);
    }
    else{
      /* parse_line() allocates words with malloc(): give the link a defined value */
      w->mother = NULL;
    }
  }

  return 1;
}



/*----------------------------------------------------------------------------*/
/*
 * Read one line of f and append the word it describes to s.
 *
 * Expected line layout (tab separated, further columns are ignored):
 *   id  form  lemma  cpostag  postag  feats  head  deprel
 * for instance
 *   1	A	a	_	DT	_	3	det	_	_
 *
 * A head column equal to "_" leaves the head at -1; a head that converts
 * to 0 also records the word as s->root.  Columns missing from a short line
 * are left empty, and values longer than the destination field are
 * truncated.
 *
 * Returns 0 on end of input, read error or blank line (end of sentence),
 * 1 otherwise.  When the sentence is already full the line is consumed and
 * dropped with a warning, and 1 is still returned so that the caller keeps
 * reading up to the end of the sentence.
 */
int parse_line(FILE *f, sentence *s)
{
  char buff[MAX_LINE_LENGTH];
  /* Scratch fields are as large as the line buffer, so sscanf() can never
     overrun them whatever the line contains. */
  char form[MAX_LINE_LENGTH];
  char lemma[MAX_LINE_LENGTH];
  char cpostag[MAX_LINE_LENGTH];
  char postag[MAX_LINE_LENGTH];
  char feats[MAX_LINE_LENGTH];
  char head_str[MAX_LINE_LENGTH];
  char deprel[MAX_LINE_LENGTH];
  word *w;

  if(feof(f)) return 0;

  if (fgets(buff, MAX_LINE_LENGTH, f) == NULL) {
    return 0;
  }

  /* read an empty line */
  if(buff[0] == '\n'){
    return 0;
  }

  /* check the capacity BEFORE storing anything */
  if(s->l >= MAX_WORDS_IN_SENTENCE){
    fprintf(stderr, "sentence too long: word ignored\n");
    return 1;
  }

  w = malloc(sizeof *w);
  if(w == NULL){
    fprintf(stderr, "cannot allocate word\n");
    exit(1);
  }

  /* defined defaults, in case the line has fewer columns than expected */
  w->id = s->l;
  w->head = -1;
  w->mother = NULL;
  w->daughters_nb = 0;
  form[0] = lemma[0] = cpostag[0] = postag[0] = feats[0] = deprel[0] = '\0';
  strcpy(head_str, "_");

  /* the return value is not needed: unmatched columns keep the defaults */
  sscanf(buff, "%d\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%s",&(w->id), form, lemma, cpostag, postag, feats, head_str, deprel);

  /* bounded copies into the fixed-size fields of the word */
  snprintf(w->form, sizeof w->form, "%s", form);
  snprintf(w->lemma, sizeof w->lemma, "%s", lemma);
  snprintf(w->cpostag, sizeof w->cpostag, "%s", cpostag);
  snprintf(w->postag, sizeof w->postag, "%s", postag);
  snprintf(w->feats, sizeof w->feats, "%s", feats);
  snprintf(w->deprel, sizeof w->deprel, "%s", deprel);

  if(strcmp(head_str, "_")){
    w->head = atoi(head_str);
    if(w->head == 0) s->root = w;
  }

  s->words[s->l] = w;
  s->l++;

  return 1;
}

/*
 * Print the words of s (ROOT at index 0 excluded) on stdout, one per line.
 *
 * Each print_* flag enables the corresponding column; enabled columns are
 * written in the fixed order id, form, lemma, cpostag, postag, feats, head,
 * deprel, each followed by a tab.  Every line ends with an end-of-sentence
 * flag: 1 for the last word of the sentence, 0 for the others.
 * Nothing is printed when the sentence holds no word besides ROOT.
 */
void print_sentence_mcf2(sentence *s, int print_id, int print_form, int print_lemma, int print_cpostag, int print_postag, int print_feats, int print_head, int print_deprel)
{
  int pos;

  if(s->l == 0 || s->l == 1) return;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];

    if(print_id)      printf("%d\t", cur->id);
    if(print_form)    printf("%s\t", cur->form);
    if(print_lemma)   printf("%s\t", cur->lemma);
    if(print_cpostag) printf("%s\t", cur->cpostag);
    if(print_postag)  printf("%s\t", cur->postag);
    if(print_feats)   printf("%s\t", cur->feats);
    if(print_head)    printf("%d\t", cur->head);
    if(print_deprel)  printf("%s\t", cur->deprel);

    /* end-of-sentence flag */
    fputs((pos == s->l - 1) ? "1\n" : "0\n", stdout);
  }
}

/*
 * Print the words of s (ROOT at index 0 excluded) on stdout, one per line,
 * with a caller-chosen column layout.
 *
 * columns holds nb_col one-letter column codes, printed in that order, each
 * followed by a tab:
 *   I id, W form, L lemma, C cpostag, P postag, F feats, H head, D deprel
 * Unknown codes are ignored.  Every line ends with an end-of-sentence flag:
 * 1 for the last word of the sentence, 0 for the others.
 * Nothing is printed when the sentence holds no word besides ROOT.
 */
void print_sentence_mcf3(sentence *s, char *columns, int nb_col)
{
  int pos, col;

  if(s->l == 0 || s->l == 1) return;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];

    for(col = 0; col < nb_col; col++){
      const char code = columns[col];

      if(code == 'I')      printf("%d\t", cur->id);
      else if(code == 'W') printf("%s\t", cur->form);
      else if(code == 'L') printf("%s\t", cur->lemma);
      else if(code == 'C') printf("%s\t", cur->cpostag);
      else if(code == 'P') printf("%s\t", cur->postag);
      else if(code == 'F') printf("%s\t", cur->feats);
      else if(code == 'H') printf("%d\t", cur->head);
      else if(code == 'D') printf("%s\t", cur->deprel);
    }

    /* end-of-sentence flag */
    fputs((pos == s->l - 1) ? "1\n" : "0\n", stdout);
  }
}

/*
 * Print the words of s (ROOT at index 0 excluded) on stdout, one per line:
 *   form <tab> pos <tab> lemma <tab> head-offset <tab> deprel <tab> eos
 *
 * pos is the coarse tag (cpostag) when coarse_pos is non-zero, the fine tag
 * (postag) otherwise.  head-offset is the mother's id minus the word's own
 * id, or 0 for a word with no mother.  eos is 1 on the last word of the
 * sentence and 0 elsewhere.
 * Nothing is printed when the sentence holds no word besides ROOT.
 */
void print_sentence_mcf(sentence *s, int coarse_pos)
{
  int pos;

  if(s->l == 0 || s->l == 1) return;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];
    const char *tag = coarse_pos ? cur->cpostag : cur->postag;

    fprintf(stdout, "%s\t%s\t%s", cur->form, tag, cur->lemma);

    if(cur->mother != NULL)
      fprintf(stdout, "\t%d", cur->mother->id - cur->id);
    else
      fprintf(stdout, "\t0");

    fprintf(stdout, "\t%s\t%c\n", cur->deprel, (pos == s->l - 1) ? '1' : '0');
  }
}


/*
 * Print s on stdout, one word per line with ten tab-separated columns:
 *   id form lemma cpostag postag feats head deprel _ _
 * followed by one blank line.
 *
 * The head column is the id of the word's mother, or 0 for a word with no
 * mother.  ROOT (index 0) is not printed.  Nothing at all is printed, not
 * even the blank line, when the sentence holds no word besides ROOT.
 */
void print_sentence(sentence *s)
{
  int pos;

  if(s->l == 0 || s->l == 1) return;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];

    fprintf(stdout, "%d\t%s\t%s\t%s\t%s\t%s",
            cur->id, cur->form, cur->lemma, cur->cpostag, cur->postag, cur->feats);

    if(cur->mother != NULL)
      fprintf(stdout, "\t%d", cur->mother->id);
    else
      fprintf(stdout, "\t0");

    /* the last two columns are always left unspecified */
    fprintf(stdout, "\t%s\t_\t_\n", cur->deprel);
  }

  /* blank line: end of sentence */
  printf("\n");
}

/*
 * Remove the empty (NULL) slots of a sentence.
 *
 * The remaining words keep their relative order and are packed at the
 * start of s->words; s->l becomes the number of words kept.  The slots
 * vacated at the end are set to NULL so that no stale copy of a word
 * pointer survives past the new length.  Single pass, linear in s->l.
 */
void compact_sentence(sentence *s)
{
  int src;
  int dst = 0;

  /* pack the non-NULL entries to the front */
  for(src = 0; src < s->l; src++){
    if(s->words[src] != NULL){
      s->words[dst] = s->words[src];
      dst++;
    }
  }

  /* clear what is left behind the packed words */
  for(src = dst; src < s->l; src++){
    s->words[src] = NULL;
  }

  s->l = dst;
}

/*
 * Attach daughter under mother.
 *
 * daughter->mother is set to mother, and daughter is appended to mother's
 * list of daughters.  With a NULL mother the daughter simply ends up with
 * no mother; with a NULL daughter nothing happens.
 *
 * The daughter is not removed from the list of a previous mother, and the
 * list is appended to without a capacity check.
 */
void add_daughter(word *daughter, word *mother)
{
  if(daughter == NULL)
    return;

  daughter->mother = mother;   /* NULL mother: the word becomes mother-less */

  if(mother == NULL)
    return;

  mother->daughters[mother->daughters_nb] = daughter;
  mother->daughters_nb++;
}

/*
 * Remove word number i of s from the list of daughters of its mother.
 *
 * Nothing happens when the slot is empty or when the word has no mother.
 * The mother link of the word itself is left untouched.
 */
void remove_daughter(sentence *s, int i)
{
  word *child = s->words[i];
  word *parent;
  int pos, shift;

  if(child == NULL)
    return;

  parent = child->mother;
  if(parent == NULL)
    return;

  for(pos = 0; pos < parent->daughters_nb; pos++){
    if(parent->daughters[pos] != child)
      continue;

    /* close the gap: move the following daughters one slot down */
    for(shift = pos + 1; shift < parent->daughters_nb; shift++)
      parent->daughters[shift - 1] = parent->daughters[shift];

    parent->daughters_nb--;
  }
}

/*
 * Delete word number i of s together with all the words it dominates.
 *
 * The words whose mother is word i are deleted first (recursively), then
 * word i is detached from its own mother, freed, and its slot set to NULL.
 * The sentence is NOT compacted: the deleted words leave NULL slots.
 *
 * An index outside [0, s->l) or an already empty slot is ignored.  Without
 * that guard, an empty slot would match every word that has no mother and
 * delete all of them.
 */
void remove_word_rec(sentence *s, int i)
{
  int j;
  word *w;

  if((i < 0) || (i >= s->l))
    return;

  w = s->words[i];
  if(w == NULL)
    return;

  for(j=1; j < s->l; j++){
    /* j != i: a word recorded as its own mother must not recurse forever */
    if((j != i) && (s->words[j]) && (s->words[j]->mother == w))
      remove_word_rec(s, j);
  }
  remove_daughter(s, i);
  free(w);
  s->words[i] = NULL;
}

/*
 * Delete the subtree rooted at word number root, then close the holes left
 * in the word array so that the sentence is contiguous again.
 */
void remove_subtree(sentence *s, int root)
{
  /* step 1: delete the word and everything below it (leaves NULL slots) */
  remove_word_rec(s, root);

  /* step 2: squeeze the NULL slots out */
  compact_sentence(s);
}

/*
 * Insert word w at position index of s and attach it under gov.
 *
 * If the slot is occupied, the words from index onwards are shifted one
 * position to the right and the sentence grows by one; if the slot is empty
 * it is simply filled.  The length is extended when index lies beyond the
 * current end.  With a non-NULL gov, w becomes a daughter of gov.
 *
 * An index outside the word array, or an insertion into a sentence that is
 * already full, is refused: a message is printed on stderr and s is left
 * unchanged (w is not stored; it remains owned by the caller).
 */
void add_word(sentence *s, word *w, int index, word *gov)
{
  int i;
  /* words is an array member of the sentence, so this is its real capacity */
  const int capacity = (int)(sizeof s->words / sizeof s->words[0]);

  if((index < 0) || (index >= capacity)){
    fprintf(stderr, "add_word: index %d out of range\n", index);
    return;
  }

  if(s->words[index] != NULL){
    /* shifting right writes into slot s->l, which must exist */
    if(s->l >= capacity){
      fprintf(stderr, "add_word: sentence is full\n");
      return;
    }
    for(i=s->l; i>index; i--){
      s->words[i] = s->words[i-1];
    }
    s->l++;
  }
  s->words[index] = w;
  if(index >= s->l) s->l = index+1;
  if(gov != NULL)
    add_daughter(w, gov);
}
 
/*
 * Replace word number index of s by two words, gov and dep.
 *
 * gov takes over the dependency label, the daughters and the mother of the
 * replaced word; dep becomes a daughter of gov.  The replaced word is
 * freed.  gov is inserted at index_gov and dep at index_dep with
 * add_word(), in that order.
 *
 * Nothing is done when the slot at index is empty.
 */
void split_node_in_two(sentence *s, int index, word *gov, word *dep, int index_gov, int index_dep)
{
  int i;
  word *w = s->words[index];
  word *mother;

  if(w == NULL)
    return;

  mother = w->mother;

  strcpy(gov->deprel, w->deprel);
  for(i=1; i < s->l; i++){
    /* the sentence may contain empty slots */
    if((s->words[i]) && (s->words[i]->mother == w))
      add_daughter(s->words[i], gov);
  }

  /* take w out of its mother's daughter list before freeing it, otherwise
     the list keeps a pointer to released memory */
  if(mother){
    int k;
    int kept = 0;

    for(k=0; k < mother->daughters_nb; k++){
      if(mother->daughters[k] != w){
        mother->daughters[kept] = mother->daughters[k];
        kept++;
      }
    }
    mother->daughters_nb = kept;
  }

  free(w);
  s->words[index] = NULL;
  add_word(s, gov, index_gov, mother);
  add_word(s, dep, index_dep, gov);
}


/*---------------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------------*/

/*
 * Rewrite the coarse part-of-speech tag of every word of s (ROOT excluded)
 * using the table h_cpos.
 *
 * Each cpostag is looked up in h_cpos; when a value is found it replaces
 * the tag, otherwise the tag is kept and a warning is printed on stderr.
 */
void change_cpos(sentence *s, hash_str *h_cpos)
{
  int pos;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];
    char *mapped = hash_str_get_val (h_cpos, cur->cpostag);

    if(mapped == NULL){
      fprintf(stderr, "ATTENTION: cpos %s inconnue\n", cur->cpostag);
      continue;
    }
    strcpy(cur->cpostag, mapped);
  }
}

/*---------------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------------*/
/*
 * Rewrite the fine part-of-speech tag of every word of s (ROOT excluded)
 * using the table h_pos.
 *
 * Each postag is looked up in h_pos; when a value is found it replaces the
 * tag, otherwise the tag is kept and a warning naming the unknown postag is
 * printed on stderr.
 */
void change_pos(sentence *s, hash_str *h_pos)
{
  int i;
  word *w;
  char *val;

  for(i=1; i<s->l; i++){
    w = s->words[i];

    val = hash_str_get_val (h_pos, w->postag);
    if(val){
      strcpy(w->postag, val);
    }
    else{
      /* report the tag that was actually looked up (postag, not cpostag) */
      fprintf(stderr, "ATTENTION: pos %s inconnue\n", w->postag);
    }
  }

}

/*---------------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------------*/



/*
 * Rewrite the dependency label of every word of s (ROOT excluded) using the
 * table h_fct.
 *
 * Each deprel is looked up in h_fct; when a value is found it replaces the
 * label, otherwise the label is kept and a warning is printed on stderr.
 */
void change_fct(sentence *s, hash_str *h_fct)
{
  int pos;

  for(pos = 1; pos < s->l; pos++){
    word *cur = s->words[pos];
    char *mapped = hash_str_get_val (h_fct, cur->deprel);

    if(mapped == NULL){
      fprintf(stderr, "ATTENTION: fct %s inconnue\n", cur->deprel);
      continue;
    }
    strcpy(cur->deprel, mapped);
  }
}

/*
 * Tell whether s looks like a number: only decimal digits and commas, with
 * at least one digit ("12", "1,5", "1,000").
 *
 * Returns 1 for such a string and 0 otherwise; NULL, the empty string and
 * strings made of commas only are not numbers.
 */
int is_num(char *s)
{
  const char *p;
  int seen_digit = 0;

  if(s == NULL) return 0;

  for(p = s; *p != '\0'; p++){
    if((*p >= '0') && (*p <= '9'))
      seen_digit = 1;
    else if(*p != ',')
      return 0;
  }
  return seen_digit;
}


/*
 * Give every word of s the id "position + offset", position being the index
 * of the word in s->words (ROOT at index 0 included).
 *
 * Heads are not updated.  Every slot in [0, s->l) is dereferenced without a
 * NULL check.
 */
void renumber_sentence_offset(sentence *s, int offset)
{
  int pos = 0;

  while(pos < s->l){
    s->words[pos]->id = pos + offset;
    pos++;
  }
}