Skip to content
Snippets Groups Projects
Select Git revision
  • 4c87a9822a50dbb422111179a03b3527a9e9d0f0
  • master default protected
  • tania
3 results

Moves.py

Blame
  • conll_lib.c 10.97 KiB
    /*******************************************************************************
        Copyright (C) 2010 by Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
                          and Joseph Le Roux <joseph.le.roux@gmail.com>
        conll_lib is free software: you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation, either version 3 of the License, or
        (at your option) any later version.
    
        conll_lib is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.
    
        You should have received a copy of the GNU General Public License
        along with conll_lib. If not, see <http://www.gnu.org/licenses/>.
    *******************************************************************************/
    
    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include"conll_lib.h"
    
    /* Forward declaration: parse_line() is defined further down in this file
       but is called by load_sentence() before that definition. */
    int parse_line(FILE *f, sentence *s);
    
    /*
     * Renumber the words of a sentence according to their position.
     *
     * First pass: the id of every word becomes its index in s->words.
     * Second pass: the head of every word becomes the (new) id of its mother,
     * or 0 when the word has no mother.
     *
     * Every slot in [0, s->l) is dereferenced, so none of them may be NULL.
     */
    void renumber_sentence(sentence *s)
    {
      int pos;

      for(pos = 0; pos < s->l; pos++)
        s->words[pos]->id = pos;

      /* heads are refreshed only once every id is final */
      for(pos = 0; pos < s->l; pos++){
        word *cur = s->words[pos];
        cur->head = cur->mother ? cur->mother->id : 0;
      }
    }
    
    
    /*
     * Empty a sentence and reinstall the artificial ROOT word.
     *
     * Every word stored in the slots [0, s->l) is freed and its slot is set to
     * NULL; a fresh "ROOT" word is then stored in slot 0 and the length becomes 1.
     * s->root is cleared as well, because it may reference one of the words that
     * have just been freed.
     */
    void reset_sentence(sentence *s)
    {
      int i;

      for(i = 0; i < s->l; i++){
        free(s->words[i]);  /* free(NULL) is a no-op, no guard needed */
        s->words[i] = NULL;
      }
      s->root = NULL;
      s->words[0] = allocate_word(0, "ROOT", "ROOT", "ROOT", "ROOT", "ROOT", -1, "ROOT");
      s->l = 1;
    }
    
    /*
     * Release a sentence structure.
     *
     * Only the sentence itself is freed.  The words still referenced from
     * s->words are left untouched: releasing them is disabled in this function.
     */
    void free_sentence(sentence *s)
    {
      free(s);
    }
    
    /*
     * Build a new word from the fields of w (id, form, lemma, cpostag, postag,
     * feats, head and deprel) by calling allocate_word().  The mother and
     * daughters of w are not passed on to the new word.
     */
    word *copy_word(word *w)
    {
      word *copy = allocate_word(w->id, w->form, w->lemma, w->cpostag,
                                 w->postag, w->feats, w->head, w->deprel);
      return copy;
    }
    
    /*
     * Allocate a word and fill it with the given CoNLL fields.
     *
     * The string arguments are copied into the word with strcpy(), so each of
     * them must fit in the corresponding field of the word structure.  The new
     * word has no mother and no daughters.
     *
     * Like allocate_sentence(), the program is terminated with an error message
     * when memory cannot be obtained, so the function never returns NULL.
     */
    word *allocate_word(unsigned id, char *form, char *lemma, char *cpostag, char *postag, char *feats, unsigned head, char *deprel)
    {
      word *w = malloc(sizeof *w);

      if(w == NULL){
        fprintf(stderr, "cannot allocate word\n");
        exit(1);
      }

      w->id = id;
      strcpy(w->form, form);
      strcpy(w->lemma, lemma);
      strcpy(w->cpostag, cpostag);
      strcpy(w->postag, postag);
      strcpy(w->feats, feats);
      w->head = head;
      strcpy(w->deprel, deprel);
      w->mother = NULL;
      w->daughters_nb = 0;
      return w;
    }
    
    sentence *allocate_sentence(void)
    {
      sentence *s;
      int i;
    
      s = malloc(sizeof(sentence));
      if(s == NULL){
        fprintf(stderr, "cannot allocate sentence\n");
        exit(1);
      }
    
      s->num = INCORRECT_SENTENCE_NUM_VALUE;
      s->l = 0;
      for(i=0; i < MAX_WORDS_IN_SENTENCE; i++){
        s->words[i] = NULL;
      }
      return s;
    }
    
    /*
     * Read the next sentence from f into s and build its dependency tree.
     *
     * The sentence is first reset (slot 0 then holds the ROOT word), then lines
     * are handed to parse_line() until it returns 0.  Afterwards every word whose
     * head designates an existing slot is attached to that word with
     * add_daughter(); any other word ends up with no mother.
     *
     * Returns 0 when the end-of-file indicator of f is already set, 1 otherwise.
     */
    int load_sentence(FILE *f, sentence *s)
    {
      int i;

      if(feof(f)) return 0;

      reset_sentence(s);

      /* consume lines until parse_line() reports the end of the sentence */
      while(parse_line(f, s))
        ;

      /* read an 'empty' parse (two succeeding newlines) */
      /* NOTE(review): reset_sentence() leaves the ROOT word in slot 0, so s->l
         is at least 1 here and this test looks unreachable - confirm intent */
      if(s->l == 0) return 0;

      /* build the tree structure */
      s->words[0]->mother = NULL;
      for(i = 1; i < s->l; ++i){
        word *w = s->words[i];
        /* valid heads are the indices 0 .. s->l - 1; s->l itself is not a word */
        if((w->head >= 0) && (w->head < s->l)){
          add_daughter(w, s->words[w->head]);
        }
        else{
          w->mother = NULL;  /* out-of-range head: the word stays unattached */
        }
      }

      return 1;
    }
    
    
    
    /*----------------------------------------------------------------------------*/
    /*
     * Read one line of f and append the word it describes to s.
     *
     * A line holds tab-separated fields, read in this order:
     *   id, form, lemma, cpostag, postag, feats, head, deprel
     * A head of "_" means that no head is given.  When the head is 0 the word is
     * also recorded as s->root.
     *
     * Fields missing from a malformed line keep these defaults: id = position in
     * the sentence, empty strings, head = -1 (the value reset_sentence() gives
     * to the ROOT word), no mother, no daughters.
     *
     * Once the sentence holds MAX_WORDS_IN_SENTENCE - 1 words, further lines are
     * read and discarded, so that s->words is never written out of bounds and
     * the next sentence still starts after the blank line.
     *
     * NOTE(review): apart from head, the string fields are scanned without a
     * width limit; an over-long field can still overflow the word structure.
     *
     * Returns 1 when a non-empty line was consumed, 0 on an empty line, on end
     * of file or on a read error.  Terminates the program when memory cannot be
     * obtained.
     */
    int parse_line(FILE *f, sentence *s)
    {
      char buff[MAX_LINE_LENGTH];
      char head_str[100] = "_";  /* default: no head given */
      word *w;

      if(feof(f)) return 0;

      if(fgets(buff, MAX_LINE_LENGTH, f) == NULL)
        return 0;

      /* an empty line ends the sentence */
      if(buff[0] == '\n')
        return 0;

      /* sentence full: swallow the line, keep s->words[s->l] inside the array */
      if(s->l >= MAX_WORDS_IN_SENTENCE - 1)
        return 1;

      w = malloc(sizeof *w);
      if(w == NULL){
        fprintf(stderr, "cannot allocate word\n");
        exit(1);
      }

      /* defaults, so that no field is left indeterminate after a short line */
      w->id = s->l;
      w->form[0] = '\0';
      w->lemma[0] = '\0';
      w->cpostag[0] = '\0';
      w->postag[0] = '\0';
      w->feats[0] = '\0';
      w->deprel[0] = '\0';
      w->head = -1;
      w->mother = NULL;
      w->daughters_nb = 0;

      /* head_str holds 100 bytes, hence the %99 width on the seventh field */
      sscanf(buff, "%d\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%99[^\t]\t%s",
             &(w->id), w->form, w->lemma, w->cpostag, w->postag, w->feats,
             head_str, w->deprel);

      if(strcmp(head_str, "_")){
        w->head = atoi(head_str);
        if(w->head == 0) s->root = w;
      }

      s->words[s->l] = w;
      s->l++;

      return 1;
    }
    
    /*
     * Print the words 1 .. s->l - 1 on stdout, one word per line.
     *
     * Each print_* flag enables the column of the same name; an enabled column is
     * written followed by a tab, always in the order id, form, lemma, cpostag,
     * postag, feats, head, deprel.  The line is closed by "1" for the last word
     * of the sentence and by "0" for all the others.
     *
     * Nothing is printed when s->l is 0 or 1.
     */
    void print_sentence_mcf2(sentence *s, int print_id, int print_form, int print_lemma, int print_cpostag, int print_postag, int print_feats, int print_head, int print_deprel)
    {
      int pos;

      if((s->l == 0) || (s->l == 1)) return;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];

        if(print_id)      printf("%d\t", cur->id);
        if(print_form)    printf("%s\t", cur->form);
        if(print_lemma)   printf("%s\t", cur->lemma);
        if(print_cpostag) printf("%s\t", cur->cpostag);
        if(print_postag)  printf("%s\t", cur->postag);
        if(print_feats)   printf("%s\t", cur->feats);
        if(print_head)    printf("%d\t", cur->head);
        if(print_deprel)  printf("%s\t", cur->deprel);

        /* end-of-sentence marker */
        fputs((pos == s->l - 1) ? "1\n" : "0\n", stdout);
      }
    }
    
    /*
     * Print the words 1 .. s->l - 1 on stdout, one word per line, with the
     * columns selected by the first nb_col characters of columns:
     *   'I' id, 'W' form, 'L' lemma, 'C' cpostag, 'P' postag, 'F' feats,
     *   'H' head, 'D' deprel.
     * Every column is followed by a tab; any other character is ignored.  The
     * line is closed by "1" for the last word of the sentence and by "0" for all
     * the others.
     *
     * Nothing is printed when s->l is 0 or 1.
     */
    void print_sentence_mcf3(sentence *s, char *columns, int nb_col)
    {
      int pos;

      if((s->l == 0) || (s->l == 1)) return;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];
        int c;

        for(c = 0; c < nb_col; c++){
          switch(columns[c]){
          case 'I': printf("%d\t", cur->id);      break;
          case 'W': printf("%s\t", cur->form);    break;
          case 'L': printf("%s\t", cur->lemma);   break;
          case 'C': printf("%s\t", cur->cpostag); break;
          case 'P': printf("%s\t", cur->postag);  break;
          case 'F': printf("%s\t", cur->feats);   break;
          case 'H': printf("%d\t", cur->head);    break;
          case 'D': printf("%s\t", cur->deprel);  break;
          default:                                break;
          }
        }

        /* end-of-sentence marker */
        fputs((pos == s->l - 1) ? "1\n" : "0\n", stdout);
      }
    }
    
    /*
     * Print the words 1 .. s->l - 1 on stdout, one word per line, as the
     * tab-separated columns
     *   form, part of speech, lemma, head offset, deprel, end-of-sentence flag.
     *
     * The part of speech is cpostag when coarse_pos is non-zero and postag
     * otherwise.  The head offset is the id of the mother minus the id of the
     * word, or 0 when the word has no mother.  The flag is 1 on the last word of
     * the sentence and 0 elsewhere.
     *
     * Nothing is printed when s->l is 0 or 1.
     */
    void print_sentence_mcf(sentence *s, int coarse_pos)
    {
      int pos;

      if((s->l == 0) || (s->l == 1)) return;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];

        printf("%s\t%s\t%s", cur->form,
               coarse_pos ? cur->cpostag : cur->postag,
               cur->lemma);

        if(cur->mother != NULL)
          printf("\t%d", cur->mother->id - cur->id);
        else
          printf("\t0");

        printf("\t%s\t%s\n", cur->deprel, (pos == s->l - 1) ? "1" : "0");
      }
    }
    
    
    /*
     * Print the words 1 .. s->l - 1 on stdout in a ten-column, tab-separated
     * layout:
     *   id, form, lemma, cpostag, postag, feats, head, deprel, "_", "_"
     * The head column is the id of the mother of the word, or 0 when the word
     * has no mother.  An empty line is printed after the last word.
     *
     * Nothing at all is printed when s->l is 0 or 1.
     */
    void print_sentence(sentence *s)
    {
      int pos;

      if((s->l == 0) || (s->l == 1)) return;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];

        printf("%d\t%s\t%s\t%s\t%s\t%s", cur->id, cur->form, cur->lemma,
               cur->cpostag, cur->postag, cur->feats);

        if(cur->mother != NULL)
          printf("\t%d", cur->mother->id);
        else
          printf("\t0");

        printf("\t%s\t_\t_\n", cur->deprel);
      }
      putchar('\n');
    }
    
    /*
     * Remove the NULL slots of a sentence.
     *
     * The non-NULL words of the slots [0, s->l) are moved to the front of
     * s->words, their relative order unchanged, in a single pass.  The slots that
     * become free are set to NULL, so that no word stays referenced twice, and
     * s->l becomes the number of words kept.
     */
    void compact_sentence(sentence *s)
    {
      int src;
      int dst = 0;

      for(src = 0; src < s->l; src++){
        if(s->words[src] != NULL)
          s->words[dst++] = s->words[src];
      }

      /* clear the vacated tail instead of leaving duplicate pointers behind */
      for(src = dst; src < s->l; src++)
        s->words[src] = NULL;

      s->l = dst;
    }
    
    /*
     * Attach daughter to mother.
     *
     * The mother pointer of daughter is set to mother (which may be NULL); when
     * mother is not NULL, daughter is also appended to its daughters array.
     * Nothing happens when daughter is NULL.
     *
     * NOTE(review): the append is not checked against the capacity of the
     * daughters array, which is not visible here.
     */
    void add_daughter(word *daughter, word *mother)
    {
      if(daughter == NULL) return;

      daughter->mother = mother;
      if(mother != NULL){
        mother->daughters[mother->daughters_nb++] = daughter;
      }
    }
    
    /*
     * Remove word i of the sentence from the daughters array of its mother.
     *
     * The following entries of the array are shifted one position down and the
     * daughters count of the mother is decremented.  The mother pointer of the
     * word itself is left unchanged.  Nothing happens when slot i is empty or
     * when the word has no mother.
     */
    void remove_daughter(sentence *s, int i)
    {
      word *dep = s->words[i];
      word *gov;
      int pos, k;

      if(dep == NULL) return;

      gov = dep->mother;
      if(gov == NULL) return;

      for(pos = 0; pos < gov->daughters_nb; pos++){
        if(gov->daughters[pos] != dep) continue;

        /* close the gap left by the removed entry */
        for(k = pos + 1; k < gov->daughters_nb; k++)
          gov->daughters[k - 1] = gov->daughters[k];
        gov->daughters_nb--;
      }
    }
    
    /*
     * Delete word i of the sentence together with all its descendants.
     *
     * Every word in the slots 1 .. s->l - 1 whose mother is word i is deleted
     * first (recursively); word i is then removed from the daughters of its own
     * mother, freed, and its slot is set to NULL.  The sentence is not compacted
     * here, so the indices of the other words do not change.
     */
    void remove_word_rec(sentence *s, int i)
    {
      word *target = s->words[i];
      int k = 1;

      while(k < s->l){
        word *cand = s->words[k];
        if((cand != NULL) && (cand->mother == target))
          remove_word_rec(s, k);
        k++;
      }

      remove_daughter(s, i);
      free(target);
      s->words[i] = NULL;
    }
    
    /*
     * Delete the word at index root and all its descendants, then close the
     * holes left in s->words.
     */
    void remove_subtree(sentence *s, int root)
    {
      remove_word_rec(s, root);  /* leaves NULL slots behind */
      compact_sentence(s);       /* squeezes them out */
    }
    
    /*
     * Store word w at position index of the sentence.
     *
     * When the slot is already occupied, the words from index up to the end of
     * the sentence are shifted one slot to the right and the length grows by
     * one.  When index lies at or beyond the current length, the length becomes
     * index + 1.  When gov is not NULL, w is attached to it with add_daughter().
     *
     * NOTE(review): no check is made against the capacity of s->words.
     */
    void add_word(sentence *s, word *w, int index, word *gov)
    {
      if(s->words[index] != NULL){
        int k = s->l;

        /* make room: move the tail one slot to the right */
        while(k > index){
          s->words[k] = s->words[k - 1];
          k--;
        }
        s->l++;
      }

      s->words[index] = w;
      if(s->l <= index) s->l = index + 1;

      if(gov != NULL)
        add_daughter(w, gov);
    }
     
    /*
     * Replace the word at position index by the two words gov and dep.
     *
     * gov inherits the deprel of the replaced word, and every word of the
     * sentence whose mother was the replaced word is attached to gov.  The
     * replaced word is removed from the daughters of its mother and freed; gov
     * is then inserted at index_gov as a daughter of that mother, and dep at
     * index_dep as a daughter of gov.
     *
     * s->words[index] must not be NULL.
     */
    void split_node_in_two(sentence *s, int index, word *gov, word *dep, int index_gov, int index_dep)
    {
      int i;
      word *w = s->words[index];
      word *mother = w->mother;

      strcpy(gov->deprel, w->deprel);
      for(i = 1; i < s->l; i++){
        /* skip empty slots instead of dereferencing them */
        if((s->words[i] != NULL) && (s->words[i]->mother == w))
          add_daughter(s->words[i], gov);
      }

      /* unlink w from its mother first, so that no daughters array keeps a
         pointer to freed memory */
      remove_daughter(s, index);
      free(w);
      s->words[index] = NULL;

      add_word(s, gov, index_gov, mother);
      add_word(s, dep, index_dep, gov);
    }
    
    
    /*---------------------------------------------------------------------------------*/
    /*---------------------------------------------------------------------------------*/
    
    /*
     * Rewrite the cpostag of the words 1 .. s->l - 1 through the table h_cpos.
     *
     * Each cpostag is looked up with hash_str_get_val(); when a value is found it
     * replaces the cpostag, otherwise a warning is written to stderr and the
     * cpostag is left as it is.
     */
    void change_cpos(sentence *s, hash_str *h_cpos)
    {
      int pos;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];
        char *mapped = hash_str_get_val(h_cpos, cur->cpostag);

        if(!mapped){
          fprintf(stderr, "ATTENTION: cpos %s inconnue\n", cur->cpostag);
          continue;
        }
        strcpy(cur->cpostag, mapped);
      }
    }
    
    /*---------------------------------------------------------------------------------*/
    /*---------------------------------------------------------------------------------*/
    /*
     * Rewrite the postag of the words 1 .. s->l - 1 through the table h_pos.
     *
     * Each postag is looked up with hash_str_get_val(); when a value is found it
     * replaces the postag, otherwise a warning naming the unknown postag is
     * written to stderr and the postag is left as it is.
     */
    void change_pos(sentence *s, hash_str *h_pos)
    {
      int i;
      word *w;
      char *val;

      for(i = 1; i < s->l; i++){
        w = s->words[i];

        val = hash_str_get_val(h_pos, w->postag);
        if(val){
          strcpy(w->postag, val);
        }
        else{
          /* report the key that was looked up, i.e. the postag */
          fprintf(stderr, "ATTENTION: pos %s inconnue\n", w->postag);
        }
      }
    }
    
    /*---------------------------------------------------------------------------------*/
    /*---------------------------------------------------------------------------------*/
    
    
    
    /*
     * Rewrite the deprel of the words 1 .. s->l - 1 through the table h_fct.
     *
     * Each deprel is looked up with hash_str_get_val(); when a value is found it
     * replaces the deprel, otherwise a warning is written to stderr and the
     * deprel is left as it is.
     */
    void change_fct(sentence *s, hash_str *h_fct)
    {
      int pos;

      for(pos = 1; pos < s->l; pos++){
        word *cur = s->words[pos];
        char *mapped = hash_str_get_val(h_fct, cur->deprel);

        if(!mapped){
          fprintf(stderr, "ATTENTION: fct %s inconnue\n", cur->deprel);
          continue;
        }
        strcpy(cur->deprel, mapped);
      }
    }
    
    /*
     * Tell whether s looks like a number.
     *
     * Returns 1 when s contains only decimal digits and commas and at least one
     * digit (e.g. "42", "1,234"); returns 0 otherwise, in particular for NULL,
     * for the empty string and for strings made of commas only.
     */
    int is_num(char *s)
    {
      int seen_digit = 0;

      if(s == NULL) return 0;

      for(; *s != '\0'; s++){
        if((*s >= '0') && (*s <= '9'))
          seen_digit = 1;
        else if(*s != ',')
          return 0;  /* anything but a digit or a comma disqualifies s */
      }
      return seen_digit;
    }
    
    
    /*
     * Set the id of every word of the sentence to its index in s->words plus
     * offset.  Every slot in [0, s->l) is dereferenced, so none may be NULL.
     */
    void renumber_sentence_offset(sentence *s, int offset)
    {
      int pos = 0;

      while(pos < s->l){
        s->words[pos]->id = pos + offset;
        pos++;
      }
    }