Skip to content
Snippets Groups Projects
Select Git revision
  • da28bdb46aaeec5859ad74f2ecb7a7a90f32683c
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

vectorize.c

Blame
  • form2pos.c NaN GiB
    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include"form2pos.h"
    #include"util.h"
    
    /*
     * Allocate a form2pos structure and fill its POS dictionary.
     *
     * nbelem   : number of forms; hash_new() is called with 4 * nbelem
     * pos_nb   : number of POS tags; both dictionaries are created with
     *            10 * pos_nb as their size argument
     * pos_list : list of POS tags separated by tabs. It is split in place
     *            with strtok(), so the buffer is modified and must be
     *            writable. Tab is the only delimiter: any other character
     *            (a trailing newline included) stays part of its token.
     *            May be NULL or empty, in which case no tag is registered.
     *
     * Returns the new structure; release it with form2pos_free().
     */
    form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
    {
      form2pos *f2p = (form2pos *)memalloc(sizeof(form2pos));
      char *token;

      f2p->nbelem = nbelem;
      f2p->pos_nb = pos_nb;
      f2p->d_pos = dico_new((char *)"d_pos", pos_nb * 10);
      f2p->d_signature = dico_new((char *)"d_signature", pos_nb * 10);
      f2p->h_form2signature = hash_new(nbelem * 4);

      /* Test the token before using it: with a NULL or empty list strtok()
         has nothing to return and dico_add() must not be handed NULL. */
      token = (pos_list != NULL) ? strtok(pos_list, "\t") : NULL;
      while(token != NULL){
        dico_add(f2p->d_pos, token); // token is strdup'ed in dico_add()
        token = strtok(NULL, "\t");
      }
      return f2p;
    }
    
    /*
     * Release a form2pos structure: its two dictionaries, its
     * form -> signature hash table, then the structure itself.
     *
     * A NULL pointer is accepted and ignored (same convention as free()),
     * so the result of a failed form2pos_read() can be passed directly.
     */
    void form2pos_free(form2pos *f2p)
    {
      if(f2p == NULL) return;

      dico_free(f2p->d_pos);
      dico_free(f2p->d_signature);
      hash_free(f2p->h_form2signature);
      free(f2p);
    }
    
    
    /*
     * Tell whether a form is associated with exactly one POS tag.
     *
     * The signature string of the form is scanned character by character:
     * the values (character - '0') are summed and the index of the last
     * character different from '0' is remembered. The form is considered
     * non ambiguous when the sum is exactly 1; that index is then used as
     * the POS code.
     *
     * f2p  : table to query
     * form : word form to look up (NULL is handled as an unknown form)
     * pos  : if not NULL, receives the POS string taken from the POS
     *        dictionary when the form is non ambiguous, and NULL in every
     *        other case (unknown form included)
     *
     * Returns 1 if the form is non ambiguous, 0 otherwise.
     */
    int form2pos_word_is_non_ambiguous(form2pos *f2p, char *form, char **pos)
    {
      int signature = form2pos_get_signature(f2p, form);
      char *signature_str = NULL;
      char *unique_pos = NULL;
      int non_ambiguous = 0;

      /* a negative code means no signature is available for this form:
         do not query the dictionary with it */
      if(signature >= 0)
        signature_str = dico_int2string(f2p->d_signature, signature);

      if(signature_str != NULL){
        int pos_code = -1;
        int sum = 0;

        for(int i = 0; signature_str[i] != '\0'; i++){
          sum += signature_str[i] - '0';
          if(signature_str[i] != '0') pos_code = i;
        }
        /* sum == 1 implies that a non-'0' character was met, hence that
           pos_code has been set */
        if(sum == 1){
          unique_pos = dico_int2string(f2p->d_pos, pos_code);
          non_ambiguous = 1;
        }
      }

      /* the output is written on every path so that the caller never reads
         an indeterminate pointer */
      if(pos != NULL) *pos = unique_pos;
      return non_ambiguous;
    }
    
    /*
     * Load a form2pos table from a text file.
     *
     * Content read, in order:
     *   - an integer: the number of forms
     *   - an integer: the number of POS tags
     *   - one line holding the list of POS tags (split on tabs by
     *     form2pos_new())
     *   - a sequence of "form<TAB>signature" entries; the form extends up to
     *     the first tab, the signature up to the next whitespace
     *
     * Forms are limited to 299 characters and signatures to 199 characters,
     * the capacity of the local buffers.
     *
     * Returns the table, or NULL if the file cannot be opened or if its
     * header cannot be read. Reading of the entries stops at end of file or
     * at the first entry that does not provide both fields.
     */
    form2pos *form2pos_read(char *filename)
    {
      FILE *f = myfopen_no_exit(filename, "r");
      int nbelem;
      int pos_nb;
      char pos_list[10000];
      char form[300];
      char signature[200];
      form2pos *f2p = NULL;

      if(f == NULL) return NULL;

      /* header: number of forms, number of pos tags, list of pos tags.
         Every read is checked, otherwise the sizes below would be
         indeterminate values. */
      if(fscanf(f, "%d\n", &nbelem) != 1
         || fscanf(f, "%d\n", &pos_nb) != 1
         || nbelem < 0 || pos_nb < 0
         || fgets(pos_list, sizeof(pos_list), f) == NULL){
        fprintf(stderr, "form2pos_read: invalid header in file %s\n", filename);
        fclose(f);
        return NULL;
      }

      /* printf("form2pos read nbelem = %d pos nb = %d pos list = %s\n", nbelem, pos_nb, pos_list); */

      f2p = form2pos_new(nbelem, pos_nb, pos_list);

      /* The loop is driven by the result of fscanf() rather than by feof():
         a failed conversion neither inserts stale buffers nor spins forever
         on input that cannot be consumed. The field widths keep the
         conversions inside form[] and signature[]. */
      /* NOTE(review): hash_add() receives the stack buffer `form` as key;
         presumably it copies it - confirm against its implementation. */
      while(fscanf(f, "%299[^\t]\t%199s\n", form, signature) == 2){
        hash_add(f2p->h_form2signature, form, dico_add(f2p->d_signature, signature));
      }
      fclose(f);
      return f2p;
    }
    
    /*
     * Fetch the signature code stored for a form.
     *
     * Returns -1 when form is NULL; otherwise the value given by
     * hash_get_val() for that form in the form -> signature table.
     */
    int form2pos_get_signature(form2pos *f2p, char *form)
    {
      return (form != NULL) ? hash_get_val(f2p->h_form2signature, form) : -1;
    }
    
    /*
     * Look up, in the signature of a form, the character attached to a POS.
     *
     * f2p  : table to query
     * form : word form to look up
     * pos  : POS tag to test
     *
     * Returns -1 (after a message on stderr) when the POS or the form is
     * NULL or unknown, or when the signature holds no character for this
     * POS. Otherwise returns the signature character found at the index of
     * the POS code.
     *
     * NOTE(review): the value returned on success is the raw character, not
     * a 0/1 boolean; form2pos_word_is_non_ambiguous() treats '0' as "absent",
     * so callers presumably have to compare the result with '0' - confirm.
     */
    int form2pos_form_has_pos(form2pos *f2p, char *form, char *pos)
    {
      const char *pos_name = (pos != NULL) ? pos : "(null)";
      const char *form_name = (form != NULL) ? form : "(null)";
      int pos_code = (pos != NULL) ? dico_string2int(f2p->d_pos, pos) : -1;
      char *signature;
      int signature_code;

      if(pos_code < 0){
        fprintf(stderr, "cat %s unknown\n", pos_name);
        return -1;
      }

      /* same lookup as the rest of the file: a NULL form yields -1 */
      signature_code = form2pos_get_signature(f2p, form);
      if(signature_code < 0){
        fprintf(stderr, "form %s unknown\n", form_name);
        return -1;
      }

      signature = dico_int2string(f2p->d_signature, signature_code);
      /* never index past the end of the signature string */
      if(signature == NULL || (size_t)pos_code >= strlen(signature)){
        fprintf(stderr, "no signature entry for form %s and cat %s\n", form_name, pos_name);
        return -1;
      }

      return signature[pos_code];
    }