Skip to content
Snippets Groups Projects
Select Git revision
  • 1d536e6cbcf8e0cbd1d24d9621b60b93b3ebbbd3
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

dico.c

Blame
  • dico.c 3.38 KiB
    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include"dico.h"
    #include"util.h"
    
    
    /*
     * Allocates and initializes an empty dico.
     *
     * name : label of the dico; it is duplicated with strdup(). May be NULL,
     *        in which case the dico is left without a name.
     * size : value handed to hash_new() to build the underlying hash table.
     *
     * Returns the new dico: no element, no index array allocated yet.
     * The caller releases it with dico_free().
     */
    dico *dico_new(char *name, int size)
    {
      dico *fresh = (dico *)memalloc(sizeof(dico));

      fresh->name = (name != NULL) ? strdup(name) : NULL;
      fresh->htable = hash_new(size);

      /* the code -> string array is only allocated on the first dico_add() */
      fresh->array = NULL;
      fresh->nbelem = 0;
      fresh->array_size = 0;

      return fresh;
    }
    
    /*
     * Releases a dico: its hash table (through hash_free()), its index array,
     * its name and finally the structure itself.
     * The strings referenced by the array are not freed individually here.
     * Calling it with a NULL dico does nothing.
     */
    void dico_free(dico *d)
    {
      if(d == NULL)
        return;

      if(d->htable != NULL)
        hash_free(d->htable);

      /* free(NULL) is a no-op, so these two need no guard */
      free(d->array);
      free(d->name);

      free(d);
    }
    
    /*
     * Reads one dico from an already opened stream.
     *
     * Layout expected on the stream:
     *   - the dico name (a single token without whitespace, at most 999 chars),
     *   - the number of entries,
     *   - one entry per line.
     * Reading stops at end of file or on a line equal to DICO_END_STR.
     *
     * f     : stream to read from; it is left open.
     * ratio : the dico is created with dico_new(name, 1/ratio * size), where
     *         size is the entry count read from the header.
     *
     * Returns the dico, or NULL when the two header fields cannot be read
     * (in that case nothing is allocated).
     */
    dico *dico_read_fh(FILE *f, float ratio)
    {
      dico *d = NULL;
      char buffer[1000];
      char name[1000];
      int size = 0;

      /* %999s keeps the token inside name[] (999 chars + terminating '\0');
         both results are checked so name/size are never used uninitialized */
      if(fscanf(f, "%999s\n", name) != 1)
        return NULL;
      if(fscanf(f, "%d\n", &size) != 1)
        return NULL;

      d = dico_new(name, 1/ratio * size);
      while(fgets(buffer, sizeof(buffer), f)){
        /* Strip the newline only when there is one: the last line of a file
           may lack it and must neither be dropped nor lose its last char. */
        buffer[strcspn(buffer, "\n")] = '\0';
        if(!strcmp(buffer, DICO_END_STR))
          break;
        dico_add(d, buffer);
      }
      return d;
    }
    
    /*
     * Reads a dico from a file.
     *
     * filename : path of the file to read; when NULL the dico is read from
     *            the standard input.
     * ratio    : forwarded to dico_read_fh().
     *
     * Returns whatever dico_read_fh() returns. If the file cannot be opened,
     * a message is printed on stderr and the program exits with status 1.
     */
    dico *dico_read(char *filename, float ratio)
    {
      dico *d = NULL;
      FILE *f = NULL;

      if(filename == NULL){
        /* input comes from stdin; stdout is not a readable stream */
        f = stdin;
      }
      else{
        f = fopen(filename, "r");
        if(f == NULL){
          fprintf(stderr, "cannot open file %s\n", filename);
          exit(1);
        }
      }

      d = dico_read_fh(f, ratio);

      /* only close what was opened here, never the standard stream
         (same policy as dico_print) */
      if(filename != NULL)
        fclose(f);
      return d;
    }
    
    
    /*
     * Writes a dico on an open stream: the name on the first line, the number
     * of elements on the second one, then one element per line, in code order.
     *
     * f : destination stream; it is left open.
     * d : dico to write.
     */
    void dico_print_fh(FILE *f, dico *d)
    {
      int rank = 0;

      /* header: name, then element count */
      fprintf(f,"%s\n", d->name);
      fprintf(f,"%d\n", d->nbelem);

      /* body: the string associated with each code 0 .. nbelem-1 */
      while(rank < d->nbelem){
        fprintf(f,"%s\n", d->array[rank]);
        rank++;
      }
    }
    
    /*
     * Writes a dico to a file, using dico_print_fh().
     *
     * filename : path of the file to create/overwrite; when NULL the dico is
     *            written on the standard output.
     * d        : dico to write.
     *
     * If the file cannot be opened, a message is printed on stderr and the
     * program exits with status 1.
     */
    void dico_print(char *filename, dico *d)
    {
      int opened_here = (filename != NULL);
      FILE *out = stdout;

      if(opened_here){
        out = fopen(filename, "w");
        if(out == NULL){
          fprintf(stderr, "cannot open file %s\n", filename);
          exit(1);
        }
      }

      dico_print_fh(out, d);

      /* stdout is not ours to close */
      if(opened_here)
        fclose(out);
    }
    
    /*
     * Returns the integer code of key, inserting key in the dico if needed.
     *
     * d   : dico to update.
     * key : string to look up / insert.
     *
     * If key is already present its existing code is returned and the dico is
     * left unchanged. Otherwise key receives the next free code (the current
     * number of elements, so codes start at 0 and follow insertion order), is
     * stored in the hash table, and the code -> string array is extended.
     *
     * If the array cannot be enlarged, a message is printed on stderr and the
     * program exits with status 1.
     */
    int dico_add(dico *d, char *key)
    {
      int val = d->nbelem;
      cell *c = hash_lookup(d->htable, key);

      if(c != NULL)
        return c->val;

      d->nbelem++;
      c = hash_add(d->htable, key, val);

      if(d->nbelem > d->array_size){
        /* roughly double the capacity; the +1 handles the initial size of 0 */
        int new_size = 2 * (d->array_size + 1);
        char **grown = (char **)realloc(d->array, (size_t)new_size * sizeof(char *));

        /* keep d->array valid until the new block is known to exist */
        if(grown == NULL){
          fprintf(stderr, "dico_add: cannot allocate memory\n");
          exit(1);
        }
        d->array = grown;
        d->array_size = new_size;
      }

      /* the array shares the key pointer held by the hash cell
         (presumably the hash table's own copy of key -- confirm in hash_add) */
      d->array[val] = c->key;
      return val;
    }
    
    /*
     * Returns the string associated with the code val, or NULL when val is
     * not a code of the dico (negative, or not yet attributed).
     *
     * The returned pointer belongs to the dico; the caller must not free it.
     */
    char *dico_int2string(dico *d, int val)
    {
      if(val < 0) return NULL;
      /* Valid codes are 0 .. nbelem-1. array_size is only the allocated
         capacity: slots beyond nbelem come straight from realloc() and hold
         indeterminate pointers. */
      if(val >= d->nbelem) return NULL;
      return d->array[val];
    }
    
    /*
     * Returns the integer code associated with string in the dico,
     * or -1 when string is not found in the hash table.
     * The dico is not modified.
     */
    int dico_string2int(dico *d, char *string)
    {
      cell *hit = hash_lookup(d->htable, string);

      return (hit != NULL) ? hit->val : -1;
    }
    
    /*
     * Builds a dico from one column of a tab-separated corpus file.
     *
     * filename  : path of the corpus, opened with myfopen().
     * column    : 0-based index of the column whose values are collected.
     * dico_name : name given to the new dico.
     *
     * Lines starting with a newline or a space are skipped. On every other
     * line the fields are split on tabs and the field number `column`, if the
     * line has one, is added to the dico. Note that strtok() treats
     * consecutive tabs as a single separator, so empty fields are not counted.
     *
     * Returns the new dico (possibly empty).
     */
    dico *dico_extract_from_corpus(char *filename, int column, char *dico_name)
    {
      dico *d = dico_new(dico_name, 1000);
      /* NOTE(review): assumes myfopen() never returns NULL -- confirm in util.c */
      FILE *f = myfopen(filename, "r");
      char buffer[10000];
      char *token;
      int column_nb;

      /* fgets() returning NULL is the only end-of-input test needed: feof()
         may already be set after a successful read of a last line that has
         no trailing newline, and that line must still be processed. */
      while(fgets(buffer, sizeof(buffer), f)){
        if((buffer[0] == '\n') || (buffer[0] == ' ')) continue;
        buffer[strcspn(buffer, "\n")] = '\0';

        /* the token is tested before use: a line made only of tabs yields
           no token at all */
        column_nb = 0;
        for(token = strtok(buffer, "\t"); token != NULL; token = strtok(NULL, "\t")){
          if(column_nb == column){
            dico_add(d, token);
            break; /* only one field per line can match */
          }
          column_nb++;
        }
      }
      fclose(f);
      return d;
    }