/* dico.c: dictionary mapping strings to integer indices and back. */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"dico.h"
#include"util.h"
/*
 * Allocate and initialise an empty dictionary.
 *
 * name: optional label; when non-NULL it is duplicated with strdup(), so the
 *       caller keeps ownership of the string it passed in.
 * size: forwarded unchanged to hash_new() for the underlying hash table.
 *
 * Returns a dictionary with zero entries and no index array allocated yet.
 */
dico *dico_new(char *name, int size)
{
    dico *fresh = (dico *)memalloc(sizeof(dico));

    fresh->name = name ? strdup(name) : NULL;
    fresh->htable = hash_new(size);

    /* The index array is created lazily on the first insertion. */
    fresh->array = NULL;
    fresh->array_size = 0;
    fresh->nbelem = 0;

    return fresh;
}
/*
 * Release a dictionary: its hash table, its index array, its name and the
 * dictionary structure itself. Passing NULL is a no-op.
 *
 * The strings referenced from d->array are not freed one by one here; the
 * array slots are copies of the key pointers held by the hash cells
 * (see dico_add), so only the array itself is released.
 */
void dico_free(dico *d)
{
    if (d == NULL)
        return;

    if (d->htable != NULL)
        hash_free(d->htable);

    /* free(NULL) is a no-op, so these need no guard. */
    free(d->array);
    free(d->name);
    free(d);
}
/*
 * Read a dictionary from an already opened stream.
 *
 * Expected layout: a whitespace-free name, then an integer size, then one
 * entry per line until either a line equal to DICO_END_STR or end of file.
 *
 * f:     stream to read from; it is not closed here.
 * ratio: the hash table is created with (1 / ratio) * size slots. A ratio
 *        that is not strictly positive cannot be inverted, so in that case
 *        the size read from the stream is used as is.
 *
 * Returns the new dictionary, or NULL when the name/size header cannot be
 * parsed.
 */
dico *dico_read_fh(FILE *f, float ratio)
{
    char name[1000];
    char buffer[1000];
    int size;
    int table_size;
    dico *d;

    /* The field width keeps an over-long name from overflowing name[]. */
    if (fscanf(f, "%999s\n", name) != 1)
        return NULL;
    if (fscanf(f, "%d\n", &size) != 1)
        return NULL;

    table_size = (ratio > 0) ? (int)(1 / ratio * size) : size;
    d = dico_new(name, table_size);

    /*
     * fgets() returning non-NULL means a line was read, even if the end of
     * file was reached while reading it; a final entry without a trailing
     * newline must therefore still be added.
     */
    while (fgets(buffer, sizeof buffer, f)) {
        size_t len = strlen(buffer);

        /* Strip the newline only when there is one, so that a last line
         * without '\n' (or a truncated long line) keeps its last char. */
        if (len > 0 && buffer[len - 1] == '\n')
            buffer[len - 1] = '\0';

        if (!strcmp(buffer, DICO_END_STR))
            break;
        dico_add(d, buffer);
    }
    return d;
}
/*
 * Read a dictionary from a file, or from standard input.
 *
 * filename: path of the file to read; NULL means read from stdin.
 * ratio:    forwarded to dico_read_fh().
 *
 * Prints a message on stderr and exits with status 1 when the file cannot
 * be opened. Returns whatever dico_read_fh() returns.
 */
dico *dico_read(char *filename, float ratio)
{
    FILE *f;
    dico *d;

    /* A standard stream is not ours to close, so it is left open, in the
     * same way dico_print() leaves stdout open. */
    if (filename == NULL)
        return dico_read_fh(stdin, ratio);

    f = fopen(filename, "r");
    if (f == NULL) {
        fprintf(stderr, "cannot open file %s\n", filename);
        exit(1);
    }
    d = dico_read_fh(f, ratio);
    fclose(f);
    return d;
}
/*
 * Write a dictionary to an already opened stream: the name on the first
 * line, the number of entries on the second, then one entry per line in
 * index order.
 *
 * f: destination stream; it is not closed here.
 * d: dictionary to write. d->name is handed directly to "%s", so it is
 *    expected to be non-NULL.
 */
void dico_print_fh(FILE *f, dico *d)
{
    int idx = 0;

    fprintf(f, "%s\n", d->name);
    fprintf(f, "%d\n", d->nbelem);

    while (idx < d->nbelem) {
        fprintf(f, "%s\n", d->array[idx]);
        idx++;
    }
}
/*
 * Write a dictionary to a file, or to standard output.
 *
 * filename: path of the file to create/overwrite; NULL means stdout.
 * d:        dictionary to write, passed on to dico_print_fh().
 *
 * Prints a message on stderr and exits with status 1 when the file cannot
 * be opened. Only a file opened here is closed; stdout is left open.
 */
void dico_print(char *filename, dico *d)
{
    FILE *out = stdout;

    if (filename != NULL) {
        out = fopen(filename, "w");
        if (out == NULL) {
            fprintf(stderr, "cannot open file %s\n", filename);
            exit(1);
        }
    }

    dico_print_fh(out, d);

    if (filename != NULL)
        fclose(out);
}
/*
 * Insert a key in the dictionary and return its integer index.
 *
 * If the key is already present its existing index is returned and nothing
 * changes. Otherwise the key receives the next free index (the current
 * number of entries), is registered in the hash table, and the index array
 * slot for that index is set to the key pointer held by the new hash cell.
 *
 * The index array grows geometrically (2 * (capacity + 1)). If it cannot be
 * grown, a message is printed on stderr and the program exits with status 1.
 */
int dico_add(dico *d, char *key)
{
    cell *c = hash_lookup(d->htable, key);
    int val;

    if (c != NULL)
        return c->val;

    val = d->nbelem;

    /* Make room first, so that the dictionary is only modified once the
     * new entry is certain to fit. */
    if (val >= d->array_size) {
        int new_size = 2 * (d->array_size + 1);
        /* Keep the old pointer until realloc() is known to have worked. */
        char **grown = realloc(d->array, (size_t)new_size * sizeof *grown);

        if (grown == NULL) {
            fprintf(stderr, "cannot allocate memory for dictionary array\n");
            exit(1);
        }
        d->array = grown;
        d->array_size = new_size;
    }

    c = hash_add(d->htable, key, val);
    d->array[val] = c->key;
    d->nbelem++;
    return val;
}
/*
 * Return the string stored at index val, or NULL when val does not refer to
 * an existing entry.
 *
 * The upper bound is the number of entries (nbelem), not the capacity of the
 * index array (array_size): slots between the two have been allocated but
 * never assigned, so reading them would return an indeterminate pointer.
 *
 * The returned pointer is the one stored in the index array; it is not a
 * copy and must not be freed by the caller.
 */
char *dico_int2string(dico *d, int val)
{
    if (val < 0 || val >= d->nbelem)
        return NULL;
    return d->array[val];
}
/*
 * Look up a string and return the integer index stored for it in the hash
 * table, or -1 when the string is not in the dictionary.
 */
int dico_string2int(dico *d, char *string)
{
    cell *hit = hash_lookup(d->htable, string);

    return (hit != NULL) ? hit->val : -1;
}
/*
 * Build a dictionary from one column of a tab-separated text file.
 *
 * filename:  file to read, opened through myfopen().
 * column:    zero-based index of the field to collect on each line.
 * dico_name: name given to the new dictionary (may be NULL).
 *
 * Lines that start with a newline or a space are skipped. Fields are split
 * with strtok() on '\t', which means consecutive tabs count as a single
 * separator: empty fields are not counted as columns. Lines with fewer
 * fields than requested contribute nothing.
 *
 * Returns the new dictionary, which is empty if no line contributed a field.
 */
dico *dico_extract_from_corpus(char *filename, int column, char *dico_name)
{
    char buffer[10000];
    dico *d;
    FILE *f = myfopen(filename, "r");

    /* NOTE(review): myfopen is defined elsewhere and may already abort on
     * failure; this guard only matters if it can return NULL. */
    if (f == NULL)
        return NULL;

    d = dico_new(dico_name, 1000);

    /*
     * A non-NULL fgets() result is a line to process even when the end of
     * file was hit while reading it (last line without trailing newline).
     */
    while (fgets(buffer, sizeof buffer, f)) {
        size_t len;
        char *token;
        int column_nb = 0;

        if (buffer[0] == '\n' || buffer[0] == ' ')
            continue;

        len = strlen(buffer);
        if (len > 0 && buffer[len - 1] == '\n')
            buffer[len - 1] = '\0';

        /* strtok() returns NULL straight away for a line made only of
         * tabs, so the token is tested before it is used. */
        for (token = strtok(buffer, "\t"); token != NULL;
             token = strtok(NULL, "\t")) {
            if (column_nb == column) {
                dico_add(d, token);
                break; /* only one field per line can match */
            }
            column_nb++;
        }
    }

    fclose(f);
    return d;
}