Skip to content
Snippets Groups Projects
Commit 2ce50f9e authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Merge branch 'master' of gitlab.lif.univ-mrs.fr:alexis.nasr/macaon2

parents e2de5c6d 73cea196
No related branches found
No related tags found
No related merge requests found
Showing
with 95 additions and 28 deletions
...@@ -18,6 +18,9 @@ dico_vec *dico_vec_new(void) ...@@ -18,6 +18,9 @@ dico_vec *dico_vec_new(void)
void dico_vec_free(dico_vec *dv) void dico_vec_free(dico_vec *dv)
{ {
if(dv){ if(dv){
if (dv->ht)
hash_free(dv->ht);
if(dv->t) if(dv->t)
free(dv->t); free(dv->t);
free(dv); free(dv);
...@@ -98,5 +101,6 @@ dico_vec *dico_vec_read(char *filename, float ratio) ...@@ -98,5 +101,6 @@ dico_vec *dico_vec_read(char *filename, float ratio)
while(!feof(f)){ while(!feof(f)){
dico_vec_add(dv, dico_read_fh(f, ratio)); dico_vec_add(dv, dico_read_fh(f, ratio));
} }
fclose(f);
return dv; return dv;
} }
...@@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list) ...@@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
f2p->h_form2signature = hash_new(nbelem * 4); f2p->h_form2signature = hash_new(nbelem * 4);
token = strtok(pos_list, "\t"); token = strtok(pos_list, "\t");
do{ do{
dico_add(f2p->d_pos, strdup(token)); //dico_add(f2p->d_pos, strdup(token));
dico_add(f2p->d_pos, token); // token is strdup'ed in dico_add()
}while((token = strtok(NULL, "\t"))); }while((token = strtok(NULL, "\t")));
return f2p; return f2p;
} }
...@@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename) ...@@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename)
/* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */ /* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */
hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature)); hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature));
} }
fclose(f);
return f2p; return f2p;
} }
......
...@@ -39,6 +39,7 @@ void hash_free(hash *h) ...@@ -39,6 +39,7 @@ void hash_free(hash *h)
int i; int i;
for(i=0; i < h->size; i++) for(i=0; i < h->size; i++)
cell_free(h->array[i]); cell_free(h->array[i]);
free(h->array);
free(h); free(h);
} }
......
...@@ -156,8 +156,15 @@ void context_set_linguistic_resources_filenames(context *ctx) ...@@ -156,8 +156,15 @@ void context_set_linguistic_resources_filenames(context *ctx)
if(ctx->maca_data_path) if(ctx->maca_data_path)
strcat(absolute_path, ctx->maca_data_path); strcat(absolute_path, ctx->maca_data_path);
else else {
strcat(absolute_path, getenv("MACAON_DIR")); char *e = getenv("MACAON_DIR");
if (e != NULL) {
strcat(absolute_path, e);
} else {
fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
}
}
strcat(absolute_path, "/"); strcat(absolute_path, "/");
strcat(absolute_path, ctx->language); strcat(absolute_path, ctx->language);
......
...@@ -22,6 +22,14 @@ void context_free(context *ctx) ...@@ -22,6 +22,14 @@ void context_free(context *ctx)
if(ctx->maca_data_path) free(ctx->maca_data_path); if(ctx->maca_data_path) free(ctx->maca_data_path);
if(ctx->language) free(ctx->language); if(ctx->language) free(ctx->language);
if(ctx->root_label) free(ctx->root_label); if(ctx->root_label) free(ctx->root_label);
if(ctx->vocabs_filename) free(ctx->vocabs_filename);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if (ctx->mcd_struct)
mcd_free(ctx->mcd_struct);
if (ctx->vocabs)
dico_vec_free(ctx->vocabs);
if(ctx->d_perceptron_features) if(ctx->d_perceptron_features)
dico_free(ctx->d_perceptron_features); dico_free(ctx->d_perceptron_features);
...@@ -55,6 +63,7 @@ context *context_new(void) ...@@ -55,6 +63,7 @@ context *context_new(void)
ctx->features_model_filename = NULL; ctx->features_model_filename = NULL;
ctx->vocabs_filename = NULL; ctx->vocabs_filename = NULL;
ctx->f2p_filename = NULL; ctx->f2p_filename = NULL;
ctx->fplm_filename = NULL;
ctx->maca_data_path = NULL; ctx->maca_data_path = NULL;
ctx->language = strdup("fr"); ctx->language = strdup("fr");
...@@ -251,12 +260,14 @@ context *context_read_options(int argc, char *argv[]) ...@@ -251,12 +260,14 @@ context *context_read_options(int argc, char *argv[])
ctx->vocabs_filename = strdup(optarg); ctx->vocabs_filename = strdup(optarg);
break; break;
case 'L': case 'L':
if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") )
ctx->language = strdup(optarg); ctx->language = strdup(optarg);
break; break;
case 'D': case 'D':
ctx->maca_data_path = strdup(optarg); ctx->maca_data_path = strdup(optarg);
break; break;
case 'R': case 'R':
if (ctx->root_label) free(ctx->root_label); // libérer le default (strdup("root") )
ctx->root_label = strdup(optarg); ctx->root_label = strdup(optarg);
break; break;
case 'P': case 'P':
......
...@@ -6,18 +6,15 @@ ...@@ -6,18 +6,15 @@
void simple_feat_desc_free(simple_feat_desc *sfd) void simple_feat_desc_free(simple_feat_desc *sfd)
{ {
/* if(sfd->name) if(sfd->name)
free(sfd->name); free(sfd->name);
free(sfd);*/ free(sfd);
} }
void feat_desc_free(feat_desc *fd) void feat_desc_free(feat_desc *fd)
{ {
int i;
for(i=0; i < fd->nbelem; i++)
simple_feat_desc_free(fd->array[i]);
free(fd->array); free(fd->array);
/* free(fd); */ free(fd);
} }
simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct) simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct)
......
...@@ -23,6 +23,7 @@ feat_desc *feat_desc_new(void); ...@@ -23,6 +23,7 @@ feat_desc *feat_desc_new(void);
void feat_desc_free(feat_desc *fd); void feat_desc_free(feat_desc *fd);
feat_desc *feat_desc_add(feat_desc *fd, simple_feat_desc *sfd); feat_desc *feat_desc_add(feat_desc *fd, simple_feat_desc *sfd);
simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct); simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct);
void simple_feat_desc_free(simple_feat_desc *sfd);
#endif #endif
......
...@@ -7,6 +7,7 @@ void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct f ...@@ -7,6 +7,7 @@ void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct f
int feature_nb = dico_add(fl->d_features, feature_name); int feature_nb = dico_add(fl->d_features, feature_name);
fl->array = (simple_feat_desc **)realloc(fl->array, (feature_nb + 1) * sizeof(simple_feat_desc *)); fl->array = (simple_feat_desc **)realloc(fl->array, (feature_nb + 1) * sizeof(simple_feat_desc *));
fl->array[feature_nb] = simple_feat_desc_new(feature_name, feature_type, feature_fct); fl->array[feature_nb] = simple_feat_desc_new(feature_name, feature_type, feature_fct);
fl->nbelem = feature_nb+1;
} }
simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name) simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name)
...@@ -28,6 +29,16 @@ feat_lib *feat_lib_new(void) ...@@ -28,6 +29,16 @@ feat_lib *feat_lib_new(void)
return fl; return fl;
} }
/*
 * Release a feature library and everything reachable from it:
 *   - every simple feature descriptor stored in fl->array[0 .. nbelem-1],
 *   - the d_features dictionary,
 *   - the descriptor array itself,
 *   - the feat_lib structure.
 *
 * fl may be NULL, in which case the call does nothing. This mirrors the
 * guarded style of the other *_free() helpers (dico_vec_free, context_free).
 */
void feat_lib_free(feat_lib *fl) {
  if (fl == NULL)
    return;

  /* nbelem is maintained by feat_lib_add() as "highest feature index + 1" */
  for(int i=0; i < fl->nbelem; ++i) {
    simple_feat_desc_free(fl->array[i]);
  }

  /* guarded like the dico_free() call in context_free() */
  if (fl->d_features)
    dico_free(fl->d_features);

  free(fl->array);
  free(fl); /* the structure itself must be released as well */
}
feat_lib *feat_lib_build(void) feat_lib *feat_lib_build(void)
{ {
......
...@@ -16,5 +16,5 @@ feat_lib *feat_lib_new(void); ...@@ -16,5 +16,5 @@ feat_lib *feat_lib_new(void);
void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct feature_fct); void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct feature_fct);
simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name); simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name);
feat_lib *feat_lib_build(void); feat_lib *feat_lib_build(void);
void feat_lib_free(feat_lib *fl);
#endif #endif
...@@ -44,7 +44,9 @@ void feat_model_free(feat_model *fm) ...@@ -44,7 +44,9 @@ void feat_model_free(feat_model *fm)
int i; int i;
for(i=0; i < fm->nbelem; i++) for(i=0; i < fm->nbelem; i++)
feat_desc_free(fm->array[i]); feat_desc_free(fm->array[i]);
free(fm->array);
free(fm->name); free(fm->name);
feat_lib_free(fm->fl);
free(fm); free(fm);
} }
...@@ -55,7 +57,8 @@ feat_model *feat_model_read(char *filename, int verbose) ...@@ -55,7 +57,8 @@ feat_model *feat_model_read(char *filename, int verbose)
int feature_number = 0; int feature_number = 0;
char buffer[1000]; /* ugly */ char buffer[1000]; /* ugly */
char *feat_name; char *feat_name;
feat_lib *fl = feat_lib_build(); //feat_lib *fl
fm->fl = feat_lib_build(); // must be preserved in feature_model to delete correctly features at the end
simple_feat_desc *sfd; simple_feat_desc *sfd;
feat_desc *fd; feat_desc *fd;
...@@ -67,7 +70,7 @@ feat_model *feat_model_read(char *filename, int verbose) ...@@ -67,7 +70,7 @@ feat_model *feat_model_read(char *filename, int verbose)
feat_name = strtok(buffer, " \n"); feat_name = strtok(buffer, " \n");
do{ do{
if(verbose) fprintf(stderr, "\t%s", feat_name); if(verbose) fprintf(stderr, "\t%s", feat_name);
sfd = feat_lib_get_simple_feat_desc(fl, feat_name); sfd = feat_lib_get_simple_feat_desc(fm->fl, feat_name);
if(sfd) if(sfd)
feat_desc_add(fd, sfd); feat_desc_add(fd, sfd);
}while((feat_name = strtok(NULL, " \n"))); }while((feat_name = strtok(NULL, " \n")));
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define __FEAT_MODEL__ #define __FEAT_MODEL__
#include "feat_desc.h" #include "feat_desc.h"
#include "feat_lib.h"
typedef struct { typedef struct {
char string[2048]; char string[2048];
...@@ -9,6 +10,7 @@ typedef struct { ...@@ -9,6 +10,7 @@ typedef struct {
int nbelem; int nbelem;
feat_desc **array; feat_desc **array;
int dim; int dim;
feat_lib *fl; // stores all simple features
} feat_model; } feat_model;
#include "config.h" #include "config.h"
......
...@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) ...@@ -41,7 +41,7 @@ int main(int argc, char *argv[])
char *mcf_filename = NULL; char *mcf_filename = NULL;
FILE *mcf_file = NULL; FILE *mcf_file = NULL;
int verbose = 0; int verbose = 0;
word_buffer *wb = NULL; //word_buffer *wb = NULL;
config *c = NULL; config *c = NULL;
int mode = MODE_PARSER; int mode = MODE_PARSER;
......
...@@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx) ...@@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx)
} }
} }
char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size)
{ {
char form[1000]; char form[1000];
char pos[1000]; char pos[1000];
...@@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
char morpho[1000]; char morpho[1000];
int num = 0; int num = 0;
char **lemma_array; char **lemma_array;
int lemma_array_size = 10000; //int lemma_array_size = 10000;
*lemma_array_size = 10000;
char buffer[10000]; char buffer[10000];
int fields_nb; int fields_nb;
FILE *f= myfopen(fplm_filename, "r"); FILE *f= myfopen(fplm_filename, "r");
lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *)); lemma_array = (char **)memalloc((*lemma_array_size) * sizeof(char *));
while(fgets(buffer, 10000, f)){ while(fgets(buffer, 10000, f)){
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
...@@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
} }
strcat(form, "/"); strcat(form, "/");
strcat(form, pos); strcat(form, pos);
// TODO: memory leak: if form is already in the hash, it is not added again and
// the memory allocated by strdup() is leaked.
// Possible solutions: let hash_add() do the strdup() itself when needed (check the other callers!),
// or have it return a code indicating whether form has been added or not.
hash_add(form_pos_ht, strdup(form), num); hash_add(form_pos_ht, strdup(form), num);
if(num >= lemma_array_size){ if(num >= *lemma_array_size){
lemma_array_size = 2 * (lemma_array_size) + 1; *lemma_array_size = 2 * (*lemma_array_size) + 1;
lemma_array = realloc(lemma_array, (lemma_array_size) * sizeof(char *)); lemma_array = realloc(lemma_array, (*lemma_array_size) * sizeof(char *));
// initialize the new slots to NULL so that they can be freed correctly at the end
for(int i=num; i<*lemma_array_size; ++i) {
lemma_array[i] = NULL;
}
} }
/* if(lemma_array[num] == NULL) */ /* if(lemma_array[num] == NULL) */
...@@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
num++; num++;
} }
/* fprintf(stderr, "%d entries loaded\n", num); */ /* fprintf(stderr, "%d entries loaded\n", num); */
fclose(f);
return lemma_array; return lemma_array;
} }
...@@ -159,7 +169,8 @@ int main(int argc, char *argv[]) ...@@ -159,7 +169,8 @@ int main(int argc, char *argv[])
maca_lemmatizer_check_options(ctx); maca_lemmatizer_check_options(ctx);
maca_lemmatizer_set_linguistic_resources_filenames(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx);
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode); int lemma_array_size;
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode, &lemma_array_size);
FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
...@@ -179,8 +190,19 @@ int main(int argc, char *argv[]) ...@@ -179,8 +190,19 @@ int main(int argc, char *argv[])
word_buffer_move_right(c->bf); word_buffer_move_right(c->bf);
} }
hash_free(form_pos_ht);
for(int i=0; i<lemma_array_size; ++i) {
if (lemma_array[i]) free(lemma_array[i]);
}
free(lemma_array);
config_free(c); config_free(c);
if (ctx->input_filename) fclose(f);
context_free(ctx); context_free(ctx);
return 0; return 0;
} }
...@@ -12,6 +12,9 @@ int i; ...@@ -12,6 +12,9 @@ int i;
void mvt_stack_free(mvt_stack *s) void mvt_stack_free(mvt_stack *s)
{ {
for(int i=0; i < s->size; ++i) {
mvt_free(s->array[i]);
}
free(s->array); free(s->array);
free(s); free(s);
} }
......
...@@ -134,7 +134,10 @@ void simple_decoder_tagger(context *ctx) ...@@ -134,7 +134,10 @@ void simple_decoder_tagger(context *ctx)
} }
/* config_print(stdout, c); */ /* config_print(stdout, c); */
feat_vec_free(fv);
feature_table_free(ft);
config_free(c); config_free(c);
fclose(f);
} }
#endif #endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment