Skip to content
Snippets Groups Projects
Commit 73cea196 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Merge branch 'johannes' into 'master'

Johannes

Fix memory leaks in the lemmatizer, tagger and parser
(one problem remains to be solved: see the TODO in maca_trans_lemmatizer.c)

See merge request !4
parents 71b263d5 f0ca4a1d
Branches
No related tags found
1 merge request!4Johannes
Showing
with 95 additions and 28 deletions
......@@ -18,6 +18,9 @@ dico_vec *dico_vec_new(void)
void dico_vec_free(dico_vec *dv)
{
if(dv){
if (dv->ht)
hash_free(dv->ht);
if(dv->t)
free(dv->t);
free(dv);
......@@ -98,5 +101,6 @@ dico_vec *dico_vec_read(char *filename, float ratio)
while(!feof(f)){
dico_vec_add(dv, dico_read_fh(f, ratio));
}
fclose(f);
return dv;
}
......@@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
f2p->h_form2signature = hash_new(nbelem * 4);
token = strtok(pos_list, "\t");
do{
dico_add(f2p->d_pos, strdup(token));
//dico_add(f2p->d_pos, strdup(token));
dico_add(f2p->d_pos, token); // token is strdup'ed in dico_add()
}while((token = strtok(NULL, "\t")));
return f2p;
}
......@@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename)
/* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */
hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature));
}
fclose(f);
return f2p;
}
......
......@@ -39,6 +39,7 @@ void hash_free(hash *h)
int i;
for(i=0; i < h->size; i++)
cell_free(h->array[i]);
free(h->array);
free(h);
}
......
......@@ -156,8 +156,15 @@ void context_set_linguistic_resources_filenames(context *ctx)
if(ctx->maca_data_path)
strcat(absolute_path, ctx->maca_data_path);
else
strcat(absolute_path, getenv("MACAON_DIR"));
else {
char *e = getenv("MACAON_DIR");
if (e != NULL) {
strcat(absolute_path, e);
} else {
fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
}
}
strcat(absolute_path, "/");
strcat(absolute_path, ctx->language);
......
......@@ -22,6 +22,14 @@ void context_free(context *ctx)
if(ctx->maca_data_path) free(ctx->maca_data_path);
if(ctx->language) free(ctx->language);
if(ctx->root_label) free(ctx->root_label);
if(ctx->vocabs_filename) free(ctx->vocabs_filename);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if (ctx->mcd_struct)
mcd_free(ctx->mcd_struct);
if (ctx->vocabs)
dico_vec_free(ctx->vocabs);
if(ctx->d_perceptron_features)
dico_free(ctx->d_perceptron_features);
......@@ -55,6 +63,7 @@ context *context_new(void)
ctx->features_model_filename = NULL;
ctx->vocabs_filename = NULL;
ctx->f2p_filename = NULL;
ctx->fplm_filename = NULL;
ctx->maca_data_path = NULL;
ctx->language = strdup("fr");
......@@ -251,12 +260,14 @@ context *context_read_options(int argc, char *argv[])
ctx->vocabs_filename = strdup(optarg);
break;
case 'L':
if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") )
ctx->language = strdup(optarg);
break;
case 'D':
ctx->maca_data_path = strdup(optarg);
break;
case 'R':
if (ctx->root_label) free(ctx->root_label); // libérer le default (strdup("root") )
ctx->root_label = strdup(optarg);
break;
case 'P':
......
......@@ -6,18 +6,15 @@
/*
 * Release a simple feature descriptor together with its name string.
 *
 * sfd: descriptor to destroy; passing NULL is a no-op.
 */
void simple_feat_desc_free(simple_feat_desc *sfd)
{
  if(sfd == NULL)
    return;
  /* NOTE(review): assumes sfd->name was heap-allocated by
     simple_feat_desc_new() -- confirm against that constructor. */
  free(sfd->name); /* free(NULL) is a no-op, so no guard is needed */
  free(sfd);
}
/*
 * Release a feature descriptor: its pointer array and the structure itself.
 *
 * The simple descriptors referenced from fd->array are deliberately NOT freed
 * here. feat_model_read() fills each feat_desc with descriptors obtained from
 * the model's feat_lib, and feat_lib_free() releases every descriptor stored
 * in that library; freeing them here as well would free them twice when
 * feat_model_free() runs.
 * NOTE(review): a caller that stores descriptors not owned by a feat_lib in
 * fd->array must release them itself -- confirm no such caller exists.
 *
 * fd: descriptor to destroy; passing NULL is a no-op.
 */
void feat_desc_free(feat_desc *fd)
{
  if(fd == NULL)
    return;
  free(fd->array);
  free(fd);
}
simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct)
......
......@@ -23,6 +23,7 @@ feat_desc *feat_desc_new(void);
void feat_desc_free(feat_desc *fd);
feat_desc *feat_desc_add(feat_desc *fd, simple_feat_desc *sfd);
simple_feat_desc *simple_feat_desc_new(char *name, int type, feat_fct fct);
void simple_feat_desc_free(simple_feat_desc *sfd);
#endif
......
......@@ -7,6 +7,7 @@ void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct f
int feature_nb = dico_add(fl->d_features, feature_name);
fl->array = (simple_feat_desc **)realloc(fl->array, (feature_nb + 1) * sizeof(simple_feat_desc *));
fl->array[feature_nb] = simple_feat_desc_new(feature_name, feature_type, feature_fct);
fl->nbelem = feature_nb+1;
}
simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name)
......@@ -28,6 +29,16 @@ feat_lib *feat_lib_new(void)
return fl;
}
/*
 * Destroy a feature library: every simple descriptor stored in fl->array
 * (indices 0 .. fl->nbelem - 1), the feature-name dictionary, the array
 * itself and finally the library structure.
 *
 * fl: library to destroy; passing NULL is a no-op.
 */
void feat_lib_free(feat_lib *fl)
{
  if(fl == NULL)
    return;
  for(int i = 0; i < fl->nbelem; ++i)
    simple_feat_desc_free(fl->array[i]);
  dico_free(fl->d_features);
  free(fl->array);
  free(fl);
}
feat_lib *feat_lib_build(void)
{
......
......@@ -16,5 +16,5 @@ feat_lib *feat_lib_new(void);
void feat_lib_add(feat_lib *fl, int feature_type, char *feature_name, feat_fct feature_fct);
simple_feat_desc *feat_lib_get_simple_feat_desc(feat_lib *fl, char *feat_name);
feat_lib *feat_lib_build(void);
void feat_lib_free(feat_lib *fl);
#endif
......@@ -44,7 +44,9 @@ void feat_model_free(feat_model *fm)
int i;
for(i=0; i < fm->nbelem; i++)
feat_desc_free(fm->array[i]);
free(fm->array);
free(fm->name);
feat_lib_free(fm->fl);
free(fm);
}
......@@ -55,7 +57,8 @@ feat_model *feat_model_read(char *filename, int verbose)
int feature_number = 0;
char buffer[1000]; /* ugly */
char *feat_name;
feat_lib *fl = feat_lib_build();
//feat_lib *fl
fm->fl = feat_lib_build(); // must be preserved in feature_model to delete correctly features at the end
simple_feat_desc *sfd;
feat_desc *fd;
......@@ -67,7 +70,7 @@ feat_model *feat_model_read(char *filename, int verbose)
feat_name = strtok(buffer, " \n");
do{
if(verbose) fprintf(stderr, "\t%s", feat_name);
sfd = feat_lib_get_simple_feat_desc(fl, feat_name);
sfd = feat_lib_get_simple_feat_desc(fm->fl, feat_name);
if(sfd)
feat_desc_add(fd, sfd);
}while((feat_name = strtok(NULL, " \n")));
......
......@@ -2,6 +2,7 @@
#define __FEAT_MODEL__
#include "feat_desc.h"
#include "feat_lib.h"
typedef struct {
char string[2048];
......@@ -9,6 +10,7 @@ typedef struct {
int nbelem;
feat_desc **array;
int dim;
feat_lib *fl; // stores all simple features
} feat_model;
#include "config.h"
......
......@@ -41,7 +41,7 @@ int main(int argc, char *argv[])
char *mcf_filename = NULL;
FILE *mcf_file = NULL;
int verbose = 0;
word_buffer *wb = NULL;
//word_buffer *wb = NULL;
config *c = NULL;
int mode = MODE_PARSER;
......
......@@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx)
}
}
char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size)
{
char form[1000];
char pos[1000];
......@@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
char morpho[1000];
int num = 0;
char **lemma_array;
int lemma_array_size = 10000;
//int lemma_array_size = 10000;
*lemma_array_size = 10000;
char buffer[10000];
int fields_nb;
FILE *f= myfopen(fplm_filename, "r");
lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *));
lemma_array = (char **)memalloc((*lemma_array_size) * sizeof(char *));
while(fgets(buffer, 10000, f)){
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
......@@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
}
strcat(form, "/");
strcat(form, pos);
// TODO: memory leak: if form is already in the hash, it is not added and the memory
// allocated by strdup() is leaked.
// Possible solutions: have hash_add() do the strdup() itself when necessary (check other callers!),
// or have it return a code indicating whether form has been added or not.
hash_add(form_pos_ht, strdup(form), num);
if(num >= lemma_array_size){
lemma_array_size = 2 * (lemma_array_size) + 1;
lemma_array = realloc(lemma_array, (lemma_array_size) * sizeof(char *));
if(num >= *lemma_array_size){
*lemma_array_size = 2 * (*lemma_array_size) + 1;
lemma_array = realloc(lemma_array, (*lemma_array_size) * sizeof(char *));
// initialize the new slots to NULL so that they can be freed correctly at the end
for(int i=num; i<*lemma_array_size; ++i) {
lemma_array[i] = NULL;
}
}
/* if(lemma_array[num] == NULL) */
......@@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
num++;
}
/* fprintf(stderr, "%d entries loaded\n", num); */
fclose(f);
return lemma_array;
}
......@@ -159,7 +169,8 @@ int main(int argc, char *argv[])
maca_lemmatizer_check_options(ctx);
maca_lemmatizer_set_linguistic_resources_filenames(ctx);
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode);
int lemma_array_size;
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode, &lemma_array_size);
FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
......@@ -179,8 +190,19 @@ int main(int argc, char *argv[])
word_buffer_move_right(c->bf);
}
hash_free(form_pos_ht);
for(int i=0; i<lemma_array_size; ++i) {
if (lemma_array[i]) free(lemma_array[i]);
}
free(lemma_array);
config_free(c);
if (ctx->input_filename) fclose(f);
context_free(ctx);
return 0;
}
......@@ -12,6 +12,9 @@ int i;
/*
 * Destroy a movement stack: calls mvt_free() on each of the s->size entries
 * of s->array, then releases the array and the stack structure itself.
 */
void mvt_stack_free(mvt_stack *s)
{
  int idx = 0;

  while(idx < s->size){
    mvt_free(s->array[idx]);
    idx++;
  }
  free(s->array);
  free(s);
}
......
......@@ -133,5 +133,8 @@ void simple_decoder_tagger(context *ctx)
}
/* config_print(stdout, c); */
feat_vec_free(fv);
feature_table_free(ft);
config_free(c);
fclose(f);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment