Skip to content
Snippets Groups Projects
Commit f0ca4a1d authored by johannes.heinecke's avatar johannes.heinecke
Browse files

memory leaks in maca_trans_tagger and maca_trans_lemmatizer (one problem yet...

Fix memory leaks in maca_trans_tagger and maca_trans_lemmatizer (one problem yet to solve: check the TODO in maca_trans_lemmatizer)
parent f98226f8
Branches
No related tags found
2 merge requests: !4 "Johannes", !3 "correction unfreed memory/invalid access: valgrind does not report any error anymore"
...@@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list) ...@@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
f2p->h_form2signature = hash_new(nbelem * 4); f2p->h_form2signature = hash_new(nbelem * 4);
token = strtok(pos_list, "\t"); token = strtok(pos_list, "\t");
do{ do{
dico_add(f2p->d_pos, strdup(token)); //dico_add(f2p->d_pos, strdup(token));
dico_add(f2p->d_pos, token); // token is strdup'ed in dico_add()
}while((token = strtok(NULL, "\t"))); }while((token = strtok(NULL, "\t")));
return f2p; return f2p;
} }
...@@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename) ...@@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename)
/* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */ /* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */
hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature)); hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature));
} }
fclose(f);
return f2p; return f2p;
} }
......
...@@ -23,6 +23,7 @@ void context_free(context *ctx) ...@@ -23,6 +23,7 @@ void context_free(context *ctx)
if(ctx->language) free(ctx->language); if(ctx->language) free(ctx->language);
if(ctx->root_label) free(ctx->root_label); if(ctx->root_label) free(ctx->root_label);
if(ctx->vocabs_filename) free(ctx->vocabs_filename); if(ctx->vocabs_filename) free(ctx->vocabs_filename);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if (ctx->mcd_struct) if (ctx->mcd_struct)
mcd_free(ctx->mcd_struct); mcd_free(ctx->mcd_struct);
...@@ -62,6 +63,7 @@ context *context_new(void) ...@@ -62,6 +63,7 @@ context *context_new(void)
ctx->features_model_filename = NULL; ctx->features_model_filename = NULL;
ctx->vocabs_filename = NULL; ctx->vocabs_filename = NULL;
ctx->f2p_filename = NULL; ctx->f2p_filename = NULL;
ctx->fplm_filename = NULL;
ctx->maca_data_path = NULL; ctx->maca_data_path = NULL;
ctx->language = strdup("fr"); ctx->language = strdup("fr");
......
...@@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx) ...@@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx)
} }
} }
char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size)
{ {
char form[1000]; char form[1000];
char pos[1000]; char pos[1000];
...@@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
char morpho[1000]; char morpho[1000];
int num = 0; int num = 0;
char **lemma_array; char **lemma_array;
int lemma_array_size = 10000; //int lemma_array_size = 10000;
*lemma_array_size = 10000;
char buffer[10000]; char buffer[10000];
int fields_nb; int fields_nb;
FILE *f= myfopen(fplm_filename, "r"); FILE *f= myfopen(fplm_filename, "r");
lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *)); lemma_array = (char **)memalloc((*lemma_array_size) * sizeof(char *));
while(fgets(buffer, 10000, f)){ while(fgets(buffer, 10000, f)){
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
...@@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
} }
strcat(form, "/"); strcat(form, "/");
strcat(form, pos); strcat(form, pos);
// TODO: memory leak: if form is already in the hash, it is not added, and the memory
// allocated by strdup() is leaked.
// Possible solutions: have hash_add() do the strdup() itself when necessary (check the other callers!),
// or have it return a code indicating whether form has been added or not.
hash_add(form_pos_ht, strdup(form), num); hash_add(form_pos_ht, strdup(form), num);
if(num >= lemma_array_size){ if(num >= *lemma_array_size){
lemma_array_size = 2 * (lemma_array_size) + 1; *lemma_array_size = 2 * (*lemma_array_size) + 1;
lemma_array = realloc(lemma_array, (lemma_array_size) * sizeof(char *)); lemma_array = realloc(lemma_array, (*lemma_array_size) * sizeof(char *));
// initialize the new slots to NULL so that they can be freed correctly at the end
for(int i=num; i<*lemma_array_size; ++i) {
lemma_array[i] = NULL;
}
} }
/* if(lemma_array[num] == NULL) */ /* if(lemma_array[num] == NULL) */
...@@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) ...@@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode)
num++; num++;
} }
/* fprintf(stderr, "%d entries loaded\n", num); */ /* fprintf(stderr, "%d entries loaded\n", num); */
fclose(f);
return lemma_array; return lemma_array;
} }
...@@ -159,7 +169,8 @@ int main(int argc, char *argv[]) ...@@ -159,7 +169,8 @@ int main(int argc, char *argv[])
maca_lemmatizer_check_options(ctx); maca_lemmatizer_check_options(ctx);
maca_lemmatizer_set_linguistic_resources_filenames(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx);
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode); int lemma_array_size;
lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode, &lemma_array_size);
FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
...@@ -179,8 +190,19 @@ int main(int argc, char *argv[]) ...@@ -179,8 +190,19 @@ int main(int argc, char *argv[])
word_buffer_move_right(c->bf); word_buffer_move_right(c->bf);
} }
hash_free(form_pos_ht);
for(int i=0; i<lemma_array_size; ++i) {
if (lemma_array[i]) free(lemma_array[i]);
}
free(lemma_array);
config_free(c); config_free(c);
if (ctx->input_filename) fclose(f);
context_free(ctx); context_free(ctx);
return 0; return 0;
} }
...@@ -133,5 +133,8 @@ void simple_decoder_tagger(context *ctx) ...@@ -133,5 +133,8 @@ void simple_decoder_tagger(context *ctx)
} }
/* config_print(stdout, c); */ /* config_print(stdout, c); */
feat_vec_free(fv);
feature_table_free(ft);
config_free(c); config_free(c);
fclose(f);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment