diff --git a/maca_common/src/form2pos.c b/maca_common/src/form2pos.c index dccc016eeb197852c866e9a59adbaefde3b39b6b..b200c780e12371d4473561e2690c44fc481e6c5c 100644 --- a/maca_common/src/form2pos.c +++ b/maca_common/src/form2pos.c @@ -16,7 +16,8 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list) f2p->h_form2signature = hash_new(nbelem * 4); token = strtok(pos_list, "\t"); do{ - dico_add(f2p->d_pos, strdup(token)); + //dico_add(f2p->d_pos, strdup(token)); + dico_add(f2p->d_pos, token); // token is strdup'ed in dico_add() }while((token = strtok(NULL, "\t"))); return f2p; } @@ -83,6 +84,7 @@ form2pos *form2pos_read(char *filename) /* printf("form = %s signature = %s code = %d\n", form, signature, signature_code); */ hash_add(f2p->h_form2signature, strdup(form), dico_add(f2p->d_signature, signature)); } + fclose(f); return f2p; } diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index 760410cbd36e130e0f78bedf27268195c9a791e1..85acc249c706395b5bcd9a0512765990736c8f9e 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -23,6 +23,7 @@ void context_free(context *ctx) if(ctx->language) free(ctx->language); if(ctx->root_label) free(ctx->root_label); if(ctx->vocabs_filename) free(ctx->vocabs_filename); + if(ctx->fplm_filename) free(ctx->fplm_filename); if (ctx->mcd_struct) mcd_free(ctx->mcd_struct); @@ -62,6 +63,7 @@ context *context_new(void) ctx->features_model_filename = NULL; ctx->vocabs_filename = NULL; ctx->f2p_filename = NULL; + ctx->fplm_filename = NULL; ctx->maca_data_path = NULL; ctx->language = strdup("fr"); diff --git a/maca_trans_parser/src/maca_trans_lemmatizer.c b/maca_trans_parser/src/maca_trans_lemmatizer.c index 6b71bc98401b1cf99efa3af21311af1bf374812e..1044ddad92280eae381b9a806357fc1508f78a02 100644 --- a/maca_trans_parser/src/maca_trans_lemmatizer.c +++ b/maca_trans_parser/src/maca_trans_lemmatizer.c @@ -41,7 +41,7 @@ void maca_lemmatizer_set_linguistic_resources_filenames(context *ctx) } } -char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) +char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size) { char form[1000]; char pos[1000]; @@ -49,12 +49,13 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) char morpho[1000]; int num = 0; char **lemma_array; - int lemma_array_size = 10000; + //int lemma_array_size = 10000; + *lemma_array_size = 10000; char buffer[10000]; int fields_nb; FILE *f= myfopen(fplm_filename, "r"); - lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *)); + lemma_array = (char **)memalloc((*lemma_array_size) * sizeof(char *)); while(fgets(buffer, 10000, f)){ fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); @@ -69,11 +70,19 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) } strcat(form, "/"); strcat(form, pos); + // TODO: memory leak: if form is already in the hash, it is not added and the memory + // allocated by strdup() is leaked + // solutions: hash_add does the strdup() if necessary (check else where !) + // or return code to indicate whether form has been added or not hash_add(form_pos_ht, strdup(form), num); - if(num >= lemma_array_size){ - lemma_array_size = 2 * (lemma_array_size) + 1; - lemma_array = realloc(lemma_array, (lemma_array_size) * sizeof(char *)); + if(num >= *lemma_array_size){ + *lemma_array_size = 2 * (*lemma_array_size) + 1; + lemma_array = realloc(lemma_array, (*lemma_array_size) * sizeof(char *)); + // initialize in order to be able to free correctly and the end + for(int i=num; i<*lemma_array_size; ++i) { + lemma_array[i] = NULL; + } } /* if(lemma_array[num] == NULL) */ @@ -81,6 +90,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) num++; } /* fprintf(stderr, "%d entries loaded\n", num); */ + fclose(f); return lemma_array; } @@ -159,7 +169,8 @@ int main(int argc, char *argv[]) maca_lemmatizer_check_options(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx); - lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode); + int lemma_array_size; + lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode, &lemma_array_size); FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; @@ -179,8 +190,19 @@ int main(int argc, char *argv[]) word_buffer_move_right(c->bf); } + + hash_free(form_pos_ht); + + for(int i=0; i<lemma_array_size; ++i) { + if (lemma_array[i]) free(lemma_array[i]); + } + free(lemma_array); + + config_free(c); + if (ctx->input_filename) fclose(f); context_free(ctx); + return 0; } diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index 9a50f63a81f68ff20b4d9617c7516349069ac6be..747be8c17e391c14511afd14e21ffa04fced1d1a 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -133,5 +133,8 @@ void simple_decoder_tagger(context *ctx) } /* config_print(stdout, c); */ - config_free(c); + feat_vec_free(fv); + feature_table_free(ft); + config_free(c); + fclose(f); }