Skip to content
Snippets Groups Projects
Commit 568cf30a authored by Alexis Nasr
Browse files

modified maca_trans_lemmatizer so that it works when no morphological rules...

Modified maca_trans_lemmatizer so that it works when no morphological rules are supplied; in that case it uses only the exception lexicon.
parent 5c81f3ea
No related branches found
No related tags found
No related merge requests found
...@@ -119,7 +119,7 @@ int main(int argc, char *argv[]) ...@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
int l_rule_code; int l_rule_code;
char *l_rule; char *l_rule;
float max; float max;
feature_table *ft = NULL;
maca_lemmatizer_check_options(ctx); maca_lemmatizer_check_options(ctx);
maca_lemmatizer_set_linguistic_resources_filenames(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx);
...@@ -131,8 +131,17 @@ int main(int argc, char *argv[]) ...@@ -131,8 +131,17 @@ int main(int argc, char *argv[])
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
if(d_l_rules->nbelem){
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
}
else{
if(ctx->verbose)
fprintf(stderr, "no morphological rules loaded\n");
ctx->d_perceptron_features = NULL;
ft = NULL;
}
c = config_new(f, ctx->mcd_struct, 5); c = config_new(f, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){ while(!config_is_terminal(c)){
...@@ -152,6 +161,10 @@ int main(int argc, char *argv[]) ...@@ -152,6 +161,10 @@ int main(int argc, char *argv[])
print_word(b0, ctx->mcd_struct, lemma_from_fplm); print_word(b0, ctx->mcd_struct, lemma_from_fplm);
} }
// if lemma is not found in exception file, predict an l_rule // if lemma is not found in exception file, predict an l_rule
else{
if(ft == NULL){ /* no rule model just print the form as a lemma */
print_word(b0, ctx->mcd_struct, form);
}
else{ else{
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
// feat_vec_print_string(fv, ctx->d_perceptron_features); // feat_vec_print_string(fv, ctx->d_perceptron_features);
...@@ -182,17 +195,17 @@ int main(int argc, char *argv[]) ...@@ -182,17 +195,17 @@ int main(int argc, char *argv[])
if(i == 10){ if(i == 10){
print_word(b0, ctx->mcd_struct, form); print_word(b0, ctx->mcd_struct, form);
} }
free(vcode_array); free(vcode_array);
} }
} }
}
word_buffer_move_right(c->bf); word_buffer_move_right(c->bf);
} }
config_free(c); config_free(c);
if (ctx->input_filename) fclose(f); if (ctx->input_filename) fclose(f);
context_free(ctx); context_free(ctx);
fplm_free(exceptions); fplm_free(exceptions);
feature_table_free(ft); if(ft) feature_table_free(ft);
return 0; return 0;
} }
......
...@@ -171,15 +171,17 @@ int main(int argc, char *argv[]) ...@@ -171,15 +171,17 @@ int main(int argc, char *argv[])
fplm_struct *exceptions; fplm_struct *exceptions;
ctx = context_read_options(argc, argv); ctx = context_read_options(argc, argv);
// decode_lemmatizer_set_linguistic_resources_filenames(ctx); // decode_lemmatizer_set_linguistic_resources_filenames(ctx);
maca_trans_lemmatizer_mcf2cff_check_options(ctx); maca_trans_lemmatizer_mcf2cff_check_options(ctx);
exceptions = fplm_load_file(ctx->fplm_filename, ctx->verbose); exceptions = fplm_load_file(ctx->fplm_filename, ctx->verbose);
d_l_rules = dico_read(ctx->l_rules_filename, 0.5); d_l_rules = dico_read(ctx->l_rules_filename, 0.5);
if(d_l_rules->nbelem == 0){
/* do not produce cff file when the rule file is empty */
/* exit(1);*/
}
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
if(ctx->mode == TRAIN_MODE){ if(ctx->mode == TRAIN_MODE){
...@@ -203,22 +205,25 @@ int main(int argc, char *argv[]) ...@@ -203,22 +205,25 @@ int main(int argc, char *argv[])
/* add the feature dictionnary to the dico vector */ /* add the feature dictionnary to the dico vector */
dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);
/* open output file */ /* open output file */
if(ctx->cff_filename) if(ctx->cff_filename)
output_file = myfopen(ctx->cff_filename, "w"); output_file = myfopen(ctx->cff_filename, "w");
else else
output_file = stdout; output_file = stdout;
if(d_l_rules->nbelem)
generate_training_file(output_file, ctx, d_l_rules, exceptions); generate_training_file(output_file, ctx, d_l_rules, exceptions);
if(ctx->cff_filename)
fclose(output_file);
if(ctx->mode == TRAIN_MODE){ if(ctx->mode == TRAIN_MODE){
/* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */ /* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */
dico_vec_print(ctx->vocabs_filename, ctx->vocabs); dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
} }
if(ctx->cff_filename)
fclose(output_file);
context_free(ctx); context_free(ctx);
return 0; return 0;
} }
......
...@@ -35,12 +35,12 @@ int main(int argc, char *argv[]) ...@@ -35,12 +35,12 @@ int main(int argc, char *argv[])
train_check_options(ctx); train_check_options(ctx);
look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class); look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class);
if(nb_class > 1){
ft = feature_table_new(nb_feat, nb_class); ft = feature_table_new(nb_feat, nb_class);
fprintf(stderr, "table allocated (%d x %d)\n", nb_feat, nb_class); fprintf(stderr, "table allocated (%d x %d)\n", nb_feat, nb_class);
perceptron_avg(ctx->cff_filename, ft, ctx->iteration_nb); perceptron_avg(ctx->cff_filename, ft, ctx->iteration_nb);
feature_table_dump(ctx->perc_model_filename, ft); feature_table_dump(ctx->perc_model_filename, ft);
}
perceptron_context_free(ctx); perceptron_context_free(ctx);
return 0; return 0;
......
...@@ -56,8 +56,11 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int ...@@ -56,8 +56,11 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int
FILE *f = fopen(filename, "r"); FILE *f = fopen(filename, "r");
char *token; char *token;
int nb; int nb;
*max_feat = 0; *max_feat = 0;
*max_class = 0; *max_class = 0;
if(f == NULL)
return;
while(fgets(buffer, 10000, f)){ while(fgets(buffer, 10000, f)){
buffer[strlen(buffer) - 1] = '\0'; buffer[strlen(buffer) - 1] = '\0';
token = strtok(buffer, "\t"); token = strtok(buffer, "\t");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment