Skip to content
Snippets Groups Projects
Commit 26193d70 authored by Mathux's avatar Mathux
Browse files

maca_error_predictor_tagger works

parent b71a2896
No related branches found
No related tags found
No related merge requests found
...@@ -37,7 +37,10 @@ ...@@ -37,7 +37,10 @@
#define DEFAULT_MODEL_PARSER_NN_FILENAME "maca_trans_parser_nn.weights" #define DEFAULT_MODEL_PARSER_NN_FILENAME "maca_trans_parser_nn.weights"
#define DEFAULT_JSON_PARSER_NN_FILENAME "maca_trans_parser_nn.json" #define DEFAULT_JSON_PARSER_NN_FILENAME "maca_trans_parser_nn.json"
#define DEFAULT_PATH_RELAT "../data/treebank/"
#define DEFAULT_CONLL07_DEV "dev.conll07"
#define DEFAULT_CONLL07_TRAIN "train.conll07"
#define DEFAULT_CONLL07_TEST "test.conll07"
#define DEFAULT_F2P_FILENAME "fP" #define DEFAULT_F2P_FILENAME "fP"
#define DEFAULT_FPLM_FILENAME "fplm" #define DEFAULT_FPLM_FILENAME "fplm"
......
...@@ -72,29 +72,24 @@ void add_signature_to_words_in_word_buffer_tagger(word_buffer *bf, form2pos *f2p ...@@ -72,29 +72,24 @@ void add_signature_to_words_in_word_buffer_tagger(word_buffer *bf, form2pos *f2p
void maca_error_predictor_help_message(context *ctx) void maca_error_predictor_help_message(context *ctx)
{ {
context_general_help_message(ctx); context_general_help_message(ctx);
context_mode_help_message(ctx); //context_mode_help_message(ctx);
context_sent_nb_help_message(ctx); context_sent_nb_help_message(ctx);
context_mcd_help_message(ctx); //context_mcd_help_message(ctx);
fprintf(stderr, "INPUT\n"); fprintf(stderr, "INPUT\n");
context_conll_help_message(ctx); fprintf(stderr, "\t-i --input <file> : input is in conll07 format (default is dev.conll07)\n");
fprintf(stderr, "IN TEST MODE\n"); //fprintf(stderr, "IN TEST MODE\n");
context_vocabs_help_message(ctx); context_vocabs_help_message(ctx);
fprintf(stderr, "OUTPUT\n"); fprintf(stderr, "OUTPUT\n");
context_cff_help_message(ctx); fprintf(stderr, "\t-x --cff <file> : CFF format file name (default is stdout)\n");
fprintf(stderr, "IN TRAIN MODE\n"); //fprintf(stderr, "IN TRAIN MODE\n");
context_vocabs_help_message(ctx); //context_vocabs_help_message(ctx);
} }
void maca_error_predictor_check_options(context *ctx) void maca_error_predictor_check_options(context *ctx)
{ {
if(0 /*!ctx->input_filename if(ctx->help){
|| ctx->help
/ || !ctx->mcd_filename /
|| !(ctx->cff_filename || ctx->fann_filename)
*/){
maca_error_predictor_help_message(ctx); maca_error_predictor_help_message(ctx);
exit(1); exit(1);
} }
...@@ -106,55 +101,58 @@ int config_is_equal_tagger(config *c1, config *c2) ...@@ -106,55 +101,58 @@ int config_is_equal_tagger(config *c1, config *c2)
return ((bm1p(c1)==bm1p(c2))&&(bm2p(c1)==bm2p(c2))&&(bm3p(c1)==bm3p(c2))); return ((bm1p(c1)==bm1p(c2))&&(bm2p(c1)==bm2p(c2))&&(bm3p(c1)==bm3p(c2)));
} }
void generate_error_train(FILE *output_file, context *ctx) void generate_error_train(FILE *output_file, context *ctx)
{ {
config *config_oracle; config *config_oracle;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
feat_vec *fv_oracle = feat_vec_new(feature_types_nb); feat_vec *fv_oracle = feat_vec_new(feature_types_nb);
FILE *conll_file_oracle = myfopen(ctx->input_filename, "r"); FILE *conll_file_oracle = myfopen(ctx->input_filename, "r");
int postag_oracle; int postag_oracle;
float max;
word *b0; word *b0;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
config *config_predicted; config *config_predicted;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
feat_vec *fv_predicted = feat_vec_new(feature_types_nb); feat_vec *fv_predicted = feat_vec_new(feature_types_nb);
FILE *conll_file_predicted = myfopen(ctx->input_filename, "r"); FILE *conll_file_predicted = myfopen(ctx->input_filename, "r");
int postag_predicted; int postag_predicted;
float max;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
config_oracle = config_new(conll_file_oracle, ctx->mcd_struct, 5); config_oracle = config_new(conll_file_oracle, ctx->mcd_struct, 5);
config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5); config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5);
while(!config_is_terminal(config_oracle)){ while(!config_is_terminal(config_oracle)){
/*if(ctx->f2p){ if(ctx->f2p){
add_signature_to_words_in_word_buffer_tagger(config_oracle->bf, ctx->f2p);
add_signature_to_words_in_word_buffer_tagger(config_predicted->bf, ctx->f2p); add_signature_to_words_in_word_buffer_tagger(config_predicted->bf, ctx->f2p);
}*/ add_signature_to_words_in_word_buffer_tagger(config_oracle->bf, ctx->f2p);
}
// oracle // oracle
config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE); config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE);
postag_oracle = oracle_tagger(config_oracle); postag_oracle = oracle_tagger(config_oracle);
if(ctx->debug_mode){
printf("Oracle : "); printf("Oracle : ");
print_word_simple(word_buffer_b0(config_oracle->bf), ctx->mcd_struct, dico_pos, postag_oracle); print_word_simple(word_buffer_b0(config_oracle->bf), ctx->mcd_struct, dico_pos, postag_oracle);
}
// predicted // predicted
b0 = word_buffer_b0(config_predicted->bf); b0 = word_buffer_b0(config_predicted->bf);
config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv_predicted, LOOKUP_MODE); config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv_predicted, LOOKUP_MODE);
postag_predicted = feature_table_argmax(fv_predicted, ft, &max); postag_predicted = feature_table_argmax(fv_predicted, ft, &max);
if(ctx->debug_mode){
printf("Predicted : "); printf("Predicted : ");
print_word_simple(b0, ctx->mcd_struct, dico_pos, postag_predicted); print_word_simple(b0, ctx->mcd_struct, dico_pos, postag_predicted);
}
if(1){ if(ctx->debug_mode){
vcode *vcode_array = feature_table_get_vcode_array(fv_predicted, ft); vcode *vcode_array = feature_table_get_vcode_array(fv_predicted, ft);
for(int i=0; i < 3; i++){ for(int i=0; i < 3; i++){
fprintf(stdout, "%d\t", i); printf("%d\t", i);
fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score); printf("%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score);
} }
free(vcode_array); free(vcode_array);
}
if (postag_oracle!=postag_predicted) if (postag_oracle!=postag_predicted)
fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n"); fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n");
...@@ -162,16 +160,17 @@ void generate_error_train(FILE *output_file, context *ctx) ...@@ -162,16 +160,17 @@ void generate_error_train(FILE *output_file, context *ctx)
else else
fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n"); fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n");
}
movement_tagger(config_oracle, postag_oracle); movement_tagger(config_oracle, postag_oracle);
movement_tagger(config_predicted, postag_predicted); movement_tagger(config_predicted, postag_predicted);
if(!ctx->debug_mode || output_file!=stdout) {
fprintf(output_file, "%d", ((config_is_equal_tagger(config_oracle, config_predicted)) ? 1 : 0)); fprintf(output_file, "%d", ((config_is_equal_tagger(config_oracle, config_predicted)) ? 1 : 0));
fprintf(output_file, " or : %d, pred : %d", postag_oracle,postag_predicted);
feat_vec_print(output_file, fv_predicted); feat_vec_print(output_file, fv_predicted);
//word_set_pos(word_buffer_bm1(config_predicted->bf), postag_oracle); }
} }
feat_vec_free(fv_oracle); feat_vec_free(fv_oracle);
feat_vec_free(fv_predicted); feat_vec_free(fv_predicted);
feature_table_free(ft); feature_table_free(ft);
...@@ -183,6 +182,54 @@ void generate_error_train(FILE *output_file, context *ctx) ...@@ -183,6 +182,54 @@ void generate_error_train(FILE *output_file, context *ctx)
} }
/*
 * Return a freshly allocated string holding dir + middle + leaf.
 *
 * The buffer is sized from the three inputs, so there is no fixed limit on
 * the length of the resulting path. The caller owns the returned string.
 * Terminates the program with exit(1) if the allocation fails.
 */
static char *error_tagger_build_path(const char *dir, const char *middle, const char *leaf)
{
  size_t size = strlen(dir) + strlen(middle) + strlen(leaf) + 1;
  char *path = malloc(size);

  if(!path){
    fprintf(stderr, "error_tagger: out of memory while building a default file name\n");
    exit(1);
  }
  snprintf(path, size, "%s%s%s", dir, middle, leaf);
  return path;
}

/*
 * Fill in every resource file name of ctx that is still unset with a default
 * located under ctx->maca_data_path:
 *   - perc_model_filename      -> DEFAULT_MODEL_TAGGER_FILENAME
 *   - vocabs_filename          -> DEFAULT_VOCABS_TAGGER_FILENAME
 *   - input_filename           -> DEFAULT_PATH_RELAT + DEFAULT_CONLL07_DEV
 *   - features_model_filename  -> DEFAULT_FEATURES_MODEL_TAGGER_FILENAME
 *   - f2p_filename             -> DEFAULT_F2P_FILENAME
 *
 * File names that are already set are left untouched. ctx->cff_filename is
 * never given a default here (NOTE(review): an unset cff_filename appears to
 * mean "write to stdout" -- confirm against the caller).
 *
 * The default strings are heap-allocated and stored directly in ctx.
 * When ctx->verbose is set, the resulting file names are written to stderr.
 */
void error_tagger_set_linguistic_resources_filename(context *ctx)
{
  const char *data_path = ctx->maca_data_path;

  if(!ctx->perc_model_filename)
    ctx->perc_model_filename = error_tagger_build_path(data_path, "", DEFAULT_MODEL_TAGGER_FILENAME);

  if(!ctx->vocabs_filename)
    ctx->vocabs_filename = error_tagger_build_path(data_path, "", DEFAULT_VOCABS_TAGGER_FILENAME);

  if(!ctx->input_filename)
    ctx->input_filename = error_tagger_build_path(data_path, DEFAULT_PATH_RELAT, DEFAULT_CONLL07_DEV);

  if(!ctx->features_model_filename)
    ctx->features_model_filename = error_tagger_build_path(data_path, "", DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);

  if(!ctx->f2p_filename)
    ctx->f2p_filename = error_tagger_build_path(data_path, "", DEFAULT_F2P_FILENAME);

  if(ctx->verbose){
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename gets no default in this function, so it may still be NULL;
       passing NULL to %s is undefined behaviour. */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
    fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
    /* Report the input file itself (the f2p file name was printed here before). */
    fprintf(stderr, "input_filename = %s\n", ctx->input_filename);
  }
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
...@@ -191,20 +238,13 @@ int main(int argc, char *argv[]) ...@@ -191,20 +238,13 @@ int main(int argc, char *argv[])
FILE *output_file; FILE *output_file;
ctx = context_read_options(argc, argv); ctx = context_read_options(argc, argv);
//maca_error_predictor_check_options(ctx); error_tagger_set_linguistic_resources_filename(ctx);
ctx->f2p = form2pos_read(ctx->f2p_filename);
ctx->perc_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.model" ; maca_error_predictor_check_options(ctx);
ctx->features_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.fm" ;
ctx->input_filename = "/home/mathis/maca_data2/fr/data/treebank/dev.conll07";
ctx->f2p_filename = "/home/mathis/maca_data2/fr/bin/fP";
ctx->vocabs_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.vocab";
ctx->cff_filename = "/home/mathis/test/stage/error.cff";
/* load ctx */
ctx->mcd_struct = mcd_build_conll07(); ctx->mcd_struct = mcd_build_conll07();
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
//decode_tagger_set_linguistic_resources_filenames(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), 1);//ctx->verbose);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
...@@ -225,7 +265,7 @@ int main(int argc, char *argv[]) ...@@ -225,7 +265,7 @@ int main(int argc, char *argv[])
if(ctx->cff_filename) if(ctx->cff_filename)
fclose(output_file); fclose(output_file);
//context_free(ctx); context_free(ctx);
return 0; return 0;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment