Skip to content
Snippets Groups Projects
Commit 26193d70 authored by Mathux
Browse files

maca_error_predictor_tagger works

parent b71a2896
No related branches found
No related tags found
No related merge requests found
......@@ -37,7 +37,10 @@
#define DEFAULT_MODEL_PARSER_NN_FILENAME "maca_trans_parser_nn.weights"
#define DEFAULT_JSON_PARSER_NN_FILENAME "maca_trans_parser_nn.json"
#define DEFAULT_PATH_RELAT "../data/treebank/"
#define DEFAULT_CONLL07_DEV "dev.conll07"
#define DEFAULT_CONLL07_TRAIN "train.conll07"
#define DEFAULT_CONLL07_TEST "test.conll07"
#define DEFAULT_F2P_FILENAME "fP"
#define DEFAULT_FPLM_FILENAME "fplm"
......
......@@ -72,29 +72,24 @@ void add_signature_to_words_in_word_buffer_tagger(word_buffer *bf, form2pos *f2p
/*
 * Print the command-line help of the error predictor.
 *
 * Delegates to the context_*_help_message helpers (defined elsewhere) and
 * writes the INPUT / OUTPUT section headers and option descriptions to stderr.
 *
 * NOTE(review): this span appears to interleave the pre- and post-commit
 * lines of a diff (several calls are immediately followed by a commented-out
 * copy of themselves). Confirm which lines are live before relying on the
 * exact text that gets printed.
 */
void maca_error_predictor_help_message(context *ctx)
{
context_general_help_message(ctx);
context_mode_help_message(ctx);
//context_mode_help_message(ctx);
context_sent_nb_help_message(ctx);
context_mcd_help_message(ctx);
//context_mcd_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_conll_help_message(ctx);
fprintf(stderr, "IN TEST MODE\n");
/* The input option is described by hand here; the text names dev.conll07 as the default. */
fprintf(stderr, "\t-i --input <file> : input is in conll07 format (default is dev.conll07)\n");
//fprintf(stderr, "IN TEST MODE\n");
context_vocabs_help_message(ctx);
fprintf(stderr, "OUTPUT\n");
context_cff_help_message(ctx);
fprintf(stderr, "IN TRAIN MODE\n");
context_vocabs_help_message(ctx);
/* The CFF output option is described by hand here; the text names stdout as the default. */
fprintf(stderr, "\t-x --cff <file> : CFF format file name (default is stdout)\n");
//fprintf(stderr, "IN TRAIN MODE\n");
//context_vocabs_help_message(ctx);
}
void maca_error_predictor_check_options(context *ctx)
{
if(0 /*!ctx->input_filename
|| ctx->help
/ || !ctx->mcd_filename /
|| !(ctx->cff_filename || ctx->fann_filename)
*/){
if(ctx->help){
maca_error_predictor_help_message(ctx);
exit(1);
}
......@@ -106,55 +101,58 @@ int config_is_equal_tagger(config *c1, config *c2)
return ((bm1p(c1)==bm1p(c2))&&(bm2p(c1)==bm2p(c2))&&(bm3p(c1)==bm3p(c2)));
}
void generate_error_train(FILE *output_file, context *ctx)
{
config *config_oracle;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
feat_vec *fv_oracle = feat_vec_new(feature_types_nb);
FILE *conll_file_oracle = myfopen(ctx->input_filename, "r");
int postag_oracle;
float max;
word *b0;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
config *config_predicted;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
feat_vec *fv_predicted = feat_vec_new(feature_types_nb);
FILE *conll_file_predicted = myfopen(ctx->input_filename, "r");
int postag_predicted;
float max;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
config_oracle = config_new(conll_file_oracle, ctx->mcd_struct, 5);
config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5);
while(!config_is_terminal(config_oracle)){
/*if(ctx->f2p){
add_signature_to_words_in_word_buffer_tagger(config_oracle->bf, ctx->f2p);
if(ctx->f2p){
add_signature_to_words_in_word_buffer_tagger(config_predicted->bf, ctx->f2p);
}*/
add_signature_to_words_in_word_buffer_tagger(config_oracle->bf, ctx->f2p);
}
// oracle
config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE);
postag_oracle = oracle_tagger(config_oracle);
if(ctx->debug_mode){
printf("Oracle : ");
print_word_simple(word_buffer_b0(config_oracle->bf), ctx->mcd_struct, dico_pos, postag_oracle);
}
// predicted
b0 = word_buffer_b0(config_predicted->bf);
config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv_predicted, LOOKUP_MODE);
postag_predicted = feature_table_argmax(fv_predicted, ft, &max);
if(ctx->debug_mode){
printf("Predicted : ");
print_word_simple(b0, ctx->mcd_struct, dico_pos, postag_predicted);
}
if(1){
if(ctx->debug_mode){
vcode *vcode_array = feature_table_get_vcode_array(fv_predicted, ft);
for(int i=0; i < 3; i++){
fprintf(stdout, "%d\t", i);
fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score);
printf("%d\t", i);
printf("%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score);
}
free(vcode_array);
}
if (postag_oracle!=postag_predicted)
fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n");
......@@ -162,16 +160,17 @@ void generate_error_train(FILE *output_file, context *ctx)
else
fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n");
}
movement_tagger(config_oracle, postag_oracle);
movement_tagger(config_predicted, postag_predicted);
if(!ctx->debug_mode || output_file!=stdout) {
fprintf(output_file, "%d", ((config_is_equal_tagger(config_oracle, config_predicted)) ? 1 : 0));
fprintf(output_file, " or : %d, pred : %d", postag_oracle,postag_predicted);
feat_vec_print(output_file, fv_predicted);
//word_set_pos(word_buffer_bm1(config_predicted->bf), postag_oracle);
}
}
feat_vec_free(fv_oracle);
feat_vec_free(fv_predicted);
feature_table_free(ft);
......@@ -183,6 +182,54 @@ void generate_error_train(FILE *output_file, context *ctx)
}
/*
 * Concatenate three path fragments into a freshly allocated string.
 *
 * The buffer is sized from the inputs, so a long data path cannot overflow
 * it (the previous fixed 500-byte scratch buffer could).
 * Returns NULL if the allocation fails; the caller owns the result.
 */
static char *error_tagger_join_path(const char *prefix, const char *middle, const char *suffix)
{
  size_t len = strlen(prefix) + strlen(middle) + strlen(suffix) + 1;
  /* explicit cast keeps the file compilable by a C++ compiler as well */
  char *path = (char *)malloc(len);

  if(path){
    snprintf(path, len, "%s%s%s", prefix, middle, suffix);
  }
  return path;
}

/*
 * Give a default value to every resource file name the user did not supply.
 *
 * Each missing name is built by appending a DEFAULT_* constant to
 * ctx->maca_data_path:
 *   - perc_model_filename     : DEFAULT_MODEL_TAGGER_FILENAME
 *   - vocabs_filename         : DEFAULT_VOCABS_TAGGER_FILENAME
 *   - input_filename          : DEFAULT_PATH_RELAT + DEFAULT_CONLL07_DEV
 *   - features_model_filename : DEFAULT_FEATURES_MODEL_TAGGER_FILENAME
 *   - f2p_filename            : DEFAULT_F2P_FILENAME
 * Names that are already set are left untouched. cff_filename is
 * deliberately not defaulted. The defaulted strings are heap-allocated.
 *
 * When ctx->verbose is set, the resulting file names are printed to stderr.
 */
void error_tagger_set_linguistic_resources_filename(context *ctx)
{
  if(!ctx->perc_model_filename){
    ctx->perc_model_filename = error_tagger_join_path(ctx->maca_data_path, "", DEFAULT_MODEL_TAGGER_FILENAME);
  }

  if(!ctx->vocabs_filename){
    ctx->vocabs_filename = error_tagger_join_path(ctx->maca_data_path, "", DEFAULT_VOCABS_TAGGER_FILENAME);
  }

  if(!ctx->input_filename){
    ctx->input_filename = error_tagger_join_path(ctx->maca_data_path, DEFAULT_PATH_RELAT, DEFAULT_CONLL07_DEV);
  }

  /* no default for cff_filename: a missing name is left as NULL for the caller to handle */

  if(!ctx->features_model_filename){
    ctx->features_model_filename = error_tagger_join_path(ctx->maca_data_path, "", DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);
  }

  if(!ctx->f2p_filename){
    ctx->f2p_filename = error_tagger_join_path(ctx->maca_data_path, "", DEFAULT_F2P_FILENAME);
  }

  if(ctx->verbose){
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename is not defaulted here, so it may be NULL; passing NULL to %s is undefined */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
    fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
    /* previously printed f2p_filename under this label */
    fprintf(stderr, "input_filename = %s\n", ctx->input_filename);
  }
}
int main(int argc, char *argv[])
......@@ -191,20 +238,13 @@ int main(int argc, char *argv[])
FILE *output_file;
ctx = context_read_options(argc, argv);
//maca_error_predictor_check_options(ctx);
ctx->perc_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.model" ;
ctx->features_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.fm" ;
ctx->input_filename = "/home/mathis/maca_data2/fr/data/treebank/dev.conll07";
ctx->f2p_filename = "/home/mathis/maca_data2/fr/bin/fP";
ctx->vocabs_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.vocab";
ctx->cff_filename = "/home/mathis/test/stage/error.cff";
error_tagger_set_linguistic_resources_filename(ctx);
ctx->f2p = form2pos_read(ctx->f2p_filename);
maca_error_predictor_check_options(ctx);
/* load ctx */
ctx->mcd_struct = mcd_build_conll07();
//decode_tagger_set_linguistic_resources_filenames(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), 1);//ctx->verbose);
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
......@@ -225,7 +265,7 @@ int main(int argc, char *argv[])
if(ctx->cff_filename)
fclose(output_file);
//context_free(ctx);
context_free(ctx);
return 0;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment