Skip to content
Snippets Groups Projects
Commit b71a2896 authored by Mathux's avatar Mathux
Browse files

error tagger

parent db3b262b
No related branches found
No related tags found
No related merge requests found
......@@ -50,6 +50,12 @@ target_link_libraries(maca_trans_tagger_mcf2cff transparse)
target_link_libraries(maca_trans_tagger_mcf2cff maca_common)
install (TARGETS maca_trans_tagger_mcf2cff DESTINATION bin)
# maca_error_predictor_tagger: built from a single source file, linked against
# the perceptron, transparse and maca_common libraries, installed into bin/.
add_executable(maca_error_predictor_tagger ./src/maca_error_predictor_tagger.c)
target_link_libraries(maca_error_predictor_tagger perceptron)
target_link_libraries(maca_error_predictor_tagger transparse)
target_link_libraries(maca_error_predictor_tagger maca_common)
install (TARGETS maca_error_predictor_tagger DESTINATION bin)
add_executable(maca_trans_morpho_mcf2cff ./src/maca_trans_morpho_mcf2cff.c)
target_link_libraries(maca_trans_morpho_mcf2cff perceptron)
target_link_libraries(maca_trans_morpho_mcf2cff transparse)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include<ctype.h>
#include"movement_tagger.h"
#include"oracle_tagger.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
#include"mcd.h"
/**
 * Print the first two tab-separated columns of a word's raw input line,
 * followed by the string form of a POS tag, on stdout.
 *
 * Output format: "<col0>\t<col1>\t<pos>\n".
 *
 * Columns are delimited the way strtok(buf, "\t") delimits them: leading
 * tabs are skipped and a run of consecutive tabs counts as one separator.
 * The input line is scanned in place, so nothing is allocated (the previous
 * implementation strdup'ed the line and never freed it).
 *
 * A missing column, a missing input line or a missing POS string is printed
 * as an empty field rather than passing a NULL pointer to printf.
 *
 * @param w          word whose `input` line is read (not modified)
 * @param mcd_struct unused; kept so existing callers keep compiling
 * @param dico_pos   dictionary handed to dico_int2string() to resolve postag
 * @param postag     integer code looked up in dico_pos
 */
void print_word_simple(word *w, mcd *mcd_struct, dico *dico_pos, int postag)
{
    static const char sep[] = "\t";
    const char *line = (w && w->input) ? w->input : "";
    const char *pos_str = dico_int2string(dico_pos, postag);
    const char *col0;
    const char *col1;
    size_t col0_len;
    size_t col1_len;

    (void)mcd_struct;

    /* First column: skip leading separators, then measure up to the next one. */
    col0 = line + strspn(line, sep);
    col0_len = strcspn(col0, sep);

    /* Second column: same procedure, starting right after the first column. */
    col1 = col0 + col0_len;
    col1 += strspn(col1, sep);
    col1_len = strcspn(col1, sep);

    printf("%.*s\t%.*s\t%s\n",
           (int)col0_len, col0,
           (int)col1_len, col1,
           pos_str ? pos_str : "");
}
/**
 * Fill in the `signature` field of the most recent words of a word buffer.
 *
 * The buffer is walked from its last element towards its first; the walk
 * stops at the first word whose signature is already set (!= -1).
 * For every word visited, the signature is looked up in f2p with the form
 * as-is and, if that yields -1 and the word has a form, looked up again
 * with the form passed through to_lower_string().
 *
 * Forms that do not fit in the local lowercase buffer are not retried in
 * lowercase (their signature stays -1) instead of overflowing the buffer.
 *
 * @param bf  word buffer whose words are updated in place
 * @param f2p form-to-signature table queried via form2pos_get_signature()
 */
void add_signature_to_words_in_word_buffer_tagger(word_buffer *bf, form2pos *f2p)
{
    char lower_form[1000];
    int i;

    for (i = word_buffer_get_nbelem(bf) - 1; i >= 0; i--) {
        word *w = word_buffer_get_word_n(bf, i);
        size_t form_len;

        /* Words before this one are expected to be done already. */
        if (word_get_signature(w) != -1)
            break;

        w->signature = form2pos_get_signature(f2p, w->form);
        if (w->signature != -1 || !w->form)
            continue;

        /* Bounded copy: skip the lowercase retry if the form is too long. */
        form_len = strlen(w->form);
        if (form_len >= sizeof lower_form)
            continue;
        memcpy(lower_form, w->form, form_len + 1);

        to_lower_string(lower_form);
        w->signature = form2pos_get_signature(f2p, lower_form);
    }
}
/**
 * Write the usage text of the error predictor.
 *
 * Delegates to the context_*_help_message() helpers and writes the section
 * titles (INPUT, IN TEST MODE, OUTPUT, IN TRAIN MODE) to stderr between them.
 *
 * @param ctx context forwarded to every help helper
 */
void maca_error_predictor_help_message(context *ctx)
{
    /* General options. */
    context_general_help_message(ctx);
    context_mode_help_message(ctx);
    context_sent_nb_help_message(ctx);
    context_mcd_help_message(ctx);

    fputs("INPUT\n", stderr);
    context_conll_help_message(ctx);

    fputs("IN TEST MODE\n", stderr);
    context_vocabs_help_message(ctx);

    fputs("OUTPUT\n", stderr);
    context_cff_help_message(ctx);

    fputs("IN TRAIN MODE\n", stderr);
    context_vocabs_help_message(ctx);
}
/**
 * Validate the parsed options; on failure print the help text and exit(1).
 *
 * Every check is currently switched off (the condition is the constant 0),
 * so this function always returns without doing anything. The disabled
 * condition read:
 *       !ctx->input_filename
 *    || ctx->help
 *    || !ctx->mcd_filename      (appears to have been commented out on its own)
 *    || !(ctx->cff_filename || ctx->fann_filename)
 *
 * @param ctx parsed command-line context
 */
void maca_error_predictor_check_options(context *ctx)
{
    const int options_invalid = 0; /* all checks disabled, see above */

    if (!options_invalid)
        return;

    maca_error_predictor_help_message(ctx);
    exit(1);
}
/**
 * Compare two configurations on the values produced by bm1p(), bm2p() and
 * bm3p(), checked in that order.
 *
 * @param c1 first configuration
 * @param c2 second configuration
 * @return 1 when all three values match, 0 as soon as one differs
 */
int config_is_equal_tagger(config *c1, config *c2)
{
    if (bm1p(c1) != bm1p(c2))
        return 0;
    if (bm2p(c1) != bm2p(c2))
        return 0;
    if (bm3p(c1) != bm3p(c2))
        return 0;
    return 1;
}
/**
 * Run the oracle tagger and the model-driven tagger side by side over the
 * same input file and write one record per step to output_file.
 *
 * The input file (ctx->input_filename) is opened twice so that each tagger
 * has its own configuration. At every step, until the oracle configuration
 * is terminal:
 *   - the tag returned by oracle_tagger() and the tag returned by
 *     feature_table_argmax() are printed on stdout next to the current word,
 *   - the first three entries of the array returned by
 *     feature_table_get_vcode_array() are printed on stdout with their score,
 *   - both configurations are advanced with their own tag,
 *   - output_file receives: 1 or 0 (config_is_equal_tagger() after the move),
 *     the two tag codes, then the predicted feature vector.
 *
 * Signature tagging via ctx->f2p and forcing the oracle tag into the
 * predicted configuration were both disabled in the original code and are
 * not performed here either.
 *
 * @param output_file stream receiving the per-step records
 * @param ctx         context providing model, feature model, vocabularies,
 *                    mcd structure and input file name
 */
void generate_error_train(FILE *output_file, context *ctx)
{
    /* Initialisation order is kept exactly as it was. */
    feature_table *model = feature_table_load(ctx->perc_model_filename, ctx->verbose);
    feat_vec *oracle_fv = feat_vec_new(feature_types_nb);
    FILE *oracle_stream = myfopen(ctx->input_filename, "r");
    dico *pos_dico = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
    feat_vec *predicted_fv = feat_vec_new(feature_types_nb);
    FILE *predicted_stream = myfopen(ctx->input_filename, "r");
    config *oracle_cfg = config_new(oracle_stream, ctx->mcd_struct, 5);
    config *predicted_cfg = config_new(predicted_stream, ctx->mcd_struct, 5);
    float best_score;

    while (!config_is_terminal(oracle_cfg)) {
        int oracle_tag;
        int predicted_tag;
        int same_config;
        int rank;
        word *predicted_b0;
        vcode *ranked;

        /* Oracle side. */
        config2feat_vec_cff(ctx->features_model, oracle_cfg, ctx->d_perceptron_features, oracle_fv, LOOKUP_MODE);
        oracle_tag = oracle_tagger(oracle_cfg);
        printf("Oracle : ");
        print_word_simple(word_buffer_b0(oracle_cfg->bf), ctx->mcd_struct, pos_dico, oracle_tag);

        /* Predicted side: b0 is fetched before the feature vector is built. */
        predicted_b0 = word_buffer_b0(predicted_cfg->bf);
        config2feat_vec_cff(ctx->features_model, predicted_cfg, ctx->d_perceptron_features, predicted_fv, LOOKUP_MODE);
        predicted_tag = feature_table_argmax(predicted_fv, model, &best_score);
        printf("Predicted : ");
        print_word_simple(predicted_b0, ctx->mcd_struct, pos_dico, predicted_tag);

        /* First three scored classes for the predicted feature vector. */
        ranked = feature_table_get_vcode_array(predicted_fv, model);
        for (rank = 0; rank < 3; rank++) {
            printf("%d\t", rank);
            printf("%s\t%.4f\n", dico_int2string(pos_dico, ranked[rank].class_code), ranked[rank].score);
        }
        free(ranked);

        fputs((oracle_tag != predicted_tag)
                  ? "**************** DIFFERENT CHOICE ***********\n\n"
                  : "**************** EQUAL CHOICE ***********\n\n",
              stdout);

        /* Advance each configuration with its own decision. */
        movement_tagger(oracle_cfg, oracle_tag);
        movement_tagger(predicted_cfg, predicted_tag);

        same_config = config_is_equal_tagger(oracle_cfg, predicted_cfg) ? 1 : 0;
        fprintf(output_file, "%d", same_config);
        fprintf(output_file, " or : %d, pred : %d", oracle_tag, predicted_tag);
        feat_vec_print(output_file, predicted_fv);
    }

    feat_vec_free(oracle_fv);
    feat_vec_free(predicted_fv);
    feature_table_free(model);
    config_free(oracle_cfg);
    config_free(predicted_cfg);
    fclose(oracle_stream);
    fclose(predicted_stream);
}
/**
 * Entry point of maca_error_predictor_tagger.
 *
 * Parses the command line into a context, loads the feature model and the
 * vocabularies, then calls generate_error_train() with the output stream.
 *
 * NOTE(review): the resource paths below are hard-coded and unconditionally
 * overwrite whatever context_read_options() stored, so command-line values
 * for these fields are ignored. maca_error_predictor_check_options() is not
 * called and the context is not freed (both were commented out). Confirm
 * before relying on this tool outside the original author's machine.
 *
 * @return 0 on completion
 */
int main(int argc, char *argv[])
{
    context *ctx;
    FILE *output_file;

    ctx = context_read_options(argc, argv);

    /* Hard-coded resources (see NOTE above). */
    ctx->perc_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.model" ;
    ctx->features_model_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.fm" ;
    ctx->input_filename = "/home/mathis/maca_data2/fr/data/treebank/dev.conll07";
    ctx->f2p_filename = "/home/mathis/maca_data2/fr/bin/fP";
    ctx->vocabs_filename = "/home/mathis/maca_data2/fr/bin/maca_trans_tagger.vocab";
    ctx->cff_filename = "/home/mathis/test/stage/error.cff";
    ctx->mcd_struct = mcd_build_conll07();

    ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
    mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
    ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");

    /*
     * The feature model is read once. It used to be read twice from the same
     * file, with the second result overwriting (and leaking) the first.
     */
    ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);

    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);

    /* add the feature dictionary to the dico vector */
    dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);

    /* open output file */
    if (ctx->cff_filename)
        output_file = myfopen(ctx->cff_filename, "w");
    else
        output_file = stdout;

    generate_error_train(output_file, ctx);

    if (ctx->cff_filename)
        fclose(output_file);

    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment