Skip to content
Snippets Groups Projects
Commit 9d6b7ba3 authored by Mathux's avatar Mathux
Browse files

error_predictor parser

parent 4cef9d6e
Branches
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ set(SOURCES src/context.c
# src/simple_decoder_parser.c
src/simple_decoder_parser_arc_eager.c
src/simple_decoder_tagparser_arc_eager.c
src/simple_decoder_parser_arc_eager_error_predictor.c
# src/simple_decoder_forrest.c
src/simple_decoder_tagger.c
src/simple_decoder_tagger_error_predictor.c
......@@ -57,6 +58,12 @@ target_link_libraries(maca_error_predictor_tagger_mcf2cff transparse)
target_link_libraries(maca_error_predictor_tagger_mcf2cff maca_common)
install (TARGETS maca_error_predictor_tagger_mcf2cff DESTINATION bin)
add_executable(maca_error_predictor_parser_arc_eager_mcf2cff ./src/maca_error_predictor_parser_arc_eager_mcf2cff.c)
target_link_libraries(maca_error_predictor_parser_arc_eager_mcf2cff perceptron)
target_link_libraries(maca_error_predictor_parser_arc_eager_mcf2cff transparse)
target_link_libraries(maca_error_predictor_parser_arc_eager_mcf2cff maca_common)
install (TARGETS maca_error_predictor_parser_arc_eager_mcf2cff DESTINATION bin)
add_executable(maca_trans_morpho_mcf2cff ./src/maca_trans_morpho_mcf2cff.c)
target_link_libraries(maca_trans_morpho_mcf2cff perceptron)
target_link_libraries(maca_trans_morpho_mcf2cff transparse)
......@@ -105,6 +112,12 @@ target_link_libraries(maca_trans_parser transparse)
target_link_libraries(maca_trans_parser maca_common)
install (TARGETS maca_trans_parser DESTINATION bin)
add_executable(maca_error_predictor_parser ./src/maca_error_predictor_parser.c)
target_link_libraries(maca_error_predictor_parser perceptron)
target_link_libraries(maca_error_predictor_parser transparse)
target_link_libraries(maca_error_predictor_parser maca_common)
install (TARGETS maca_error_predictor_parser DESTINATION bin)
add_executable(maca_trans_tagparser ./src/maca_trans_tagparser.c)
target_link_libraries(maca_trans_tagparser perceptron)
target_link_libraries(maca_trans_tagparser transparse)
......
......@@ -24,9 +24,7 @@ void context_free(context *ctx)
if(ctx->root_label) free(ctx->root_label);
if(ctx->vocabs_filename) free(ctx->vocabs_filename);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if(ctx->json_filename) free(ctx->json_filename);
if(ctx->dnn_model_filename) free(ctx->dnn_model_filename);
if (ctx->mcd_struct)
mcd_free(ctx->mcd_struct);
......@@ -34,6 +32,7 @@ void context_free(context *ctx)
if (ctx->mcd_struct_error)
mcd_free(ctx->mcd_struct_error);
if (ctx->vocabs)
dico_vec_free(ctx->vocabs);
......@@ -45,7 +44,6 @@ void context_free(context *ctx)
if(ctx->d_perceptron_features_error)
dico_free(ctx->d_perceptron_features_error);
/*
if(ctx->mcd_struct)
mcd_free(ctx->mcd_struct);
......@@ -54,12 +52,11 @@ void context_free(context *ctx)
feat_model_free(ctx->features_model);
if(ctx->features_model_error)
feat_model_free(ctx->features_model);
feat_model_free(ctx->features_model_error);
if(ctx->f2p)
form2pos_free(ctx->f2p);
free(ctx);
}
......@@ -67,6 +64,7 @@ context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->force = 0;
ctx->verbose = 0;
ctx->program_name = NULL;
ctx->input_filename = NULL;
......@@ -207,9 +205,10 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[26] =
static struct option long_options[27] =
{
{"help", no_argument, 0, 'h'},
{"force", no_argument, 0, 'K'},
{"verbose", no_argument, 0, 'v'},
{"debug", no_argument, 0, 'd'},
{"conll", no_argument, 0, 'c'},
......@@ -240,12 +239,15 @@ context *context_read_options(int argc, char *argv[])
opterr = 0;
while ((c = getopt_long (argc, argv, "hvdcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:J:N:w:l:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hKvdcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:J:N:w:l:", long_options, &option_index)) != -1){
switch (c)
{
case 'h':
ctx->help = 1;
break;
case 'K' :
ctx->force = 1;
break;
case 'v':
ctx->verbose = 1;
break;
......
......@@ -14,10 +14,16 @@
#define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab"
#define DEFAULT_MODEL_TAGGER_FILENAME "maca_trans_tagger.model"
#define DEFAULT_MULTI_COL_DESC_TAGGER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_tagger.mcd"
#define DEFAULT_FEATURES_MODEL_TAGGER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_tagger.fm"
#define DEFAULT_VOCABS_TAGGER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_tagger.vocab"
#define DEFAULT_MODEL_TAGGER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_tagger.model"
#define DEFAULT_MULTI_COL_DESC_PARSER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_parser.mcd"
#define DEFAULT_FEATURES_MODEL_PARSER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_parser.fm"
#define DEFAULT_VOCABS_PARSER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_parser.vocab"
#define DEFAULT_MODEL_PARSER_ERROR_PREDICTOR_FILENAME "maca_error_predictor_parser.model"
#define DEFAULT_MULTI_COL_DESC_LEMMATIZER_FILENAME "maca_trans_lemmatizer.mcd"
#define DEFAULT_FEATURES_MODEL_LEMMATIZER_FILENAME "maca_trans_lemmatizer.fm"
#define DEFAULT_VOCABS_LEMMATIZER_FILENAME "maca_trans_lemmatizer.vocab"
......@@ -57,6 +63,7 @@
typedef struct {
int help;
int force;
char *program_name;
char *input_filename;
char *perc_model_filename;
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
#include"beam.h"
#include"form2pos.h"
#include"simple_decoder_parser_arc_eager_error_predictor.h"
#include"config2feat_vec.h"
/*
 * Write the usage text of the error-predicting parser.
 *
 * The text is assembled from the shared context_*_help_message() helpers;
 * the only thing printed directly here is the "INPUT" heading, which goes to
 * stderr.
 *
 * ctx: program context, forwarded untouched to every helper.
 */
void decode_parser_help_message(context *ctx)
{
    /* options listed before the INPUT heading */
    context_general_help_message(ctx);
    context_beam_help_message(ctx);
    context_conll_help_message(ctx);

    fputs("INPUT\n", stderr);

    /* options listed under the INPUT heading */
    context_input_help_message(ctx);
    context_mcd_help_message(ctx);
    context_model_help_message(ctx);
    context_vocabs_help_message(ctx);
    context_features_model_help_message(ctx);
    context_f2p_filename_help_message(ctx);
}
/*
 * Validate the command line of the error-predicting parser.
 *
 * The only condition checked is the help flag: when it is set, the usage
 * text is printed and the process exits with status 1.  No filename option
 * is treated as mandatory here (the corresponding tests are disabled in this
 * version of the code).
 */
void decode_parser_check_options(context *ctx){
    if(!ctx->help)
        return;

    decode_parser_help_message(ctx);
    exit(1);
}
/*
 * Return a newly allocated string holding `dir` immediately followed by
 * `name` (no separator is inserted, exactly like the former strcpy/strcat
 * sequence).  A NULL `dir` is treated as the empty string.  The caller owns
 * the result.  Exits with status 1 if memory cannot be allocated.
 */
static char *decode_parser_join_path(const char *dir, const char *name)
{
    size_t dir_len;
    size_t name_len;
    char *path;

    if(dir == NULL)
        dir = "";
    dir_len = strlen(dir);
    name_len = strlen(name);

    path = (char *)malloc(dir_len + name_len + 1);
    if(path == NULL){
        fprintf(stderr, "cannot allocate memory for filename %s%s\n", dir, name);
        exit(1);
    }
    memcpy(path, dir, dir_len);
    memcpy(path + dir_len, name, name_len + 1); /* copies the terminating NUL too */
    return path;
}

/*
 * Fill in every resource filename the user did not give on the command line
 * with its default, located under ctx->maca_data_path.
 *
 * Filenames are built with an exactly sized allocation instead of a fixed
 * 500-byte stack buffer, so a long data path can no longer overflow.
 *
 * Note on field usage in this program: ctx->l_rules_filename receives the
 * default features-model filename of the error predictor and
 * ctx->fann_filename receives its default vocabulary filename.
 */
void decode_parser_set_linguistic_resources_filenames(context *ctx)
{
    if(!ctx->perc_model_filename)
        ctx->perc_model_filename = decode_parser_join_path(ctx->maca_data_path, DEFAULT_MODEL_FILENAME);

    if(!ctx->vocabs_filename)
        ctx->vocabs_filename = decode_parser_join_path(ctx->maca_data_path, DEFAULT_VOCABS_FILENAME);

    /* features model of the error predictor */
    if(!ctx->l_rules_filename)
        ctx->l_rules_filename = decode_parser_join_path(ctx->maca_data_path, DEFAULT_FEATURES_MODEL_PARSER_ERROR_PREDICTOR_FILENAME);

    /* vocabularies of the error predictor */
    if(!ctx->fann_filename)
        ctx->fann_filename = decode_parser_join_path(ctx->maca_data_path, DEFAULT_VOCABS_PARSER_ERROR_PREDICTOR_FILENAME);

    if(!ctx->features_model_filename)
        ctx->features_model_filename = decode_parser_join_path(ctx->maca_data_path, DEFAULT_FEATURES_MODEL_FILENAME);

    if(ctx->verbose){
        fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
        fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
        /* mcd_filename is not given a default here, so it may still be NULL;
           passing NULL to %s is undefined behaviour */
        fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
        fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
    }
}
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
decode_parser_check_options(ctx);
decode_parser_set_linguistic_resources_filenames(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
ctx->vocabs_error = dico_vec_read(ctx->fann_filename, ctx->hash_ratio);
ctx->features_model_error = feat_model_read(ctx->l_rules_filename, feat_lib_build(), ctx->verbose);
ctx->mcd_struct_error = mcd_copy(ctx->mcd_struct);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
mcd_link_to_dico(ctx->mcd_struct_error, ctx->vocabs_error, ctx->verbose);
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
ctx->d_perceptron_features_error = dico_vec_get_dico(ctx->vocabs_error, (char *)"d_perceptron_features");
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){
fprintf(stderr, "cannot find label names\n");
return 1;
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3;
char perc_error_filename[500];
strcpy(perc_error_filename, ctx->maca_data_path);
strcat(perc_error_filename, DEFAULT_MODEL_PARSER_ERROR_PREDICTOR_FILENAME);
simple_decoder_parser_arc_eager_error_predictor(ctx, perc_error_filename);
context_free(ctx);
return 0;
}
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include<ctype.h>
#include"movement_parser_arc_eager.h"
#include"oracle_parser_arc_eager.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
#include"mcd.h"
/*
 * Write the usage text of the mcf2cff training-data generator for the parser
 * error predictor.
 *
 * Section headings are written to stderr here; the option descriptions
 * themselves come from the shared context_*_help_message() helpers.  The
 * vocabulary help is shown twice, once under "IN TEST MODE" and once under
 * "IN TRAIN MODE".
 */
void maca_error_predictor_parser_mcf2cff_help_message(context *ctx)
{
    context_general_help_message(ctx);
    context_mode_help_message(ctx);
    context_sent_nb_help_message(ctx);

    fputs("INPUT\n", stderr);
    context_conll_help_message(ctx);

    fputs("IN TEST MODE\n", stderr);
    context_vocabs_help_message(ctx);

    fputs("OUTPUT\n", stderr);
    context_cff_help_message(ctx);

    fputs("IN TRAIN MODE\n", stderr);
    context_vocabs_help_message(ctx);
}
/*
 * Validate the command line of the mcf2cff generator.
 *
 * An input filename is required and the help flag must be off; otherwise the
 * usage text is printed and the process exits with status 1.  The mcd, cff
 * and fann filenames are not required (those tests are disabled in this
 * version of the code).
 */
void maca_error_predictor_parser_mcf2cff_check_options(context *ctx)
{
    int options_ok = ctx->input_filename && !ctx->help;

    if(options_ok)
        return;

    maca_error_predictor_parser_mcf2cff_help_message(ctx);
    exit(1);
}
/*
 * Compare the movement codes chosen by the oracle (co1) and by the
 * classifier (co2).
 *
 * Returns 0 when both codes are identical ("no error") and 1 otherwise.
 * The two configurations c1 and c2 are accepted but not inspected.
 * NOTE(review): the original comment mentions further result codes (2, 3);
 * they are not produced by this implementation.
 */
int config_is_equal_parser(config *c1, config *c2, int co1, int co2)
{
    if(co1 == co2)
        return 0;
    return 1;
}
/*
 * Generate the training examples of the parser error predictor.
 *
 * Two parser configurations read ctx->input_filename in parallel:
 *   - config_oracle is driven by oracle_parser_arc_eager() against the
 *     reference word buffer;
 *   - config_predicted is driven by the perceptron model loaded from
 *     ctx->perc_model_filename.
 * After every step one line is written to output_file: 0 if the oracle and
 * predicted movement codes are equal, 1 otherwise, followed by the feature
 * vector extracted from config_predicted with the error-predictor features
 * model.  Nothing is written when debug mode is on and output_file is stdout.
 *
 * output_file: destination of the training examples (already opened).
 * ctx:         program context; models, dictionaries and mcd must be loaded.
 *
 * Changes with respect to the previous version:
 *   - mvt_code_predicted is initialised; it used to be read uninitialised
 *     when the very first step took the forced-EOS branch;
 *   - configurations, feature vectors, the feature table and the input
 *     streams are released before returning.
 *
 * NOTE(review): on forced-EOS steps the classifier is not consulted, so the
 * emitted line reuses the previous predicted code and feature vector.
 * NOTE(review): the predicted movement is the argmax over fv_oracle, i.e.
 * features extracted from the oracle configuration; confirm this is intended.
 * NOTE(review): ref_oracle and mcd_struct_hyp are still not released; their
 * ownership rules are not visible from here.
 */
void generate_training_file_error(FILE *output_file, context *ctx)
{
    // oracle
    config *config_oracle;
    int mvt_code_oracle;
    char mvt_type_oracle;
    int mvt_label_oracle;
    feat_vec *fv_oracle = feat_vec_new(feature_types_nb);
    int sentence_nb = 0;
    int root_label_oracle = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
    word_buffer *ref_oracle = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
    FILE *mcf_file_oracle = myfopen(ctx->input_filename, "r");

    // prediction
    config *config_predicted;
    feat_vec *fv_predicted = feat_vec_new(feature_types_nb);
    FILE *mcf_file_predicted = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
    feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
    int root_label_predicted;
    int mvt_code_predicted = -1; /* defined value until the classifier has been consulted */
    int mvt_type_predicted;
    int mvt_label_predicted;
    float max;
    int result;
    int argmax1, argmax2;
    float max1, max2;

    /* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
    /* the idea is to ignore syntax in the mcf file that will be read */
    /* it is ugly !!! */
    mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
    mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
    mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);
    mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_SENT_SEG);

    root_label_predicted = dico_string2int(ctx->dico_labels, ctx->root_label);
    if(root_label_predicted == -1) root_label_predicted = 0;

    config_predicted = config_new(mcf_file_predicted, ctx->mcd_struct, 5);
    config_oracle = config_new(mcf_file_oracle, mcd_struct_hyp, 5);

    while((!word_buffer_end(ref_oracle) && (sentence_nb < ctx->sent_nb)) || !config_is_terminal(config_predicted)){
        /* ---- oracle step ---- */
        mvt_code_oracle = oracle_parser_arc_eager(config_oracle, ref_oracle, root_label_oracle);
        mvt_type_oracle = movement_parser_type(mvt_code_oracle);
        mvt_label_oracle = movement_parser_label(mvt_code_oracle);
        config2feat_vec_cff(ctx->features_model, config_oracle, ctx->d_perceptron_features, fv_oracle, LOOKUP_MODE);

        switch(mvt_type_oracle){
        case MVT_PARSER_EOS :
            movement_parser_eos(config_oracle);
            sentence_nb++;
            if((sentence_nb % 100) == 0)
                fprintf(stderr, "\rsentence %d", sentence_nb);
            break;
        case MVT_PARSER_LEFT :
            movement_parser_left_arc(config_oracle, mvt_label_oracle);
            break;
        case MVT_PARSER_RIGHT :
            movement_parser_right_arc(config_oracle, mvt_label_oracle);
            word_buffer_move_right(ref_oracle);
            break;
        case MVT_PARSER_REDUCE :
            movement_parser_reduce(config_oracle);
            break;
        case MVT_PARSER_ROOT :
            movement_parser_root(config_oracle, root_label_oracle);
            break;
        case MVT_PARSER_SHIFT :
            movement_parser_shift(config_oracle);
            word_buffer_move_right(ref_oracle);
            break;
        }

        /* ---- predicted step ---- */
        /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
        /* which means that the top of the stack got its eos status from input */
        /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */
        if((word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(config_predicted))) != MVT_PARSER_EOS)){
            word_set_sent_seg(stack_top(config_get_stack(config_predicted)), -1);
            movement_parser_eos(config_predicted);
            while(movement_parser_reduce(config_predicted));
            while(movement_parser_root(config_predicted, root_label_predicted));
        }
        /* normal behaviour, ask classifier what is the next movement to do and do it */
        else{
            config2feat_vec_cff(ctx->features_model_error, config_predicted, ctx->d_perceptron_features_error, fv_predicted, TRAIN_MODE);
            mvt_code_predicted = feature_table_argmax(fv_oracle, ft, &max);
            mvt_type_predicted = movement_parser_type(mvt_code_predicted);
            mvt_label_predicted = movement_parser_label(mvt_code_predicted);

            /* EOS predicted although the stack top is not a sentence end: take the second best movement */
            if((mvt_type_predicted == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(config_predicted))) == 0)){
                feature_table_argmax_1_2(fv_oracle, ft, &argmax1, &max1, &argmax2, &max2);
                mvt_code_predicted = argmax2;
                mvt_type_predicted = movement_parser_type(mvt_code_predicted);
                mvt_label_predicted = movement_parser_label(mvt_code_predicted);
            }

            if(ctx->debug_mode){
                printf("Oracle : ");
                movement_parser_print(stdout, mvt_code_oracle, ctx->dico_labels);
                printf("\nPredicted : ");
                movement_parser_print(stdout, mvt_code_predicted, ctx->dico_labels);
                printf("\n");
                config_print(stdout,config_predicted);
                if (mvt_code_oracle!=mvt_code_predicted)
                    fprintf(stdout, "**************** DIFFERENT CHOICE ***********\n\n");
                else
                    fprintf(stdout, "**************** EQUAL CHOICE ***********\n\n");
            }

            result = 0;
            switch(mvt_type_predicted){
            case MVT_PARSER_LEFT :
                result = movement_parser_left_arc(config_predicted, mvt_label_predicted);
                break;
            case MVT_PARSER_RIGHT:
                result = movement_parser_right_arc(config_predicted, mvt_label_predicted);
                break;
            case MVT_PARSER_REDUCE:
                result = movement_parser_reduce(config_predicted);
                break;
            case MVT_PARSER_ROOT:
                result = movement_parser_root(config_predicted, root_label_predicted);
                break;
            case MVT_PARSER_EOS:
                result = movement_parser_eos(config_predicted);
                break;
            case MVT_PARSER_SHIFT:
                result = movement_parser_shift(config_predicted);
            }

            /* the chosen movement could not be applied: fall back on SHIFT */
            if(result == 0){
                result = movement_parser_shift(config_predicted);
                if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */
                    while(!stack_is_empty(config_get_stack(config_predicted)))
                        movement_parser_root(config_predicted, root_label_predicted);
                }
            }
        }

        /* one training example per step: error flag followed by the features */
        if(!ctx->debug_mode || output_file!=stdout) {
            fprintf(output_file, "%d", ((config_is_equal_parser(config_oracle, config_predicted, mvt_code_oracle, mvt_code_predicted))));
            feat_vec_print(output_file, fv_predicted);
        }
    }

    /* release what was acquired above; configurations are freed before the
       streams they read from are closed */
    config_free(config_oracle);
    config_free(config_predicted);
    feat_vec_free(fv_oracle);
    feat_vec_free(fv_predicted);
    feature_table_free(ft);
    fclose(mcf_file_oracle);
    if(mcf_file_predicted != stdin)
        fclose(mcf_file_predicted);
}
/*
 * Return a newly allocated string holding `dir`, `part1` and `part2`
 * concatenated in that order (no separator is inserted, exactly like the
 * former strcpy/strcat sequence).  A NULL `dir` is treated as the empty
 * string.  The caller owns the result.  Exits with status 1 if memory cannot
 * be allocated.
 */
static char *error_parser_join_path(const char *dir, const char *part1, const char *part2)
{
    size_t dir_len;
    size_t part1_len;
    size_t part2_len;
    char *path;

    if(dir == NULL)
        dir = "";
    dir_len = strlen(dir);
    part1_len = strlen(part1);
    part2_len = strlen(part2);

    path = (char *)malloc(dir_len + part1_len + part2_len + 1);
    if(path == NULL){
        fprintf(stderr, "cannot allocate memory for filename %s%s%s\n", dir, part1, part2);
        exit(1);
    }
    memcpy(path, dir, dir_len);
    memcpy(path + dir_len, part1, part1_len);
    memcpy(path + dir_len + part1_len, part2, part2_len + 1); /* copies the terminating NUL too */
    return path;
}

/*
 * Fill in every resource filename the user did not give on the command line
 * with its default, located under ctx->maca_data_path.  When no mcd filename
 * was given, a default column description is built with
 * mcd_build_wpmlgfs() instead.
 *
 * Filenames are built with an exactly sized allocation instead of a fixed
 * 500-byte stack buffer, so a long data path can no longer overflow.
 *
 * When ctx->cff_filename is not set nothing is done here; main() writes the
 * output to stdout in that case.
 */
void error_parser_set_linguistic_resources_filename(context *ctx)
{
    if(!ctx->perc_model_filename)
        ctx->perc_model_filename = error_parser_join_path(ctx->maca_data_path, DEFAULT_MODEL_FILENAME, "");

    if(!ctx->vocabs_filename)
        ctx->vocabs_filename = error_parser_join_path(ctx->maca_data_path, DEFAULT_VOCABS_FILENAME, "");

    /* default input: <data path><relative path><dev corpus> */
    if(!ctx->input_filename)
        ctx->input_filename = error_parser_join_path(ctx->maca_data_path, DEFAULT_PATH_RELAT, DEFAULT_MCF_DEV);

    if(!ctx->mcd_filename) {
        ctx->mcd_struct = mcd_build_wpmlgfs();
    }

    if(!ctx->features_model_filename)
        ctx->features_model_filename = error_parser_join_path(ctx->maca_data_path, DEFAULT_FEATURES_MODEL_FILENAME, "");

    if(!ctx->f2p_filename)
        ctx->f2p_filename = error_parser_join_path(ctx->maca_data_path, DEFAULT_F2P_FILENAME, "");

    if(ctx->verbose){
        fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
        fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
        /* mcd_filename may legitimately be NULL (see above); passing NULL to
           %s is undefined behaviour */
        fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
        fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
        fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
        fprintf(stderr, "input_filename = %s\n", ctx->input_filename);
    }
}
int main(int argc, char *argv[])
{
context *ctx;
FILE *output_file;
ctx = context_read_options(argc, argv);
//error_parser_set_linguistic_resources_filename(ctx);
ctx->f2p = form2pos_read(ctx->f2p_filename);
maca_error_predictor_parser_mcf2cff_check_options(ctx);
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
ctx->mcd_struct_error = mcd_read(ctx->l_rules_filename, ctx->verbose);
//error
mcd_extract_dico_from_corpus(ctx->mcd_struct_error, ctx->input_filename);
ctx->vocabs_error = mcd_build_dico_vec(ctx->mcd_struct_error);
//parser
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
//error
ctx->d_perceptron_features_error = dico_new((char *)"d_perceptron_features", 10000000);
ctx->features_model_error = feat_model_read(ctx->fann_filename, feat_lib_build(), ctx->verbose);
//parser
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){
fprintf(stderr, "cannot find label names\n");
return 1;
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3;
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* add the feature dictionnary to the dico vector */
dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);
dico_vec_add(ctx->vocabs_error, ctx->d_perceptron_features_error);
/* open output file */
output_file = (ctx->cff_filename) ? myfopen_no_exit(ctx->cff_filename, "w") : stdout;
generate_training_file_error(output_file, ctx);
dico_vec_print(ctx->dnn_model_filename, ctx->vocabs_error);
if(ctx->cff_filename)
fclose(output_file);
context_free(ctx);
return 0;
}
......@@ -91,21 +91,21 @@ void generate_error_train(FILE *output_file, context *ctx)
{
config *config_oracle;
feat_vec *fv_oracle = feat_vec_new(feature_types_nb);
FILE *conll_file_oracle = myfopen(ctx->input_filename, "r");
FILE *mcf_file_oracle = myfopen(ctx->input_filename, "r");
int postag_oracle;
word *b0;
config *config_predicted;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
feat_vec *fv_predicted = feat_vec_new(feature_types_nb);
FILE *conll_file_predicted = myfopen(ctx->input_filename, "r");
FILE *mcf_file_predicted = myfopen(ctx->input_filename, "r");
int postag_predicted;
float max;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
dico *dico_pos_error = dico_vec_get_dico(ctx->vocabs_error, (char *)"POS");
config_oracle = config_new(conll_file_oracle, ctx->mcd_struct, 5);
config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5);
config_oracle = config_new(mcf_file_oracle, ctx->mcd_struct, 5);
config_predicted = config_new(mcf_file_predicted, ctx->mcd_struct, 5);
while(!config_is_terminal(config_oracle)){
if(ctx->f2p){
......@@ -166,8 +166,8 @@ void generate_error_train(FILE *output_file, context *ctx)
config_free(config_oracle);
config_free(config_predicted);
fclose(conll_file_oracle);
fclose(conll_file_predicted);
fclose(mcf_file_oracle);
fclose(mcf_file_predicted);
}
......@@ -273,7 +273,7 @@ int main(int argc, char *argv[])
if(ctx->cff_filename)
fclose(output_file);
//context_free(ctx);
context_free(ctx);
return 0;
}
......
......@@ -41,8 +41,6 @@ void maca_trans_parser_check_options(context *ctx){
}
}
void set_linguistic_resources_filenames_parser(context *ctx)
{
char absolute_filename[500];
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement_parser_arc_eager.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
{
int i;
word *w;
char *label;
char *buffer = NULL;
char *token = NULL;
int col_nb = 0;
for(i=0; i < config_get_buffer(c)->nbelem; i++){
w = word_buffer_get_word_n(config_get_buffer(c), i);
if((mcd_get_gov_col(mcd_struct) == -1)
&& (mcd_get_label_col(mcd_struct) == -1)
&& (mcd_get_sent_seg_col(mcd_struct) == -1)){
printf("%s\t", word_get_input(w));
printf("%d\t", word_get_gov(w));
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("%s\t", label) ;
else
printf("_\t");
if(word_get_sent_seg(w) == 1)
printf("1\n") ;
else
printf("0\n");
}
else{
buffer = strdup(w->input);
token = strtok(buffer, "\t");
col_nb = 0;
while(token){
if(col_nb != 0) printf("\t");
if(col_nb == mcd_get_gov_col(mcd_struct)){
printf("%d", word_get_gov(w));
}
else
if(col_nb == mcd_get_label_col(mcd_struct)){
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("%s", label) ;
else
printf("_");
}
else
if(col_nb == mcd_get_sent_seg_col(mcd_struct)){
if(word_get_sent_seg(w) == 1)
printf("1") ;
else
printf("0");
}
else{
word_print_col_n(stdout, w, col_nb);
}
col_nb++;
token = strtok(NULL, "\t");
}
if((col_nb <= mcd_get_gov_col(mcd_struct)) || (mcd_get_gov_col(mcd_struct) == -1)){
printf("\t%d", word_get_gov(w));
}
if((col_nb <= mcd_get_label_col(mcd_struct)) || (mcd_get_label_col(mcd_struct) == -1)){
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("\t%s", label) ;
else
printf("\t_");
}
if((col_nb <= mcd_get_sent_seg_col(mcd_struct)) || (mcd_get_sent_seg_col(mcd_struct) == -1)){
if(word_get_sent_seg(w) == 1)
printf("\t1") ;
else
printf("\t0");
}
printf("\n");
free(buffer);
}
}
}
/*
 * Arc-eager greedy parser that also runs an error predictor at every step.
 *
 * The input (ctx->input_filename, or stdin when not set) is parsed with the
 * perceptron model ctx->perc_model_filename.  At each classifier step the
 * error-predictor model perc_error_filename is evaluated on the same
 * configuration; in this version its decision is only shown in debug mode
 * and does not influence the parse.  Unless ctx->trace_mode is set, the
 * parsed word buffer is printed on stdout at the end.
 *
 * ctx:                 program context with models, dictionaries and mcd loaded.
 * perc_error_filename: feature table of the error predictor.
 *
 * Changes with respect to the previous version:
 *   - the error-predictor feature vector and feature table are released
 *     (they used to leak while their parser counterparts were freed);
 *   - the debug listings never read more entries than the feature table has
 *     classes;
 *   - the unused local `index` is gone.
 */
void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_error_filename)
{
    FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
    feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
    feature_table *ft_error = feature_table_load(perc_error_filename, ctx->verbose);
    feat_vec *fv = feat_vec_new(feature_types_nb);
    feat_vec *fv_error = feat_vec_new(feature_types_nb);
    int root_label;
    int mvt_code;
    int mvt_type;
    int mvt_label;
    float max;
    float max_err;
    int error_detect; /* decision of the error predictor; currently not acted upon */
    config *c = NULL;
    int result;
    int argmax1, argmax2;
    float max1, max2;

    root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
    if(root_label == -1) root_label = 0;

    c = config_new(f, ctx->mcd_struct, 5);

    while(!config_is_terminal(c)){
        /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
        /* which means that the top of the stack got its eos status from input */
        /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */
        if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){
            word_set_sent_seg(stack_top(config_get_stack(c)), -1);
            movement_parser_eos(c);
            while(movement_parser_reduce(c));
            while(movement_parser_root(c, root_label));
            if(ctx->debug_mode) printf("force EOS\n");
        }
        /* normal behaviour, ask classifier what is the next movement to do and do it */
        else{
            config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
            mvt_code = feature_table_argmax(fv, ft, &max);

            if(ctx->debug_mode){
                fprintf(stdout, "***********************************\n");
                config_print(stdout, c);
            }
            if(ctx->debug_mode){
                fprintf(stdout, " ***Parser choice***\n");
                vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
                /* show the first three entries, or fewer if the model has fewer classes */
                for(int i=0; i < 3 && i < ft->classes_nb; i++){
                    printf(" %d\t", i);
                    movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels);
                    printf("\t%.4f\n", vcode_array[i].score);
                }
                free(vcode_array);
            }

            mvt_type = movement_parser_type(mvt_code);
            mvt_label = movement_parser_label(mvt_code);

            /* evaluate the error predictor on the same configuration */
            config2feat_vec_cff(ctx->features_model_error, c, ctx->d_perceptron_features_error, fv_error, LOOKUP_MODE);
            error_detect = feature_table_argmax(fv_error, ft_error, &max_err);
            if(ctx->debug_mode){
                fprintf(stdout, " ***Error detection***\n");
                vcode *vcode_array_err = feature_table_get_vcode_array(fv_error, ft_error);
                /* show the first two entries, or fewer if the model has fewer classes */
                for(int i=0; i < 2 && i < ft_error->classes_nb; i++){
                    fprintf(stdout, " %d\t", i);
                    fprintf(stdout, "%d\t%.4f\n", vcode_array_err[i].class_code, vcode_array_err[i].score);
                }
                free(vcode_array_err);
            }

            /* EOS predicted although the stack top is not a sentence end: take the second best movement */
            if((mvt_type == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(c))) == 0)){
                if(ctx->verbose)
                    fprintf(stderr, "the classifier did predict EOS but this is not the case\n");
                feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
                mvt_code = argmax2;
                mvt_type = movement_parser_type(mvt_code);
                mvt_label = movement_parser_label(mvt_code);
            }

            result = 0;
            switch(mvt_type){
            case MVT_PARSER_LEFT :
                result = movement_parser_left_arc(c, mvt_label);
                break;
            case MVT_PARSER_RIGHT:
                result = movement_parser_right_arc(c, mvt_label);
                break;
            case MVT_PARSER_REDUCE:
                result = movement_parser_reduce(c);
                break;
            case MVT_PARSER_ROOT:
                result = movement_parser_root(c, root_label);
                break;
            case MVT_PARSER_EOS:
                result = movement_parser_eos(c);
                break;
            case MVT_PARSER_SHIFT:
                result = movement_parser_shift(c);
            }

            /* the chosen movement could not be applied: fall back on SHIFT */
            if(result == 0){
                if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
                result = movement_parser_shift(c);
                if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */
                    if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n");
                    while(!stack_is_empty(config_get_stack(c)))
                        movement_parser_root(c, root_label);
                }
            }
        }
    }

    if(!ctx->trace_mode)
        print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);

    config_free(c);
    feat_vec_free(fv);
    feat_vec_free(fv_error);
    feature_table_free(ft);
    feature_table_free(ft_error);
    if(ctx->input_filename)
        fclose(f);
}
/* Public interface of the arc-eager parser decoder with error prediction. */
#ifndef __SIMPLE_DECODER_PARSER_ARC_EAGER_ERROR_PREDICTOR__
#define __SIMPLE_DECODER_PARSER_ARC_EAGER_ERROR_PREDICTOR__
#include"context.h"
/*
 * Run the arc-eager decoder described by ctx, evaluating an error-predictor
 * model alongside the parser model.
 *
 * ctx:                 program context (options and loaded resources).
 * perc_error_filename: filename of the error-predictor model to load.
 */
void simple_decoder_parser_arc_eager_error_predictor(context *ctx, char *perc_error_filename);
#endif
......@@ -173,7 +173,7 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
if(ctx->debug_mode) {
switch (error_detect) {
case 0 : // No errors detected
sprintf(impr[nb]+strlen(impr[nb]),"\t\t0");
sprintf(impr[nb]+strlen(impr[nb]),"\t\t_");
break;
case 1 :
......@@ -192,17 +192,18 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
sprintf(impr[nb]+strlen(impr[nb]),"\n");
nb +=1;
vcode *vcode_array_err = feature_table_get_vcode_array(fv_error, ft_error);
if(ctx->debug_mode){
fprintf(stdout, " ***Error detection***\n");
vcode *vcode_array_err = feature_table_get_vcode_array(fv_error, ft_error);
for(int i=0; i < 4; i++){
fprintf(stdout, " %d\t", i);
fprintf(stdout, " %d\t%.4f\n", vcode_array_err[i].class_code, vcode_array_err[i].score);
}
free(vcode_array_err);
}
free(vcode_array_err);
if (error_detect == 3) {
if (error_detect == 3){// && (vcode_array_err[0].score-vcode_array_err[1].score)>2.5) {
backward(c);
backward(c);
nb -= 3;
......@@ -235,7 +236,61 @@ void simple_decoder_tagger_error_predictor(context *ctx, char *perc_error_filena
if(ctx->debug_mode){
vcode *vcode_arraye = feature_table_get_vcode_array(fv, ft);
for(int i=debug_choice; i < debug_choice+3; i++){//postag_err+3; i++){
for(int i=debug_choice-1; i < debug_choice+2; i++){//postag_err+3; i++){
fprintf(stdout, "%d\t", i);
fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score);
}
free(vcode_arraye);
fprintf(stderr, "Ancien pos : %s, nouveau : %s\n", dico_int2string(dico_pos, postag_err), dico_int2string(dico_pos, postag));
}
if(postag==postag_err)
{
printf("ERROR PREDICTOR, NO CHOICE LEFT\n");
exit(1);
}
word_set_pos(word_buffer_b0(c->bf), postag);
string_print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag,&impr[nb]);
if(ctx->debug_mode)
sprintf(impr[nb]+strlen(impr[nb]),"\t\t_\n");
else
sprintf(impr[nb]+strlen(impr[nb]),"\n");
nb += 1;
}
else if (error_detect == 2 && ctx->force) {
backward(c);
nb -= 2;
if(ctx->f2p)
add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);
postag_err = word_get_pos(word_buffer_b0(c->bf));
postag = postag_err;
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
int debug_choice;
for(int i=0; i < ft->classes_nb-1; i++){
if (postag_err == vcode_array[i].class_code) {
postag = vcode_array[i+1].class_code;
debug_choice = i+1;
break;
}
}
free(vcode_array);
if(ctx->debug_mode){
fprintf(stderr, "***********************************\n");
config_print(stderr, c);
}
if(ctx->debug_mode){
vcode *vcode_arraye = feature_table_get_vcode_array(fv, ft);
for(int i=debug_choice-1; i < debug_choice+2; i++){//postag_err+3; i++){
fprintf(stdout, "%d\t", i);
fprintf(stdout, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_arraye[i].class_code), vcode_arraye[i].score);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment