diff --git a/maca_common/include/dico.h b/maca_common/include/dico.h index b10b9e3af475b4abcbe169f5be68e81ebf48ba69..91c2292cf5417b81f34bb513f33d68d996cd771b 100644 --- a/maca_common/include/dico.h +++ b/maca_common/include/dico.h @@ -15,6 +15,7 @@ typedef struct int array_size; hash *htable; char **array; + int is_clone; } dico; dico *dico_new(char *name, int size); @@ -27,5 +28,6 @@ void dico_print_fh(FILE *f, dico *d); dico *dico_read(char *filename, float ratio); dico *dico_read_fh(FILE *f, float ratio); dico *dico_extract_from_corpus(char *filename, int column, char *dico_name); +dico *dico_clone(dico *d, char* name); //doesn't duplicate anything. very ugly. #endif diff --git a/maca_common/include/dico_vec.h b/maca_common/include/dico_vec.h index aaab19cf2fa93ce22e963ed58dd2a22c65cefd74..8c187b0481a669939e70ef8606a976452882acb1 100644 --- a/maca_common/include/dico_vec.h +++ b/maca_common/include/dico_vec.h @@ -20,4 +20,6 @@ dico_vec *dico_vec_read(char *filename, float ratio); dico *dico_vec_get_dico(dico_vec *dv, char *dico_name); dico_vec *dico_vec_replace_dico(dico_vec *dv, dico *old_dico, dico *new_dico); + + #endif diff --git a/maca_common/src/dico.c b/maca_common/src/dico.c index 01a18da8cd0af84670ff89facb68ead405d1bd65..b46172d4e8ac0f7752ed7039f939c8df2b1b90af 100644 --- a/maca_common/src/dico.c +++ b/maca_common/src/dico.c @@ -16,15 +16,20 @@ dico *dico_new(char *name, int size) d->array = NULL; d->nbelem = 0; d->array_size = 0; + d->is_clone = 0; return d; } void dico_free(dico *d) { if(d){ + if(d->name) free(d->name); + if(d->is_clone){ + free(d); + return; + } if(d->htable) hash_free(d->htable); if(d->array) free(d->array); - if(d->name) free(d->name); free(d); } } @@ -173,3 +178,16 @@ dico *dico_extract_from_corpus(char *filename, int column, char *dico_name) fclose(f); return d; } + +dico *dico_clone(dico* dic, char* dico_name) //doesn't duplicate anything. very ugly. +{ + dico *d = (dico *)memalloc(sizeof(dico)); + + d->name = strdup(dico_name); + d->htable = dic->htable; + d->array = dic->array; + d->nbelem = dic->nbelem; + d->array_size = dic->array_size; + d->is_clone = 1; + return d; +} diff --git a/maca_common/src/dico_vec.c b/maca_common/src/dico_vec.c index 1fda6012398f212472fb4aec7a857ba60ee75769..2e2cd1f9cfe1913e72c19d8b8cc27d92d00ca45e 100644 --- a/maca_common/src/dico_vec.c +++ b/maca_common/src/dico_vec.c @@ -104,3 +104,4 @@ dico_vec *dico_vec_read(char *filename, float ratio) fclose(f); return dv; } + diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index a3cded9233f9abdf656a33a4ba577e004ad525ef..cb29e04fc2573d70272f5286a004b41fc0297e8f 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -12,6 +12,7 @@ set(SOURCES src/context.c src/oracle_tagger.c src/oracle_chunker.c # src/simple_decoder_parser.c + src/partial_parser_conditional.c src/simple_decoder_parser_arc_eager.c src/simple_decoder_tagparser_arc_eager.c src/simple_decoder_parser_arc_eager_error_predictor.c @@ -29,6 +30,7 @@ set(SOURCES src/context.c src/feat_types.c src/mvt.c src/mvt_stack.c + src/confidence_score.c ) #compiling library diff --git a/maca_trans_parser/src/confidence_score.c b/maca_trans_parser/src/confidence_score.c new file mode 100644 index 0000000000000000000000000000000000000000..d57529100a3262ba3bf95ee817dac29fcd52131b --- /dev/null +++ b/maca_trans_parser/src/confidence_score.c @@ -0,0 +1,38 @@ +#include "config.h" +#include "context.h" +#include "feature_table.h" +#include "partial_parser_conditional.h" +#include "movement_parser_arc_eager.h" + +float confidence_score(int mvt_code, vcode *vcode_array, int size, context *ctx, config *c){ + + int firstindex = -1; + int secondindex = -1; + + int i = 0; + + switch(ctx->score_method){ + case 1: //methode 1: First - Second. + + while(firstindex == -1 && i < size){ + int b1 = respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b2 = respect_stack_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b3 = respect_buffer_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + if(b1 && b2 && b3) + firstindex = i; + i += 1; + } + while(secondindex == -1 && i < size){ + int b1 = respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b2 = respect_stack_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b3 = respect_buffer_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + if(b1 && b2 && b3) + secondindex = i; + i += 1; + } + if(secondindex == -1) + return -1.; + return (vcode_array[firstindex].score - vcode_array[secondindex].score); + } + return -1.; +} diff --git a/maca_trans_parser/src/confidence_score.h b/maca_trans_parser/src/confidence_score.h new file mode 100644 index 0000000000000000000000000000000000000000..fdde0ff7b891d4e1342dfe69beb9076e4aafa989 --- /dev/null +++ b/maca_trans_parser/src/confidence_score.h @@ -0,0 +1,8 @@ +#ifndef __CONFIDENCE_SCORE__ +#define __CONFIDENCE_SCORE__ +#include"config.h" +#include"context.h" +#include"feature_table.h" + +float confidence_score(int mvt_code, vcode *vcode_array, int size, context *ctx, config *c); +#endif diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index 753136eafa5312cbc55121c55a424fd90747ee0c..dabd562f1700b1c7ae7598db0cc783bf927f2cd7 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -24,32 +24,31 @@ void context_free(context *ctx) if(ctx->root_label) free(ctx->root_label); if(ctx->vocabs_filename) free(ctx->vocabs_filename); if(ctx->fplm_filename) free(ctx->fplm_filename); + if(ctx->json_filename) free(ctx->json_filename); + if(ctx->dnn_model_filename) free(ctx->dnn_model_filename); + if (ctx->mcd_struct) - mcd_free(ctx->mcd_struct); - + mcd_free(ctx->mcd_struct); + if (ctx->vocabs) - dico_vec_free(ctx->vocabs); - + dico_vec_free(ctx->vocabs); + if(ctx->d_perceptron_features) dico_free(ctx->d_perceptron_features); - - if(ctx->d_perceptron_features_error) - dico_free(ctx->d_perceptron_features_error); + /* - if(ctx->mcd_struct) + if(ctx->mcd_struct) mcd_free(ctx->mcd_struct); */ if(ctx->features_model) feat_model_free(ctx->features_model); - if(ctx->features_model_error) - feat_model_free(ctx->features_model_error); - if(ctx->f2p) form2pos_free(ctx->f2p); + free(ctx); } @@ -57,10 +56,6 @@ context *context_new(void) { context *ctx = (context *)memalloc(sizeof(context)); - ctx->smin = 1; - ctx->smax = 1; - ctx->force = 0; - ctx->nb_classes = 6; ctx->verbose = 0; ctx->program_name = NULL; ctx->input_filename = NULL; @@ -80,10 +75,8 @@ context *context_new(void) ctx->root_label = strdup("root"); ctx->d_perceptron_features = NULL; - ctx->d_perceptron_features_error = NULL; ctx->mcd_struct = NULL; ctx->features_model = NULL; - ctx->features_model_error = NULL; ctx->vocabs = NULL; ctx->dico_labels = NULL; ctx->dico_postags = NULL; @@ -104,8 +97,8 @@ context *context_new(void) ctx->ifpls = 1; ctx->trace_mode = 0; - - + ctx->partial_mode = 0; + ctx->score_method = 0; ctx->json_filename = NULL; ctx->dnn_model_filename = NULL; ctx->l_rules_filename = NULL; @@ -115,13 +108,13 @@ context *context_new(void) void context_general_help_message(context *ctx) { - fprintf(stderr, "usage: %s [options]\n", ctx->program_name); - fprintf(stderr, "Options:\n"); - fprintf(stderr, "\t-h --help : print this message\n"); - fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); - fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); - fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n"); - fprintf(stderr, "\t-L --language <str> : identifier of the language to use (default is fr)\n"); + fprintf(stderr, "usage: %s [options]\n", ctx->program_name); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-h --help : print this message\n"); + fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); + fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); + fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n"); + fprintf(stderr, "\t-L --language <str> : identifier of the language to use (default is fr)\n"); } void context_model_help_message(context *ctx){ @@ -184,6 +177,12 @@ void context_trace_mode_help_message(context *ctx){ void context_debug_help_message(context *ctx){ fprintf(stderr, "\t-d --debug : activate debug mode (default is false)\n"); } +void context_partial_mode_help_message(context *ctx){ + fprintf(stderr, "\t-p --partial : activate partial mode (default is false); only works in TEST mode.\n"); +} +void context_score_method_help_message(context *ctx){ + fprintf(stderr, "\t-S --score : method for scoring the oddity of the parsing.\n"); +} void context_json_help_message(context *ctx){ fprintf(stderr, "\t-J --json : json description of keras model\n"); } @@ -191,8 +190,6 @@ void context_dnn_model_help_message(context *ctx){ fprintf(stderr, "\t-N --dnn_model : weight file for dnn\n"); } - - context *context_read_options(int argc, char *argv[]) { int c; @@ -201,16 +198,13 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[30] = + static struct option long_options[28] = { {"help", no_argument, 0, 'h'}, - {"force", no_argument, 0, 'K'}, {"verbose", no_argument, 0, 'v'}, {"debug", no_argument, 0, 'd'}, + {"partial", no_argument, 0, 'p'}, {"conll", no_argument, 0, 'c'}, - {"classes", required_argument, 0, 'q'}, - {"smin", required_argument, 0, 'A'}, - {"smax", required_argument, 0, 'B'}, {"model", required_argument, 0, 'm'}, {"input", required_argument, 0, 'i'}, {"iter", required_argument, 0, 'n'}, @@ -229,6 +223,7 @@ context *context_read_options(int argc, char *argv[]) {"root_label", required_argument, 0, 'R'}, {"f2p", required_argument, 0, 'P'}, {"traces", required_argument, 0, 'T'}, + {"score", required_argument, 0, 'S'}, {"json", required_argument, 0, 'J'}, {"dnn_model", required_argument, 0, 'N'}, {"l_rules", required_argument, 0, 'l'}, @@ -237,106 +232,100 @@ context *context_read_options(int argc, char *argv[]) optind = 0; opterr = 0; - - while ((c = getopt_long (argc, argv, "hKvdcSTm:i:A:B:n:x:q:u:r:M:b:f:s:C:F:V:L:D:R:P:J:N:w:l:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdcTpm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:J:N:w:l:S:T:", long_options, &option_index)) != -1){ + switch (c) { - case 'A': - ctx->smin = atoi(optarg); - break; - case 'B': - ctx->smax = atoi(optarg); - break; - case 'q': - ctx->nb_classes = atoi(optarg); - break; case 'h': - ctx->help = 1; - break; - case 'K' : - ctx->force = 1; - break; + ctx->help = 1; + break; case 'v': - ctx->verbose = 1; - break; + ctx->verbose = 1; + break; case 'd': - ctx->debug_mode = 1; - break; + ctx->debug_mode = 1; + break; + case 'p': + ctx->partial_mode = 1; + break; case 'c': - ctx->conll = 1; - break; + ctx->conll = 1; + break; case 'T': - ctx->trace_mode = 1; - break; + ctx->trace_mode = 1; + break; case 'm': - ctx->perc_model_filename = strdup(optarg); - break; + ctx->perc_model_filename = strdup(optarg); + break; case 'i': - ctx->input_filename = strdup(optarg); - break; + ctx->input_filename = strdup(optarg); + break; case 'n': - ctx->iteration_nb = atoi(optarg); - break; + ctx->iteration_nb = atoi(optarg); + break; case 'x': - ctx->cff_filename = strdup(optarg); - break; + ctx->cff_filename = strdup(optarg); + break; case 'w': - ctx->fplm_filename = strdup(optarg); - break; + ctx->fplm_filename = strdup(optarg); + break; case 'u': - ctx->feature_cutoff = atoi(optarg); - break; + ctx->feature_cutoff = atoi(optarg); + break; case 'r': - ctx->hash_ratio = atof(optarg); - break; + ctx->hash_ratio = atof(optarg); + break; case 'M': - ctx->mode = (!strcmp(optarg, "TEST"))? TEST_MODE : TRAIN_MODE; - break; + ctx->mode = (!strcmp(optarg, "TEST"))? TEST_MODE : TRAIN_MODE; + break; case 'b': - ctx->beam_width = atoi(optarg); - break; + ctx->beam_width = atoi(optarg); + break; case 'f': - ctx->fann_filename = strdup(optarg); - break; + ctx->fann_filename = strdup(optarg); + break; case 'l': - ctx->l_rules_filename = strdup(optarg); - break; + ctx->l_rules_filename = strdup(optarg); + break; case 's': - ctx->sent_nb = atoi(optarg); - break; + ctx->sent_nb = atoi(optarg); + break; case 'C': - ctx->mcd_filename = strdup(optarg); - break; + ctx->mcd_filename = strdup(optarg); + break; case 'F': - ctx->features_model_filename = strdup(optarg); - break; + ctx->features_model_filename = strdup(optarg); + break; case 'V': - ctx->vocabs_filename = strdup(optarg); - break; + ctx->vocabs_filename = strdup(optarg); + break; case 'L': - if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") ) - ctx->language = strdup(optarg); - break; + if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") ) + ctx->language = strdup(optarg); + break; case 'D': - ctx->maca_data_path = strdup(optarg); - break; + ctx->maca_data_path = strdup(optarg); + break; case 'R': - if (ctx->root_label) free(ctx->root_label); // libérer le default (strdup("root") ) - ctx->root_label = strdup(optarg); - break; + if (ctx->root_label) free(ctx->root_label); // libérer le default (strdup("root") ) + ctx->root_label = strdup(optarg); + break; case 'P': - ctx->f2p_filename = strdup(optarg); - if(!strcmp(ctx->f2p_filename, "_") || !strcmp(ctx->f2p_filename, "NULL")) - ctx->f2p = NULL; - else - ctx->f2p = form2pos_read(ctx->f2p_filename); - break; + ctx->f2p_filename = strdup(optarg); + if(!strcmp(ctx->f2p_filename, "_") || !strcmp(ctx->f2p_filename, "NULL")) + ctx->f2p = NULL; + else + ctx->f2p = form2pos_read(ctx->f2p_filename); + break; case 'N': - ctx->dnn_model_filename = strdup(optarg); - break; + ctx->dnn_model_filename = strdup(optarg); + break; case 'J': - ctx->json_filename = strdup(optarg); - break; + ctx->json_filename = strdup(optarg); + break; + case 'S': + ctx->score_method = atoi(optarg); + break; } } @@ -344,7 +333,7 @@ context *context_read_options(int argc, char *argv[]) ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); else ctx->mcd_struct = mcd_build_wpmlgfs(); - /* ctx->mcd_struct = mcd_build_wplgfs(); */ + /* ctx->mcd_struct = mcd_build_wplgfs(); */ /* initialize maca_data_path field */ @@ -356,12 +345,12 @@ context *context_read_options(int argc, char *argv[]) free(ctx->maca_data_path); } else { - char *e = getenv("MACAON_DIR"); - if (e != NULL) { + char *e = getenv("MACAON_DIR"); + if (e != NULL) { strcpy(absolute_path, e); - } else { + } else { fprintf(stderr, "WARNING: the environment variable MACAON_DIR is not defined\n"); - } + } } strcat(absolute_path, "/"); strcat(absolute_path, ctx->language); diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h index e00bf24963406eab4f066daae654910ef46217d3..c71d3606dbc95c614489d812a4f66af6e35904c9 100644 --- a/maca_trans_parser/src/context.h +++ b/maca_trans_parser/src/context.h @@ -110,11 +110,11 @@ typedef struct { int conll; int ifpls; int trace_mode; - + int partial_mode; + int score_method; char *json_filename; char *dnn_model_filename; char *l_rules_filename; - } context; context *context_new(void); @@ -149,6 +149,8 @@ void context_conll_help_message(context *ctx); void context_ifpls_help_message(context *ctx); void context_input_help_message(context *ctx); void context_root_label_help_message(context *ctx); +void context_partial_mode_help_message(context *ctx); +void context_score_method_help_message(context *ctx); void context_debug_help_message(context *ctx); void context_json_help_message(context *ctx); diff --git a/maca_trans_parser/src/maca_trans_parser.c b/maca_trans_parser/src/maca_trans_parser.c index cdb1c998e8404fa81786be3c8a331c7841e41c74..32d87ac3819409ac94595c045aa808c71862c4c3 100644 --- a/maca_trans_parser/src/maca_trans_parser.c +++ b/maca_trans_parser/src/maca_trans_parser.c @@ -87,6 +87,10 @@ int main(int argc, char *argv[]) set_linguistic_resources_filenames_parser(ctx); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + + if(ctx->partial_mode){ + dico_vec_add(ctx->vocabs,dico_clone(dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"),"W")); + } mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); @@ -97,7 +101,7 @@ int main(int argc, char *argv[]) return 1; } - ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; + ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 4; //4 specials are ROOT EOS SHIFT and REDUCE. /* load models */ @@ -105,6 +109,8 @@ int main(int argc, char *argv[]) simple_decoder_parser_arc_eager(ctx); + + context_free(ctx); return 0; } diff --git a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c index eeb228824e1275480e4e749b8699ed3800c74a79..961e57c44ea802ef46b2a093b619ea59602aeb45 100644 --- a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "cannot find label names\n"); return 1; } - ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; + ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 4; feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c index 36e3ff77c5042056c947e28b3cfe1e36f94ff53d..137ad26a38fddaa66199318f5c143215826e68c3 100644 --- a/maca_trans_parser/src/movements.c +++ b/maca_trans_parser/src/movements.c @@ -6,6 +6,7 @@ int movement_eos(config *c, int movement_code) { + if(stack_is_empty(config_get_stack(c))) return 0; word *s0 = stack_top(config_get_stack(c)); diff --git a/maca_trans_parser/src/partial_parser_conditional.c b/maca_trans_parser/src/partial_parser_conditional.c new file mode 100644 index 0000000000000000000000000000000000000000..44507c05962513c1c1e5d1fb620299b8dd6eb2db --- /dev/null +++ b/maca_trans_parser/src/partial_parser_conditional.c @@ -0,0 +1,182 @@ +#include"context.h" +#include"movement_parser_arc_eager.h" + +int respect_standard_constraint(config *c, int mvt_type, int mvt_label){ + int gov; + switch(mvt_type){ + case MVT_PARSER_LEFT : + if(stack_is_empty(config_get_stack(c))) return 0; + if(word_buffer_end(config_get_buffer(c))) return 0; + gov = word_get_gov(stack_top(config_get_stack(c))); + // printf("\n%d %d???\n",gov,stack_top(config_get_stack(c))->is_root); + if(stack_top(config_get_stack(c))->is_root || gov != WORD_INVALID_GOV) + //if(gov != WORD_INVALID_GOV) + return 0; + return 1; + case MVT_PARSER_RIGHT: + if(stack_is_empty(config_get_stack(c))) return 0; + if(word_buffer_end(config_get_buffer(c))) return 0; + return 1; + case MVT_PARSER_REDUCE: + if(stack_is_empty(config_get_stack(c))) return 0; + gov = word_get_gov(stack_top(config_get_stack(c))); + // printf("\n%d %d???\n",gov,stack_top(config_get_stack(c))->is_root); + if(stack_top(config_get_stack(c))->is_root || gov != WORD_INVALID_GOV) + //if(gov != WORD_INVALID_GOV) + return 1; + return 0; + case MVT_PARSER_SHIFT: + if(word_buffer_end(config_get_buffer(c))) return 0; + return 1; + case MVT_PARSER_ROOT: + if(stack_is_empty(config_get_stack(c))) return 0; + gov = word_get_gov(stack_top(config_get_stack(c))); + return (gov == WORD_INVALID_GOV); + case MVT_PARSER_EOS: + if(stack_is_empty(config_get_stack(c)) || word_get_sent_seg(stack_top(config_get_stack(c))) == 1) return 0; + return 1; + } + return 1; +} + +int movement_type_safe_for_top_stack(int mvt_type){ + switch(mvt_type){ + case MVT_PARSER_LEFT : + return 0; + case MVT_PARSER_RIGHT: + return 1; + case MVT_PARSER_REDUCE: + return 0; + case MVT_PARSER_SHIFT: + return 1; + case MVT_PARSER_EOS: + return 0; + case MVT_PARSER_ROOT: + return 0; + } + return 1; +} + +int movement_type_safe_for_top_buffer(int mvt_type){ + switch(mvt_type){ + case MVT_PARSER_LEFT : + return 1; + case MVT_PARSER_RIGHT: + return 0; + case MVT_PARSER_REDUCE: + return 1; + case MVT_PARSER_SHIFT: + return 0; + case MVT_PARSER_EOS: + return 1; + case MVT_PARSER_ROOT: + return 1; + } + return 1; +} + +int respect_stack_constraint(int mode_partial, config *c, int mvt_type, int mvt_label){ + if(!mode_partial) return 1; + if(stack_is_empty(config_get_stack(c))) return (mvt_type == MVT_PARSER_SHIFT); + + word *w_stack = stack_top(config_get_stack(c)); + int stack_id = word_get_index(w_stack); + int gov_rel_id = word_get_X(w_stack); + + if(word_buffer_end(config_get_buffer(c))) return ((mvt_type == MVT_PARSER_EOS && word_get_U(w_stack)) || (mvt_type == MVT_PARSER_ROOT && (gov_rel_id == 0 && ((word_get_V(w_stack) == 0) || + (word_get_V(w_stack) == 1 && (word_get_Y(w_stack) < 0 || word_get_Z(w_stack) > 0)) || + (word_get_V(w_stack) == 2 && (word_get_Y(w_stack) < 0 && word_get_Z(w_stack) > 0))))) || mvt_type == MVT_PARSER_REDUCE); + + word *w_buffer = word_buffer_b0(config_get_buffer(c)); + int buffer_id = word_get_index(w_buffer); + // printf("%d %d %d ",stack_id, gov_rel_id, buffer_id); + if(gov_rel_id > 0){ + //top of stack needs to be governed by a left dependency. + if(buffer_id - stack_id < gov_rel_id){ + //allow only if top of stack doesn't move and isn't set new dep (left move. implyed by not moving). + if(!movement_type_safe_for_top_stack(mvt_type)) + return 0; + } + else if(buffer_id - stack_id == gov_rel_id){ + //expected movement. + // printf(" <%d %d %d %d> %s ",word_get_W(w_stack),word_get_X(w_stack),word_get_Y(w_stack),word_get_Z(w_stack),dico_int2string(dico_W,word_get_W(w_stack))); + // fflush(stdout); + return (mvt_type == MVT_PARSER_LEFT && mvt_label == word_get_W(w_stack)); + } + } + + + // if(gov_rel_id == 0 && word_get_X(w_stack) != 0){ //root dependency on top of stack. disable LEFT. + // if(mvt_type == MVT_PARSER_LEFT) + // return 0; + //} + + + if(word_get_Z(w_stack) != 0 && word_get_Z(w_stack) >= buffer_id - stack_id){ + //there is an undone right dependency. + //allow only if top of stack doesn't move. + if(!movement_type_safe_for_top_stack(mvt_type)) + return 0; + } + + switch(mvt_type){ + case MVT_PARSER_ROOT: + return (gov_rel_id == 0 && ((word_get_V(w_stack) == 0) || + (word_get_V(w_stack) == 1 && (word_get_Y(w_stack) < 0 || word_get_Z(w_stack) > 0)) || + (word_get_V(w_stack) == 2 && (word_get_Y(w_stack) < 0 && word_get_Z(w_stack) > 0)))); + case MVT_PARSER_EOS: + return word_get_U(w_stack); + default: + return 1; + } + return 1; +} + + +int respect_buffer_constraint(int mode_partial, config *c, int mvt_type, int mvt_label){ + if(!mode_partial) return 1; + if(stack_is_empty(config_get_stack(c))) return (mvt_type == MVT_PARSER_SHIFT); + if(word_buffer_end(config_get_buffer(c))) return (mvt_type == MVT_PARSER_EOS || mvt_type == MVT_PARSER_ROOT || mvt_type == MVT_PARSER_REDUCE); + + word *w_stack = stack_top(config_get_stack(c)); + word *w_buffer = word_buffer_b0(config_get_buffer(c)); + + int stack_id = word_get_index(w_stack); + int gov_rel_id = word_get_X(w_buffer); + int buffer_id = word_get_index(w_buffer); + if(gov_rel_id < 0){ + //top of buffer needs to be governed by a right dependency. + if(stack_id - buffer_id > gov_rel_id){ + //allow only if top of buffer doesn't move and isn't set new dep (right move. implyed by not moving OR ROOT move?). + if(!movement_type_safe_for_top_buffer(mvt_type)) + return 0; + } + else if(stack_id - buffer_id == gov_rel_id){ + //expected movement. + // printf(" <%d %d %d %d> %s ",word_get_W(w_buffer),word_get_X(w_buffer),word_get_Y(w_buffer),word_get_Z(w_buffer),dico_int2string(dico_W,word_get_W(w_buffer))); + //fflush(stdout); + return (mvt_type == MVT_PARSER_RIGHT && mvt_label == word_get_W(w_buffer)); + } + + if(mvt_type == MVT_PARSER_ROOT) + return 0; + } + + if(gov_rel_id > 0){ + //top of buffer needs to be governed by a left dependency + if(mvt_type == MVT_PARSER_ROOT || mvt_type == MVT_PARSER_RIGHT || mvt_type == MVT_PARSER_EOS) + return 0; // make sure top of buffer is protected from preventing future left dependency. + } + + if(word_get_Y(w_buffer) != 0 && word_get_Y(w_buffer) <= stack_id - buffer_id){ + //there is an undone left dependency. + //allow only if top of buffer doesn't move. + if(!movement_type_safe_for_top_buffer(mvt_type)) + return 0; + } + + // if(gov_rel_id == 0 && word_get_X(w_buffer) != 0) //root dependency on top of buffer. Disable RIGHT. + // return (mvt_type != MVT_PARSER_RIGHT); + + return 1; +} diff --git a/maca_trans_parser/src/partial_parser_conditional.h b/maca_trans_parser/src/partial_parser_conditional.h new file mode 100644 index 0000000000000000000000000000000000000000..55bd8a40f48da2112c41d0da0211f4278d0e45fa --- /dev/null +++ b/maca_trans_parser/src/partial_parser_conditional.h @@ -0,0 +1,9 @@ +#ifndef __PARTIAL_PARSER_CONDITIONAL__ +#define __PARTIAL_PARSER_CONDITIONAL__ +#include"context.h" + +int respect_standard_constraint(config *c,int mvt_type, int mv_label); +int respect_stack_constraint(int mode_partial, config *c, int mvt_type, int mvt_label); +int respect_buffer_constraint(int mode_partial, config *c, int mvt_type, int mvt_label); + +#endif diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c index f4646dd3fe6ce58b407b15084788d5e9910da239..72d7baf6ec65d355cc4f78fef91b69dc1bbc6760 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c @@ -3,12 +3,30 @@ #include<string.h> #include<unistd.h> #include<getopt.h> +#include<math.h> +#include<time.h> #include"context.h" #include"movement_parser_arc_eager.h" #include"feat_fct.h" #include"config2feat_vec.h" #include"feature_table.h" #include"dico.h" +#include"partial_parser_conditional.h" +#include"confidence_score.h" + +unsigned long mix(unsigned long a, unsigned long b, unsigned long c) +{ + a=a-b; a=a-c; a=a^(c >> 13); + b=b-c; b=b-a; b=b^(a << 8); + c=c-a; c=c-b; c=c^(b >> 13); + a=a-b; a=a-c; a=a^(c >> 12); + b=b-c; b=b-a; b=b^(a << 16); + c=c-a; c=c-b; c=c^(b >> 5); + a=a-b; a=a-c; a=a^(c >> 3); + b=b-c; b=b-a; b=b^(a << 10); + c=c-a; c=c-b; c=c^(b >> 15); + return c; +} void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct) { @@ -110,12 +128,25 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) else printf("\t0"); } + if(col_nb <= mcd_get_s_col(mcd_struct)){ + if(word_get_S(w) > 0) + printf("\t%d",word_get_S(w)); + else + printf("\t-1"); + } + if(col_nb <= mcd_get_t_col(mcd_struct)){ + if(word_get_T(w) > 0) + printf("\t%d",word_get_T(w)); + else + printf("\t-1"); + } printf("\n"); free(buffer); } } } + void simple_decoder_parser_arc_eager(context *ctx) { FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; @@ -133,12 +164,30 @@ void simple_decoder_parser_arc_eager(context *ctx) int argmax1, argmax2; float max1, max2; int index; + float score; + + double sumExp; + double currentSumExp; + double ScoreTranslation; + int FlagNotInitExp; + double ProbaDivider = 15; // TO SETUP + double randomFloat; + + srand(mix(clock(), time(NULL), getpid())); + + + word* word_scored; root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; c = config_new(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ + + sumExp = 0; + currentSumExp = 0; + ScoreTranslation = -5; // TO SETUP + FlagNotInitExp = 1; if(ctx->debug_mode){ fprintf(stdout, "***********************************\n"); @@ -163,10 +212,46 @@ void simple_decoder_parser_arc_eager(context *ctx) if(ctx->debug_mode){ vcode *vcode_array = feature_table_get_vcode_array(fv, ft); - for(int i=0; i < 3; i++){ + + /* Get the probabilistic parameters */ + for(int i=0; i < ft->classes_nb; i++){ + int b1 = respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b2 = respect_stack_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b3 = respect_buffer_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + + if(b1 && b2 && b3){ + if(FlagNotInitExp){ + ScoreTranslation += vcode_array[i].score/ProbaDivider; + FlagNotInitExp = 0; + } + if(vcode_array[i].score/ProbaDivider - ScoreTranslation > 0){ + sumExp += exp(vcode_array[i].score/ProbaDivider - ScoreTranslation); + } + } + } + + currentSumExp = 0.; + for(int i=0; i < ft->classes_nb && i < 10; i++){ printf("%d\t", i); movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); - printf("\t%.4f\n", vcode_array[i].score); + printf("\t%.4f", vcode_array[i].score); + fflush(stdout); + int b1 = respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b2 = respect_stack_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b3 = respect_buffer_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + if(b1 && b2 && b3){ + if(vcode_array[i].score/ProbaDivider - ScoreTranslation > 0){ + printf(" %f %f %f",sumExp, currentSumExp,ScoreTranslation); + printf(" [%f-",currentSumExp/sumExp); + currentSumExp += exp(vcode_array[i].score/ProbaDivider - ScoreTranslation); + printf("%f[", currentSumExp/sumExp); + } + printf("\t<----"); + }else + printf("\t<%d,%d,%d>",b1,b2,b3); + // printf("\t%d", respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code))); + //printf("AAAAAAA\n"); + printf("\n"); } free(vcode_array); } @@ -184,31 +269,61 @@ void simple_decoder_parser_arc_eager(context *ctx) printf("%f\n", max1 - max2); } + currentSumExp = 0.; + randomFloat = (double) rand()/(double)RAND_MAX; + + printf("< %f > is our random number. \n",randomFloat); + + // if(ctx->partial_mode){ // NOT YET COMPATIBLE + vcode *vcode_array = feature_table_get_vcode_array(fv, ft); + mvt_code = 0; + for(int i=0; i < ft->classes_nb; i++){ + int b1 = respect_standard_constraint(c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b2 = respect_stack_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + int b3 = respect_buffer_constraint(ctx->partial_mode, c, movement_parser_type(vcode_array[i].class_code), movement_parser_label(vcode_array[i].class_code)); + + if(b1 && b2 && b3){ + currentSumExp += exp(vcode_array[i].score/ProbaDivider - ScoreTranslation); + if(currentSumExp/sumExp >= randomFloat){ + printf("The %d th move has been selected by the probabilistic parser.\n",i); + mvt_code = vcode_array[i].class_code; + break; + } + } + } + free(vcode_array); + // } + + mvt_type = movement_parser_type(mvt_code); mvt_label = movement_parser_label(mvt_code); if((mvt_type == MVT_PARSER_EOS) && (word_get_sent_seg(stack_top(config_get_stack(c))) == 0)){ - if(ctx->verbose) - fprintf(stderr, "the classifier did predict EOS but this is not the case\n"); - feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); - mvt_code = argmax2; - mvt_type = movement_parser_type(mvt_code); - mvt_label = movement_parser_label(mvt_code); + if(ctx->verbose) + fprintf(stderr, "the classifier did predict EOS but this is not the case\n"); + feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); + mvt_code = argmax2; + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); } result = 0; switch(mvt_type){ case MVT_PARSER_LEFT : + word_scored = stack_top(config_get_stack(c)); result = movement_parser_left_arc(c, mvt_label); break; case MVT_PARSER_RIGHT: + word_scored = word_buffer_b0(config_get_buffer(c)); result = movement_parser_right_arc(c, mvt_label); break; case MVT_PARSER_REDUCE: + word_scored = stack_top(config_get_stack(c)); result = movement_parser_reduce(c); break; case MVT_PARSER_ROOT: + word_scored = stack_top(config_get_stack(c)); result = movement_parser_root(c, root_label); /* while(!stack_is_empty(config_get_stack(c))) movement_parser_root(c, root_label);*/ @@ -217,6 +332,7 @@ void simple_decoder_parser_arc_eager(context *ctx) result = movement_parser_eos(c); break; case MVT_PARSER_SHIFT: + word_scored = word_buffer_b0(config_get_buffer(c)); result = movement_parser_shift(c); } @@ -228,6 +344,25 @@ void simple_decoder_parser_arc_eager(context *ctx) while(!stack_is_empty(config_get_stack(c))) movement_parser_root(c, root_label); } + }else{ + if(ctx->score_method > 0){ + score = confidence_score(mvt_code,feature_table_get_vcode_array(fv,ft),ft->classes_nb,ctx,c); + switch(mvt_type){ + case MVT_PARSER_LEFT : + case MVT_PARSER_RIGHT : + case MVT_PARSER_ROOT : +// printf("dep score: %d %d!!\n", word_get_form(word_scored), (int)(score*1000)); + word_set_S(word_scored,(int)(score*1000)); + break; + case MVT_PARSER_REDUCE: + case MVT_PARSER_SHIFT: +// printf("pop/shift score: %d %d!!\n", word_get_form(word_scored), (int)(score*1000)); + word_set_T(word_scored,(int)(score*1000)); + break; + default: + break; + } + } } } } diff --git a/perceptron/lib/src/cf_file.c b/perceptron/lib/src/cf_file.c index 908d4057d9842da2c98adac279cb6f4695a121c3..9e8f82b8c19f94cb6e5ecbd8ae750415994e095f 100644 --- a/perceptron/lib/src/cf_file.c +++ b/perceptron/lib/src/cf_file.c @@ -74,7 +74,7 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int } *max_feat = *max_feat + 1; - *max_class = *max_class + 1; + *max_class = *max_class + 2; //RP: the number of class was incorrect for arc_eager. fclose(f); }