Skip to content
Snippets Groups Projects
Commit afcc1a93 authored by robin.perrotin's avatar robin.perrotin
Browse files

add various partial parser changes.

parent fa9c15c1
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@ set(SOURCES src/context.c
src/oracle_tagparser_arc_eager.c
src/oracle_tagger.c
# src/simple_decoder_parser.c
src/partial_parser_conditional.c
src/simple_decoder_parser_arc_eager.c
src/simple_decoder_tagparser_arc_eager.c
# src/simple_decoder_forrest.c
......@@ -27,6 +28,7 @@ set(SOURCES src/context.c
src/feat_types.c
src/mvt.c
src/mvt_stack.c
src/confidence_score.c
)
#compiling library
......
......@@ -93,6 +93,7 @@ context *context_new(void)
ctx->trace_mode = 0;
ctx->partial_mode = 0;
ctx->score_method = 0;
return ctx;
}
......@@ -171,6 +172,9 @@ void context_debug_help_message(context *ctx){
/*
 * Write the usage line describing the -p / --partial option to stderr.
 * ctx is part of the common help-printer signature and is not read here.
 */
void context_partial_mode_help_message(context *ctx)
{
  static const char usage_line[] =
    "\t-p --partial : activate partial mode (default is false); only works in TEST mode.\n";

  (void)ctx;
  fprintf(stderr, "%s", usage_line);
}
/*
 * Write the usage line describing the -S / --score option to stderr.
 * ctx is part of the common help-printer signature and is not read here.
 */
void context_score_method_help_message(context *ctx)
{
  static const char usage_line[] =
    "\t-S --score : method for scoring the oddity of the parsing.\n";

  (void)ctx;
  fprintf(stderr, "%s", usage_line);
}
context *context_read_options(int argc, char *argv[])
{
......@@ -180,7 +184,7 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[22] =
static struct option long_options[24] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
......@@ -204,13 +208,14 @@ context *context_read_options(int argc, char *argv[])
{"maca_data_path", required_argument, 0, 'D'},
{"root_label", required_argument, 0, 'R'},
{"f2p", required_argument, 0, 'P'},
{"traces", required_argument, 0, 'T'}
{"traces", required_argument, 0, 'T'},
{"score", required_argument, 0, 'S'},
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "hvdpcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hvdpcm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:T:S:", long_options, &option_index)) != -1){
switch (c)
{
case 'h':
......@@ -285,6 +290,9 @@ context *context_read_options(int argc, char *argv[])
ctx->f2p_filename = strdup(optarg);
ctx->f2p = form2pos_read(ctx->f2p_filename);
break;
case 'S':
ctx->score_method = atoi(optarg);
break;
}
}
......
......@@ -68,6 +68,7 @@ typedef struct {
int ifpls;
int trace_mode;
int partial_mode;
int score_method;
} context;
context *context_new(void);
......@@ -103,6 +104,7 @@ void context_ifpls_help_message(context *ctx);
void context_input_help_message(context *ctx);
void context_root_label_help_message(context *ctx);
void context_partial_mode_help_message(context *ctx);
void context_score_method_help_message(context *ctx);
void context_debug_help_message(context *ctx);
......
......@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
return 1;
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3;
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 4; //4 specials are ROOT EOS SHIFT and REDUCE.
/* load models */
......
......@@ -144,7 +144,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "cannot find label names\n");
return 1;
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3;
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 4;
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
......
......@@ -6,6 +6,7 @@
int movement_eos(config *c, int movement_code)
{
if(stack_is_empty(config_get_stack(c))) return 0;
word *s0 = stack_top(config_get_stack(c));
......
......@@ -9,6 +9,8 @@
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
#include"partial_parser_conditional.h"
#include"confidence_score.h"
void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
{
......@@ -110,161 +112,24 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
else
printf("\t0");
}
printf("\n");
free(buffer);
}
}
}
/*
 * Check the basic precondition of a movement against the word on top of the
 * stack of configuration c.
 *
 * c         : configuration whose stack is inspected.
 * mvt_type  : one of the MVT_PARSER_* movement types.
 * mvt_label : not consulted by this check.
 *
 * Returns 1 when the movement is allowed, 0 otherwise. Every movement type is
 * refused on an empty stack, and a movement type that is not handled below is
 * refused as well (previously the function fell off its end in that case).
 */
int respect_standard_constraint(config *c, int mvt_type, int mvt_label){
  word *s0;

  (void)mvt_label;

  /* Every handled movement type requires a word on top of the stack. */
  if(stack_is_empty(config_get_stack(c))) return 0;

  switch(mvt_type){
  case MVT_PARSER_LEFT:
    /* Refused when the stack top is flagged is_root or its gov is already set. */
    s0 = stack_top(config_get_stack(c));
    return (!s0->is_root && word_get_gov(s0) == WORD_INVALID_GOV);

  case MVT_PARSER_REDUCE:
    /* Only allowed once the stack top's gov is set. */
    s0 = stack_top(config_get_stack(c));
    return (word_get_gov(s0) != WORD_INVALID_GOV);

  case MVT_PARSER_ROOT:
    /* Only allowed while the stack top's gov is still WORD_INVALID_GOV. */
    s0 = stack_top(config_get_stack(c));
    return (word_get_gov(s0) == WORD_INVALID_GOV);

  case MVT_PARSER_RIGHT:
  case MVT_PARSER_SHIFT:
  case MVT_PARSER_EOS:
    /* A non-empty stack is the only requirement. */
    return 1;

  default:
    /* Unknown movement type: refuse it rather than return an indeterminate value. */
    return 0;
  }
}
/*
 * Tell whether a movement of type mvt_type is flagged as safe for the word on
 * top of the stack.
 *
 * Returns 1 for MVT_PARSER_RIGHT, MVT_PARSER_SHIFT and MVT_PARSER_ROOT;
 * returns 0 for MVT_PARSER_LEFT, MVT_PARSER_REDUCE, MVT_PARSER_EOS and for
 * any other value.
 */
int movement_type_safe_for_top_stack(int mvt_type){
  switch(mvt_type){
  case MVT_PARSER_RIGHT:
  case MVT_PARSER_SHIFT:
  case MVT_PARSER_ROOT:
    return 1;

  case MVT_PARSER_LEFT:
  case MVT_PARSER_REDUCE:
  case MVT_PARSER_EOS:
    return 0;

  default:
    /* Unknown types are reported as unsafe so that callers testing
       !movement_type_safe_for_top_stack(...) reject them instead of reading
       an indeterminate return value. */
    return 0;
  }
}
/*
 * Tell whether a movement of type mvt_type is flagged as safe for the word at
 * the front of the buffer.
 *
 * Returns 1 for MVT_PARSER_LEFT, MVT_PARSER_REDUCE, MVT_PARSER_EOS and
 * MVT_PARSER_ROOT; returns 0 for MVT_PARSER_RIGHT, MVT_PARSER_SHIFT and for
 * any other value.
 */
int movement_type_safe_for_top_buffer(int mvt_type){
  switch(mvt_type){
  case MVT_PARSER_LEFT:
  case MVT_PARSER_REDUCE:
  case MVT_PARSER_EOS:
  case MVT_PARSER_ROOT:
    return 1;

  case MVT_PARSER_RIGHT:
  case MVT_PARSER_SHIFT:
    return 0;

  default:
    /* Unknown types are reported as unsafe so that callers testing
       !movement_type_safe_for_top_buffer(...) reject them instead of reading
       an indeterminate return value. */
    return 0;
  }
}
int respect_stack_constraint(int mode_partial, config *c, int mvt_type, int mvt_label){
if(!mode_partial) return 1;
if(stack_is_empty(config_get_stack(c))) return (mvt_type == MVT_PARSER_SHIFT);
if(config_get_buffer(c)->nbelem == 0) return (mvt_type == MVT_PARSER_EOS || mvt_type == MVT_PARSER_ROOT);
word *w_stack = stack_top(config_get_stack(c));
word *w_buffer = word_buffer_b0(config_get_buffer(c));
word *gov_stack;
int potentialz;
int stack_id = word_get_index(w_stack);
int gov_rel_id = word_get_X(w_stack);
int buffer_id = word_get_index(w_buffer);
// printf("%d %d %d ",stack_id, gov_rel_id, buffer_id);
if(gov_rel_id > 0){
//top of stack needs to be governed by a left dependency.
if(buffer_id - stack_id < gov_rel_id){
//allow only if top of stack doesn't move and isn't set new dep (left move. implyed by not moving).
if(!movement_type_safe_for_top_stack(mvt_type))
return 0;
}
else if(buffer_id - stack_id == gov_rel_id){
//expected movement.
// printf(" <%d %d %d %d> %s ",word_get_W(w_stack),word_get_X(w_stack),word_get_Y(w_stack),word_get_Z(w_stack),dico_int2string(dico_W,word_get_W(w_stack)));
// fflush(stdout);
return (mvt_type == MVT_PARSER_LEFT && mvt_label == word_get_W(w_stack));
}
}
if(word_get_Z(w_stack) != 0 && word_get_Z(w_stack) >= buffer_id - stack_id){
//there is an undone right dependency.
//allow only if top of stack doesn't move.
if(!movement_type_safe_for_top_stack(mvt_type))
return 0;
}
switch(mvt_type){
case MVT_PARSER_ROOT:
return ((word_get_V(w_stack) == 0) ||
(word_get_V(w_stack) == 1 && (word_get_Y(w_stack) < 0 || word_get_Z(w_stack) > 0)) ||
(word_get_V(w_stack) == 2 && (word_get_Y(w_stack) < 0 && word_get_Z(w_stack) > 0)));
case MVT_PARSER_EOS:
return word_get_U(w_stack);
default:
return 1;
if(col_nb <= mcd_get_s_col(mcd_struct)){
if(word_get_S(w) > 0)
printf("\t%d",word_get_S(w));
else
printf("\t-1");
}
if(col_nb <= mcd_get_t_col(mcd_struct)){
if(word_get_T(w) > 0)
printf("\t%d",word_get_T(w));
else
printf("\t-1");
}
int respect_buffer_constraint(int mode_partial, config *c, int mvt_type, int mvt_label){
if(!mode_partial) return 1;
if(stack_is_empty(config_get_stack(c))) return (mvt_type == MVT_PARSER_SHIFT);
if(config_get_buffer(c)->nbelem == 0) return (mvt_type == MVT_PARSER_EOS || mvt_type == MVT_PARSER_ROOT);
word *w_stack = stack_top(config_get_stack(c));
word *w_buffer = word_buffer_b0(config_get_buffer(c));
int stack_id = word_get_index(w_stack);
int gov_rel_id = word_get_X(w_buffer);
int buffer_id = word_get_index(w_buffer);
if(gov_rel_id < 0){
//top of stack needs to be governed by a right dependency.
if(stack_id - buffer_id > gov_rel_id){
//allow only if top of buffer doesn't move and isn't set new dep (right move. implyed by not moving OR ROOT move?).
if(!movement_type_safe_for_top_buffer(mvt_type))
return 0;
}
else if(stack_id - buffer_id == gov_rel_id){
//expected movement.
// printf(" <%d %d %d %d> %s ",word_get_W(w_buffer),word_get_X(w_buffer),word_get_Y(w_buffer),word_get_Z(w_buffer),dico_int2string(dico_W,word_get_W(w_buffer)));
//fflush(stdout);
return (mvt_type == MVT_PARSER_RIGHT && mvt_label == word_get_W(w_buffer));
printf("\n");
free(buffer);
}
}
if(word_get_Y(w_stack) != 0 && word_get_Y(w_stack) <= buffer_id - stack_id){
//there is an undone left dependency.
//allow only if top of buffer doesn't move.
if(!movement_type_safe_for_top_buffer(mvt_type))
return 0;
}
return 1;
}
void simple_decoder_parser_arc_eager(context *ctx)
{
......@@ -283,6 +148,9 @@ void simple_decoder_parser_arc_eager(context *ctx)
int argmax1, argmax2;
float max1, max2;
int index;
float score;
word* word_scored;
root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
if(root_label == -1) root_label = 0;
......@@ -313,7 +181,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
if(ctx->debug_mode){
vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
for(int i=0; i < ft->classes_nb; i++){
for(int i=0; i < ctx->mvt_nb && i < 1000; i++){
printf("%d\t", i);
movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels);
printf("\t%.4f", vcode_array[i].score);
......@@ -361,27 +229,33 @@ void simple_decoder_parser_arc_eager(context *ctx)
free(vcode_array);
}
mvt_type = movement_parser_type(mvt_code);
mvt_label = movement_parser_label(mvt_code);
result = 0;
switch(mvt_type){
case MVT_PARSER_LEFT :
word_scored = stack_top(config_get_stack(c));
result = movement_parser_left_arc(c, mvt_label);
break;
case MVT_PARSER_RIGHT:
word_scored = word_buffer_b0(config_get_buffer(c));
result = movement_parser_right_arc(c, mvt_label);
break;
case MVT_PARSER_REDUCE:
word_scored = stack_top(config_get_stack(c));
result = movement_parser_reduce(c);
break;
case MVT_PARSER_ROOT:
word_scored = stack_top(config_get_stack(c));
result = movement_parser_root(c, root_label);
break;
case MVT_PARSER_EOS:
result = movement_parser_eos(c);
break;
case MVT_PARSER_SHIFT:
word_scored = word_buffer_b0(config_get_buffer(c));
result = movement_parser_shift(c);
}
......@@ -393,6 +267,25 @@ void simple_decoder_parser_arc_eager(context *ctx)
while(!stack_is_empty(config_get_stack(c)))
movement_parser_root(c, root_label);
}
}else{
if(ctx->score_method > 0){
score = confidence_score(mvt_code,feature_table_get_vcode_array(fv,ft),ft->classes_nb,ctx,c);
switch(mvt_type){
case MVT_PARSER_LEFT :
case MVT_PARSER_RIGHT :
case MVT_PARSER_ROOT :
// printf("dep score: %d %d!!\n", word_get_form(word_scored), (int)(score*1000));
word_set_S(word_scored,(int)(score*1000));
break;
case MVT_PARSER_REDUCE:
case MVT_PARSER_SHIFT:
// printf("pop/shift score: %d %d!!\n", word_get_form(word_scored), (int)(score*1000));
word_set_T(word_scored,(int)(score*1000));
break;
default:
break;
}
}
}
}
}
......
......@@ -71,7 +71,7 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int
}
*max_feat = *max_feat + 1;
*max_class = *max_class + 1;
*max_class = *max_class + 2; //RP: the number of class was incorrect for arc_eager.
fclose(f);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment