From 56e83bcf770acf280a995864706e58e21623335d Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Fri, 21 Oct 2016 11:23:40 -0400 Subject: [PATCH] added new tagparse decoder, that tags and parses the sentence at the same time --- maca_common/include/word_buffer.h | 26 ++++++++-------- maca_trans_parser/CMakeLists.txt | 6 ++-- .../maca_trans_tagparser_arc_eager_mcf2cff.c | 22 +++++++------- .../src/movement_tagparser_arc_eager.c | 30 +++++++++---------- .../src/movement_tagparser_arc_eager.h | 30 +++++++++---------- .../src/oracle_tagparser_arc_eager.c | 8 ++--- .../src/simple_decoder_tagparser_arc_eager.c | 28 ++++++++--------- 7 files changed, 75 insertions(+), 75 deletions(-) diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index f24ab4c..2b182b7 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -36,19 +36,19 @@ typedef struct { word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead); -void word_buffer_free(word_buffer *wb); -int word_buffer_add(word_buffer *wb, word *w); -word *word_buffer_get_word_relative(word_buffer *wb, int dist); -word* word_buffer_get_word_n(word_buffer *wb, int n); -int word_buffer_read_next_word(word_buffer *wb); -int word_buffer_move_right(word_buffer *wb); -int word_buffer_move_left(word_buffer *wb); -void word_buffer_print(FILE *f, word_buffer *wb); -void word_buffer_print_compact(FILE *f, word_buffer *wb); -int word_buffer_is_empty(word_buffer *wb); -int word_buffer_is_last(word_buffer *wb); -int word_buffer_end(word_buffer *wb); -int word_buffer_read_sentence(word_buffer *bw); +void word_buffer_free(word_buffer *wb); +int word_buffer_add(word_buffer *wb, word *w); +word* word_buffer_get_word_relative(word_buffer *wb, int dist); +word* word_buffer_get_word_n(word_buffer *wb, int n); +int word_buffer_read_next_word(word_buffer *wb); +int word_buffer_move_right(word_buffer *wb); +int word_buffer_move_left(word_buffer *wb); +void word_buffer_print(FILE *f, word_buffer *wb); +void word_buffer_print_compact(FILE *f, word_buffer *wb); +int word_buffer_is_empty(word_buffer *wb); +int word_buffer_is_last(word_buffer *wb); +int word_buffer_end(word_buffer *wb); +int word_buffer_read_sentence(word_buffer *bw); word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct); #endif diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index a372ba3..efe9bac 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -1,15 +1,15 @@ set(SOURCES src/context.c src/feat_desc.c -# src/movement_parser_arc_eager.c +src/movement_parser_arc_eager.c src/movement_tagparser_arc_eager.c src/movement_tagger.c src/feat_fct.c src/global_feat_vec.c # src/oracle_parser.c -# src/oracle_parser_arc_eager.c + src/oracle_parser_arc_eager.c src/oracle_tagparser_arc_eager.c src/oracle_tagger.c -# src/simple_decoder_parser.c + src/simple_decoder_parser.c src/simple_decoder_parser_arc_eager.c src/simple_decoder_tagparser_arc_eager.c src/simple_decoder_forrest.c diff --git a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c index aeff658..e4c69d1 100644 --- a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c @@ -78,12 +78,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx) config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); mvt_code = oracle_tagparser_arc_eager(c, ref, root_label); - mvt_type = movement_type(mvt_code); - mvt_label = movement_label(mvt_code); + mvt_type = movement_tagparse_type(mvt_code); + mvt_label = movement_tagparse_label(mvt_code); if(ctx->debug_mode){ config_print(stdout,c); - movement_print(stdout, mvt_code, ctx->dico_labels, dico_postag); + movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, dico_postag); fprintf(stdout, "\n"); } @@ -92,7 +92,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) stack_print(output_file, c->st); fprintf(output_file, "\t"); - movement_print(output_file, mvt_code, ctx->dico_labels, dico_postag); + movement_tagparse_print(output_file, mvt_code, ctx->dico_labels, dico_postag); fprintf(output_file, "\t1\n"); } else{ @@ -101,40 +101,40 @@ void generate_training_file_stream(FILE *output_file, context *ctx) } if(mvt_type == MVT_EOS){ - movement_eos(c, 0); + movement_tagparse_eos(c, 0); sentence_nb++; if(word_buffer_is_last(ref)) break; } if(mvt_type == MVT_POSTAG){ - movement_add_pos(c, 0, mvt_label); + movement_tagparse_add_pos(c, 0, mvt_label); continue; } if(mvt_type == MVT_LEFT){ - movement_left_arc(c, mvt_label, 0); + movement_tagparse_left_arc(c, mvt_label, 0); continue; } if(mvt_type == MVT_RIGHT){ - movement_right_arc(c, mvt_label, 0); + movement_tagparse_right_arc(c, mvt_label, 0); word_buffer_move_right(ref); continue; } if(mvt_type == MVT_REDUCE){ - movement_reduce(c, 0); + movement_tagparse_reduce(c, 0); continue; } if(mvt_type == MVT_ROOT){ - movement_root(c, 0, root_label); + movement_tagparse_root(c, 0, root_label); continue; } if(mvt_type == MVT_SHIFT){ - movement_shift(c, 1, 0); + movement_tagparse_shift(c, 1, 0); word_buffer_move_right(ref); continue; } diff --git a/maca_trans_parser/src/movement_tagparser_arc_eager.c b/maca_trans_parser/src/movement_tagparser_arc_eager.c index bdf7e4c..df5f5a7 100644 --- a/maca_trans_parser/src/movement_tagparser_arc_eager.c +++ b/maca_trans_parser/src/movement_tagparser_arc_eager.c @@ -4,11 +4,11 @@ #include"util.h" #include"movement_tagparser_arc_eager.h" -void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag) +void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag) { - int mvt_type = movement_type(mvt_code); - int mvt_label = movement_label(mvt_code); + int mvt_type = movement_tagparse_type(mvt_code); + int mvt_label = movement_tagparse_label(mvt_code); char *label; if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT"); return;} @@ -27,7 +27,7 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag) fprintf(f, " %s", label); } -int movement_type(int mvt) +int movement_tagparse_type(int mvt) { if(mvt == MVT_SHIFT) return MVT_SHIFT; /* 0 */ if(mvt == MVT_REDUCE) return MVT_REDUCE; /* 1 */ @@ -38,7 +38,7 @@ int movement_type(int mvt) /*if(mvt % 3 == 2)*/ return MVT_LEFT; /* 6, 9, 12 ... */ } -int movement_label(int mvt) +int movement_tagparse_label(int mvt) { if(mvt == MVT_SHIFT) return -1; if(mvt == MVT_REDUCE) return -1; @@ -52,7 +52,7 @@ int movement_label(int mvt) return (mvt - 6) / 3; } -int movement_add_pos(config *c, float score, int pos) +int movement_tagparse_add_pos(config *c, float score, int pos) { if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0; if(word_get_pos(word_buffer_b0(config_get_buffer(c))) != -1) return 0; @@ -60,12 +60,12 @@ int movement_add_pos(config *c, float score, int pos) /* stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); word_buffer_move_right(config_get_buffer(c));*/ - config_add_mvt(c, movement_postag(pos)); + config_add_mvt(c, movement_tagparse_postag(pos)); return 1; } -int movement_eos(config *c, float score) +int movement_tagparse_eos(config *c, float score) { if(stack_is_empty(config_get_stack(c))) return 0; if(word_get_sent_seg(stack_top(config_get_stack(c))) == 1) return 0; @@ -80,7 +80,7 @@ int movement_eos(config *c, float score) return 1; } -int movement_left_arc(config *c, int label, float score) +int movement_tagparse_left_arc(config *c, int label, float score) { if(stack_is_empty(config_get_stack(c))) return 0; /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ @@ -97,11 +97,11 @@ int movement_left_arc(config *c, int label, float score) word_set_label(dep, label); stack_pop(config_get_stack(c)); - config_add_mvt(c, movement_left_code(label)); + config_add_mvt(c, movement_tagparse_left_code(label)); return 1; } -int movement_right_arc(config *c, int label, float score) +int movement_tagparse_right_arc(config *c, int label, float score) { if(stack_is_empty(config_get_stack(c))) return 0; @@ -116,11 +116,11 @@ int movement_right_arc(config *c, int label, float score) stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); word_buffer_move_right(config_get_buffer(c)); - config_add_mvt(c, movement_right_code(label)); + config_add_mvt(c, movement_tagparse_right_code(label)); return 1; } -int movement_shift(config *c, int stream, float score) +int movement_tagparse_shift(config *c, int stream, float score) { if(word_buffer_is_empty(config_get_buffer(c))) return 0; stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); @@ -129,7 +129,7 @@ int movement_shift(config *c, int stream, float score) return 1; } -int movement_reduce(config *c, float score) +int movement_tagparse_reduce(config *c, float score) { if(stack_nbelem(config_get_stack(c)) <= 1) return 0; @@ -142,7 +142,7 @@ int movement_reduce(config *c, float score) return 1; } -int movement_root(config *c, float score, int root_code) +int movement_tagparse_root(config *c, float score, int root_code) { word *s0 = stack_top(config_get_stack(c)); if(s0 == NULL) return 0; diff --git a/maca_trans_parser/src/movement_tagparser_arc_eager.h b/maca_trans_parser/src/movement_tagparser_arc_eager.h index 880e14c..89f5a68 100644 --- a/maca_trans_parser/src/movement_tagparser_arc_eager.h +++ b/maca_trans_parser/src/movement_tagparser_arc_eager.h @@ -13,23 +13,23 @@ #define MVT_RIGHT 5 #define MVT_POSTAG 6 -#define movement_postag(postag) (3 * (postag) + 4) +#define movement_tagparse_postag(postag) (3 * (postag) + 4) /* even movements are left movements (except 0, which is shift and 2 which is root) */ -#define movement_left_code(label) (3 * (label) + 5) +#define movement_tagparse_left_code(label) (3 * (label) + 5) /* odd movements are right movements (except 1, which is reduce and 3 which is end_of_sentence) */ -#define movement_right_code(label) (3 * (label) + 6) - -int movement_type(int mvt); -int movement_label(int mvt); - -int movement_left_arc(config *c, int label, float score); -int movement_right_arc(config *c, int label, float score); -int movement_shift(config *c, int stream, float score); -int movement_reduce(config *c, float score); -int movement_root(config *c, float score, int root_code); -int movement_eos(config *c, float score); -int movement_add_pos(config *c, float score, int postag); -void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag); +#define movement_tagparse_right_code(label) (3 * (label) + 6) + +int movement_tagparse_type(int mvt); +int movement_tagparse_label(int mvt); + +int movement_tagparse_left_arc(config *c, int label, float score); +int movement_tagparse_right_arc(config *c, int label, float score); +int movement_tagparse_shift(config *c, int stream, float score); +int movement_tagparse_reduce(config *c, float score); +int movement_tagparse_root(config *c, float score, int root_code); +int movement_tagparse_eos(config *c, float score); +int movement_tagparse_add_pos(config *c, float score, int postag); +void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag); #endif diff --git a/maca_trans_parser/src/oracle_tagparser_arc_eager.c b/maca_trans_parser/src/oracle_tagparser_arc_eager.c index 8d3a152..d9123e5 100644 --- a/maca_trans_parser/src/oracle_tagparser_arc_eager.c +++ b/maca_trans_parser/src/oracle_tagparser_arc_eager.c @@ -75,9 +75,9 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label) /* give a pos to b0 if it does not have one */ if(word_get_pos(b0) == -1){ /* word_set_pos(b0, word_get_pos(word_buffer_get_word_n(ref, b0_index))); */ - /* return movement_postag(word_get_pos(b0)); */ + /* return movement_tagparse_postag(word_get_pos(b0)); */ - return movement_postag(word_get_pos(word_buffer_get_word_n(ref, b0_index))); + return movement_tagparse_postag(word_get_pos(word_buffer_get_word_n(ref, b0_index))); } @@ -113,12 +113,12 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label) /* LEFT ARC b0 is the governor and s0 the dependent */ if(s0_gov_index == b0_index){ - return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); + return movement_tagparse_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); } /* RIGHT ARC s0 is the governor and b0 the dependent */ if(b0_gov_index == s0_index){ - return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); + return movement_tagparse_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); } /* REDUCE */ if((stack_nbelem(config_get_stack(c)) > 1) diff --git a/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c b/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c index 469913e..946c8a4 100644 --- a/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c @@ -82,8 +82,8 @@ void simple_decoder_tagparser_arc_eager(context *ctx) config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); - mvt_type = movement_type(mvt_code); - mvt_label = movement_label(mvt_code); + mvt_type = movement_tagparse_type(mvt_code); + mvt_label = movement_tagparse_label(mvt_code); if(ctx->trace_mode){ index = word_get_index(word_buffer_b0(config_get_buffer(c))); @@ -92,7 +92,7 @@ void simple_decoder_tagparser_arc_eager(context *ctx) stack_print(stdout, c->st); fprintf(stdout, "\t"); - movement_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags); + movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags); fprintf(stdout, "\t"); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); printf("%f\n", max1 - max2); @@ -105,9 +105,9 @@ void simple_decoder_tagparser_arc_eager(context *ctx) entropy = feature_table_entropy(fv, ft); /* delta = feature_table_diff_scores(fv, ft); */ feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); - movement_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags); + movement_tagparse_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags); printf(":\t%f\n", max1); - movement_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags); + movement_tagparse_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags); printf(":\t%f\n", max2); printf("delta = %f\n", max1 - max2); @@ -115,37 +115,37 @@ void simple_decoder_tagparser_arc_eager(context *ctx) /* printf("entropy = %f delta = %f\n", entropy, delta); */ printf("entropy = %f\n",entropy); - /* movement_print(stdout, mvt_code, ctx->dico_labels); */ + /* movement_tagparse_print(stdout, mvt_code, ctx->dico_labels); */ } result = 0; switch(mvt_type){ case MVT_POSTAG : - result = movement_add_pos(c, max, mvt_label); + result = movement_tagparse_add_pos(c, max, mvt_label); break; case MVT_LEFT : - result = movement_left_arc(c, mvt_label, max); + result = movement_tagparse_left_arc(c, mvt_label, max); break; case MVT_RIGHT: - result = movement_right_arc(c, mvt_label, max); + result = movement_tagparse_right_arc(c, mvt_label, max); break; case MVT_REDUCE: - result = movement_reduce(c, max); + result = movement_tagparse_reduce(c, max); break; case MVT_ROOT: - result = movement_root(c, max, root_label); + result = movement_tagparse_root(c, max, root_label); break; case MVT_EOS: - result = movement_eos(c, max); + result = movement_tagparse_eos(c, max); break; case MVT_SHIFT: - result = movement_shift(c, 1, max); + result = movement_tagparse_shift(c, 1, max); } if(result == 0){ if(ctx->debug_mode){ fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); } - movement_shift(c, 1, max); + movement_tagparse_shift(c, 1, max); } } -- GitLab