From d83924581c519c665b3f3449cf8b061c4913a74a Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Mon, 20 Mar 2017 17:13:25 +0100 Subject: [PATCH] added features (l|r)dep_(s0|s1|b0)p --- maca_trans_parser/src/feat_fct.c | 126 ++++++++++++++++++ maca_trans_parser/src/feat_fct.h | 6 + .../maca_trans_tagparser_arc_eager_mcf2cff.c | 1 + 3 files changed, 133 insertions(+) diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index ec84300..fdb525e 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -435,6 +435,26 @@ int ldep_s0r(void *input){ return -1; } +int ldep_s0p(void *input){ /* word_get_pos() of the nearest word to the left of the word returned by stack_s0() whose word_get_gov() equals its distance to that word; -1 if stack_s0() is NULL, if no such word is found, or if a word with sent_seg == 1 is met first */ + config *c = input; + word *gov = stack_s0(config_get_stack((config *) c)); + int i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ /* scan leftwards from the word just before gov; i == 0 is never examined */ + dep = word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + int ldep_s1r(void *input){ config *c = input; word *gov = stack_s1(config_get_stack((config *) c)); @@ -455,6 +475,26 @@ int ldep_s1r(void *input){ return -1; } +int ldep_s1p(void *input){ /* word_get_pos() of the nearest word to the left of the word returned by stack_s1() whose word_get_gov() equals its distance to that word; -1 if stack_s1() is NULL, if no such word is found, or if a word with sent_seg == 1 is met first */ + config *c = input; + word *gov = stack_s1(config_get_stack((config *) c)); + int i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ /* scan leftwards from the word just before gov; i == 0 is never examined */ + dep = word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + int ldep_b0r(void *input){ config *c = input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); @@ -475,6 +515,26 @@ int ldep_b0r(void *input){ return -1; } +int ldep_b0p(void *input){ /* word_get_pos() of the nearest word to the left of the word returned by word_buffer_b0() whose word_get_gov() equals its distance to that word; -1 if word_buffer_b0() is NULL, if no such word is found, or if a word with sent_seg == 1 is met first */ + config *c = input; + word *gov = word_buffer_b0(config_get_buffer((config *) c)); + int i; + word *dep; + int dist; + + 
if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ /* scan leftwards from the word just before gov; i == 0 is never examined */ + dep = word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + int rdep_s0r(void *input){ config *c = input; word *gov = stack_s0(config_get_stack((config *) c)); @@ -495,6 +555,46 @@ int rdep_s0r(void *input){ return -1; } +int rdep_s0p(void *input){ /* word_get_pos() of the nearest word to the right of the word returned by stack_s0(), and located before word_buffer_b0(), whose word_get_gov() equals minus its distance to that word; -1 if stack_s0() is NULL or no such word is found */ + config *c = input; + word *gov = stack_s0(config_get_stack((config *) c)); + int i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer((config *) c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer((config *) c)))) return -1; /* give up once i reaches the index of word_buffer_b0(); NOTE(review): the result of word_buffer_b0() is not NULL-checked here - confirm it cannot be NULL at this point */ + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + +int rdep_s1p(void *input){ /* word_get_pos() of the nearest word to the right of the word returned by stack_s1(), and located before word_buffer_b0(), whose word_get_gov() equals minus its distance to that word; -1 if stack_s1() is NULL or no such word is found */ + config *c = input; + word *gov = stack_s1(config_get_stack((config *) c)); + int i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer((config *) c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer((config *) c)))) return -1; /* give up once i reaches the index of word_buffer_b0(); NOTE(review): the result of word_buffer_b0() is not NULL-checked here - confirm it cannot be NULL at this point */ + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + int rdep_s1r(void *input){ config *c = input; word *gov = stack_s1(config_get_stack((config *) c)); @@ -535,6 +635,26 @@ int rdep_b0r(void *input){ return -1; } +int rdep_b0p(void *input){ /* looks to the right of the word returned by word_buffer_b0() for a word whose word_get_gov() equals minus its distance to it and returns that word's word_get_pos(); -1 otherwise */ + config *c = input; + word *gov = word_buffer_b0(config_get_buffer((config *) c)); + int i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer((config *) c)) ; i++){ + dep = 
word_buffer_get_word_n(config_get_buffer((config *) c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer((config *) c)))) return -1; /* NOTE(review): gov is word_buffer_b0() and i starts at its index + 1, so this test holds on the first iteration; as written the function appears to return -1 whenever the loop body runs - confirm this is intended */ + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_pos(dep); + } + } + } + return -1; +} + int ndep_b0(void *input){ config *c = input; @@ -1118,10 +1238,16 @@ feat_lib *feat_lib_build(void) feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s0r", ldep_s0r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s0r", rdep_s0r); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s0p", ldep_s0p); /* NOTE(review): the *p functions return word_get_pos() yet are registered with FEAT_TYPE_LABEL - confirm this is the intended feature type */ + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s0p", rdep_s0p); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s1r", ldep_s1r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s1r", rdep_s1r); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s1p", ldep_s1p); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s1p", rdep_s1p); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_b0r", ldep_b0r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_b0r", rdep_b0r); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_b0p", ldep_b0p); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_b0p", rdep_b0p); feat_lib_add(fl, FEAT_TYPE_INT_7, (char *)"ndep_b0", ndep_b0); feat_lib_add(fl, FEAT_TYPE_INT_7, (char *)"ndep_s0", ndep_s0); diff --git a/maca_trans_parser/src/feat_fct.h b/maca_trans_parser/src/feat_fct.h index 8650e6b..35820ec 100644 --- a/maca_trans_parser/src/feat_fct.h +++ b/maca_trans_parser/src/feat_fct.h @@ -423,12 +423,18 @@ int gs0p(void *input); int ldep_s0r(void *input); int rdep_s0r(void *input); +int ldep_s0p(void *input); /* word_get_pos() of a word found left of stack s0, or -1 */ +int rdep_s0p(void *input); /* word_get_pos() of a word found right of stack s0, or -1 */ int ldep_s1r(void *input); int rdep_s1r(void *input); +int ldep_s1p(void *input); /* word_get_pos() of a word found left of stack s1, or -1 */ +int rdep_s1p(void *input); /* word_get_pos() of a word found right of stack s1, or -1 */ int ndep_b0(void *input); int ndep_s0(void *input); int ldep_b0r(void *input); int rdep_b0r(void *input); +int ldep_b0p(void *input); /* word_get_pos() of a word found left of buffer b0, or -1 */ +int rdep_b0p(void *input); /* word_get_pos() of a word found right of buffer b0, or -1 */ /* distance features */ diff --git 
a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c index ef78339..11a3bb5 100644 --- a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c @@ -152,6 +152,7 @@ int main(int argc, char *argv[]) ctx = context_read_options(argc, argv); maca_trans_parser_mcf2cff_check_options(ctx); + ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); if(ctx->mode == TRAIN_MODE){ -- GitLab