diff --git a/maca_common/include/mcd.h b/maca_common/include/mcd.h index 5c6747eba43a0cff072a031c0e412ace373d26bb..c45d5d79bb642104b75664d8e368bba0d753f28c 100644 --- a/maca_common/include/mcd.h +++ b/maca_common/include/mcd.h @@ -129,4 +129,6 @@ dico_vec *mcd_build_dico_vec(mcd *mcd_struct); int mcd_wf_code(char *wf); void mcd_remove_wf_column(mcd *m, int wf_code); mcd *mcd_copy(mcd *m); +char *mcd_get_str(mcd *m, int code, int col); + #endif diff --git a/maca_common/include/word.h b/maca_common/include/word.h index c1eb982862c9c4991cfe902631d96ea9a5ff3fd2..7620b6d8263d46fb1e240f9a72998c9521e7ba8e 100644 --- a/maca_common/include/word.h +++ b/maca_common/include/word.h @@ -3,6 +3,8 @@ #include "mcd.h" +#define WORD_INVALID_GOV 10000 + #define word_get_id(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID] #define word_get_form(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM] #define word_get_lemma(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA] @@ -12,7 +14,7 @@ #define word_get_gov(w) ((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV] #define word_get_label(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL] #define word_get_stag(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG] -#define word_get_sent_seg(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_SENT_SEG] +#define word_get_sent_seg(w) ((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG] #define word_get_A(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A] #define word_get_B(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B] #define word_get_C(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C] @@ -91,6 +93,7 @@ typedef struct _word { int label; char *form; int index; + int is_root; } word; word *word_new(char *input); diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c index 9a2dc6d94d2afefe9b1b70389c6bf71bb844479d..a9313d6babf7eccd421d1fe6d6bd5cb9d688c024 100644 --- a/maca_common/src/mcd.c +++ b/maca_common/src/mcd.c @@ -428,3 +428,14 @@ int mcd_wf_code(char *wf) if(!strcmp(wf, "Z")) return MCD_WF_Z; return -1; } + + +char *mcd_get_str(mcd *m, int code, int col) +{ + if((col < 0) || (col >= m->nb_col)) return NULL; + if(m->representation[col] == MCD_REPRESENTATION_VOCAB) + return (m->dico_array[col])? dico_int2string(m->dico_array[col], code) : NULL; + return NULL; +} + + diff --git a/maca_common/src/word.c b/maca_common/src/word.c index 1a668e4421a96b40af11657254c764c5c894447d..ebf00c6e47670a4988e86be11a1004eebb9f2e0e 100644 --- a/maca_common/src/word.c +++ b/maca_common/src/word.c @@ -17,11 +17,12 @@ word *word_new(char *input) for(i=0; i < MCD_WF_NB; i++) w->wf_array[i] = -1; - w->wf_array[MCD_WF_GOV] = 0; + w->wf_array[MCD_WF_GOV] = WORD_INVALID_GOV; w->form = NULL; w->index = -1; w->signature = -1; + w->is_root = 0; return w; } @@ -143,7 +144,7 @@ int word_is_eos(word *w, mcd *mcd_struct) int word_get_gov_index(word *w) { int index; - if(word_get_gov(w) == 0) return -1; + if(word_get_gov(w) == WORD_INVALID_GOV) return -1; index = (word_get_index(w)) + (word_get_gov(w)); return index; } diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index 61695c8d52f5cf0a0153c1bc7e8f493f824e8f73..e7cb52b7b6ce4c109f6ccdb2e62bfcf446c0fa2c 100644 --- a/maca_common/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -97,15 +97,22 @@ word *word_buffer_get_word_n(word_buffer *wb, int n) word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) { - FILE *f = myfopen(mcf_filename, "r"); + FILE *f; + + if(mcf_filename == NULL) + f = stdin; + else + f = myfopen(mcf_filename, "r"); word_buffer *wb = word_buffer_new(f, mcd_struct, 0); while(word_buffer_read_next_word(wb) != -1){ /* printf("load word %d\n", wb->nbelem - 1); */ } - fclose(f); + if(mcf_filename != NULL) + fclose(f); return wb; } + int word_buffer_read_next_word(word_buffer *wb) { word *w = NULL; @@ -121,7 +128,7 @@ int word_buffer_move_right(word_buffer *wb) { if((wb->nbelem - 1 - wb->current_index) <= wb->lookahead) word_buffer_read_next_word(wb); - if(wb->current_index == wb->nbelem - 1) return 0; + if(wb->current_index >= wb->nbelem) return 0; wb->current_index++; return 1; } diff --git a/maca_tools/src/mcf2conll.c b/maca_tools/src/mcf2conll.c index f518ff5e4b0a8d9efd647ef0e7acef1d9f0b918e..a19adc81dab31d62beab765a0625368d3eb127f9 100644 --- a/maca_tools/src/mcf2conll.c +++ b/maca_tools/src/mcf2conll.c @@ -171,36 +171,6 @@ context *context_read_options(int argc, char *argv[]) return ctx; } - - - -word_buffer *word_buffer_load_mcf2(char *mcf_filename, mcd *mcd_struct) -{ - FILE *f; - - if(mcf_filename == NULL) - f = stdin; - else - f = myfopen(mcf_filename, "r"); - word_buffer *wb = word_buffer_new(f, mcd_struct, 0); - while(word_buffer_read_next_word(wb) != -1){ - /* printf("load word %d\n", wb->nbelem - 1); */ - } - if(mcf_filename != NULL) - fclose(f); - return wb; -} - -char *mcd_get_str(mcd *m, int code, int col) -{ - if((col < 0) || (col >= m->nb_col)) return NULL; - if(m->representation[col] == MCD_REPRESENTATION_VOCAB) - return (m->dico_array[col])? dico_int2string(m->dico_array[col], code) : NULL; - return NULL; -} - - - int main(int argc, char *argv[]) { FILE *output_file; @@ -208,7 +178,7 @@ int main(int argc, char *argv[]) mcf2conll_check_options(ctx); - word_buffer *wb = word_buffer_load_mcf2(ctx->mcf_filename, ctx->mcd_struct); + word_buffer *wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct); word *w = NULL; int form_col = mcd_get_form_col(ctx->mcd_struct); int pos_col = mcd_get_pos_col(ctx->mcd_struct); diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index c5dcf448efae49799baaf604a09a780a70200dad..495f31de62a8ebe90ac4f5c71d9c3f4a878c88ed 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -4,10 +4,10 @@ set(SOURCES src/context.c src/movement_tagger.c src/feat_fct.c src/global_feat_vec.c - src/oracle_parser.c +# src/oracle_parser.c src/oracle_parser_arc_eager.c src/oracle_tagger.c - src/simple_decoder_parser.c +# src/simple_decoder_parser.c src/simple_decoder_parser_arc_eager.c src/simple_decoder_forrest.c src/simple_decoder_tagger.c @@ -35,17 +35,17 @@ target_link_libraries(maca_trans_tagger_mcf2cff transparse) target_link_libraries(maca_trans_tagger_mcf2cff maca_common) install (TARGETS maca_trans_tagger_mcf2cff DESTINATION bin) -add_executable(maca_trans_parser_mcf2fann ./src/maca_trans_parser_mcf2fann.c) -target_link_libraries(maca_trans_parser_mcf2fann perceptron) -target_link_libraries(maca_trans_parser_mcf2fann transparse) -target_link_libraries(maca_trans_parser_mcf2fann maca_common) -install (TARGETS maca_trans_parser_mcf2fann DESTINATION bin) - -add_executable(maca_trans_parser_mcf2cff ./src/maca_trans_parser_mcf2cff.c) -target_link_libraries(maca_trans_parser_mcf2cff perceptron) -target_link_libraries(maca_trans_parser_mcf2cff transparse) -target_link_libraries(maca_trans_parser_mcf2cff maca_common) -install (TARGETS maca_trans_parser_mcf2cff DESTINATION bin) +#add_executable(maca_trans_parser_mcf2fann ./src/maca_trans_parser_mcf2fann.c) +#target_link_libraries(maca_trans_parser_mcf2fann perceptron) +#target_link_libraries(maca_trans_parser_mcf2fann transparse) +#target_link_libraries(maca_trans_parser_mcf2fann maca_common) +#install (TARGETS maca_trans_parser_mcf2fann DESTINATION bin) + +#add_executable(maca_trans_parser_mcf2cff ./src/maca_trans_parser_mcf2cff.c) +#target_link_libraries(maca_trans_parser_mcf2cff perceptron) +#target_link_libraries(maca_trans_parser_mcf2cff transparse) +#target_link_libraries(maca_trans_parser_mcf2cff maca_common) +#install (TARGETS maca_trans_parser_mcf2cff DESTINATION bin) add_executable(maca_trans_parser_arc_eager_mcf2cff ./src/maca_trans_parser_arc_eager_mcf2cff.c) target_link_libraries(maca_trans_parser_arc_eager_mcf2cff perceptron) diff --git a/maca_trans_parser/src/config.c b/maca_trans_parser/src/config.c index 68e15dd464e277074117c0489c45ca56c34b379c..02c392ecee9fe1d1e3e36c2f849cec6f749e156b 100644 --- a/maca_trans_parser/src/config.c +++ b/maca_trans_parser/src/config.c @@ -18,8 +18,6 @@ config *config_new(FILE *f, mcd *mcd_struct, int lookahead) c->mvt_array_size = 0; c->mvt_array_nbelem = 0; c->gfv = global_feat_vec_new(); - - c->current_index = 1; return c; } @@ -40,22 +38,33 @@ void config_free(config *c) { stack_free(c->st); word_buffer_free(c->bf); - depset_free(c->ds); + /* depset_free(c->ds); */ if(c->mvt_array)free(c->mvt_array); if(c->gfv)global_feat_vec_free(c->gfv); free(c); } + + + + int config_is_terminal(config *c) { - return word_buffer_end(c->bf); + /* return (word_buffer_end(config_get_buffer(c)) && stack_is_empty(config_get_stack(c))); */ + + return (word_buffer_end(config_get_buffer(c)) && + (stack_is_empty(config_get_stack(c)) || + (stack_top(config_get_stack(c)) == NULL))); + + + /* return word_buffer_end(config_get_buffer(c)); */ } config *config_initial(FILE *f, mcd *mcd_struct, int lookahead) { config *c = config_new(f, mcd_struct, lookahead); - stack_push(c->st, word_create_dummy(mcd_struct)); + /* stack_push(c->st, word_create_dummy(mcd_struct)); */ return c; } diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index bbdbc3da3a6e0a7608ac35fb4780669a44ecd1e3..be6a90926c29f21b68dcb5c095bd368da94f3e7b 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -57,7 +57,7 @@ context *context_new(void) ctx->f2p_filename = NULL; ctx->maca_data_path = NULL; - ctx->language = strdup("fr_stream"); + ctx->language = strdup("fr"); ctx->root_label = strdup("root"); ctx->d_perceptron_features = NULL; @@ -159,10 +159,11 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[21] = + static struct option long_options[22] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, + {"debug", no_argument, 0, 'd'}, {"conll", no_argument, 0, 'c'}, {"stream", no_argument, 0, 'S'}, {"model", required_argument, 0, 'm'}, @@ -187,7 +188,7 @@ context *context_read_options(int argc, char *argv[]) opterr = 0; - while ((c = getopt_long (argc, argv, "hvcSm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdcSm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){ switch (c) { case 'h': @@ -196,6 +197,9 @@ context *context_read_options(int argc, char *argv[]) case 'v': ctx->verbose = 1; break; + case 'd': + ctx->debug_mode = 1; + break; case 'c': ctx->conll = 1; break; diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index 5740caadc35a99d1b09714354b3179343d3d4fc6..5c665a112a9733853578c851234138d99f0185ac 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -1,6 +1,7 @@ #include<stdio.h> #include<stdlib.h> #include<string.h> +#include"word_buffer.h" #include"stack.h" #include"feat_fct.h" #include"feat_types.h" @@ -8,7 +9,7 @@ /* word features */ /* words in the stack */ -int s0g(config *c) {return word_get_gov(stack_s0(config_get_stack(c)));} +int s0g(config *c) {return (word_get_gov(stack_s0(config_get_stack(c))) == WORD_INVALID_GOV) ? 0 : 1;} int s0sf(config *c) {return word_get_label(stack_s0(config_get_stack(c)));} int s0f(config *c) {return word_get_form(stack_s0(config_get_stack(c)));} @@ -47,7 +48,7 @@ int s0Z(config *c) {return word_get_Z(stack_s0(config_get_stack(c)));} int s0U1(config *c) {return word_get_U1(stack_s0(config_get_stack(c)));} int s0sgn(config *c) {return word_get_signature(stack_s0(config_get_stack(c)));} -int s1g(config *c) {return word_get_gov(stack_s1(config_get_stack(c)));} +int s1g(config *c) {return (word_get_gov(stack_s1(config_get_stack(c))) == WORD_INVALID_GOV) ? 0 : 1;} int s1sf(config *c) {return word_get_label(stack_s1(config_get_stack(c)));} int s1f(config *c) {return word_get_form(stack_s1(config_get_stack(c)));} @@ -151,6 +152,10 @@ int s3Z(config *c) {return word_get_Z(stack_s3(config_get_stack(c)));} /* words in the buffer */ +int b0g(config *c) {return (word_get_gov(word_buffer_b0(config_get_buffer(c))) == WORD_INVALID_GOV) ? 0 : 1;} +int b0sf(config *c) {return word_get_label(word_buffer_b0(config_get_buffer(c)));} + + int b0f(config *c) {return word_get_form(word_buffer_b0(config_get_buffer(c)));} int b0l(config *c) {return word_get_lemma(word_buffer_b0(config_get_buffer(c)));} int b0c(config *c) {return word_get_cpos(word_buffer_b0(config_get_buffer(c)));} @@ -393,165 +398,183 @@ int bm3Z(config *c) {return word_get_Z(word_buffer_bm3(config_get_buffer(c)));} /* structural features */ -/*int ldep_s0r(config *c){ - word *top = stack_top(config_get_stack(c)); - if(top) - if(top->last_left_child) - return top->last_left_child->label; - return -1; - }*/ - int ldep_s0r(config *c){ - word *top = stack_top(c->st); + word *gov = stack_s0(config_get_stack(c)); int i; - - if(top){ - if(word_get_index(top) >= c->ds->length) return -1; - for(i=word_get_index(top); i > 0; i--) - if(c->ds->array[i].gov == top) - return i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_label(dep); + } + } } return -1; } -/*int rdep_s0r(config *c){ - word *top = stack_top(c->st); - - if(top) - if(top->last_right_child) - return top->last_right_child->label; - return -1; - }*/ - -int rdep_s0r(config *c){ - word *top = stack_top(c->st); +int ldep_s1r(config *c){ + word *gov = stack_s1(config_get_stack(c)); int i; - - if(top) - for(i=word_get_index(top); i < c->ds->length; i++) - if(c->ds->array[i].gov == top) - return i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_label(dep); + } + } + } return -1; } -/*int ldep_b0r(config *c){ - word *top = queue_elt_n(c->bf,0); +int ldep_b0r(config *c){ + word *gov = word_buffer_b0(config_get_buffer(c)); + int i; + word *dep; + int dist; - if(top) - if(top->last_left_child) - return top->last_left_child->label; + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_sent_seg(dep) == 1) return -1; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist){ + return word_get_label(dep); + } + } + } return -1; - }*/ +} -int ldep_b0r(config *c){ - word *top = word_buffer_b0(c->bf); +int rdep_s0r(config *c){ + word *gov = stack_s0(config_get_stack(c)); int i; - - if(top){ - if(word_get_index(top) >= c->ds->length) return -1; - for(i=word_get_index(top); i > 0; i--) - if(c->ds->array[i].gov == top) - return i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer(c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer(c)))) return -1; + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_label(dep); + } + } } - return -1; } - - - -/*int rdep_b0r(config *c){ - word *top = queue_elt_n(c->bf,0); +int rdep_s1r(config *c){ + word *gov = stack_s1(config_get_stack(c)); + int i; + word *dep; + int dist; - if(top) - if(top->last_right_child) - return top->last_right_child->label; + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer(c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer(c)))) return -1; + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_label(dep); + } + } + } return -1; - }*/ +} int rdep_b0r(config *c){ - word *top = word_buffer_b0(c->bf); + word *gov = word_buffer_b0(config_get_buffer(c)); int i; - - if(top) - for(i=word_get_index(top); i < c->ds->length; i++) - if(c->ds->array[i].gov == top) - return i; + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer(c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer(c)))) return -1; + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist){ + return word_get_label(dep); + } + } + } return -1; } - - -/*int ndep_b0(config *c){ - word *top = queue_elt_n(c->bf,0); - int nb = 0; - word *child; - - if(top){ - for(child = top->first_left_child; child; child = child->next_sibling) - nb++; - - for(child = top->first_right_child; child; child = child->next_sibling) - nb++; - } - return (nb > 6)? 6 : nb; - }*/ - - int ndep_b0(config *c){ - word *top = word_buffer_b0(c->bf); - int i = 0; + word *gov = word_buffer_b0(config_get_buffer(c)); + int i; int n = 0; - - for(i=0; i < c->ds->length; i++) - if(c->ds->array[i].gov == top) - n++; - + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_sent_seg(dep) == 1) break; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist) + n++; + } + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer(c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer(c)))) break; + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist) + n++; + } + } return (n > 6)? 6 : n; } - -/* int ndep_s0(config *c){ - word *top = stack_top(c->st); - int nb = 0; - word *child; - - if(top){ - for(child = top->first_left_child; child; child = child->next_sibling) - nb++; - - for(child = top->first_right_child; child; child = child->next_sibling) - nb++; - } - return (nb > 6)? 6 : nb; - }*/ - -int ndep_s0(config *c){ - word *top = stack_top(c->st); - int i = 0; + word *gov = stack_s0(config_get_stack(c)); + int i; int n = 0; - - for(i=0; i < c->ds->length; i++) - if(c->ds->array[i].gov == top) - n++; - + word *dep; + int dist; + + if(gov){ + for(i=word_get_index(gov) - 1; i > 0 ; i--){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_sent_seg(dep) == 1) break; + dist = word_get_index(gov) - i; + if(word_get_gov(dep) == dist) + n++; + } + for(i=word_get_index(gov) + 1; i < word_buffer_get_nbelem(config_get_buffer(c)) ; i++){ + dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(i >= word_get_index(word_buffer_b0(config_get_buffer(c)))) break; + dist = i - word_get_index(gov); + if(word_get_gov(dep) == - dist) + n++; + } + } return (n > 6)? 6 : n; } - - /* distance features */ int dist_s0_b0(config *c){ int dist; - if(stack_is_empty(c->st) || word_buffer_is_empty(c->bf)) + if(stack_is_empty(config_get_stack(c)) || word_buffer_is_empty(config_get_buffer(c))) return 0; - dist = word_get_index(word_buffer_b0(c->bf)) - word_get_index(stack_top(c->st)); + dist = word_get_index(word_buffer_b0(config_get_buffer(c))) - word_get_index(stack_top(config_get_stack(c))); return (abs(dist) > 6)? 6 : dist; } @@ -560,12 +583,12 @@ int dist_s0_b0(config *c){ int sh(config *c) /* stack height */ { - return (c->st->size > 7)? 7 : c->st->size; + return (config_get_stack(c)->size > 7)? 7 : config_get_stack(c)->size; } int bh(config *c) /* buffer size */ { - return (c->bf->size > 7)? 7 : c->bf->size; + return (config_get_buffer(c)->size > 7)? 7 : config_get_buffer(c)->size; } int dh(config *c) /* depset size */ diff --git a/maca_trans_parser/src/feat_fct.h b/maca_trans_parser/src/feat_fct.h index 6f91a3afc8d1e91aae837918ec2fee505f3dbc80..41af327a4465b5cf6c19b97ab82d23bbbe44c261 100644 --- a/maca_trans_parser/src/feat_fct.h +++ b/maca_trans_parser/src/feat_fct.h @@ -155,6 +155,9 @@ int s3Y(config *c); int s3Z(config *c); int s3r(config *c); +int b0g(config *c); +int b0sf(config *c); + int b0f(config *c); int b0l(config *c); @@ -413,6 +416,8 @@ int gs0p(config *c); int ldep_s0r(config *c); int rdep_s0r(config *c); +int ldep_s1r(config *c); +int rdep_s1r(config *c); int ndep_b0(config *c); int ndep_s0(config *c); int ldep_b0r(config *c); diff --git a/maca_trans_parser/src/feat_lib.c b/maca_trans_parser/src/feat_lib.c index 6a2293f5327804028f34d66605af322ffaadeb14..a60a51103fb1f2ce52f15d9af69a291e18cd567e 100644 --- a/maca_trans_parser/src/feat_lib.c +++ b/maca_trans_parser/src/feat_lib.c @@ -177,6 +177,9 @@ feat_lib *feat_lib_build(void) feat_lib_add(fl, FEAT_TYPE_Y, (char *)"s3Y", s3Y); feat_lib_add(fl, FEAT_TYPE_Y, (char *)"s3Z", s3Z); + feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0g", b0g); + feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0sf", b0sf); + feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0f", b0f); feat_lib_add(fl, FEAT_TYPE_LEMMA, (char *)"b0l", b0l); feat_lib_add(fl, FEAT_TYPE_CPOS, (char *)"b0c", b0c); @@ -430,6 +433,8 @@ feat_lib *feat_lib_build(void) feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s0r", ldep_s0r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s0r", rdep_s0r); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_s1r", ldep_s1r); + feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_s1r", rdep_s1r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"ldep_b0r", ldep_b0r); feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"rdep_b0r", rdep_b0r); diff --git a/maca_trans_parser/src/maca_trans_parser.c b/maca_trans_parser/src/maca_trans_parser.c index f65c10dd4ee3ac23498ae6204d34bec9a4d8e427..43a304507aca2b817b028eeb5b4114b7e11e1c3f 100644 --- a/maca_trans_parser/src/maca_trans_parser.c +++ b/maca_trans_parser/src/maca_trans_parser.c @@ -11,7 +11,7 @@ #include"feature_table.h" #include"dico.h" #include"beam.h" -#include"simple_decoder_parser.h" +#include"simple_decoder_parser_arc_eager.h" /*#include"dnn_decoder.h"*/ #include"config2feat_vec.h" @@ -112,14 +112,14 @@ int main(int argc, char *argv[]) return 1; } - ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1; + ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; /* load models */ ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); if(ctx->beam_width == 1){ - simple_decoder(ctx); + simple_decoder_parser_arc_eager(ctx); } /* else beam_decoder(conll_file, ctx->mcd_struct, ctx->d_perceptron_features, ctx->dico_labels, ft, ctx->features_model, ctx->verbose, root_label, ctx->beam_width, ctx->mvt_nb); diff --git a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c index ae143cd277cd38eadedbd5aa2086de5b31c073a3..ea9d8cf9fded0975ed3d27969a0164cf56dfb680 100644 --- a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c @@ -53,7 +53,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx) int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label); word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct); FILE *mcf_file = myfopen(ctx->input_filename, "r"); - int start_sentence_index = 0; /* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */ /* the idea is to ignore syntax in the mcf file that will be read */ @@ -62,8 +61,9 @@ void generate_training_file_stream(FILE *output_file, context *ctx) mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL); + mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_SENT_SEG); - c = config_initial_no_dummy_word(mcf_file, mcd_struct_hyp, 5); + c = config_initial(mcf_file, mcd_struct_hyp, 5); while(!word_buffer_end(ref) && (sentence_nb < ctx->sent_nb)){ /*printf("************ REF ************\n"); @@ -72,23 +72,20 @@ void generate_training_file_stream(FILE *output_file, context *ctx) config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); - mvt_code = oracle_parser_arc_eager(c, ref, start_sentence_index, root_label); + mvt_code = oracle_parser_arc_eager(c, ref, root_label); mvt_type = movement_type(mvt_code); mvt_label = movement_label(mvt_code); - - /* config_print(stdout,c); */ - /* movement_print(stdout, mvt_code, ctx->dico_labels); */ - + + if(ctx->debug_mode){ + config_print(stdout,c); + movement_print(stdout, mvt_code, ctx->dico_labels); + } fprintf(output_file, "%d", mvt_code); feat_vec_print(output_file, fv); if(mvt_type == MVT_EOS){ movement_eos(c, 0); sentence_nb++; - start_sentence_index = word_get_index(word_buffer_b0(config_get_buffer(c))) - 1; - - - /* config_print(stdout,c); */ if(word_buffer_is_last(ref)) break; } @@ -147,7 +144,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "cannot find label names\n"); return 1; } - ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1; + ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); diff --git a/maca_trans_parser/src/movement_parser_arc_eager.c b/maca_trans_parser/src/movement_parser_arc_eager.c index 3941705e5e0ab38c70aac18847384dec3060f1e8..564da2a0e7c849eb3e6710286782b888a4e6a29a 100644 --- a/maca_trans_parser/src/movement_parser_arc_eager.c +++ b/maca_trans_parser/src/movement_parser_arc_eager.c @@ -20,20 +20,20 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels){ int movement_type(int mvt) { - if(mvt == MVT_SHIFT) return MVT_SHIFT; /* 0 is the code of shift */ - if(mvt == MVT_REDUCE) return MVT_REDUCE; /* 1 is the code of reduce */ - if(mvt == MVT_ROOT) return MVT_ROOT; /* 2 is the code of root */ - if(mvt == MVT_EOS) return MVT_EOS; /* 3 is the code of root */ - if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */ - return MVT_RIGHT; /* odd movements are right movements */ + if(mvt == MVT_SHIFT) return MVT_SHIFT; + if(mvt == MVT_REDUCE) return MVT_REDUCE; + if(mvt == MVT_ROOT) return MVT_ROOT; + if(mvt == MVT_EOS) return MVT_EOS; + if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */ + return MVT_RIGHT; /* odd movements are right movements */ } int movement_label(int mvt) { - if(mvt == MVT_SHIFT) return -1; /* 0 is the code of shift */ - if(mvt == MVT_REDUCE) return -1; /* 1 is the code of reduce */ - if(mvt == MVT_ROOT) return -1; /* 2 is the code of root */ - if(mvt == MVT_EOS) return -1; /* 3 is the code of eos */ + if(mvt == MVT_SHIFT) return -1; + if(mvt == MVT_REDUCE) return -1; + if(mvt == MVT_ROOT) return -1; + if(mvt == MVT_EOS) return -1; if(mvt % 2 == 0) /* even codes correspond to left movements */ return mvt / 2 - 2; return (mvt - 1) / 2 - 2; /* odd codes correspond to right movements */ @@ -41,16 +41,10 @@ int movement_label(int mvt) int movement_eos(config *c, float score) { - if(stack_is_empty(config_get_stack(c))) return 1; + if(stack_is_empty(config_get_stack(c))) return 0; /* word on the top of the stack is sent_seg */ - word_set_sent_seg(stack_top(config_get_stack(c)), 1); - - /* perform all pending reduce */ - while(movement_reduce(c,0)); - - /* remove root from stack */ - stack_pop(config_get_stack(c)); + word_set_sent_seg(stack_top(config_get_stack(c)), 1); config_add_mvt(c, MVT_EOS); return 1; @@ -58,18 +52,16 @@ int movement_eos(config *c, float score) int movement_left_arc(config *c, int label, float score) { - /* the dummy word cannot be a dependent */ - /* if(stack_height(config_get_stack(c)) < 2) return 0; */ + if(stack_is_empty(config_get_stack(c))) return 0; /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ + /* word on top of the stack should not have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) != 0) return 0; + if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; word *gov = word_buffer_b0(config_get_buffer(c)); word *dep = stack_top(config_get_stack(c)); int dist = (word_get_index(gov)) - (word_get_index(dep)); - /* printf("create left arc %d <- %d dist = %d\n", word_get_index(dep), word_get_index(gov), dist); */ - /* create a new dependency */ word_set_gov(dep, dist); word_set_label(dep, label); @@ -82,13 +74,10 @@ int movement_left_arc(config *c, int label, float score) int movement_right_arc(config *c, int label, float score) { if(stack_is_empty(config_get_stack(c))) return 0; - /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ word *gov = stack_top(config_get_stack(c)); word *dep = word_buffer_b0(config_get_buffer(c)); int dist = (word_get_index(gov)) - (word_get_index(dep)); - - /* printf("create right arc %d -> %d dist = %d\n", word_get_index(gov), word_get_index(dep), dist); */ /* create a new dependency */ word_set_gov(dep, dist); @@ -114,7 +103,7 @@ int movement_reduce(config *c, float score) { if(stack_is_empty(config_get_stack(c))) return 0; /* word on top of stack must have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) == 0) return 0; + if(word_get_gov(stack_top(config_get_stack(c))) == WORD_INVALID_GOV) return 0; stack_pop(config_get_stack(c)); config_add_mvt(c, MVT_REDUCE); return 1; @@ -122,10 +111,12 @@ int movement_reduce(config *c, float score) int movement_root(config *c, float score, int root_code) { - word *b0 = word_buffer_b0(config_get_buffer(c)); - word_set_gov(b0, 0); - word_set_label(b0, root_code); + word *s0 = stack_top(config_get_stack(c)); + if(s0 == NULL) return 0; + word_set_gov(s0, 0); + word_set_label(s0, root_code); + s0->is_root = 1; + stack_pop(config_get_stack(c)); config_add_mvt(c, MVT_ROOT); return 1; } - diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.c b/maca_trans_parser/src/oracle_parser_arc_eager.c index 2e41c609187f4ef488478728bb212ab78565aee0..e77d9e4cb96eab8f079f458cff12641f620c92fa 100644 --- a/maca_trans_parser/src/oracle_parser_arc_eager.c +++ b/maca_trans_parser/src/oracle_parser_arc_eager.c @@ -4,65 +4,104 @@ #include"word_buffer.h" #include"movement_parser_arc_eager.h" -int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int word_index, int start_sentence_index) +int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int word_index) { int dep; int gov_ref; int gov_hyp; - int max_sent_length = 300; - int max = ((start_sentence_index + max_sent_length) > ref->nbelem)? ref->nbelem : (start_sentence_index + max_sent_length); - for(dep=start_sentence_index; dep < max; dep++){ + int sentence_change; + +#if 1 + for(dep = word_index - 1; (dep > 0) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0); dep--){ + gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); + if(gov_ref == word_index){ /* found a dependent of word in ref */ + /* look for a dependency in hyp such that its dependent is dep */ + gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); + + if(gov_hyp != gov_ref) return 0; + } + } + + for(dep = word_index + 1; ((dep < word_buffer_get_nbelem(ref)) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0)); dep++){ gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); if(gov_ref == word_index){ /* found a dependent of word in ref */ /* look for a dependency in hyp such that its dependent is dep */ - /* printf("found a dep of word %d in ref, it is %d\n", word_index, dep); */ gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); - /* printf("gov of %d in hyp is %d\n", dep,gov_hyp); */ if(gov_hyp != gov_ref) return 0; - /* - if((dep >= c->ds->length) - || (c->ds->array[dep].gov == NULL) - || (word_get_index(c->ds->array[dep].gov) != word_index) - || (c->ds->array[dep].label != word_get_label(ref->words[dep]))) - return 0;*/ } } +#endif + +#if 0 + sentence_change = 0; + for(dep = word_index - 1; (dep > 0) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0); dep--){ + /* printf("dep = %d\n", dep); */ + if(word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 1) + sentence_change = 1; + gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); + if(gov_ref == word_index){ /* found a dependent of word in ref */ + /* look for a dependency in hyp such that its dependent is dep */ + gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); + if(gov_hyp != gov_ref) return 0; + } + } + + sentence_change = 0; + for(dep = word_index + 1; + (dep < word_buffer_get_nbelem(ref)) && (sentence_change == 0); dep++){ + /* printf("dep = %d\n", dep); */ + if(word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 1) + sentence_change = 1; + gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); + if(gov_ref == word_index){ /* found a dependent of word in ref */ + /* look for a dependency in hyp such that its dependent is dep */ + gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); + if(gov_hyp != gov_ref) return 0; + } + } + +#endif + return 1; } -int oracle_parser_arc_eager(config *c, word_buffer *ref, int start_sentence_index, int root_label) +int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label) { word *s0; /* word on top of stack */ word *b0; /* next word in the bufer */ int s0_index, b0_index; int s0_gov_index, b0_gov_index; - int b0_label; - int b0_label_in_hyp; + int s0_label; + /* int s0_label_in_hyp; */ - b0 = word_buffer_b0(config_get_buffer(c)); - b0_index = word_get_index(b0); - b0_gov_index = word_get_gov_index(word_buffer_get_word_n(ref, b0_index)); - b0_label = word_get_label(word_buffer_get_word_n(ref, b0_index)); - b0_label_in_hyp = word_get_label(word_buffer_get_word_n(config_get_buffer(c), b0_index)); - - /* b0 is the root of the sentence */ - if((b0_label == root_label) && (b0_label_in_hyp != root_label)){ - return MVT_ROOT; - } - /* if(!stack_is_empty(config_get_stack(c)) && !word_buffer_is_empty(config_get_buffer(c))){ */ if(!stack_is_empty(config_get_stack(c))){ + + b0 = word_buffer_b0(config_get_buffer(c)); + b0_index = word_get_index(b0); + b0_gov_index = word_get_gov_index(word_buffer_get_word_n(ref, b0_index)); + s0 = stack_top(config_get_stack(c)); s0_index = word_get_index(s0); s0_gov_index = word_get_gov_index(word_buffer_get_word_n(ref, s0_index)); + s0_label = word_get_label(word_buffer_get_word_n(ref, s0_index)); + /* s0_label_in_hyp = word_get_label(word_buffer_get_word_n(config_get_buffer(c), s0_index)); */ /* printf("s0_index = %d b0_index = %d\n", s0_index, b0_index); printf("dans ref gov de s0 (%d) = %d\n", s0_index, s0_gov_index); printf("dans ref gov de b0 (%d) = %d\n", b0_index, b0_gov_index);*/ + /* s0 is the root of the sentence */ + if((s0_label == root_label) + && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) + ){ + return MVT_ROOT; + } + /* word on the top of the stack is an end of sentence marker */ - if(word_get_sent_seg(s0) == 1){ + if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1) + && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)){ return MVT_EOS; } @@ -76,13 +115,11 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int start_sentence_inde return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); } /* REDUCE */ - if( - /* (stack_height(config_get_stack(c)) > 2) */ - check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index, start_sentence_index) - && (word_get_gov(stack_top(config_get_stack(c))) != 0)) /* word on top of the stack has a goveror */ - { - return MVT_REDUCE; - } + if((stack_height(config_get_stack(c)) > 2) + && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) + && (word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV)){ /* word on top of the stack has a goveror */ + return MVT_REDUCE; + } } /* SHIFT */ diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.h b/maca_trans_parser/src/oracle_parser_arc_eager.h index 9b92d5d284f7e183c726efc1042af06f2acbf734..af3131214143e88c3fb267f538ab5cbb9ba3f59f 100644 --- a/maca_trans_parser/src/oracle_parser_arc_eager.h +++ b/maca_trans_parser/src/oracle_parser_arc_eager.h @@ -6,6 +6,6 @@ #include"word_buffer.h" -int oracle_parser_arc_eager(config *c, word_buffer *ref, int start_sentence_index, int root_label); +int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label); #endif diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c index 1be8bbf27129f9e0d3e51352888fcaadb81d2429..03b1f7c8863017810eda9edfc1b5872882a6b2d8 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c @@ -15,19 +15,23 @@ void print_word_buffer(config *c, dico *dico_labels) int i; word *dep; char *label; + int root_position = 0; + for(i=0; i < config_get_buffer(c)->nbelem; i++){ dep = word_buffer_get_word_n(config_get_buffer(c), i); + if(word_get_gov(dep) == 0) root_position = i; printf("%s\t", word_get_input(dep)); - printf("%d\t", word_get_gov(dep)); - label = (word_get_label(dep) == -1)? NULL : dico_int2string(dico_labels, word_get_label(dep)); - if(label != NULL) - printf("%s\t", label) ; - else - printf("_\t"); - if((label != NULL) && !strcmp(label, "eos")) - printf("1\n"); - else - printf("0\n"); + if(word_get_sent_seg(dep) == 1){ + printf("%d\teos\t1\n", root_position - i); + } + else{ + printf("%d\t", word_get_gov(dep)); + label = (word_get_label(dep) == -1)? NULL : dico_int2string(dico_labels, word_get_label(dep)); + if(label != NULL) + printf("%s\t0\n", label) ; + else + printf("_\t0\n"); + } } } @@ -43,65 +47,55 @@ void simple_decoder_parser_arc_eager(context *ctx) float max; feat_vec *fv = feat_vec_new(feature_types_nb); config *c = NULL; + int result; root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; - c = config_initial_no_dummy_word(f, ctx->mcd_struct, 5); - while(1){ + c = config_initial(f, ctx->mcd_struct, 5); + while(!config_is_terminal(c)){ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); mvt_type = movement_type(mvt_code); mvt_label = movement_label(mvt_code); - config_print(stdout, c); - movement_print(stdout, mvt_code, ctx->dico_labels); + if(ctx->debug_mode){ + fprintf(stdout, "***********************************\n"); + config_print(stdout, c); + movement_print(stdout, mvt_code, ctx->dico_labels); + } + result = 0; + switch(mvt_type){ + case MVT_LEFT : + result = movement_left_arc(c, mvt_label, max); + break; + case MVT_RIGHT: + result = movement_right_arc(c, mvt_label, max); + break; + case MVT_REDUCE: + result = movement_reduce(c, max); + break; + case MVT_ROOT: + result = movement_root(c, max, root_label); + break; + case MVT_EOS: + result = movement_eos(c, max); + break; + case MVT_SHIFT: + result = movement_shift(c, 1, max); + } - if(mvt_type == MVT_LEFT) - if(movement_left_arc(c, mvt_label, max)){ - if(word_buffer_is_last(config_get_buffer(c))) - break; - continue; + if(result == 0){ + if(ctx->debug_mode){ + fprintf(stdout, "WARNING : movement cannot be executed !\n"); } - - if(mvt_type == MVT_RIGHT) - if(movement_right_arc(c, mvt_label, max)){ - if(word_buffer_is_last(config_get_buffer(c))) - break; - continue; - } - - if(mvt_type == MVT_REDUCE) - if(movement_reduce(c, max)){ - if(word_buffer_is_last(config_get_buffer(c))) - break; - continue; - } - - if(mvt_type == MVT_ROOT) - if(movement_root(c, max, root_label)){ - if(word_buffer_is_last(config_get_buffer(c))) - break; - continue; - } - - /* if(mvt_type == MVT_EOS) - if(movement_eos(c, max)){ - if(word_buffer_is_last(config_get_buffer(c))) - break; - continue; - } - */ - if(word_buffer_is_last(config_get_buffer(c))) - break; - - movement_shift(c, 1, max); - + movement_shift(c, 1, max); + } } print_word_buffer(c, ctx->dico_labels); - /* config_free(c); */ + config_free(c); feat_vec_free(fv); feature_table_free(ft); if(ctx->input_filename) diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index 9e3341c362a39892648f55cbf32b665856cdff48..da0e990d72097c336491c378e64644e37809cc4a 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -44,7 +44,7 @@ void simple_decoder_buffer(context *ctx) int i; word *w = NULL; FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; - config *c = config_initial_no_dummy_word(f, ctx->mcd_struct, 0); + config *c = config_initial(f, ctx->mcd_struct, 0); /* read a sentence and put it in the buffer */ while(word_buffer_read_sentence(c->bf)){ @@ -68,7 +68,7 @@ void simple_decoder_buffer(context *ctx) /* printf("\n"); */ /* config_free(c); */ - c = config_initial_no_dummy_word(f, ctx->mcd_struct, 0); + c = config_initial(f, ctx->mcd_struct, 0); } if(ctx->input_filename) fclose(f);