diff --git a/maca_common/include/word.h b/maca_common/include/word.h index 7620b6d8263d46fb1e240f9a72998c9521e7ba8e..3990714bf0e6ec67c62dbbc1a5a7fe83a25e23e3 100644 --- a/maca_common/include/word.h +++ b/maca_common/include/word.h @@ -5,7 +5,7 @@ #define WORD_INVALID_GOV 10000 -#define word_get_id(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID] +#define word_get_id(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID] #define word_get_form(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM] #define word_get_lemma(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA] #define word_get_cpos(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS] diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index 2b182b73f6d0b91e857b5035764ee8258f27750f..bce1b7d90fdc33d21ff4395834e2db96620e1087 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -13,15 +13,15 @@ #define word_buffer_get_input_file(wb) (wb)->input_file #define word_buffer_get_mcd(wb) (wb)->mcd_struct -#define word_buffer_b0(wb) ((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index] -#define word_buffer_b1(wb) ((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1] -#define word_buffer_b2(wb) ((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2] -#define word_buffer_b3(wb) ((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3] -#define word_buffer_bm1(wb) ((wb)->current_index - 1 < 0)? NULL : (wb)->array[(wb)->current_index - 1] -#define word_buffer_bm2(wb) ((wb)->current_index - 2 < 0)? NULL : (wb)->array[(wb)->current_index - 2] -#define word_buffer_bm3(wb) ((wb)->current_index - 3 < 0)? NULL : (wb)->array[(wb)->current_index - 3] -#define word_buffer_nb_elts_right(wb) (wb)->nbelem - (wb)->current_index - 1 -#define word_buffer_nb_elts_left(wb) (wb)->current_index +#define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) +#define word_buffer_b1(wb) (((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1]) +#define word_buffer_b2(wb) (((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2]) +#define word_buffer_b3(wb) (((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3]) +#define word_buffer_bm1(wb) (((wb)->current_index - 1 < 0)? NULL : (wb)->array[(wb)->current_index - 1]) +#define word_buffer_bm2(wb) (((wb)->current_index - 2 < 0)? NULL : (wb)->array[(wb)->current_index - 2]) +#define word_buffer_bm3(wb) (((wb)->current_index - 3 < 0)? NULL : (wb)->array[(wb)->current_index - 3]) +#define word_buffer_nb_elts_right(wb) ((wb)->nbelem - (wb)->current_index - 1) +#define word_buffer_nb_elts_left(wb) ((wb)->current_index) typedef struct { diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index 6ad73f7fd8743d5a5e9e0ce18ccce04e5ad4856f..85d67be561ad16cd6349bf20b8cbec8c62aadf57 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -1,31 +1,32 @@ set(SOURCES src/context.c - src/feat_desc.c -src/movement_parser_arc_eager.c - src/movement_tagparser_arc_eager.c - src/movement_tagger.c - src/feat_fct.c - src/global_feat_vec.c -# src/oracle_parser.c - src/oracle_parser_arc_eager.c - src/oracle_tagparser_arc_eager.c - src/oracle_tagger.c - src/simple_decoder_parser.c - src/simple_decoder_parser_arc_eager.c - src/simple_decoder_tagparser_arc_eager.c - src/simple_decoder_forrest.c - src/simple_decoder_tagger.c - src/feat_lib.c - src/stack.c - src/config2feat_vec.c - src/depset.c - src/feat_model.c - src/config.c - src/queue.c -# src/beam.c -src/feat_types.c -src/mvt.c -src/mvt_stack.c -) + src/feat_desc.c + src/movements.c + src/movement_parser_arc_eager.c + src/movement_tagparser_arc_eager.c + src/movement_tagger.c + src/feat_fct.c + src/global_feat_vec.c + # src/oracle_parser.c + src/oracle_parser_arc_eager.c + src/oracle_tagparser_arc_eager.c + src/oracle_tagger.c +# src/simple_decoder_parser.c + src/simple_decoder_parser_arc_eager.c + src/simple_decoder_tagparser_arc_eager.c +# src/simple_decoder_forrest.c + src/simple_decoder_tagger.c + src/feat_lib.c + src/stack.c + src/config2feat_vec.c + src/depset.c + src/feat_model.c + src/config.c +# src/queue.c + # src/beam.c + src/feat_types.c + src/mvt.c + src/mvt_stack.c + ) #compiling library include_directories(src) @@ -82,11 +83,11 @@ target_link_libraries(maca_trans_tagparser transparse) target_link_libraries(maca_trans_tagparser maca_common) install (TARGETS maca_trans_tagparser DESTINATION bin) -add_executable(maca_trans_parser_forrest ./src/decode_forrest.c) -target_link_libraries(maca_trans_parser_forrest perceptron) -target_link_libraries(maca_trans_parser_forrest transparse) -target_link_libraries(maca_trans_parser_forrest maca_common) -install (TARGETS maca_trans_parser_forrest DESTINATION bin) +#add_executable(maca_trans_parser_forrest ./src/decode_forrest.c) +#target_link_libraries(maca_trans_parser_forrest perceptron) +#target_link_libraries(maca_trans_parser_forrest transparse) +#target_link_libraries(maca_trans_parser_forrest maca_common) +#install (TARGETS maca_trans_parser_forrest DESTINATION bin) add_executable(maca_trans_tagger ./src/maca_trans_tagger.c) target_link_libraries(maca_trans_tagger perceptron) diff --git a/maca_trans_parser/src/cff2fann.c b/maca_trans_parser/src/cff2fann.c new file mode 100644 index 0000000000000000000000000000000000000000..2167fa8d0108633ea55e3b3802bb271d3b94157d --- /dev/null +++ b/maca_trans_parser/src/cff2fann.c @@ -0,0 +1,113 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<unistd.h> +#include<getopt.h> +#include"context.h" +#include"util.h" +#include"cf_file.h" + + +void cff2fann_help_message(context *ctx) +{ + context_general_help_message(ctx); + context_mode_help_message(ctx); + context_sent_nb_help_message(ctx); + + fprintf(stderr, "INPUT\n"); + context_conll_help_message(ctx); + fprintf(stderr, "IN TEST MODE\n"); + context_vocabs_help_message(ctx); + + fprintf(stderr, "OUTPUT\n"); + context_cff_help_message(ctx); + fprintf(stderr, "IN TRAIN MODE\n"); + context_vocabs_help_message(ctx); + +} + +void cff2fann_check_options(context *ctx) +{ + if(!ctx->input_filename + || ctx->help + /* || !ctx->mcd_filename */ + /* || !(ctx->cff_filename || ctx->fann_filename) */ + ){ + cff2fann_help_message(ctx); + exit(1); + } +} + +void one_hot_print(FILE *f, int val, int dim) +{ + int i; + for(i=0; i < dim; i++) + fprintf(f, "%d ", (i == val)? 1 : 0); +} + +void cff2fann(context *ctx) +{ + char buffer[10000]; + char *token; + int col_nb; + int feat_type; + mcd *m = ctx->mcd_struct; + FILE *f = myfopen(ctx->input_filename, "r"); + int val; + + while(fgets(buffer, 10000, f)){ + /* printf("%s", buffer); */ + /* printf("\n"); */ + token = strtok(buffer, "\t"); + col_nb = 0; + while(token){ + /* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */ + val = atoi(token); + if(col_nb == 0){ + one_hot_print(stdout, val, ctx->mvt_nb); + printf("\n"); + } + else{ + feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1); + /* printf("feat_type = %d\n", feat_type); */ + int mcd_col = m->wf2col[feat_type]; + /* printf("representation = %d\n", m->representation[mcd_col]); */ + if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ + /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */ + word_emb_print(stdout, m->word_emb_array[mcd_col], val); + printf("\n"); + } + if(m->representation[mcd_col] == MCD_REPRESENTATION_VOCAB){ + /* printf("it is a vocab\n"); */ + one_hot_print(stdout, val, m->dico_array[mcd_col]->nbelem); + printf("\n"); + } + } + col_nb++; + token = strtok(NULL , "\t"); + } + } +} + +int main(int argc, char *argv[]) +{ + context *ctx; + int nb_feat; + int nb_class; + + ctx = context_read_options(argc, argv); + cff2fann_check_options(ctx); + + ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + + ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); + + look_for_number_of_features_and_classes(ctx->input_filename, &nb_feat, &nb_class); + ctx->mvt_nb = nb_class; + + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, 1); + + cff2fann(ctx); + return 0; +} + diff --git a/maca_trans_parser/src/compare_traces.c b/maca_trans_parser/src/compare_traces.c index a4d1875da2223b891457657293752502712b4f31..f6567c3df39b0938ad658283241576387d3914b7 100644 --- a/maca_trans_parser/src/compare_traces.c +++ b/maca_trans_parser/src/compare_traces.c @@ -112,7 +112,7 @@ void trace_compare(trace *ref, trace *hyp) int index_hyp = 0; int index_ref = 0; configuration *c_ref, *c_hyp; - int status = 1; + while(1){ c_ref = ref->array[index_ref]; c_hyp = hyp->array[index_hyp]; @@ -123,7 +123,6 @@ void trace_compare(trace *ref, trace *hyp) configuration_print(stdout, c_hyp); if(configuration_equal(c_ref, c_hyp)){ - status = 1; fprintf(stdout, "EQUAL\n"); if(strcmp(c_ref->movement, c_hyp->movement)){ /* fprintf(stdout, "BAAD\t%s\t%s\t%f\n", c_ref->movement, c_hyp->movement, c_hyp->score); */ @@ -138,7 +137,6 @@ void trace_compare(trace *ref, trace *hyp) } else{ fprintf(stdout, "DIFFERENT\n"); - status = 0; if(c_ref->index > c_hyp->index) index_hyp++; else if(c_ref->index < c_hyp->index) diff --git a/maca_trans_parser/src/config.c b/maca_trans_parser/src/config.c index 392299edc8070cd6b03f45028154fd4ca7b00ff6..ed5e3f0b7c04f2c636f82573d79ea66389372488 100644 --- a/maca_trans_parser/src/config.c +++ b/maca_trans_parser/src/config.c @@ -8,105 +8,35 @@ config *config_new(FILE *f, mcd *mcd_struct, int lookahead) { config *c = (config *)memalloc(sizeof(config)); - c->ds = depset_new(); - c->f = f; - c->mcd_struct = mcd_struct; c->st = stack_new(); c->bf = word_buffer_new(f, mcd_struct, lookahead); - c->score = 0; - c->mvt_array = NULL; - c->mvt_array_size = 0; - c->mvt_array_nbelem = 0; - c->gfv = global_feat_vec_new(); + c->history = mvt_stack_new(); return c; } -/*word *config_add_next_word_to_buffer(config *c) -{ - word *w = NULL; - - w = word_read(c->f, c->mcd_struct); - if(w == NULL) return NULL; - - word_set_index(w, c->current_index); - c->current_index++; - queue_add(c->bf, w); - return w; - }*/ - void config_free(config *c) { stack_free(c->st); word_buffer_free(c->bf); - /* depset_free(c->ds); */ - if(c->mvt_array)free(c->mvt_array); - if(c->gfv)global_feat_vec_free(c->gfv); + mvt_stack_free(c->history); free(c); } - - - - int config_is_terminal(config *c) { - /* return (word_buffer_end(config_get_buffer(c)) && stack_is_empty(config_get_stack(c))); */ - return (word_buffer_end(config_get_buffer(c)) && (stack_is_empty(config_get_stack(c)) || (stack_top(config_get_stack(c)) == NULL))); - - - /* return word_buffer_end(config_get_buffer(c)); */ -} - -config *config_initial(FILE *f, mcd *mcd_struct, int lookahead) -{ - config *c = config_new(f, mcd_struct, lookahead); - - /* stack_push(c->st, word_create_dummy(mcd_struct)); */ - - return c; -} - -config *config_initial_no_dummy_word(FILE *f, mcd *mcd_struct, int lookahead) -{ - config *c = config_new(f, mcd_struct, lookahead); - - return c; } -/* must be checked things probably missing */ - -config *config_copy(config *o) +void config_push_mvt(config *c, int type, word *gov, word *dep) { - int i; - config *c = (config *)memalloc(sizeof(config)); - c->ds = depset_copy(o->ds); - c->st = stack_copy(o->st); - /* c->bf = queue_copy(o->bf); */ - c->score = o->score; - - c->f = o->f; - c->mcd_struct = o->mcd_struct; - - c->mvt_array_size = o->mvt_array_size; - c->mvt_array_nbelem = o->mvt_array_nbelem; - c->mvt_array = (int *)memalloc(c->mvt_array_size * sizeof(int)); - for(i=0; i < c->mvt_array_nbelem; i++) - c->mvt_array[i] = o->mvt_array[i]; - - c->gfv = global_feat_vec_copy(o->gfv); - return c; + mvt_stack_push(config_get_history(c), mvt_new(type, gov, dep)); } -void config_add_mvt(config *c, int mvt) +mvt *config_pop_mvt(config *c) { - if(c->mvt_array_size == c->mvt_array_nbelem){ - c->mvt_array_size = 2 * (c->mvt_array_size + 1); - c->mvt_array = (int *)realloc(c->mvt_array, c->mvt_array_size * sizeof(int)); - } - c->mvt_array[c->mvt_array_nbelem++] = mvt; + return mvt_stack_pop(config_get_history(c)); } void config_print(FILE *f, config *c) @@ -121,48 +51,3 @@ void config_print(FILE *f, config *c) } } -void config_connect_subtrees(config *c, int root_label) -{ - int i; - - if(stack_height(c->st) > 1){ /* stack contains other elements that dummy token */ - /* make all elements in the stack dependents of the dummy token with a root_label relation */ - for(i=1; i < stack_height(c->st); i++){ - depset_add(c->ds, c->st->array[0], root_label, c->st->array[i]); - } - } -} - -int config_equal2(config *c1, config *c2) -{ - int i; - - fprintf(stdout, "pred score = %.4f\n", c1->score); - fprintf(stdout, "mvt seq "); - for(i=0; i < c1->mvt_array_nbelem; i++) - fprintf(stdout, "%d ", c1->mvt_array[i]); - fprintf(stdout, "\n"); - - fprintf(stdout, "dep set "); - depset_print(stdout, c1->ds); - - return depset_compare(c1->ds, c2->ds); -} - -int config_equal(config *c1, config *c2) -{ - int i; - - /* for(i=0; i < c2->mvt_array_nbelem; i++) - fprintf(stdout, "%d ", c2->mvt_array[i]); - fprintf(stdout, "\n");*/ - - - if(c1->mvt_array_nbelem != c2->mvt_array_nbelem) - return 0; - for(i=0; i < c1->mvt_array_nbelem; i++){ - if(c1->mvt_array[i] != c2->mvt_array[i]) - return 0; - } - return 1; -} diff --git a/maca_trans_parser/src/config.h b/maca_trans_parser/src/config.h index b122662efcc8a5915adf410187fa720c58e68b3a..e2075925265642efe2612b14ec5de129812ced03 100644 --- a/maca_trans_parser/src/config.h +++ b/maca_trans_parser/src/config.h @@ -5,45 +5,25 @@ #include<stdlib.h> #include"stack.h" -#include"queue.h" -#include"depset.h" #include"mcd.h" #include"word_buffer.h" -#include"feat_vec.h" -#include"global_feat_vec.h" +#include"mvt_stack.h" #define config_get_stack(c) (c)->st #define config_get_buffer(c) (c)->bf -#define config_get_depset(c) (c)->ds +#define config_get_history(c) (c)->history typedef struct { stack *st; /* the stack */ word_buffer *bf; /* the buffer */ - depset *ds; /* the dependency set */ - FILE *f; /* file from which words are read */ - mcd *mcd_struct; /* description of the format of file f */ - float score; /* score of the configuration */ - int *mvt_array; /* array containing the movement sequence that led to this configuration */ - int mvt_array_size; /* self explanatory */ - int mvt_array_nbelem; /* self explanatory */ - global_feat_vec *gfv; /* global feature vector for structured perceptron */ - int current_index; /* index of current word (useful for stream parsing only) */ - + mvt_stack *history; /* movement sequence that led to this configuration */ } config; -int config_equal(config *c1, config *c2); -int config_equal2(config *c1, config *c2); config *config_new(FILE *f, mcd *mcd_struct, int lookahead); -config *config_initial(FILE *f, mcd *mcd_struct, int lookahead); -config *config_initial_no_dummy_word(FILE *f, mcd *mcd_struct, int lookahead); -config *config_copy(config *o); void config_print(FILE *buffer, config *c); int config_is_terminal(config *c); void config_free(config *c); -void config_add_mvt(config *c, int mvt); -word *config_add_next_word_to_buffer(config *c); -void config_connect_subtrees(config *c, int root_label); - - +void config_push_mvt(config *c, int type, word *gov, word *dep); +mvt *config_pop_mvt(config *c); #endif diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index 54cb8ee72da896eab9b28b57ae19bb20a5c581d6..2a24b2e5a90ba3b14315cb2e405f687a4be11c90 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -597,32 +597,34 @@ int bh(config *c) /* buffer size */ return (config_get_buffer(c)->size > 7)? 7 : config_get_buffer(c)->size; } +#if 0 int dh(config *c) /* depset size */ { return (c->ds->length > 7)? 7 : c->ds->length; /* return c->ds->length; */ } +#endif int t1(config *c) /* previous transition */ { - if(c->mvt_array_nbelem > 0) return c->mvt_array[c->mvt_array_nbelem - 1]; - return -1; + mvt *m = mvt_stack_0(config_get_history(c)); + return (m == NULL)? -1 : mvt_get_type(m); } int t2(config *c) /* previous transition */ { - if(c->mvt_array_nbelem > 1) return c->mvt_array[c->mvt_array_nbelem - 2]; - return -1; + mvt *m = mvt_stack_1(config_get_history(c)); + return (m == NULL)? -1 : mvt_get_type(m); } int t3(config *c) /* previous transition */ { - if(c->mvt_array_nbelem > 2) return c->mvt_array[c->mvt_array_nbelem - 3]; - return -1; + mvt *m = mvt_stack_2(config_get_history(c)); + return (m == NULL)? -1 : mvt_get_type(m); } int t4(config *c) /* previous transition */ { - if(c->mvt_array_nbelem > 3) return c->mvt_array[c->mvt_array_nbelem - 4]; - return -1; + mvt *m = mvt_stack_3(config_get_history(c)); + return (m == NULL)? -1 : mvt_get_type(m); } diff --git a/maca_trans_parser/src/feat_fct.h b/maca_trans_parser/src/feat_fct.h index e654b10cda73ab593d7fd65e2b32b11d5e012e14..457232b42e4d4f08cba39710d151faff03396e1e 100644 --- a/maca_trans_parser/src/feat_fct.h +++ b/maca_trans_parser/src/feat_fct.h @@ -435,7 +435,7 @@ int dist_s0_b0(config *c); int sh(config *c); int bh(config *c); -int dh(config *c); +/* int dh(config *c); */ int t1(config *c); int t2(config *c); diff --git a/maca_trans_parser/src/feat_lib.c b/maca_trans_parser/src/feat_lib.c index 6342d4e602ba3cc54ce39434f156082fcf2fea31..e76cc2ab249b0db71ad1d34553ab084f9400b857 100644 --- a/maca_trans_parser/src/feat_lib.c +++ b/maca_trans_parser/src/feat_lib.c @@ -451,7 +451,7 @@ feat_lib *feat_lib_build(void) feat_lib_add(fl, FEAT_TYPE_INT_8, (char *)"sh", sh); feat_lib_add(fl, FEAT_TYPE_INT_8, (char *)"bh", bh); - feat_lib_add(fl, FEAT_TYPE_INT_8, (char *)"dh", dh); + /* feat_lib_add(fl, FEAT_TYPE_INT_8, (char *)"dh", dh); */ feat_lib_add(fl, FEAT_TYPE_TRANS, (char *)"t1", t1); feat_lib_add(fl, FEAT_TYPE_TRANS, (char *)"t2", t2); diff --git a/maca_trans_parser/src/maca_trans_parser.c b/maca_trans_parser/src/maca_trans_parser.c index 0b9e785b5ffb976245abaefcd4f2b10c18cb689f..6a3e6d798c83b1d8256d2adf5a3ed21fe1ee6c8b 100644 --- a/maca_trans_parser/src/maca_trans_parser.c +++ b/maca_trans_parser/src/maca_trans_parser.c @@ -102,7 +102,6 @@ int main(int argc, char *argv[]) ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - /* dico_vec_print(NULL, ctx->vocabs); */ mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); @@ -115,15 +114,10 @@ int main(int argc, char *argv[]) ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3; /* load models */ - + ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); - - if(ctx->beam_width == 1){ - simple_decoder_parser_arc_eager(ctx); - } - /* else - beam_decoder(conll_file, ctx->mcd_struct, ctx->d_perceptron_features, ctx->dico_labels, ft, ctx->features_model, ctx->verbose, root_label, ctx->beam_width, ctx->mvt_nb); - }*/ + + simple_decoder_parser_arc_eager(ctx); context_free(ctx); return 0; diff --git a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c index cee41b35a5221ffb747b439f563c731495442e4f..b990c79841230ff428c9c98484b2ca777d00bd92 100644 --- a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c @@ -63,7 +63,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_SENT_SEG); - c = config_initial(mcf_file, mcd_struct_hyp, 5); + c = config_new(mcf_file, mcd_struct_hyp, 5); while(!word_buffer_end(ref) && (sentence_nb < ctx->sent_nb)){ /*printf("************ REF ************\n"); @@ -71,12 +71,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx) printf("*****************************\n");*/ mvt_code = oracle_parser_arc_eager(c, ref, root_label); - mvt_type = movement_type(mvt_code); - mvt_label = movement_label(mvt_code); + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); if(ctx->debug_mode){ config_print(stdout,c); - movement_print(stdout, mvt_code, ctx->dico_labels); + movement_parser_print(stdout, mvt_code, ctx->dico_labels); } if(ctx->trace_mode){ @@ -84,7 +84,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) stack_print(output_file, c->st); fprintf(output_file, "\t"); - movement_print(output_file, mvt_code, ctx->dico_labels); + movement_parser_print(output_file, mvt_code, ctx->dico_labels); fprintf(output_file, "\t1\n"); } else{ @@ -93,37 +93,37 @@ void generate_training_file_stream(FILE *output_file, context *ctx) feat_vec_print(output_file, fv); } - if(mvt_type == MVT_EOS){ - movement_eos(c, 0); + if(mvt_type == MVT_PARSER_EOS){ + movement_parser_eos(c); sentence_nb++; fprintf(stderr, "sentence %d\n", sentence_nb); if(word_buffer_is_last(ref)) break; } - if(mvt_type == MVT_LEFT){ - movement_left_arc(c, mvt_label, 0); + if(mvt_type == MVT_PARSER_LEFT){ + movement_parser_left_arc(c, mvt_label); continue; } - if(mvt_type == MVT_RIGHT){ - movement_right_arc(c, mvt_label, 0); + if(mvt_type == MVT_PARSER_RIGHT){ + movement_parser_right_arc(c, mvt_label); word_buffer_move_right(ref); continue; } - if(mvt_type == MVT_REDUCE){ - movement_reduce(c, 0); + if(mvt_type == MVT_PARSER_REDUCE){ + movement_parser_reduce(c); continue; } - if(mvt_type == MVT_ROOT){ - movement_root(c, 0, root_label); + if(mvt_type == MVT_PARSER_ROOT){ + movement_parser_root(c, root_label); continue; } - if(mvt_type == MVT_SHIFT){ - movement_shift(c, 1, 0); + if(mvt_type == MVT_PARSER_SHIFT){ + movement_parser_shift(c); word_buffer_move_right(ref); continue; } diff --git a/maca_trans_parser/src/maca_trans_tagger_mcf2cff.c b/maca_trans_parser/src/maca_trans_tagger_mcf2cff.c index 6936c6399c713da664876b45ec181e39ee043bb2..93f990a73a245950ff35a95f71b310ff5a6c2d68 100644 --- a/maca_trans_parser/src/maca_trans_tagger_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_tagger_mcf2cff.c @@ -12,20 +12,6 @@ #include"word_emb.h" #include"config2feat_vec.h" -void add_signature_to_words_in_queue(queue *bf, form2pos *f2p) -{ - int i; - word *w; - - for(i=0; i < queue_nbelem(bf); i++){ - w = queue_elt_n(bf, i); - if(!w->signature){ - /* printf("add signature %d to word %s\n", form2pos_get_signature(f2p, w->form), w->form); */ - w->signature = form2pos_get_signature(f2p, w->form); - } - } -} - void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) { int i; @@ -77,7 +63,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); int postag; - c = config_initial_no_dummy_word(conll_file, ctx->mcd_struct, 5); + c = config_new(conll_file, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ /* config_print(stdout,c); */ @@ -101,7 +87,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); int postag; - c = config_initial(conll_file, ctx->mcd_struct, 0); + c = config_new(conll_file, ctx->mcd_struct, 0); while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, NULL); */ @@ -123,7 +109,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) movement_tagger(c, postag, 0, 0); } config_free(c); - c = config_initial(conll_file, ctx->mcd_struct, 0); + c = config_new(conll_file, ctx->mcd_struct, 0); sentence_nb++; } } diff --git a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c index e4c69d12d39df524473ca3df0b22e138c364edec..fdadaeead7901243dad7907e0d163df68f9a9705 100644 --- a/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c @@ -12,6 +12,18 @@ #include"word_emb.h" #include"config2feat_vec.h" +void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) +{ + int i; + word *w; + + for(i = word_buffer_get_nbelem(bf) - 1; i >=0 ; i--){ + w = word_buffer_get_word_n(bf, i); + if(word_get_signature(w) != -1) break; + w->signature = form2pos_get_signature(f2p, w->form); + } +} + void maca_trans_parser_mcf2cff_help_message(context *ctx) { context_general_help_message(ctx); @@ -64,7 +76,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL); mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_SENT_SEG); - c = config_initial(mcf_file, mcd_struct_hyp, 5); + c = config_new(mcf_file, mcd_struct_hyp, 5); while(!word_buffer_end(ref) && (sentence_nb < ctx->sent_nb)){ /*printf("************ REF ************\n"); @@ -78,12 +90,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx) config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); mvt_code = oracle_tagparser_arc_eager(c, ref, root_label); - mvt_type = movement_tagparse_type(mvt_code); - mvt_label = movement_tagparse_label(mvt_code); + mvt_type = movement_tagparser_type(mvt_code); + mvt_label = movement_tagparser_label(mvt_code); if(ctx->debug_mode){ config_print(stdout,c); - movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, dico_postag); + movement_tagparser_print(stdout, mvt_code, ctx->dico_labels, dico_postag); fprintf(stdout, "\n"); } @@ -92,7 +104,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) stack_print(output_file, c->st); fprintf(output_file, "\t"); - movement_tagparse_print(output_file, mvt_code, ctx->dico_labels, dico_postag); + movement_tagparser_print(output_file, mvt_code, ctx->dico_labels, dico_postag); fprintf(output_file, "\t1\n"); } else{ @@ -100,41 +112,41 @@ void generate_training_file_stream(FILE *output_file, context *ctx) feat_vec_print(output_file, fv); } - if(mvt_type == MVT_EOS){ - movement_tagparse_eos(c, 0); + if(mvt_type == MVT_TAGPARSER_EOS){ + movement_tagparser_eos(c); sentence_nb++; if(word_buffer_is_last(ref)) break; } - if(mvt_type == MVT_POSTAG){ - movement_tagparse_add_pos(c, 0, mvt_label); + if(mvt_type == MVT_TAGPARSER_POSTAG){ + movement_tagparser_add_pos(c, mvt_label); continue; } - if(mvt_type == MVT_LEFT){ - movement_tagparse_left_arc(c, mvt_label, 0); + if(mvt_type == MVT_TAGPARSER_LEFT){ + movement_tagparser_left_arc(c, mvt_label); continue; } - if(mvt_type == MVT_RIGHT){ - movement_tagparse_right_arc(c, mvt_label, 0); + if(mvt_type == MVT_TAGPARSER_RIGHT){ + movement_tagparser_right_arc(c, mvt_label); word_buffer_move_right(ref); continue; } - if(mvt_type == MVT_REDUCE){ - movement_tagparse_reduce(c, 0); + if(mvt_type == MVT_TAGPARSER_REDUCE){ + movement_tagparser_reduce(c); continue; } - if(mvt_type == MVT_ROOT){ - movement_tagparse_root(c, 0, root_label); + if(mvt_type == MVT_TAGPARSER_ROOT){ + movement_tagparser_root(c, root_label); continue; } - if(mvt_type == MVT_SHIFT){ - movement_tagparse_shift(c, 1, 0); + if(mvt_type == MVT_TAGPARSER_SHIFT){ + movement_tagparser_shift(c); word_buffer_move_right(ref); continue; } diff --git a/maca_trans_parser/src/movement_parser_arc_eager.c b/maca_trans_parser/src/movement_parser_arc_eager.c index 757760eabe412fc8bbdca29f977edf5cfae68ba4..baa4c62e8f1684b296df6dde3ca69908c54f833e 100644 --- a/maca_trans_parser/src/movement_parser_arc_eager.c +++ b/maca_trans_parser/src/movement_parser_arc_eager.c @@ -3,127 +3,125 @@ #include<string.h> #include"util.h" #include"movement_parser_arc_eager.h" +#include"movements.h" -void movement_print(FILE *f, int mvt_code, dico *dico_labels){ - int mvt_type = movement_type(mvt_code); - int mvt_label = movement_label(mvt_code); +void movement_parser_print(FILE *f, int mvt_code, dico *dico_labels){ + int mvt_type = movement_parser_type(mvt_code); + int mvt_label = movement_parser_label(mvt_code); char *label; - if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT"); return;} - if(mvt_type == MVT_REDUCE) {fprintf(f, "REDUCE"); return;} - if(mvt_type == MVT_ROOT) {fprintf(f, "ROOT"); return;} - if(mvt_type == MVT_EOS) {fprintf(f, "EOS"); return;} - if(mvt_type == MVT_RIGHT) fprintf(f, "RIGHT"); + if(mvt_type == MVT_PARSER_SHIFT) {fprintf(f, "SHIFT"); return;} + if(mvt_type == MVT_PARSER_REDUCE) {fprintf(f, "REDUCE"); return;} + if(mvt_type == MVT_PARSER_ROOT) {fprintf(f, "ROOT"); return;} + if(mvt_type == MVT_PARSER_EOS) {fprintf(f, "EOS"); return;} + if(mvt_type == MVT_PARSER_RIGHT) fprintf(f, "RIGHT"); else fprintf(f, "LEFT"); label = dico_int2string(dico_labels, mvt_label); fprintf(f, " %s", label); } -int movement_type(int mvt) +int movement_parser_type(int mvt) { - if(mvt == MVT_SHIFT) return MVT_SHIFT; - if(mvt == MVT_REDUCE) return MVT_REDUCE; - if(mvt == MVT_ROOT) return MVT_ROOT; - if(mvt == MVT_EOS) return MVT_EOS; - if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */ - return MVT_RIGHT; /* odd movements are right movements */ + if(mvt == MVT_PARSER_SHIFT) return MVT_PARSER_SHIFT; + if(mvt == MVT_PARSER_REDUCE) return MVT_PARSER_REDUCE; + if(mvt == MVT_PARSER_ROOT) return MVT_PARSER_ROOT; + if(mvt == MVT_PARSER_EOS) return MVT_PARSER_EOS; + if(mvt % 2 == 0) return MVT_PARSER_LEFT; /* even movements are left movements */ + return MVT_PARSER_RIGHT; /* odd movements are right movements */ } -int movement_label(int mvt) +int movement_parser_label(int mvt) { - if(mvt == MVT_SHIFT) return -1; - if(mvt == MVT_REDUCE) return -1; - if(mvt == MVT_ROOT) return -1; - if(mvt == MVT_EOS) return -1; + if(mvt == MVT_PARSER_SHIFT) return -1; + if(mvt == MVT_PARSER_REDUCE) return -1; + if(mvt == MVT_PARSER_ROOT) return -1; + if(mvt == MVT_PARSER_EOS) return -1; if(mvt % 2 == 0) /* even codes correspond to left movements */ return mvt / 2 - 2; return (mvt - 1) / 2 - 2; /* odd codes correspond to right movements */ } -int movement_eos(config *c, float score) +int movement_parser_eos(config *c) { - if(stack_is_empty(config_get_stack(c))) return 0; - - /* word on the top of the stack is sent_seg */ - word_set_sent_seg(stack_top(config_get_stack(c)), 1); - - /* (config_get_stack(c))->top = 0; */ - - config_add_mvt(c, MVT_EOS); - return 1; + return movement_eos(c, MVT_PARSER_EOS); } -int movement_left_arc(config *c, int label, float score) +int movement_parser_eos_undo(config *c) { - if(stack_is_empty(config_get_stack(c))) return 0; - /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ - - /* word on top of the stack should not have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; + return movement_eos_undo(c); +} - word *gov = word_buffer_b0(config_get_buffer(c)); - word *dep = stack_top(config_get_stack(c)); - int dist = (word_get_index(gov)) - (word_get_index(dep)); +int movement_parser_left_arc(config *c, int label) +{ + return movement_left_arc(c, movement_parser_left_code(label), label); +} - /* create a new dependency */ - word_set_gov(dep, dist); - word_set_label(dep, label); +int movement_parser_left_arc_undo(config *c) +{ + return movement_left_arc_undo(c); +} - stack_pop(config_get_stack(c)); - config_add_mvt(c, movement_left_code(label)); - return 1; +int movement_parser_right_arc(config *c, int label) +{ + return movement_right_arc(c, movement_parser_right_code(label), label); } -int movement_right_arc(config *c, int label, float score) +int movement_parser_right_arc_undo(config *c) { - if(stack_is_empty(config_get_stack(c))) return 0; - - word *gov = stack_top(config_get_stack(c)); - word *dep = word_buffer_b0(config_get_buffer(c)); - int dist = (word_get_index(gov)) - (word_get_index(dep)); - - /* create a new dependency */ - word_set_gov(dep, dist); - word_set_label(dep, label); - - stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); - word_buffer_move_right(config_get_buffer(c)); - - config_add_mvt(c, movement_right_code(label)); - return 1; -} - -int movement_shift(config *c, int stream, float score) + return movement_right_arc_undo(c); +} + +int movement_parser_shift(config *c) { - if(word_buffer_is_empty(config_get_buffer(c))) return 0; - stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); - word_buffer_move_right(config_get_buffer(c)); - config_add_mvt(c, MVT_SHIFT); - return 1; + return movement_shift(c, MVT_PARSER_SHIFT); } -int movement_reduce(config *c, float score) +int movement_parser_shift_undo(config *c) { - if(stack_nbelem(config_get_stack(c)) <= 1) return 0; + return movement_shift_undo(c); +} - /* if(stack_is_empty(config_get_stack(c))) return 0; */ +int movement_parser_reduce(config *c) +{ + return movement_reduce(c, MVT_PARSER_REDUCE); +} - /* word on top of stack must have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) == WORD_INVALID_GOV) return 0; - stack_pop(config_get_stack(c)); - config_add_mvt(c, MVT_REDUCE); - return 1; +int movement_parser_reduce_undo(config *c) +{ + return movement_reduce_undo(c); } -int movement_root(config *c, float score, int root_code) +int movement_parser_root(config *c, int root_code) { - word *s0 = stack_top(config_get_stack(c)); - if(s0 == NULL) return 0; - word_set_gov(s0, 0); - word_set_label(s0, root_code); - s0->is_root = 1; + return movement_root(c, MVT_PARSER_ROOT, root_code); +} - stack_pop(config_get_stack(c)); +int movement_parser_root_undo(config *c) +{ + return movement_root_undo(c); +} - config_add_mvt(c, MVT_ROOT); - return 1; +int movement_parser_undo(config *c) +{ + int result; + int mvt_type = mvt_get_type(mvt_stack_top(config_get_history(c))); + switch(mvt_type){ + case MVT_PARSER_LEFT : + result = movement_parser_left_arc_undo(c); + break; + case MVT_PARSER_RIGHT: + result = movement_parser_right_arc_undo(c); + break; + case MVT_PARSER_REDUCE: + result = movement_parser_reduce_undo(c); + break; + case MVT_PARSER_ROOT: + result = movement_parser_root_undo(c); + break; + case MVT_PARSER_EOS: + result = movement_parser_eos_undo(c); + break; + case MVT_PARSER_SHIFT: + result = movement_parser_shift_undo(c); + } + return result; } diff --git a/maca_trans_parser/src/movement_parser_arc_eager.h b/maca_trans_parser/src/movement_parser_arc_eager.h index 63f2f8a65f5921ecfde4594532bda3b9feb6384e..2f4205931757b78b9bb9f89df997a32e39fdebb5 100644 --- a/maca_trans_parser/src/movement_parser_arc_eager.h +++ b/maca_trans_parser/src/movement_parser_arc_eager.h @@ -2,32 +2,37 @@ #define __MOVEMENT_PARSER_ARC_EAGER__ #include"config.h" -#include"feat_vec.h" - - -#define MVT_SHIFT 0 -#define MVT_REDUCE 1 -#define MVT_ROOT 2 -#define MVT_EOS 3 -#define MVT_LEFT 4 -#define MVT_RIGHT 5 +#include"dico.h" +#define MVT_PARSER_SHIFT 0 +#define MVT_PARSER_REDUCE 1 +#define MVT_PARSER_ROOT 2 +#define MVT_PARSER_EOS 3 +#define MVT_PARSER_LEFT 4 +#define MVT_PARSER_RIGHT 5 /* even movements are left movements (except 0, which is shift and 2 which is root) */ -#define movement_left_code(label) (2 * (label) + 4) +#define movement_parser_left_code(label) (2 * (label) + 4) /* odd movements are right movements (except 1, which is reduce and 3 which is end_of_sentence) */ -#define movement_right_code(label) (2 * (label) + 5) - -int movement_type(int mvt); -int movement_label(int mvt); - -int movement_left_arc(config *c, int label, float score); -int movement_right_arc(config *c, int label, float score); -int movement_shift(config *c, int stream, float score); -int movement_reduce(config *c, float score); -int movement_root(config *c, float score, int root_code); -int movement_eos(config *c, float score); -void movement_print(FILE *f, int mvt_code, dico *dico_labels); +#define movement_parser_right_code(label) (2 * (label) + 5) + +int movement_parser_type(int mvt); +int movement_parser_label(int mvt); + +int movement_parser_left_arc(config *c, int label); +int movement_parser_left_arc_undo(config *c); +int movement_parser_right_arc(config *c, int label); +int movement_parser_right_arc_undo(config *c); +int movement_parser_shift(config *c); +int movement_parser_shift_undo(config *c); +int movement_parser_reduce(config *c); +int movement_parser_reduce_undo(config *c); +int movement_parser_root(config *c, int root_code); +int movement_parser_root_undo(config *c); +int movement_parser_eos(config *c); +int movement_parser_eos_undo(config *c); +int movement_parser_undo(config *c); +void movement_parser_print(FILE *f, int mvt_code, dico *dico_labels); #endif diff --git a/maca_trans_parser/src/movement_tagparser_arc_eager.c b/maca_trans_parser/src/movement_tagparser_arc_eager.c index df5f5a730cf2e22032dfff3e427bccd4a13ce6d4..b7e494bcabc72e7a51e3bc1a91619722096506e5 100644 --- a/maca_trans_parser/src/movement_tagparser_arc_eager.c +++ b/maca_trans_parser/src/movement_tagparser_arc_eager.c @@ -2,48 +2,49 @@ #include<stdlib.h> #include<string.h> #include"util.h" +#include"movements.h" #include"movement_tagparser_arc_eager.h" -void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag) +void movement_tagparser_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag) { - int mvt_type = movement_tagparse_type(mvt_code); - int mvt_label = movement_tagparse_label(mvt_code); + int mvt_type = movement_tagparser_type(mvt_code); + int mvt_label = movement_tagparser_label(mvt_code); char *label; - if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT"); return;} - if(mvt_type == MVT_REDUCE) {fprintf(f, "REDUCE"); return;} - if(mvt_type == MVT_ROOT) {fprintf(f, "ROOT"); return;} - if(mvt_type == MVT_EOS) {fprintf(f, "EOS"); return;} - if(mvt_type == MVT_POSTAG){ + if(mvt_type == MVT_TAGPARSER_SHIFT) {fprintf(f, "SHIFT"); return;} + if(mvt_type == MVT_TAGPARSER_REDUCE) {fprintf(f, "REDUCE"); return;} + if(mvt_type == MVT_TAGPARSER_ROOT) {fprintf(f, "ROOT"); return;} + if(mvt_type == MVT_TAGPARSER_EOS) {fprintf(f, "EOS"); return;} + if(mvt_type == MVT_TAGPARSER_POSTAG){ fprintf(f, "POSTAG"); label = dico_int2string(dico_postag, mvt_label); fprintf(f, " %s", label); return; } - if(mvt_type == MVT_RIGHT) fprintf(f, "RIGHT"); + if(mvt_type == MVT_TAGPARSER_RIGHT) fprintf(f, "RIGHT"); else fprintf(f, "LEFT"); label = dico_int2string(dico_labels, mvt_label); fprintf(f, " %s", label); } -int movement_tagparse_type(int mvt) +int movement_tagparser_type(int mvt) { - if(mvt == MVT_SHIFT) return MVT_SHIFT; /* 0 */ - if(mvt == MVT_REDUCE) return MVT_REDUCE; /* 1 */ - if(mvt == MVT_ROOT) return MVT_ROOT; /* 2 */ - if(mvt == MVT_EOS) return MVT_EOS; /* 3 */ - if(mvt % 3 == 0) return MVT_RIGHT; /* 4, 7, 10 ... */ - if(mvt % 3 == 1) return MVT_POSTAG; /* 5, 8, 11 ... */ - /*if(mvt % 3 == 2)*/ return MVT_LEFT; /* 6, 9, 12 ... */ + if(mvt == MVT_TAGPARSER_SHIFT) return MVT_TAGPARSER_SHIFT; /* 0 */ + if(mvt == MVT_TAGPARSER_REDUCE) return MVT_TAGPARSER_REDUCE; /* 1 */ + if(mvt == MVT_TAGPARSER_ROOT) return MVT_TAGPARSER_ROOT; /* 2 */ + if(mvt == MVT_TAGPARSER_EOS) return MVT_TAGPARSER_EOS; /* 3 */ + if(mvt % 3 == 0) return MVT_TAGPARSER_RIGHT; /* 4, 7, 10 ... */ + if(mvt % 3 == 1) return MVT_TAGPARSER_POSTAG; /* 5, 8, 11 ... */ + /*if(mvt % 3 == 2)*/ return MVT_TAGPARSER_LEFT; /* 6, 9, 12 ... */ } -int movement_tagparse_label(int mvt) +int movement_tagparser_label(int mvt) { - if(mvt == MVT_SHIFT) return -1; - if(mvt == MVT_REDUCE) return -1; - if(mvt == MVT_ROOT) return -1; - if(mvt == MVT_EOS) return -1; + if(mvt == MVT_TAGPARSER_SHIFT) return -1; + if(mvt == MVT_TAGPARSER_REDUCE) return -1; + if(mvt == MVT_TAGPARSER_ROOT) return -1; + if(mvt == MVT_TAGPARSER_EOS) return -1; if(mvt % 3 == 1) /* pos movement */ return (mvt - 4) / 3; if(mvt % 3 == 2) /* left movement */ @@ -52,106 +53,101 @@ int movement_tagparse_label(int mvt) return (mvt - 6) / 3; } -int movement_tagparse_add_pos(config *c, float score, int pos) +int movement_tagparser_add_pos(config *c, int pos) { - if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0; - if(word_get_pos(word_buffer_b0(config_get_buffer(c))) != -1) return 0; - word_set_pos(word_buffer_b0(config_get_buffer(c)), pos); - - /* stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); - word_buffer_move_right(config_get_buffer(c));*/ - config_add_mvt(c, movement_tagparse_postag(pos)); - - return 1; + return movement_add_pos(c, movement_tagparser_postag_code(pos), pos); } -int movement_tagparse_eos(config *c, float score) +int movement_tagparser_add_pos_undo(config *c) { - if(stack_is_empty(config_get_stack(c))) return 0; - if(word_get_sent_seg(stack_top(config_get_stack(c))) == 1) return 0; - - - /* word on the top of the stack is sent_seg */ - word_set_sent_seg(stack_top(config_get_stack(c)), 1); - - /* (config_get_stack(c))->top = 0; */ - - config_add_mvt(c, MVT_EOS); - return 1; + return movement_add_pos_undo(c); } -int movement_tagparse_left_arc(config *c, int label, float score) +int movement_tagparser_eos(config *c) { - if(stack_is_empty(config_get_stack(c))) return 0; - /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ - - /* word on top of the stack should not have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; + return movement_eos(c, MVT_TAGPARSER_EOS); +} - word *gov = word_buffer_b0(config_get_buffer(c)); - word *dep = stack_top(config_get_stack(c)); - int dist = (word_get_index(gov)) - (word_get_index(dep)); +int movement_tagparser_eos_undo(config *c) +{ + return movement_eos_undo(c); +} - /* create a new dependency */ - word_set_gov(dep, dist); - word_set_label(dep, label); +int movement_tagparser_left_arc(config *c, int label) +{ + return movement_left_arc(c, movement_tagparser_left_code(label), label); +} - stack_pop(config_get_stack(c)); - config_add_mvt(c, movement_tagparse_left_code(label)); - return 1; +int movement_tagparser_left_arc_undo(config *c) +{ + return movement_left_arc_undo(c); } -int movement_tagparse_right_arc(config *c, int label, float score) +int movement_tagparser_right_arc(config *c, int label) { - if(stack_is_empty(config_get_stack(c))) return 0; - - word *gov = stack_top(config_get_stack(c)); - word *dep = word_buffer_b0(config_get_buffer(c)); - int dist = (word_get_index(gov)) - (word_get_index(dep)); + return movement_right_arc(c, movement_tagparser_right_code(label), label); +} - /* create a new dependency */ - word_set_gov(dep, dist); - word_set_label(dep, label); - - stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); - word_buffer_move_right(config_get_buffer(c)); - - config_add_mvt(c, movement_tagparse_right_code(label)); - return 1; +int movement_tagparser_right_arc_undo(config *c) +{ + return movement_right_arc_undo(c); } -int movement_tagparse_shift(config *c, int stream, float score) +int movement_tagparser_shift(config *c) { - if(word_buffer_is_empty(config_get_buffer(c))) return 0; - stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); - word_buffer_move_right(config_get_buffer(c)); - config_add_mvt(c, MVT_SHIFT); - return 1; + return movement_shift(c, MVT_TAGPARSER_SHIFT); } -int movement_tagparse_reduce(config *c, float score) +int movement_tagparser_shift_undo(config *c) { - if(stack_nbelem(config_get_stack(c)) <= 1) return 0; + return movement_shift_undo(c); +} - /* if(stack_is_empty(config_get_stack(c))) return 0; */ +int movement_tagparser_reduce(config *c) +{ + return movement_reduce(c, MVT_TAGPARSER_REDUCE); +} - /* word on top of stack must have a governor */ - if(word_get_gov(stack_top(config_get_stack(c))) == WORD_INVALID_GOV) return 0; - stack_pop(config_get_stack(c)); - config_add_mvt(c, MVT_REDUCE); - return 1; +int movement_tagparser_reduce_undo(config *c) +{ + return movement_reduce_undo(c); } -int movement_tagparse_root(config *c, float score, int root_code) +int movement_tagparser_root(config *c, int root_code) { - word *s0 = stack_top(config_get_stack(c)); - if(s0 == NULL) return 0; - word_set_gov(s0, 0); - word_set_label(s0, root_code); - s0->is_root = 1; + return movement_root(c, MVT_TAGPARSER_ROOT, root_code); +} - stack_pop(config_get_stack(c)); +int movement_tagparser_root_undo(config *c) +{ + return movement_root_undo(c); +} - config_add_mvt(c, MVT_ROOT); - return 1; +int movement_tagparser_undo(config *c) +{ + int result; + int mvt_type = mvt_get_type(mvt_stack_top(config_get_history(c))); + switch(mvt_type){ + case MVT_TAGPARSER_POSTAG : + result = movement_tagparser_left_arc_undo(c); + break; + case MVT_TAGPARSER_LEFT : + result = movement_tagparser_left_arc_undo(c); + break; + case MVT_TAGPARSER_RIGHT: + result = movement_tagparser_right_arc_undo(c); + break; + case MVT_TAGPARSER_REDUCE: + result = movement_tagparser_reduce_undo(c); + break; + case MVT_TAGPARSER_ROOT: + result = movement_tagparser_root_undo(c); + break; + case MVT_TAGPARSER_EOS: + result = movement_tagparser_eos_undo(c); + break; + case MVT_TAGPARSER_SHIFT: + result = movement_tagparser_shift_undo(c); + } + return result; } diff --git a/maca_trans_parser/src/movement_tagparser_arc_eager.h b/maca_trans_parser/src/movement_tagparser_arc_eager.h index 89f5a681aeb1f36db1dd1924d06c3cfa3c0543ea..98e42c48652968e9375519e32e9f51f83e79165b 100644 --- a/maca_trans_parser/src/movement_tagparser_arc_eager.h +++ b/maca_trans_parser/src/movement_tagparser_arc_eager.h @@ -2,34 +2,33 @@ #define __MOVEMENT_TAGPARSER_ARC_EAGER__ #include"config.h" -#include"feat_vec.h" +#include"dico.h" +#define MVT_TAGPARSER_SHIFT 0 +#define MVT_TAGPARSER_REDUCE 1 +#define MVT_TAGPARSER_ROOT 2 +#define MVT_TAGPARSER_EOS 3 +#define MVT_TAGPARSER_LEFT 4 +#define MVT_TAGPARSER_RIGHT 5 +#define MVT_TAGPARSER_POSTAG 6 -#define MVT_SHIFT 0 -#define MVT_REDUCE 1 -#define MVT_ROOT 2 -#define MVT_EOS 3 -#define MVT_LEFT 4 -#define MVT_RIGHT 5 -#define MVT_POSTAG 6 - -#define movement_tagparse_postag(postag) (3 * (postag) + 4) +#define movement_tagparser_postag_code(postag) (3 * (postag) + 4) /* even movements are left movements (except 0, which is shift and 2 which is root) */ -#define movement_tagparse_left_code(label) (3 * (label) + 5) +#define movement_tagparser_left_code(label) (3 * (label) + 5) /* odd movements are right movements (except 1, which is reduce and 3 which is end_of_sentence) */ -#define movement_tagparse_right_code(label) (3 * (label) + 6) - -int movement_tagparse_type(int mvt); -int movement_tagparse_label(int mvt); - -int movement_tagparse_left_arc(config *c, int label, float score); -int movement_tagparse_right_arc(config *c, int label, float score); -int movement_tagparse_shift(config *c, int stream, float score); -int movement_tagparse_reduce(config *c, float score); -int movement_tagparse_root(config *c, float score, int root_code); -int movement_tagparse_eos(config *c, float score); -int movement_tagparse_add_pos(config *c, float score, int postag); -void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag); +#define movement_tagparser_right_code(label) (3 * (label) + 6) + +int movement_tagparser_type(int mvt); +int movement_tagparser_label(int mvt); + +int movement_tagparser_left_arc(config *c, int label); +int movement_tagparser_right_arc(config *c, int label); +int movement_tagparser_shift(config *c); +int movement_tagparser_reduce(config *c); +int movement_tagparser_root(config *c, int root_code); +int movement_tagparser_eos(config *c); +int movement_tagparser_add_pos(config *c, int postag); +void movement_tagparser_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag); #endif diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c new file mode 100644 index 0000000000000000000000000000000000000000..cccdf083fc2f299f2998f8679968f2c4db3758df --- /dev/null +++ b/maca_trans_parser/src/movements.c @@ -0,0 +1,183 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include"util.h" +#include"movements.h" + +int movement_eos(config *c, int movement_code) +{ + if(stack_is_empty(config_get_stack(c))) return 0; + word *s0 = stack_top(config_get_stack(c)); + + /* word on the top of the stack is sent_seg */ + word_set_sent_seg(s0, 1); + + config_push_mvt(c, movement_code, s0, NULL); + return 1; +} + +int movement_eos_undo(config *c) +{ + word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); + + /* word on the top of the stack is not sent_seg anymore */ + word_set_sent_seg(gov, 0); + + mvt_free(config_pop_mvt(c)); + return 1; +} + +int movement_left_arc(config *c, int movement_code, int label) +{ + if(stack_is_empty(config_get_stack(c))) return 0; + /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ + + /* word on top of the stack should not have a governor */ + if(word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV) return 0; + + word *gov = word_buffer_b0(config_get_buffer(c)); + word *dep = stack_top(config_get_stack(c)); + int dist = (word_get_index(gov)) - (word_get_index(dep)); + + /* create a new dependency */ + word_set_gov(dep, dist); + word_set_label(dep, label); + + config_push_mvt(c, movement_code, gov, dep); + + stack_pop(config_get_stack(c)); + return 1; +} + +int movement_left_arc_undo(config *c) +{ + + word *dep = mvt_get_dep(mvt_stack_top(config_get_history(c))); + + /* undo dependency */ + word_set_gov(dep, WORD_INVALID_GOV); + word_set_label(dep, -1); + + mvt_free(config_pop_mvt(c)); + + stack_push(config_get_stack(c), dep); + return 1; +} + + +int movement_right_arc(config *c, int movement_code, int label) +{ + if(stack_is_empty(config_get_stack(c))) return 0; + + word *gov = stack_top(config_get_stack(c)); + word *dep = word_buffer_b0(config_get_buffer(c)); + int dist = (word_get_index(gov)) - (word_get_index(dep)); + + /* create a new dependency */ + word_set_gov(dep, dist); + word_set_label(dep, label); + + stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c))); + word_buffer_move_right(config_get_buffer(c)); + + config_push_mvt(c, movement_code, gov, dep); + return 1; +} + +int movement_right_arc_undo(config *c) +{ + word *dep = mvt_get_dep(mvt_stack_top(config_get_history(c))); + + /* undo dependency */ + word_set_gov(dep, WORD_INVALID_GOV); + word_set_label(dep, -1); + + stack_pop(config_get_stack(c)); + word_buffer_move_left(config_get_buffer(c)); + mvt_free(config_pop_mvt(c)); + return 1; +} + +int movement_shift(config *c, int movement_code) +{ + if(word_buffer_is_empty(config_get_buffer(c))) return 0; + word *b0 = word_buffer_b0(config_get_buffer(c)); + stack_push(config_get_stack(c), b0); + config_push_mvt(c, movement_code, b0, NULL); + word_buffer_move_right(config_get_buffer(c)); + return 1; +} + +int movement_shift_undo(config *c) +{ + stack_pop(config_get_stack(c)); + mvt_free(config_pop_mvt(c)); + word_buffer_move_left(config_get_buffer(c)); + return 1; +} + +int movement_reduce(config *c, int movement_code) +{ + if(stack_nbelem(config_get_stack(c)) <= 1) return 0; + word *s0 = stack_top(config_get_stack(c)); + /* word on top of stack must have a governor */ + if(word_get_gov(s0) == WORD_INVALID_GOV) return 0; + + config_push_mvt(c, movement_code, s0, NULL); + stack_pop(config_get_stack(c)); + return 1; +} + +int movement_reduce_undo(config *c) +{ + word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); + mvt_free(config_pop_mvt(c)); + stack_push(config_get_stack(c), gov); + return 1; +} + +int movement_root(config *c, int movement_code, int root_code) +{ + word *s0 = stack_top(config_get_stack(c)); + if(s0 == NULL) return 0; + word_set_gov(s0, 0); + word_set_label(s0, root_code); + s0->is_root = 1; + + stack_pop(config_get_stack(c)); + + config_push_mvt(c, movement_code, s0, NULL); + return 1; +} + +int movement_root_undo(config *c) +{ + word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); + if(gov == NULL) return 0; + word_set_gov(gov, WORD_INVALID_GOV); + word_set_label(gov, -1); + gov->is_root = 0; + + stack_push(config_get_stack(c), gov); + mvt_free(config_pop_mvt(c)); + return 1; +} + +int movement_add_pos(config *c, int movement_code, int pos) +{ + if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0; + word *b0 = word_buffer_b0(config_get_buffer(c)); + if(word_get_pos(b0) != -1) return 0; + word_set_pos(b0, pos); + config_push_mvt(c, movement_code, b0, NULL); + return 1; +} + +int movement_add_pos_undo(config *c) +{ + word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); + word_set_pos(gov, -1); + mvt_free(config_pop_mvt(c)); + return 1; +} + diff --git a/maca_trans_parser/src/movements.h b/maca_trans_parser/src/movements.h new file mode 100644 index 0000000000000000000000000000000000000000..9d8adf55ff082cecf9cf32609b12e84372c19991 --- /dev/null +++ b/maca_trans_parser/src/movements.h @@ -0,0 +1,25 @@ +#ifndef __MOVEMENTS_ +#define __MOVEMENTS_ + +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include"config.h" + +int movement_eos(config *c, int movement_code); +int movement_eos_undo(config *c); +int movement_left_arc(config *c, int movement_code, int label); +int movement_left_arc_undo(config *c); +int movement_right_arc(config *c, int movement_code, int label); +int movement_right_arc_undo(config *c); +int movement_shift(config *c, int movement_code); +int movement_shift_undo(config *c); +int movement_reduce(config *c, int movement_code); +int movement_reduce_undo(config *c); +int movement_root(config *c, int movement_code, int root_code); +int movement_root_undo(config *c); +int movement_undo(config *c); +int movement_add_pos(config *c, int movement_code, int pos); +int movement_add_pos_undo(config *c); + +#endif diff --git a/maca_trans_parser/src/mvt.c b/maca_trans_parser/src/mvt.c index 228bfbf546b39a6aa1d30ecf22813bc9f565c446..fd5c549fc2561777112c2d0714a9406d5847b119 100644 --- a/maca_trans_parser/src/mvt.c +++ b/maca_trans_parser/src/mvt.c @@ -3,7 +3,7 @@ #include"util.h" #include"mvt.h" -mvt *mvt_new(int type, int gov, int dep) +mvt *mvt_new(int type, word *gov, word *dep) { mvt *m = memalloc(sizeof(mvt)); m->type = type; @@ -21,6 +21,6 @@ void mvt_free(mvt *m) void mvt_print(FILE *f, mvt *m) { if(m) - printf("type = %d gov = %d dep = %d\n", mvt_get_type(m), mvt_get_gov(m), mvt_get_dep(m)); + printf("type = %d gov = %d dep = %d\n", mvt_get_type(m), word_get_index(mvt_get_gov(m)), word_get_index(mvt_get_dep(m))); } diff --git a/maca_trans_parser/src/mvt.h b/maca_trans_parser/src/mvt.h index 863fb0d9a9080a0139f22789b6fd3e1640d71a76..97ca8f50526ab072f94f77678e116fba003ccedf 100644 --- a/maca_trans_parser/src/mvt.h +++ b/maca_trans_parser/src/mvt.h @@ -3,6 +3,7 @@ #include<stdio.h> #include<stdlib.h> +#include"word.h" #define mvt_get_type(m) ((m)->type) #define mvt_set_type(m,v) ((m)->type = (v)) @@ -15,11 +16,11 @@ typedef struct { int type; /* type of the movement */ - int gov; /* first argument of the movement (governor if it is a dependency creation movement) */ - int dep; /* second argument of the movement (dependent if it is a dependency creation movement) */ + word* gov; /* first argument of the movement (governor if it is a dependency creation movement) */ + word* dep; /* second argument of the movement (dependent if it is a dependency creation movement) */ } mvt; -mvt *mvt_new(int type, int gov, int dep); +mvt *mvt_new(int type, word* gov, word* dep); void mvt_free(mvt *m); void mvt_print(FILE *f, mvt *m); diff --git a/maca_trans_parser/src/mvt_stack.c b/maca_trans_parser/src/mvt_stack.c index 2e301ecb8a3f0144e6def1c8599d0ac6cda3986a..6f99a119ac3f662062cadf6545cbad3fdd0c33bd 100644 --- a/maca_trans_parser/src/mvt_stack.c +++ b/maca_trans_parser/src/mvt_stack.c @@ -6,8 +6,8 @@ void mvt_stack_print(FILE *f, mvt_stack *ms) { int i; -for(i=0; i < mvt_stack_nbelem(ms); i++) - mvt_print(f, mvt_stack_elt_n(ms, i)); + for(i=0; i < mvt_stack_nbelem(ms); i++) + mvt_print(f, mvt_stack_elt_n(ms, i)); } void mvt_stack_free(mvt_stack *s) @@ -35,7 +35,4 @@ void mvt_stack_push(mvt_stack *s, mvt *m) s->top++; } -mvt_stack *mvt_stack_copy(mvt_stack *ms) -{} - diff --git a/maca_trans_parser/src/mvt_stack.h b/maca_trans_parser/src/mvt_stack.h index 5648fc7ee0e5a9d91362bd522976504bce678110..5c66e8c2e2aa78406d644544587436672ed791d8 100644 --- a/maca_trans_parser/src/mvt_stack.h +++ b/maca_trans_parser/src/mvt_stack.h @@ -3,30 +3,26 @@ #include"mvt.h" -#define mvt_stack_nbelem(s) ((s)->top) -#define mvt_stack_elt_n(s, n) ((s)->array[(s)->top - (n) - 1]) -#define mvt_stack_pop(s) (((s)->top == 0)? NULL : (s)->array[--((s)->top)]) - - -#define mvt_stack_is_empty(s) (((s)->top == 0) ? 1 : 0) - - -#define mvt_stack_top(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) -#define mvt_stack_0(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) -#define mvt_stack_1(s) (((s)->top > 1)? (s)->array[(s)->top - 2] : NULL) -#define mvt_stack_2(s) (((s)->top > 2)? (s)->array[(s)->top - 3] : NULL) -#define mvt_stack_3(s) (((s)->top > 3)? (s)->array[(s)->top - 4] : NULL) -#define mvt_stack_4(s) (((s)->top > 4)? (s)->array[(s)->top - 5] : NULL) -#define mvt_stack_5(s) (((s)->top > 5)? (s)->array[(s)->top - 6] : NULL) - typedef struct { int size; mvt **array; int top; } mvt_stack; + +#define mvt_stack_nbelem(s) ((s)->top) +#define mvt_stack_elt_n(s, n) ((s)->array[(s)->top - (n) - 1]) +#define mvt_stack_pop(s) (((s)->top == 0)? NULL : (s)->array[--((s)->top)]) +#define mvt_stack_is_empty(s) (((s)->top == 0) ? 1 : 0) +#define mvt_stack_top(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) +#define mvt_stack_0(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) +#define mvt_stack_1(s) (((s)->top > 1)? (s)->array[(s)->top - 2] : NULL) +#define mvt_stack_2(s) (((s)->top > 2)? (s)->array[(s)->top - 3] : NULL) +#define mvt_stack_3(s) (((s)->top > 3)? (s)->array[(s)->top - 4] : NULL) +#define mvt_stack_4(s) (((s)->top > 4)? (s)->array[(s)->top - 5] : NULL) +#define mvt_stack_5(s) (((s)->top > 5)? (s)->array[(s)->top - 6] : NULL) + mvt_stack *mvt_stack_new(void); -mvt_stack *mvt_stack_copy(mvt_stack *ms); void mvt_stack_push(mvt_stack *ms, mvt *m); void mvt_stack_print(FILE *f, mvt_stack *ms); void mvt_stack_free(mvt_stack *ms); diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.c b/maca_trans_parser/src/oracle_parser_arc_eager.c index 93d662b23fb97f5f03ea3cbd5527c08746e4a574..074400e2b14b8bede8cbef8815224b2a73553b95 100644 --- a/maca_trans_parser/src/oracle_parser_arc_eager.c +++ b/maca_trans_parser/src/oracle_parser_arc_eager.c @@ -11,27 +11,6 @@ int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int gov_hyp; int sentence_change; -#if 0 - for(dep = word_index - 1; (dep >= 0) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0); dep--){ - gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); - if(gov_ref == word_index){ /* dep is a dependent of word in ref */ - /* check that dep has the same governor in hyp */ - gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); - if(gov_hyp != gov_ref) return 0; - } - } - - for(dep = word_index + 1; ((dep < word_buffer_get_nbelem(ref)) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0)); dep++){ - gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); - if(gov_ref == word_index){ /* dep is a dependent of word in ref */ - /* check that dep has the same governor in hyp */ - gov_hyp = word_get_gov_index(word_buffer_get_word_n(config_get_buffer(c), dep)); - if(gov_hyp != gov_ref) return 0; - } - } -#endif - -#if 1 for(dep = word_index - 1; (dep >= 0) && (word_get_sent_seg(word_buffer_get_word_n(ref, dep)) == 0); dep--){ gov_ref = word_get_gov_index(word_buffer_get_word_n(ref, dep)); if(gov_ref == word_index){ /* dep is a dependent of word in ref */ @@ -52,9 +31,6 @@ int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, if(gov_hyp != gov_ref) return 0; } } - -#endif - return 1; } @@ -88,33 +64,33 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label) if((s0_label == root_label) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) ){ - return MVT_ROOT; + return MVT_PARSER_ROOT; } /* word on the top of the stack is an end of sentence marker */ if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1) && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)){ - return MVT_EOS; + return MVT_PARSER_EOS; } /* LEFT ARC b0 is the governor and s0 the dependent */ if(s0_gov_index == b0_index){ - return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); + return movement_parser_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); } /* RIGHT ARC s0 is the governor and b0 the dependent */ if(b0_gov_index == s0_index){ - return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); + return movement_parser_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); } /* REDUCE */ if((stack_nbelem(config_get_stack(c)) > 1) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) /* word on top must have all its dependents */ && (word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV)){ /* word on top of the stack has a governor */ - return MVT_REDUCE; + return MVT_PARSER_REDUCE; } } /* SHIFT */ - return MVT_SHIFT; + return MVT_PARSER_SHIFT; } diff --git a/maca_trans_parser/src/oracle_tagparser_arc_eager.c b/maca_trans_parser/src/oracle_tagparser_arc_eager.c index d9123e5d72cebff9a8b8a96e3984603ae607c066..c53354f4b012fbadcdc366f35407a71c5eaf6d57 100644 --- a/maca_trans_parser/src/oracle_tagparser_arc_eager.c +++ b/maca_trans_parser/src/oracle_tagparser_arc_eager.c @@ -75,9 +75,9 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label) /* give a pos to b0 if it does not have one */ if(word_get_pos(b0) == -1){ /* word_set_pos(b0, word_get_pos(word_buffer_get_word_n(ref, b0_index))); */ - /* return movement_tagparse_postag(word_get_pos(b0)); */ + /* return movement_tagparser_postag(word_get_pos(b0)); */ - return movement_tagparse_postag(word_get_pos(word_buffer_get_word_n(ref, b0_index))); + return movement_tagparser_postag_code(word_get_pos(word_buffer_get_word_n(ref, b0_index))); } @@ -102,33 +102,33 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label) if((s0_label == root_label) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) ){ - return MVT_ROOT; + return MVT_TAGPARSER_ROOT; } /* word on the top of the stack is an end of sentence marker */ if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1) && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)){ - return MVT_EOS; + return MVT_TAGPARSER_EOS; } /* LEFT ARC b0 is the governor and s0 the dependent */ if(s0_gov_index == b0_index){ - return movement_tagparse_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); + return movement_tagparser_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index))); } /* RIGHT ARC s0 is the governor and b0 the dependent */ if(b0_gov_index == s0_index){ - return movement_tagparse_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); + return movement_tagparser_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index))); } /* REDUCE */ if((stack_nbelem(config_get_stack(c)) > 1) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index) /* word on top must have all its dependents */ && (word_get_gov(stack_top(config_get_stack(c))) != WORD_INVALID_GOV)){ /* word on top of the stack has a governor */ - return MVT_REDUCE; + return MVT_TAGPARSER_REDUCE; } } /* SHIFT */ - return MVT_SHIFT; + return MVT_TAGPARSER_SHIFT; } diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c index ad65e2eb813ccb2373eb02a902c907f5cffb7205..3a81fc79d7839530580a9e6803a1e070c4dc55b5 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c @@ -32,7 +32,6 @@ void print_word_buffer(config *c, dico *dico_labels) } } - void simple_decoder_parser_arc_eager(context *ctx) { FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; @@ -46,7 +45,7 @@ void simple_decoder_parser_arc_eager(context *ctx) config *c = NULL; int result; float entropy; - float delta; + /* float delta; */ int argmax1, argmax2; float max1, max2; int index; @@ -54,12 +53,12 @@ void simple_decoder_parser_arc_eager(context *ctx) root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; - c = config_initial(f, ctx->mcd_struct, 5); + c = config_new(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); - mvt_type = movement_type(mvt_code); - mvt_label = movement_label(mvt_code); + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); if(ctx->trace_mode){ index = word_get_index(word_buffer_b0(config_get_buffer(c))); @@ -68,7 +67,7 @@ void simple_decoder_parser_arc_eager(context *ctx) stack_print(stdout, c->st); fprintf(stdout, "\t"); - movement_print(stdout, mvt_code, ctx->dico_labels); + movement_parser_print(stdout, mvt_code, ctx->dico_labels); fprintf(stdout, "\t"); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); printf("%f\n", max1 - max2); @@ -81,9 +80,9 @@ void simple_decoder_parser_arc_eager(context *ctx) entropy = feature_table_entropy(fv, ft); /* delta = feature_table_diff_scores(fv, ft); */ feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); - movement_print(stdout, argmax1, ctx->dico_labels); + movement_parser_print(stdout, argmax1, ctx->dico_labels); printf(":\t%f\n", max1); - movement_print(stdout, argmax2, ctx->dico_labels); + movement_parser_print(stdout, argmax2, ctx->dico_labels); printf(":\t%f\n", max2); printf("delta = %f\n", max1 - max2); @@ -91,34 +90,34 @@ void simple_decoder_parser_arc_eager(context *ctx) /* printf("entropy = %f delta = %f\n", entropy, delta); */ printf("entropy = %f\n",entropy); - /* movement_print(stdout, mvt_code, ctx->dico_labels); */ + /* movement_parser_print(stdout, mvt_code, ctx->dico_labels); */ } result = 0; switch(mvt_type){ - case MVT_LEFT : - result = movement_left_arc(c, mvt_label, max); + case MVT_PARSER_LEFT : + result = movement_parser_left_arc(c, mvt_label); break; - case MVT_RIGHT: - result = movement_right_arc(c, mvt_label, max); + case MVT_PARSER_RIGHT: + result = movement_parser_right_arc(c, mvt_label); break; - case MVT_REDUCE: - result = movement_reduce(c, max); + case MVT_PARSER_REDUCE: + result = movement_parser_reduce(c); break; - case MVT_ROOT: - result = movement_root(c, max, root_label); + case MVT_PARSER_ROOT: + result = movement_parser_root(c, root_label); break; - case MVT_EOS: - result = movement_eos(c, max); + case MVT_PARSER_EOS: + result = movement_parser_eos(c); break; - case MVT_SHIFT: - result = movement_shift(c, 1, max); + case MVT_PARSER_SHIFT: + result = movement_parser_shift(c); } if(result == 0){ if(ctx->debug_mode){ fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); } - movement_shift(c, 1, max); + movement_parser_shift(c); } } @@ -132,29 +131,3 @@ void simple_decoder_parser_arc_eager(context *ctx) fclose(f); } -#if 0 -void print_word_buffer(config *c, dico *dico_labels) -{ - int i; - word *dep; - char *label; - int root_position = 0; - - for(i=0; i < config_get_buffer(c)->nbelem; i++){ - dep = word_buffer_get_word_n(config_get_buffer(c), i); - if(word_get_gov(dep) == 0) root_position = i; - printf("%s\t", word_get_input(dep)); - /* if(word_get_sent_seg(dep) == 1){ - printf("%d\teos\t1\n", root_position - i); - } - else{*/ - printf("%d\t", word_get_gov(dep)); - label = (word_get_label(dep) == -1)? NULL : dico_int2string(dico_labels, word_get_label(dep)); - if(label != NULL) - printf("%s\t0\n", label) ; - else - printf("_\t0\n"); - /* } */ - } -} -#endif diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index da0e990d72097c336491c378e64644e37809cc4a..bad70b6e5ff3d797f62a3dc436ea1c54a50614ed 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -10,18 +10,6 @@ #include"feature_table.h" #include"dico.h" -void add_signature_to_words_in_queue(queue *bf, form2pos *f2p) -{ - int i; - word *w; - - for(i=0; i < queue_nbelem(bf); i++){ - w = queue_elt_n(bf, i); - - w->signature = form2pos_get_signature(f2p, w->form); - } -} - void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) { int i; @@ -34,48 +22,7 @@ void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) } } -void simple_decoder_buffer(context *ctx) -{ - dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); - feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); - int postag; - feat_vec *fv = feat_vec_new(feature_types_nb); - float max; - int i; - word *w = NULL; - FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; - config *c = config_initial(f, ctx->mcd_struct, 0); - - /* read a sentence and put it in the buffer */ - while(word_buffer_read_sentence(c->bf)){ - /* queue_remove(c->bf); */ - /* get rid of dummy token */ - if(ctx->f2p) - add_signature_to_words_in_word_buffer(c->bf, ctx->f2p); - while(!config_is_terminal(c)){ - config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); - - postag = feature_table_argmax(fv, ft, &max); - if(postag != -1) - movement_tagger(c, postag, max, 0); - } - /* config_print(stdout, c); */ - - for(i = stack_nbelem(c->st)-1; i >= 0 ; i--){ - w = stack_elt_n(c->st, i); - printf("%s\t%s\n", w->input, dico_int2string(dico_pos, word_get_pos(w))); - } - /* printf("\n"); */ - - /* config_free(c); */ - c = config_initial(f, ctx->mcd_struct, 0); - } - if(ctx->input_filename) - fclose(f); -} - - -void simple_decoder_stream(context *ctx) +void simple_decoder_tagger(context *ctx) { config *c; feat_vec *fv = feat_vec_new(feature_types_nb); @@ -86,7 +33,7 @@ void simple_decoder_stream(context *ctx) word *w; dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); int res; - c = config_initial_no_dummy_word(f, ctx->mcd_struct, 5); + c = config_new(f, ctx->mcd_struct, 5); while(1){ if(ctx->f2p) @@ -97,10 +44,10 @@ void simple_decoder_stream(context *ctx) /* feat_vec_print(stdout, fv); */ postag = feature_table_argmax(fv, ft, &max); /* printf("postag = %d\n", postag); */ - + w = word_buffer_b0(c->bf); printf("%s\t%s\n", w->input, dico_int2string(dico_pos, postag)); - + res = movement_tagger(c, postag, max, 1); /* printf(" current index = %d nb elem = %d\n", c->bf->current_index, c->bf->nbelem); */ @@ -112,12 +59,3 @@ void simple_decoder_stream(context *ctx) /* config_free(c); */ } - -void simple_decoder_tagger(context *ctx) -{ - if(ctx->stream_mode) - simple_decoder_stream(ctx); - else - simple_decoder_buffer(ctx); -} - diff --git a/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c b/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c index 946c8a4ac0c8a4fcc272b42e8b2d390615d8e150..4b6aac10052fa34c8247075fcd9bdee71e407ec6 100644 --- a/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c @@ -22,8 +22,6 @@ void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) } } - - void print_word_buffer_tagparser(config *c, dico *dico_labels, dico *dico_pos) { int i; @@ -53,7 +51,6 @@ void print_word_buffer_tagparser(config *c, dico *dico_labels, dico *dico_pos) } } - void simple_decoder_tagparser_arc_eager(context *ctx) { FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; @@ -67,7 +64,7 @@ void simple_decoder_tagparser_arc_eager(context *ctx) config *c = NULL; int result; float entropy; - float delta; + /* float delta; */ int argmax1, argmax2; float max1, max2; int index; @@ -75,15 +72,15 @@ void simple_decoder_tagparser_arc_eager(context *ctx) root_label = dico_string2int(ctx->dico_labels, ctx->root_label); if(root_label == -1) root_label = 0; - c = config_initial(f, ctx->mcd_struct, 5); + c = config_new(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ if(ctx->f2p) add_signature_to_words_in_word_buffer(c->bf, ctx->f2p); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); - mvt_type = movement_tagparse_type(mvt_code); - mvt_label = movement_tagparse_label(mvt_code); + mvt_type = movement_tagparser_type(mvt_code); + mvt_label = movement_tagparser_label(mvt_code); if(ctx->trace_mode){ index = word_get_index(word_buffer_b0(config_get_buffer(c))); @@ -92,7 +89,7 @@ void simple_decoder_tagparser_arc_eager(context *ctx) stack_print(stdout, c->st); fprintf(stdout, "\t"); - movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags); + movement_tagparser_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags); fprintf(stdout, "\t"); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); printf("%f\n", max1 - max2); @@ -105,9 +102,9 @@ void simple_decoder_tagparser_arc_eager(context *ctx) entropy = feature_table_entropy(fv, ft); /* delta = feature_table_diff_scores(fv, ft); */ feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); - movement_tagparse_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags); + movement_tagparser_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags); printf(":\t%f\n", max1); - movement_tagparse_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags); + movement_tagparser_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags); printf(":\t%f\n", max2); printf("delta = %f\n", max1 - max2); @@ -115,37 +112,37 @@ void simple_decoder_tagparser_arc_eager(context *ctx) /* printf("entropy = %f delta = %f\n", entropy, delta); */ printf("entropy = %f\n",entropy); - /* movement_tagparse_print(stdout, mvt_code, ctx->dico_labels); */ + /* movement_tagparser_print(stdout, mvt_code, ctx->dico_labels); */ } result = 0; switch(mvt_type){ - case MVT_POSTAG : - result = movement_tagparse_add_pos(c, max, mvt_label); + case MVT_TAGPARSER_POSTAG : + result = movement_tagparser_add_pos(c, mvt_label); break; - case MVT_LEFT : - result = movement_tagparse_left_arc(c, mvt_label, max); + case MVT_TAGPARSER_LEFT : + result = movement_tagparser_left_arc(c, mvt_label); break; - case MVT_RIGHT: - result = movement_tagparse_right_arc(c, mvt_label, max); + case MVT_TAGPARSER_RIGHT: + result = movement_tagparser_right_arc(c, mvt_label); break; - case MVT_REDUCE: - result = movement_tagparse_reduce(c, max); + case MVT_TAGPARSER_REDUCE: + result = movement_tagparser_reduce(c); break; - case MVT_ROOT: - result = movement_tagparse_root(c, max, root_label); + case MVT_TAGPARSER_ROOT: + result = movement_tagparser_root(c, root_label); break; - case MVT_EOS: - result = movement_tagparse_eos(c, max); + case MVT_TAGPARSER_EOS: + result = movement_tagparser_eos(c); break; - case MVT_SHIFT: - result = movement_tagparse_shift(c, 1, max); + case MVT_TAGPARSER_SHIFT: + result = movement_tagparser_shift(c); } if(result == 0){ if(ctx->debug_mode){ fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); } - movement_tagparse_shift(c, 1, max); + movement_tagparser_shift(c); } } diff --git a/maca_trans_parser/src/test_mvt_stack.c b/maca_trans_parser/src/test_mvt_stack.c new file mode 100644 index 0000000000000000000000000000000000000000..711a3a49ffba93a1461c18af5ee6acf36ac63dee --- /dev/null +++ b/maca_trans_parser/src/test_mvt_stack.c @@ -0,0 +1,28 @@ +#include<stdio.h> +#include<stdlib.h> +#include"mvt_stack.h" +#include"util.h" + +int main(int argc, char *argv[]) +{ + mvt_stack *ms = mvt_stack_new(); + word *w1 = NULL; + word *w2 = NULL; + int i; + + for(i=0; i < 10; i++){ + w1 = word_new("1"); + word_set_index(w1, i + 1); + w2 = word_new("1"); + word_set_index(w2, i + 2); + mvt_stack_push(ms, mvt_new(i, w1, w2)); + } + mvt_stack_print(stdout, ms); + + printf("\n"); + + while(!mvt_stack_is_empty(ms)) + mvt_print(stdout, mvt_stack_pop(ms)); + + +}