diff --git a/maca_common/include/word.h b/maca_common/include/word.h index b2fe6cfbb06da3175732b2116d73729b7dafd39f..30074b7606988cfcefa4400b8f35acd958ea9807 100644 --- a/maca_common/include/word.h +++ b/maca_common/include/word.h @@ -5,86 +5,6 @@ #define WORD_INVALID_GOV 10000 -#define word_get_id(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID] -#define word_get_form(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM] -#define word_get_lemma(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA] -#define word_get_cpos(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS] -#define word_get_pos(w) ((w) == NULL) ? -1 : ((w)->wf_array[MCD_WF_POS]) -#define word_get_feats(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FEATS] -#define word_get_gov(w) ((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV] -#define word_get_label(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL] -#define word_get_stag(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG] -#define word_get_sent_seg(w) ((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG] -#define word_get_A(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A] -#define word_get_B(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B] -#define word_get_C(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C] -#define word_get_D(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_D] -#define word_get_E(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_E] -#define word_get_F(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_F] -#define word_get_G(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_G] -#define word_get_H(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_H] -#define word_get_I(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_I] -#define word_get_J(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_J] -#define word_get_K(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_K] -#define word_get_L(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_L] -#define word_get_M(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_M] -#define word_get_N(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_N] -#define word_get_O(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_O] -#define word_get_P(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_P] -#define word_get_Q(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Q] -#define word_get_R(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_R] -#define word_get_S(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_S] -#define word_get_T(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_T] -#define word_get_U(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_U] -#define word_get_V(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_V] -#define word_get_W(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_W] -#define word_get_X(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_X] -#define word_get_Y(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Y] -#define word_get_Z(w) ((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Z] -#define word_get_input(w) ((w) == NULL) ? NULL : (w)->input -#define word_get_signature(w) ((w) == NULL) ? -1 : (w)->signature -#define word_get_U1(w) ((w) == NULL) ? -1 : (w)->U1 -#define word_get_index(w) ((w) == NULL) ? -1 : (w)->index - -#define word_set_id(w, val) (w)->wf_array[MCD_WF_ID] = (val) -#define word_set_form(w, val) (w)->wf_array[MCD_WF_FORM] = (val) -#define word_set_lemma(w, val) (w)->wf_array[MCD_WF_LEMMA] = (val) -#define word_set_cpos(w, val) (w)->wf_array[MCD_WF_CPOS] = (val) -#define word_set_pos(w, val) (w)->wf_array[MCD_WF_POS] = (val) -#define word_set_feats(w, val) (w)->wf_array[MCD_WF_FEATS] = (val) -#define word_set_gov(w, val) ((w)->wf_array[MCD_WF_GOV] = (val)) -#define word_set_label(w, val) (w)->wf_array[MCD_WF_LABEL] = (val) -#define word_set_stag(w, val) (w)->wf_array[MCD_WF_STAG] = (val) -#define word_set_sent_seg(w, val) (w)->wf_array[MCD_WF_SENT_SEG] = (val) -#define word_set_A(w, val) (w)->wf_array[MCD_WF_A] = (val) -#define word_set_B(w, val) (w)->wf_array[MCD_WF_B] = (val) -#define word_set_C(w, val) (w)->wf_array[MCD_WF_C] = (val) -#define word_set_D(w, val) (w)->wf_array[MCD_WF_D] = (val) -#define word_set_E(w, val) (w)->wf_array[MCD_WF_E] = (val) -#define word_set_F(w, val) (w)->wf_array[MCD_WF_F] = (val) -#define word_set_G(w, val) (w)->wf_array[MCD_WF_G] = (val) -#define word_set_H(w, val) (w)->wf_array[MCD_WF_H] = (val) -#define word_set_I(w, val) (w)->wf_array[MCD_WF_I] = (val) -#define word_set_J(w, val) (w)->wf_array[MCD_WF_J] = (val) -#define word_set_K(w, val) (w)->wf_array[MCD_WF_K] = (val) -#define word_set_L(w, val) (w)->wf_array[MCD_WF_L] = (val) -#define word_set_M(w, val) (w)->wf_array[MCD_WF_M] = (val) -#define word_set_N(w, val) (w)->wf_array[MCD_WF_N] = (val) -#define word_set_O(w, val) (w)->wf_array[MCD_WF_O] = (val) -#define word_set_P(w, val) (w)->wf_array[MCD_WF_P] = (val) -#define word_set_Q(w, val) (w)->wf_array[MCD_WF_Q] = (val) -#define word_set_R(w, val) (w)->wf_array[MCD_WF_R] = (val) -#define word_set_S(w, val) (w)->wf_array[MCD_WF_S] = (val) -#define word_set_T(w, val) (w)->wf_array[MCD_WF_T] = (val) -#define word_set_U(w, val) (w)->wf_array[MCD_WF_U] = (val) -#define word_set_V(w, val) (w)->wf_array[MCD_WF_V] = (val) -#define word_set_W(w, val) (w)->wf_array[MCD_WF_W] = (val) -#define word_set_X(w, val) (w)->wf_array[MCD_WF_X] = (val) -#define word_set_Y(w, val) (w)->wf_array[MCD_WF_Y] = (val) -#define word_set_Z(w, val) (w)->wf_array[MCD_WF_Z] = (val) -#define word_set_signature(w, val) (w)->signature = (val) -#define word_set_index(w, val) (w)->index = (val) - typedef struct _word { int wf_array[MCD_WF_NB]; /* array containing the codes corresponding to the different word features */ char *input; /* the string corresponding to the actual line in the corpus file */ @@ -96,6 +16,86 @@ typedef struct _word { int is_root; } word; +#define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID]) +#define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM]) +#define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA]) +#define word_get_cpos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS]) +#define word_get_pos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_POS]) +#define word_get_feats(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FEATS]) +#define word_get_gov(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV]) +#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL]) +#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG]) +#define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG]) +#define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A]) +#define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B]) +#define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C]) +#define word_get_D(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_D]) +#define word_get_E(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_E]) +#define word_get_F(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_F]) +#define word_get_G(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_G]) +#define word_get_H(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_H]) +#define word_get_I(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_I]) +#define word_get_J(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_J]) +#define word_get_K(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_K]) +#define word_get_L(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_L]) +#define word_get_M(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_M]) +#define word_get_N(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_N]) +#define word_get_O(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_O]) +#define word_get_P(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_P]) +#define word_get_Q(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Q]) +#define word_get_R(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_R]) +#define word_get_S(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_S]) +#define word_get_T(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_T]) +#define word_get_U(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_U]) +#define word_get_V(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_V]) +#define word_get_W(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_W]) +#define word_get_X(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_X]) +#define word_get_Y(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Y]) +#define word_get_Z(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Z]) +#define word_get_input(w) (((w) == NULL) ? NULL : (w)->input) +#define word_get_signature(w) (((w) == NULL) ? -1 : (w)->signature) +#define word_get_U1(w) (((w) == NULL) ? -1 : (w)->U1) +#define word_get_index(w) (((w) == NULL) ? -1 : (w)->index) + +#define word_set_id(w, val) ((w)->wf_array[MCD_WF_ID] = (val)) +#define word_set_form(w, val) ((w)->wf_array[MCD_WF_FORM] = (val)) +#define word_set_lemma(w, val) ((w)->wf_array[MCD_WF_LEMMA] = (val)) +#define word_set_cpos(w, val) ((w)->wf_array[MCD_WF_CPOS] = (val)) +#define word_set_pos(w, val) ((w)->wf_array[MCD_WF_POS] = (val)) +#define word_set_feats(w, val) ((w)->wf_array[MCD_WF_FEATS] = (val)) +#define word_set_gov(w, val) ((w)->wf_array[MCD_WF_GOV] = (val)) +#define word_set_label(w, val) ((w)->wf_array[MCD_WF_LABEL] = (val)) +#define word_set_stag(w, val) ((w)->wf_array[MCD_WF_STAG] = (val)) +#define word_set_sent_seg(w, val) ((w)->wf_array[MCD_WF_SENT_SEG] = (val)) +#define word_set_A(w, val) ((w)->wf_array[MCD_WF_A] = (val)) +#define word_set_B(w, val) ((w)->wf_array[MCD_WF_B] = (val)) +#define word_set_C(w, val) ((w)->wf_array[MCD_WF_C] = (val)) +#define word_set_D(w, val) ((w)->wf_array[MCD_WF_D] = (val)) +#define word_set_E(w, val) ((w)->wf_array[MCD_WF_E] = (val)) +#define word_set_F(w, val) ((w)->wf_array[MCD_WF_F] = (val)) +#define word_set_G(w, val) ((w)->wf_array[MCD_WF_G] = (val)) +#define word_set_H(w, val) ((w)->wf_array[MCD_WF_H] = (val)) +#define word_set_I(w, val) ((w)->wf_array[MCD_WF_I] = (val)) +#define word_set_J(w, val) ((w)->wf_array[MCD_WF_J] = (val)) +#define word_set_K(w, val) ((w)->wf_array[MCD_WF_K] = (val)) +#define word_set_L(w, val) ((w)->wf_array[MCD_WF_L] = (val)) +#define word_set_M(w, val) ((w)->wf_array[MCD_WF_M] = (val)) +#define word_set_N(w, val) ((w)->wf_array[MCD_WF_N] = (val)) +#define word_set_O(w, val) ((w)->wf_array[MCD_WF_O] = (val)) +#define word_set_P(w, val) ((w)->wf_array[MCD_WF_P] = (val)) +#define word_set_Q(w, val) ((w)->wf_array[MCD_WF_Q] = (val)) +#define word_set_R(w, val) ((w)->wf_array[MCD_WF_R] = (val)) +#define word_set_S(w, val) ((w)->wf_array[MCD_WF_S] = (val)) +#define word_set_T(w, val) ((w)->wf_array[MCD_WF_T] = (val)) +#define word_set_U(w, val) ((w)->wf_array[MCD_WF_U] = (val)) +#define word_set_V(w, val) ((w)->wf_array[MCD_WF_V] = (val)) +#define word_set_W(w, val) ((w)->wf_array[MCD_WF_W] = (val)) +#define word_set_X(w, val) ((w)->wf_array[MCD_WF_X] = (val)) +#define word_set_Y(w, val) ((w)->wf_array[MCD_WF_Y] = (val)) +#define word_set_Z(w, val) ((w)->wf_array[MCD_WF_Z] = (val)) +#define word_set_signature(w, val) ((w)->signature = (val)) +#define word_set_index(w, val) ((w)->index = (val)) + word *word_new(char *input); word *word_create_dummy(mcd *mcd_struct); diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index bce1b7d90fdc33d21ff4395834e2db96620e1087..91b216dff5257671fafd3f6ecbbeeb174e4350a8 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -13,7 +13,8 @@ #define word_buffer_get_input_file(wb) (wb)->input_file #define word_buffer_get_mcd(wb) (wb)->mcd_struct -#define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) +/* #define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) */ +#define word_buffer_b0(wb) (((wb)->current_index >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index]) #define word_buffer_b1(wb) (((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1]) #define word_buffer_b2(wb) (((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2]) #define word_buffer_b3(wb) (((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3]) @@ -24,6 +25,15 @@ #define word_buffer_nb_elts_left(wb) ((wb)->current_index) +#define word_buffer_end(wb) (((wb)->current_index >= (wb)->nbelem)? 1 : 0) +#define word_buffer_is_last(wb) (((wb)->current_index == (wb)->nbelem - 1)? 1 : 0) +#define word_buffer_is_empty(wb) (((wb)->nbelem == 0)? 1 : 0) + + + + + + typedef struct { int size; /* size of the array used to store words */ int nbelem; /* number of words in the buffer */ @@ -45,10 +55,13 @@ int word_buffer_move_right(word_buffer *wb); int word_buffer_move_left(word_buffer *wb); void word_buffer_print(FILE *f, word_buffer *wb); void word_buffer_print_compact(FILE *f, word_buffer *wb); +int word_buffer_read_sentence(word_buffer *bw); +word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct); + +/* int word_buffer_is_empty(word_buffer *wb); int word_buffer_is_last(word_buffer *wb); int word_buffer_end(word_buffer *wb); -int word_buffer_read_sentence(word_buffer *bw); -word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct); +*/ #endif diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index e7cb52b7b6ce4c109f6ccdb2e62bfcf446c0fa2c..bf53a2d61ef2bd273376ce2b876e0810581d7926 100644 --- a/maca_common/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -92,7 +92,6 @@ word *word_buffer_get_word(word_buffer *wb, int offset) word *word_buffer_get_word_n(word_buffer *wb, int n) { return ((n >=0) && (n < wb->nbelem))? wb->array[n] : NULL; - } word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) @@ -104,7 +103,7 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) else f = myfopen(mcf_filename, "r"); word_buffer *wb = word_buffer_new(f, mcd_struct, 0); - while(word_buffer_read_next_word(wb) != -1){ + while(word_buffer_read_next_word(wb)){ /* printf("load word %d\n", wb->nbelem - 1); */ } if(mcf_filename != NULL) @@ -112,23 +111,19 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) return wb; } - int word_buffer_read_next_word(word_buffer *wb) { - word *w = NULL; - - w = word_read(wb->input_file, wb->mcd_struct); - /* if((w) && (w->input)) printf("## %s\n", w->input); */ - if(w == NULL) return -1; + word *w = word_read(wb->input_file, wb->mcd_struct); + if(w == NULL) return 0; word_buffer_add(wb, w); - return wb->nbelem - 1; + return 1; } int word_buffer_move_right(word_buffer *wb) { if((wb->nbelem - 1 - wb->current_index) <= wb->lookahead) word_buffer_read_next_word(wb); - if(wb->current_index >= wb->nbelem) return 0; + if(wb->current_index >= wb->nbelem) return 0; wb->current_index++; return 1; } @@ -142,23 +137,6 @@ int word_buffer_move_left(word_buffer *wb) return 0; } -int word_buffer_end(word_buffer *wb) -{ - - /* printf("in word_buffer_end current index = %d nb elem = %d\n", wb->current_index, wb->nbelem); */ - return (wb->current_index >= wb->nbelem)? 1 : 0; -} - -int word_buffer_is_last(word_buffer *wb) -{ - return (wb->current_index == wb->nbelem - 1)? 1 : 0; -} - -int word_buffer_is_empty(word_buffer *wb) -{ - return (wb->nbelem == 0)? 1 : 0; -} - int word_buffer_read_sentence(word_buffer *wb) { char buffer[10000]; @@ -166,16 +144,29 @@ int word_buffer_read_sentence(word_buffer *wb) int index = 1; while(fgets(buffer, 10000, word_buffer_get_input_file(wb))){ + if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) continue; /* ignore empty lines */ if(feof(word_buffer_get_input_file(wb))) break; - /* fprintf(stderr, "%s", buffer); */ - if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */ w = word_parse_buffer(buffer, word_buffer_get_mcd(wb)); word_set_index(w, index); index++; word_buffer_add(wb, w); if(word_is_eos(w, word_buffer_get_mcd(wb))) break; } - /* return bw->nbelem - 1; */ /* because of the dummy word */ return wb->nbelem ; } +/*int word_buffer_end(word_buffer *wb) +{ + return (wb->current_index >= wb->nbelem)? 1 : 0; +} + +int word_buffer_is_last(word_buffer *wb) +{ + return (wb->current_index == wb->nbelem - 1)? 1 : 0; +} + +int word_buffer_is_empty(word_buffer *wb) +{ + return (wb->nbelem == 0)? 1 : 0; +} +*/ diff --git a/maca_tools/src/mcf2conll.c b/maca_tools/src/mcf2conll.c index 5bd2c4465a52d9b85a743ca5bb0c31c190b1671b..1eb8b1a3d49e7c6384597a99b8fa074ce7374d83 100644 --- a/maca_tools/src/mcf2conll.c +++ b/maca_tools/src/mcf2conll.c @@ -18,6 +18,23 @@ typedef struct { mcd *mcd_struct; } context; +void context_free(context *ctx) +{ + if(ctx){ + if(ctx->program_name) + free(ctx->program_name); + if(ctx->conll_filename) + free(ctx->conll_filename); + if(ctx->mcf_filename) + free(ctx->mcf_filename); + if(ctx->mcd_filename) + free(ctx->mcd_filename); + if(ctx->mcd_struct) + mcd_free(ctx->mcd_struct); + free(ctx); + } +} + context *context_new(void) { context *ctx = (context *)memalloc(sizeof(context)); @@ -44,7 +61,6 @@ void context_general_help_message(context *ctx) fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n"); } - void mcf2conll_check_options(context *ctx){ if(ctx->help){ context_general_help_message(ctx); @@ -52,8 +68,6 @@ void mcf2conll_check_options(context *ctx){ } } - - context *context_read_options(int argc, char *argv[]) { int c; @@ -134,7 +148,7 @@ int main(int argc, char *argv[]) w = word_buffer_b0(wb); if(w){ - printf("%d\t", index); + fprintf(output_file, "%d\t", index); if(form_col != -1) word_print_col_n(output_file, w, form_col); @@ -194,5 +208,6 @@ int main(int argc, char *argv[]) if(ctx->conll_filename) fclose(output_file); + context_free(ctx); return 0; } diff --git a/maca_trans_parser/src/config.c b/maca_trans_parser/src/config.c index ed5e3f0b7c04f2c636f82573d79ea66389372488..d4db0fe73a86d1d1ac8955152bd55e2d9806480b 100644 --- a/maca_trans_parser/src/config.c +++ b/maca_trans_parser/src/config.c @@ -27,6 +27,10 @@ int config_is_terminal(config *c) return (word_buffer_end(config_get_buffer(c)) && (stack_is_empty(config_get_stack(c)) || (stack_top(config_get_stack(c)) == NULL))); + + /*return (word_buffer_end(config_get_buffer(c)) && + (stack_is_empty(config_get_stack(c)) || + (stack_top(config_get_stack(c)) == NULL)));*/ } void config_push_mvt(config *c, int type, word *gov, word *dep) diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c index 64236b990016bcbe708d61b7bfc3ba8a7a490617..fe865416dc2c43e37d5b613e04ab271c67ac3e8e 100644 --- a/maca_trans_parser/src/movements.c +++ b/maca_trans_parser/src/movements.c @@ -9,9 +9,10 @@ int movement_eos(config *c, int movement_code) if(stack_is_empty(config_get_stack(c))) return 0; word *s0 = stack_top(config_get_stack(c)); + /* word on top of stack is already eos */ if(word_get_sent_seg(s0) == 1) return 0; - /* set word on the top of the stack to sent_seg */ + /* set word on the top of the stack to eos */ word_set_sent_seg(s0, 1); config_push_mvt(c, movement_code, s0, NULL); @@ -22,7 +23,7 @@ int movement_eos_undo(config *c) { word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); - /* word on the top of the stack is not sent_seg anymore */ + /* word on the top of the stack is not eos anymore */ word_set_sent_seg(gov, 0); mvt_free(config_pop_mvt(c)); @@ -103,7 +104,10 @@ int movement_right_arc_undo(config *c) int movement_shift(config *c, int movement_code) { - if(word_buffer_is_empty(config_get_buffer(c))) return 0; + /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */ + if(word_buffer_end(config_get_buffer(c))) return 0; + + word *b0 = word_buffer_b0(config_get_buffer(c)); stack_push(config_get_stack(c), b0); config_push_mvt(c, movement_code, b0, NULL); diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c index 53ba95d2916cad21c2cd9ff88d541b010bf3b4fe..e523e5fb0490589e8a464dbbd0588d9ee825781d 100644 --- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c +++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c @@ -36,8 +36,6 @@ void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct) } } - - void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) { int i; @@ -141,25 +139,28 @@ void simple_decoder_parser_arc_eager(context *ctx) c = config_new(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ + if(ctx->debug_mode){ fprintf(stdout, "***********************************\n"); config_print(stdout, c); } /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ /* which means that the top of the stack got its eos status from input */ + /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */ + if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){ word_set_sent_seg(stack_top(config_get_stack(c)), -1); - movement_parser_eos(c); while(movement_parser_reduce(c)); while(movement_parser_root(c, root_label)); - - /* mvt_code = MVT_PARSER_EOS; */ if(ctx->debug_mode) printf("force EOS\n"); } + + /* normal behavious, ask classifier what is the next movement to do and do it */ else{ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); mvt_code = feature_table_argmax(fv, ft, &max); + if(ctx->debug_mode){ vcode *vcode_array = feature_table_get_vcode_array(fv, ft); for(int i=0; i < 3; i++){ @@ -181,7 +182,6 @@ void simple_decoder_parser_arc_eager(context *ctx) fprintf(stdout, "\t"); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); printf("%f\n", max1 - max2); - } mvt_type = movement_parser_type(mvt_code); @@ -209,20 +209,16 @@ void simple_decoder_parser_arc_eager(context *ctx) } if(result == 0){ - if(ctx->debug_mode){ - fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + result = movement_parser_shift(c); + if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */ + if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n"); + while(!stack_is_empty(config_get_stack(c))) + movement_parser_root(c, root_label); } - movement_parser_shift(c); } } } - /* horrible hack: force the remaining element in the stack (if any) to be the root */ - if(stack_nbelem(config_get_stack(c)) && (stack_top(config_get_stack(c)) == NULL)) - stack_pop(config_get_stack(c)); - - while(!stack_is_empty(config_get_stack(c)) && (stack_top(config_get_stack(c)) != NULL)) - movement_parser_root(c, root_label); - /* end of horrible hack */ if(!ctx->trace_mode) print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);