diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt index 4e79f08cafb365ece442ac6006b250e730bd393d..58e4c1f2f79afc41d48a0ad738c0dc2149a11582 100644 --- a/maca_common/CMakeLists.txt +++ b/maca_common/CMakeLists.txt @@ -7,6 +7,7 @@ set(SOURCES src/util.c src/form2pos.c src/word.c src/sentence.c + src/word_buffer.c ) #compiling library diff --git a/maca_trans_parser/src/word_buffer.h b/maca_common/include/word_buffer.h similarity index 54% rename from maca_trans_parser/src/word_buffer.h rename to maca_common/include/word_buffer.h index cd41d1729e6d59abb91e5b8d92b1d024c80e4b53..47f2928a5662757e9cf0af2f9caf7d29768320f8 100644 --- a/maca_trans_parser/src/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -5,6 +5,22 @@ #include"word.h" #include"mcd.h" +/* Field accessors for a word_buffer pointer; each expands to a parenthesized member access. */ +#define word_buffer_get_size(wb) ((wb)->size) +#define word_buffer_get_nbelem(wb) ((wb)->nbelem) +#define word_buffer_get_lookahead(wb) ((wb)->lookahead) +#define word_buffer_get_current_index(wb) ((wb)->current_index) +#define word_buffer_get_input_file(wb) ((wb)->input_file) +#define word_buffer_get_mcd(wb) ((wb)->mcd_struct) +/* Window accessors relative to current_index: b0 is NULL when nbelem is 0, b1..b3 are NULL when the position reaches nbelem, bm1..bm3 are NULL when the position would be negative. wb is expanded more than once; pass a side-effect-free expression. */ +#define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) +#define word_buffer_b1(wb) (((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1]) +#define word_buffer_b2(wb) (((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2]) +#define word_buffer_b3(wb) (((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3]) +#define word_buffer_bm1(wb) (((wb)->current_index - 1 < 0)? NULL : (wb)->array[(wb)->current_index - 1]) +#define word_buffer_bm2(wb) (((wb)->current_index - 2 < 0)? NULL : (wb)->array[(wb)->current_index - 2]) +#define word_buffer_bm3(wb) (((wb)->current_index - 3 < 0)? 
NULL : (wb)->array[(wb)->current_index - 3]) + typedef struct { int size; /* size of the array used to store words */ int nbelem; /* number of words in the buffer */ @@ -20,6 +36,11 @@ word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead); void word_buffer_free(word_buffer *wb); int word_buffer_add(word_buffer *wb, word *w); word *word_buffer_get_word_relative(word_buffer *wb, int dist); +int word_buffer_read_next_word(word_buffer *wb); +int word_buffer_move_right(word_buffer *wb); +int word_buffer_move_left(word_buffer *wb); +void word_buffer_print(FILE *f, word_buffer *wb); +/* word *word_buffer_b0(word_buffer *wb); word *word_buffer_b1(word_buffer *wb); word *word_buffer_b2(word_buffer *wb); @@ -27,9 +48,7 @@ word *word_buffer_b3(word_buffer *wb); word *word_buffer_bm1(word_buffer *wb); word *word_buffer_bm2(word_buffer *wb); word *word_buffer_bm3(word_buffer *wb); -int word_buffer_read_next_word(word_buffer *wb); -int word_buffer_move_right(word_buffer *wb); -int word_buffer_move_left(word_buffer *wb); -void word_buffer_print(FILE *f, word_buffer *wb); +*/ + #endif diff --git a/maca_trans_parser/src/word_buffer.c b/maca_common/src/word_buffer.c similarity index 99% rename from maca_trans_parser/src/word_buffer.c rename to maca_common/src/word_buffer.c index fe35fc5d7179268c10439c71648a76da19fdbf89..74e1a0b0086aa68466aeea3ec338c97671f3abc7 100644 --- a/maca_trans_parser/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -64,6 +64,41 @@ word *word_buffer_get_word(word_buffer *wb, int offset) return ((wb->current_index + offset >=0) && (wb->current_index + offset <= wb->nbelem))? 
wb->array[wb->current_index + offset] : NULL; } + +int word_buffer_read_next_word(word_buffer *wb) +{ + word *w = NULL; + int index; + + w = word_read(wb->input_file, wb->mcd_struct); + if(w == NULL) return -1; + + + index = word_buffer_add(wb, w); + word_set_relative_index(w, index); + return index; +} + +int word_buffer_move_right(word_buffer *wb) +{ + if((wb->nbelem - 1 - wb->current_index) <= wb->lookahead) + word_buffer_read_next_word(wb); + if(wb->current_index == wb->nbelem - 1) return 0; + wb->current_index++; + return 1; +} + +int word_buffer_move_left(word_buffer *wb) +{ + if(wb->current_index > 0){ + wb->current_index--; + return 1; + } + return 0; +} + + +/* word *word_buffer_b0(word_buffer *wb) { return(wb->nbelem == 0)? NULL : wb->array[wb->current_index]; @@ -98,37 +133,5 @@ word *word_buffer_bm3(word_buffer *wb) { return(wb->current_index - 3 < 0)? NULL : wb->array[wb->current_index - 3]; } - -int word_buffer_read_next_word(word_buffer *wb) -{ - word *w = NULL; - int index; - - w = word_read(wb->input_file, wb->mcd_struct); - if(w == NULL) return -1; - - - index = word_buffer_add(wb, w); - word_set_relative_index(w, index); - return index; -} - -int word_buffer_move_right(word_buffer *wb) -{ - if((wb->nbelem - 1 - wb->current_index) <= wb->lookahead) - word_buffer_read_next_word(wb); - if(wb->current_index == wb->nbelem - 1) return 0; - wb->current_index++; - return 1; -} - -int word_buffer_move_left(word_buffer *wb) -{ - if(wb->current_index > 0){ - wb->current_index--; - return 1; - } - return 0; -} - +*/ diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index cf264aafcfea1f97ce6ddadb656b9d1218b9ce05..a7c7988797d35ebd6132f4722c79142955287823 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -18,7 +18,6 @@ set(SOURCES src/context.c src/queue.c src/beam.c src/feat_types.c - src/word_buffer.c ) #compiling library diff --git a/maca_trans_parser/src/context.c 
b/maca_trans_parser/src/context.c index fe4f6205fa1ae91fc29347156aacb48e32d72663..bbdbc3da3a6e0a7608ac35fb4780669a44ecd1e3 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -57,7 +57,7 @@ context *context_new(void) ctx->f2p_filename = NULL; ctx->maca_data_path = NULL; - ctx->language = strdup("fr"); + ctx->language = strdup("fr_stream"); ctx->root_label = strdup("root"); ctx->d_perceptron_features = NULL; @@ -67,7 +67,6 @@ context *context_new(void) ctx->dico_labels = NULL; ctx->f2p = NULL; - ctx->iteration_nb = 4; ctx->debug_mode = 0; ctx->feature_cutoff = 0; @@ -77,7 +76,7 @@ context *context_new(void) ctx->beam_width = 1; ctx->sent_nb = 1000000; ctx->hidden_neurons_nb = 100; - ctx->stream_mode = 0; + ctx->stream_mode = 1; ctx->conll = 0; ctx->ifpls = 1; diff --git a/maca_trans_parser/src/depset.c b/maca_trans_parser/src/depset.c index ee7f0fdaf77a51892ab28ac9143dbf1c0c313ccf..3ce6b4776925c1e2320b651adff6193ff7361b06 100644 --- a/maca_trans_parser/src/depset.c +++ b/maca_trans_parser/src/depset.c @@ -75,11 +75,21 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels) { int i; int distance; - + char *label; for(i=1; i < d->length; i++){ if((d->array[i].gov) && (d->array[i].dep)){ distance = word_get_relative_index(d->array[i].gov) - word_get_relative_index(d->array[i].dep); - fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, distance, dico_int2string(dico_labels, d->array[i].label)); + /* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, distance, dico_int2string(dico_labels, d->array[i].label)); */ + + label = dico_int2string(dico_labels, d->array[i].label); + fprintf(f, "%s\t%d\t%s\t", d->array[i].dep->input, distance, label); + if(!strcmp(label, "eos")) + fprintf(f, "1\n"); + else + fprintf(f, "0\n"); + + + } } } diff --git a/maca_trans_parser/src/maca_trans_parser_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_mcf2cff.c index 773d985935240478efeeda08fe19d775f95914f5..4b980dedde4336acda32f8e96b27edfe5f47083c 
100644 --- a/maca_trans_parser/src/maca_trans_parser_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_mcf2cff.c @@ -59,9 +59,9 @@ void generate_training_file_stream(FILE *output_file, context *ctx) c = config_initial(conll_file, ctx->mcd_struct, 5); while((ref = sentence_read(conll_file_ref , ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ - /* sentence_print(stdout, ref, ctx->dico_labels); */ + /* sentence_print(stdout, ref, ctx->dico_labels); */ while(1){ - /* config_print(stdout,c); */ + /* config_print(stdout,c); */ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); /* feat_vec_print(stdout, fv); */ diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index 579cc64406644c14657c9b40dd6977edd0bc8610..dd4eb79c9ee333a9670425206db32d038e8210c5 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -73,7 +73,7 @@ void simple_decoder_stream(context *ctx) float max; word *w; dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); - + c = config_initial_no_dummy_word(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ if(ctx->f2p)