From c53ae822a33eab41181e144015ef21b4c325ebd6 Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Thu, 4 Aug 2016 21:28:54 -0400 Subject: [PATCH] modified queue structure to be dynamic --- maca_trans_parser/src/beam.c | 4 +- maca_trans_parser/src/config.c | 8 +-- maca_trans_parser/src/config.h | 4 +- .../src/maca_trans_parser_conll2cff.c | 6 +- .../src/maca_trans_parser_conll2cff_tagger.c | 6 +- .../src/maca_trans_parser_conll2fann.c | 4 +- maca_trans_parser/src/queue.c | 65 +++++++++++++++++-- maca_trans_parser/src/queue.h | 5 +- maca_trans_parser/src/simple_decoder.c | 6 +- maca_trans_parser/src/simple_decoder_tagger.c | 6 +- maca_trans_parser/src/train_perceptron.c | 14 ++-- 11 files changed, 91 insertions(+), 37 deletions(-) diff --git a/maca_trans_parser/src/beam.c b/maca_trans_parser/src/beam.c index ecfc0ea..70ff2d2 100644 --- a/maca_trans_parser/src/beam.c +++ b/maca_trans_parser/src/beam.c @@ -209,7 +209,7 @@ void beam_decoder(FILE *f, mcd *mcd_struct, dico *dico_features, dico *dico_labe config *c; config *c_final; - c = config_initial(f, mcd_struct, 1000, 0); + c = config_initial(f, mcd_struct, 0); while(queue_read_sentence(c->bf, f, mcd_struct)){ c_final = beam_decoder_sentence(c, dico_features, ft, beam_width, mvt_nb, fm); @@ -218,7 +218,7 @@ void beam_decoder(FILE *f, mcd *mcd_struct, dico *dico_features, dico *dico_labe /* depset_print(stdout, c_final->ds); */ depset_print2(stdout, c_final->ds, dico_labels); config_free(c_final); - c = config_initial(f, mcd_struct, 1000, 0); + c = config_initial(f, mcd_struct, 0); } } diff --git a/maca_trans_parser/src/config.c b/maca_trans_parser/src/config.c index 15cd75c..2385b06 100644 --- a/maca_trans_parser/src/config.c +++ b/maca_trans_parser/src/config.c @@ -5,14 +5,14 @@ #include"dico.h" #include"feat_types.h" -config *config_new(FILE *f, mcd *mcd_struct, int queue_size) +config *config_new(FILE *f, mcd *mcd_struct) { config *c = (config *)memalloc(sizeof(config)); c->ds = depset_new(); c->f = f; c->mcd_struct = mcd_struct; c->st = stack_new(); - c->bf = queue_new(queue_size); + c->bf = queue_new(); c->score = 0; c->mvt_array = NULL; c->mvt_array_size = 0; @@ -51,10 +51,10 @@ int config_is_terminal(config *c) return queue_is_empty(c->bf); } -config *config_initial(FILE *f, mcd *mcd_struct, int queue_size, int lookahead) +config *config_initial(FILE *f, mcd *mcd_struct, int lookahead) { int i; - config *c = config_new(f, mcd_struct, queue_size); + config *c = config_new(f, mcd_struct); queue_add(c->bf, word_create_dummy(mcd_struct)); diff --git a/maca_trans_parser/src/config.h b/maca_trans_parser/src/config.h index 6b46e99..f14814f 100644 --- a/maca_trans_parser/src/config.h +++ b/maca_trans_parser/src/config.h @@ -32,8 +32,8 @@ typedef struct { int config_equal(config *c1, config *c2); int config_equal2(config *c1, config *c2); -config *config_new(FILE *f, mcd *mcd_struct, int queue_size); -config *config_initial(FILE *f, mcd *mcd_struct, int queue_size, int lookahead); +config *config_new(FILE *f, mcd *mcd_struct); +config *config_initial(FILE *f, mcd *mcd_struct, int lookahead); config *config_copy(config *o); void config_print(FILE *buffer, config *c); int config_is_terminal(config *c); diff --git a/maca_trans_parser/src/maca_trans_parser_conll2cff.c b/maca_trans_parser/src/maca_trans_parser_conll2cff.c index 96533a1..3a49379 100644 --- a/maca_trans_parser/src/maca_trans_parser_conll2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_conll2cff.c @@ -56,7 +56,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); - c = config_initial(conll_file, ctx->mcd_struct, 10, 5); + c = config_initial(conll_file, ctx->mcd_struct, 5); while((ref = sentence_read(conll_file_ref , ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, mcd_get_dico_label(ctx->mcd_struct)); */ @@ -123,7 +123,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, NULL); */ @@ -157,7 +157,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) } } config_free(c); - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); sentence_nb++; } } diff --git a/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c b/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c index 11d4760..b7d5fae 100644 --- a/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c +++ b/maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c @@ -67,7 +67,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx) FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); int postag; - c = config_initial(conll_file, ctx->mcd_struct, 10, 5); + c = config_initial(conll_file, ctx->mcd_struct, 5); while((ref = sentence_read(conll_file_ref , ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, mcd_get_dico_label(ctx->mcd_struct)); */ @@ -94,7 +94,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); int postag; - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, NULL); */ @@ -115,7 +115,7 @@ void generate_training_file_buffer(FILE *output_file, context *ctx) movement_tagger(c, postag, 0, 0); } config_free(c); - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); sentence_nb++; } } diff --git a/maca_trans_parser/src/maca_trans_parser_conll2fann.c b/maca_trans_parser/src/maca_trans_parser_conll2fann.c index 940a9ca..ca3f961 100644 --- a/maca_trans_parser/src/maca_trans_parser_conll2fann.c +++ b/maca_trans_parser/src/maca_trans_parser_conll2fann.c @@ -78,7 +78,7 @@ int generate_training_file_buffer(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_ref = myfopen(ctx->input_filename, "r"); - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ /* sentence_print(stdout, ref, NULL); */ @@ -109,7 +109,7 @@ int generate_training_file_buffer(FILE *output_file, context *ctx) } } config_free(c); - c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + c = config_initial(conll_file, ctx->mcd_struct, 0); sentence_nb++; } return nb_trans; diff --git a/maca_trans_parser/src/queue.c b/maca_trans_parser/src/queue.c index 9d1315f..c0d1622 100644 --- a/maca_trans_parser/src/queue.c +++ b/maca_trans_parser/src/queue.c @@ -2,7 +2,6 @@ #include"queue.h" #include"util.h" - int queue_renumber_words(queue *bf) { int i; @@ -55,7 +54,7 @@ void queue_print(FILE *f, queue *q) fprintf(f, ")\n"); } -queue *queue_new(int size) +queue *queue_new_full(int size) { queue *q = (queue *)memalloc(sizeof(queue)); q->size = size; @@ -65,10 +64,15 @@ queue *queue_new(int size) return q; } +queue *queue_new(void) +{ + return queue_new_full(10); +} + queue *queue_copy(queue *q) { int i; - queue *copy = queue_new(q->size); + queue *copy = queue_new_full(q->size); for(i=0; i < copy->size; i++) copy->array[i] = q->array[i]; copy->head = q->head; @@ -89,31 +93,78 @@ int queue_is_empty(queue *q) return (q->nbelem == 0); } -void queue_add_in_front(queue *q, word *w) +void queue_add_in_front_old(queue *q, word *w) { if(q->head == 0) q->head = q->size - 1; else q->head --; q->array[q->head] = w; + q->nbelem++; if(q->tail == q->head){ fprintf(stderr, "queue full !\n"); - free(NULL); + /* free(NULL); */ /* what was this !!!! */ } +} + +void queue_add_in_front(queue *q, word *w) +{ + if(q->head == 0) + q->head = q->size - 1; + else + q->head --; + q->array[q->head] = w; q->nbelem++; + if(q->tail == q->head){ + /* fprintf(stderr, "queue full ! increasing size\n"); */ + queue_double_size(q); + } } -void queue_add(queue *q, word *w) +void queue_double_size(queue *q) +{ + int i; + queue *q2 = queue_new_full(q->size * 2); + for(i=0; i < q->nbelem; i++) + queue_add(q2, queue_elt_n(q, i)); + + if(q->array) free(q->array); + q->size = q2->size; + q->array = q2->array; + q->head = q2->head; + q->tail = q2->tail; + q->nbelem = q2->nbelem; + + free(q2); +} + +void queue_add_old(queue *q, word *w) { q->array[q->tail] = w; if(q->tail == q->size-1) q->tail = 0; else q->tail++; - if(q->tail == q->head) + q->nbelem++; + + if(q->tail == q->head){ fprintf(stderr, "queue full !\n"); + } +} +void queue_add(queue *q, word *w) +{ + q->array[q->tail] = w; + if(q->tail == q->size-1) + q->tail = 0; + else + q->tail++; q->nbelem++; + + if(q->tail == q->head){ + /* fprintf(stderr, "queue full ! increasing size\n"); */ + queue_double_size(q); + } } word *queue_remove(queue *q) diff --git a/maca_trans_parser/src/queue.h b/maca_trans_parser/src/queue.h index 8ecd766..efb4326 100644 --- a/maca_trans_parser/src/queue.h +++ b/maca_trans_parser/src/queue.h @@ -20,14 +20,17 @@ typedef struct { int queue_renumber_words(queue *bf); int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct); -queue *queue_new(int size); +queue *queue_new(void); queue *queue_copy(queue *q); void queue_free(queue *q); int queue_is_empty(queue *q); void queue_add(queue *q, word *w); void queue_add_in_front(queue *q, word *w); +void queue_add2(queue *q, word *w); +void queue_add_in_front2(queue *q, word *w); word *queue_remove(queue *q); void queue_print(FILE *f, queue *q); word *queue_elt_n(queue *q, int n); +void queue_double_size(queue *q); #endif diff --git a/maca_trans_parser/src/simple_decoder.c b/maca_trans_parser/src/simple_decoder.c index 6225f31..8d43e99 100644 --- a/maca_trans_parser/src/simple_decoder.c +++ b/maca_trans_parser/src/simple_decoder.c @@ -18,7 +18,7 @@ void simple_decoder_buffer(context *ctx, FILE *f, feature_table *ft, int root_la int mvt_label; float max; feat_vec *fv = feat_vec_new(feature_types_nb); - config *c = config_initial(f, ctx->mcd_struct, 10000, 0); + config *c = config_initial(f, ctx->mcd_struct, 0); /* read a sentence and put it in the buffer */ while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){ @@ -45,7 +45,7 @@ void simple_decoder_buffer(context *ctx, FILE *f, feature_table *ft, int root_la depset_print2(stdout, c->ds, ctx->dico_labels); /* config_free(c); */ - c = config_initial(f, ctx->mcd_struct, 1000, 0); + c = config_initial(f, ctx->mcd_struct, 0); } feat_vec_free(fv); @@ -59,7 +59,7 @@ void simple_decoder_stream(context *ctx, FILE *f, feature_table *ft, int root_la int mvt_label; float max; feat_vec *fv = feat_vec_new(feature_types_nb); - config *c = config_initial(f, ctx->mcd_struct, 10, 5); + config *c = config_initial(f, ctx->mcd_struct, 5); /* when in stream mode, force to renumber the tokens (ugly !) */ ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1; diff --git a/maca_trans_parser/src/simple_decoder_tagger.c b/maca_trans_parser/src/simple_decoder_tagger.c index 4065b96..9f58cdd 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.c +++ b/maca_trans_parser/src/simple_decoder_tagger.c @@ -31,7 +31,7 @@ void simple_decoder_buffer(context *ctx) int i; word *w = NULL; FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; - config *c = config_initial(f, ctx->mcd_struct, 10000, 0); + config *c = config_initial(f, ctx->mcd_struct, 0); /* read a sentence and put it in the buffer */ while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){ @@ -54,7 +54,7 @@ void simple_decoder_buffer(context *ctx) printf("\n"); /* config_free(c); */ - c = config_initial(f, ctx->mcd_struct, 1000, 0); + c = config_initial(f, ctx->mcd_struct, 0); } if(ctx->input_filename) fclose(f); @@ -70,7 +70,7 @@ void simple_decoder_stream(context *ctx) /* when in stream mode, force to renumber the tokens (ugly !) */ ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1; - c = config_initial(f, ctx->mcd_struct, 10, 5); + c = config_initial(f, ctx->mcd_struct, 5); while(!config_is_terminal(c)){ config_print(stdout, c); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); diff --git a/maca_trans_parser/src/train_perceptron.c b/maca_trans_parser/src/train_perceptron.c index b4433a4..e2b21bb 100644 --- a/maca_trans_parser/src/train_perceptron.c +++ b/maca_trans_parser/src/train_perceptron.c @@ -112,8 +112,8 @@ feature_table *train_perceptron(context *ctx) conll_file = myfopen(ctx->input_filename, "r"); conll_file_ref = myfopen(ctx->input_filename, "r"); - config_oracle = config_initial(conll_file, ctx->mcd_struct, 1000, 0); - /* config_pred = config_initial(conll_file, ctx->mcd_struct, 1000, 0); */ + config_oracle = config_initial(conll_file, ctx->mcd_struct, 0); + /* config_pred = config_initial(conll_file, ctx->mcd_struct, 0); */ sentence_nb = 0; while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ queue_read_sentence(config_oracle->bf, conll_file, ctx->mcd_struct); @@ -146,7 +146,7 @@ feature_table *train_perceptron(context *ctx) } } config_free(config_oracle); - config_oracle = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + config_oracle = config_initial(conll_file, ctx->mcd_struct, 0); sentence_nb++; /* sentence_free(ref); */ @@ -256,8 +256,8 @@ feature_table *train_perceptron_early_update(context *ctx) conll_file2 = myfopen(ctx->input_filename, "r"); conll_file_ref = myfopen(ctx->input_filename, "r"); - config_oracle = config_initial(conll_file, ctx->mcd_struct, 1000, 0); - config_pred = config_initial(conll_file2, ctx->mcd_struct, 1000, 0); + config_oracle = config_initial(conll_file, ctx->mcd_struct, 0); + config_pred = config_initial(conll_file2, ctx->mcd_struct, 0); sentence_nb = 0; while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ @@ -327,9 +327,9 @@ feature_table *train_perceptron_early_update(context *ctx) perceptron_update_global(argmax->gfv, config_oracle, ft, ft_sum, counter++);*/ - config_pred = config_initial(conll_file2, ctx->mcd_struct, 1000, 0); + config_pred = config_initial(conll_file2, ctx->mcd_struct, 0); config_free(config_oracle); - config_oracle = config_initial(conll_file, ctx->mcd_struct, 1000, 0); + config_oracle = config_initial(conll_file, ctx->mcd_struct, 0); sentence_nb++; sentence_free(ref); -- GitLab