diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index 1a17285e3f5a97e2e12053d7630a6d2743cfd3b5..78c431c637d3715b615a2a08f03ffa6a96befd7c 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -10,6 +10,7 @@ set(SOURCES src/context.c src/oracle.c src/oracle_tagger.c src/simple_decoder.c + src/simple_decoder_forrest.c src/simple_decoder_tagger.c src/cf_file.c src/feat_lib.c @@ -50,6 +51,11 @@ target_link_libraries(maca_trans_parser transparse) target_link_libraries(maca_trans_parser maca_common) install (TARGETS maca_trans_parser DESTINATION bin) +add_executable(maca_trans_parser_forrest ./src/decode_forrest.c) +target_link_libraries(maca_trans_parser_forrest transparse) +target_link_libraries(maca_trans_parser_forrest maca_common) +install (TARGETS maca_trans_parser_forrest DESTINATION bin) + add_executable(maca_trans_tagger ./src/decode_tagger.c) target_link_libraries(maca_trans_tagger transparse) target_link_libraries(maca_trans_tagger maca_common) diff --git a/maca_trans_parser/src/decode_forrest.c b/maca_trans_parser/src/decode_forrest.c new file mode 100644 index 0000000000000000000000000000000000000000..c29a7fe40b2c50cbfcd6ef36b64cbd93b832c13e --- /dev/null +++ b/maca_trans_parser/src/decode_forrest.c @@ -0,0 +1,124 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<unistd.h> +#include<getopt.h> +#include"context.h" +#include"movement.h" +#include"oracle.h" +#include"feat_fct.h" +#include"feature_table.h" +#include"dico.h" +#include"beam.h" +#include"simple_decoder_forrest.h" +/*#include"dnn_decoder.h"*/ +#include"config2feat_vec.h" + /* Prints the usage text for this decoder by calling the general, beam, conll, input, mcd, model, vocabs and features-model help helpers of context in that order; the INPUT heading is written to stderr just before the input helper. */ +void decode_help_message(context *ctx) +{ + context_general_help_message(ctx); + context_beam_help_message(ctx); + context_conll_help_message(ctx); + fprintf(stderr, "INPUT\n"); + context_input_help_message(ctx); + context_mcd_help_message(ctx); + context_model_help_message(ctx); + context_vocabs_help_message(ctx); + 
context_features_model_help_message(ctx); +} + /* Shows the help text and exits with status 1 when ctx->help is set. The filename checks inside the condition are commented out, so no other option is validated here. */ +void decode_check_options(context *ctx){ + if(ctx->help + /*!ctx->conll_filename*/ + /* || !ctx->perc_model_filename + || !ctx->mcd_filename + || !ctx->vocabs_filename + || !ctx->features_model_filename*/ + ){ + decode_help_message(ctx); + exit(1); + } +} + /* Fills in default resource filenames on ctx. Builds the prefix BASE/LANGUAGE/bin/ where BASE is ctx->maca_data_path when set and the MACAON_DIR environment variable otherwise, and LANGUAGE is ctx->language. For each of perc_model_filename, vocabs_filename and features_model_filename that is still unset, stores a strdup of the prefix followed by the matching DEFAULT_* name. With ctx->verbose the resulting names are logged to stderr. */ +void set_linguistic_resources_filenames_parser(context *ctx) +{ + char absolute_path[500]; + char absolute_filename[500]; /* NOTE(review): both 500-byte buffers are filled with unbounded strcpy and strcat calls below, so a long data path or language name overflows them. */ + + absolute_path[0] = '\0'; + + if(ctx->maca_data_path) + strcat(absolute_path, ctx->maca_data_path); + else + strcat(absolute_path, getenv("MACAON_DIR")); /* NOTE(review): getenv returns NULL when MACAON_DIR is not set, and passing NULL to strcat is undefined behavior. */ + + strcat(absolute_path, "/"); + strcat(absolute_path, ctx->language); + strcat(absolute_path, "/bin/"); + + + if(!ctx->perc_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MODEL_FILENAME); + ctx->perc_model_filename = strdup(absolute_filename); + } + + if(!ctx->vocabs_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); + ctx->vocabs_filename = strdup(absolute_filename); + } + + /* if(!ctx->mcd_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME); + ctx->mcd_filename = strdup(absolute_filename); + }*/ + + if(!ctx->features_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); + ctx->features_model_filename = strdup(absolute_filename); + } + + if(ctx->verbose){ + fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); + fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); + fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); /* NOTE(review): the mcd default above is commented out, so this pointer may still be NULL here unless context_read_options set it; printing NULL with %s is undefined - confirm. */ + fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); + } +} + /* Entry point. Reads the options into a context, applies the default filenames, reads the features model and the vocabularies, links ctx->mcd_struct to the vocabularies, fetches the LABEL and d_perceptron_features dictionaries, then calls simple_decoder_forrest when ctx->beam_width is 1. Returns 1 when the LABEL dictionary is missing and 0 otherwise. */ +int main(int argc, char *argv[]) +{ + context *ctx; + + ctx = context_read_options(argc, argv); + decode_check_options(ctx); + + set_linguistic_resources_filenames_parser(ctx); + ctx->features_model = 
feat_model_read(ctx->features_model_filename, ctx->verbose); + ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); + + ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); + + if(ctx->dico_labels == NULL){ + fprintf(stderr, "cannot find label names\n"); + return 1; /* NOTE(review): this early return skips context_free(ctx). */ + } + + ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1; /* two movement codes per label plus one more; presumably a left arc and a right arc for each label plus a single shift - TODO confirm against movement.h */ + + /* load models */ + + ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); + + if(ctx->beam_width == 1){ /* NOTE(review): any other beam width skips decoding entirely and the program still returns 0 without a message. */ + simple_decoder_forrest(ctx); + } + + context_free(ctx); + return 0; +} + diff --git a/maca_trans_parser/src/queue.c b/maca_trans_parser/src/queue.c index c0d162235d126c05f4d3aaed70d2e66b6a031f75..4e142d22f97005627e74d26d5cafb50fe7b81624 100644 --- a/maca_trans_parser/src/queue.c +++ b/maca_trans_parser/src/queue.c @@ -93,20 +93,6 @@ int queue_is_empty(queue *q) return (q->nbelem == 0); } -void queue_add_in_front_old(queue *q, word *w) -{ - if(q->head == 0) - q->head = q->size - 1; - else - q->head --; - q->array[q->head] = w; - q->nbelem++; - if(q->tail == q->head){ - fprintf(stderr, "queue full !\n"); - /* free(NULL); */ /* what was this !!!! 
*/ - } -} - void queue_add_in_front(queue *q, word *w) { if(q->head == 0) @@ -138,20 +124,6 @@ void queue_double_size(queue *q) free(q2); } -void queue_add_old(queue *q, word *w) -{ - q->array[q->tail] = w; - if(q->tail == q->size-1) - q->tail = 0; - else - q->tail++; - q->nbelem++; - - if(q->tail == q->head){ - fprintf(stderr, "queue full !\n"); - } -} - void queue_add(queue *q, word *w) { q->array[q->tail] = w; diff --git a/maca_trans_parser/src/queue.h b/maca_trans_parser/src/queue.h index efb432684f51862bf5f910aaf309d16ab25b23e8..7b32f3745edc89cd7ef40aaeac6bb5d4c31287e7 100644 --- a/maca_trans_parser/src/queue.h +++ b/maca_trans_parser/src/queue.h @@ -26,8 +26,6 @@ void queue_free(queue *q); int queue_is_empty(queue *q); void queue_add(queue *q, word *w); void queue_add_in_front(queue *q, word *w); -void queue_add2(queue *q, word *w); -void queue_add_in_front2(queue *q, word *w); word *queue_remove(queue *q); void queue_print(FILE *f, queue *q); word *queue_elt_n(queue *q, int n); diff --git a/maca_trans_parser/src/simple_decoder_forrest.c b/maca_trans_parser/src/simple_decoder_forrest.c new file mode 100644 index 0000000000000000000000000000000000000000..baa808837f62581ca4cccb111264f768719c956b --- /dev/null +++ b/maca_trans_parser/src/simple_decoder_forrest.c @@ -0,0 +1,78 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<unistd.h> +#include<getopt.h> +#include"context.h" +#include"movement.h" +#include"oracle.h" +#include"feat_fct.h" +#include"config2feat_vec.h" +#include"feature_table.h" +#include"dico.h" + /* Decodes every sentence read from f, applying one movement per step. Parameters: ctx supplies the mcd structure, features model, feature dictionary and label dictionary; f is the input stream; ft is the feature table passed to feature_table_argmax; root_label is the label code handed to config_connect_subtrees. For each sentence that queue_read_sentence puts in c->bf, the inner loop extracts features into fv, obtains a movement code and its score (max), and applies a left arc, a right arc or a shift until the configuration is terminal; the dependency set is then printed to stdout. NOTE(review): this file does not include simple_decoder_forrest.h, so the public definition below is not checked against its prototype. */ +void simple_decoder_buffer_forrest(context *ctx, FILE *f, feature_table *ft, int root_label) +{ + int mvt_code; + int mvt_type; + int mvt_label; + float max; + feat_vec *fv = feat_vec_new(feature_types_nb); + config *c = config_initial(f, ctx->mcd_struct, 0); + + /* read a sentence and put it in the buffer */ + while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){ + while(!config_is_terminal(c)){ + 
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); + + + /* FORREST : this is where the DNN should be called */ + /* fv is the feature vector that contains the values of the features extracted from configuration c */ + /* the function returns the code of a movement (mvt_code), that is used to yield a new configuration */ + mvt_code = feature_table_argmax(fv, ft, &max); + + + mvt_type = movement_type(mvt_code); + mvt_label = movement_label(mvt_code); + + if(mvt_type == MVT_LEFT) + if(movement_left_arc(c, mvt_label, max)) + continue; + + if(mvt_type == MVT_RIGHT) + if(movement_right_arc(c, mvt_label, max)) + continue; + + movement_shift(c, 0, max); /* fallback: reached when the predicted type is neither MVT_LEFT nor MVT_RIGHT, or when the arc helper returned zero */ + } + + /* config_print(stdout, c); */ + + config_connect_subtrees(c, root_label); + depset_print2(stdout, c->ds, ctx->dico_labels); + + /* config_free(c); */ + c = config_initial(f, ctx->mcd_struct, 0); /* NOTE(review): the previous configuration is dropped without being released because config_free is commented out, so one config is lost per sentence; the last one is never freed either. */ + } + + feat_vec_free(fv); +} + /* Runs the decoder on the input named by ctx. Opens ctx->input_filename with myfopen, or uses stdin when it is unset, loads the feature table from ctx->perc_model_filename, converts ctx->root_label to its code in ctx->dico_labels, runs simple_decoder_buffer_forrest, then frees the table and closes the file if one was opened. */ + +void simple_decoder_forrest(context *ctx) +{ + FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin; + feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); /* NOTE(review): ft is used without a NULL check - confirm whether feature_table_load can return NULL on failure. */ + int root_label; + + root_label = dico_string2int(ctx->dico_labels, ctx->root_label); + if(root_label == -1) root_label = 0; /* use label code 0 when the lookup of ctx->root_label yields -1 */ + + simple_decoder_buffer_forrest(ctx, f, ft, root_label); + + feature_table_free(ft); + if(ctx->input_filename) + fclose(f); + +} + diff --git a/maca_trans_parser/src/simple_decoder_forrest.h b/maca_trans_parser/src/simple_decoder_forrest.h new file mode 100644 index 0000000000000000000000000000000000000000..f1f9a38ce538d0a8587d77e1dcb974cb9b393050 --- /dev/null +++ b/maca_trans_parser/src/simple_decoder_forrest.h @@ -0,0 +1,7 @@ +#ifndef __SIMPLE_DECODER_FORREST__ +#define __SIMPLE_DECODER_FORREST__ +#include"context.h" + /* Public entry point of the decoder, defined in simple_decoder_forrest.c. NOTE(review): the guard macro contains a double underscore, which is a spelling reserved for the implementation in C. */ +void simple_decoder_forrest(context *ctx); + +#endif