diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index 4300e4acf9e4be79bc0598dde057b715e6a2d88d..7ab956ffb2918d7a4b222e1f75e879ad9fd932c4 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -333,105 +333,4 @@ context *context_read_options(int argc, char *argv[]) return ctx; } -void context_set_linguistic_resources_filenames_parser(context *ctx) -{ - char absolute_path[500]; - char absolute_filename[500]; - - absolute_path[0] = '\0'; - - if(ctx->maca_data_path) - strcat(absolute_path, ctx->maca_data_path); - else - strcat(absolute_path, getenv("MACAON_DIR")); - - strcat(absolute_path, "/"); - strcat(absolute_path, ctx->language); - strcat(absolute_path, "/bin/"); - - - if(!ctx->perc_model_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_MODEL_FILENAME); - ctx->perc_model_filename = strdup(absolute_filename); - } - - if(!ctx->vocabs_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); - ctx->vocabs_filename = strdup(absolute_filename); - } - - /* if(!ctx->mcd_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME); - ctx->mcd_filename = strdup(absolute_filename); - }*/ - - if(!ctx->features_model_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); - ctx->features_model_filename = strdup(absolute_filename); - } - - fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); - fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); - fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); - fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); -} - -void context_set_linguistic_resources_filenames_tagger(context *ctx) -{ - char absolute_path[500]; - char absolute_filename[500]; - absolute_path[0] = '\0'; - - if(ctx->maca_data_path) - strcat(absolute_path, ctx->maca_data_path); - else - strcat(absolute_path, getenv("MACAON_DIR")); - - strcat(absolute_path, "/"); - strcat(absolute_path, ctx->language); - strcat(absolute_path, "/bin/"); - - - if(!ctx->perc_model_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_MODEL_TAGGER_FILENAME); - ctx->perc_model_filename = strdup(absolute_filename); - } - - if(!ctx->vocabs_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_VOCABS_TAGGER_FILENAME); - ctx->vocabs_filename = strdup(absolute_filename); - } - - /* if(!ctx->mcd_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_TAGGER_FILENAME); - ctx->mcd_filename = strdup(absolute_filename); - }*/ - - if(!ctx->features_model_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_FEATURES_MODEL_TAGGER_FILENAME); - ctx->features_model_filename = strdup(absolute_filename); - } - - if(!ctx->f2p_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_F2P_FILENAME); - ctx->f2p_filename = strdup(absolute_filename); - ctx->f2p = form2pos_read(ctx->f2p_filename); - } - - fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); - fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); - fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); - fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); - fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename); - -} diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h index ff174133009135a6feac56049d62fa49748fa087..2b8bdb075bca021c1a198fde8e1a51c27204edf0 100644 --- a/maca_trans_parser/src/context.h +++ b/maca_trans_parser/src/context.h @@ -93,10 +93,4 @@ void context_maca_data_path_help_message(context *ctx); void context_f2p_filename_help_message(context *ctx); -void context_set_linguistic_resources_filenames_tagger(context *ctx); -void context_set_linguistic_resources_filenames_parser(context *ctx); - - - - #endif diff --git a/maca_trans_parser/src/decode.c b/maca_trans_parser/src/decode.c index aabe26a6db463d362eae0cb536ccc15faa9ed3ba..299a640e2974ed88b369ac28220b4280933c936f 100644 --- a/maca_trans_parser/src/decode.c +++ b/maca_trans_parser/src/decode.c @@ -42,6 +42,54 @@ void decode_check_options(context *ctx){ } } +void set_linguistic_resources_filenames_parser(context *ctx) +{ + char absolute_path[500]; + char absolute_filename[500]; + + absolute_path[0] = '\0'; + + if(ctx->maca_data_path) + strcat(absolute_path, ctx->maca_data_path); + else + strcat(absolute_path, getenv("MACAON_DIR")); + + strcat(absolute_path, "/"); + strcat(absolute_path, ctx->language); + strcat(absolute_path, "/bin/"); + + + if(!ctx->perc_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MODEL_FILENAME); + ctx->perc_model_filename = strdup(absolute_filename); + } + + if(!ctx->vocabs_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); + ctx->vocabs_filename = strdup(absolute_filename); + } + + /* if(!ctx->mcd_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME); + ctx->mcd_filename = strdup(absolute_filename); + }*/ + + if(!ctx->features_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); + ctx->features_model_filename = strdup(absolute_filename); + } + + fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); + fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); + fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); + fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); +} + + int main(int argc, char *argv[]) { FILE *conll_file = NULL; @@ -53,7 +101,7 @@ int main(int argc, char *argv[]) ctx = context_read_options(argc, argv); decode_check_options(ctx); - context_set_linguistic_resources_filenames_parser(ctx); + set_linguistic_resources_filenames_parser(ctx); ctx->features_model = feat_model_read(ctx->features_model_filename); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); diff --git a/maca_trans_parser/src/decode_tagger.c b/maca_trans_parser/src/decode_tagger.c index 722cdd773b3c42399b649a1c8b2113b0a2948912..99112baddb3b22919a0a4a064a9be4e79a57b396 100644 --- a/maca_trans_parser/src/decode_tagger.c +++ b/maca_trans_parser/src/decode_tagger.c @@ -42,6 +42,64 @@ void decode_check_options(context *ctx){ } } + +void set_linguistic_resources_filenames_tagger(context *ctx) +{ + char absolute_path[500]; + char absolute_filename[500]; + + absolute_path[0] = '\0'; + + if(ctx->maca_data_path) + strcat(absolute_path, ctx->maca_data_path); + else + strcat(absolute_path, getenv("MACAON_DIR")); + + strcat(absolute_path, "/"); + strcat(absolute_path, ctx->language); + strcat(absolute_path, "/bin/"); + + + if(!ctx->perc_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MODEL_TAGGER_FILENAME); + ctx->perc_model_filename = strdup(absolute_filename); + } + + if(!ctx->vocabs_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_VOCABS_TAGGER_FILENAME); + ctx->vocabs_filename = strdup(absolute_filename); + } + + /* if(!ctx->mcd_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_TAGGER_FILENAME); + ctx->mcd_filename = strdup(absolute_filename); + }*/ + + if(!ctx->features_model_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_FEATURES_MODEL_TAGGER_FILENAME); + ctx->features_model_filename = strdup(absolute_filename); + } + + if(!ctx->f2p_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_F2P_FILENAME); + ctx->f2p_filename = strdup(absolute_filename); + ctx->f2p = form2pos_read(ctx->f2p_filename); + } + + fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); + fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); + fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); + fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); + fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename); + +} + + int main(int argc, char *argv[]) { FILE *conll_file = NULL; @@ -50,7 +108,7 @@ int main(int argc, char *argv[]) ctx = context_read_options(argc, argv); decode_check_options(ctx); - context_set_linguistic_resources_filenames_tagger(ctx); + set_linguistic_resources_filenames_tagger(ctx); ctx->features_model = feat_model_read(ctx->features_model_filename); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs); diff --git a/maca_trans_parser/src/maca_trans_parser_conll2fann.c b/maca_trans_parser/src/maca_trans_parser_conll2fann.c index a1745d2053e39ccca87c4d9d0795bc0a6eb73baa..73ad593b50ead0f033379a31fc619a612b64a757 100644 --- a/maca_trans_parser/src/maca_trans_parser_conll2fann.c +++ b/maca_trans_parser/src/maca_trans_parser_conll2fann.c @@ -27,13 +27,13 @@ void transform_treebank_help_message(context *ctx) context_mcd_help_message(ctx); context_features_model_help_message(ctx); fprintf(stderr, "IN TEST MODE\n"); - context_alphabet_help_message(ctx); + context_vocabs_help_message(ctx); fprintf(stderr, "OUTPUT\n"); context_cff_help_message(ctx); context_fann_help_message(ctx); fprintf(stderr, "IN TRAIN MODE\n"); - context_alphabet_help_message(ctx); + context_vocabs_help_message(ctx); } @@ -140,6 +140,8 @@ int main(int argc, char *argv[]) ctx = context_read_options(argc, argv); transform_treebank_check_options(ctx); + + ctx->features_model = feat_model_read(ctx->features_model_filename); if(ctx->mode == TRAIN_MODE){ mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename); @@ -176,6 +178,15 @@ int main(int argc, char *argv[]) fprintf(output_file, "%d", nb_trans); fclose(output_file); + + if(ctx->mode == TRAIN_MODE){ + /* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */ + dico_vec_print(ctx->vocabs_filename, ctx->vocabs); + + } + + + context_free(ctx); return 0; }