diff --git a/CMakeLists.txt b/CMakeLists.txt index bd2ed1c48dec24215fc2d5d5afab5ca11ea4884e..519d490e30ab130f7f42de5e9cf13720fe50cf58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 2.8.7) project(macaon2) add_definitions("-Wall") +find_package(FLEX) include_directories(maca_common/include) include_directories(perceptron/lib/include) @@ -10,6 +11,7 @@ add_subdirectory(maca_common) add_subdirectory(maca_tools) add_subdirectory(perceptron) #add_subdirectory(maca_lemmatizer) +add_subdirectory(maca_tokenizer) add_subdirectory(maca_trans_parser) add_subdirectory(maca_crf_tagger) add_subdirectory(maca_graph_parser) diff --git a/maca_graph_parser/maca_graph_parser.c b/maca_graph_parser/maca_graph_parser.c index 48699723a61b5a827537ff1dd2433c4973a90b4e..bc066ca3d83aac65d7c9b9176b8ec966ca6c8359 100644 --- a/maca_graph_parser/maca_graph_parser.c +++ b/maca_graph_parser/maca_graph_parser.c @@ -336,7 +336,6 @@ maca_graph_parser_ctx * maca_graph_parser_LoadCTX(int argc, char ** argv) { void maca_graph_parser_init(maca_graph_parser_ctx * ctx) { - int i; /* lexicon */ /* used only in maca_graph_parser_sentence, at the moment */ /* filename in cfg */ diff --git a/maca_graph_parser/maca_graph_parser_alphabet.c b/maca_graph_parser/maca_graph_parser_alphabet.c index c96f164c833f855e48a25a1846ed0689b58342c1..c162196e3ab8237c457b76531e96c4d2a635739a 100644 --- a/maca_graph_parser/maca_graph_parser_alphabet.c +++ b/maca_graph_parser/maca_graph_parser_alphabet.c @@ -92,8 +92,6 @@ char * maca_graph_parser_alphabet_get_symbol(maca_graph_parser_alphabet *a, int void maca_graph_parser_alphabet_print4(char *filename, maca_graph_parser_alphabet *a1, maca_graph_parser_alphabet *a2, maca_graph_parser_alphabet *a3, maca_graph_parser_alphabet *a4) { FILE *f; - int i; - char *symbol; if(filename == NULL) f = stdout; @@ -116,8 +114,6 @@ void maca_graph_parser_alphabet_print4(char *filename, maca_graph_parser_alphabe void maca_graph_parser_alphabet_print5(char *filename, maca_graph_parser_alphabet *a1, maca_graph_parser_alphabet *a2, maca_graph_parser_alphabet *a3, maca_graph_parser_alphabet *a4, maca_graph_parser_alphabet *a5) { FILE *f; - int i; - char *symbol; if(filename == NULL) f = stdout; @@ -224,7 +220,6 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load5(char *filename) maca_graph_parser_alphabet *maca_graph_parser_alphabet_load(char *filename) { FILE *f; - int i; char symbol[1000]; maca_graph_parser_alphabet *a = NULL; diff --git a/maca_graph_parser/maca_graph_parser_corpora.c b/maca_graph_parser/maca_graph_parser_corpora.c index ad90a37e3c75166018cf08351209749eec83a11c..03869a5ef1f814170027f17560bf437009aaf558 100644 --- a/maca_graph_parser/maca_graph_parser_corpora.c +++ b/maca_graph_parser/maca_graph_parser_corpora.c @@ -162,7 +162,6 @@ hyp_ref_vector *load_mcf_corpus(maca_graph_parser_ctx *ctx){ hyp_ref_vector *v = allocate_hyp_ref_vector(ctx->sent_nb); maca_graph_parser_sentence *ref_s = NULL; maca_graph_parser_sentence *hyp_s = NULL; - int col_id; maca_mcf *format; maca_mcf_column *column; char buffer[128]; diff --git a/maca_graph_parser/maca_graph_parser_decode_main.c b/maca_graph_parser/maca_graph_parser_decode_main.c index b5b7bbe7344bd3cdfd5f4c7b14aed816d7a692b6..cbe41c9d132f06cccc710cedfcce7848096f98ba 100644 --- a/maca_graph_parser/maca_graph_parser_decode_main.c +++ b/maca_graph_parser/maca_graph_parser_decode_main.c @@ -31,7 +31,7 @@ int main(int argc, char **argv) { - char c; + /* char c; */ maca_graph_parser_ctx * ctx; maca_graph_parser_sentence *sentence; int sent_num; diff --git a/maca_graph_parser/maca_graph_parser_decoder2.c b/maca_graph_parser/maca_graph_parser_decoder2.c index 6320294cb682a93aedd3bbba627cf6ac67b39dc5..4494e68fb251f8dbca4484b2efb42814955dd5b0 100644 --- a/maca_graph_parser/maca_graph_parser_decoder2.c +++ b/maca_graph_parser/maca_graph_parser_decoder2.c @@ -237,7 +237,7 @@ void maca_graph_parser_decoder2_decode(maca_graph_parser_ctx *ctx, maca_graph_pa /* */ Closed *max_C; Open *max_O; - int argmax_i; + /* int argmax_i; */ int label_argmax; Open *cand_O; float score_cand_O; diff --git a/maca_graph_parser/maca_graph_parser_dep_count_table.c b/maca_graph_parser/maca_graph_parser_dep_count_table.c index 93e7c3c4e44a8822f30929095065ddea405cb07a..1acde6e880d9d104fe7c281ab13712e8bfb79f20 100644 --- a/maca_graph_parser/maca_graph_parser_dep_count_table.c +++ b/maca_graph_parser/maca_graph_parser_dep_count_table.c @@ -110,7 +110,8 @@ void maca_graph_parser_dep_count_table_print(maca_graph_parser_ctx * ctx, char * { FILE *f; maca_graph_parser_dep_count_table t = ctx->dep_count_table; - int gov, dep, label, dir, count, length_class; + int gov, dep, label, length_class; + /* int dir, count; */ if(filename == NULL) f = stdout; diff --git a/maca_graph_parser/maca_graph_parser_feature_table.c b/maca_graph_parser/maca_graph_parser_feature_table.c index 50166231677f14a2a11ffe202fbeff1de8d7f55f..cf644822ad265edabdd7a0ae70c30ea7cab33ec6 100644 --- a/maca_graph_parser/maca_graph_parser_feature_table.c +++ b/maca_graph_parser/maca_graph_parser_feature_table.c @@ -71,7 +71,7 @@ void maca_graph_parser_feature_table_fill(maca_graph_parser_ctx *ctx, maca_graph feat_vector *fv_grandchildren = NULL; feat_vector *fv_sibling = NULL; - int labels_nb = ctx->labels_nb; + /* int labels_nb = ctx->labels_nb; */ /* default edge label: joker */ int dft_label = ctx->fct_joker; @@ -324,10 +324,10 @@ void maca_graph_parser_feature_table_fill(maca_graph_parser_ctx *ctx, maca_graph void maca_graph_parser_feature_table_free(maca_graph_parser_ctx *ctx) { - int i,j,k,l; + /* int i,j,k,l; */ maca_graph_parser_feature_table *d = ctx->feature_table; - int length = d->len; - int types = d->typesLen; + /* int length = d->len; */ + /* int types = d->typesLen; */ if(ctx->basic_features){ free(d->pl[0][0]); diff --git a/maca_graph_parser/maca_graph_parser_features.c b/maca_graph_parser/maca_graph_parser_features.c index 9b04fae44fb5ff69a5355785dbb4aa3894fca393..1b07c621f883ee56b706d471598987772fc1fc0b 100644 --- a/maca_graph_parser/maca_graph_parser_features.c +++ b/maca_graph_parser/maca_graph_parser_features.c @@ -160,7 +160,7 @@ templ *maca_graph_parser_templ_allocator(int v0, int v1, int v2, int v3, int v4, { int start = 0; int end; - int l; + /* int l; */ templ *t = malloc(sizeof(templ)); if(t == NULL){ fprintf(stderr, "memory allocation error\n"); @@ -658,7 +658,7 @@ feat_vector *first(maca_graph_parser_sentence *s, maca_graph_parser_ctx *ctx, in int subcat_feats_nb = s->synt_feats_nb[gov]; int *subcat_feats_array = s->synt_feats_array[gov]; - int i,j; + int i; /* fprintf(stderr, "extract first order features : gov : (%d,%s,%s) dep :(%d,%s,%s)\n", gov, */ @@ -1036,7 +1036,7 @@ feat_vector *grandchildren(maca_graph_parser_sentence *s, maca_graph_parser_ctx int subcat_feats_nb = s->synt_feats_nb[gov]; int *subcat_feats_array = s->synt_feats_array[gov]; - int i,j; + int i; if(gdep == -1){ diff --git a/maca_graph_parser/maca_graph_parser_hyperdecoder.c b/maca_graph_parser/maca_graph_parser_hyperdecoder.c index 50b98ec4c2f720aeaac8514c1e9f8f522a7a6150..c8834756ac0ae744268b7401d84d9c8e42cf09ea 100644 --- a/maca_graph_parser/maca_graph_parser_hyperdecoder.c +++ b/maca_graph_parser/maca_graph_parser_hyperdecoder.c @@ -259,7 +259,7 @@ void maca_graph_parser_hyperdecoder_init(maca_graph_parser_ctx *ctx, maca_graph_ int m; int j[2] = {0, 0}; /* min dep count */ - int dep_count; + /* int dep_count; */ /* test: default edge label */ /* int dft_label = maca_tags_get_code(ctx->cfg, "morpho", "fct", "__JOKER__"); */ int dft_label = maca_alphabet_get_code(ctx->labels_alphabet, "__JOKER__"); @@ -723,7 +723,7 @@ void find_kbest(maca_graph_parser_ctx *ctx, Vertex *v, int k, maca_graph_parser_ */ int i; - int j[2] = {-1, -1}; + /* int j[2] = {-1, -1}; */ int ja; int jb; heap *cand; diff --git a/maca_graph_parser/maca_graph_parser_main.c b/maca_graph_parser/maca_graph_parser_main.c index ea3c6379fda08eb893d756b9fcbc2534c585ba0b..08d281ca52e516028c0c0b2b313145cb8e38e983 100644 --- a/maca_graph_parser/maca_graph_parser_main.c +++ b/maca_graph_parser/maca_graph_parser_main.c @@ -75,7 +75,7 @@ int main(int argc, char **argv) void maca_graph_parser_decode_main(maca_graph_parser_ctx * ctx) { maca_alphabet_array *alpha_array; - int i; + /* int i; */ int sent_num; /*maca_sentence * ms;*/ diff --git a/maca_graph_parser/maca_graph_parser_print_model_main.c b/maca_graph_parser/maca_graph_parser_print_model_main.c index 20023da84268b1339e318bb9e395a9050b6e5e96..9c9dd246ec9590d72034ecd206b401b9c2a140d3 100644 --- a/maca_graph_parser/maca_graph_parser_print_model_main.c +++ b/maca_graph_parser/maca_graph_parser_print_model_main.c @@ -9,7 +9,7 @@ int main(int argc, char *argv[]) int i,j; FILE *f = stdout; maca_graph_parser_ctx * ctx; - int hval; + /* int hval; */ maca_alphabet_array *alpha_array; maca_graph_parser_model *model = NULL; diff --git a/maca_graph_parser/maca_graph_parser_sentence.c b/maca_graph_parser/maca_graph_parser_sentence.c index d50cc4f1d24d89652aa3c27e62768324d1f50698..269baaedf9209fad00ae090007a544a826b1a207 100644 --- a/maca_graph_parser/maca_graph_parser_sentence.c +++ b/maca_graph_parser/maca_graph_parser_sentence.c @@ -436,7 +436,7 @@ maca_mcf_sentence *maca_graph_parser_read_mcf_sentence(maca_graph_parser_ctx *ct int length; maca_mcf_word *mcf_word = NULL; int index, k; - int code_postag, code_lemma, code_form, code_label, code_synt_feat, gov; + int code_postag, code_lemma, code_form, code_label, gov; int nb_synt_feats; int *synt_feats; char invalid_sentence; diff --git a/maca_graph_parser/simple_parser.cc b/maca_graph_parser/simple_parser.cc index f2821c5c498fbbce5e300f0a5f0a0a623e11aa93..15ca59f4f2689054aeb225476ebc9e61f7d29f92 100644 --- a/maca_graph_parser/simple_parser.cc +++ b/maca_graph_parser/simple_parser.cc @@ -97,8 +97,8 @@ macaon::Parser::Parser( ctx = maca_graph_parser_LoadCTX(argc, (char**) argv); ctx->verbose_flag = verbose_flag; - int i; - int sent_num; + // int i; + //int sent_num; /* alphabets */ /* load alphabets */ diff --git a/maca_tokenizer/CMakeLists.txt b/maca_tokenizer/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d524f5061adb3d2ec447218d48ca430163941960 --- /dev/null +++ b/maca_tokenizer/CMakeLists.txt @@ -0,0 +1,4 @@ +FLEX_TARGET(tokenizer tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/maca_tokenizer.c) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +add_executable(maca_tokenizer main.c ${FLEX_tokenizer_OUTPUTS}) +install (TARGETS maca_tokenizer DESTINATION bin) diff --git a/maca_tokenizer/main.c b/maca_tokenizer/main.c new file mode 100644 index 0000000000000000000000000000000000000000..f0bef4ca78b9ae992a5cd6fb7a24a17600e157ac --- /dev/null +++ b/maca_tokenizer/main.c @@ -0,0 +1,7 @@ +int main(int argc, char* argv[]) { + + yylex() ; + + return 0; +} + diff --git a/maca_tokenizer/tok_rules.l b/maca_tokenizer/tok_rules.l new file mode 100644 index 0000000000000000000000000000000000000000..d60af0ee7f05e26fa5bb0718370c3581dbfc5fee --- /dev/null +++ b/maca_tokenizer/tok_rules.l @@ -0,0 +1,16 @@ +%{ +#include <stdio.h> +%} +%option noyywrap +%% +" "+ printf("\n"); +\. printf("\n."); +\, printf("\n,"); +' printf("'\n"); +’ printf("'\n"); +\n+ printf("\n"); +du printf("de\nle"); +des printf("de\nles"); +au printf("à\nle"); +aux printf("à\nles"); +%% diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index a64e8f8fb8c9ae4e1c0bae0f1755e4d83862a4c4..18eea288854f1103bcf85ffa2d439941a172ca50 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -102,6 +102,12 @@ target_link_libraries(maca_trans_tagger transparse) target_link_libraries(maca_trans_tagger maca_common) install (TARGETS maca_trans_tagger DESTINATION bin) +add_executable(maca_trans_tagger_bt ./src/maca_trans_tagger_bt.c) +target_link_libraries(maca_trans_tagger_bt perceptron) +target_link_libraries(maca_trans_tagger_bt transparse) +target_link_libraries(maca_trans_tagger_bt maca_common) +install (TARGETS maca_trans_tagger_bt DESTINATION bin) + #add_executable(maca_trans_parser_train ./src/train_perceptron.c) #target_compile_options(maca_trans_parser_train INTERFACE -Wall) #target_link_libraries(maca_trans_parser_train perceptron) diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index 392fece07f8219134749ce759ad6e9de09e96ae3..608c53baa35fe23793a780235c8a78bfdf7b8117 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -656,7 +656,7 @@ int mvt1(config *c) int delta1(config *c) { if(c->vcode_array == NULL) return -1; - int delta = (int) (c->vcode_array[0].score - c->vcode_array[1].score); + int delta = (int) (c->vcode_array[0].score - c->vcode_array[1].score); return (delta >= 10)? 10: delta; } diff --git a/maca_trans_parser/src/maca_trans_tagger_mcf2cff_bt.c b/maca_trans_parser/src/maca_trans_tagger_mcf2cff_bt.c index 68eb95f7af388637b924ee40f99d300f4812c9e0..364120e34b65acb713bb5083db04fdc7691ab59f 100644 --- a/maca_trans_parser/src/maca_trans_tagger_mcf2cff_bt.c +++ b/maca_trans_parser/src/maca_trans_tagger_mcf2cff_bt.c @@ -70,79 +70,100 @@ void generate_training_file(FILE *output_file, context *ctx) FILE *conll_file = myfopen(ctx->input_filename, "r"); FILE *conll_file_predicted = myfopen(ctx->input_filename, "r"); int postag_oracle; - dico *dico_pos_oracle = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); + /* dico *dico_pos_oracle = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); */ feat_model *local_feat_model = feat_model_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.fm", ctx->verbose); dico_vec *local_dico_vec = dico_vec_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.vocab", ctx->hash_ratio); - dico *dico_pos_local = dico_vec_get_dico(local_dico_vec, (char *)"POS"); + /* dico *dico_pos_local = dico_vec_get_dico(local_dico_vec, (char *)"POS"); */ feature_table *local_ft = feature_table_load("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.model", ctx->verbose); dico *local_perceptron_features = dico_vec_get_dico(local_dico_vec, (char *)"d_perceptron_features"); config *config_predicted; int postag_predicted; - int i; - char *postag_oracle_string; - char *postag_predicted_string; - + /* int i; */ + /* char *postag_oracle_string; */ + /* char *postag_predicted_string; */ + config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5); config_oracle = config_new(conll_file, ctx->mcd_struct, 5); - + while(!config_is_terminal(config_oracle)){ if(ctx->f2p){ add_signature_to_words_in_word_buffer(config_oracle->bf, ctx->f2p); add_signature_to_words_in_word_buffer(config_predicted->bf, ctx->f2p); } - + postag_oracle = word_get_pos(word_buffer_b0(config_get_buffer(config_oracle))); - postag_oracle_string = dico_int2string(dico_pos_oracle, postag_oracle); + /* postag_oracle_string = dico_int2string(dico_pos_oracle, postag_oracle); */ config2feat_vec_cff(local_feat_model, config_predicted, local_perceptron_features, fv, ctx->mode); - + if(config_predicted->vcode_array) free(config_predicted->vcode_array); config_predicted->vcode_array = feature_table_get_vcode_array(fv, local_ft); postag_predicted = config_predicted->vcode_array[0].class_code; - postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted); - + /* postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted); */ + if(ctx->debug_mode){ - if(strcmp(postag_oracle_string, postag_predicted_string)){ + /* if(strcmp(postag_oracle_string, postag_predicted_string)){ */ + if(postag_oracle != postag_predicted){ fprintf(stdout, "**************** DIFFERENTS ***********\n"); fprintf(stdout, "%s\n", word_get_input(word_buffer_b0(config_get_buffer(config_oracle)))); } } - + + forward(config_predicted, postag_predicted); forward(config_oracle, postag_oracle); - - if(!strcmp(postag_oracle_string, postag_predicted_string)){ + + fprintf(output_file, "%d", postag_oracle); + config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv, ctx->mode); + feat_vec_print(output_file, fv); + word_set_pos(word_buffer_bm1(config_predicted->bf), postag_oracle); + } +} +#if 0 +/* if(!strcmp(postag_oracle_string, postag_predicted_string)){ */ + if(postag_oracle == postag_predicted){ fprintf(output_file, "0"); config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv, ctx->mode); feat_vec_print(output_file, fv); + if(ctx->debug_mode){ + printf("CHOOSE 0\n"); + } } - + + if(postag_oracle == postag_predicted) + printf("CORRECT %d\n", delta2(config_predicted)); + else + printf("WRONG %d\n", delta2(config_predicted)); + int choice = 1; - while(strcmp(postag_oracle_string, postag_predicted_string) && (choice < 3)){ - - if(ctx->debug_mode){ - fprintf(stdout, "%d postag oracle = %s postag predicted = %s\n", - word_buffer_get_current_index(config_get_buffer(config_oracle)), - dico_int2string(dico_pos_oracle, postag_oracle), - dico_int2string(dico_pos_local, postag_predicted)); - - for(i=0; i < 5; i++) - fprintf(stdout, "%d\t%s\t%.4f\t%.4f\n", i, - dico_int2string(dico_pos_local, config_predicted->vcode_array[i].class_code), - config_predicted->vcode_array[i].score, - config_predicted->vcode_array[i].score - config_predicted->vcode_array[0].score); - fprintf(stdout, "CHOICE %d\n", choice); - } + /* while(strcmp(postag_oracle_string, postag_predicted_string) && (choice < 10)){ */ + while((postag_oracle != postag_predicted) && (choice < 10)){ + if(ctx->debug_mode){ + if(choice == 1){ + fprintf(stdout, "%d postag oracle = %s postag predicted = %s\n", + word_buffer_get_current_index(config_get_buffer(config_oracle)), + dico_int2string(dico_pos_oracle, postag_oracle), + dico_int2string(dico_pos_local, postag_predicted)); + + for(i=0; i < 5; i++) + fprintf(stdout, "%d\t%s\t%.4f\t%.4f\n", i, + dico_int2string(dico_pos_local, config_predicted->vcode_array[i].class_code), + config_predicted->vcode_array[i].score, + config_predicted->vcode_array[i].score - config_predicted->vcode_array[0].score); + } + /* fprintf(stdout, "CHOICE %d\n", choice); */ + } postag_predicted = config_predicted->vcode_array[choice].class_code; - postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted); - - if(!strcmp(postag_predicted_string, postag_oracle_string)){ - if(ctx->debug_mode){ - printf("GOOD CHOICE\n"); - } + /* postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted); */ + + /* if(!strcmp(postag_predicted_string, postag_oracle_string)){ */ + if(postag_predicted == postag_oracle){ + if(ctx->debug_mode){ + printf("CHOOSE %d\n", choice); + } fprintf(output_file, "%d", choice); config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv, ctx->mode); feat_vec_print(output_file, fv); @@ -152,7 +173,7 @@ void generate_training_file(FILE *output_file, context *ctx) } } } - +#endif int main(int argc, char *argv[]) { context *ctx; @@ -163,24 +184,26 @@ int main(int argc, char *argv[]) ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); - if(ctx->mode == TRAIN_MODE){ + /* if(ctx->mode == TRAIN_MODE){ mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename); ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct); } - else if(ctx->mode == TEST_MODE){ - ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); - } + else if(ctx->mode == TEST_MODE){*/ + + ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); + + /* } */ feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); /* in train mode create feature dictionnary for perceptron */ if(ctx->mode == TRAIN_MODE) - ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000); + ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features_bt", 10000000); /* in test mode read feature dictionnary for perceptron */ if(ctx->mode == TEST_MODE) - ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); + ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features_bt"); /* add the feature dictionnary to the dico vector */ dico_vec_add(ctx->vocabs, ctx->d_perceptron_features); @@ -193,11 +216,11 @@ int main(int argc, char *argv[]) generate_training_file(output_file, ctx); - if(ctx->mode == TRAIN_MODE){ + /* if(ctx->mode == TRAIN_MODE){ */ /* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */ dico_vec_print(ctx->vocabs_filename, ctx->vocabs); - } + /* } */ if(ctx->cff_filename) fclose(output_file); diff --git a/maca_trans_parser/src/simple_decoder_tagger_bt.c b/maca_trans_parser/src/simple_decoder_tagger_bt.c index 5d28c28e1db0202dd21d51db0d96b81b8eb9bcda..5cfc385e6472fd6558e967ae31173c2b0e184aaf 100644 --- a/maca_trans_parser/src/simple_decoder_tagger_bt.c +++ b/maca_trans_parser/src/simple_decoder_tagger_bt.c @@ -69,9 +69,10 @@ void simple_decoder_tagger2(context *ctx) int postag; feat_model *local_feat_model = feat_model_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.fm", ctx->verbose); - dico_vec *local_dico_vec = dico_vec_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.vocab", ctx->hash_ratio); - dico *local_dico_pos = dico_vec_get_dico(local_dico_vec, (char *)"POS"); - dico *local_perceptron_features = dico_vec_get_dico(local_dico_vec, (char *)"d_perceptron_features"); + /* dico_vec *local_dico_vec = dico_vec_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.vocab", ctx->hash_ratio); */ + ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features_bt"); + dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS"); + dico *local_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); feature_table *local_ft = feature_table_load("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.model", ctx->verbose); c = config_new(f, ctx->mcd_struct, 5); @@ -101,7 +102,7 @@ void simple_decoder_tagger2(context *ctx) if(ctx->debug_mode){ fprintf(stderr, "apply local model\n"); for(int i=0; i < 5; i++) - fprintf(stderr, "%d\t%s\t%.4f\n", i, dico_int2string(local_dico_pos, c->vcode_array[i].class_code), c->vcode_array[i].score); + fprintf(stderr, "%d\t%s\t%.4f\n", i, dico_int2string(dico_pos, c->vcode_array[i].class_code), c->vcode_array[i].score); } forward(c, postag); @@ -109,23 +110,26 @@ void simple_decoder_tagger2(context *ctx) /* apply global model */ config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); - vcode *vcode_array = feature_table_get_vcode_array(fv, ft); if(ctx->debug_mode){ fprintf(stderr, "apply global model\n"); for(int i=0; i < 3; i++) - fprintf(stderr, "%d\t%d\t%.4f\n", i, vcode_array[i].class_code, vcode_array[i].score); + /* fprintf(stderr, "%d\t%d\t%.4f\n", i, vcode_array[i].class_code, vcode_array[i].score); */ + fprintf(stderr, "%d\t%s\t%.4f\n", i, dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score); } - int choice = vcode_array[0].class_code; + int choice = vcode_array[0].class_code; + word_set_pos(word_buffer_bm1(c->bf), choice); + /* if(choice != 0){ postag = c->vcode_array[choice].class_code; choice_n(c, choice); - } + }*/ free(vcode_array); /* } */ - print_word2(word_buffer_bm1(c->bf), ctx->mcd_struct, local_dico_pos, postag); + /* print_word2(word_buffer_bm1(c->bf), ctx->mcd_struct, dico_pos, postag); */ + print_word2(word_buffer_bm1(c->bf), ctx->mcd_struct, dico_pos, choice); } /* config_print(stdout, c); */