Skip to content
Snippets Groups Projects
Commit 11029e07 authored by Johannes Heinecke's avatar Johannes Heinecke
Browse files

Merge branch 'master' of https://gitlab.lif.univ-mrs.fr/alexis.nasr/macaon2 into johannes

parents 903e8c2f 4843bed3
No related branches found
No related tags found
1 merge request!5Johannes: wrapping to use macaon_trans_tagger, macaon_trans_lemmatizer and macaon_trans_parser within python or java programmes
Showing
with 629 additions and 22 deletions
......@@ -22,6 +22,7 @@ add_subdirectory(perceptron)
#add_subdirectory(maca_lemmatizer)
add_subdirectory(maca_trans_parser)
add_subdirectory(maca_crf_tagger)
add_subdirectory(maca_graph_parser)
if(MACA_EXPORT)
add_subdirectory(maca_export)
......
#add_definitions("-std=c++0x")
# Append the C++0x switch to CMAKE_CXX_FLAGS instead of overwriting the
# variable, so that flags already set by the parent directory or on the
# command line are kept for the C++ sources of this directory.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
# SOURCES: list of the C (.c) and C++ (.cc) files of this directory that are
# grouped under one variable for the build.
set(SOURCES
./array.c
./hash.c
./maca_alphabet.cc
./maca_alphabet_wrapper.cc
./maca_common.c
./maca_graph_parser_alphabet.c
./maca_graph_parser.c
./maca_graph_parser_conll2007_format.c
./maca_graph_parser_corpora.c
./maca_graph_parser_decoder1.c
./maca_graph_parser_decoder2.c
./maca_graph_parser_decoder.c
./maca_graph_parser_dep_count_table.c
./maca_graph_parser_feature_counter_array.c
./maca_graph_parser_feature_counter.c
./maca_graph_parser_features.c
./maca_graph_parser_feature_table.c
./maca_graph_parser_feature_vector.c
./maca_graph_parser_hash.c
./maca_graph_parser_heapq.c
./maca_graph_parser_hyperdecoder.c
./maca_graph_parser_hypergraph.c
./maca_graph_parser_metrics.c
./maca_graph_parser_model.c
./maca_graph_parser_sentence.c
./maca_mcf.cc
./maca_mcf_wrapper.cc
./maca_msg.c
./simple_parser.cc
./maca_graph_parser_train.c
)
#compiling library
# Headers are searched in this directory; every file listed in SOURCES is
# compiled into the static library graph_parser.
include_directories(.)
add_library(graph_parser STATIC ${SOURCES})
# Linking graph_parser against perceptron is currently disabled.
#target_link_libraries(graph_parser perceptron)
#compiling, linking and installing executables
# Each executable below is built from a single source file, linked against
# graph_parser and installed into bin.
add_executable(test_simple_parser ./test_simple_parser.cc)
target_link_libraries(test_simple_parser graph_parser)
install (TARGETS test_simple_parser DESTINATION bin)
add_executable(maca_graph_parser_decode ./maca_graph_parser_decode_main.c)
target_link_libraries(maca_graph_parser_decode graph_parser)
install (TARGETS maca_graph_parser_decode DESTINATION bin)
add_executable(maca_graph_parser_eval ./maca_graph_parser_eval_main.c)
target_link_libraries(maca_graph_parser_eval graph_parser)
install (TARGETS maca_graph_parser_eval DESTINATION bin)
add_executable(maca_graph_parser ./maca_graph_parser_main.c)
target_link_libraries(maca_graph_parser graph_parser)
install (TARGETS maca_graph_parser DESTINATION bin)
add_executable(maca_graph_parser_print_model ./maca_graph_parser_print_model_main.c)
target_link_libraries(maca_graph_parser_print_model graph_parser)
install (TARGETS maca_graph_parser_print_model DESTINATION bin)
add_executable(maca_graph_parser_resize_model ./maca_graph_parser_resize_model_main.c)
target_link_libraries(maca_graph_parser_resize_model graph_parser)
install (TARGETS maca_graph_parser_resize_model DESTINATION bin)
add_executable(maca_graph_parser_train ./maca_graph_parser_train_main.c)
target_link_libraries(maca_graph_parser_train graph_parser)
install (TARGETS maca_graph_parser_train DESTINATION bin)
......@@ -15,6 +15,7 @@ set(SOURCES src/context.c
src/simple_decoder_tagparser_arc_eager.c
# src/simple_decoder_forrest.c
src/simple_decoder_tagger.c
src/simple_decoder_tagger_bt.c
src/feat_lib.c
src/stack.c
src/config2feat_vec.c
......@@ -41,6 +42,12 @@ target_link_libraries(maca_trans_tagger_mcf2cff transparse)
target_link_libraries(maca_trans_tagger_mcf2cff maca_common)
install (TARGETS maca_trans_tagger_mcf2cff DESTINATION bin)
add_executable(maca_trans_tagger_mcf2cff_bt ./src/maca_trans_tagger_mcf2cff_bt.c)
target_link_libraries(maca_trans_tagger_mcf2cff_bt perceptron)
target_link_libraries(maca_trans_tagger_mcf2cff_bt transparse)
target_link_libraries(maca_trans_tagger_mcf2cff_bt maca_common)
install (TARGETS maca_trans_tagger_mcf2cff_bt DESTINATION bin)
#add_executable(maca_trans_parser_mcf2fann ./src/maca_trans_parser_mcf2fann.c)
#target_link_libraries(maca_trans_parser_mcf2fann perceptron)
#target_link_libraries(maca_trans_parser_mcf2fann transparse)
......
......@@ -11,6 +11,8 @@ config *config_new(FILE *f, mcd *mcd_struct, int lookahead)
c->st = stack_new();
c->bf = word_buffer_new(f, mcd_struct, lookahead);
c->history = mvt_stack_new();
c->mvt_chosen = 0;
c->vcode_array = NULL;
return c;
}
......
......@@ -8,6 +8,7 @@
#include"mcd.h"
#include"word_buffer.h"
#include"mvt_stack.h"
#include"feature_table.h"
#define config_get_stack(c) (c)->st
#define config_get_buffer(c) (c)->bf
......@@ -17,6 +18,8 @@ typedef struct {
stack *st; /* the stack */
word_buffer *bf; /* the buffer */
mvt_stack *history; /* movement sequence that led to this configuration */
int mvt_chosen;
vcode *vcode_array;
} config;
config *config_new(FILE *f, mcd *mcd_struct, int lookahead);
......
......@@ -156,6 +156,16 @@ int b0g(config *c) {return (word_get_gov(word_buffer_b0(config_get_buffer(c))) =
/* b0sf: result of word_get_label() applied to the word that word_buffer_b0() returns for the buffer of c. */
int b0sf(config *c) {return word_get_label(word_buffer_b0(config_get_buffer(c)));}
/*
 * b0len: length in bytes of the `input` string of the word returned by
 * word_buffer_b0() for the buffer of c, capped at 7.
 * Returns 0 when that word has no input string.
 */
int b0len(config *c)
{
  word *b0 = word_buffer_b0(config_get_buffer(c));
  int length = (b0->input) ? (int) strlen(b0->input) : 0;

  if(length > 7)
    return 7;
  return length;
}
/* b0f: result of word_get_form() for the word returned by word_buffer_b0() on the buffer of c. */
int b0f(config *c) {return word_get_form(word_buffer_b0(config_get_buffer(c)));}
/* b0l: result of word_get_lemma() for the same word. */
int b0l(config *c) {return word_get_lemma(word_buffer_b0(config_get_buffer(c)));}
/* b0c: result of word_get_cpos() for the same word. */
int b0c(config *c) {return word_get_cpos(word_buffer_b0(config_get_buffer(c)));}
......@@ -628,3 +638,50 @@ int t4(config *c) /* previous transition */
mvt *m = mvt_stack_3(config_get_history(c));
return (m == NULL)? -1 : mvt_get_type(m);
}
/*
 * mvt0: class code stored in entry 0 of c->vcode_array,
 * or -1 when the configuration carries no vcode array.
 */
int mvt0(config *c)
{
  return (c->vcode_array != NULL) ? c->vcode_array[0].class_code : -1;
}
/*
 * mvt1: class code stored in entry 1 of c->vcode_array,
 * or -1 when the configuration carries no vcode array.
 */
int mvt1(config *c)
{
  return (c->vcode_array != NULL) ? c->vcode_array[1].class_code : -1;
}
/*
 * delta1: score of entry 0 minus score of entry 1 of c->vcode_array,
 * converted to int and capped at 10.
 * Returns -1 when the configuration carries no vcode array.
 */
int delta1(config *c)
{
  int gap;

  if(!c->vcode_array)
    return -1;

  gap = (int) (c->vcode_array[0].score - c->vcode_array[1].score);
  if(gap >= 10)
    return 10;
  return gap;
}
/*
 * mvt2: class code stored in entry 2 of c->vcode_array,
 * or -1 when the configuration carries no vcode array.
 */
int mvt2(config *c)
{
  return (c->vcode_array != NULL) ? c->vcode_array[2].class_code : -1;
}
/*
 * delta2: score of entry 0 minus score of entry 2 of c->vcode_array,
 * converted to int and capped at 10.
 * Returns -1 when the configuration carries no vcode array.
 */
int delta2(config *c)
{
  int gap;

  if(!c->vcode_array)
    return -1;

  gap = (int) (c->vcode_array[0].score - c->vcode_array[2].score);
  if(gap >= 10)
    return 10;
  return gap;
}
/*
 * mvt3: class code stored in entry 3 of c->vcode_array,
 * or -1 when the configuration carries no vcode array.
 */
int mvt3(config *c)
{
  return (c->vcode_array != NULL) ? c->vcode_array[3].class_code : -1;
}
/*
 * delta3: score of entry 0 minus score of entry 3 of c->vcode_array,
 * converted to int and capped at 10.
 * Returns -1 when the configuration carries no vcode array.
 */
int delta3(config *c)
{
  int gap;

  if(!c->vcode_array)
    return -1;

  gap = (int) (c->vcode_array[0].score - c->vcode_array[3].score);
  if(gap >= 10)
    return 10;
  return gap;
}
......@@ -157,6 +157,7 @@ int s3r(config *c);
int b0g(config *c);
int b0sf(config *c);
int b0len(config *c);
int b0f(config *c);
......@@ -443,4 +444,14 @@ int t3(config *c);
int t4(config *c);
int mvt0(config *c);
int mvt1(config *c);
int delta1(config *c);
int mvt2(config *c);
int delta2(config *c);
int mvt3(config *c);
int delta3(config *c);
#endif
......@@ -191,6 +191,8 @@ feat_lib *feat_lib_build(void)
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0g", b0g);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0sf", b0sf);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"b0len", b0len);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"b0f", b0f);
feat_lib_add(fl, FEAT_TYPE_LEMMA, (char *)"b0l", b0l);
feat_lib_add(fl, FEAT_TYPE_CPOS, (char *)"b0c", b0c);
......@@ -469,6 +471,16 @@ feat_lib *feat_lib_build(void)
feat_lib_add(fl, FEAT_TYPE_TRANS, (char *)"t3", t3);
feat_lib_add(fl, FEAT_TYPE_TRANS, (char *)"t4", t4);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"mvt0", mvt0);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"mvt1", mvt1);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"delta1", delta1);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"mvt2", mvt2);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"delta2", delta2);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"mvt3", mvt3);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"delta3", delta3);
return fl;
}
......
......@@ -96,16 +96,15 @@ void generate_training_file(FILE *output_file, context *ctx)
c = config_new(conll_file, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){
/* config_print(stdout,c); */
if(ctx->f2p)
/*add_signature_to_words_in_word_buffer(c->bf, ctx->f2p, dico_pos); */
add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
postag = oracle_tagger(c, NULL);
postag = oracle_tagger(c);
fprintf(output_file, "%d", postag);
feat_vec_print(output_file, fv);
int res = movement_tagger(c, postag);
if(res == 0) break;
movement_tagger(c, postag);
}
}
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"movement_tagger.h"
#include"oracle_tagger.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
/*
 * Fills in the `signature` field of the words stored in bf.
 *
 * Words are visited from the last element of the buffer down to the first;
 * the walk stops at the first word whose signature is already set
 * (word_get_signature() != -1).  For each visited word the signature is
 * looked up in f2p with the form as is and, when that lookup yields -1,
 * with a lower-cased copy of the form.
 *
 * bf  : word buffer whose words are updated in place
 * f2p : form-to-signature table queried through form2pos_get_signature()
 */
void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p)
{
  int i;
  word *w;
  char lower_form[100];

  for(i = word_buffer_get_nbelem(bf) - 1; i >= 0; i--){
    w = word_buffer_get_word_n(bf, i);
    if(word_get_signature(w) != -1) break;
    w->signature = form2pos_get_signature(f2p, w->form);
    if(w->signature == -1){
      /* Bounded copy: a form of 100 bytes or more used to overflow
         lower_form through strcpy(); it is now truncated to 99 bytes
         (always NUL-terminated) before being lower-cased. */
      snprintf(lower_form, sizeof lower_form, "%s", w->form);
      to_lower_string(lower_form);
      w->signature = form2pos_get_signature(f2p, lower_form);
    }
  }
}
/*
 * Prints the usage text of the program: the general, mode, sentence-number
 * and mcd help first, then the INPUT / IN TEST MODE / OUTPUT / IN TRAIN MODE
 * sections, whose headers are written to stderr.
 */
void maca_trans_parser_mcf2cff_help_message(context *ctx)
{
  /* options that are not tied to a section */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);
  context_mcd_help_message(ctx);

  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);

  fputs("IN TEST MODE\n", stderr);
  context_vocabs_help_message(ctx);

  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);

  fputs("IN TRAIN MODE\n", stderr);
  context_vocabs_help_message(ctx);
}
/*
 * Validates the command-line options held in ctx.
 * The options are accepted when an input file name is set, help was not
 * requested and at least one of cff_filename / fann_filename is set.
 * Otherwise the help message is printed and the process exits with status 1.
 * (An mcd file name is not required: that check is disabled.)
 */
void maca_trans_parser_mcf2cff_check_options(context *ctx)
{
  int has_input = ctx->input_filename ? 1 : 0;
  int has_output = (ctx->cff_filename || ctx->fann_filename) ? 1 : 0;

  if(has_input && !ctx->help && has_output)
    return;

  maca_trans_parser_mcf2cff_help_message(ctx);
  exit(1);
}
/*
 * Writes one training example per word to output_file.
 *
 * The input file is read twice in parallel: config_oracle keeps the tags
 * found in the input, config_predicted receives the tags chosen by a local
 * model (feature model, vocabulary and feature table loaded from fixed
 * paths below).  For each word:
 *   - the local model is applied and its scored classes are stored in
 *     config_predicted->vcode_array; entry 0 is taken as the prediction;
 *   - both configurations move forward by one word;
 *   - if the predicted tag string equals the oracle tag string, the label
 *     "0" followed by the features of ctx->features_model is written;
 *   - otherwise entries 1 and 2 are tried; when one of them equals the
 *     oracle tag, its rank is written as label followed by the features,
 *     and choice_n() installs that tag.  If neither matches, nothing is
 *     written for this word.
 *
 * output_file : stream receiving the examples
 * ctx         : program context (input file name, vocabularies, models, mode)
 *
 * NOTE(review): the three "/home/alexis/..." paths are hard-coded; they
 * should presumably come from ctx.
 * NOTE(review): the two configurations, fv and the local model objects are
 * not released here; no release function for them is used in this file.
 */
void generate_training_file(FILE *output_file, context *ctx)
{
  config *config_oracle;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  FILE *conll_file = myfopen(ctx->input_filename, "r");
  FILE *conll_file_predicted = myfopen(ctx->input_filename, "r");
  int postag_oracle;
  dico *dico_pos_oracle = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
  feat_model *local_feat_model = feat_model_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.fm", ctx->verbose);
  dico_vec *local_dico_vec = dico_vec_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.vocab", ctx->hash_ratio);
  dico *dico_pos_local = dico_vec_get_dico(local_dico_vec, (char *)"POS");
  feature_table *local_ft = feature_table_load("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.model", ctx->verbose);
  dico *local_perceptron_features = dico_vec_get_dico(local_dico_vec, (char *)"d_perceptron_features");
  config *config_predicted;
  int postag_predicted;
  int i;
  char *postag_oracle_string;
  char *postag_predicted_string;

  config_predicted = config_new(conll_file_predicted, ctx->mcd_struct, 5);
  config_oracle = config_new(conll_file, ctx->mcd_struct, 5);

  while(!config_is_terminal(config_oracle)){
    if(ctx->f2p){
      add_signature_to_words_in_word_buffer(config_oracle->bf, ctx->f2p);
      add_signature_to_words_in_word_buffer(config_predicted->bf, ctx->f2p);
    }

    /* reference tag of the current word */
    postag_oracle = word_get_pos(word_buffer_b0(config_get_buffer(config_oracle)));
    postag_oracle_string = dico_int2string(dico_pos_oracle, postag_oracle);

    /* apply the local model; the previous array is dropped first
       (free(NULL) is a no-op, so no guard is needed) */
    config2feat_vec_cff(local_feat_model, config_predicted, local_perceptron_features, fv, ctx->mode);
    free(config_predicted->vcode_array);
    config_predicted->vcode_array = feature_table_get_vcode_array(fv, local_ft);

    postag_predicted = config_predicted->vcode_array[0].class_code;
    postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted);

    if(ctx->debug_mode){
      if(strcmp(postag_oracle_string, postag_predicted_string)){
        fprintf(stdout, "**************** DIFFERENTS ***********\n");
        fprintf(stdout, "%s\n", word_get_input(word_buffer_b0(config_get_buffer(config_oracle))));
      }
    }

    forward(config_predicted, postag_predicted);
    forward(config_oracle, postag_oracle);

    if(!strcmp(postag_oracle_string, postag_predicted_string)){
      fprintf(output_file, "0");
      config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv, ctx->mode);
      feat_vec_print(output_file, fv);
    }

    int choice = 1;
    while(strcmp(postag_oracle_string, postag_predicted_string) && (choice < 3)){
      /* The array built by feature_table_get_vcode_array() appears to end
         with an entry whose class_code is -1 (TODO confirm); never read
         at or past it. */
      if(config_predicted->vcode_array[choice].class_code == -1) break;

      if(ctx->debug_mode){
        fprintf(stdout, "%d postag oracle = %s postag predicted = %s\n",
                word_buffer_get_current_index(config_get_buffer(config_oracle)),
                dico_int2string(dico_pos_oracle, postag_oracle),
                dico_int2string(dico_pos_local, postag_predicted));
        /* dump at most 5 entries, stopping at the -1 terminator */
        for(i=0; i < 5 && config_predicted->vcode_array[i].class_code != -1; i++)
          fprintf(stdout, "%d\t%s\t%.4f\t%.4f\n", i,
                  dico_int2string(dico_pos_local, config_predicted->vcode_array[i].class_code),
                  config_predicted->vcode_array[i].score,
                  config_predicted->vcode_array[i].score - config_predicted->vcode_array[0].score);
        fprintf(stdout, "CHOICE %d\n", choice);
      }

      postag_predicted = config_predicted->vcode_array[choice].class_code;
      postag_predicted_string = dico_int2string(dico_pos_local, postag_predicted);

      if(!strcmp(postag_predicted_string, postag_oracle_string)){
        if(ctx->debug_mode){
          printf("GOOD CHOICE\n");
        }
        fprintf(output_file, "%d", choice);
        config2feat_vec_cff(ctx->features_model, config_predicted, ctx->d_perceptron_features, fv, ctx->mode);
        feat_vec_print(output_file, fv);
        choice_n(config_predicted, choice);
      }
      choice++;
    }
  }

  /* release what this function owns and can release with the names in
     scope: the last score array and the two input streams */
  free(config_predicted->vcode_array);
  config_predicted->vcode_array = NULL;
  fclose(conll_file);
  fclose(conll_file_predicted);
}
int main(int argc, char *argv[])
{
context *ctx;
FILE *output_file;
ctx = context_read_options(argc, argv);
maca_trans_parser_mcf2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
}
else if(ctx->mode == TEST_MODE){
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
}
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* in train mode create feature dictionnary for perceptron */
if(ctx->mode == TRAIN_MODE)
ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);
/* in test mode read feature dictionnary for perceptron */
if(ctx->mode == TEST_MODE)
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
/* add the feature dictionnary to the dico vector */
dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);
/* open output file */
if(ctx->cff_filename)
output_file = myfopen(ctx->cff_filename, "w");
else
output_file = stdout;
generate_training_file(output_file, ctx);
if(ctx->mode == TRAIN_MODE){
/* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */
dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
}
if(ctx->cff_filename)
fclose(output_file);
context_free(ctx);
return 0;
}
......@@ -11,3 +11,26 @@ int movement_tagger(config *c, int postag)
return 1;
}
/*
 * forward: records that choice 0 is in effect (mvt_chosen = 0), assigns
 * postag to the word returned by word_buffer_b0() and moves the buffer of c
 * one position to the right.  Always returns 1.
 */
int forward(config *c, int postag)
{
  c->mvt_chosen = 0;

  word_set_pos(word_buffer_b0(c->bf), postag);
  word_buffer_move_right(c->bf);

  return 1;
}
/*
 * choice_n: assigns to the word returned by word_buffer_bm1() the class code
 * stored in entry n of c->vcode_array and records n in c->mvt_chosen.
 * Always returns 1.
 * n is used as an index without any range check, and c->vcode_array is
 * dereferenced without a NULL check.
 */
int choice_n(config *c, int n)
{
  int class_code = c->vcode_array[n].class_code;

  word_set_pos(word_buffer_bm1(c->bf), class_code);
  c->mvt_chosen = n;

  return 1;
}
/*
 * backward: sets the pos of the word returned by word_buffer_b0() to -1,
 * then moves the buffer of c one position to the left.  Always returns 1.
 */
int backward(config *c)
{
/* the tag is reset on the current word before the buffer position changes */
word_set_pos(word_buffer_b0(c->bf), -1);
word_buffer_move_left(c->bf);
return 1;
}
......@@ -5,4 +5,12 @@
#include"feat_vec.h"
int movement_tagger(config *c, int postag);
int forward(config *c, int postag);
int next_choice(config *c, int postag);
int backward(config *c);
int choice_n(config *c, int n);
#endif
#include"oracle_tagger.h"
int oracle_tagger(config *c, sentence *ref)
int oracle_tagger(config *c)
{
word *b0; /* next word in the bufer */
/* int b0_index; */
int b0_pos;
if(!word_buffer_is_empty(c->bf)){
b0 = word_buffer_b0(c->bf);
b0_pos = word_get_pos(b0);
/* printf("b0_pos = %d\n", b0_pos); */
/* b0_index = word_get_index(b0); */
/* return word_get_pos(ref->words[b0_index]); */
return b0_pos;
}
return -1;
return word_get_pos(word_buffer_b0(config_get_buffer(c)));
}
......@@ -4,8 +4,7 @@
#include<stdio.h>
#include<stdlib.h>
#include"config.h"
#include"sentence.h"
int oracle_tagger(config *c, sentence *ref);
int oracle_tagger(config *c);
#endif
......@@ -82,6 +82,7 @@ void print_word(word *w, mcd *mcd_struct, dico *dico_pos, int postag)
}
}
#if 1
void simple_decoder_tagger(context *ctx)
{
config *c;
......@@ -138,3 +139,83 @@ void simple_decoder_tagger(context *ctx)
config_free(c);
if (ctx->input_filename) fclose(f);
}
#endif
#if 0
/*
 * Variant of simple_decoder_tagger that is compiled out by the enclosing
 * "#if 0".
 * For every word it assigns the class returned by feature_table_argmax().
 * When the current word has an index greater than 0 it then steps one word
 * back, predicts that word again, reports on stderr when the new tag
 * differs from the old one, stores the new tag, prints the word and steps
 * forward again.
 * NOTE(review): f, fv and ft are not released in this variant.
 */
void simple_decoder_tagger(context *ctx)
{
config *c;
feat_vec *fv = feat_vec_new(feature_types_nb);
FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
int postag;
float max;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
c = config_new(f, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){
if(ctx->f2p)
/* add_signature_to_words_in_word_buffer(c->bf, ctx->f2p, dico_pos); */
add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);
if(ctx->debug_mode){
fprintf(stderr, "***********************************\n");
fprintf(stderr, "b0 lex = %d\n", word_get_form(word_buffer_b0(config_get_buffer(c))));
config_print(stderr, c);
}
/* config_print(stdout, c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
/* feat_vec_print(stdout, fv); */
postag = feature_table_argmax(fv, ft, &max);
/* printf("postag = %d\n", postag); */
if(ctx->debug_mode){
/* debug only: print the first three entries of the score array, then free it */
vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
for(int i=0; i < 3; i++){
fprintf(stderr, "%d\t", i);
fprintf(stderr, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score);
}
free(vcode_array);
}
word_set_pos(word_buffer_b0(config_get_buffer(c)), postag);
if((word_buffer_b0(config_get_buffer(c)))->index > 0){
/* word_buffer_move_left(config_get_buffer(c)); */
/* step back one word and predict it a second time */
word_buffer_move_left(config_get_buffer(c));
int postag_old = word_get_pos(word_buffer_b0(config_get_buffer(c)));
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
int postag_new = feature_table_argmax(fv, ft, &max);
if(ctx->debug_mode){
fprintf(stderr, "***********************************\n");
fprintf(stderr, "b1p = %s\n", dico_int2string(dico_pos, b1p(c)));
fprintf(stderr, "bm1p = %s\n", dico_int2string(dico_pos, bm1p(c)));
fprintf(stderr, "b0 index = %d\n", word_get_index(word_buffer_b0(config_get_buffer(c))));
fprintf(stderr, "b0 lex = %d\n", word_get_form(word_buffer_b0(config_get_buffer(c))));
config_print(stderr, c);
vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
for(int i=0; i < 3; i++){
fprintf(stderr, "%d\t", i);
fprintf(stderr, "%s\t%.4f\n", dico_int2string(dico_pos, vcode_array[i].class_code), vcode_array[i].score);
}
free(vcode_array);
}
if(postag_new != postag_old)
fprintf(stderr, "postag old = %s postag_new = %s\n", dico_int2string(dico_pos, postag_old), dico_int2string(dico_pos, postag_new));
word_set_pos(word_buffer_b0(c->bf), postag_new);
print_word(word_buffer_b0(c->bf), ctx->mcd_struct, dico_pos, postag_new);
/* return to the position held before stepping back */
word_buffer_move_right(config_get_buffer(c));
}
word_buffer_move_right(config_get_buffer(c));
}
config_free(c);
}
#endif
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include<ctype.h>
#include"context.h"
#include"movement_tagger.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
#include"mcd.h"
/*
 * Fills in the `signature` field of the words stored in bf.
 *
 * Words are visited from the last element of the buffer down to the first;
 * the walk stops at the first word whose signature is already set
 * (word_get_signature() != -1).  For each visited word the signature is
 * looked up in f2p with the form as is and, when that lookup yields -1,
 * with a lower-cased copy of the form.
 *
 * bf  : word buffer whose words are updated in place
 * f2p : form-to-signature table queried through form2pos_get_signature()
 */
void add_signature_to_words_in_word_buffer2(word_buffer *bf, form2pos *f2p)
{
  int i;
  word *w;
  char lower_form[100];

  for(i = word_buffer_get_nbelem(bf) - 1; i >= 0; i--){
    w = word_buffer_get_word_n(bf, i);
    if(word_get_signature(w) != -1) break;
    w->signature = form2pos_get_signature(f2p, w->form);
    if(w->signature == -1){
      /* Bounded copy: a form of 100 bytes or more used to overflow
         lower_form through strcpy(); it is now truncated to 99 bytes
         (always NUL-terminated) before being lower-cased. */
      snprintf(lower_form, sizeof lower_form, "%s", w->form);
      to_lower_string(lower_form);
      w->signature = form2pos_get_signature(f2p, lower_form);
    }
  }
}
/*
 * Prints word w on stdout with the tag postag (rendered through dico_pos).
 *
 * When the mcd has no pos column (mcd_get_pos_col() == -1) the raw input
 * line is printed followed by a tab and the tag.  Otherwise the input is
 * split on tabs to count its columns: each column is printed through
 * word_print_col_n(), except the pos column, which is replaced by the tag.
 * If the input has too few columns to reach the pos column, the tag is
 * appended as one extra column.
 */
void print_word2(word *w, mcd *mcd_struct, dico *dico_pos, int postag)
{
  char *copy = NULL;
  char *field = NULL;
  int col = 0;

  if(mcd_get_pos_col(mcd_struct) == -1){
    printf("%s\t%s\n", w->input, dico_int2string(dico_pos, postag));
    return;
  }

  /* strtok() writes into its argument, so it works on a private copy */
  copy = strdup(w->input);
  for(field = strtok(copy, "\t"); field != NULL; field = strtok(NULL, "\t")){
    if(col != 0)
      printf("\t");
    if(col == mcd_get_pos_col(mcd_struct))
      printf("%s", dico_int2string(dico_pos, postag));
    else
      word_print_col_n(stdout, w, col);
    col++;
  }

  if(col <= mcd_get_pos_col(mcd_struct))
    printf("\t%s", dico_int2string(dico_pos, postag));
  printf("\n");

  free(copy);
}
/*
 * Tags the input with two models.
 *
 * For every word a local model (loaded from the fixed paths below) scores
 * the classes; the scored array is stored in c->vcode_array and its entry 0
 * is applied with forward().  The model given by ctx->perc_model_filename
 * is then applied to the new configuration: its top class is interpreted as
 * a rank into c->vcode_array, and when that rank is positive choice_n()
 * replaces the tag by the one found at that rank.  The word is finally
 * printed with the retained tag.
 *
 * ctx : program context (input file name or stdin, models, mcd, debug flag)
 *
 * NOTE(review): the three "/home/alexis/..." paths are hard-coded; they
 * should presumably come from ctx.
 * NOTE(review): feat_vec_new(2) uses a literal size; confirm it matches the
 * feature models in use.
 * NOTE(review): fv, ft and the local model objects are not released here.
 */
void simple_decoder_tagger2(context *ctx)
{
  config *c;
  feat_vec *fv = feat_vec_new(2);
  FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
  feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
  int postag;
  feat_model *local_feat_model = feat_model_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.fm", ctx->verbose);
  dico_vec *local_dico_vec = dico_vec_read("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.vocab", ctx->hash_ratio);
  dico *local_dico_pos = dico_vec_get_dico(local_dico_vec, (char *)"POS");
  dico *local_perceptron_features = dico_vec_get_dico(local_dico_vec, (char *)"d_perceptron_features");
  feature_table *local_ft = feature_table_load("/home/alexis/maca_data2/fr/bin/maca_trans_tagger.model", ctx->verbose);

  c = config_new(f, ctx->mcd_struct, 5);

  while(!config_is_terminal(c)){
    if(ctx->f2p)
      /* add_signature_to_words_in_word_buffer2(c->bf, ctx->f2p, dico_pos); */
      add_signature_to_words_in_word_buffer2(c->bf, ctx->f2p);

    /* postag = word_get_pos(word_buffer_b0(c->bf)); */
    if(ctx->debug_mode){
      fprintf(stderr, "***********************************\n");
      config_print(stderr, c);
    }

    /* apply local model; the previous array is dropped first
       (free(NULL) is a no-op, so no guard is needed) */
    config2feat_vec_cff(local_feat_model, c, local_perceptron_features, fv, LOOKUP_MODE);
    free(c->vcode_array);
    c->vcode_array = feature_table_get_vcode_array(fv, local_ft);
    postag = c->vcode_array[0].class_code;

    if(ctx->debug_mode){
      fprintf(stderr, "apply local model\n");
      /* The array appears to end with an entry whose class_code is -1
         (TODO confirm); the dump stops there instead of reading past it. */
      for(int i=0; i < 5 && c->vcode_array[i].class_code != -1; i++)
        fprintf(stderr, "%d\t%s\t%.4f\n", i, dico_int2string(local_dico_pos, c->vcode_array[i].class_code), c->vcode_array[i].score);
    }

    forward(c, postag);

    /* apply global model */
    config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
    vcode *vcode_array = feature_table_get_vcode_array(fv, ft);

    if(ctx->debug_mode){
      fprintf(stderr, "apply global model\n");
      for(int i=0; i < 3 && vcode_array[i].class_code != -1; i++)
        fprintf(stderr, "%d\t%d\t%.4f\n", i, vcode_array[i].class_code, vcode_array[i].score);
    }

    int choice = vcode_array[0].class_code;
    /* choice indexes c->vcode_array: a negative value (e.g. the -1
       terminator) must not be used as an index, so only positive ranks
       replace the tag.
       NOTE(review): the upper bound of choice is still unchecked. */
    if(choice > 0){
      postag = c->vcode_array[choice].class_code;
      choice_n(c, choice);
    }
    free(vcode_array);

    print_word2(word_buffer_bm1(c->bf), ctx->mcd_struct, local_dico_pos, postag);
  }

  /* config_print(stdout, c); */

  /* release the last score array; the pointer is cleared so that a release
     of the same field inside config_free(), if any, stays harmless */
  free(c->vcode_array);
  c->vcode_array = NULL;
  config_free(c);

  /* close the input only when this function opened it (never stdin) */
  if(ctx->input_filename) fclose(f);
}
......@@ -325,8 +325,10 @@ vcode* feature_table_get_vcode_array(feat_vec *fv, feature_table* ft)
table[cla].score = 0;
table[cla].class_code = cla;
for(feat = 0; feat < fv->nb;feat++){
if(fv->t[feat] >= ft->features_nb) continue;
if(fv->t[feat] == -1) continue;
table[cla].score += ft->table[fv->t[feat]][cla];
table[cla].score +=
ft->table[fv->t[feat]][cla];
}
}
table[classes_nb].class_code = -1;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment