Skip to content
Snippets Groups Projects
Commit 72eba5aa authored by Alexis Nasr's avatar Alexis Nasr
Browse files

stable version of arc eager parser

parent 89a7f9f3
No related branches found
No related tags found
No related merge requests found
......@@ -53,7 +53,7 @@
#define word_set_gov(w, val) (w)->wf_array[MCD_WF_GOV] = (val)
#define word_set_label(w, val) (w)->wf_array[MCD_WF_LABEL] = (val)
#define word_set_stag(w, val) (w)->wf_array[MCD_WF_STAG] = (val)
/* Store val in the MCD_WF_WORD_SEG slot of w's feature array.
   The value is an explicit macro parameter, as in the other word_set_* setters
   (the expansion previously referred to an undeclared `val`). */
#define word_set_word_seg(w, val) (w)->wf_array[MCD_WF_WORD_SEG] = (val)
#define word_set_sent_seg(w, val) (w)->wf_array[MCD_WF_SENT_SEG] = (val)
#define word_set_A(w, val) (w)->wf_array[MCD_WF_A] = (val)
#define word_set_B(w, val) (w)->wf_array[MCD_WF_B] = (val)
#define word_set_C(w, val) (w)->wf_array[MCD_WF_C] = (val)
......
......@@ -8,6 +8,7 @@ set(SOURCES src/context.c
src/oracle_parser_arc_eager.c
src/oracle_tagger.c
src/simple_decoder_parser.c
src/simple_decoder_parser_arc_eager.c
src/simple_decoder_forrest.c
src/simple_decoder_tagger.c
src/feat_lib.c
......@@ -46,6 +47,12 @@ target_link_libraries(maca_trans_parser_mcf2cff transparse)
target_link_libraries(maca_trans_parser_mcf2cff maca_common)
install (TARGETS maca_trans_parser_mcf2cff DESTINATION bin)
add_executable(maca_trans_parser_arc_eager_mcf2cff ./src/maca_trans_parser_arc_eager_mcf2cff.c)
target_link_libraries(maca_trans_parser_arc_eager_mcf2cff perceptron)
target_link_libraries(maca_trans_parser_arc_eager_mcf2cff transparse)
target_link_libraries(maca_trans_parser_arc_eager_mcf2cff maca_common)
install (TARGETS maca_trans_parser_arc_eager_mcf2cff DESTINATION bin)
add_executable(maca_trans_parser ./src/maca_trans_parser.c)
target_link_libraries(maca_trans_parser perceptron)
target_link_libraries(maca_trans_parser transparse)
......
......@@ -6,6 +6,7 @@
#include"context.h"
#include"movement_parser.h"
#include"oracle_parser.h"
#include"oracle_parser_arc_eager.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"movement_parser_arc_eager.h"
#include"oracle_parser_arc_eager.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
/*
 * Print the usage message of the tool.
 *
 * The section headings ("INPUT", "IN TEST MODE", "OUTPUT", "IN TRAIN MODE")
 * are written to stderr; the option descriptions themselves are produced by
 * the context_*_help_message helpers, each called with ctx.
 */
void maca_trans_parser_mcf2cff_help_message(context *ctx)
{
    /* options that do not belong to a specific section */
    context_general_help_message(ctx);
    context_mode_help_message(ctx);
    context_sent_nb_help_message(ctx);

    /* what the program reads */
    fputs("INPUT\n", stderr);
    context_conll_help_message(ctx);
    fputs("IN TEST MODE\n", stderr);
    context_vocabs_help_message(ctx);

    /* what the program writes */
    fputs("OUTPUT\n", stderr);
    context_cff_help_message(ctx);
    fputs("IN TRAIN MODE\n", stderr);
    context_vocabs_help_message(ctx);
}
/*
 * Validate the command line options stored in ctx.
 *
 * The options are accepted when an input file name was given and help was
 * not requested. Otherwise the help message is printed and the process exits
 * with status 1.
 *
 * The mcd file name and the cff/fann output file names are deliberately not
 * required here (those tests are disabled).
 */
void maca_trans_parser_mcf2cff_check_options(context *ctx)
{
    if(ctx->input_filename && !ctx->help)
        return;

    maca_trans_parser_mcf2cff_help_message(ctx);
    exit(1);
}
/*
 * Generate the training examples (cff format) for the arc eager parser.
 *
 * The input file named in ctx is read twice:
 *   - once into `ref`, the reference word buffer, which carries the syntax
 *     (governors and labels) consulted by the oracle;
 *   - once through `mcf_file`, with an mcd stripped of its GOV and LABEL
 *     columns, to build the configuration `c` on which the movements are
 *     actually applied.
 *
 * For every step, the feature vector of the current configuration and the
 * movement code chosen by the oracle are written on one line of output_file,
 * then the movement is applied to the configuration.
 *
 * The loop stops when the reference buffer is exhausted, when ctx->sent_nb
 * sentences have been processed, or right after an EOS movement if the
 * reference buffer is on its last word.
 *
 * output_file: stream receiving the examples (not closed here).
 * ctx:         run context (features model, dictionaries, mcd, file names).
 */
void generate_training_file_stream(FILE *output_file, context *ctx)
{
    config *c;
    int mvt_code;
    char mvt_type;
    int mvt_label;
    feat_vec *fv = feat_vec_new(feature_types_nb);
    int sentence_nb = 0;
    int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
    word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
    FILE *mcf_file = myfopen(ctx->input_filename, "r");
    int start_sentence_index = 0;

    /* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
    /* the idea is to ignore syntax in the mcf file that will be read */
    /* it is ugly !!! */
    mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
    mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
    mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);

    c = config_initial_no_dummy_word(mcf_file, mcd_struct_hyp, 5);

    while(!word_buffer_end(ref) && (sentence_nb < ctx->sent_nb)){
        /* describe the current configuration and ask the oracle what to do */
        config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
        mvt_code = oracle_parser_arc_eager(c, ref, start_sentence_index, root_label);
        mvt_type = movement_type(mvt_code);
        mvt_label = movement_label(mvt_code);

        /* one example per line: movement code followed by the features */
        fprintf(output_file, "%d", mvt_code);
        feat_vec_print(output_file, fv);

        /* apply the movement; the movement types are mutually exclusive */
        if(mvt_type == MVT_EOS){
            movement_eos(c, 0);
            sentence_nb++;
            /* index handed to the oracle as the start of the next sentence */
            start_sentence_index = word_get_index(word_buffer_b0(config_get_buffer(c))) - 1;
            if(word_buffer_is_last(ref))
                break;
        }
        else if(mvt_type == MVT_LEFT){
            movement_left_arc(c, mvt_label, 0);
        }
        else if(mvt_type == MVT_RIGHT){
            movement_right_arc(c, mvt_label, 0);
            /* the reference buffer advances together with the configuration buffer */
            word_buffer_move_right(ref);
        }
        else if(mvt_type == MVT_REDUCE){
            movement_reduce(c, 0);
        }
        else if(mvt_type == MVT_ROOT){
            movement_root(c, 0, root_label);
        }
        else if(mvt_type == MVT_SHIFT){
            movement_shift(c, 1, 0);
            word_buffer_move_right(ref);
        }
    }

    /* release what this function created and can safely release:
       the feature vector and the second handle on the input file */
    feat_vec_free(fv);
    if(mcf_file)
        fclose(mcf_file);

    /* NOTE(review): c, ref and mcd_struct_hyp are still not released; the
       matching destructors (and whether config_free is safe on a stream
       configuration) should be confirmed before adding them here. */
}
int main(int argc, char *argv[])
{
context *ctx;
FILE *output_file;
ctx = context_read_options(argc, argv);
maca_trans_parser_mcf2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
}
else if(ctx->mode == TEST_MODE){
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
}
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){
fprintf(stderr, "cannot find label names\n");
return 1;
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* in train mode create feature dictionnary for perceptron */
if(ctx->mode == TRAIN_MODE)
ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);
/* in test mode read feature dictionnary for perceptron */
if(ctx->mode == TEST_MODE)
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
/* add the feature dictionnary to the dico vector */
dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);
/* open output file */
output_file = (ctx->cff_filename) ? myfopen_no_exit(ctx->cff_filename, "w") : stdout;
generate_training_file_stream(output_file, ctx);
if(ctx->mode == TRAIN_MODE)
dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
if(ctx->cff_filename)
fclose(output_file);
context_free(ctx);
return 0;
}
......@@ -3,8 +3,8 @@
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"movement_parser_arc_eager.h"
#include"oracle_parser_arc_eager.h"
#include"movement_parser.h"
#include"oracle_parser.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
......@@ -12,7 +12,7 @@
#include"word_emb.h"
#include"config2feat_vec.h"
void maca_trans_parser_mcf2cff_help_message(context *ctx)
void maca_trans_parser_conll2cff_help_message(context *ctx)
{
context_general_help_message(ctx);
context_mode_help_message(ctx);
......@@ -30,14 +30,14 @@ void maca_trans_parser_mcf2cff_help_message(context *ctx)
}
void maca_trans_parser_mcf2cff_check_options(context *ctx)
void maca_trans_parser_conll2cff_check_options(context *ctx)
{
if(!ctx->input_filename
|| ctx->help
/* || !ctx->mcd_filename */
/* || !(ctx->cff_filename || ctx->fann_filename) */
){
maca_trans_parser_mcf2cff_help_message(ctx);
maca_trans_parser_conll2cff_help_message(ctx);
exit(1);
}
}
......@@ -49,86 +49,122 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
char mvt_type;
int mvt_label;
feat_vec *fv = feat_vec_new(feature_types_nb);
sentence *ref = NULL;
int sentence_nb = 0;
/* int root_label = dico_string2int(mcd_get_dico_label(ctx->mcd_struct), (char *) ctx->root_label); */
int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
int eos_label = dico_string2int(ctx->dico_labels, "eos");
word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
FILE *mcf_file = myfopen(ctx->input_filename, "r");
int start_sentence_index = 0;
/* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
/* the idea is to ignore syntax in the mcf file that will be read */
/* it is ugly !!! */
mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);
c = config_initial_no_dummy_word(mcf_file, mcd_struct_hyp, 5);
FILE *conll_file = myfopen(ctx->input_filename, "r");
FILE *conll_file_ref = myfopen(ctx->input_filename, "r");
c = config_initial(conll_file, ctx->mcd_struct, 5);
while(!word_buffer_end(ref)){
/*printf("************ REF ************\n");
word_buffer_print(stdout, ref);
printf("*****************************\n");*/
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
/* feat_vec_print(stdout, fv); */
mvt_code = oracle_parser_arc_eager(c, ref, start_sentence_index, root_label);
while((ref = sentence_read(conll_file_ref , ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){
/* sentence_print(stdout, ref, ctx->dico_labels); */
while(1){
/* config_print(stdout,c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* config_print(stdout,c); */
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if(mvt_type == MVT_EOS){
/* printf("************BEFORE *****************\n"); */
/* config_print(stdout,c); */
movement_eos(c, 0);
/* feat_vec_print(stdout, fv); */
mvt_code = oracle_parser(c, ref);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("************AFTER*****************\n"); */
/* config_print(stdout,c); */
start_sentence_index = word_get_index(word_buffer_b0(config_get_buffer(c))) - 1;
/* printf("%d\n", start_sentence_index); */
/* printf("mvt code = %d\n", mvt_code); */
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if(queue_is_empty(c->bf)) break;
if(word_buffer_is_last(ref)){
/* printf("it is the end\n"); */
if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */
/* create the root arc */
movement_right_arc(c, mvt_label, 0);
/* shift dummy word in stack */
movement_shift(c, 1, 0);
/* printf("sentence complete config : ");
config_print(stdout,c); */
/* empty depset */
depset_free(c->ds);
c->ds = depset_new();
sentence_free(ref);
sentence_nb++;
c->current_index = queue_renumber_words(c->bf);
break;
}
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_SHIFT){
movement_shift(c, 1, 0);
continue;
}
}
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
word_buffer_move_right(ref);
continue;
}
if(mvt_type == MVT_REDUCE){
movement_reduce(c, 0);
continue;
}
if(mvt_type == MVT_ROOT){
movement_root(c, 0, root_label);
continue;
}
}
}
void generate_training_file_buffer(FILE *output_file, context *ctx)
{
config *c;
int mvt_code;
char mvt_type;
int mvt_label;
feat_vec *fv = feat_vec_new(feature_types_nb);
sentence *ref = NULL;
int sentence_nb = 0;
FILE *conll_file = myfopen(ctx->input_filename, "r");
FILE *conll_file_ref = myfopen(ctx->input_filename, "r");
if(mvt_type == MVT_SHIFT){
movement_shift(c, 1, 0);
word_buffer_move_right(ref);
continue;
c = config_initial(conll_file, ctx->mcd_struct, 0);
while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){
/* sentence_print(stdout, ref, NULL); */
queue_read_sentence(c->bf, conll_file, ctx->mcd_struct);
while(!config_is_terminal(c)){
/* config_print(stdout,c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
mvt_code = oracle_parser(c, ref);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("mvt type = %d mvt label = %d\n", mvt_type, mvt_label); */
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_SHIFT){
movement_shift(c, 0, 0);
continue;
}
}
config_free(c);
c = config_initial(conll_file, ctx->mcd_struct, 0);
sentence_nb++;
}
}
......@@ -138,7 +174,7 @@ int main(int argc, char *argv[])
FILE *output_file;
ctx = context_read_options(argc, argv);
maca_trans_parser_mcf2cff_check_options(ctx);
maca_trans_parser_conll2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
......@@ -182,18 +218,12 @@ int main(int argc, char *argv[])
else
output_file = stdout;
generate_training_file_stream(output_file, ctx);
#if 0
=======
if(ctx->stream_mode){
generate_training_file_stream(output_file, ctx);
}
else{
generate_training_file_buffer(output_file, ctx);
}
>>>>>>> master
#endif
if(ctx->mode == TRAIN_MODE){
/* dico_print(ctx->perceptron_features_filename, ctx->d_perceptron_features); */
dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
......
......@@ -8,10 +8,11 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels){
int mvt_type = movement_type(mvt_code);
int mvt_label = movement_label(mvt_code);
char *label;
if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT\n"); return;}
if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT\n"); return;}
if(mvt_type == MVT_REDUCE) {fprintf(f, "REDUCE\n"); return;}
if(mvt_type == MVT_ROOT) {fprintf(f, "ROOT\n"); return;}
if(mvt_type == MVT_RIGHT) fprintf(f, "RIGHT");
if(mvt_type == MVT_ROOT) {fprintf(f, "ROOT\n"); return;}
if(mvt_type == MVT_EOS) {fprintf(f, "EOS\n"); return;}
if(mvt_type == MVT_RIGHT) fprintf(f, "RIGHT");
else fprintf(f, "LEFT");
label = dico_int2string(dico_labels, mvt_label);
fprintf(f, " %s\n", label);
......@@ -19,20 +20,20 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels){
/* Map a movement code to its movement type.
   Codes 0, 1, 2 and 3 are the unlabelled movements (shift, reduce, root, eos);
   every other even code is a left movement and every other odd code is a
   right movement. */
int movement_type(int mvt)
{
if(mvt == 0) return MVT_SHIFT; /* 0 is the code of shift */
if(mvt == 1) return MVT_REDUCE; /* 1 is the code of reduce */
if(mvt == 2) return MVT_ROOT; /* 2 is the code of root */
if(mvt == 3) return MVT_EOS; /* 3 is the code of eos */
if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */
return MVT_RIGHT; /* odd movements are right movements */
/* NOTE(review): everything below is unreachable, the function has already
   returned above. It looks like a second version of the same tests (using the
   MVT_* names instead of literal codes) left next to the first one - confirm
   which of the two sets should be kept. */
if(mvt == MVT_SHIFT) return MVT_SHIFT; /* 0 is the code of shift */
if(mvt == MVT_REDUCE) return MVT_REDUCE; /* 1 is the code of reduce */
if(mvt == MVT_ROOT) return MVT_ROOT; /* 2 is the code of root */
if(mvt == MVT_EOS) return MVT_EOS; /* 3 is the code of eos */
if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */
return MVT_RIGHT; /* odd movements are right movements */
}
int movement_label(int mvt)
{
if(mvt == 0) return -1; /* 0 is the code of shift */
if(mvt == 1) return -1; /* 1 is the code of reduce */
if(mvt == 2) return -1; /* 2 is the code of root */
if(mvt == 3) return -1; /* 3 is the code of eos */
if(mvt == MVT_SHIFT) return -1; /* 0 is the code of shift */
if(mvt == MVT_REDUCE) return -1; /* 1 is the code of reduce */
if(mvt == MVT_ROOT) return -1; /* 2 is the code of root */
if(mvt == MVT_EOS) return -1; /* 3 is the code of eos */
if(mvt % 2 == 0) /* even codes correspond to left movements */
return mvt / 2 - 2;
return (mvt - 1) / 2 - 2; /* odd codes correspond to right movements */
......@@ -40,11 +41,18 @@ int movement_label(int mvt)
/* Apply the end-of-sentence movement to configuration c.
   When the stack is empty nothing is done. Otherwise the word on top of the
   stack is flagged as a sentence boundary, reduce movements are applied until
   one is refused, the top of the stack is popped and MVT_EOS is recorded in
   the movement history of c.
   score is not used by this function.
   Returns 1 in every case. */
int movement_eos(config *c, float score)
{
/* nothing to close: the movement is not recorded in this case */
if(stack_is_empty(config_get_stack(c))) return 1;
/* word on the top of the stack is sent_seg */
word_set_sent_seg(stack_top(config_get_stack(c)), 1);
/* perform all pending reduce */
/* (empty loop body: repeats until movement_reduce returns 0) */
while(movement_reduce(c,0));
/* remove root from stack */
stack_pop(config_get_stack(c));
config_add_mvt(c, MVT_EOS);
return 1;
}
......@@ -82,7 +90,6 @@ int movement_right_arc(config *c, int label, float score)
/* printf("create right arc %d -> %d dist = %d\n", word_get_index(gov), word_get_index(dep), dist); */
/* create a new dependency */
word_set_gov(dep, dist);
word_set_label(dep, label);
......@@ -118,8 +125,6 @@ int movement_root(config *c, float score, int root_code)
word *b0 = word_buffer_b0(config_get_buffer(c));
word_set_gov(b0, 0);
word_set_label(b0, root_code);
/* stack_push(config_get_stack(c), b0); */
/* word_buffer_move_right(config_get_buffer(c)); */
config_add_mvt(c, MVT_ROOT);
return 1;
}
......
......@@ -4,12 +4,55 @@
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement_parser_arc_eager.h"
#include"movement_parser.h"
#include"oracle_parser.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
/*
 * Sentence-by-sentence decoder.
 *
 * Sentences are read from f into the buffer of a configuration, one at a
 * time, for as long as queue_read_sentence returns more than 1. Each
 * sentence is parsed greedily: the best scoring movement according to ft is
 * tried, and a shift is performed whenever that movement is neither a left
 * nor a right arc, or could not be applied. Once the configuration is
 * terminal, the subtrees are attached with root_label and the dependency set
 * is printed on stdout.
 *
 * ctx:        run context (features model, dictionaries, mcd)
 * f:          input stream
 * ft:         feature table used to score the movements
 * root_label: label code used when connecting the subtrees
 */
void simple_decoder_buffer(context *ctx, FILE *f, feature_table *ft, int root_label)
{
    feat_vec *features = feat_vec_new(feature_types_nb);
    config *cfg = config_initial(f, ctx->mcd_struct, 0);

    for(;;){
        /* read a sentence and put it in the buffer */
        if(queue_read_sentence(cfg->bf, f, ctx->mcd_struct) <= 1)
            break;

        while(!config_is_terminal(cfg)){
            float best_score;
            int code, type, label;
            int applied = 0;

            config2feat_vec_cff(ctx->features_model, cfg, ctx->d_perceptron_features, features, LOOKUP_MODE);
            code = feature_table_argmax(features, ft, &best_score);
            type = movement_type(code);
            label = movement_label(code);

            if(type == MVT_LEFT)
                applied = movement_left_arc(cfg, label, best_score);
            else if(type == MVT_RIGHT)
                applied = movement_right_arc(cfg, label, best_score);

            /* fall back on a shift when no arc was created */
            if(!applied)
                movement_shift(cfg, 0, best_score);
        }

        config_connect_subtrees(cfg, root_label);
        depset_print2(stdout, cfg->ds, ctx->dico_labels);

        /* start the next sentence from a fresh configuration;
           the previous one is not freed (config_free is disabled here) */
        cfg = config_initial(f, ctx->mcd_struct, 0);
    }

    feat_vec_free(features);
}
void simple_decoder_stream(context *ctx, FILE *f, feature_table *ft, int root_label)
{
int mvt_code;
......@@ -18,68 +61,70 @@ void simple_decoder_stream(context *ctx, FILE *f, feature_table *ft, int root_la
float max;
feat_vec *fv = feat_vec_new(feature_types_nb);
config *c = NULL;
word *dep;
c = config_initial_no_dummy_word(f, ctx->mcd_struct, 5);
while(1){
c = config_initial(f, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){
/* config_print(stdout, c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
/* feat_vec_print(stdout, fv); */
mvt_code = feature_table_argmax(fv, ft, &max);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* config_print(stdout, c); */
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
if(mvt_type == MVT_LEFT){
dep = stack_s0(config_get_stack(c));
if(movement_left_arc(c, mvt_label, max)){
/* printf("%d\t", word_get_index(dep));
printf("%s\t", word_get_input(dep));
printf("%d\t", word_get_gov(dep));
printf("%s\n", dico_int2string(ctx->dico_labels, word_get_label(dep)));*/
continue;
}
}
/* printf("code predicted = %d\n", mvt_code); */
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
if(mvt_type == MVT_RIGHT){
dep = word_buffer_b0(config_get_buffer(c));
if(movement_right_arc(c, mvt_label, max)){
/* printf("%d\t", word_get_index(dep));
printf("%s\t", word_get_input(dep));
printf("%d\t", word_get_gov(dep));
printf("%s\n", dico_int2string(ctx->dico_labels, word_get_label(dep)));*/
continue;
}
/* sentence is complete */
if((stack_height(c->st)==1) && (mvt_type == MVT_RIGHT) && (mvt_label == root_label)){
/* if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ */
/* if(mvt_label == root_label){ */
/* printf("sentence complete\n"); */
/*config_print(stdout, c); */
/* create the root arc */
movement_right_arc(c, mvt_label, 0);
/* shift dummy word in stack */
movement_shift(c, 1, 0);
/* config_print(stdout, c); */
/* config_connect_subtrees(c, root_label); */
/* depset_print_new_index(stdout, c->ds, ctx->dico_labels); */
depset_print2(stdout, c->ds, ctx->dico_labels);
/* pop the dummy word */
stack_pop(c->st);
/* remplace it with a fresh one */
stack_push(c->st, word_create_dummy(ctx->mcd_struct));
/* empty depset */
depset_free(c->ds);
c->ds = depset_new();
/* renumber the words that are left in the buffer */
c->current_index = queue_renumber_words(c->bf);
continue;
}
if(mvt_type == MVT_LEFT)
if(movement_left_arc(c, mvt_label, max))
continue;
if(mvt_type == MVT_REDUCE)
if(movement_reduce(c, max))
if(mvt_type == MVT_RIGHT)
if(movement_right_arc(c, mvt_label, max))
continue;
if(mvt_type == MVT_ROOT)
if(movement_root(c, max, root_label))
continue;
movement_shift(c, 1, max);
if(word_buffer_is_last(config_get_buffer(c))) break;
}
/* config_print(stdout, c); */
/* config_connect_subtrees(c, root_label); */
depset_print2(stdout, c->ds, ctx->dico_labels);
for(int i=0; i < config_get_buffer(c)->nbelem; i++){
dep = word_buffer_get_word_n(config_get_buffer(c), i);
printf("%s\t", word_get_input(dep));
printf("%d\t", word_get_gov(dep));
/* printf("label = %d\n", word_get_label(dep)); */
char *label = (word_get_label(dep) == -1)? NULL : dico_int2string(ctx->dico_labels, word_get_label(dep));
if(label != NULL)
printf("%s\t", label) ;
else
printf("_\t");
if((label != NULL) && !strcmp(label, "eos"))
printf("1\n");
else
printf("0\n");
}
/* config_free(c); */
feat_vec_free(fv);
......@@ -95,10 +140,14 @@ void simple_decoder(context *ctx)
root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
if(root_label == -1) root_label = 0;
simple_decoder_stream(ctx, f, ft, root_label);
if(ctx->stream_mode)
simple_decoder_stream(ctx, f, ft, root_label);
else
simple_decoder_buffer(ctx, f, ft, root_label);
feature_table_free(ft);
if(ctx->input_filename)
fclose(f);
}
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement_parser_arc_eager.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
/*
 * Print every word of the buffer of configuration c on stdout, one word per
 * line, as four tab separated columns:
 *   input of the word, governor value, label string, sentence boundary flag.
 *
 * The label column is "_" when the word has no label (code -1) or when
 * dico_labels has no string for its code. The last column is 1 when the
 * label string is "eos" and 0 otherwise.
 */
void print_word_buffer(config *c, dico *dico_labels)
{
    int pos = 0;

    while(pos < config_get_buffer(c)->nbelem){
        word *w = word_buffer_get_word_n(config_get_buffer(c), pos);
        char *label_str = NULL;
        int label_code;
        int ends_sentence;

        printf("%s\t", word_get_input(w));
        printf("%d\t", word_get_gov(w));

        label_code = word_get_label(w);
        if(label_code != -1)
            label_str = dico_int2string(dico_labels, label_code);

        if(label_str == NULL)
            printf("_\t");
        else
            printf("%s\t", label_str);

        ends_sentence = (label_str != NULL) && (strcmp(label_str, "eos") == 0);
        fputs(ends_sentence ? "1\n" : "0\n", stdout);

        pos++;
    }
}
/*
 * Greedy arc eager decoder working on a stream of words.
 *
 * Words are read from ctx->input_filename, or from stdin when no file name
 * was given. At each step the best scoring movement according to the
 * perceptron model (ctx->perc_model_filename) is tried; a shift is performed
 * when that movement is not a left arc, right arc, reduce or root, or when it
 * could not be applied. Predicted EOS movements get no special treatment and
 * therefore end up as shifts.
 *
 * Decoding stops as soon as the buffer of the configuration is on its last
 * word; the content of the word buffer is then printed on stdout.
 *
 * NOTE(review): the configuration and the chosen movement are also printed
 * on stdout at every step - confirm this trace is wanted in the output.
 */
void simple_decoder_parser_arc_eager(context *ctx)
{
    FILE *input = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
    feature_table *model = feature_table_load(ctx->perc_model_filename, ctx->verbose);
    feat_vec *features = feat_vec_new(feature_types_nb);
    config *cfg = NULL;
    float best_score;
    int root_label;

    /* fall back on label 0 when the root label is unknown */
    root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
    if(root_label == -1)
        root_label = 0;

    cfg = config_initial_no_dummy_word(input, ctx->mcd_struct, 5);

    for(;;){
        int code, type, label;
        int applied = 0;

        config2feat_vec_cff(ctx->features_model, cfg, ctx->d_perceptron_features, features, LOOKUP_MODE);
        code = feature_table_argmax(features, model, &best_score);
        type = movement_type(code);
        label = movement_label(code);

        config_print(stdout, cfg);
        movement_print(stdout, code, ctx->dico_labels);

        /* try the predicted movement; the movement types are exclusive */
        if(type == MVT_LEFT)
            applied = movement_left_arc(cfg, label, best_score);
        else if(type == MVT_RIGHT)
            applied = movement_right_arc(cfg, label, best_score);
        else if(type == MVT_REDUCE)
            applied = movement_reduce(cfg, best_score);
        else if(type == MVT_ROOT)
            applied = movement_root(cfg, best_score, root_label);

        /* the end of the input is tested after every step, whether the
           predicted movement was applied or not */
        if(word_buffer_is_last(config_get_buffer(cfg)))
            break;

        /* default movement */
        if(!applied)
            movement_shift(cfg, 1, best_score);
    }

    print_word_buffer(cfg, ctx->dico_labels);

    /* the configuration is not freed (config_free is disabled here) */
    feat_vec_free(features);
    feature_table_free(model);
    if(ctx->input_filename)
        fclose(input);
}
/* Interface of the arc eager simple decoder.
   The include guard avoids identifiers containing a double underscore, which
   are reserved for the C implementation. */
#ifndef SIMPLE_DECODER_PARSER_ARC_EAGER_H
#define SIMPLE_DECODER_PARSER_ARC_EAGER_H

#include"context.h"

/* Run the arc eager decoder with the settings held in ctx. */
void simple_decoder_parser_arc_eager(context *ctx);

#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment