Commit 56e83bcf authored by Alexis Nasr
Browse files

Added a new tagparse decoder that tags and parses the sentence at the same time.

parent 1cddee84
......@@ -36,19 +36,19 @@ typedef struct {
word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead);
void word_buffer_free(word_buffer *wb);
int word_buffer_add(word_buffer *wb, word *w);
word *word_buffer_get_word_relative(word_buffer *wb, int dist);
word* word_buffer_get_word_n(word_buffer *wb, int n);
int word_buffer_read_next_word(word_buffer *wb);
int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb);
void word_buffer_print(FILE *f, word_buffer *wb);
void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb);
int word_buffer_end(word_buffer *wb);
int word_buffer_read_sentence(word_buffer *bw);
void word_buffer_free(word_buffer *wb);
int word_buffer_add(word_buffer *wb, word *w);
word* word_buffer_get_word_relative(word_buffer *wb, int dist);
word* word_buffer_get_word_n(word_buffer *wb, int n);
int word_buffer_read_next_word(word_buffer *wb);
int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb);
void word_buffer_print(FILE *f, word_buffer *wb);
void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb);
int word_buffer_end(word_buffer *wb);
int word_buffer_read_sentence(word_buffer *bw);
word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct);
#endif
set(SOURCES src/context.c
src/feat_desc.c
# src/movement_parser_arc_eager.c
src/movement_parser_arc_eager.c
src/movement_tagparser_arc_eager.c
src/movement_tagger.c
src/feat_fct.c
src/global_feat_vec.c
# src/oracle_parser.c
# src/oracle_parser_arc_eager.c
src/oracle_parser_arc_eager.c
src/oracle_tagparser_arc_eager.c
src/oracle_tagger.c
# src/simple_decoder_parser.c
src/simple_decoder_parser.c
src/simple_decoder_parser_arc_eager.c
src/simple_decoder_tagparser_arc_eager.c
src/simple_decoder_forrest.c
......
......@@ -78,12 +78,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
mvt_code = oracle_tagparser_arc_eager(c, ref, root_label);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
mvt_type = movement_tagparse_type(mvt_code);
mvt_label = movement_tagparse_label(mvt_code);
if(ctx->debug_mode){
config_print(stdout,c);
movement_print(stdout, mvt_code, ctx->dico_labels, dico_postag);
movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, dico_postag);
fprintf(stdout, "\n");
}
......@@ -92,7 +92,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
stack_print(output_file, c->st);
fprintf(output_file, "\t");
movement_print(output_file, mvt_code, ctx->dico_labels, dico_postag);
movement_tagparse_print(output_file, mvt_code, ctx->dico_labels, dico_postag);
fprintf(output_file, "\t1\n");
}
else{
......@@ -101,40 +101,40 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
}
if(mvt_type == MVT_EOS){
movement_eos(c, 0);
movement_tagparse_eos(c, 0);
sentence_nb++;
if(word_buffer_is_last(ref))
break;
}
if(mvt_type == MVT_POSTAG){
movement_add_pos(c, 0, mvt_label);
movement_tagparse_add_pos(c, 0, mvt_label);
continue;
}
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
movement_tagparse_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
movement_tagparse_right_arc(c, mvt_label, 0);
word_buffer_move_right(ref);
continue;
}
if(mvt_type == MVT_REDUCE){
movement_reduce(c, 0);
movement_tagparse_reduce(c, 0);
continue;
}
if(mvt_type == MVT_ROOT){
movement_root(c, 0, root_label);
movement_tagparse_root(c, 0, root_label);
continue;
}
if(mvt_type == MVT_SHIFT){
movement_shift(c, 1, 0);
movement_tagparse_shift(c, 1, 0);
word_buffer_move_right(ref);
continue;
}
......
......@@ -4,11 +4,11 @@
#include"util.h"
#include"movement_tagparser_arc_eager.h"
void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag)
void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag)
{
int mvt_type = movement_type(mvt_code);
int mvt_label = movement_label(mvt_code);
int mvt_type = movement_tagparse_type(mvt_code);
int mvt_label = movement_tagparse_label(mvt_code);
char *label;
if(mvt_type == MVT_SHIFT) {fprintf(f, "SHIFT"); return;}
......@@ -27,7 +27,7 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag)
fprintf(f, " %s", label);
}
int movement_type(int mvt)
int movement_tagparse_type(int mvt)
{
if(mvt == MVT_SHIFT) return MVT_SHIFT; /* 0 */
if(mvt == MVT_REDUCE) return MVT_REDUCE; /* 1 */
......@@ -38,7 +38,7 @@ int movement_type(int mvt)
/*if(mvt % 3 == 2)*/ return MVT_LEFT; /* 6, 9, 12 ... */
}
int movement_label(int mvt)
int movement_tagparse_label(int mvt)
{
if(mvt == MVT_SHIFT) return -1;
if(mvt == MVT_REDUCE) return -1;
......@@ -52,7 +52,7 @@ int movement_label(int mvt)
return (mvt - 6) / 3;
}
int movement_add_pos(config *c, float score, int pos)
int movement_tagparse_add_pos(config *c, float score, int pos)
{
if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0;
if(word_get_pos(word_buffer_b0(config_get_buffer(c))) != -1) return 0;
......@@ -60,12 +60,12 @@ int movement_add_pos(config *c, float score, int pos)
/* stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
word_buffer_move_right(config_get_buffer(c));*/
config_add_mvt(c, movement_postag(pos));
config_add_mvt(c, movement_tagparse_postag(pos));
return 1;
}
int movement_eos(config *c, float score)
int movement_tagparse_eos(config *c, float score)
{
if(stack_is_empty(config_get_stack(c))) return 0;
if(word_get_sent_seg(stack_top(config_get_stack(c))) == 1) return 0;
......@@ -80,7 +80,7 @@ int movement_eos(config *c, float score)
return 1;
}
int movement_left_arc(config *c, int label, float score)
int movement_tagparse_left_arc(config *c, int label, float score)
{
if(stack_is_empty(config_get_stack(c))) return 0;
/* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */
......@@ -97,11 +97,11 @@ int movement_left_arc(config *c, int label, float score)
word_set_label(dep, label);
stack_pop(config_get_stack(c));
config_add_mvt(c, movement_left_code(label));
config_add_mvt(c, movement_tagparse_left_code(label));
return 1;
}
int movement_right_arc(config *c, int label, float score)
int movement_tagparse_right_arc(config *c, int label, float score)
{
if(stack_is_empty(config_get_stack(c))) return 0;
......@@ -116,11 +116,11 @@ int movement_right_arc(config *c, int label, float score)
stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
word_buffer_move_right(config_get_buffer(c));
config_add_mvt(c, movement_right_code(label));
config_add_mvt(c, movement_tagparse_right_code(label));
return 1;
}
int movement_shift(config *c, int stream, float score)
int movement_tagparse_shift(config *c, int stream, float score)
{
if(word_buffer_is_empty(config_get_buffer(c))) return 0;
stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
......@@ -129,7 +129,7 @@ int movement_shift(config *c, int stream, float score)
return 1;
}
int movement_reduce(config *c, float score)
int movement_tagparse_reduce(config *c, float score)
{
if(stack_nbelem(config_get_stack(c)) <= 1) return 0;
......@@ -142,7 +142,7 @@ int movement_reduce(config *c, float score)
return 1;
}
int movement_root(config *c, float score, int root_code)
int movement_tagparse_root(config *c, float score, int root_code)
{
word *s0 = stack_top(config_get_stack(c));
if(s0 == NULL) return 0;
......
......@@ -13,23 +13,23 @@
#define MVT_RIGHT 5
#define MVT_POSTAG 6
#define movement_postag(postag) (3 * (postag) + 4)
#define movement_tagparse_postag(postag) (3 * (postag) + 4)
/* left movements are coded 3 * label + 5, i.e. 5, 8, 11, ... (0 is shift and 2 is root) */
#define movement_left_code(label) (3 * (label) + 5)
#define movement_tagparse_left_code(label) (3 * (label) + 5)
/* right movements are coded 3 * label + 6, i.e. 6, 9, 12, ... (1 is reduce and 3 is end_of_sentence) */
#define movement_right_code(label) (3 * (label) + 6)
int movement_type(int mvt);
int movement_label(int mvt);
int movement_left_arc(config *c, int label, float score);
int movement_right_arc(config *c, int label, float score);
int movement_shift(config *c, int stream, float score);
int movement_reduce(config *c, float score);
int movement_root(config *c, float score, int root_code);
int movement_eos(config *c, float score);
int movement_add_pos(config *c, float score, int postag);
void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag);
#define movement_tagparse_right_code(label) (3 * (label) + 6)
int movement_tagparse_type(int mvt);
int movement_tagparse_label(int mvt);
int movement_tagparse_left_arc(config *c, int label, float score);
int movement_tagparse_right_arc(config *c, int label, float score);
int movement_tagparse_shift(config *c, int stream, float score);
int movement_tagparse_reduce(config *c, float score);
int movement_tagparse_root(config *c, float score, int root_code);
int movement_tagparse_eos(config *c, float score);
int movement_tagparse_add_pos(config *c, float score, int postag);
void movement_tagparse_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag);
#endif
......@@ -75,9 +75,9 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label)
/* give a pos to b0 if it does not have one */
if(word_get_pos(b0) == -1){
/* word_set_pos(b0, word_get_pos(word_buffer_get_word_n(ref, b0_index))); */
/* return movement_postag(word_get_pos(b0)); */
/* return movement_tagparse_postag(word_get_pos(b0)); */
return movement_postag(word_get_pos(word_buffer_get_word_n(ref, b0_index)));
return movement_tagparse_postag(word_get_pos(word_buffer_get_word_n(ref, b0_index)));
}
......@@ -113,12 +113,12 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label)
/* LEFT ARC b0 is the governor and s0 the dependent */
if(s0_gov_index == b0_index){
return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index)));
return movement_tagparse_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index)));
}
/* RIGHT ARC s0 is the governor and b0 the dependent */
if(b0_gov_index == s0_index){
return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index)));
return movement_tagparse_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index)));
}
/* REDUCE */
if((stack_nbelem(config_get_stack(c)) > 1)
......
......@@ -82,8 +82,8 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
mvt_code = feature_table_argmax(fv, ft, &max);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
mvt_type = movement_tagparse_type(mvt_code);
mvt_label = movement_tagparse_label(mvt_code);
if(ctx->trace_mode){
index = word_get_index(word_buffer_b0(config_get_buffer(c)));
......@@ -92,7 +92,7 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
stack_print(stdout, c->st);
fprintf(stdout, "\t");
movement_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags);
movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags);
fprintf(stdout, "\t");
feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
printf("%f\n", max1 - max2);
......@@ -105,9 +105,9 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
entropy = feature_table_entropy(fv, ft);
/* delta = feature_table_diff_scores(fv, ft); */
feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
movement_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags);
movement_tagparse_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags);
printf(":\t%f\n", max1);
movement_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags);
movement_tagparse_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags);
printf(":\t%f\n", max2);
printf("delta = %f\n", max1 - max2);
......@@ -115,37 +115,37 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
/* printf("entropy = %f delta = %f\n", entropy, delta); */
printf("entropy = %f\n",entropy);
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
/* movement_tagparse_print(stdout, mvt_code, ctx->dico_labels); */
}
result = 0;
switch(mvt_type){
case MVT_POSTAG :
result = movement_add_pos(c, max, mvt_label);
result = movement_tagparse_add_pos(c, max, mvt_label);
break;
case MVT_LEFT :
result = movement_left_arc(c, mvt_label, max);
result = movement_tagparse_left_arc(c, mvt_label, max);
break;
case MVT_RIGHT:
result = movement_right_arc(c, mvt_label, max);
result = movement_tagparse_right_arc(c, mvt_label, max);
break;
case MVT_REDUCE:
result = movement_reduce(c, max);
result = movement_tagparse_reduce(c, max);
break;
case MVT_ROOT:
result = movement_root(c, max, root_label);
result = movement_tagparse_root(c, max, root_label);
break;
case MVT_EOS:
result = movement_eos(c, max);
result = movement_tagparse_eos(c, max);
break;
case MVT_SHIFT:
result = movement_shift(c, 1, max);
result = movement_tagparse_shift(c, 1, max);
}
if(result == 0){
if(ctx->debug_mode){
fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
}
movement_shift(c, 1, max);
movement_tagparse_shift(c, 1, max);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment