Skip to content
Snippets Groups Projects
Commit ba1bb55e authored by Alexis Nasr
Browse files

Modified maca_trans_parser so that sentence segmentation can be specified in the input.

parent 1a95c2ff
No related branches found
No related tags found
No related merge requests found
...@@ -181,8 +181,8 @@ int main(int argc, char *argv[]) ...@@ -181,8 +181,8 @@ int main(int argc, char *argv[])
fprintf(output_file, "_"); fprintf(output_file, "_");
fprintf(output_file, "\t"); fprintf(output_file, "\t");
fprintf(output_file, "\t_\t\n"); /* fprintf(output_file, "\t_\t\n"); */
fprintf(output_file, "\n");
if((sent_seg_col) && (word_get_sent_seg(w))){ if((sent_seg_col) && (word_get_sent_seg(w))){
fprintf(output_file, "\n"); fprintf(output_file, "\n");
index = 0; index = 0;
......
...@@ -268,12 +268,8 @@ context *context_read_options(int argc, char *argv[]) ...@@ -268,12 +268,8 @@ context *context_read_options(int argc, char *argv[])
if(ctx->mcd_filename) if(ctx->mcd_filename)
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
else
if(ctx->conll)
ctx->mcd_struct = mcd_build_conll07();
else else
ctx->mcd_struct = mcd_build_wplgfs(); ctx->mcd_struct = mcd_build_wplgfs();
/* ctx->mcd_struct = mcd_build_ifpls(); */
return ctx; return ctx;
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include"feature_table.h" #include"feature_table.h"
#include"dico.h" #include"dico.h"
void print_word_buffer(config *c, dico *dico_labels) void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
{ {
int i; int i;
word *dep; word *dep;
...@@ -25,11 +25,97 @@ void print_word_buffer(config *c, dico *dico_labels) ...@@ -25,11 +25,97 @@ void print_word_buffer(config *c, dico *dico_labels)
printf("%s\t", label) ; printf("%s\t", label) ;
else else
printf("_\t"); printf("_\t");
if(mcd_get_sent_seg_col(mcd_struct) == -1){
if(word_get_sent_seg(dep) == 1) if(word_get_sent_seg(dep) == 1)
printf("1") ;
else
printf("0");
}
printf("\n");
}
}
void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
{
int i;
word *w;
char *label;
char *buffer = NULL;
char *token = NULL;
int col_nb = 0;
for(i=0; i < config_get_buffer(c)->nbelem; i++){
w = word_buffer_get_word_n(config_get_buffer(c), i);
if((mcd_get_gov_col(mcd_struct) == -1)
&& (mcd_get_label_col(mcd_struct) == -1)
&& (mcd_get_sent_seg_col(mcd_struct) == -1)){
printf("%s\t", word_get_input(w));
printf("%d\t", word_get_gov(w));
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("%s\t", label) ;
else
printf("_\t");
if(word_get_sent_seg(w) == 1)
printf("1\n") ; printf("1\n") ;
else else
printf("0\n"); printf("0\n");
} }
else{
buffer = strdup(w->input);
token = strtok(buffer, "\t");
col_nb = 0;
while(token){
if(col_nb != 0) printf("\t");
if(col_nb == mcd_get_gov_col(mcd_struct)){
printf("%d", word_get_gov(w));
}
else
if(col_nb == mcd_get_label_col(mcd_struct)){
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("%s", label) ;
else
printf("_");
}
else
if(col_nb == mcd_get_sent_seg_col(mcd_struct)){
if(word_get_sent_seg(w) == 1)
printf("1") ;
else
printf("0");
}
else{
word_print_col_n(stdout, w, col_nb);
}
col_nb++;
token = strtok(NULL, "\t");
}
if((col_nb <= mcd_get_gov_col(mcd_struct)) || (mcd_get_gov_col(mcd_struct) == -1)){
printf("\t%d", word_get_gov(w));
}
if((col_nb <= mcd_get_label_col(mcd_struct)) || (mcd_get_label_col(mcd_struct) == -1)){
label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
if(label != NULL)
printf("\t%s", label) ;
else
printf("\t_");
}
if((col_nb <= mcd_get_sent_seg_col(mcd_struct)) || (mcd_get_sent_seg_col(mcd_struct) == -1)){
if(word_get_sent_seg(w) == 1)
printf("\t1") ;
else
printf("\t0");
}
printf("\n");
free(buffer);
}
}
} }
void simple_decoder_parser_arc_eager(context *ctx) void simple_decoder_parser_arc_eager(context *ctx)
...@@ -55,18 +141,28 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -55,18 +141,28 @@ void simple_decoder_parser_arc_eager(context *ctx)
c = config_new(f, ctx->mcd_struct, 5); c = config_new(f, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){ while(!config_is_terminal(c)){
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
mvt_code = feature_table_argmax(fv, ft, &max);
mvt_type = movement_parser_type(mvt_code);
mvt_label = movement_parser_label(mvt_code);
if(ctx->debug_mode){ if(ctx->debug_mode){
fprintf(stdout, "***********************************\n"); fprintf(stdout, "***********************************\n");
config_print(stdout, c); config_print(stdout, c);
}
/* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
/* which means that the top of the stack got its eos status from input */
if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){
word_set_sent_seg(stack_top(config_get_stack(c)), -1);
vcode *vcode_array = feature_table_get_vcode_array(fv, ft); movement_parser_eos(c);
while(movement_parser_reduce(c));
while(movement_parser_root(c, root_label));
for(int i=0; i < 5; i++){ /* mvt_code = MVT_PARSER_EOS; */
if(ctx->debug_mode) printf("force EOS\n");
}
else{
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
mvt_code = feature_table_argmax(fv, ft, &max);
if(ctx->debug_mode){
vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
for(int i=0; i < 3; i++){
printf("%d\t", i); printf("%d\t", i);
movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels); movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels);
printf("\t%.4f\n", vcode_array[i].score); printf("\t%.4f\n", vcode_array[i].score);
...@@ -88,6 +184,9 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -88,6 +184,9 @@ void simple_decoder_parser_arc_eager(context *ctx)
} }
mvt_type = movement_parser_type(mvt_code);
mvt_label = movement_parser_label(mvt_code);
result = 0; result = 0;
switch(mvt_type){ switch(mvt_type){
case MVT_PARSER_LEFT : case MVT_PARSER_LEFT :
...@@ -116,8 +215,8 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -116,8 +215,8 @@ void simple_decoder_parser_arc_eager(context *ctx)
movement_parser_shift(c); movement_parser_shift(c);
} }
} }
}
/* horrible hack */ /* horrible hack: force the remaining element in the stack (if any) to be the root */
if(stack_nbelem(config_get_stack(c)) && (stack_top(config_get_stack(c)) == NULL)) if(stack_nbelem(config_get_stack(c)) && (stack_top(config_get_stack(c)) == NULL))
stack_pop(config_get_stack(c)); stack_pop(config_get_stack(c));
...@@ -126,7 +225,7 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -126,7 +225,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
/* end of horrible hack */ /* end of horrible hack */
if(!ctx->trace_mode) if(!ctx->trace_mode)
print_word_buffer(c, ctx->dico_labels); print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);
config_free(c); config_free(c);
feat_vec_free(fv); feat_vec_free(fv);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment