Add an -N/--n_extracols option that appends the first N letter-feature
columns (A, B, C, ...) to the output of print_word_buffer(), together with
the mcd_get_letter_col()/word_get_letterfeat() accessors and a word_debug()
helper.

Note: indentation was reconstructed from a whitespace-collapsed copy of this
patch; apply with "git apply --ignore-whitespace". The index lines are those
of the original commit.

diff --git a/maca_common/include/mcd.h b/maca_common/include/mcd.h
index fe4eecf2e8f13aac08e418f3973606db8e1ce32e..496312765cb74ce36e9886440ae9d1e069b4a88e 100644
--- a/maca_common/include/mcd.h
+++ b/maca_common/include/mcd.h
@@ -63,6 +63,7 @@
 #define mcd_get_label_col(m) (m)->wf2col[MCD_WF_LABEL]
 #define mcd_get_stag_col(m) (m)->wf2col[MCD_WF_STAG]
 #define mcd_get_sent_seg_col(m) (m)->wf2col[MCD_WF_SENT_SEG]
+#define mcd_get_letter_col(m,L) (m)->wf2col[MCD_WF_A+(L)]
 #define mcd_get_a_col(m) (m)->wf2col[MCD_WF_A]
 #define mcd_get_b_col(m) (m)->wf2col[MCD_WF_B]
 #define mcd_get_c_col(m) (m)->wf2col[MCD_WF_C]
diff --git a/maca_common/include/word.h b/maca_common/include/word.h
index 30074b7606988cfcefa4400b8f35acd958ea9807..da285510d13bb7b2e9910c988070a18c26800516 100644
--- a/maca_common/include/word.h
+++ b/maca_common/include/word.h
@@ -26,6 +26,7 @@ typedef struct _word {
 #define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
 #define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
 #define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG])
+#define word_get_letterfeat(w, L) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A+(L)])
 #define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
 #define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
 #define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
diff --git a/maca_common/src/word.c b/maca_common/src/word.c
index b6420932d994a9d343fe01651984a2dd53b87b02..952fe06d16979f4f25bea543fb18018a9bdca799 100644
--- a/maca_common/src/word.c
+++ b/maca_common/src/word.c
@@ -134,6 +134,45 @@ void word_print(FILE *f, word *w)
   fprintf(f, "%s", w->input);
 }
 
+/* Debug helper: print every feature of w that mcd_struct maps to a column.
+ * One line per feature, "<column name>: <value>"; when the column has a
+ * dictionary the code is decoded and the raw integer is shown in parentheses.
+ * Prints nothing when f or mcd_struct is NULL. */
+void word_debug(FILE *f, word *w, mcd *mcd_struct)
+{
+  int wf;
+
+  if (f == NULL || mcd_struct == NULL)
+    return;
+  if (w == NULL) {
+    fprintf(f, "WORD: NULL\n");
+    return;
+  }
+
+  fprintf(f, "WORD:\n");
+  for (wf = 0; wf < MCD_WF_NB; wf++) {
+    int wf_int = w->wf_array[wf];
+    int col = mcd_struct->wf2col[wf]; /* -1: feature has no column */
+    const char *colname;
+    dico *d;
+
+    if (col == -1)
+      continue;
+
+    colname = mcd_struct->wf_str[col];
+    d = mcd_struct->dico_array[col];
+    if (d == NULL) {
+      fprintf(f, " %s: %d\n", colname, wf_int);
+    } else {
+      const char *wf_string = dico_int2string(d, wf_int);
+      if (wf_string == NULL)
+        fprintf(f, " %s: NULL (%d)\n", colname, wf_int);
+      else
+        fprintf(f, " %s: \"%s\" (%d)\n", colname, wf_string, wf_int);
+    }
+  }
+}
+
 int word_is_eos(word *w, mcd *mcd_struct)
 {
   if(w == NULL) return 0;
diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c
index 72c2d61ab47097612cca44a7d908d4406fd2129a..c871bc7791dacf989327ad3746b317529c59da31 100644
--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -105,6 +105,7 @@ void context_general_help_message(context *ctx)
   fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n");
   fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n");
   fprintf(stderr, "\t-L --language <str> : identifier of the language to use (default is fr)\n");
+  fprintf(stderr, "\t-N --n_extracols <int> : number of extra columns to print [e.g. 3 for columns A,B,C] (default is 0)\n");
 }
 
 void context_model_help_message(context *ctx){
@@ -176,7 +177,7 @@ context *context_read_options(int argc, char *argv[])
 
   ctx->program_name = strdup(argv[0]);
 
-  static struct option long_options[22] =
+  static struct option long_options[23] =
     {
       {"help", no_argument, 0, 'h'},
       {"verbose", no_argument, 0, 'v'},
@@ -197,6 +198,7 @@ context *context_read_options(int argc, char *argv[])
       {"vocabs", required_argument, 0, 'V'},
       {"language", required_argument, 0, 'L'},
       {"maca_data_path", required_argument, 0, 'D'},
+      {"n_extracols", required_argument, 0, 'N'},
       {"root_label", required_argument, 0, 'R'},
       {"f2p", required_argument, 0, 'P'},
       {"traces", required_argument, 0, 'T'}
@@ -266,6 +268,18 @@ context *context_read_options(int argc, char *argv[])
       if (ctx->language) free(ctx->language); // libérer le default (strdup("fr") )
       ctx->language = strdup(optarg);
       break;
+    case 'N': {
+      /* At most 26 extra columns (presumably letters A..Z); anything that is
+       * not a number in 0..26 falls back to the default of 0. */
+      char *end = NULL;
+      long n_cols = strtol(optarg, &end, 10);
+      if (end == optarg || *end != '\0' || n_cols < 0 || n_cols > 26) {
+        fprintf(stderr, "invalid value \"%s\" for --n_extracols (expected 0..26), using 0\n", optarg);
+        n_cols = 0;
+      }
+      ctx->n_extracols = (int)n_cols;
+      break;
+    }
     case 'D':
       ctx->maca_data_path = strdup(optarg);
       break;
diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h
index 932e6717e28ebcec122951f69eb69ba05723869f..c743a65d256ef7698133b8a650e684557a956a88 100644
--- a/maca_trans_parser/src/context.h
+++ b/maca_trans_parser/src/context.h
@@ -46,6 +46,7 @@ typedef struct {
   int feature_cutoff;
   int mode;
   int sent_nb;
+  int n_extracols; /* number of extra letter columns (A, B, C, ...) to print; set by -N */
   float hash_ratio;
   int beam_width;
   int mvt_nb;
diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
index 7c44a6b32fe89378c75722a610cbdece3a1905ba..66260a36cb81f1c884ee540e8482ecfbcb2af765 100644
--- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
+++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
@@ -36,7 +36,7 @@ void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
   }
 }
 
-void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
+void print_word_buffer(config *c, context *ctx, dico *dico_labels, mcd *mcd_struct)
 {
   int i;
   word *w;
@@ -60,9 +60,9 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
       else
         printf("_\t");
       if(word_get_sent_seg(w) == 1)
-        printf("1\n") ;
+        printf("1");
       else
-        printf("0\n");
+        printf("0");
     }
     else{
       buffer = strdup(w->input);
@@ -110,9 +110,27 @@ void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
       else
         printf("\t0");
       }
-      printf("\n");
       free(buffer);
     }
+
+    /* Append the extra columns (A, B, C, ...) requested with -N. */
+    int j;
+    for (j = 0; j < ctx->n_extracols; j++) {
+      int int_feature = word_get_letterfeat(w, j);
+      int col = mcd_get_letter_col(mcd_struct, j);
+      const char *str_feature = NULL;
+
+      /* Decode through the column's dictionary when there is one. */
+      if (col != -1 && mcd_struct->dico_array[col] != NULL && int_feature >= 0)
+        str_feature = dico_int2string(mcd_struct->dico_array[col], int_feature);
+
+      /* Fall back to the raw code: passing NULL to %s is undefined. */
+      if (str_feature != NULL)
+        printf("\t%s", str_feature);
+      else
+        printf("\t%d", int_feature);
+    }
+    printf("\n");
   }
 }
 
@@ -221,7 +239,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
   }
 
   if(!ctx->trace_mode)
-    print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);
+    print_word_buffer(c, ctx, ctx->dico_labels, ctx->mcd_struct);
 
   config_free(c);
   feat_vec_free(fv);