diff --git a/maca_lemmatizer/src/maca_lemmatizer.c b/maca_lemmatizer/src/maca_lemmatizer.c index 15655bf02a1812de517eb966801da6911d03b128..76b640a267d8d635519b380a49936e8c8e85db8d 100644 --- a/maca_lemmatizer/src/maca_lemmatizer.c +++ b/maca_lemmatizer/src/maca_lemmatizer.c @@ -35,7 +35,7 @@ void maca_lemmatizer_check_options(context *ctx){ } } -char **read_fplm_file(char *fplm_filename, hash *form_pos_ht) +char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode) { char form[1000]; char pos[1000]; @@ -56,8 +56,10 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht) /* if(!strcmp(form, "d")) */ /* fprintf(stderr, "form = %s pos = %s lemma = %s\n", form, pos, lemma); */ if(fields_nb != 4){ - fprintf(stderr, "form = %s pos = %s lemma = %s\n", form, pos, lemma); - fprintf(stderr, "incorrect fplm entry, skipping it\n"); + if(debug_mode){ + fprintf(stderr, "form = %s pos = %s lemma = %s\n", form, pos, lemma); + fprintf(stderr, "incorrect fplm entry, skipping it\n"); + } continue; } strcat(form, "/"); @@ -122,7 +124,7 @@ int main(int argc, char *argv[]) else f = myfopen(ctx->conll_filename, "r"); - lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht); + lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode); /* look for a valid word */ while(fgets(buffer, 10000, f)){ diff --git a/maca_tools/src/mcf2conll.c b/maca_tools/src/mcf2conll.c index a19adc81dab31d62beab765a0625368d3eb127f9..4b300ba6aa922f180a1212ae26fb28287eb1f8b8 100644 --- a/maca_tools/src/mcf2conll.c +++ b/maca_tools/src/mcf2conll.c @@ -35,13 +35,13 @@ context *context_new(void) void context_general_help_message(context *ctx) { - fprintf(stderr, "usage: %s [options]\n", ctx->program_name); - fprintf(stderr, "Options:\n"); - fprintf(stderr, "\t-h --help : print this message\n"); - fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); - fprintf(stderr, "\t-C --mcd : mcd filename\n"); - fprintf(stderr, "\t-i --mcf : mcf filename (read from stdin if absent)\n"); - fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n"); + fprintf(stderr, "usage: %s [options]\n", ctx->program_name); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-h --help : print this message\n"); + fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); + fprintf(stderr, "\t-C --mcd : mcd filename\n"); + fprintf(stderr, "\t-i --mcf : mcf filename (read from stdin if absent)\n"); + fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n"); } mcd *mcd_build_wplgfs(void) @@ -193,61 +193,63 @@ int main(int argc, char *argv[]) output_file = (ctx->conll_filename)? myfopen_no_exit(ctx->conll_filename, "w"): stdout; - do{ + do{ w = word_buffer_b0(wb); - - printf("%d\t", index); - - if(form_col != -1) - str_print_col_n(output_file, w->input, form_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - if(lemma_col != -1) - str_print_col_n(output_file, w->input, lemma_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - if(cpos_col != -1) - str_print_col_n(output_file, w->input, cpos_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - if(pos_col != -1) - str_print_col_n(output_file, w->input, pos_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - if(feats_col != -1) - str_print_col_n(output_file, w->input, feats_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - if(gov_col) - fprintf(output_file, "%d\t", word_get_gov(w) + index); - else - fprintf(output_file, "_\t"); + if(w){ + + printf("%d\t", index); + + if(form_col != -1) + str_print_col_n(output_file, w->input, form_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + if(lemma_col != -1) + str_print_col_n(output_file, w->input, lemma_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + if(cpos_col != -1) + str_print_col_n(output_file, w->input, cpos_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + if(pos_col != -1) + str_print_col_n(output_file, w->input, pos_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + if(feats_col != -1) + str_print_col_n(output_file, w->input, feats_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + if(gov_col) + fprintf(output_file, "%d\t", word_get_gov(w) + index); + else + fprintf(output_file, "_\t"); - if(label_col != -1) - str_print_col_n(output_file, w->input, label_col); - else - fprintf(output_file, "_"); - fprintf(output_file, "\t"); - - fprintf(output_file, "\t_\t\n"); - - if((sent_seg_col) && (word_get_sent_seg(w))){ - fprintf(output_file, "\n"); - index = 0; + if(label_col != -1) + str_print_col_n(output_file, w->input, label_col); + else + fprintf(output_file, "_"); + fprintf(output_file, "\t"); + + fprintf(output_file, "\t_\t\n"); + + if((sent_seg_col) && (word_get_sent_seg(w))){ + fprintf(output_file, "\n"); + index = 0; + } + + index ++; } - - index ++; - } while(word_buffer_move_right(wb)); + } while(word_buffer_move_right(wb)); if(ctx->conll_filename) fclose(output_file); diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index be6a90926c29f21b68dcb5c095bd368da94f3e7b..2a1941a876ccd47c459b9f0375bd40c482d516f6 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -93,7 +93,6 @@ void context_general_help_message(context *ctx) fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n"); fprintf(stderr, "\t-L --language <str> : identifier of the language to use (default is fr)\n"); - fprintf(stderr, "\t-S --stream : stream mode\n"); } void context_model_help_message(context *ctx){ @@ -145,7 +144,7 @@ void context_maca_data_path_help_message(context *ctx){ fprintf(stderr, "\t-D --maca_data_path : path to maca_data directory\n"); } void context_root_label_help_message(context *ctx){ - fprintf(stderr, "\t-R --root_label : name of the root label (default is \"root\")\n"); + fprintf(stderr, "\t-R --root_label <str> : name of the root label (default is \"root\")\n"); } void context_f2p_filename_help_message(context *ctx){ fprintf(stderr, "\t-P --f2p <file> : form to pos (f2p) filename\n"); @@ -159,13 +158,12 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[22] = + static struct option long_options[21] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, {"debug", no_argument, 0, 'd'}, {"conll", no_argument, 0, 'c'}, - {"stream", no_argument, 0, 'S'}, {"model", required_argument, 0, 'm'}, {"input", required_argument, 0, 'i'}, {"iter", required_argument, 0, 'n'}, @@ -203,10 +201,6 @@ context *context_read_options(int argc, char *argv[]) case 'c': ctx->conll = 1; break; - case 'S': - ctx->stream_mode = 1; - break; - case 'm': ctx->perc_model_filename = strdup(optarg); break; diff --git a/maca_trans_parser/src/maca_trans_parser.c b/maca_trans_parser/src/maca_trans_parser.c index 43a304507aca2b817b028eeb5b4114b7e11e1c3f..0b9e785b5ffb976245abaefcd4f2b10c18cb689f 100644 --- a/maca_trans_parser/src/maca_trans_parser.c +++ b/maca_trans_parser/src/maca_trans_parser.c @@ -18,8 +18,8 @@ void decode_help_message(context *ctx) { context_general_help_message(ctx); - context_beam_help_message(ctx); - context_conll_help_message(ctx); + /* context_beam_help_message(ctx); */ + /* context_conll_help_message(ctx); */ fprintf(stderr, "INPUT\n"); context_input_help_message(ctx); context_mcd_help_message(ctx);