Commit 64d47ec0 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

changed option names

parent f9233876
......@@ -33,8 +33,10 @@ typedef struct {
} mcd;
mcd *mcd_build_conll07(void);
mcd *mcd_read(char *mcd_filename);
void mcd_link_to_dico(mcd *m, dico_vec *vocabs);
mcd *mcd_build_ifpls(void);
mcd *mcd_read(char *mcd_filename, int verbose);
void mcd_link_to_dico(mcd *m, dico_vec *vocabs, int verbose);
void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename);
void mcd_free(mcd *m);
int mcd_get_code(mcd *m, char *str, int col);
......
......@@ -65,6 +65,9 @@ form2pos *form2pos_read(char *filename)
/* Returns the value stored for form in the h_form2signature hash of f2p. */
/* A NULL check on form (returning -1) exists only as disabled code:      */
/* form is handed to hash_get_val() unchecked.                            */
int form2pos_get_signature(form2pos *f2p, char *form)
{
  int signature;

  signature = hash_get_val(f2p->h_form2signature, form);
  return signature;
}
......
......@@ -110,7 +110,7 @@ void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename)
/* takes as argument an mcd structure (m) and a dictionary vector (vocabs) */
/* links the vocabularies of m to vocabularies of vocabs (based on their names) */
void mcd_link_to_dico(mcd *m, dico_vec *vocabs)
void mcd_link_to_dico(mcd *m, dico_vec *vocabs, int verbose)
{
int column;
for(column=0; column < m->nb_col; column++){
......@@ -118,14 +118,14 @@ void mcd_link_to_dico(mcd *m, dico_vec *vocabs)
&& (!strcmp(m->filename[column], "_"))
&& (m->dico_array[column] == NULL)){
m->dico_array[column] = dico_vec_get_dico(vocabs, m->type_str[column]);
fprintf(stderr, "linking to dico %s\n", m->type_str[column]);
if(verbose) fprintf(stderr, "linking to dico %s\n", m->type_str[column]);
}
}
}
/* reads a multi-column description file and produces an mcd structure */
mcd *mcd_read(char *mcd_filename)
mcd *mcd_read(char *mcd_filename, int verbose)
{
int column;
char type[100];
......@@ -148,7 +148,7 @@ mcd *mcd_read(char *mcd_filename)
/* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */
continue;
}
fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename);
if(verbose) fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename);
m->type[column] = feat_type_string2int(type);
m->type_str[column] = strdup(type);
if(m->type[column] == -1){
......@@ -170,11 +170,11 @@ mcd *mcd_read(char *mcd_filename)
if(strcmp(m->filename[column], "_")){
if(m->representation[column] == MCD_REPRESENTATION_EMB){
fprintf(stderr, "loading word embedding %s\n", m->filename[column]);
if(verbose) fprintf(stderr, "loading word embedding %s\n", m->filename[column]);
m->word_emb_array[column] = word_emb_load(m->filename[column]);
}
else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){
fprintf(stderr, "loading dico %s\n", m->filename[column]);
if(verbose) fprintf(stderr, "loading dico %s\n", m->filename[column]);
m->dico_array[column] = dico_read(m->filename[column], 0.5);
}
}
......@@ -190,46 +190,97 @@ mcd *mcd_build_conll07(void)
m->type[0]=FEAT_TYPE_INDEX;
m->type_str[0]=strdup("INDEX");
m->representation[0]= MCD_REPRESENTATION_INT;
m->filename[0] = strdup("_");
m->type2col[FEAT_TYPE_INDEX] = 0;
m->type[1]=FEAT_TYPE_FORM;
m->type_str[1]=strdup("FORM");
m->representation[1]= MCD_REPRESENTATION_VOCAB;
m->filename[1] = strdup("_");
m->type2col[FEAT_TYPE_FORM] = 1;
m->type[2]=FEAT_TYPE_LEMMA;
m->type_str[2]=strdup("LEMMA");
m->representation[2]= MCD_REPRESENTATION_VOCAB;
m->filename[2] = strdup("_");
m->type2col[FEAT_TYPE_LEMMA] = 2;
m->type[3]=FEAT_TYPE_CPOS;
m->type_str[3]=strdup("CPOS");
m->representation[3]= MCD_REPRESENTATION_VOCAB;
m->filename[3] = strdup("_");
m->type2col[FEAT_TYPE_CPOS] = 3;
m->type[4]=FEAT_TYPE_POS;
m->type_str[4]=strdup("POS");
m->representation[4]= MCD_REPRESENTATION_VOCAB;
m->filename[4] = strdup("_");
m->type2col[FEAT_TYPE_POS] = 4;
m->type[5]=FEAT_TYPE_FEATS;
m->type_str[5]=strdup("FEATS");
m->representation[5]= MCD_REPRESENTATION_VOCAB;
m->filename[5] = strdup("_");
m->type2col[FEAT_TYPE_FEATS] = 5;
m->type[6]=FEAT_TYPE_GOV;
m->type_str[6]=strdup("GOV");
m->representation[6]= MCD_REPRESENTATION_INT;
m->filename[6] = strdup("_");
m->type2col[FEAT_TYPE_GOV] = 6;
m->type[7]=FEAT_TYPE_LABEL;
m->type_str[7]=strdup("LABEL");
m->representation[7]= MCD_REPRESENTATION_VOCAB;
m->filename[7] = strdup("_");
m->type2col[FEAT_TYPE_LABEL] = 7;
return m;
}
/* Builds a six-column mcd whose columns are, in order:                  */
/* INDEX, FORM, POS, LEMMA, GOV, LABEL (columns 0 to 5).                 */
/* INDEX and GOV use MCD_REPRESENTATION_INT, the other columns use       */
/* MCD_REPRESENTATION_VOCAB. Every column gets the filename "_" and the  */
/* type2col entry of its feature type is set to the column index.        */
/* Returns the structure obtained from mcd_new().                        */
mcd *mcd_build_ifpls(void)
{
  /* one entry per column, listed in column order */
  const struct {
    int type;
    const char *name;
    int representation;
  } layout[] = {
    { FEAT_TYPE_INDEX, "INDEX", MCD_REPRESENTATION_INT   },
    { FEAT_TYPE_FORM,  "FORM",  MCD_REPRESENTATION_VOCAB },
    { FEAT_TYPE_POS,   "POS",   MCD_REPRESENTATION_VOCAB },
    { FEAT_TYPE_LEMMA, "LEMMA", MCD_REPRESENTATION_VOCAB },
    { FEAT_TYPE_GOV,   "GOV",   MCD_REPRESENTATION_INT   },
    { FEAT_TYPE_LABEL, "LABEL", MCD_REPRESENTATION_VOCAB }
  };
  const int column_nb = (int)(sizeof(layout) / sizeof(layout[0]));
  mcd *m = mcd_new(column_nb);
  int col;

  for(col = 0; col < column_nb; col++){
    m->type[col] = layout[col].type;
    m->type_str[col] = strdup(layout[col].name);
    m->representation[col] = layout[col].representation;
    m->filename[col] = strdup("_");
    m->type2col[layout[col].type] = col;
  }
  return m;
}
mcd *mcd_read_old(char *mcd_filename, char *corpus_filename, dico_vec *vocabs)
{
int column;
......
......@@ -33,8 +33,8 @@ context *context_new(void)
ctx->mcd_struct = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
ctx->form_column = -1;
ctx->pos_column = -1;
ctx->form_column = 0;
ctx->pos_column = 1;
return ctx;
}
......@@ -123,7 +123,6 @@ context *context_read_options(int argc, char *argv[])
break;
case 'm':
ctx->mcd_filename = strdup(optarg);
ctx->mcd_struct = mcd_read(ctx->mcd_filename);
break;
case 'C':
ctx->language = strdup(optarg);
......@@ -136,6 +135,11 @@ context *context_read_options(int argc, char *argv[])
context_set_linguistic_resources_filenames(ctx);
if(ctx->mcd_filename)
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
if((ctx->mcd_filename == NULL) && ((ctx->form_column == -1) || (ctx->pos_column == -1)))
ctx->mcd_struct = mcd_build_conll07();
......
......@@ -18,7 +18,6 @@ void cff_cutoff_help_message(context *ctx)
context_cutoff_help_message(ctx);
context_cff_help_message(ctx);
fprintf(stderr, "INPUT/OUTPUT\n");
context_alphabet_help_message(ctx);
}
void cff_cutoff_check_options(context *ctx)
......
......@@ -10,11 +10,9 @@
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->conll_filename) free(ctx->conll_filename);
if(ctx->input_filename) free(ctx->input_filename);
if(ctx->perc_model_filename) free(ctx->perc_model_filename);
if(ctx->dnn_model_filename) free(ctx->dnn_model_filename);
if(ctx->dico_features_filename) free(ctx->dico_features_filename);
if(ctx->dico_classes_filename) free(ctx->dico_classes_filename);
if(ctx->cff_filename) free(ctx->cff_filename);
if(ctx->fann_filename) free(ctx->fann_filename);
if(ctx->mcd_filename) free(ctx->mcd_filename);
......@@ -46,11 +44,9 @@ context *context_new(void)
ctx->verbose = 0;
ctx->program_name = NULL;
ctx->conll_filename = NULL;
ctx->input_filename = NULL;
ctx->perc_model_filename = NULL;
ctx->dnn_model_filename = NULL;
ctx->dico_features_filename = NULL;
ctx->dico_classes_filename = NULL;
ctx->cff_filename = NULL;
ctx->fann_filename = NULL;
ctx->stag_desc_filename = NULL;
......@@ -82,8 +78,8 @@ context *context_new(void)
ctx->hidden_neurons_nb = 100;
ctx->stream_mode = 0;
ctx->form_column = -1;
ctx->conll = 0;
ctx->ifpls = 1;
return ctx;
}
......@@ -95,11 +91,16 @@ void context_general_help_message(context *ctx)
fprintf(stderr, "\t-h --help : print this message\n");
fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n");
fprintf(stderr, "\t-D --maca_data_path <str> : path to the maca_data directory\n");
fprintf(stderr, "\t-L --language <str> : identifier of the language to use (default is fr)\n");
}
/* Writes the help line of the -m / --model option on stderr. */
/* ctx is not used. */
void context_model_help_message(context *ctx)
{
  static const char usage[] = "\t-m --model <file> : model file name\n";

  (void)ctx;
  fputs(usage, stderr);
}
/* Writes the help line of the -i / --input option on stderr. */
/* ctx is not used. */
void context_input_help_message(context *ctx)
{
  static const char usage[] = "\t-i --input <file> : input file name\n";

  (void)ctx;
  fputs(usage, stderr);
}
/* Writes the help line of the -n / --iter option on stderr. */
/* ctx is not used. */
void context_iterations_help_message(context *ctx)
{
  static const char usage[] = "\t-n --iter <int> : number of iterations (default is 4)\n";

  (void)ctx;
  fputs(usage, stderr);
}
......@@ -107,22 +108,16 @@ void context_cff_help_message(context *ctx){
fprintf(stderr, "\t-x --cff <file> : CFF format file name\n");
}
void context_fann_help_message(context *ctx){
fprintf(stderr, "\t-y --fann <file> : FANN format file name\n");
}
void context_d_features_help_message(context *ctx){
fprintf(stderr, "\t-f --df <file> : features dictionnary file name\n");
}
void context_d_classes_help_message(context *ctx){
fprintf(stderr, "\t-c --dc <file> : classes dictionnary file name\n");
fprintf(stderr, "\t-f --fann <file> : FANN format file name\n");
}
void context_conll_help_message(context *ctx){
fprintf(stderr, "\t-i --conll <file> : conll file name\n");
fprintf(stderr, "\t-c --conll : input is in conll07 format\n");
}
/* Writes the help line of the -u / --cutoff option on stderr. */
/* ctx is not used. */
void context_cutoff_help_message(context *ctx)
{
  static const char usage[] = "\t-u --cutoff <int> : cutoff value\n";

  (void)ctx;
  fputs(usage, stderr);
}
void context_mode_help_message(context *ctx){
fprintf(stderr, "\t-o --mode TEST|TRAIN\n");
fprintf(stderr, "\t-M --mode : TEST|TRAIN\n");
}
void context_beam_help_message(context *ctx){
fprintf(stderr, "\t-b --beam <int> : beam width (default is 1)\n");
......@@ -130,18 +125,6 @@ void context_beam_help_message(context *ctx){
/* Writes the help line of the -s / --sent_nb option on stderr. */
/* ctx is not used. */
void context_sent_nb_help_message(context *ctx)
{
  static const char usage[] = "\t-s --sent_nb <int> : number of sentences to process (default is 1000000)\n";

  (void)ctx;
  fputs(usage, stderr);
}
void context_alphabet_help_message(context *ctx){
fprintf(stderr, "\t-a --alphabet <file> : name of the file containing the different dictionaries\n");
}
void context_dnn_model_help_message(context *ctx){
fprintf(stderr, "\t-M --dnn_model <file> : FANN model file\n");
}
void context_hidden_neurons_nb_help_message(context *ctx){
fprintf(stderr, "\t-H --hidden_neurons_nb <int> : number of neurons in the hidden layer (default is 100)\n");
}
void context_stag_desc_filename_help_message(context *ctx){
fprintf(stderr, "\t-S --stag_file <file> : name of the file containing the stag description\n");
}
/* Writes the help line of the -C / --mcd option on stderr. */
/* ctx is not used. */
void context_mcd_help_message(context *ctx)
{
  static const char usage[] = "\t-C --mcd <file> : multi column description file name\n";

  (void)ctx;
  fputs(usage, stderr);
}
......@@ -149,27 +132,22 @@ void context_features_model_help_message(context *ctx){
fprintf(stderr, "\t-F --feat_model <file> : feature model file name\n");
}
void context_stream_help_message(context *ctx){
fprintf(stderr, "\t-T --stream (0|1) : steam mode\n");
fprintf(stderr, "\t-S --stream : steam mode\n");
}
void context_vocabs_help_message(context *ctx){
fprintf(stderr, "\t-V --vocabs : vocabularies file\n");
fprintf(stderr, "\t-V --vocabs <file> : vocabularies file\n");
}
void context_language_help_message(context *ctx){
fprintf(stderr, "\t-X --language : identifier of the language to use\n");
fprintf(stderr, "\t-L --language : identifier of the language to use\n");
}
void context_maca_data_path_help_message(context *ctx){
fprintf(stderr, "\t-Y --maca_data_path : path to the maca_data directory\n");
fprintf(stderr, "\t-D --maca_data_path : path to maca_data directory\n");
}
void context_root_label_help_message(context *ctx){
fprintf(stderr, "\t-R --root_label : name of the root label (default is \"root\")\n");
}
void context_f2p_filename_help_message(context *ctx){
fprintf(stderr, "\t-P --f2p : form to pos (f2p) filename\n");
fprintf(stderr, "\t-P --f2p <file> : form to pos (f2p) filename\n");
}
context *context_read_options(int argc, char *argv[])
......@@ -180,75 +158,58 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[28] =
static struct option long_options[21] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
{"debug", no_argument, 0, 'd'},
{"conll", no_argument, 0, 'c'},
{"stream", no_argument, 0, 'S'},
{"model", required_argument, 0, 'm'},
{"df", required_argument, 0, 'f'},
{"dc", required_argument, 0, 'c'},
{"conll", required_argument, 0, 'i'},
{"input", required_argument, 0, 'i'},
{"iter", required_argument, 0, 'n'},
{"cff", required_argument, 0, 'x'},
{"cutoff", required_argument, 0, 'u'},
{"hratio", required_argument, 0, 'r'},
{"mode", required_argument, 0, 'o'},
{"mode", required_argument, 0, 'M'},
{"beam", required_argument, 0, 'b'},
{"fann", required_argument, 0, 'y'},
{"fann", required_argument, 0, 'f'},
{"sent_nb", required_argument, 0, 's'},
/* {"alphabet", required_argument, 0, 'a'}, */
{"dnn_model", required_argument, 0, 'M'},
{"hidden_neurons_nb", required_argument, 0, 'H'},
{"stag_file", required_argument, 0, 'S'},
{"mcd", required_argument, 0, 'C'},
{"feat_model", required_argument, 0, 'F'},
{"vocabs", required_argument, 0, 'V'},
{"stream", required_argument, 0, 'T'},
{"language", required_argument, 0, 'X'},
{"maca_data_path", required_argument, 0, 'Y'},
{"language", required_argument, 0, 'L'},
{"maca_data_path", required_argument, 0, 'D'},
{"root_label", required_argument, 0, 'R'},
{"form_col", required_argument, 0, 'O'},
{"f2p", required_argument, 0, 'P'}
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "dhvT:m:f:c:i:n:x:u:r:o:b:y:s:M:H:S:C:F:V:X:Y:R:O:P:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hvcSm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){
switch (c)
{
case 'd':
ctx->debug_mode = 1;
break;
case 'h':
ctx->help = 1;
break;
case 'v':
ctx->verbose = 1;
break;
case 'T':
ctx->stream_mode = atoi(optarg);
break;
case 'y':
ctx->fann_filename = strdup(optarg);
break;
case 'n':
ctx->iteration_nb = atoi(optarg);
case 'c':
ctx->conll = 1;
break;
case 'i':
ctx->conll_filename = strdup(optarg);
case 'S':
ctx->stream_mode = 1;
break;
case 'm':
ctx->perc_model_filename = strdup(optarg);
break;
case 'M':
ctx->dnn_model_filename = strdup(optarg);
break;
case 'f':
ctx->dico_features_filename = strdup(optarg);
case 'i':
ctx->input_filename = strdup(optarg);
break;
case 'c':
ctx->dico_classes_filename = strdup(optarg);
case 'n':
ctx->iteration_nb = atoi(optarg);
break;
case 'x':
ctx->cff_filename = strdup(optarg);
......@@ -256,27 +217,23 @@ context *context_read_options(int argc, char *argv[])
case 'u':
ctx->feature_cutoff = atoi(optarg);
break;
case 'b':
ctx->beam_width = atoi(optarg);
break;
case 'r':
ctx->hash_ratio = atof(optarg);
break;
case 'o':
case 'M':
ctx->mode = (!strcmp(optarg, "TEST"))? TEST_MODE : TRAIN_MODE;
break;
case 's':
ctx->sent_nb = atoi(optarg);
case 'b':
ctx->beam_width = atoi(optarg);
break;
case 'H':
ctx->hidden_neurons_nb = atoi(optarg);
case 'f':
ctx->fann_filename = strdup(optarg);
break;
case 'S':
ctx->stag_desc_filename = strdup(optarg);
case 's':
ctx->sent_nb = atoi(optarg);
break;
case 'C':
ctx->mcd_filename = strdup(optarg);
ctx->mcd_struct = mcd_read(ctx->mcd_filename);
break;
case 'F':
ctx->features_model_filename = strdup(optarg);
......@@ -284,18 +241,15 @@ context *context_read_options(int argc, char *argv[])
case 'V':
ctx->vocabs_filename = strdup(optarg);
break;
case 'X':
case 'L':
ctx->language = strdup(optarg);
break;
case 'Y':
case 'D':
ctx->maca_data_path = strdup(optarg);
break;
case 'R':
ctx->root_label = strdup(optarg);
break;
case 'O':
ctx->form_column = atoi(optarg);
break;
case 'P':
ctx->f2p_filename = strdup(optarg);
ctx->f2p = form2pos_read(ctx->f2p_filename);
......@@ -303,32 +257,13 @@ context *context_read_options(int argc, char *argv[])
}
}
/* if(ctx->mcd_filename && ctx->conll_filename){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
}*/
/*
if(ctx->features_model && ctx->mcd_struct)
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
*/
/* if the form column has been set by user, change it in the mcd file */
/* if(ctx->form_column != -1){
ctx->mcd_struct = mcd_new(ctx->form_column + 1);
mcd_set_form_col(ctx->mcd_struct, ctx->form_column);
ctx->mcd_struct->representation[ctx->form_column] = MCD_REPRESENTATION_VOCAB;
ctx->mcd_struct->filename[ctx->form_column] = strdup("_");
ctx->mcd_struct->dico_array[ctx->form_column] = NULL;
ctx->mcd_struct->type_str[ctx->form_column] = strdup("FORM");
}*/
if(ctx->mcd_struct == NULL){
if(ctx->mcd_filename)
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
else
if(ctx->conll)
ctx->mcd_struct = mcd_build_conll07();
}
else
ctx->mcd_struct = mcd_build_ifpls();
return ctx;
}
......
......@@ -25,11 +25,9 @@
typedef struct {
int help;
char *program_name;
char *conll_filename;
char *input_filename;
char *perc_model_filename;
char *dnn_model_filename;
char *dico_features_filename;
char *dico_classes_filename;
char *cff_filename;
char *fann_filename;
char *stag_desc_filename;
......@@ -57,8 +55,9 @@ typedef struct {
char *maca_data_path;
char *language;
char *root_label;
int form_column;
form2pos *f2p;
int conll;
int ifpls;
} context;
context *context_new(void);
......@@ -76,21 +75,21 @@ void context_cutoff_help_message(context *ctx);
void context_mode_help_message(context *ctx);
void context_beam_help_message(context *ctx);
void context_sent_nb_help_message(context *ctx);
void context_alphabet_help_message(context *ctx);
void context_dnn_model_help_message(context *ctx);
void context_hidden_neurons_nb_help_message(context *ctx);
void context_stag_desc_filename_help_message(context *ctx);
void context_input_filename_help_message(context *ctx);
void context_mcd_help_message(context *ctx);
void context_features_model_help_message(context *ctx);
void context_vocabs_help_message(context *ctx);
void context_load_alphabets(context *ctx);
void context_print_alphabets(context *ctx);
void context_language_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx);
void context_f2p_filename_help_message(context *ctx);
void context_conll_help_message(context *ctx);
void context_ifpls_help_message(context *ctx);
void context_input_help_message(context *ctx);
#endif
......@@ -18,15 +18,13 @@ void decode_help_message(context *ctx)
{
context_general_help_message(ctx);
context_beam_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_conll_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_input_help_message(ctx);
context_mcd_help_message(ctx);
context_model_help_message(ctx);
context_vocabs_help_message(ctx);
context_features_model_help_message(ctx);
context_language_help_message(ctx);
context_maca_data_path_help_message(ctx);
}
void decode_check_options(context *ctx){
......@@ -83,29 +81,25 @@ void set_linguistic_resources_filenames_parser(context *ctx)
ctx->features_model_filename = strdup(absolute_filename);
}
if(ctx->verbose){
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);