Select Git revision
context.c 5.62 KiB
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include "context.h"
#include "util.h"
/* Forward declaration: the function is defined at the end of this file but
 * is called earlier, from context_read_options(). */
void context_set_linguistic_resources_filenames(context *ctx);
/*
 * Release a context and every string it owns.
 *
 * ctx may be NULL, in which case nothing is done.
 * All string fields are either NULL or were obtained from strdup() in this
 * file, so they are handed to free() directly (free(NULL) is a no-op).
 */
void context_free(context *ctx)
{
    if (ctx == NULL)
        return;

    free(ctx->program_name);
    free(ctx->input_filename);
    free(ctx->output_filename);
    free(ctx->mcd_filename);
    free(ctx->language);
    free(ctx->maca_data_path);
    free(ctx->mwe_filename);
    free(ctx->mwe_tokens_dico_filename);
    free(ctx->mwe_tokens_separator);

    /* NOTE(review): ctx->mcd_struct is not released here; its destructor is
     * not visible from this file. Confirm who owns it and free it if the
     * context does. */
    free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->mcd_filename = NULL;
ctx->mcd_struct = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
ctx->form_column = -1;
ctx->input_filename = NULL;
ctx->output_filename = NULL;
ctx->mwe_filename = NULL;
ctx->mwe_tokens_dico_filename = NULL;
ctx->mwe_tokens_separator = strdup(" ");
return ctx;
}
/*
 * Print the usage line and the general-purpose options to stderr.
 *
 * ctx->program_name is inserted in the usage line.
 * Only options that context_read_options() actually parses are listed.
 */
void context_general_help_message(context *ctx)
{
    fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
    fprintf(stderr, "Options:\n");
    fprintf(stderr, "\t-h --help : print this message\n");
    fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
    fprintf(stderr, "\t-d --debug : activate debug mode\n");
}
/* Print the help line for the -i / --input option to stderr. ctx is unused. */
void context_input_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-i --input <file> : input mcf file name\n", stderr);
}
/* Print the help line for the -F / --form_column option to stderr. ctx is unused. */
void context_form_column_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-F --form_column <int> : column containing form\n", stderr);
}
/* Print the help line for the -C / --mcd option to stderr. ctx is unused. */
void context_mcd_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-C --mcd <file> : multi column description file name\n", stderr);
}
/* Print the help line for the -L / --language option to stderr. ctx is unused. */
void context_language_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-L --language : identifier of the language to use\n", stderr);
}
/*
 * Print the help line for the --maca_data_path option to stderr.
 * The short form is -D, matching the option table in
 * context_read_options() (-M is the short form of --mwe).
 * ctx is unused.
 */
void context_maca_data_path_help_message(context *ctx)
{
    (void)ctx;
    fprintf(stderr, "\t-D --maca_data_path : path to maca_data directory\n");
}
/* Print the help line for the -s / --mwe_sep option to stderr. ctx is unused. */
void context_mwe_token_separator_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-s --mwe_sep <string> : multi word expression tokens separator (default is space character)\n", stderr);
}
/* Print the help line for the -M / --mwe option to stderr. ctx is unused. */
void context_mwe_filename_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-M --mwe <filename> : multi word expression file\n", stderr);
}
/* Print the help line for the -V / --vocab option to stderr. ctx is unused. */
void context_vocab_help_message(context *ctx)
{
    (void)ctx;
    fputs("\t-V --vocab <filename> : multi word expression tokens vocabulary file\n", stderr);
}
/*
 * Build a context from the command line.
 *
 * argc, argv: the program arguments, as received by main().
 *
 * A context is created with context_new(), then every recognised option
 * overrides the matching default. String options are stored as heap copies;
 * the previous value (a default, or an earlier occurrence of the same option)
 * is freed before being replaced. Unrecognised options are ignored silently.
 *
 * After parsing, missing multi word expression file names are derived by
 * context_set_linguistic_resources_filenames(), and the multi column
 * description is either read from the -C file or, when neither -C nor -F
 * was given, built with mcd_build_wplgf().
 *
 * Returns the new context; the caller releases it with context_free().
 */
context *context_read_options(int argc, char *argv[])
{
    /* The trailing all-zero entry is the terminator getopt_long() requires. */
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {"verbose", no_argument, 0, 'v'},
        {"debug", no_argument, 0, 'd'},
        {"input", required_argument, 0, 'i'},
        {"output", required_argument, 0, 'o'},
        {"mcd", required_argument, 0, 'C'},
        {"language", required_argument, 0, 'L'},
        {"form_column", required_argument, 0, 'F'},
        {"maca_data_path", required_argument, 0, 'D'},
        {"mwe", required_argument, 0, 'M'},
        {"vocab", required_argument, 0, 'V'},
        {"mwe_sep", required_argument, 0, 's'},
        {0, 0, 0, 0}
    };
    int c;
    int option_index = 0;
    context *ctx = context_new();

    ctx->program_name = strdup(argv[0]);

    optind = 0; /* restart the scan from the beginning of argv */
    opterr = 0; /* keep getopt from printing its own error messages */

    while ((c = getopt_long(argc, argv, "hvdi:o:C:L:M:F:D:V:s:", long_options, &option_index)) != -1) {
        switch (c) {
        case 'd':
            ctx->debug_mode = 1;
            break;
        case 'h':
            ctx->help = 1;
            break;
        case 'v':
            ctx->verbose = 1;
            break;
        case 'F':
            /* The value given on the command line is stored minus one. */
            ctx->form_column = atoi(optarg) - 1;
            break;
        case 'i':
            free(ctx->input_filename);
            ctx->input_filename = strdup(optarg);
            break;
        case 'o':
            free(ctx->output_filename);
            ctx->output_filename = strdup(optarg);
            break;
        case 'C':
            free(ctx->mcd_filename);
            ctx->mcd_filename = strdup(optarg);
            break;
        case 'L':
            /* Releases the default set by context_new(). */
            free(ctx->language);
            ctx->language = strdup(optarg);
            break;
        case 'D':
            free(ctx->maca_data_path);
            ctx->maca_data_path = strdup(optarg);
            break;
        case 'V':
            free(ctx->mwe_tokens_dico_filename);
            ctx->mwe_tokens_dico_filename = strdup(optarg);
            break;
        case 'M':
            free(ctx->mwe_filename);
            ctx->mwe_filename = strdup(optarg);
            break;
        case 's':
            /* Releases the default set by context_new(). */
            free(ctx->mwe_tokens_separator);
            ctx->mwe_tokens_separator = strdup(optarg);
            break;
        default:
            /* Unknown option or missing argument: ignored. */
            break;
        }
    }

    context_set_linguistic_resources_filenames(ctx);

    if (ctx->mcd_filename) {
        ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
    }

    /* No description file and no explicit form column: use the built-in one. */
    if ((ctx->mcd_filename == NULL) && (ctx->form_column == -1)) {
        ctx->mcd_struct = mcd_build_wplgf();
    }

    return ctx;
}
/*
 * Return a heap-allocated "<base>/<language>/bin/<file>" string, sized to
 * fit, or NULL if formatting or allocation fails.
 */
static char *context_build_resource_filename(const char *base, const char *language, const char *file)
{
    /* First pass only measures the result; second pass writes it. */
    int needed = snprintf(NULL, 0, "%s/%s/bin/%s", base, language, file);
    char *path;

    if (needed < 0)
        return NULL;

    path = malloc((size_t)needed + 1);
    if (path == NULL)
        return NULL;

    snprintf(path, (size_t)needed + 1, "%s/%s/bin/%s", base, language, file);
    return path;
}

/*
 * Fill in the multi word expression file names that were not given on the
 * command line.
 *
 * The default location is "<data dir>/<language>/bin/", where <data dir> is
 * ctx->maca_data_path when set, otherwise the MACAON_DIR environment
 * variable. When neither is available a warning is printed to stderr and
 * the data dir is taken to be empty.
 *
 * Only ctx->mwe_filename and ctx->mwe_tokens_dico_filename are modified,
 * and only when they are NULL. Paths are allocated to their exact length,
 * so arbitrarily long directories are handled.
 */
void context_set_linguistic_resources_filenames(context *ctx)
{
    const char *base = ctx->maca_data_path;
    const char *language = ctx->language ? ctx->language : "";

    if (base == NULL) {
        base = getenv("MACAON_DIR");
        if (base == NULL) {
            fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
            base = "";
        }
    }

    if (!ctx->mwe_filename) {
        ctx->mwe_filename = context_build_resource_filename(base, language, DEFAULT_MWE_FILENAME);
    }

    if (!ctx->mwe_tokens_dico_filename) {
        ctx->mwe_tokens_dico_filename = context_build_resource_filename(base, language, DEFAULT_MWE_TOKENS_DICO_FILENAME);
    }
}