diff --git a/maca_morpho/CMakeLists.txt b/maca_morpho/CMakeLists.txt index a45cdb404d593c735b80db2dfff156edad8793b0..593b4edd7f7bf512cef3d0d65062b9090938b52f 100644 --- a/maca_morpho/CMakeLists.txt +++ b/maca_morpho/CMakeLists.txt @@ -1,7 +1,10 @@ set(SOURCES - src/feat_fct.c - src/context.c + src/maca_morpho_feat_fct.c + src/maca_morpho_context.c src/vectorize.c + src/fplm2cff_fct.c + src/predict_fct.c + src/fplm_fct.c ) @@ -28,3 +31,8 @@ target_link_libraries(predict maca_common) target_link_libraries(predict maca_morpho) install (TARGETS predict DESTINATION bin) +add_executable(fplm2train_test ./src/fplm2train_test.c) +target_link_libraries(fplm2train_test perceptron) +target_link_libraries(fplm2train_test maca_common) +target_link_libraries(fplm2train_test maca_morpho) +install (TARGETS fplm2train_test DESTINATION bin) diff --git a/maca_morpho/src/context.c b/maca_morpho/src/context.c deleted file mode 100644 index 49d8a4cf58bd2a414e97c87c3db393561f48e560..0000000000000000000000000000000000000000 --- a/maca_morpho/src/context.c +++ /dev/null @@ -1,166 +0,0 @@ -#include<stdlib.h> -#include<stdio.h> -#include<string.h> -#include<unistd.h> -#include<getopt.h> -#include "context.h" -#include "util.h" - - -void context_set_linguistic_resources_filenames(context *ctx); - -void context_free(context *ctx) -{ - if(ctx->program_name) free(ctx->program_name); - if(ctx->fplm_filename) free(ctx->fplm_filename); - if(ctx->cfw_filename) free(ctx->cfw_filename); - if(ctx->language) free(ctx->language); - if(ctx->maca_data_path) free(ctx->maca_data_path); - free(ctx); -} - -context *context_new(void) -{ - context *ctx = (context *)memalloc(sizeof(context)); - - ctx->help = 0; - ctx->verbose = 0; - ctx->debug_mode = 0; - ctx->program_name = NULL; - ctx->fplm_filename = NULL; - ctx->language = strdup("fr"); - ctx->maca_data_path = NULL; - ctx->features_filename = NULL; - ctx->cfw_filename = NULL; - return ctx; -} - -void context_general_help_message(context *ctx) -{ - fprintf(stderr, "usage: %s [options]\n", ctx->program_name); - fprintf(stderr, "Options:\n"); - fprintf(stderr, "\t-h --help : print this message\n"); - fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); - fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); -} - -void context_fplm_help_message(context *ctx){ - fprintf(stderr, "\t-f --fplm <file> : fplm (form pos lemma morpho) file\n"); -} - -void context_language_help_message(context *ctx){ - fprintf(stderr, "\t-L --language : identifier of the language to use\n"); -} - -void context_maca_data_path_help_message(context *ctx){ - fprintf(stderr, "\t-M --maca_data_path : path to maca_data directory\n"); -} - -void context_fm_help_message(context *ctx){ - fprintf(stderr, "\t-F --fm <file> : feature model file name\n"); -} - -void context_features_filename_help_message(context *ctx){ - fprintf(stderr, "\t-x --feat <file> : features dictionary file name\n"); -} - -void context_weights_matrix_filename_help_message(context *ctx){ - fprintf(stderr, "\t-w --weights <file> : weight matrix (cfw) filename\n"); -} - -void context_features_model_help_message(context *ctx){ - fprintf(stderr, "\t-F --feat_model <file> : feature model file name\n"); -} - -context *context_read_options(int argc, char *argv[]) -{ - int c; - int option_index = 0; - context *ctx = context_new(); - - ctx->program_name = strdup(argv[0]); - - static struct option long_options[10] = - { - {"help", no_argument, 0, 'h'}, - {"verbose", no_argument, 0, 'v'}, - {"debug", no_argument, 0, 'd'}, - {"mcd", required_argument, 0, 'C'}, - {"language", required_argument, 0, 'L'}, - {"fplm", required_argument, 0, 'f'}, - {"maca_data_path", required_argument, 0, 'D'}, - {"fm", required_argument, 0, 'F'}, - {"feat", required_argument, 0, 'x'}, - {"weights", required_argument, 0, 'w'} - }; - optind = 0; - opterr = 0; - - while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){ - switch (c) - { - case 'd': - ctx->debug_mode = 1; - break; - case 'h': - ctx->help = 1; - break; - case 'v': - ctx->verbose = 1; - break; - case 'f': - ctx->fplm_filename = strdup(optarg); - break; - case 'L': - ctx->language = strdup(optarg); - break; - case 'D': - ctx->maca_data_path = strdup(optarg); - break; - case 'F': - ctx->fm_filename = strdup(optarg); - break; - case 'x': - ctx->features_filename = strdup(optarg); - break; - case 'w': - ctx->cfw_filename = strdup(optarg); - break; - } - } - - context_set_linguistic_resources_filenames(ctx); - - return ctx; -} - -void context_set_linguistic_resources_filenames(context *ctx) -{ - char absolute_path[500]; - char absolute_filename[500]; - - absolute_path[0] = '\0'; - - if(ctx->maca_data_path) - strcat(absolute_path, ctx->maca_data_path); - else { - char *e = getenv("MACAON_DIR"); - if (e != NULL) { - strcat(absolute_path, e); - } else { - fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n"); - } - } - - - strcat(absolute_path, "/"); - strcat(absolute_path, ctx->language); - strcat(absolute_path, "/bin/"); - - if(!ctx->fplm_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_FPLM_FILENAME); - ctx->fplm_filename = strdup(absolute_filename); - } - -} diff --git a/maca_morpho/src/context.h b/maca_morpho/src/context.h deleted file mode 100644 index c1789a54631e3bcc3ba695573dc1cf784e177f32..0000000000000000000000000000000000000000 --- a/maca_morpho/src/context.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __MACA_MORPHO_CONTEXT__ -#define __MACA_MORPHO_CONTEXT__ - -#include "mcd.h" -#include <stdlib.h> - -#define DEFAULT_FPLM_FILENAME "fplm" - - - -typedef struct { - int help; - int verbose; - int debug_mode; - char *program_name; - char *fplm_filename; - char *language; - char *maca_data_path; - char *fm_filename; - char *features_filename; - char *cfw_filename; -} context; - - - -context *context_new(void); -void context_free(context *ctx); - -context *context_read_options(int argc, char *argv[]); -void context_general_help_message(context *ctx); -void context_language_help_message(context *ctx); -void context_fplm_help_message(context *ctx); -void context_maca_data_path_help_message(context *ctx); -void context_features_filename_help_message(context *ctx); -void context_weights_matrix_filename_help_message(context *ctx); -void context_features_model_help_message(context *ctx); -#endif diff --git a/maca_morpho/src/feat_fct.c b/maca_morpho/src/feat_fct.c deleted file mode 100644 index 0c7ddcbfdd0e7fb9274e9b8682b3c5ec207c8598..0000000000000000000000000000000000000000 --- a/maca_morpho/src/feat_fct.c +++ /dev/null @@ -1,19 +0,0 @@ -#include<stdio.h> -#include<stdlib.h> -#include<string.h> -#include"feat_lib.h" - - - -int p1(void *input){return(input == NULL)? -1 : ((char *)input)[strlen((char *)input) - 1];} -int p2(void *input){return(input == NULL)? -1 : ((char *)input)[strlen((char *)input) - 2];} - -feat_lib *feat_lib_build(void) -{ - feat_lib *fl = feat_lib_new(); - - feat_lib_add(fl, 1, (char *)"p1", p1); - feat_lib_add(fl, 1, (char *)"p2", p2); - return fl; -} - diff --git a/maca_morpho/src/fplm2cff.c b/maca_morpho/src/fplm2cff.c index ec346bc01583e008d36d26034bf589a6b54bd9cc..e0321366447c6fcc32e5366ba70ec9b405d15c44 100644 --- a/maca_morpho/src/fplm2cff.c +++ b/maca_morpho/src/fplm2cff.c @@ -1,42 +1,14 @@ -#include <stdio.h> #include <stdlib.h> -#include <string.h> - -#include "context.h" -#include "feat_model.h" -#include "feat_vec.h" -#include "dico.h" -#include "util.h" -#include "vectorize.h" - +#include <stdio.h> +#include "fplm2cff.h" int main(int argc, char *argv[]) { - context *ctx = context_read_options(argc, argv); - if(ctx->help){ - context_general_help_message(ctx); - context_language_help_message(ctx); - context_fplm_help_message(ctx); - context_maca_data_path_help_message(ctx); - context_features_filename_help_message(ctx); - context_features_model_help_message(ctx); - exit(1); - } - feat_vec *fv = feat_vec_new(10); - dico *dico_features = dico_new("dico_features", 1000); - feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - char form[100]; - while(strcmp(form, "end")){ - fscanf(stdin, "%s", form); - printf("form = %s\n", form); - form2fv(form, fv, fm, dico_features, ADD_MODE); -/* void feat_vec_print_string(feat_vec *fv, dico *dico_features); */ - feat_vec_print(stdout, fv); - } - /* dico_print_fh(stdout, dico_features); */ - if(ctx->features_filename) - dico_print(ctx->features_filename, dico_features); - - - + context *ctx = context_read_options(argc, argv); + if(ctx->help) + fplm2cff_help_message(ctx); + create_cff(ctx); + printf("cff.txt has been generated in the Files directory.\n"); + return 0; } + diff --git a/maca_morpho/src/predict.c b/maca_morpho/src/predict.c index c694f048b3b16000e9b25b547e9b507bea9214ec..9fc8a1ce121a3fda1d6dda33cca453047b8cb82b 100644 --- a/maca_morpho/src/predict.c +++ b/maca_morpho/src/predict.c @@ -1,53 +1,14 @@ -#include <stdio.h> #include <stdlib.h> -#include <string.h> - -#include "context.h" -#include "feat_model.h" -#include "feat_vec.h" -#include "dico.h" -#include "util.h" -#include "vectorize.h" -#include "feature_table.h" - -void predict_help_message(context *ctx) -{ - context_general_help_message(ctx); - context_language_help_message(ctx); - context_fplm_help_message(ctx); - context_maca_data_path_help_message(ctx); - context_features_filename_help_message(ctx); - context_weights_matrix_filename_help_message(ctx); - context_features_model_help_message(ctx); - exit(1); -} +#include <stdio.h> +#include "predict.h" int main(int argc, char *argv[]) { - context *ctx = context_read_options(argc, argv); - if(ctx->help) predict_help_message(ctx); - feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); - feat_vec *fv = feat_vec_new(10); - dico *dico_features = dico_read(ctx->features_filename, 0.5); - feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - char form[100]; - int class; - float max; - - while(strcmp(form, "end")){ - fscanf(stdin, "%s", form); - printf("form = %s\n", form); - form2fv(form, fv, fm, dico_features, LOOKUP_MODE); - class = feature_table_argmax(fv, cfw, &max); - feat_vec_print(stdout, fv); - printf("class = %d\n", class); - - } - - if(ctx->features_filename) - dico_print(ctx->features_filename, dico_features); - - - + context *ctx = context_read_options(argc, argv); + if(ctx->help) + predict_help_message(ctx); + create_predictions_file(ctx); + printf("prediction.txt has been generated in the Files directory.\n"); + return 0; }