From 121525d1226c1972a4fafa4db5d9a7f65f0ba969 Mon Sep 17 00:00:00 2001 From: Marjorie Armando <marjorie.ARMANDO.1@etu.univ-amu.fr> Date: Mon, 17 Apr 2017 23:09:53 +0200 Subject: [PATCH] generate train and test files, generate cff, predict test's forms' classes --- maca_morpho/CMakeLists.txt | 12 ++- maca_morpho/src/context.c | 166 ------------------------------------- maca_morpho/src/context.h | 37 --------- maca_morpho/src/feat_fct.c | 19 ----- maca_morpho/src/fplm2cff.c | 46 ++-------- maca_morpho/src/predict.c | 55 ++---------- 6 files changed, 27 insertions(+), 308 deletions(-) delete mode 100644 maca_morpho/src/context.c delete mode 100644 maca_morpho/src/context.h delete mode 100644 maca_morpho/src/feat_fct.c diff --git a/maca_morpho/CMakeLists.txt b/maca_morpho/CMakeLists.txt index a45cdb4..593b4ed 100644 --- a/maca_morpho/CMakeLists.txt +++ b/maca_morpho/CMakeLists.txt @@ -1,7 +1,10 @@ set(SOURCES - src/feat_fct.c - src/context.c + src/maca_morpho_feat_fct.c + src/maca_morpho_context.c src/vectorize.c + src/fplm2cff_fct.c + src/predict_fct.c + src/fplm_fct.c ) @@ -28,3 +31,8 @@ target_link_libraries(predict maca_common) target_link_libraries(predict maca_morpho) install (TARGETS predict DESTINATION bin) +add_executable(fplm2train_test ./src/fplm2train_test.c) +target_link_libraries(fplm2train_test perceptron) +target_link_libraries(fplm2train_test maca_common) +target_link_libraries(fplm2train_test maca_morpho) +install (TARGETS fplm2train_test DESTINATION bin) diff --git a/maca_morpho/src/context.c b/maca_morpho/src/context.c deleted file mode 100644 index 49d8a4c..0000000 --- a/maca_morpho/src/context.c +++ /dev/null @@ -1,166 +0,0 @@ -#include<stdlib.h> -#include<stdio.h> -#include<string.h> -#include<unistd.h> -#include<getopt.h> -#include "context.h" -#include "util.h" - - -void context_set_linguistic_resources_filenames(context *ctx); - -void context_free(context *ctx) -{ - if(ctx->program_name) free(ctx->program_name); - if(ctx->fplm_filename) free(ctx->fplm_filename); - if(ctx->cfw_filename) free(ctx->cfw_filename); - if(ctx->language) free(ctx->language); - if(ctx->maca_data_path) free(ctx->maca_data_path); - free(ctx); -} - -context *context_new(void) -{ - context *ctx = (context *)memalloc(sizeof(context)); - - ctx->help = 0; - ctx->verbose = 0; - ctx->debug_mode = 0; - ctx->program_name = NULL; - ctx->fplm_filename = NULL; - ctx->language = strdup("fr"); - ctx->maca_data_path = NULL; - ctx->features_filename = NULL; - ctx->cfw_filename = NULL; - return ctx; -} - -void context_general_help_message(context *ctx) -{ - fprintf(stderr, "usage: %s [options]\n", ctx->program_name); - fprintf(stderr, "Options:\n"); - fprintf(stderr, "\t-h --help : print this message\n"); - fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); - fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); -} - -void context_fplm_help_message(context *ctx){ - fprintf(stderr, "\t-f --fplm <file> : fplm (form pos lemma morpho) file\n"); -} - -void context_language_help_message(context *ctx){ - fprintf(stderr, "\t-L --language : identifier of the language to use\n"); -} - -void context_maca_data_path_help_message(context *ctx){ - fprintf(stderr, "\t-M --maca_data_path : path to maca_data directory\n"); -} - -void context_fm_help_message(context *ctx){ - fprintf(stderr, "\t-F --fm <file> : feature model file name\n"); -} - -void context_features_filename_help_message(context *ctx){ - fprintf(stderr, "\t-x --feat <file> : features dictionary file name\n"); -} - -void context_weights_matrix_filename_help_message(context *ctx){ - fprintf(stderr, "\t-w --weights <file> : weight matrix (cfw) filename\n"); -} - -void context_features_model_help_message(context *ctx){ - fprintf(stderr, "\t-F --feat_model <file> : feature model file name\n"); -} - -context *context_read_options(int argc, char *argv[]) -{ - int c; - int option_index = 0; - context *ctx = context_new(); - - ctx->program_name = strdup(argv[0]); - - static struct option long_options[10] = - { - {"help", no_argument, 0, 'h'}, - {"verbose", no_argument, 0, 'v'}, - {"debug", no_argument, 0, 'd'}, - {"mcd", required_argument, 0, 'C'}, - {"language", required_argument, 0, 'L'}, - {"fplm", required_argument, 0, 'f'}, - {"maca_data_path", required_argument, 0, 'D'}, - {"fm", required_argument, 0, 'F'}, - {"feat", required_argument, 0, 'x'}, - {"weights", required_argument, 0, 'w'} - }; - optind = 0; - opterr = 0; - - while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){ - switch (c) - { - case 'd': - ctx->debug_mode = 1; - break; - case 'h': - ctx->help = 1; - break; - case 'v': - ctx->verbose = 1; - break; - case 'f': - ctx->fplm_filename = strdup(optarg); - break; - case 'L': - ctx->language = strdup(optarg); - break; - case 'D': - ctx->maca_data_path = strdup(optarg); - break; - case 'F': - ctx->fm_filename = strdup(optarg); - break; - case 'x': - ctx->features_filename = strdup(optarg); - break; - case 'w': - ctx->cfw_filename = strdup(optarg); - break; - } - } - - context_set_linguistic_resources_filenames(ctx); - - return ctx; -} - -void context_set_linguistic_resources_filenames(context *ctx) -{ - char absolute_path[500]; - char absolute_filename[500]; - - absolute_path[0] = '\0'; - - if(ctx->maca_data_path) - strcat(absolute_path, ctx->maca_data_path); - else { - char *e = getenv("MACAON_DIR"); - if (e != NULL) { - strcat(absolute_path, e); - } else { - fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n"); - } - } - - - strcat(absolute_path, "/"); - strcat(absolute_path, ctx->language); - strcat(absolute_path, "/bin/"); - - if(!ctx->fplm_filename){ - strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, DEFAULT_FPLM_FILENAME); - ctx->fplm_filename = strdup(absolute_filename); - } - -} diff --git a/maca_morpho/src/context.h b/maca_morpho/src/context.h deleted file mode 100644 index c1789a5..0000000 --- a/maca_morpho/src/context.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __MACA_MORPHO_CONTEXT__ -#define __MACA_MORPHO_CONTEXT__ - -#include "mcd.h" -#include <stdlib.h> - -#define DEFAULT_FPLM_FILENAME "fplm" - - - -typedef struct { - int help; - int verbose; - int debug_mode; - char *program_name; - char *fplm_filename; - char *language; - char *maca_data_path; - char *fm_filename; - char *features_filename; - char *cfw_filename; -} context; - - - -context *context_new(void); -void context_free(context *ctx); - -context *context_read_options(int argc, char *argv[]); -void context_general_help_message(context *ctx); -void context_language_help_message(context *ctx); -void context_fplm_help_message(context *ctx); -void context_maca_data_path_help_message(context *ctx); -void context_features_filename_help_message(context *ctx); -void context_weights_matrix_filename_help_message(context *ctx); -void context_features_model_help_message(context *ctx); -#endif diff --git a/maca_morpho/src/feat_fct.c b/maca_morpho/src/feat_fct.c deleted file mode 100644 index 0c7ddcb..0000000 --- a/maca_morpho/src/feat_fct.c +++ /dev/null @@ -1,19 +0,0 @@ -#include<stdio.h> -#include<stdlib.h> -#include<string.h> -#include"feat_lib.h" - - - -int p1(void *input){return(input == NULL)? -1 : ((char *)input)[strlen((char *)input) - 1];} -int p2(void *input){return(input == NULL)? -1 : ((char *)input)[strlen((char *)input) - 2];} - -feat_lib *feat_lib_build(void) -{ - feat_lib *fl = feat_lib_new(); - - feat_lib_add(fl, 1, (char *)"p1", p1); - feat_lib_add(fl, 1, (char *)"p2", p2); - return fl; -} - diff --git a/maca_morpho/src/fplm2cff.c b/maca_morpho/src/fplm2cff.c index ec346bc..e032136 100644 --- a/maca_morpho/src/fplm2cff.c +++ b/maca_morpho/src/fplm2cff.c @@ -1,42 +1,14 @@ -#include <stdio.h> #include <stdlib.h> -#include <string.h> - -#include "context.h" -#include "feat_model.h" -#include "feat_vec.h" -#include "dico.h" -#include "util.h" -#include "vectorize.h" - +#include <stdio.h> +#include "fplm2cff.h" int main(int argc, char *argv[]) { - context *ctx = context_read_options(argc, argv); - if(ctx->help){ - context_general_help_message(ctx); - context_language_help_message(ctx); - context_fplm_help_message(ctx); - context_maca_data_path_help_message(ctx); - context_features_filename_help_message(ctx); - context_features_model_help_message(ctx); - exit(1); - } - feat_vec *fv = feat_vec_new(10); - dico *dico_features = dico_new("dico_features", 1000); - feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - char form[100]; - while(strcmp(form, "end")){ - fscanf(stdin, "%s", form); - printf("form = %s\n", form); - form2fv(form, fv, fm, dico_features, ADD_MODE); -/* void feat_vec_print_string(feat_vec *fv, dico *dico_features); */ - feat_vec_print(stdout, fv); - } - /* dico_print_fh(stdout, dico_features); */ - if(ctx->features_filename) - dico_print(ctx->features_filename, dico_features); - - - + context *ctx = context_read_options(argc, argv); + if(ctx->help) + fplm2cff_help_message(ctx); + create_cff(ctx); + printf("cff.txt has been generated in the Files directory.\n"); + return 0; } + diff --git a/maca_morpho/src/predict.c b/maca_morpho/src/predict.c index c694f04..9fc8a1c 100644 --- a/maca_morpho/src/predict.c +++ b/maca_morpho/src/predict.c @@ -1,53 +1,14 @@ -#include <stdio.h> #include <stdlib.h> -#include <string.h> - -#include "context.h" -#include "feat_model.h" -#include "feat_vec.h" -#include "dico.h" -#include "util.h" -#include "vectorize.h" -#include "feature_table.h" - -void predict_help_message(context *ctx) -{ - context_general_help_message(ctx); - context_language_help_message(ctx); - context_fplm_help_message(ctx); - context_maca_data_path_help_message(ctx); - context_features_filename_help_message(ctx); - context_weights_matrix_filename_help_message(ctx); - context_features_model_help_message(ctx); - exit(1); -} +#include <stdio.h> +#include "predict.h" int main(int argc, char *argv[]) { - context *ctx = context_read_options(argc, argv); - if(ctx->help) predict_help_message(ctx); - feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); - feat_vec *fv = feat_vec_new(10); - dico *dico_features = dico_read(ctx->features_filename, 0.5); - feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - char form[100]; - int class; - float max; - - while(strcmp(form, "end")){ - fscanf(stdin, "%s", form); - printf("form = %s\n", form); - form2fv(form, fv, fm, dico_features, LOOKUP_MODE); - class = feature_table_argmax(fv, cfw, &max); - feat_vec_print(stdout, fv); - printf("class = %d\n", class); - - } - - if(ctx->features_filename) - dico_print(ctx->features_filename, dico_features); - - - + context *ctx = context_read_options(argc, argv); + if(ctx->help) + predict_help_message(ctx); + create_predictions_file(ctx); + printf("prediction.txt has been generated in the Files directory.\n"); + return 0; } -- GitLab