diff --git a/maca_morpho/CMakeLists.txt b/maca_morpho/CMakeLists.txt index 0a9b0bc7dc977135bf2d5c94dc9f12333da81c84..a45cdb404d593c735b80db2dfff156edad8793b0 100644 --- a/maca_morpho/CMakeLists.txt +++ b/maca_morpho/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES src/feat_fct.c src/context.c + src/vectorize.c ) @@ -21,3 +22,9 @@ target_link_libraries(fplm2cff maca_common) target_link_libraries(fplm2cff maca_morpho) install (TARGETS fplm2cff DESTINATION bin) +add_executable(predict ./src/predict.c) +target_link_libraries(predict perceptron) +target_link_libraries(predict maca_common) +target_link_libraries(predict maca_morpho) +install (TARGETS predict DESTINATION bin) + diff --git a/maca_morpho/src/context.c b/maca_morpho/src/context.c index 56516df6c963e3d4e314d605c1f30b8870be8f8e..49d8a4cf58bd2a414e97c87c3db393561f48e560 100644 --- a/maca_morpho/src/context.c +++ b/maca_morpho/src/context.c @@ -13,6 +13,7 @@ void context_free(context *ctx) { if(ctx->program_name) free(ctx->program_name); if(ctx->fplm_filename) free(ctx->fplm_filename); + if(ctx->cfw_filename) free(ctx->cfw_filename); /* strdup'ed when -w/--weights is parsed */ if(ctx->language) free(ctx->language); if(ctx->maca_data_path) free(ctx->maca_data_path); free(ctx); @@ -30,6 +31,7 @@ context *context_new(void) ctx->language = strdup("fr"); ctx->maca_data_path = NULL; ctx->features_filename = NULL; + ctx->cfw_filename = NULL; /* stays NULL unless -w/--weights is given */ return ctx; } @@ -62,6 +64,13 @@ void context_features_filename_help_message(context *ctx){ fprintf(stderr, "\t-x --feat <file> : features dictionary file name\n"); } +void context_weights_matrix_filename_help_message(context *ctx){ /* usage line for -w/--weights, written to stderr */ + fprintf(stderr, "\t-w --weights <file> : weight matrix (cfw) filename\n"); +} + +void context_features_model_help_message(context *ctx){ /* usage line for -F; the long spelling registered in context_read_options() is --fm */ + fprintf(stderr, "\t-F --fm <file> : feature model file name\n"); +} context *context_read_options(int argc, char *argv[]) { @@ -71,7 +80,7 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option 
long_options[9] = + static struct option long_options[11] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, @@ -81,12 +90,14 @@ context *context_read_options(int argc, char *argv[]) {"fplm", required_argument, 0, 'f'}, {"maca_data_path", required_argument, 0, 'D'}, {"fm", required_argument, 0, 'F'}, - {"feat", required_argument, 0, 'x'} + {"feat", required_argument, 0, 'x'}, + {"weights", required_argument, 0, 'w'}, + {0, 0, 0, 0} /* getopt_long() requires the table to end with an all-zero entry */ }; optind = 0; opterr = 0; - while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){ switch (c) { case 'd': @@ -113,6 +124,9 @@ context *context_read_options(int argc, char *argv[]) case 'x': ctx->features_filename = strdup(optarg); break; + case 'w': /* -w/--weights <file>: weight matrix (cfw) file name */ + ctx->cfw_filename = strdup(optarg); + break; } } diff --git a/maca_morpho/src/context.h b/maca_morpho/src/context.h index c0621bda99bdce9bb211be01be76771c6b817553..c1789a54631e3bcc3ba695573dc1cf784e177f32 100644 --- a/maca_morpho/src/context.h +++ b/maca_morpho/src/context.h @@ -18,17 +18,20 @@ typedef struct { char *maca_data_path; char *fm_filename; char *features_filename; + char *cfw_filename; /* weight matrix file name (-w/--weights); NULL when the option is absent */ } context; context *context_new(void); -void context_free(context *ctx); +void context_free(context *ctx); context *context_read_options(int argc, char *argv[]); -void context_general_help_message(context *ctx); -void context_language_help_message(context *ctx); -void context_fplm_help_message(context *ctx); -void context_maca_data_path_help_message(context *ctx); -void context_features_filename_help_message(context *ctx); +void context_general_help_message(context *ctx); +void context_language_help_message(context *ctx); +void context_fplm_help_message(context *ctx); +void context_maca_data_path_help_message(context *ctx); +void context_features_filename_help_message(context *ctx); +void context_weights_matrix_filename_help_message(context *ctx); +void 
context_features_model_help_message(context *ctx); #endif diff --git a/maca_morpho/src/fplm2cff.c b/maca_morpho/src/fplm2cff.c index 92c10f8ce5186919c646bdd4b209e3ac45592097..ec346bc01583e008d36d26034bf589a6b54bd9cc 100644 --- a/maca_morpho/src/fplm2cff.c +++ b/maca_morpho/src/fplm2cff.c @@ -7,43 +7,7 @@ #include "feat_vec.h" #include "dico.h" #include "util.h" - -#define LOOKUP_MODE 1 -#define TRAIN_MODE 2 - -int get_feat_value(feat_model *fm, char *form, dico *dico_features, int feat_nb, int mode) -{ - feat_desc *fd = fm->array[feat_nb]; - int i; - int feat_val; - char str[10]; - - /* the name of the feature is built in fm->string and its value in the dictionnary (dico_features) is returned */ - fm->string[0] = '\0'; - for(i=0; i < fd->nbelem; i++){ - strcat(fm->string, fd->array[i]->name); - feat_val = fd->array[i]->fct(form); - sprintf(str, "%d", feat_val); - strcat(fm->string, str); - - /* catenate_int(fm->string, feat_val); */ - } - if(mode == LOOKUP_MODE){ - if(fm->string) - return dico_string2int(dico_features, fm->string); - } - return dico_add(dico_features, fm->string); -} - - -feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features) -{ - int i; - feat_vec_empty(fv); - for(i=0; i < fm->nbelem; i++) - feat_vec_add(fv, get_feat_value(fm, form, dico_features, i, TRAIN_MODE)); - return fv; -} +#include "vectorize.h" /* declares form2fv() and defines LOOKUP_MODE / ADD_MODE */ int main(int argc, char *argv[]) @@ -54,7 +18,8 @@ int main(int argc, char *argv[]) context_language_help_message(ctx); context_fplm_help_message(ctx); context_maca_data_path_help_message(ctx); - context_features_filename_help_message(ctx); + context_features_filename_help_message(ctx); + context_features_model_help_message(ctx); exit(1); } feat_vec *fv = feat_vec_new(10); @@ -64,7 +29,7 @@ int main(int argc, char *argv[]) while(strcmp(form, "end")){ fscanf(stdin, "%s", form); printf("form = %s\n", form); - form2fv(form, fv, fm, dico_features); + form2fv(form, fv, fm, dico_features, ADD_MODE); /* ADD_MODE takes over from the TRAIN_MODE that form2fv() used to hard-code */ /* void 
feat_vec_print_string(feat_vec *fv, dico *dico_features); */ feat_vec_print(stdout, fv); }
diff --git a/maca_morpho/src/predict.c b/maca_morpho/src/predict.c
new file mode 100644
index 0000000000000000000000000000000000000000..c694f048b3b16000e9b25b547e9b507bea9214ec
--- /dev/null
+++ b/maca_morpho/src/predict.c
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "context.h"
+#include "feat_model.h"
+#include "feat_vec.h"
+#include "dico.h"
+#include "util.h"
+#include "vectorize.h"
+#include "feature_table.h"
+
+/* Print the help message of every option, then exit with status 1. */
+void predict_help_message(context *ctx)
+{
+  context_general_help_message(ctx);
+  context_language_help_message(ctx);
+  context_fplm_help_message(ctx);
+  context_maca_data_path_help_message(ctx);
+  context_features_filename_help_message(ctx);
+  context_weights_matrix_filename_help_message(ctx);
+  context_features_model_help_message(ctx);
+  exit(1);
+}
+
+/*
+ * Read whitespace-separated forms from stdin; for each one build its feature
+ * vector in LOOKUP_MODE, print it, and print the class returned by
+ * feature_table_argmax().  The loop ends once the form "end" has been
+ * processed, or when stdin is exhausted.
+ */
+int main(int argc, char *argv[])
+{
+  context *ctx = context_read_options(argc, argv);
+  if(ctx->help) predict_help_message(ctx);
+  feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose);
+  feat_vec *fv = feat_vec_new(10);
+  dico *dico_features = dico_read(ctx->features_filename, 0.5);
+  feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
+  char form[100] = ""; /* starts empty: the loop condition reads it before the first fscanf */
+  int class;
+  float max;
+
+  while(strcmp(form, "end")){
+    /* %99s keeps the token inside form[100]; leave the loop on EOF or read error instead of spinning */
+    if(fscanf(stdin, "%99s", form) != 1) break;
+    printf("form = %s\n", form);
+    form2fv(form, fv, fm, dico_features, LOOKUP_MODE);
+    class = feature_table_argmax(fv, cfw, &max);
+    feat_vec_print(stdout, fv);
+    printf("class = %d\n", class);
+  }
+
+  if(ctx->features_filename)
+    dico_print(ctx->features_filename, dico_features);
+
+  return 0;
+}
diff --git a/maca_morpho/src/vectorize.c b/maca_morpho/src/vectorize.c
new file mode 100644
index 0000000000000000000000000000000000000000..f7f43136a5f0c6e1a7d3e53f3ff1ea406d589070
--- /dev/null
+++ b/maca_morpho/src/vectorize.c
@@ -0,0 +1,38 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include"vectorize.h"
+
+/*
+ * Build in fm->string the name of feature feat_nb for form (each elementary
+ * function's name followed by its value) and return its dictionary code:
+ * dico_string2int() in LOOKUP_MODE, dico_add() in any other mode.
+ * NOTE(review): strcat assumes fm->string is large enough -- TODO confirm.
+ */
+int get_feat_value(feat_model *fm, char *form, dico *dico_features, int feat_nb, int mode)
+{
+  feat_desc *fd = fm->array[feat_nb];
+  int i;
+  int feat_val;
+  char str[32]; /* room for any decimal int and its terminator */
+  fm->string[0] = '\0';
+  for(i=0; i < fd->nbelem; i++){
+    strcat(fm->string, fd->array[i]->name);
+    feat_val = fd->array[i]->fct(form);
+    snprintf(str, sizeof str, "%d", feat_val);
+    strcat(fm->string, str);
+  }
+  if(mode == LOOKUP_MODE)
+    return dico_string2int(dico_features, fm->string);
+  return dico_add(dico_features, fm->string);
+}
+
+/* Empty fv, then add to it the code of each of the fm->nbelem features of form; returns fv. */
+feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features, int mode)
+{
+  int i;
+  feat_vec_empty(fv);
+  for(i=0; i < fm->nbelem; i++)
+    feat_vec_add(fv, get_feat_value(fm, form, dico_features, i, mode));
+  return fv;
+}
diff --git a/maca_morpho/src/vectorize.h b/maca_morpho/src/vectorize.h
new file mode 100644
index 0000000000000000000000000000000000000000..c859605c68cc9cbcfdc0ad169871047acfd0bec0
--- /dev/null
+++ b/maca_morpho/src/vectorize.h
@@ -0,0 +1,14 @@
+#ifndef __VECTORIZE__
+#define __VECTORIZE__
+
+#include"dico.h"
+#include"feat_model.h"
+#include"feat_vec.h"
+
+#define LOOKUP_MODE 1 /* feature names are resolved with dico_string2int() */
+#define ADD_MODE 2 /* feature names are registered with dico_add() */
+
+/* Fill fv with the feature codes of form computed according to fm; returns fv. */
+feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features, int mode);
+
+#endif