Skip to content
Snippets Groups Projects
Commit 121525d1 authored by Marjorie Armando
Browse files

generate train and test files, generate cff, predict test's forms' classes

parent 395684fc
No related branches found
No related tags found
No related merge requests found
# C source files collected in the SOURCES variable.
# NOTE(review): the command that consumes SOURCES is not visible in this hunk,
# and the list appears to mix old and new file names - confirm against the tree.
set(SOURCES
src/feat_fct.c
src/context.c
src/maca_morpho_feat_fct.c
src/maca_morpho_context.c
src/vectorize.c
src/fplm2cff_fct.c
src/predict_fct.c
src/fplm_fct.c
)
......@@ -28,3 +31,8 @@ target_link_libraries(predict maca_common)
# Link the predict executable against maca_morpho and install it under bin.
target_link_libraries(predict maca_morpho)
install (TARGETS predict DESTINATION bin)
# fplm2train_test executable: built from a single source file, linked against
# the perceptron, maca_common and maca_morpho libraries, installed under bin.
add_executable(fplm2train_test ./src/fplm2train_test.c)
target_link_libraries(fplm2train_test
  perceptron
  maca_common
  maca_morpho
)
install(TARGETS fplm2train_test DESTINATION bin)
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include "context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if(ctx->cfw_filename) free(ctx->cfw_filename);
if(ctx->language) free(ctx->language);
if(ctx->maca_data_path) free(ctx->maca_data_path);
free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
ctx->features_filename = NULL;
ctx->cfw_filename = NULL;
return ctx;
}
/*
 * Print the usage line and the options common to every program on stderr.
 *
 * Only the flags that context_read_options() actually handles are listed:
 * -h, -v and -d.
 */
void context_general_help_message(context *ctx)
{
  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  fprintf(stderr, "Options:\n");
  fprintf(stderr, "\t-h --help : print this message\n");
  fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
  fprintf(stderr, "\t-d --debug : activate debug mode\n");
}
/* Print the help line of the -f / --fplm option on stderr. ctx is unused. */
void context_fplm_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-f --fplm <file> : fplm (form pos lemma morpho) file\n", stderr);
}
/* Print the help line of the -L / --language option on stderr. ctx is unused. */
void context_language_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-L --language : identifier of the language to use\n", stderr);
}
/* Print the help line of the maca_data path option on stderr. ctx is unused. */
void context_maca_data_path_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-M --maca_data_path : path to maca_data directory\n", stderr);
}
/* Print the help line of the -F / --fm option on stderr. ctx is unused. */
void context_fm_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-F --fm <file> : feature model file name\n", stderr);
}
/* Print the help line of the -x / --feat option on stderr. ctx is unused. */
void context_features_filename_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-x --feat <file> : features dictionary file name\n", stderr);
}
/* Print the help line of the -w / --weights option on stderr. ctx is unused. */
void context_weights_matrix_filename_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-w --weights <file> : weight matrix (cfw) filename\n", stderr);
}
/* Print the help line of the -F / --feat_model option on stderr. ctx is unused. */
void context_features_model_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-F --feat_model <file> : feature model file name\n", stderr);
}
/*
 * Store a heap copy of value in *slot, releasing the string previously held
 * there (if any). Exits when the copy cannot be allocated.
 */
static void context_replace_string(char **slot, const char *value)
{
  char *copy = strdup(value);

  if (copy == NULL) {
    fprintf(stderr, "out of memory while reading the command line\n");
    exit(1);
  }
  free(*slot);
  *slot = copy;
}

/*
 * Build a context from the command line.
 *
 * Recognised options:
 *   -h --help, -v --verbose, -d --debug        flags
 *   -f --fplm <file>                           fplm file name
 *   -L --language <id>                         language (replaces the default)
 *   -M / -D --maca_data_path <dir>             maca_data directory
 *   -F --fm / --feat_model <file>              feature model file name
 *   -x --feat <file>                           features dictionary file name
 *   -w --weights <file>                        weight matrix file name
 * Unknown options are ignored silently (opterr is cleared). --mcd is accepted
 * by the parser but its value is not stored.
 *
 * After parsing, missing resource file names are completed by
 * context_set_linguistic_resources_filenames().
 *
 * Returns a newly allocated context; release it with context_free().
 */
context *context_read_options(int argc, char *argv[])
{
  /* getopt_long() requires the table to end with an all-zero entry. */
  static const struct option long_options[] =
  {
    {"help", no_argument, 0, 'h'},
    {"verbose", no_argument, 0, 'v'},
    {"debug", no_argument, 0, 'd'},
    {"mcd", required_argument, 0, 'C'},
    {"language", required_argument, 0, 'L'},
    {"fplm", required_argument, 0, 'f'},
    {"maca_data_path", required_argument, 0, 'D'},
    {"fm", required_argument, 0, 'F'},
    {"feat_model", required_argument, 0, 'F'}, /* name advertised by the help text */
    {"feat", required_argument, 0, 'x'},
    {"weights", required_argument, 0, 'w'},
    {0, 0, 0, 0}
  };
  int c;
  int option_index = 0;
  context *ctx = context_new();

  /* Make sure the slot holds NULL before any -F option replaces it. */
  ctx->fm_filename = NULL;
  ctx->program_name = strdup(argv[0]);

  optind = 0;
  opterr = 0;
  while ((c = getopt_long(argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1) {
    switch (c)
    {
      case 'd':
        ctx->debug_mode = 1;
        break;
      case 'h':
        ctx->help = 1;
        break;
      case 'v':
        ctx->verbose = 1;
        break;
      case 'f':
        context_replace_string(&ctx->fplm_filename, optarg);
        break;
      case 'L':
        /* Replaces (and frees) the default language set by context_new(). */
        context_replace_string(&ctx->language, optarg);
        break;
      case 'M': /* short name shown in the help message */
      case 'D':
        context_replace_string(&ctx->maca_data_path, optarg);
        break;
      case 'F':
        context_replace_string(&ctx->fm_filename, optarg);
        break;
      case 'x':
        context_replace_string(&ctx->features_filename, optarg);
        break;
      case 'w':
        context_replace_string(&ctx->cfw_filename, optarg);
        break;
      default:
        /* Unknown or unhandled option: ignored, as before. */
        break;
    }
  }
  context_set_linguistic_resources_filenames(ctx);
  return ctx;
}
/*
 * Complete the resource file names that were not given on the command line.
 *
 * The data directory is ctx->maca_data_path when set, otherwise the value of
 * the MACAON_DIR environment variable; when neither is available a warning
 * is printed and an empty directory is used. If ctx->fplm_filename is still
 * NULL it is set to "<data dir>/<language>/bin/" DEFAULT_FPLM_FILENAME.
 *
 * The name is built in a buffer sized from its actual length, so arbitrarily
 * long paths coming from the environment or the command line cannot overflow.
 */
void context_set_linguistic_resources_filenames(context *ctx)
{
  const char *data_dir = ctx->maca_data_path;

  if (data_dir == NULL) {
    data_dir = getenv("MACAON_DIR");
    if (data_dir == NULL) {
      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
      data_dir = "";
    }
  }

  if (ctx->fplm_filename != NULL)
    return;

  const char *language = ctx->language ? ctx->language : "";

  /* First pass only measures the formatted length. */
  int needed = snprintf(NULL, 0, "%s/%s/bin/%s", data_dir, language, DEFAULT_FPLM_FILENAME);
  if (needed < 0)
    return;

  size_t size = (size_t)needed + 1;
  char *filename = malloc(size);
  if (filename == NULL) {
    fprintf(stderr, "out of memory while building the default fplm file name\n");
    return;
  }
  snprintf(filename, size, "%s/%s/bin/%s", data_dir, language, DEFAULT_FPLM_FILENAME);
  ctx->fplm_filename = filename;
}
#ifndef __MACA_MORPHO_CONTEXT__
#define __MACA_MORPHO_CONTEXT__
#include "mcd.h"
#include <stdlib.h>

/* File name used for the fplm resource when -f is not given. */
#define DEFAULT_FPLM_FILENAME "fplm"

/* Run-time configuration filled from the command line. */
typedef struct {
  int help;                /* set to 1 by -h / --help */
  int verbose;             /* set to 1 by -v / --verbose */
  int debug_mode;          /* set to 1 by -d / --debug */
  char *program_name;      /* copy of argv[0] */
  char *fplm_filename;     /* -f; defaults to <data dir>/<language>/bin/fplm */
  char *language;          /* -L; defaults to "fr" */
  char *maca_data_path;    /* maca_data directory; NULL means use $MACAON_DIR */
  char *fm_filename;       /* -F; feature model file name */
  char *features_filename; /* -x; features dictionary file name */
  char *cfw_filename;      /* -w; weight matrix file name */
} context;

/* Allocate a context holding default values; release with context_free(). */
context *context_new(void);
/* Free a context together with the strings it owns. */
void context_free(context *ctx);
/* Parse the command line into a newly allocated context. */
context *context_read_options(int argc, char *argv[]);

/* Each function below prints the help text of one option group on stderr. */
void context_general_help_message(context *ctx);
void context_language_help_message(context *ctx);
void context_fplm_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx);
void context_fm_help_message(context *ctx);
void context_features_filename_help_message(context *ctx);
void context_weights_matrix_filename_help_message(context *ctx);
void context_features_model_help_message(context *ctx);
#endif
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"feat_lib.h"
/*
 * Feature function: last character of the form.
 *
 * input is a NUL-terminated string (or NULL). Returns the last character as
 * an int, or -1 when input is NULL or empty.
 */
int p1(void *input)
{
  const char *form = (const char *)input;
  size_t len;

  if (form == NULL)
    return -1;
  len = strlen(form);
  /* An empty form has no last character; indexing len - 1 would underflow. */
  return (len >= 1) ? form[len - 1] : -1;
}
/*
 * Feature function: second-to-last character of the form.
 *
 * input is a NUL-terminated string (or NULL). Returns that character as an
 * int, or -1 when input is NULL or shorter than two characters.
 */
int p2(void *input)
{
  const char *form = (const char *)input;
  size_t len;

  if (form == NULL)
    return -1;
  len = strlen(form);
  /* Forms of length 0 or 1 have no such character; avoid reading before the buffer. */
  return (len >= 2) ? form[len - 2] : -1;
}
/*
 * Create a feature library and register the feature functions of this file
 * ("p1" then "p2") in it. Returns the new library.
 */
feat_lib *feat_lib_build(void)
{
  static const struct {
    char *name;
    int (*fct)(void *);
  } entries[] = {
    {"p1", p1},
    {"p2", p2},
  };
  feat_lib *lib = feat_lib_new();
  size_t i;

  /* Registration order follows the table order. */
  for (i = 0; i < sizeof entries / sizeof entries[0]; i++)
    feat_lib_add(lib, 1, entries[i].name, entries[i].fct);
  return lib;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#include "vectorize.h"
#include <stdio.h>
#include "fplm2cff.h"
/*
 * Read forms from stdin, turn each one into a feature vector and print it.
 *
 * With -h the help text is printed and the program exits with status 1.
 * Otherwise whitespace-separated forms are read until end of input or until
 * the form "end" has been read; the "end" form itself is still vectorized
 * before the loop stops. New features are added to the dictionary
 * (ADD_MODE), which is written to the -x file when that option was given.
 */
int main(int argc, char *argv[])
{
  context *ctx = context_read_options(argc, argv);

  if (ctx->help) {
    context_general_help_message(ctx);
    context_language_help_message(ctx);
    context_fplm_help_message(ctx);
    context_maca_data_path_help_message(ctx);
    context_features_filename_help_message(ctx);
    context_features_model_help_message(ctx);
    exit(1);
  }

  feat_vec *fv = feat_vec_new(10);
  dico *dico_features = dico_new("dico_features", 1000);
  feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);

  /* Start empty so the first strcmp() reads a valid string. */
  char form[100] = "";

  /* %99s bounds the read to the buffer; a failed read (EOF) ends the loop. */
  while (strcmp(form, "end") != 0 && fscanf(stdin, "%99s", form) == 1) {
    printf("form = %s\n", form);
    form2fv(form, fv, fm, dico_features, ADD_MODE);
    feat_vec_print(stdout, fv);
  }

  if (ctx->features_filename)
    dico_print(ctx->features_filename, dico_features);
  return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#include "vectorize.h"
#include "feature_table.h"
/*
 * Print the complete help text of the predict program, one option group
 * after the other, then terminate the process with status 1.
 */
void predict_help_message(context *ctx)
{
  void (*const sections[])(context *) = {
    context_general_help_message,
    context_language_help_message,
    context_fplm_help_message,
    context_maca_data_path_help_message,
    context_features_filename_help_message,
    context_weights_matrix_filename_help_message,
    context_features_model_help_message,
  };
  size_t i;

  for (i = 0; i < sizeof sections / sizeof sections[0]; i++)
    sections[i](ctx);
  exit(1);
}
#include <stdio.h>
#include "predict.h"
/*
 * Predict a class for every form read from stdin.
 *
 * With -h, predict_help_message() is called to print the usage text. Otherwise the
 * weight table (-w), the features dictionary (-x) and the feature model (-F)
 * are loaded, then whitespace-separated forms are read until end of input or
 * until the form "end" has been read; the "end" form itself is still
 * processed before the loop stops. For each form the feature vector and the
 * class returned by feature_table_argmax() are printed. Afterwards the
 * dictionary is written back to the -x file and create_predictions_file()
 * is called.
 */
int main(int argc, char *argv[])
{
  context *ctx = context_read_options(argc, argv);

  if (ctx->help)
    predict_help_message(ctx);

  feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose);
  feat_vec *fv = feat_vec_new(10);
  dico *dico_features = dico_read(ctx->features_filename, 0.5);
  feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);

  /* Start empty so the first strcmp() reads a valid string. */
  char form[100] = "";
  int class;
  float max;

  /* %99s bounds the read to the buffer; a failed read (EOF) ends the loop. */
  while (strcmp(form, "end") != 0 && fscanf(stdin, "%99s", form) == 1) {
    printf("form = %s\n", form);
    form2fv(form, fv, fm, dico_features, LOOKUP_MODE);
    class = feature_table_argmax(fv, cfw, &max);
    feat_vec_print(stdout, fv);
    printf("class = %d\n", class);
  }

  if (ctx->features_filename)
    dico_print(ctx->features_filename, dico_features);

  create_predictions_file(ctx);
  printf("prediction.txt has been generated in the Files directory.\n");
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment