diff --git a/maca_morpho/src/fplm.h b/maca_morpho/src/fplm.h index fe031f15edea7f5aa9257e9e5c32d9ab5ad73ceb..206d1a499bd0f646292670d3c03a2ba833de0d20 100644 --- a/maca_morpho/src/fplm.h +++ b/maca_morpho/src/fplm.h @@ -11,6 +11,6 @@ int choose_class(char* class); int extract_class_position(CLASS class); void extract_morpho_feature(CLASS class, char* morpho_feature, char* morpho); - int associate_number_to_classes(char* classes_array, char class); + int associate_number_to_classes(FILE* classes_code, int class); #endif diff --git a/maca_morpho/src/fplm2cff.c b/maca_morpho/src/fplm2cff.c index e0321366447c6fcc32e5366ba70ec9b405d15c44..6547410b7c17f9b347ee7bafd1c66baac55a1551 100644 --- a/maca_morpho/src/fplm2cff.c +++ b/maca_morpho/src/fplm2cff.c @@ -9,6 +9,7 @@ int main(int argc, char *argv[]) fplm2cff_help_message(ctx); create_cff(ctx); printf("cff.txt has been generated in the Files directory.\n"); + printf("code_class has been generated in the Files directory.\n"); return 0; } diff --git a/maca_morpho/src/fplm2cff.h b/maca_morpho/src/fplm2cff.h index 33b8d98a0718fda0ab6b333dd340ce4643b7b004..ebe8910d911cbd8394e133f9144e06480f5c041c 100644 --- a/maca_morpho/src/fplm2cff.h +++ b/maca_morpho/src/fplm2cff.h @@ -10,7 +10,7 @@ #include "fplm.h" void create_cff(context* ctx); - void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); + void write_cff(FILE *cff, CLASS class, FILE* code_class, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); void fplm2cff_help_message(context *ctx); #endif diff --git a/maca_morpho/src/fplm2cff_fct.c b/maca_morpho/src/fplm2cff_fct.c index b2b6e2a1b43837305d0ea06f82186a4dcc1335f5..1f0ceaa96b506f99a67886f38f5a2f67dd361c07 100644 --- a/maca_morpho/src/fplm2cff_fct.c +++ b/maca_morpho/src/fplm2cff_fct.c @@ -19,6 +19,7 @@ void create_cff(context* ctx) { FILE* fplm = NULL; FILE* cff = NULL; + FILE* code_class = NULL; feat_vec *fv = NULL; dico *dico_features = NULL; feat_model *fm = NULL; @@ -27,11 +28,7 @@ void create_cff(context* ctx) char pos[50]; char lemma[100]; char morpho[50]; - char classes_array[100]; CLASS class = choose_class(ctx->class_name); - - classes_array[0]='0'; - classes_array[1]='\0'; fplm = fopen(ctx->fplm_filename,"r"); if(fplm == NULL) { @@ -47,6 +44,13 @@ void create_cff(context* ctx) fprintf(stderr,"Problem with the cff file.\n"); exit(EXIT_FAILURE); } + code_class = fopen("../../Files/code_class","w+"); + if(code_class==NULL) + { + fprintf(stderr,"Problem with the classes_code file.\n"); + exit(EXIT_FAILURE); + } + fprintf(code_class,"%s\n",ctx->class_name); while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1) { if(fields_nb!=4) @@ -58,22 +62,23 @@ void create_cff(context* ctx) } continue; } - write_cff(cff, class, classes_array, form, morpho, fv, fm, dico_features); + write_cff(cff, class, code_class, form, morpho, fv, fm, dico_features); } if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - fclose(fplm); fclose(cff); + fclose(code_class); } -void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +void write_cff(FILE *cff, CLASS class, FILE* code_class, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) { int i=0; + int tmp_code; char morpho_feature[10]; extract_morpho_feature(class, morpho_feature, morpho); - /*write the class' number in cff (ASCII code of the morpho_feature)*/ + /*write the class' code in cff*/ if(morpho_feature[0] == '\0') { fprintf(cff,"0"); @@ -81,36 +86,29 @@ void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* mo feat_vec_print(cff, fv); return; } - if(class == GENDER || class == NUMBER) - { - fprintf(cff, "%d", associate_number_to_classes(classes_array, morpho_feature[i])); - form2fv(form, fv, fm, dico_features, ADD_MODE); - feat_vec_print(cff, fv); - } - else if(class == PERSON) - { - for(i=0; i<(int)strlen(morpho_feature); i++) - { - fprintf(cff,"%c", morpho_feature[i]); - /*because a word can have several persons in morpho so we have to write the - first class and the features' values, then the second class and the features' values, etc */ - form2fv(form, fv, fm, dico_features, ADD_MODE); - feat_vec_print(cff, fv); - } - } - else + //for example if the tense is 'PS' the class' number will be the ASCII code of 'P' concatenate with the ASCII code of 'S' + if(class != PERSON) { - //for example if tense is 'PS' the class' number will be the ASCII code of 'P' concatenate with the ASCII code of 'S' FILE* tmp = fopen("tmp.txt","w+"); for(i=0; i<(int)strlen(morpho_feature); i++) fprintf(tmp,"%d", morpho_feature[i]); rewind(tmp); - fscanf(tmp, "%d", &morpho_feature[0]); - fprintf(cff,"%d", associate_number_to_classes(classes_array, morpho_feature[0])); + fscanf(tmp, "%d", &tmp_code); + fprintf(cff,"%d", associate_number_to_classes(code_class,tmp_code)); form2fv(form, fv, fm, dico_features, ADD_MODE); feat_vec_print(cff, fv); fclose(tmp); remove("tmp.txt"); } + else + { + for(i=0; i<(int)strlen(morpho_feature);i++) + { + tmp_code = morpho_feature[i]; + fprintf(cff,"%d", associate_number_to_classes(code_class,tmp_code)); + form2fv(form, fv, fm, dico_features, ADD_MODE); + feat_vec_print(cff, fv); + } + } } diff --git a/maca_morpho/src/fplm_fct.c b/maca_morpho/src/fplm_fct.c index 9b5b050907fdec4453bdad4918a688eb132c91dc..4a40a967d12744c2d8d7130d4eaee5bc4630375b 100644 --- a/maca_morpho/src/fplm_fct.c +++ b/maca_morpho/src/fplm_fct.c @@ -3,8 +3,8 @@ #include <string.h> #include "fplm.h" -/* Read a line from the fplm file and extract the form/pos/lemma/morpho. - * Return -1 if there's no more line to read, else the number of string read*/ +/** Read a line from the fplm file and extract the form/pos/lemma/morpho. + * Return -1 if there's no more line to read, else the number of string read**/ int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho) { int fields_nb; @@ -15,8 +15,8 @@ int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho) return fields_nb; } -/* Return the class' position in morpho - * (the class could be the tense, the person, the gender or the number of a word)*/ +/** Return the class' position in morpho + * (the class could be the tense, the person, the gender or the number of a word)**/ int extract_class_position(CLASS class) { switch(class) @@ -36,7 +36,7 @@ int extract_class_position(CLASS class) return -1; } -/* Return the class choosen by the user if their class exists*/ +/** Return the class choosen by the user if their class exists**/ int choose_class(char* class) { if(!strcmp(class,"tense")) @@ -55,6 +55,7 @@ int choose_class(char* class) return -1; } +/** Extract the class we want from morpho and write it in morpho_feature**/ void extract_morpho_feature(CLASS class, char* morpho_feature, char* morpho) { int cpt_diese = 0; @@ -88,14 +89,18 @@ void extract_morpho_feature(CLASS class, char* morpho_feature, char* morpho) } } -int associate_number_to_classes(char* classes_array, char class) +/**Write the code_class file (use in predict to know the real class) + * Return the class' code**/ +int associate_number_to_classes(FILE* classes_code, int class) { - int i; - int size = (int)strlen(classes_array); - for(i=1; i<size; i++) - if(classes_array[i] == class) - return i; - classes_array[size] = class; - classes_array[size+1] = '\0'; - return size; + int code=0; + int cl; + char tmp[10]; + rewind(classes_code); + fscanf(classes_code,"%s",tmp); + while(fscanf(classes_code,"%d %d\n",&code,&cl) == 2) + if(cl == class) + return code; + fprintf(classes_code, "%d %d\n", code+1, class); + return code+1; } diff --git a/maca_morpho/src/maca_morpho_context.c b/maca_morpho/src/maca_morpho_context.c index 6c32b30a29518fb0326d2bc09fa5dde813631107..c141f8e3d6f28667a79e92ac936ea3014621b246 100644 --- a/maca_morpho/src/maca_morpho_context.c +++ b/maca_morpho/src/maca_morpho_context.c @@ -34,6 +34,7 @@ context *context_new(void) ctx->features_filename = NULL; ctx->cfw_filename = NULL; ctx->class_name = NULL; + ctx->code_class_name = NULL; return ctx; } @@ -82,6 +83,10 @@ void context_fplm_test_percent_help_message(context *ctx){ fprintf(stderr, "\t-p --percent <int> : percentage of the fplm file to make a test file\n"); } +void context_code_class_help_message(context* ctx){ + fprintf(stderr, "\t-y --code_class <file> : code_class file name\n"); +} + context *context_read_options(int argc, char *argv[]) { int c; @@ -90,7 +95,7 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[12] = + static struct option long_options[13] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, @@ -103,12 +108,13 @@ context *context_read_options(int argc, char *argv[]) {"feat", required_argument, 0, 'x'}, {"weights", required_argument, 0, 'w'}, {"class", required_argument, 0, 'c'}, - {"percent", required_argument, 0, 'p'} + {"percent", required_argument, 0, 'p'}, + {"code_class", required_argument, 0, 'y'} }; optind = 0; opterr = 0; - while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:c:p:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:y:x:w:c:p:", long_options, &option_index)) != -1){ switch (c) { case 'd': @@ -138,12 +144,15 @@ context *context_read_options(int argc, char *argv[]) case 'w': ctx->cfw_filename = strdup(optarg); break; - case 'c': - ctx->class_name = strdup(optarg); - break; - case 'p': - ctx->fplm_test_percent = atoi(optarg); - break; + case 'c': + ctx->class_name = strdup(optarg); + break; + case 'p': + ctx->fplm_test_percent = atoi(optarg); + break; + case 'y': + ctx->code_class_name = strdup(optarg); + break; } } diff --git a/maca_morpho/src/maca_morpho_context.h b/maca_morpho/src/maca_morpho_context.h index 12443f327b856171e17995aa00c2676761f83613..cf66ad2f4703cc1414ae89afac92b098b5779afe 100644 --- a/maca_morpho/src/maca_morpho_context.h +++ b/maca_morpho/src/maca_morpho_context.h @@ -21,6 +21,7 @@ typedef struct { char *features_filename; char *cfw_filename; char *class_name; + char *code_class_name; } context; @@ -38,4 +39,5 @@ void context_weights_matrix_filename_help_message(context *ctx); void context_features_model_help_message(context *ctx); void context_class_help_message(context *ctx); void context_fplm_test_percent_help_message(context *ctx); +void context_code_class_help_message(context* ctx); #endif diff --git a/maca_morpho/src/predict.h b/maca_morpho/src/predict.h index 9967e5997970c372d116e3e4b46837d824150988..28dade5b89157489f9eff88cb7a626fef4cb5779 100644 --- a/maca_morpho/src/predict.h +++ b/maca_morpho/src/predict.h @@ -11,7 +11,9 @@ #include "fplm.h" void create_predictions_file(context* ctx); - void make_prediction(FILE* predictions, char* form, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); + void make_prediction(FILE* predictions, FILE* code_class, CLASS cl, int* errors, char* form, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); + void errors_nb(FILE* code_class, CLASS class, int class_predicted, int* errors, char* morpho); + int extract_real_class(FILE* code_class, CLASS class, char* morpho_feature, int* real_class); void predict_help_message(context *ctx); #endif diff --git a/maca_morpho/src/predict_fct.c b/maca_morpho/src/predict_fct.c index 0afe5ad9125fab6b700abfe6b4235e49dcad4f38..13f0df7146cd0635d262a95c87520b0050b7d78e 100644 --- a/maca_morpho/src/predict_fct.c +++ b/maca_morpho/src/predict_fct.c @@ -1,5 +1,6 @@ #include <stdlib.h> #include <stdio.h> +#include <string.h> #include "predict.h" void predict_help_message(context *ctx) @@ -11,6 +12,7 @@ void predict_help_message(context *ctx) context_features_filename_help_message(ctx); context_weights_matrix_filename_help_message(ctx); context_features_model_help_message(ctx); + context_code_class_help_message(ctx); exit(1); } @@ -18,22 +20,38 @@ void create_predictions_file(context* ctx) { FILE* fplm_test = NULL; FILE* predictions = NULL; + FILE* code_class = NULL; feature_table *cfw = NULL; feat_vec *fv = NULL; dico *dico_features = NULL; feat_model *fm = NULL; + int line_nb=0; int fields_nb; + int errors = 0; char form[100]; char pos[50]; char lemma[100]; char morpho[50]; - + char class_name[10]; + CLASS class; + code_class = fopen(ctx->code_class_name,"r"); + if(code_class==NULL) + { + fprintf(stderr, "Could not the code_class file.\n"); + exit(EXIT_FAILURE); + } + if(fscanf(code_class,"%s",class_name)!=1) + { + fprintf(stderr, "Your code_class file is not conform.\n"); + exit(EXIT_FAILURE); + } fplm_test = fopen(ctx->fplm_filename,"r"); if(fplm_test == NULL) { fprintf(stderr,"Could not open input file.\nYou can generate a fplm_test file with fplm2train_test\nThe fplm_test file will be in the Files directory.\n"); exit(EXIT_FAILURE); } + class = choose_class(class_name); cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); fv = feat_vec_new(10); dico_features = dico_read(ctx->features_filename, 0.5); @@ -44,7 +62,6 @@ void create_predictions_file(context* ctx) fprintf(stderr,"Problem with the prediction file.\n"); exit(EXIT_FAILURE); } - while((fields_nb = read_line_fplm(fplm_test, form, pos, lemma, morpho)) != -1) { if(fields_nb!=4) @@ -56,30 +73,76 @@ void create_predictions_file(context* ctx) } continue; } - make_prediction(predictions, form, morpho, cfw, fv, dico_features, fm); + line_nb++; + make_prediction(predictions, code_class, class, &errors, form, morpho, cfw, fv, dico_features, fm); } - + printf("Error rate : %lf %%\n", (float)errors*100/line_nb); if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - fclose(fplm_test); fclose(predictions); + fclose(code_class); } -void make_prediction(FILE* predictions, char* form, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) +void make_prediction(FILE* predictions, FILE* code_class, CLASS cl, int* errors, char* form, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) { int class; float max; - fprintf(predictions, "form = %s\n", form); form2fv(form, fv, fm, dico_features, LOOKUP_MODE); class = feature_table_argmax(fv, cfw, &max); feat_vec_print(predictions, fv); - fprintf(predictions, "class predicted = %d\n", class); + fprintf(predictions, "class predicted = %d ", class); + errors_nb(code_class, cl, class, errors, morpho); } +void errors_nb(FILE* code_class, CLASS class, int class_predicted, int* errors, char* morpho) +{ + int i; + int size = 0; + int real_class[10]; + char morpho_feature[10]; + extract_morpho_feature(class,morpho_feature,morpho); + size = extract_real_class(code_class, class, morpho_feature, real_class); + for(i=0; i<=size; i++) + if(class_predicted == real_class[i]) + return; + *errors = *errors+1; +} - +int extract_real_class(FILE* code_class, CLASS class, char* morpho_feature, int* real_class) +{ + int size = -1; + int tmp_code; + int i; + if(morpho_feature[0]=='\0') + { + size++; + real_class[size]=0; + } + else if(class != PERSON) + { + FILE* tmp = fopen("tmp.txt","w+"); + for(i=0; i<(int)strlen(morpho_feature); i++) + fprintf(tmp,"%d", morpho_feature[i]); + rewind(tmp); + fscanf(tmp, "%d", &tmp_code); + size++; + real_class[size] = associate_number_to_classes(code_class,tmp_code); + fclose(tmp); + remove("tmp.txt"); + } + else + { + for(i=0; i<(int)strlen(morpho_feature);i++) + { + size++; + tmp_code = morpho_feature[i]; + real_class[size] = associate_number_to_classes(code_class,tmp_code); + } + } + return size; +} diff --git a/perceptron/lib/src/cf_file.c b/perceptron/lib/src/cf_file.c index 9afcf344fe2a8934f5dd0b71f52d968552a1c0ac..0fecf2ae3bd2a80d70f327229fc2408584062034 100644 --- a/perceptron/lib/src/cf_file.c +++ b/perceptron/lib/src/cf_file.c @@ -72,6 +72,7 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_feat = *max_feat + 1; *max_class = *max_class + 1; + //printf("%d\n",*max_class +1); fclose(f); }