diff --git a/Files/fm2.txt b/Files/fm2.txt index 50530cfc5f8a85c309c740b36f31397d8aa5098d..a17f75409717b3a79d96c653b3acfd207171a171 100644 --- a/Files/fm2.txt +++ b/Files/fm2.txt @@ -1,3 +1,3 @@ -feat_person feat_pos s1 -feat_person feat_pos s2 -feat_person feat_pos s1 s2 +feat_pos s1 +feat_pos s2 +feat_pos s1 s2 diff --git a/Files/fm3.txt b/Files/fm3.txt index c69aac69839c42ffb5f98d0db6d5e7635c067ce7..78aa77381b018d72c19ac7dea05657da118d41ec 100644 --- a/Files/fm3.txt +++ b/Files/fm3.txt @@ -1,5 +1,5 @@ -feat_person feat_pos s1 -feat_person feat_pos s2 -feat_person feat_pos s3 -feat_person feat_pos s1 s2 -feat_person feat_pos s1 s2 s3 +feat_pos s1 +feat_pos s2 +feat_pos s3 +feat_pos s1 s2 +feat_pos s1 s2 s3 diff --git a/Files/fm4.txt b/Files/fm4.txt index 9b8dc89bbbf69f6de88dbdcf23c9e193672945f1..281ac4d6ea5fa0b1303466f33ad20fae9b31a549 100644 --- a/Files/fm4.txt +++ b/Files/fm4.txt @@ -1,7 +1,7 @@ -s1 -s2 -s3 -s4 -s1 s2 -s1 s2 s3 -s1 s2 s3 s4 +feat_pos s1 +feat_pos s2 +feat_pos s3 +feat_pos s4 +feat_pos s1 s2 +feat_pos s1 s2 s3 +feat_pos s1 s2 s3 s4 diff --git a/Files/fm5.txt b/Files/fm5.txt index 0d37c50f175f9c0e1395932700282ec685a0383a..44aeee5b083a3e56b29b623b7168d51099fac063 100644 --- a/Files/fm5.txt +++ b/Files/fm5.txt @@ -1,9 +1,9 @@ -feat_person feat_pos s1 -feat_person feat_pos s2 -feat_person feat_pos s3 -feat_person feat_pos s4 -feat_person feat_pos s5 -feat_person feat_pos s1 s2 -feat_person feat_pos s1 s2 s3 -feat_person feat_pos s1 s2 s3 s4 -feat_person feat_pos s1 s2 s3 s4 s5 +feat_pos s1 +feat_pos s2 +feat_pos s3 +feat_pos s4 +feat_pos s5 +feat_pos s1 s2 +feat_pos s1 s2 s3 +feat_pos s1 s2 s3 s4 +feat_pos s1 s2 s3 s4 s5 diff --git a/Files/fm6.txt b/Files/fm6.txt index 980c9adbec9a9e529457de7d2c1bdde8e08496c5..fce00f9152b7e5391d19960888392a024a66a312 100644 --- a/Files/fm6.txt +++ b/Files/fm6.txt @@ -1,11 +1,11 @@ -s1 -s2 -s3 -s4 -s5 -s6 -s1 s2 -s1 s2 s3 -s1 s2 s3 s4 -s1 s2 s3 s4 s5 -s1 s2 s3 s4 s5 s6 +feat_pos s1 +feat_pos s2 +feat_pos s3 +feat_pos s4 +feat_pos s5 +feat_pos s6 +feat_pos s1 s2 +feat_pos s1 s2 s3 +feat_pos s1 s2 s3 s4 +feat_pos s1 s2 s3 s4 s5 +feat_pos s1 s2 s3 s4 s5 s6 diff --git a/Files/fm7.txt b/Files/fm7.txt index ede6e21dd19a68f01f2532f137aff10539861fa5..69e20c33f0ba97f11ef10f409354f9f2fb798a47 100644 --- a/Files/fm7.txt +++ b/Files/fm7.txt @@ -1,13 +1,13 @@ -s1 -s2 -s3 -s4 -s5 -s6 -s7 -s1 s2 -s1 s2 s3 -s1 s2 s3 s4 -s1 s2 s3 s4 s5 -s1 s2 s3 s4 s5 s6 -s1 s2 s3 s4 s5 s6 s7 +feat_pos s1 +feat_pos s2 +feat_pos s3 +feat_pos s4 +feat_pos s5 +feat_pos s6 +feat_pos s7 +feat_pos s1 s2 +feat_pos s1 s2 s3 +feat_pos s1 s2 s3 s4 +feat_pos s1 s2 s3 s4 s5 +feat_pos s1 s2 s3 s4 s5 s6 +feat_pos s1 s2 s3 s4 s5 s6 s7 diff --git a/maca_common/include/feat_desc.h b/maca_common/include/feat_desc.h index 1fa17ce7a19a96e00a9e6b1afaf3465cb469b75d..30e8d5c5647bbbc3d28ea76313c5a9e41c24d515 100644 --- a/maca_common/include/feat_desc.h +++ b/maca_common/include/feat_desc.h @@ -1,14 +1,9 @@ #ifndef __FEAT_DESC__ #define __FEAT_DESC__ -typedef struct -{ - char* form; - char* pos; - char* morpho; -}FP; - -typedef int (*feat_fct) (FP *c); +#include "../../maca_morpho/src/fplm.h" + +typedef int (*feat_fct) (FPLM *c); typedef struct { char *name; diff --git a/maca_morpho/src/fplm.h b/maca_morpho/src/fplm.h index e6d539c6943146a8751e419213210942d3fd667d..d93e3c6440acfd87653133452f3d1204814f997c 100644 --- a/maca_morpho/src/fplm.h +++ b/maca_morpho/src/fplm.h @@ -1,17 +1,29 @@ #ifndef __FPLM__ #define __FPLM__ + #include <stdio.h> + #include <stdlib.h> /*everything related to the fplm file*/ + typedef struct + { + char* form; + char* pos; + char* morpho; + char* lemma; + }FPLM; + typedef enum { TENSE, PERSON, GENDER, NUMBER }TARGET; - int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho); - int extract_class_position(TARGET target); - int choose_target(char* target); - int associate_number_to_classes_separate(FILE* code_class, char* target_class, int current_target_class); - int associate_number_to_classes(FILE* code_class, char* target_class, int all_classes); - void extract_classes_from_morpho(TARGET target, char* target_class, char* morpho); + FPLM* new_fplm (void); + void free_fplm (FPLM* fplm); + void extract_classes_from_morpho (TARGET target, char* target_class, char* morpho); + int choose_target (char* target); + int extract_class_position (TARGET target); + int read_line_fplm (FILE* fplm_file, FPLM* fplm); + int associate_number_to_classes_separated (FILE* code_class, char* target_class, int current_target_class); + int associate_number_to_classes (FILE* code_class, char* target_class, int all_classes); #endif diff --git a/maca_morpho/src/fplm2cff.c b/maca_morpho/src/fplm2cff.c index e9208f56412def80d2a91779678aef975c2e63b0..cef32840719b994cab2b70138a43c027a90cda21 100644 --- a/maca_morpho/src/fplm2cff.c +++ b/maca_morpho/src/fplm2cff.c @@ -17,8 +17,8 @@ int main(int argc, char *argv[]) create_cff_all_classes(ctx); else create_cff(ctx); - printf("cff.txt has been generated in the Files directory.\n"); - printf("The code class file has been generated in the Files directory.\n"); + printf("cff.txt has been generated.\n"); + printf("The code class file has been generated.\n"); context_free(ctx); return 0; } diff --git a/maca_morpho/src/fplm2cff.h b/maca_morpho/src/fplm2cff.h index cc1c4f10deb4307b5f09deb3fe1fe0dd1d552596..449554950de5c5f6da22d88bdb99605bc061b094 100644 --- a/maca_morpho/src/fplm2cff.h +++ b/maca_morpho/src/fplm2cff.h @@ -11,12 +11,14 @@ void fplm2cff_help_message(context *ctx); - void create_cff_all_classes(context* ctx); - void write_cff_all_classes_non_separate(FILE *cff, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); - void write_cff_all_classes_separate(FILE *cff, FILE* code_class_big, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); + /**Fonctions to create cff when morpho is a class**/ + void create_cff_all_classes (context* ctx); + void write_cff_all_classes_not_separated (FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features); + void write_cff_all_classes_separated (FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features); - void create_cff(context* ctx); - void write_cff_non_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); - void write_cff_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features); + /**Fonctions to create cff with one target (tense, person, gender or number)**/ + void create_cff (context* ctx); + void write_cff_not_separated (FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features); + void write_cff_separated (FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features); #endif diff --git a/maca_morpho/src/fplm2cff_fct.c b/maca_morpho/src/fplm2cff_fct.c index 47f49526c54a24bfa9286722a613f5df317d8b74..33681fee4be9236372a4a057111bdeaf231f3573 100644 --- a/maca_morpho/src/fplm2cff_fct.c +++ b/maca_morpho/src/fplm2cff_fct.c @@ -16,167 +16,146 @@ void fplm2cff_help_message(context *ctx) exit(1); } - /*Predict all morpho features -tense, person, gender, number- in one*/ + /**Fonctions to create cff when morpho is a class**/ void create_cff_all_classes(context* ctx) { - FILE* fplm = NULL; - FILE* cff = NULL; - FILE* code_class = NULL; - feat_vec *fv = NULL; - dico *dico_features = NULL; - feat_model *fm = NULL; + FPLM* fplm = new_fplm(); + FILE* fplm_file = fopen(ctx->fplm_filename,"r"); + FILE* cff = fopen("cff.txt","w"); + FILE* code_class = fopen("code_class","w+"); + feat_vec *fv = feat_vec_new(10); + dico *dico_features = dico_new("dico_features", 1000); + feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); int fields_nb; - char form[100]; - char pos[50]; - char lemma[100]; - char morpho[50]; - FP* fp = malloc(sizeof(FP)); - fp->form = malloc(sizeof(char)*100); - fp->pos = malloc(sizeof(char)*50); - fp->morpho = malloc(sizeof(char)*10); - fplm = fopen(ctx->fplm_filename,"r"); - if(fplm == NULL) + + if(fplm_file == NULL) { fprintf(stderr,"Could not open the fplm file.\n"); exit(EXIT_FAILURE); } - fv = feat_vec_new(10); - dico_features = dico_new("dico_features", 1000); - fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - cff = fopen("../../Files/cff.txt","w"); if(cff==NULL) { fprintf(stderr,"Problem with the cff file.\n"); exit(EXIT_FAILURE); } - code_class = fopen("../../Files/code_class","w+"); if(code_class==NULL) { - fprintf(stderr,"Problem with the classes_code file.\n"); + fprintf(stderr,"Problem with the code_class file.\n"); exit(EXIT_FAILURE); } - while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1) + while((fields_nb = read_line_fplm(fplm_file, fplm)) != -1) { if(fields_nb!=4) { - if(1) + if(ctx->debug_mode) { - fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho); + fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho); fprintf(stderr, "incorrect fplm entry, skipping it\n"); } continue; } - strcpy(fp->form,form); - strcpy(fp->pos,pos); - strcpy(fp->morpho,morpho); if(ctx->separate_classes) - write_cff_all_classes_separate(cff, code_class, fp, morpho, fv, fm, dico_features); + write_cff_all_classes_separated(cff, code_class, fplm, fv, fm, dico_features); else - write_cff_all_classes_non_separate(cff, code_class, fp, morpho, fv, fm, dico_features); + write_cff_all_classes_not_separated(cff, code_class, fplm, fv, fm, dico_features); } if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - free(fp->form); - free(fp->pos); - free(fp); - fclose(fplm); + + free_fplm(fplm); + fclose(fplm_file); fclose(cff); fclose(code_class); } -void write_cff_all_classes_non_separate(FILE *cff, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +void write_cff_all_classes_not_separated(FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features) { - fprintf(cff,"%d", associate_number_to_classes(code_class,morpho,1)); - form2fv(fp, fv, fm, dico_features, ADD_MODE); + fprintf(cff,"%d", associate_number_to_classes(code_class,fplm->morpho,1)); + form2fv(fplm, fv, fm, dico_features, ADD_MODE); feat_vec_print(cff, fv); } -void write_cff_all_classes_separate(FILE *cff, FILE* code_class_big, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +void write_cff_all_classes_separated(FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features) { char tense_class[10]; char person_class[10]; char gender_class[10]; char number_class[10]; char all_target[20]; - int i,j; - extract_classes_from_morpho(TENSE, tense_class, morpho); - extract_classes_from_morpho(PERSON, person_class, morpho); - extract_classes_from_morpho(GENDER, gender_class, morpho); - extract_classes_from_morpho(NUMBER, number_class, morpho); - if(tense_class[0] == '\0') - { - tense_class[0]='#'; - tense_class[1]='\0'; - } - if(person_class[0] == '\0') - { - person_class[0]='#'; - person_class[1]='\0'; - } - if(gender_class[0] == '\0') - { - gender_class[0]='#'; - gender_class[1]='\0'; - } - if(number_class[0] == '\0') - { - number_class[0]='#'; - number_class[1]='\0'; - } + int i,j,size=-1; + extract_classes_from_morpho(TENSE, tense_class, fplm->morpho); + extract_classes_from_morpho(PERSON, person_class, fplm->morpho); + extract_classes_from_morpho(GENDER, gender_class, fplm->morpho); + extract_classes_from_morpho(NUMBER, number_class, fplm->morpho); for(i=0; i<(int)strlen(tense_class); i++) { for(j=0; j<(int)strlen(person_class); j++) { - all_target[0] = tense_class[i]; - all_target[1] = person_class[j]; - all_target[2] = gender_class[0]; - all_target[3] = number_class[0]; - all_target[4] = '\0'; + size=-1; + if(tense_class[i]!='#') + { + size++; + all_target[size] = tense_class[i]; + } + size++; + all_target[size]='#'; + if(person_class[j]!='#') + { + size++; + all_target[size] = person_class[j]; + } + size++; + all_target[size]='#'; + if(gender_class[0]!='#') + { + size++; + all_target[size] = gender_class[0]; + } + size++; + all_target[size]='#'; + if(number_class[0]!='#') + { + size++; + all_target[size] = number_class[0]; + } + size++; + all_target[size]='#'; + size++; + all_target[size] = '\0'; - fprintf(cff,"%d", associate_number_to_classes(code_class_big, all_target, 1)); - form2fv(fp, fv, fm, dico_features, ADD_MODE); + fprintf(cff,"%d", associate_number_to_classes(code_class, all_target, 1)); + form2fv(fplm, fv, fm, dico_features, ADD_MODE); feat_vec_print(cff, fv); } } } - /*Predict one morpho feature */ + /**Fonctions to create cff with one target (tense, person, gender or number)**/ void create_cff(context* ctx) { - FILE* fplm = NULL; - FILE* cff = NULL; + FILE* fplm_file = fopen(ctx->fplm_filename,"r"); + FILE* cff = fopen("cff.txt","w"); FILE* code_class = NULL; - feat_vec *fv = NULL; - dico *dico_features = NULL; - feat_model *fm = NULL; + feat_vec *fv = feat_vec_new(10); + dico *dico_features = dico_new("dico_features", 1000); + feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); int fields_nb; - char form[100]; - char pos[50]; - char lemma[100]; - char morpho[50]; char code_class_name[30]; - FP* fp = malloc(sizeof(FP)); - fp->form = malloc(sizeof(char)*100); - fp->pos = malloc(sizeof(char)*50); - fp->morpho = malloc(sizeof(char)*10); + FPLM* fplm = new_fplm(); TARGET target = choose_target(ctx->target_name); - fplm = fopen(ctx->fplm_filename,"r"); - if(fplm == NULL) + + if(fplm_file == NULL) { fprintf(stderr,"Could not open input file.\nThe fplm file is in the Files directory.\n"); exit(EXIT_FAILURE); } - fv = feat_vec_new(10); - dico_features = dico_new("dico_features", 1000); - fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - cff = fopen("../../Files/cff.txt","w"); if(cff==NULL) { fprintf(stderr,"Problem with the cff file.\n"); exit(EXIT_FAILURE); } - strcpy(code_class_name,"../../Files/code_class_"); + strcpy(code_class_name,"code_class_"); strcat(code_class_name,ctx->target_name); code_class = fopen(code_class_name,"w+"); if(code_class==NULL) @@ -185,68 +164,58 @@ void create_cff(context* ctx) exit(EXIT_FAILURE); } fprintf(code_class,"%s\n",ctx->target_name); - while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1) + fprintf(code_class,"0 #\n"); + while((fields_nb = read_line_fplm(fplm_file, fplm)) != -1) { if(fields_nb!=4) { - if(1) + if(ctx->debug_mode) { - fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho); + fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho); fprintf(stderr, "incorrect fplm entry, skipping it\n"); } continue; } - strcpy(fp->form,form); - strcpy(fp->pos,pos); - strcpy(fp->morpho,morpho); if(ctx->separate_classes) - write_cff_separate(cff, target, code_class, fp, morpho, fv, fm, dico_features); + write_cff_separated(cff, code_class, fplm, target, fv, fm, dico_features); else - write_cff_non_separate(cff, target, code_class, fp, morpho, fv, fm, dico_features); + write_cff_not_separated(cff, code_class, fplm, target, fv, fm, dico_features); } if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - free(fp->form); - free(fp->pos); - free(fp); - fclose(fplm); + + free_fplm(fplm); + fclose(fplm_file); fclose(cff); fclose(code_class); } -void write_cff_non_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +void write_cff_not_separated(FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features) { char target_class[10]; - extract_classes_from_morpho(target, target_class, morpho); + extract_classes_from_morpho(target, target_class, fplm->morpho); /*write the class' code in cff*/ - if(target_class[0] == '\0') - fprintf(cff,"0"); - else - fprintf(cff,"%d", associate_number_to_classes(code_class,target_class,0)); - form2fv(fp, fv, fm, dico_features, ADD_MODE); + fprintf(cff,"%d", associate_number_to_classes(code_class,target_class,0)); + + /*write the feature vector in cff*/ + form2fv(fplm, fv, fm, dico_features, ADD_MODE); feat_vec_print(cff, fv); } -void write_cff_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +void write_cff_separated(FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features) { - int i=0; char target_class[10]; - extract_classes_from_morpho(target, target_class, morpho); + extract_classes_from_morpho(target, target_class, fplm->morpho); /*write the class' code in cff*/ - if(target_class[0] == '\0') - { - fprintf(cff,"0"); - form2fv(fp, fv, fm, dico_features, ADD_MODE); - feat_vec_print(cff, fv); - return; - } - for(i=0; i<(int)strlen(target_class);i++) + + for(i=0; i<(int)strlen(target_class); i++) { - fprintf(cff,"%d", associate_number_to_classes_separate(code_class,target_class,i)); - form2fv(fp, fv, fm, dico_features, ADD_MODE); + fprintf(cff,"%d", associate_number_to_classes_separated(code_class,target_class,i)); + /*write the feature vector in cff*/ + form2fv(fplm, fv, fm, dico_features, ADD_MODE); feat_vec_print(cff, fv); } } diff --git a/maca_morpho/src/fplm2train_test.c b/maca_morpho/src/fplm2train_test.c index dfa8a42ee06bc571f51bf5004ed60395f97ccce8..ba6b4bcde255110a7f744d1a3bac7750ff6fb2c3 100644 --- a/maca_morpho/src/fplm2train_test.c +++ b/maca_morpho/src/fplm2train_test.c @@ -15,7 +15,7 @@ int main(int argc, char** argv) if(ctx->help) fplm2traintest_help_message(ctx); generate_train_test(ctx); - printf("fplm_train.txt and fplm_test.txt have been generated in the Files directory.\n"); + printf("fplm_train.txt and fplm_test.txt have been generated.\n"); return 0; } diff --git a/maca_morpho/src/fplm_fct.c b/maca_morpho/src/fplm_fct.c index f70f3914c81d34c8aa0b2cd5217dc5925e4a51a6..fb8662638a9b5ad92375f65c43531c6adea0890e 100644 --- a/maca_morpho/src/fplm_fct.c +++ b/maca_morpho/src/fplm_fct.c @@ -1,17 +1,34 @@ -#include <stdlib.h> -#include <stdio.h> #include <string.h> #include "fplm.h" +FPLM* new_fplm(void) +{ + FPLM* fplm = malloc(sizeof(FPLM)); + fplm->form = malloc(sizeof(char)*100); + fplm->pos = malloc(sizeof(char)*20); + fplm->lemma = malloc(sizeof(char)*100); + fplm->morpho = malloc(sizeof(char)*20); + return fplm; +} + +void free_fplm(FPLM* fplm) +{ + free(fplm->form); + free(fplm->pos); + free(fplm->lemma); + free(fplm->morpho); + free(fplm); +} + /** Read a line from the fplm file and extract the form/pos/lemma/morpho. * Return -1 if there's no more line to read, else the number of string read**/ -int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho) +int read_line_fplm(FILE* fplm_file, FPLM* fplm) { int fields_nb; char buffer[10000]; - if(fgets(buffer, 10000, fplm)==NULL) + if(fgets(buffer, 10000, fplm_file)==NULL) return -1; - fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); + fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho); return fields_nb; } @@ -48,7 +65,7 @@ int choose_target(char* target) return NUMBER; else { - fprintf(stderr,"-t argument must be \"tense\", \"person\", \"gender\" or \"number\"\n"); + fprintf(stderr,"-t argument must be \"tense\", \"person\", \"gender\", \"number\" or \"all\".\n"); exit(EXIT_FAILURE); } return -1; @@ -86,11 +103,16 @@ void extract_classes_from_morpho(TARGET target, char* target_class, char* morpho } target_class[j] = '\0'; } + if(target_class[0] == '\0') + { + target_class[0] = '#'; + target_class[1] = '\0'; + } } /** Write the code_class file (use in predict to know the real class) * Return the class' code**/ -int associate_number_to_classes_separate(FILE* code_class, char* target_class, int current_target_class) +int associate_number_to_classes_separated(FILE* code_class, char* target_class, int current_target_class) { int code = 0; char tmp[20]; diff --git a/maca_morpho/src/maca_morpho_context.c b/maca_morpho/src/maca_morpho_context.c index 43aa68ab101eec69ab4285c60634b627ea9196d1..0527a965e150105bacbdec5a0479498fb0a56df2 100644 --- a/maca_morpho/src/maca_morpho_context.c +++ b/maca_morpho/src/maca_morpho_context.c @@ -29,6 +29,7 @@ context *context_new(void) ctx->verbose = 0; ctx->debug_mode = 0; ctx->separate_classes = 0; + ctx->cascade = 0; ctx->program_name = NULL; ctx->fplm_filename = NULL; ctx->language = strdup("fr"); @@ -93,6 +94,10 @@ void context_separate_classes_help_message(context *ctx){ fprintf(stderr, "\t-s --separate <int> : separate the classes for tense and person if you enter 1\n"); } +void context_cascade_help_message(context *ctx){ + fprintf(stderr, "\t-c --cascade <int> : activate the cascade mode if you enter 1 (output fplm file)\n"); +} + context *context_read_options(int argc, char *argv[]) { int c; @@ -101,7 +106,7 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[14] = + static struct option long_options[15] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, @@ -116,12 +121,13 @@ context *context_read_options(int argc, char *argv[]) {"target", required_argument, 0, 't'}, {"percent", required_argument, 0, 'p'}, {"code_class", required_argument, 0, 'y'}, - {"separate", required_argument, 0, 's'} + {"separate", required_argument, 0, 's'}, + {"cascade", required_argument, 0, 'c'} }; optind = 0; opterr = 0; - while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:y:x:w:t:p:s:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:y:x:w:t:p:s:c:", long_options, &option_index)) != -1){ switch (c) { case 'd': @@ -162,6 +168,9 @@ context *context_read_options(int argc, char *argv[]) break; case 's': ctx->separate_classes = atoi(optarg); + break; + case 'c': + ctx->cascade = atoi(optarg); break; } } diff --git a/maca_morpho/src/maca_morpho_context.h b/maca_morpho/src/maca_morpho_context.h index 333e1a267fb9d09ff647a2c531e3836fb51ab2c5..f2e6bf82e3a9a98c429a8bc4844be53869dea045 100644 --- a/maca_morpho/src/maca_morpho_context.h +++ b/maca_morpho/src/maca_morpho_context.h @@ -14,6 +14,7 @@ typedef struct { int debug_mode; int fplm_test_percent; int separate_classes; + int cascade; char *program_name; char *fplm_filename; char *language; @@ -42,4 +43,5 @@ void context_target_help_message(context *ctx); void context_fplm_test_percent_help_message(context *ctx); void context_code_class_help_message(context* ctx); void context_separate_classes_help_message(context *ctx); +void context_cascade_help_message(context *ctx); #endif diff --git a/maca_morpho/src/maca_morpho_feat_fct.c b/maca_morpho/src/maca_morpho_feat_fct.c index 2109f0e7d11287d15057b227e8bd00258ca4c2f1..06d367f237075372c085ad9c308beb5bff51b94e 100644 --- a/maca_morpho/src/maca_morpho_feat_fct.c +++ b/maca_morpho/src/maca_morpho_feat_fct.c @@ -5,118 +5,164 @@ #include "char16.h" #include "fplm.h" -char* all_pos[23] = {"np","adj","nc","adv","prep","poncts","csu","v","vprespart","vppart", - "vinf","pres","ponctw","clr","det","coo","cln","pro","pri","prorel","clo","advneg","titre"}; -char* all_person_non_separated[5] = {"1","2","3","12","13"}; - -int code_pos(char* pos) -{ - int i; - for(i=0; i<23; i++) - if(!strcmp(pos, all_pos[i])) - return i; - return -1; -} -int code_person(char* class) -{ - int i; - for(i=0; i<5; i++) - if(!strcmp(class, all_person_non_separated[i])) - return i; - return -1; -} - /*patterns feature*/ -int s1(FP* fp) +int s1(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 1 < 0) return -1; return tmp[size - 1]; } -int s2(FP* fp) +int s2(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 2 < 0) return -1; return tmp[size - 2]; } -int s3(FP* fp) +int s3(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 3 < 0) return -1; return tmp[size - 3]; } -int s4(FP* fp) +int s4(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 4 < 0) return -1; return tmp[size - 4]; } -int s5(FP* fp) +int s5(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 5 < 0) return -1; return tmp[size - 5]; } -int s6(FP* fp) +int s6(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 6 < 0) return -1; return tmp[size - 6]; } -int s7(FP* fp) +int s7(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 7 < 0) return -1; return tmp[size - 7]; } -int s8(FP* fp) +int s8(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 8 < 0) return -1; return tmp[size - 8]; } -int s9(FP* fp) +int s9(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 9 < 0) return -1; return tmp[size - 9]; } -int s10(FP* fp) +int s10(FPLM* fplm) { - char16* tmp = utf8tochar16(fp->form); + char16* tmp = utf8tochar16(fplm->form); int size = char16_strlen(tmp); if(tmp == NULL || size - 10 < 0) return -1; return tmp[size - 10]; } -int feat_person(FP* fp) + +int feat_person(FPLM* fplm) { char class[10]; - extract_classes_from_morpho(PERSON, class, fp->morpho); - return code_person(class); + extract_classes_from_morpho(PERSON, class, fplm->morpho); + if((int)strlen(class)==1) + return class[0]; + /*To concatenate ASCII code of 2 or more numbers*/ + FILE* tmp = fopen("tmp.txt","w+"); + if(tmp==NULL) + { + fprintf(stderr,"error tmp file -- maca_morpho_feat_fct.c\n"); + return -1; + } + int code; + for(int i=0; i<(int)strlen(class); i++) + fprintf(tmp,"%d",class[i]); + rewind(tmp); + fscanf(tmp,"%d",&code); + fclose(tmp); + remove("tmp.txt"); + return code; } -int feat_pos(FP* fp) +int feat_tense(FPLM* fplm) { - return code_pos(fp->pos); + char class[10]; + extract_classes_from_morpho(TENSE, class, fplm->morpho); + if((int)strlen(class)==1) + return class[0]; + /*To concatenate ASCII code of 2 or more letters*/ + FILE* tmp = fopen("tmp.txt","w+"); + if(tmp==NULL) + { + fprintf(stderr,"error tmp file -- maca_morpho_feat_fct.c\n"); + return -1; + } + int code; + for(int i=0; i<(int)strlen(class); i++) + fprintf(tmp,"%d",class[i]); + rewind(tmp); + fscanf(tmp,"%d",&code); + fclose(tmp); + remove("tmp.txt"); + return code; +} + +int feat_gender(FPLM* fplm) +{ + char class[10]; + extract_classes_from_morpho(GENDER, class, fplm->morpho); + return class[0]; //ASCII code of 'f' or 'm' or '#' +} + +int feat_number(FPLM* fplm) +{ + char class[10]; + extract_classes_from_morpho(NUMBER, class, fplm->morpho); + return class[0]; //ASCII code of 's' or 'p' or '#' +} + +int feat_pos(FPLM* fplm) +{ + FILE* tmp = fopen("tmp.txt","w+"); + if(tmp==NULL) + { + fprintf(stderr,"error tmp file -- code_pos\n"); + return -1; + } + int code; + for(int i=0; i<(int)strlen(fplm->pos); i++) + fprintf(tmp,"%d",fplm->pos[i]); + rewind(tmp); + fscanf(tmp,"%d",&code); + fclose(tmp); + remove("tmp.txt"); + return code; } feat_lib *feat_lib_build(void) { @@ -135,6 +181,8 @@ feat_lib *feat_lib_build(void) { feat_lib_add(fl, 1, (char *)"s10", s10); feat_lib_add(fl, 1, (char *)"feat_pos", feat_pos); feat_lib_add(fl, 1, (char *)"feat_person", feat_person); - + feat_lib_add(fl, 1, (char *)"feat_number", feat_number); + feat_lib_add(fl, 1, (char *)"feat_gender", feat_gender); + feat_lib_add(fl, 1, (char *)"feat_tense", feat_tense); return fl; } diff --git a/maca_morpho/src/predict.h b/maca_morpho/src/predict.h index b50bb1dc399eddf50d7e819669cf4d3511640267..90e9d3257e5ef41ddddd4d5834e5723772d026f4 100644 --- a/maca_morpho/src/predict.h +++ b/maca_morpho/src/predict.h @@ -10,33 +10,66 @@ #include "feature_table.h" #include "fplm.h" + typedef struct + { + FILE* f_error; + FILE* f_predict; + FILE* new_fplm; + FILE* morpho_predicted; + }Output_files; + + typedef struct + { + FILE* f_fplm; + FILE* code_class; + FILE* all_real_morphos; + FILE* code_class_tense; + FILE* code_class_person; + FILE* code_class_gender; + FILE* code_class_number; + FILE* predict_tense; + FILE* predict_person; + FILE* predict_gender; + FILE* predict_number; + }Input_files; + void predict_help_message(context *ctx); + void new_input_files (Input_files* in_files); + void init_input_files (Input_files* in_files, char* fplm_name, char* code, char* all, char* c1, char* c2, char* c3, char* c4, char* p1, char* p2, char* p3, char* p4); + void new_output_files (Output_files* out_files); + void init_output_files (Output_files* out_files, char* error_name, char* predict_name, char* fplm_name, char* morpho_name); + void free_input_files (Input_files* in_files); + void free_output_files (Output_files* out_files); /*Predict all classes in one*/ - void predict_all_classes(context* ctx); - /*classes are not separated*/ - void make_prediction_all_classes_non_separate(FILE* error_file, FILE* code_class, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); - void errors_nb_all_classes_non_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho); - int extract_real_class_non_separate(FILE* code_class, char* target_class, int all_classes); - /*classes are separated*/ - void make_prediction_all_classes_separate(FILE* predictions, FILE* code_class_big, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); - void errors_nb_all_classes_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho); - int extract_real_class_all_classes_separate(FILE* code_class, int* real_class, char* morpho); + void predict_all_classes (context* ctx); + void make_prediction_all_classes (context* ctx, FILE* error_file, FILE* code_class, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); + /*classes are not separated*/ + void errors_nb_all_classes_not_separated (FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors); + int extract_real_class_not_separated (FILE* code_class, char* target_class, int all_classes); + /*classes are separated*/ + void errors_nb_all_classes_separated (FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors); + int extract_real_class_all_classes_separated (FILE* code_class, int* real_class, char* morpho); + /*Predict a target*/ - void predict_target(context* ctx); - /*classes are separated*/ - void make_prediction_separate(FILE* error_file, FILE* predict_file, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); - void errors_nb_separate(FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, char* morpho, FP* fp); - int extract_real_class_separate(FILE* code_class, char* target_class, int* real_class); - /*classes are not separated*/ - void make_prediction_non_separate(FILE* error_file, FILE* y, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); - void errors_nb_non_separate(FILE* error_file, FILE* code_class, TARGET target, FP* fp, int class_predicted, int* errors, char* morpho); + void predict_target (context* ctx); + void make_prediction (context* ctx, Output_files* out_files, FILE* code_class, TARGET target, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm); + /*classes are separated*/ + void errors_nb_separated (FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, FPLM* fplm); + int extract_real_class_separated (FILE* code_class, char* target_class, int* real_class); + /*classes are not separated*/ + void errors_nb_not_separated (FILE* error_file, FILE* code_class, TARGET target, FPLM* fplm, int class_predicted, int* errors); + /*Cascade*/ + void write_new_fplm (FPLM* fplm, TARGET target, FILE* new_fplm, FILE* code_class, int class_predicted); + /*Predict each targets then predict all classes in one*/ - void predict_each_and_all_targets(context* ctx); - void calculate_global_success_rate(context* ctx, FILE* y, FILE* y2, FILE* y3, FILE* y4,int* global_error, int* class_predicted_array,FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho); - void put_in_array_real_classes(int* real_classes_array, FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho); - void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho, int* global_error, int* class_predicted_array); + void predict_each_and_all_targets (context* ctx); + void calculate_global_success_rate (context* ctx, Input_files* in_files, FILE* morpho_predicted, int* global_error, int* class_predicted_array,char* morpho); + void create_morpho_predicted_file (FILE* morpho_predicted,int* class_predicted_array,Input_files* in_files); + void compare_morpho_predicted_and_real_morphos (FILE* all_real_morphos, FILE* morpho_predicted, int line_nb); + void put_in_array_real_classes (int* real_classes_array, Input_files* in_files, char* morpho); + void compare_predicted_and_real_class (Input_files* in_files,char* morpho, int* global_error, int* class_predicted_array); #endif diff --git a/maca_morpho/src/predict_fct.c b/maca_morpho/src/predict_fct.c index 4591ffa6f23e466cf5ef21c29b57fdef7ea09936..105650f08fa5d2d058a902976e61dda6f68e84fb 100644 --- a/maca_morpho/src/predict_fct.c +++ b/maca_morpho/src/predict_fct.c @@ -13,141 +13,294 @@ void predict_help_message(context *ctx) context_weights_matrix_filename_help_message(ctx); context_features_model_help_message(ctx); context_code_class_help_message(ctx); + context_cascade_help_message(ctx); exit(1); } - /*Fonctions for all classes' prediction*/ -void predict_all_classes(context* ctx) +void new_input_files(Input_files* in_files) { - FILE* fplm_test = NULL; - FILE* error_file = NULL; - FILE* code_class = NULL; - feature_table *cfw = NULL; - feat_vec *fv = NULL; - dico *dico_features = NULL; - feat_model *fm = NULL; - int line_nb=0; - int fields_nb; - int errors = 0; - char form[100]; - char pos[50]; - char lemma[100]; - char morpho[50]; - FP* fp = malloc(sizeof(FP)); - fp->form = malloc(sizeof(char)*100); - fp->pos = malloc(sizeof(char)*50); - fp->morpho = malloc(sizeof(char)*10); - code_class = fopen(ctx->code_class_filename,"r"); - if(code_class==NULL) - { - fprintf(stderr, "Could not open the code_class file.\n"); - exit(EXIT_FAILURE); + in_files->f_fplm = NULL; + in_files->code_class = NULL; + in_files->all_real_morphos = NULL; + in_files->code_class_tense = NULL; + in_files->code_class_person = NULL; + in_files->code_class_gender = NULL; + in_files->code_class_number = NULL; + in_files->predict_tense = NULL; + in_files->predict_person = NULL; + in_files->predict_gender = NULL; + in_files->predict_number = NULL; +} + +void init_input_files(Input_files* in_files, char* fplm_name, char* code, + char* all, char* c1, char* c2, char* c3, char* c4, char* p1, char* p2, char* p3, char* p4) +{ + new_input_files(in_files); + if(fplm_name != NULL) + { + in_files->f_fplm = fopen(fplm_name,"r"); + if(in_files->f_fplm == NULL) + { + fprintf(stderr, "Could not open the fplm file.\n"); + exit(EXIT_FAILURE); + } } - fplm_test = fopen(ctx->fplm_filename,"r"); - if(fplm_test == NULL) + if(code != NULL) { - fprintf(stderr,"Could not open input file.\nYou can generate a fplm_test file with fplm2train_test\nThe fplm_test file will be in the Files directory.\n"); - exit(EXIT_FAILURE); + in_files->code_class = fopen(code,"r"); + if(in_files->code_class == NULL) + { + fprintf(stderr, "Could not open the code_class file.\n"); + exit(EXIT_FAILURE); + } } - cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); - fv = feat_vec_new(10); - dico_features = dico_read(ctx->features_filename, 0.5); - fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - error_file = fopen("../../Files/predict_error.txt","w"); - if(error_file==NULL) + if(all != NULL) { - fprintf(stderr,"Problem with the error file.\n"); - exit(EXIT_FAILURE); + in_files->all_real_morphos = fopen(all,"r"); + if(in_files->all_real_morphos == NULL) + { + fprintf(stderr, "Could not open the all real classes file. Use fplm2cff with -t all to generate it.\n"); + exit(EXIT_FAILURE); + } + } + if(c1 != NULL) + { + in_files->code_class_tense = fopen(c1,"r"); + if(in_files->code_class_tense == NULL) + { + fprintf(stderr, "Could not open the code class tense file.\n"); + exit(EXIT_FAILURE); + } + } + if(c2 != NULL) + { + in_files->code_class_person = fopen(c2,"r"); + if(in_files->code_class_person == NULL) + { + fprintf(stderr, "Could not open the code class person file.\n"); + exit(EXIT_FAILURE); + } + } + if(c3 != NULL) + { + in_files->code_class_gender = fopen(c3,"r"); + if(in_files->code_class_gender == NULL) + { + fprintf(stderr, "Could not open the code class gender file.\n"); + exit(EXIT_FAILURE); + } + } + if(c4 != NULL) + { + in_files->code_class_number = fopen(c4,"r"); + if(in_files->code_class_number == NULL) + { + fprintf(stderr, "Could not open the code class number file.\n"); + exit(EXIT_FAILURE); + } + } + if(p1 != NULL) + { + in_files->predict_tense = fopen(p1,"r"); + if(in_files->predict_tense== NULL) + { + fprintf(stderr, "Could not open the predict tense file.\n"); + exit(EXIT_FAILURE); + } + } + if(p2 != NULL) + { + in_files->predict_person = fopen(p2,"r"); + if(in_files->predict_person == NULL) + { + fprintf(stderr, "Could not open the predict person file.\n"); + exit(EXIT_FAILURE); + } + } + if(p3 != NULL) + { + in_files->predict_gender = fopen(p3,"r"); + if(in_files->predict_gender == NULL) + { + fprintf(stderr, "Could not open the predict gender file.\n"); + exit(EXIT_FAILURE); + } + } + if(p4 != NULL) + { + in_files->predict_number = fopen(p4,"r"); + if(in_files->predict_number == NULL) + { + fprintf(stderr, "Could not open the predict number file.\n"); + exit(EXIT_FAILURE); + } + } +} + +void new_output_files(Output_files* out_files) +{ + out_files->f_error = NULL; + out_files->f_predict = NULL; + out_files->new_fplm = NULL; + out_files->morpho_predicted = NULL; +} +void init_output_files(Output_files* out_files, char* error_name, char* predict_name, char* fplm_name, char* morpho_name) +{ + new_output_files(out_files); + if(error_name != NULL) + { + out_files->f_error = fopen(error_name, "w"); + if(out_files->f_error==NULL) + { + fprintf(stderr,"Problem with the error file.\n"); + exit(EXIT_FAILURE); + } + } + if(predict_name != NULL) + { + out_files->f_predict = fopen(predict_name, "w"); + if(out_files->f_predict==NULL) + { + fprintf(stderr,"Problem with the predict file.\n"); + exit(EXIT_FAILURE); + } } - while((fields_nb = read_line_fplm(fplm_test, form, pos, lemma, morpho)) != -1) + if(fplm_name != NULL) + { + out_files->new_fplm = fopen(fplm_name, "w"); + if(out_files->new_fplm==NULL) + { + fprintf(stderr,"Problem with the new fplm file.\n"); + exit(EXIT_FAILURE); + } + } + if(morpho_name != NULL) + { + out_files->morpho_predicted = fopen(morpho_name, "w+"); + if(out_files->morpho_predicted==NULL) + { + fprintf(stderr,"Problem with the morpho_predicted file.\n"); + exit(EXIT_FAILURE); + } + } +} + +void free_input_files(Input_files* in_files) +{ + if(in_files->f_fplm != NULL) fclose(in_files->f_fplm); + if(in_files->code_class != NULL) fclose(in_files->code_class); + if(in_files->all_real_morphos != NULL) fclose(in_files->all_real_morphos); + if(in_files->code_class_tense != NULL) fclose(in_files->code_class_tense); + if(in_files->code_class_person != NULL) fclose(in_files->code_class_person); + if(in_files->code_class_gender != NULL) fclose(in_files->code_class_gender); + if(in_files->code_class_number != NULL) fclose(in_files->code_class_number); + if(in_files->predict_tense != NULL) fclose(in_files->predict_tense); + if(in_files->predict_person != NULL) fclose(in_files->predict_person); + if(in_files->predict_gender != NULL) fclose(in_files->predict_gender); + if(in_files->predict_number != NULL) fclose(in_files->predict_number); + free(in_files); +} + +void free_output_files(Output_files* out_files) +{ + if(out_files->f_error != NULL) fclose(out_files->f_error); + if(out_files->f_predict != NULL) fclose(out_files->f_predict); + if(out_files->new_fplm != NULL) fclose(out_files->new_fplm); + if(out_files->morpho_predicted != NULL) fclose(out_files->morpho_predicted); + free(out_files); +} + + /**Fonctions for all classes' prediction (morpho is a class)**/ +void predict_all_classes(context* ctx) +{ + Output_files* out_files = malloc(sizeof(Output_files)); + Input_files* in_files = malloc(sizeof(Input_files)); + FPLM* fplm = new_fplm(); + feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); + feat_vec *fv = feat_vec_new(10); + dico *dico_features = dico_read(ctx->features_filename, 0.5); + feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); + int line_nb=0; + int fields_nb; + int errors = 0; + + init_output_files(out_files, "error.txt", NULL, NULL, NULL); + init_input_files(in_files,ctx->fplm_filename,ctx->code_class_filename, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); + while((fields_nb = read_line_fplm(in_files->f_fplm, fplm)) != -1) { if(fields_nb!=4) { - if(1) + if(ctx->debug_mode) { - fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho); + fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho); fprintf(stderr, "incorrect fplm entry, skipping it\n"); } continue; } line_nb++; - strcpy(fp->form,form); - strcpy(fp->pos,pos); - strcpy(fp->morpho,morpho); - if(ctx->separate_classes) - make_prediction_all_classes_separate(error_file, code_class, &errors, fp, morpho, cfw, fv, dico_features, fm); - else - make_prediction_all_classes_non_separate(error_file, code_class, &errors, fp, morpho, cfw, fv, dico_features, fm); - } - printf("Success rate : %lf %%\n", (float)100-((float)errors*100/line_nb)); + make_prediction_all_classes(ctx, out_files->f_error, in_files->code_class, &errors, fplm, cfw, fv, dico_features, fm); + } + printf("Success rate : %lf %%\n", 100-((float)errors*100/line_nb)); if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - free(fp->form); - free(fp->pos); - free(fp); - fclose(fplm_test); - fclose(error_file); - fclose(code_class); + + free_fplm(fplm); + free_input_files(in_files); + free_output_files(out_files); } - /*Fonctions for not-separated classes*/ /** Predict a class for the current word **/ -void make_prediction_all_classes_non_separate(FILE* error_file, FILE* code_class, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) +void make_prediction_all_classes(context* ctx, FILE* error_file, FILE* code_class, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) { int class_predicted; float max; - form2fv(fp, fv, fm, dico_features, LOOKUP_MODE); + form2fv(fplm, fv, fm, dico_features, LOOKUP_MODE); class_predicted = feature_table_argmax(fv, cfw, &max); - errors_nb_all_classes_non_separate(error_file, code_class, fp,class_predicted, errors, morpho); + if(ctx->separate_classes) + errors_nb_all_classes_separated(error_file, code_class, fplm, class_predicted, errors); + else + errors_nb_all_classes_not_separated(error_file, code_class, fplm,class_predicted, errors); } + /**Fonctions for not-separated classes**/ + /** Increment the number of errors if the programm has predicted the wrong class **/ -void errors_nb_all_classes_non_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho) +void errors_nb_all_classes_not_separated(FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors) { - int real_class; - real_class = extract_real_class_non_separate(code_class, morpho, 1); + int real_class = extract_real_class_not_separated(code_class, fplm->morpho, 1); if(class_predicted != real_class) { *errors = *errors+1; - fprintf(error_file, "form = %s | class predicted = %d | real class = %d\n", fp->form, class_predicted, real_class); + fprintf(error_file, "form = %s | class predicted = %d | real class = %d\n", fplm->form, class_predicted, real_class); } } /** Return the word's real class (also used to predict one target)**/ -int extract_real_class_non_separate(FILE* code_class, char* target_class, int all_classes) -{ - if(target_class[0]=='\0') - return 0; - return associate_number_to_classes(code_class, target_class,all_classes); -} - - /*Fonctions for separated classes*/ -void make_prediction_all_classes_separate(FILE* error_file, FILE* code_class, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) +int extract_real_class_not_separated(FILE* code_class, char* target_class, int all_classes) { - int class_predicted; - float max; - form2fv(fp, fv, fm, dico_features, LOOKUP_MODE); - class_predicted = feature_table_argmax(fv, cfw, &max); - errors_nb_all_classes_separate(error_file, code_class, fp, class_predicted, errors, morpho); + return associate_number_to_classes(code_class, target_class, all_classes); } -void errors_nb_all_classes_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho) + /**Fonctions for separated classes**/ + +void errors_nb_all_classes_separated(FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors) { int i; int size = 10; int real_class[40]; - size = extract_real_class_all_classes_separate(code_class, real_class, morpho); + size = extract_real_class_all_classes_separated(code_class, real_class, fplm->morpho); for(i=0; i<=size; i++) if(class_predicted == real_class[i]) return; *errors = *errors+1; - fprintf(error_file, "form = %s | class predicted = %d | real class =", fp->form, class_predicted); + fprintf(error_file, "form = %s | class predicted = %d | real class =", fplm->form, class_predicted); for(i=0; i<=size; i++) fprintf(error_file, " %d", real_class[i]); fprintf(error_file,"\n"); } -int extract_real_class_all_classes_separate(FILE* code_class, int* real_class, char* morpho) +int extract_real_class_all_classes_separated(FILE* code_class, int* real_class, char* morpho) { int size = -1; char tense_class[10]; @@ -160,26 +313,6 @@ int extract_real_class_all_classes_separate(FILE* code_class, int* real_class, c extract_classes_from_morpho(PERSON, person_class, morpho); extract_classes_from_morpho(GENDER, gender_class, morpho); extract_classes_from_morpho(NUMBER, number_class, morpho); - if(tense_class[0] == '\0') - { - tense_class[0]='#'; - tense_class[1]='\0'; - } - if(person_class[0] == '\0') - { - person_class[0]='#'; - person_class[1]='\0'; - } - if(gender_class[0] == '\0') - { - gender_class[0]='#'; - gender_class[1]='\0'; - } - if(number_class[0] == '\0') - { - number_class[0]='#'; - number_class[1]='\0'; - } for(i=0; i<(int)strlen(tense_class); i++) { for(j=0; j<(int)strlen(person_class); j++) @@ -193,214 +326,234 @@ int extract_real_class_all_classes_separate(FILE* code_class, int* real_class, c real_class[size] = associate_number_to_classes(code_class, all_target, 1); } } - return size; } - /*Fonctions for a target's prediction*/ + /**Fonctions for a target's prediction**/ void predict_target(context* ctx) { - FILE* y = NULL; - FILE* fplm_test = NULL; - FILE* error_file = NULL; - FILE* code_class = NULL; - feature_table *cfw = NULL; - feat_vec *fv = NULL; - dico *dico_features = NULL; - feat_model *fm = NULL; + Input_files* in_files = malloc(sizeof(Input_files)); + Output_files* out_files = malloc(sizeof(Output_files)); + feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); + feat_vec *fv = feat_vec_new(10); + dico *dico_features = dico_read(ctx->features_filename, 0.5); + feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); int line_nb=0; int fields_nb; int errors = 0; - char form[100]; - char pos[50]; - char lemma[100]; - char morpho[50]; char target_name[10]; + char output_fplm_name[20]; char predict_name[50]; - FP* fp = malloc(sizeof(FP)); - fp->form = malloc(sizeof(char)*100); - fp->pos = malloc(sizeof(char)*50); - fp->morpho = malloc(sizeof(char)*10); + FPLM* fplm = new_fplm(); TARGET target; - code_class = fopen(ctx->code_class_filename,"r"); - if(code_class==NULL) - { - fprintf(stderr, "Could not the code_class file.\n"); - exit(EXIT_FAILURE); - } - if(fscanf(code_class,"%s",target_name)!=1) + FILE* tmp = fopen("../../Files/tmp.txt","w"); + init_input_files(in_files, ctx->fplm_filename, ctx->code_class_filename, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); + if(fscanf(in_files->code_class,"%s",target_name)!=1) { fprintf(stderr, "Your code_class file is not conform.\n"); exit(EXIT_FAILURE); } - strcpy(predict_name,"../../Files/predict_"); + strcpy(predict_name,"predict_"); strcat(predict_name,target_name); - y = fopen(predict_name,"w"); - fplm_test = fopen(ctx->fplm_filename,"r"); - if(fplm_test == NULL) - { - fprintf(stderr,"Could not open input file.\nYou can generate a fplm_test file with fplm2train_test\nThe fplm_test file will be in the Files directory.\n"); - exit(EXIT_FAILURE); - } target = choose_target(target_name); - cfw = feature_table_load(ctx->cfw_filename, ctx->verbose); - fv = feat_vec_new(10); - dico_features = dico_read(ctx->features_filename, 0.5); - fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); - error_file = fopen("../../Files/predict_error.txt","w"); - if(error_file==NULL) - { - fprintf(stderr,"Problem with the error file.\n"); - exit(EXIT_FAILURE); + if(ctx->cascade) + { + strcpy(output_fplm_name,"fplm_"); + strcat(output_fplm_name,target_name); + init_output_files(out_files, "predict_error", predict_name, output_fplm_name, NULL); } - while((fields_nb = read_line_fplm(fplm_test, form, pos, lemma, morpho)) != -1) + else + init_output_files(out_files, "predict_error", predict_name, NULL, NULL); + + while((fields_nb = read_line_fplm(in_files->f_fplm, fplm)) != -1) { if(fields_nb!=4) { - if(1) + if(ctx->debug_mode) { - fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho); + fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho); fprintf(stderr, "incorrect fplm entry, skipping it\n"); } continue; } + fprintf(tmp,"%s\t%s\t%s\t%s\n",fplm->form, fplm->pos, fplm->lemma, fplm->morpho); line_nb++; - strcpy(fp->form,form); - strcpy(fp->pos,pos); - strcpy(fp->morpho,morpho); - if(ctx->separate_classes) - make_prediction_separate(error_file, y, code_class, target, &errors, fp, morpho, cfw, fv, dico_features, fm); - else - make_prediction_non_separate(error_file, y, code_class, target, &errors, fp, morpho, cfw, fv, dico_features, fm); - } - printf("Success rate : %lf %%\n", (float)100-((float)errors*100/line_nb)); + if(ctx->cascade) + fprintf(out_files->new_fplm,"%s\t%s\t%s\t",fplm->form, fplm->pos, fplm->lemma); + make_prediction(ctx, out_files, in_files->code_class, target, &errors, fplm, cfw, fv, dico_features, fm); + } + printf("Success rate : %lf %%\n", 100-((float)errors*100/line_nb)); if(ctx->features_filename) dico_print(ctx->features_filename, dico_features); - free(fp->form); - free(fp->pos); - free(fp); - fclose(y); - fclose(fplm_test); - fclose(error_file); - fclose(code_class); + free_fplm(fplm); + free_input_files(in_files); + free_output_files(out_files); } - /*Fonctions for separated classes*/ -void make_prediction_separate(FILE* error_file, FILE* predict_file, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) +void make_prediction(context* ctx, Output_files* out_files, FILE* code_class, TARGET target, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) { int class_predicted; float max; - form2fv(fp, fv, fm, dico_features, LOOKUP_MODE); + form2fv(fplm, fv, fm, dico_features, LOOKUP_MODE); class_predicted = feature_table_argmax(fv, cfw, &max); - fprintf(predict_file,"%d\n",class_predicted); - errors_nb_separate(error_file, code_class, target, class_predicted, errors, morpho, fp); + fprintf(out_files->f_predict,"%d\n",class_predicted); + if(ctx->separate_classes) + errors_nb_separated(out_files->f_error, code_class, target, class_predicted, errors, fplm); + else + errors_nb_not_separated(out_files->f_error, code_class, target, fplm, class_predicted, errors); + + if(ctx->cascade) + write_new_fplm(fplm, target, out_files->new_fplm, code_class, class_predicted); + +} + +void write_new_fplm(FPLM* fplm, TARGET target, FILE* new_fplm, FILE* code_class, int class_predicted) +{ + int code; + int i=0; + int ok=1; + int cpt_diese = 0; + char tmp[20]; + char new_morpho[30]; //will contain the morpho feature with the prediction + int position_morpho = 0; + int position = extract_class_position(target); //position = the num of the diese we want + while(i<(int)strlen(fplm->morpho) && cpt_diese!=position) + { + new_morpho[i] = fplm->morpho[i]; + if(fplm->morpho[i]=='#') + cpt_diese++; + i++; + } + position = i; //position is now where we will include the class predicted + position_morpho = position; + while(position_morpho<(int)strlen(fplm->morpho) && fplm->morpho[position_morpho]!='#') + position_morpho++; //position_morpho is where we will copy the initial morpho feature in the new morpho feature + + if(class_predicted==0) + { + while(position_morpho<(int)strlen(fplm->morpho)) //writting the initial morpho feature's rest in the new morpho feature + { + new_morpho[position] = fplm->morpho[position_morpho]; + position++; + position_morpho++; + } + new_morpho[position]='\0'; + ok=0; + } + + rewind(code_class); + fscanf(code_class,"%s",tmp); //read the target name + while(ok && fscanf(code_class,"%d %s\n",&code,tmp)==2) //tmp = class + { + if(class_predicted==code && code!=0) //including the class predicted + { + i=0; + while(i<(int)strlen(tmp)) + { + new_morpho[position]=tmp[i]; + i++; + position++; + } + while(position_morpho<(int)strlen(fplm->morpho)) //writting the initial morpho feature's rest in the new morpho feature + { + new_morpho[position] = fplm->morpho[position_morpho]; + position++; + position_morpho++; + } + new_morpho[position]='\0'; + ok=0; + } + } + fprintf(new_fplm,"%s\n",new_morpho); } -void errors_nb_separate(FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, char* morpho, FP* fp) + /**Fonctions for separated classes**/ +void errors_nb_separated(FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, FPLM* fplm) { int i; - int size = 10; + int size = 0; int real_class[10]; char target_class[10]; - extract_classes_from_morpho(target,target_class,morpho); - size = extract_real_class_separate(code_class, target_class, real_class); + extract_classes_from_morpho(target,target_class,fplm->morpho); + size = extract_real_class_separated(code_class, target_class, real_class); for(i=0; i<=size; i++) if(class_predicted == real_class[i]) return; *errors = *errors+1; - fprintf(error_file, "form = %s | class predicted = %d | real class =", fp->form, class_predicted); + fprintf(error_file, "form = %s | class predicted = %d | real class =", fplm->form, class_predicted); for(i=0; i<=size; i++) fprintf(error_file, " %d",real_class[i]); fprintf(error_file, "\n"); } -int extract_real_class_separate(FILE* code_class, char* target_class, int* real_class) +int extract_real_class_separated(FILE* code_class, char* target_class, int* real_class) { int size = -1; int i; - if(target_class[0]=='\0') + for(i=0; i<(int)strlen(target_class); i++) { size++; - real_class[size]=0; - } - else - { - for(i=0; i<(int)strlen(target_class); i++) - { - size++; - real_class[size] = associate_number_to_classes_separate(code_class,target_class,i); - } + real_class[size] = associate_number_to_classes_separated(code_class,target_class,i); } return size; } - /*Fonctions for not-separated classes*/ -void make_prediction_non_separate(FILE* error_file, FILE* y, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm) -{ - int class_predicted; - float max; - form2fv(fp, fv, fm, dico_features, LOOKUP_MODE); - class_predicted = feature_table_argmax(fv, cfw, &max); - fprintf(y,"%d\n",class_predicted); - errors_nb_non_separate(error_file, code_class, target, fp, class_predicted, errors, morpho); -} - -void errors_nb_non_separate(FILE* error_file, FILE* code_class, TARGET target, FP* fp, int class_predicted, int* errors, char* morpho) + /**Fonctions for not-separated classes**/ +void errors_nb_not_separated(FILE* error_file, FILE* code_class, TARGET target, FPLM* fplm, int class_predicted, int* errors) { int real_class; char target_class[10]; - extract_classes_from_morpho(target, target_class, morpho); - real_class = extract_real_class_non_separate(code_class, target_class, 0); + extract_classes_from_morpho(target, target_class, fplm->morpho); + real_class = extract_real_class_not_separated(code_class, target_class, 0); if(class_predicted != real_class) { *errors = *errors+1; - fprintf(error_file, "form = %s | class predicted = %d | real class = %d\n", fp->form, class_predicted, real_class); + fprintf(error_file, "form = %s | class predicted = %d | real class = %d\n", fplm->form, class_predicted, real_class); } } - /* Fonctions for all target's prediction after having predicted each targets */ + /** Fonctions for all target's prediction after having predicted each targets **/ void predict_each_and_all_targets(context* ctx) { - FILE* fplm_test = fopen(ctx->fplm_filename,"r"); - FILE* y = fopen("../../Files/predict_tense","r"); - FILE* y2 = fopen("../../Files/predict_person","r"); - FILE* y3 = fopen("../../Files/predict_gender","r"); - FILE* y4 = fopen("../../Files/predict_number","r"); - FILE* ycode_class = fopen("../../Files/code_class_tense","r"); - FILE* y2code_class = fopen("../../Files/code_class_person","r"); - FILE* y3code_class = fopen("../../Files/code_class_gender","r"); - FILE* y4code_class = fopen("../../Files/code_class_number","r"); + Input_files* in_files = malloc(sizeof(Input_files)); + Output_files* out_files = malloc(sizeof(Output_files)); int class_predicted_array[4]; - int fields_nb; + int fields_nb = 0; int global_error = 0; int line_nb = 0; - char form[100]; - char pos[20]; - char lemma[100]; - char morpho[20]; - while((fields_nb = read_line_fplm(fplm_test, form, pos, lemma, morpho)) != -1) + FPLM* fplm = new_fplm(); + init_output_files(out_files, NULL, NULL, NULL, "morpho_predicted"); + init_input_files(in_files,ctx->fplm_filename, NULL, "code_class", "code_class_tense", "code_class_person", "code_class_gender", + "code_class_number", "predict_tense", "predict_person", "predict_gender", "predict_number"); + + while((fields_nb = read_line_fplm(in_files->f_fplm, fplm)) != -1) { line_nb++; - calculate_global_success_rate(ctx,y,y2,y3,y4,&global_error,class_predicted_array,ycode_class,y2code_class,y3code_class,y4code_class,morpho); + calculate_global_success_rate(ctx,in_files,out_files->morpho_predicted,&global_error,class_predicted_array,fplm->morpho); } + compare_morpho_predicted_and_real_morphos(in_files->all_real_morphos, out_files->morpho_predicted, line_nb); printf("Global success rate : %lf %%\n", 100-((float)global_error*100/line_nb)); } -void calculate_global_success_rate(context* ctx, FILE* y, FILE* y2, FILE* y3, FILE* y4,int* global_error, int* class_predicted_array,FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho) +void calculate_global_success_rate(context* ctx, Input_files* in_files, FILE* morpho_predicted, int* global_error, int* class_predicted_array,char* morpho) { - fscanf(y, "%d", &class_predicted_array[0]); //tense - fscanf(y2, "%d", &class_predicted_array[1]); //person - fscanf(y3, "%d", &class_predicted_array[2]); //gender - fscanf(y4, "%d", &class_predicted_array[3]); //number - + fscanf(in_files->predict_tense, "%d", &class_predicted_array[0]); //tense + fscanf(in_files->predict_person, "%d", &class_predicted_array[1]); //person + fscanf(in_files->predict_gender, "%d", &class_predicted_array[2]); //gender + fscanf(in_files->predict_number, "%d", &class_predicted_array[3]); //number + if(ctx->separate_classes) - compare_predicted_and_real_class(ycode_class,y2code_class,y3code_class,y4code_class,morpho,global_error,class_predicted_array); + { + create_morpho_predicted_file(morpho_predicted,class_predicted_array,in_files); + compare_predicted_and_real_class(in_files,morpho,global_error,class_predicted_array); + } else { int real_classes_array[4]; - put_in_array_real_classes(real_classes_array,ycode_class,y2code_class,y3code_class,y4code_class,morpho); + put_in_array_real_classes(real_classes_array,in_files,morpho); + create_morpho_predicted_file(morpho_predicted,class_predicted_array,in_files); for(int i=0; i<4; i++) if(class_predicted_array[i]!=real_classes_array[i]) { @@ -410,22 +563,166 @@ void calculate_global_success_rate(context* ctx, FILE* y, FILE* y2, FILE* y3, FI } } -/*for not-separated classes*/ -void put_in_array_real_classes(int* real_classes_array, FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho) +/**The morpho predicted file is used to know if there are incoherent labels predicted**/ +void create_morpho_predicted_file(FILE* morpho_predicted,int* class_predicted_array,Input_files* in_files) +{ + int size=-1; + int i, ok = 1; + int code; + char tmp[20]; + char new_morpho[20]; + if(class_predicted_array[0]!=0) + { + rewind(in_files->code_class_tense); + i=0; + fscanf(in_files->code_class_tense,"%s",tmp); + while(fscanf(in_files->code_class_tense,"%d %s\n",&code,tmp)==2) + { + if(code==class_predicted_array[0]) + { + while(tmp[i]!='\0') + { + size++; + new_morpho[size]=tmp[i]; + i++; + } + } + } + } + size++; + new_morpho[size]='#'; + if(class_predicted_array[1]!=0) + { + rewind(in_files->code_class_person); + i=0; + fscanf(in_files->code_class_person,"%s",tmp); + while(fscanf(in_files->code_class_person,"%d %s\n",&code,tmp)==2) + { + if(code==class_predicted_array[1]) + { + while(tmp[i]!='\0') + { + size++; + new_morpho[size]=tmp[i]; + i++; + } + } + } + } + size++; + new_morpho[size]='#'; + if(class_predicted_array[2]!=0) + { + rewind(in_files->code_class_gender); + i=0; + fscanf(in_files->code_class_gender,"%s",tmp); + while(fscanf(in_files->code_class_gender,"%d %s\n",&code,tmp)==2) + { + if(code==class_predicted_array[2]) + { + while(tmp[i]!='\0') + { + size++; + new_morpho[size]=tmp[i]; + i++; + } + } + } + } + size++; + new_morpho[size]='#'; + if(class_predicted_array[3]!=0) + { + rewind(in_files->code_class_number); + i=0; + fscanf(in_files->code_class_number,"%s",tmp); + while(fscanf(in_files->code_class_number,"%d %s\n",&code,tmp)==2) + { + if(code==class_predicted_array[3]) + { + while(tmp[i]!='\0') + { + size++; + new_morpho[size]=tmp[i]; + i++; + } + } + } + } + size++; + new_morpho[size]='#'; + size++; + new_morpho[size]='\0'; + + //don't duplicate the morpho feature predicted + rewind(morpho_predicted); + while(ok && fscanf(morpho_predicted,"%s\n",tmp)==1) + if(!strcmp(tmp,new_morpho)) + ok=0; + if(ok) + fprintf(morpho_predicted,"%s\n",new_morpho); +} + +/**Return the number of incoherent labels in the morpho predicted file**/ +void compare_morpho_predicted_and_real_morphos(FILE* all_real_morphos, FILE* morpho_predicted, int line_nb) +{ + //all_real_morphos est le fichier code_class du grand fplm (faire cff avec toutes les classes) + int cpt_diese_max = 4; + int code; + int i,j; + int incoherence, ok, cpt_incoherence=0; + char tmp[20]; + char real_morpho[20]; + char morpho_predicted_array[20]; + rewind(morpho_predicted); + while(fscanf(morpho_predicted,"%s\n",morpho_predicted_array)==1) + { + rewind(all_real_morphos); + incoherence = 1; + ok=1; + while(ok && fscanf(all_real_morphos,"%d %s\n",&code,tmp)==2) + { + j=0; + i=0; + while(i<cpt_diese_max) + { + if(tmp[j]=='#') + i++; + real_morpho[j]=tmp[j]; + j++; + } + real_morpho[j]='\0'; + if(!strcmp(real_morpho,morpho_predicted_array)) + { + incoherence = 0; + ok=0; + } + } + if(incoherence) + { + printf("incorrect = %s\n",morpho_predicted_array); + cpt_incoherence++; + } + } + printf("Number of incoherent labels = %d\nIncoherence rate = %lf %%\n",cpt_incoherence,(float)cpt_incoherence*100/line_nb); +} + +/**for not-separated classes**/ +void put_in_array_real_classes(int* real_classes_array, Input_files* in_files, char* morpho) { char target_class[10]; extract_classes_from_morpho(TENSE,target_class,morpho); - real_classes_array[0] = extract_real_class_non_separate(ycode_class, target_class, 0); + real_classes_array[0] = extract_real_class_not_separated(in_files->code_class_tense, target_class, 0); extract_classes_from_morpho(PERSON,target_class,morpho); - real_classes_array[1] = extract_real_class_non_separate(y2code_class, target_class, 0); + real_classes_array[1] = extract_real_class_not_separated(in_files->code_class_person, target_class, 0); extract_classes_from_morpho(GENDER,target_class,morpho); - real_classes_array[2] = extract_real_class_non_separate(y3code_class, target_class, 0); + real_classes_array[2] = extract_real_class_not_separated(in_files->code_class_gender, target_class, 0); extract_classes_from_morpho(NUMBER,target_class,morpho); - real_classes_array[3] = extract_real_class_non_separate(y4code_class, target_class, 0); + real_classes_array[3] = extract_real_class_not_separated(in_files->code_class_number, target_class, 0); } -/*for separated classes*/ -void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho, int* global_error, int* class_predicted_array) +/**for separated classes**/ +void compare_predicted_and_real_class(Input_files* in_files,char* morpho, int* global_error, int* class_predicted_array) { char target_class[10]; int real_class[10]; @@ -434,7 +731,7 @@ void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* int err_glo=1; extract_classes_from_morpho(TENSE,target_class,morpho); - size = extract_real_class_separate(ycode_class, target_class, real_class); + size = extract_real_class_separated(in_files->code_class_tense, target_class, real_class); for(i=0; i<=size; i++) if(class_predicted_array[0] == real_class[i]) err_glo=0; @@ -445,7 +742,7 @@ void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* } extract_classes_from_morpho(PERSON,target_class,morpho); - size = extract_real_class_separate(y2code_class, target_class, real_class); + size = extract_real_class_separated(in_files->code_class_person, target_class, real_class); err_glo=1; for(i=0; i<=size; i++) if(class_predicted_array[1] == real_class[i]) @@ -457,7 +754,7 @@ void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* } extract_classes_from_morpho(GENDER,target_class,morpho); - size = extract_real_class_separate(y3code_class, target_class, real_class); + size = extract_real_class_separated(in_files->code_class_gender, target_class, real_class); err_glo=1; for(i=0; i<=size; i++) if(class_predicted_array[2] == real_class[i]) @@ -469,7 +766,7 @@ void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* } extract_classes_from_morpho(NUMBER,target_class,morpho); - size = extract_real_class_separate(y4code_class, target_class, real_class); + size = extract_real_class_separated(in_files->code_class_number, target_class, real_class); err_glo=1; for(i=0; i<=size; i++) if(class_predicted_array[3] == real_class[i]) @@ -477,4 +774,3 @@ void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* if(err_glo) *global_error = *global_error+1; } - diff --git a/maca_morpho/src/vectorize.c b/maca_morpho/src/vectorize.c index 15e38eb2a3eb4dd62e752db41bd2bdf95f5de11e..a5af0180fa623e0fbe4bfa0f37d8cb982946ea9f 100644 --- a/maca_morpho/src/vectorize.c +++ b/maca_morpho/src/vectorize.c @@ -3,7 +3,7 @@ #include<string.h> #include"vectorize.h" -int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int mode) +int get_feat_value(feat_model *fm, FPLM* fplm, dico *dico_features, int feat_nb, int mode) { feat_desc *fd = fm->array[feat_nb]; int i; @@ -14,7 +14,7 @@ int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int fm->string[0] = '\0'; for(i=0; i < fd->nbelem; i++){ strcat(fm->string, fd->array[i]->name); - feat_val = fd->array[i]->fct(fp); + feat_val = fd->array[i]->fct(fplm); sprintf(str, "%d", feat_val); strcat(fm->string, str); @@ -28,11 +28,11 @@ int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int } -feat_vec *form2fv(FP* fp, feat_vec *fv, feat_model *fm, dico *dico_features, int mode) +feat_vec *form2fv(FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features, int mode) { int i; feat_vec_empty(fv); for(i=0; i < fm->nbelem; i++) - feat_vec_add(fv, get_feat_value(fm, fp, dico_features, i, mode)); + feat_vec_add(fv, get_feat_value(fm, fplm, dico_features, i, mode)); return fv; } diff --git a/maca_morpho/src/vectorize.h b/maca_morpho/src/vectorize.h index 7779a966699f360cf1a5662945d838832f4acbfd..73caec80025a5986a52ab1736d436e4342bad4df 100644 --- a/maca_morpho/src/vectorize.h +++ b/maca_morpho/src/vectorize.h @@ -9,6 +9,6 @@ #define ADD_MODE 2 -feat_vec *form2fv(FP* fp, feat_vec *fv, feat_model *fm, dico *dico_features, int mode); +feat_vec *form2fv(FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features, int mode); #endif