From 6b4c301612f718f6333483e24795d6d649e186f0 Mon Sep 17 00:00:00 2001 From: Marjorie Armando <marjorie.armando.1@etu.univ-amu.fr> Date: Mon, 17 Apr 2017 23:17:30 +0200 Subject: [PATCH] generate train and test files, generate cff, predict test's forms' classes --- maca_morpho/src/fplm2cff_fct.c | 116 +++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 maca_morpho/src/fplm2cff_fct.c diff --git a/maca_morpho/src/fplm2cff_fct.c b/maca_morpho/src/fplm2cff_fct.c new file mode 100644 index 0000000..b2b6e2a --- /dev/null +++ b/maca_morpho/src/fplm2cff_fct.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "fplm2cff.h" + +void fplm2cff_help_message(context *ctx) +{ + context_general_help_message(ctx); + context_language_help_message(ctx); + context_fplm_help_message(ctx); + context_maca_data_path_help_message(ctx); + context_features_filename_help_message(ctx); + context_features_model_help_message(ctx); + context_class_help_message(ctx); + exit(1); +} + +void create_cff(context* ctx) +{ + FILE* fplm = NULL; + FILE* cff = NULL; + feat_vec *fv = NULL; + dico *dico_features = NULL; + feat_model *fm = NULL; + int fields_nb; + char form[100]; + char pos[50]; + char lemma[100]; + char morpho[50]; + char classes_array[100]; + CLASS class = choose_class(ctx->class_name); + + classes_array[0]='0'; + classes_array[1]='\0'; + fplm = fopen(ctx->fplm_filename,"r"); + if(fplm == NULL) + { + fprintf(stderr,"Could not open input file.\nThe fplm file is in the Files directory.\n"); + exit(EXIT_FAILURE); + } + fv = feat_vec_new(10); + dico_features = dico_new("dico_features", 1000); + fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); + cff = fopen("../../Files/cff.txt","w"); + if(cff==NULL) + { + fprintf(stderr,"Problem with the cff file.\n"); + exit(EXIT_FAILURE); + } + while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1) + { + if(fields_nb!=4) + { + if(1) + { + fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho); + fprintf(stderr, "incorrect fplm entry, skipping it\n"); + } + continue; + } + write_cff(cff, class, classes_array, form, morpho, fv, fm, dico_features); + } + if(ctx->features_filename) + dico_print(ctx->features_filename, dico_features); + + fclose(fplm); + fclose(cff); +} + +void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features) +{ + int i=0; + char morpho_feature[10]; + extract_morpho_feature(class, morpho_feature, morpho); + + /*write the class' number in cff (ASCII code of the morpho_feature)*/ + if(morpho_feature[0] == '\0') + { + fprintf(cff,"0"); + form2fv(form, fv, fm, dico_features, ADD_MODE); + feat_vec_print(cff, fv); + return; + } + if(class == GENDER || class == NUMBER) + { + fprintf(cff, "%d", associate_number_to_classes(classes_array, morpho_feature[i])); + form2fv(form, fv, fm, dico_features, ADD_MODE); + feat_vec_print(cff, fv); + } + else if(class == PERSON) + { + for(i=0; i<(int)strlen(morpho_feature); i++) + { + fprintf(cff,"%c", morpho_feature[i]); + /*because a word can have several persons in morpho so we have to write the + first class and the features' values, then the second class and the features' values, etc */ + form2fv(form, fv, fm, dico_features, ADD_MODE); + feat_vec_print(cff, fv); + } + } + else + { + //for example if tense is 'PS' the class' number will be the ASCII code of 'P' concatenate with the ASCII code of 'S' + FILE* tmp = fopen("tmp.txt","w+"); + for(i=0; i<(int)strlen(morpho_feature); i++) + fprintf(tmp,"%d", morpho_feature[i]); + rewind(tmp); + fscanf(tmp, "%d", &morpho_feature[0]); + fprintf(cff,"%d", associate_number_to_classes(classes_array, morpho_feature[0])); + form2fv(form, fv, fm, dico_features, ADD_MODE); + feat_vec_print(cff, fv); + fclose(tmp); + remove("tmp.txt"); + } + +} -- GitLab