/*
 * Text captured from the GitLab commit page together with the source
 * (navigation: Skip to content / Snippets / Groups / Projects / Browse files).
 *
 * Commit 6b4c3016, authored by Marjorie Armando.
 *
 * Commit message: generate train and test files, generate cff,
 * predict test's forms' classes.
 *
 * Parent: f0ea2309.
 * No related branches found.
 * No related tags found.
 * No related merge requests found.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fplm2cff.h"
/**
 * Print the help text of fplm2cff and terminate the program.
 *
 * Calls each context_*_help_message() routine below in turn with the same
 * context, then leaves the process through exit(1); this function never
 * returns to its caller.
 *
 * @param ctx  context handed unchanged to every help routine.
 */
void fplm2cff_help_message(context *ctx)
{
    /* One help section per call, in this fixed order. */
    context_general_help_message(ctx);
    context_language_help_message(ctx);
    context_fplm_help_message(ctx);
    context_maca_data_path_help_message(ctx);
    context_features_filename_help_message(ctx);
    context_features_model_help_message(ctx);
    context_class_help_message(ctx);

    /* Showing the help always ends the run with status 1. */
    exit(1);
}
/**
 * Build the cff file from the fplm file named in the context.
 *
 * The file ctx->fplm_filename is read entry by entry with read_line_fplm().
 * Every entry that has exactly four fields is passed (form and morpho) to
 * write_cff(), which appends to "../../Files/cff.txt"; other entries are
 * reported on stderr and skipped. When ctx->features_filename is set, the
 * feature dictionary is then printed to it with dico_print().
 *
 * The process exits with EXIT_FAILURE when the fplm file cannot be opened,
 * when the cff file cannot be created, or when closing the cff file fails.
 *
 * @param ctx  run context; this function reads class_name, fplm_filename,
 *             fm_filename, verbose and features_filename from it.
 */
void create_cff(context* ctx)
{
    FILE *fplm = NULL;
    FILE *cff = NULL;
    feat_vec *fv = NULL;
    dico *dico_features = NULL;
    feat_model *fm = NULL;
    int fields_nb;
    /* Field buffers start out as empty strings so they are always printable. */
    char form[100] = "";
    char pos[50] = "";
    char lemma[100] = "";
    char morpho[50] = "";
    /* Starts as the string "0"; the rest of the array is zero-filled. */
    char classes_array[100] = "0";
    CLASS class = choose_class(ctx->class_name);

    fplm = fopen(ctx->fplm_filename, "r");
    if (fplm == NULL) {
        fprintf(stderr,"Could not open input file.\nThe fplm file is in the Files directory.\n");
        exit(EXIT_FAILURE);
    }

    fv = feat_vec_new(10);
    dico_features = dico_new("dico_features", 1000);
    /* NOTE(review): the result of feat_model_read() is used unchecked, as
       before; confirm whether it can return NULL on a bad model file. */
    fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);

    cff = fopen("../../Files/cff.txt", "w");
    if (cff == NULL) {
        fprintf(stderr,"Problem with the cff file.\n");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        /* Clear the fields before each read: a short entry must not make the
           diagnostic below print text left over from the previous entry. */
        form[0] = '\0';
        pos[0] = '\0';
        lemma[0] = '\0';
        morpho[0] = '\0';

        fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho);
        if (fields_nb == -1) {
            break;
        }

        if (fields_nb != 4) {
            fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
            fprintf(stderr, "incorrect fplm entry, skipping it\n");
            continue;
        }

        write_cff(cff, class, classes_array, form, morpho, fv, fm, dico_features);
    }

    if (ctx->features_filename) {
        dico_print(ctx->features_filename, dico_features);
    }

    fclose(fplm);

    /* fclose() flushes the buffered output; a failure here means the cff
       file on disk is incomplete, so it must not go unnoticed. */
    if (fclose(cff) != 0) {
        fprintf(stderr,"Problem with the cff file.\n");
        exit(EXIT_FAILURE);
    }
}
/* Compute the feature vector of form with form2fv() in ADD_MODE and print it
   to cff with feat_vec_print(). */
static void write_cff_form_features(FILE *cff, char *form, feat_vec *fv, feat_model *fm, dico *dico_features)
{
    form2fv(form, fv, fm, dico_features, ADD_MODE);
    feat_vec_print(cff, fv);
}

/**
 * Append the cff entry (or entries) of one form to cff.
 *
 * The feature selected by class is extracted from morpho with
 * extract_morpho_feature(). A class label is written to cff, followed by the
 * feature vector of form. The label depends on the case:
 *
 *  - empty extracted feature: the label "0";
 *  - GENDER or NUMBER: associate_number_to_classes() applied to the first
 *    character of the feature;
 *  - PERSON: one entry per character of the feature, the character itself
 *    being the label (a form can carry several persons);
 *  - any other class: the decimal codes of the feature characters are
 *    concatenated (e.g. "PS" gives "8083"), parsed as a number, reduced to a
 *    char and passed to associate_number_to_classes().
 *
 * @param cff            output stream, already open for writing.
 * @param class          which morphological class is being predicted.
 * @param classes_array  passed through to associate_number_to_classes().
 * @param form           word form whose features are written.
 * @param morpho         morphological string the feature is extracted from.
 * @param fv             feature vector filled by form2fv().
 * @param fm             feature model used by form2fv().
 * @param dico_features  feature dictionary used by form2fv().
 */
void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
{
    /* Starts empty so the test below is defined even if nothing is extracted. */
    char morpho_feature[10] = "";

    extract_morpho_feature(class, morpho_feature, morpho);

    /* No feature of this class in morpho: class 0. */
    if (morpho_feature[0] == '\0') {
        fprintf(cff,"0");
        write_cff_form_features(cff, form, fv, fm, dico_features);
        return;
    }

    if (class == GENDER || class == NUMBER) {
        fprintf(cff, "%d", associate_number_to_classes(classes_array, morpho_feature[0]));
        write_cff_form_features(cff, form, fv, fm, dico_features);
    } else if (class == PERSON) {
        /* A word can have several persons in morpho, so write the first class
           and the feature values, then the second class and the feature
           values, and so on. */
        size_t persons = strlen(morpho_feature);
        for (size_t i = 0; i < persons; i++) {
            fprintf(cff,"%c", morpho_feature[i]);
            write_cff_form_features(cff, form, fv, fm, dico_features);
        }
    } else {
        /* For example if tense is 'PS' the class number is the ASCII code of
           'P' concatenated with the ASCII code of 'S'. The text is built in
           memory: no scratch file is needed. Each character prints as at
           most four characters ("-128"), hence the buffer size. */
        char code_text[4 * sizeof morpho_feature + 1] = "";
        size_t used = 0;
        long code;

        for (size_t i = 0; morpho_feature[i] != '\0'; i++) {
            int written = snprintf(code_text + used, sizeof code_text - used, "%d", morpho_feature[i]);
            if (written < 0 || (size_t)written >= sizeof code_text - used) {
                break;
            }
            used += (size_t)written;
        }

        code = strtol(code_text, NULL, 10);

        /* NOTE(review): the previous code scanned this number with "%d"
           straight into morpho_feature[0], a char, and then passed that char
           on. The explicit conversion below keeps what the callee received
           on little-endian hosts (the low byte) without the invalid store.
           Distinct features can therefore share a byte; confirm whether
           associate_number_to_classes() should receive the full number. */
        fprintf(cff,"%d", associate_number_to_classes(classes_array, (char)code));
        write_cff_form_features(cff, form, fv, fm, dico_features);
    }
}
/*
 * Text captured from the GitLab page footer:
 * 0% Loading.
 * You are about to add 0 people to the discussion. Proceed with caution.
 * Please register or sign in to comment.
 */