Skip to content
Snippets Groups Projects
Commit eeca92b7 authored by Marjorie Armando's avatar Marjorie Armando
Browse files

predict cascade -- updated code

parent eb06d12b
No related branches found
No related tags found
No related merge requests found
Showing
with 902 additions and 514 deletions
feat_person feat_pos s1
feat_person feat_pos s2
feat_person feat_pos s1 s2
feat_pos s1
feat_pos s2
feat_pos s1 s2
feat_person feat_pos s1
feat_person feat_pos s2
feat_person feat_pos s3
feat_person feat_pos s1 s2
feat_person feat_pos s1 s2 s3
feat_pos s1
feat_pos s2
feat_pos s3
feat_pos s1 s2
feat_pos s1 s2 s3
s1
s2
s3
s4
s1 s2
s1 s2 s3
s1 s2 s3 s4
feat_pos s1
feat_pos s2
feat_pos s3
feat_pos s4
feat_pos s1 s2
feat_pos s1 s2 s3
feat_pos s1 s2 s3 s4
feat_person feat_pos s1
feat_person feat_pos s2
feat_person feat_pos s3
feat_person feat_pos s4
feat_person feat_pos s5
feat_person feat_pos s1 s2
feat_person feat_pos s1 s2 s3
feat_person feat_pos s1 s2 s3 s4
feat_person feat_pos s1 s2 s3 s4 s5
feat_pos s1
feat_pos s2
feat_pos s3
feat_pos s4
feat_pos s5
feat_pos s1 s2
feat_pos s1 s2 s3
feat_pos s1 s2 s3 s4
feat_pos s1 s2 s3 s4 s5
s1
s2
s3
s4
s5
s6
s1 s2
s1 s2 s3
s1 s2 s3 s4
s1 s2 s3 s4 s5
s1 s2 s3 s4 s5 s6
feat_pos s1
feat_pos s2
feat_pos s3
feat_pos s4
feat_pos s5
feat_pos s6
feat_pos s1 s2
feat_pos s1 s2 s3
feat_pos s1 s2 s3 s4
feat_pos s1 s2 s3 s4 s5
feat_pos s1 s2 s3 s4 s5 s6
s1
s2
s3
s4
s5
s6
s7
s1 s2
s1 s2 s3
s1 s2 s3 s4
s1 s2 s3 s4 s5
s1 s2 s3 s4 s5 s6
s1 s2 s3 s4 s5 s6 s7
feat_pos s1
feat_pos s2
feat_pos s3
feat_pos s4
feat_pos s5
feat_pos s6
feat_pos s7
feat_pos s1 s2
feat_pos s1 s2 s3
feat_pos s1 s2 s3 s4
feat_pos s1 s2 s3 s4 s5
feat_pos s1 s2 s3 s4 s5 s6
feat_pos s1 s2 s3 s4 s5 s6 s7
#ifndef __FEAT_DESC__
#define __FEAT_DESC__
typedef struct
{
char* form;
char* pos;
char* morpho;
}FP;
typedef int (*feat_fct) (FP *c);
#include "../../maca_morpho/src/fplm.h"
typedef int (*feat_fct) (FPLM *c);
typedef struct {
char *name;
......
#ifndef __FPLM__
#define __FPLM__
#include <stdio.h>
#include <stdlib.h>
/*everything related to the fplm file*/
typedef struct
{
char* form;
char* pos;
char* morpho;
char* lemma;
}FPLM;
typedef enum
{
TENSE, PERSON, GENDER, NUMBER
}TARGET;
int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho);
int extract_class_position(TARGET target);
FPLM* new_fplm (void);
void free_fplm (FPLM* fplm);
void extract_classes_from_morpho (TARGET target, char* target_class, char* morpho);
int choose_target (char* target);
int associate_number_to_classes_separate(FILE* code_class, char* target_class, int current_target_class);
int extract_class_position (TARGET target);
int read_line_fplm (FILE* fplm_file, FPLM* fplm);
int associate_number_to_classes_separated (FILE* code_class, char* target_class, int current_target_class);
int associate_number_to_classes (FILE* code_class, char* target_class, int all_classes);
void extract_classes_from_morpho(TARGET target, char* target_class, char* morpho);
#endif
......@@ -17,8 +17,8 @@ int main(int argc, char *argv[])
create_cff_all_classes(ctx);
else
create_cff(ctx);
printf("cff.txt has been generated in the Files directory.\n");
printf("The code class file has been generated in the Files directory.\n");
printf("cff.txt has been generated.\n");
printf("The code class file has been generated.\n");
context_free(ctx);
return 0;
}
......
......@@ -11,12 +11,14 @@
void fplm2cff_help_message(context *ctx);
/**Functions to create cff when morpho is a class**/
void create_cff_all_classes (context* ctx);
void write_cff_all_classes_non_separate(FILE *cff, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_all_classes_separate(FILE *cff, FILE* code_class_big, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_all_classes_not_separated (FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_all_classes_separated (FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features);
/**Functions to create cff with one target (tense, person, gender or number)**/
void create_cff (context* ctx);
void write_cff_non_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_not_separated (FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features);
void write_cff_separated (FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features);
#endif
......@@ -16,167 +16,146 @@ void fplm2cff_help_message(context *ctx)
exit(1);
}
/*Predict all morpho features -tense, person, gender, number- in one*/
/**Functions to create cff when morpho is a class**/
void create_cff_all_classes(context* ctx)
{
FILE* fplm = NULL;
FILE* cff = NULL;
FILE* code_class = NULL;
feat_vec *fv = NULL;
dico *dico_features = NULL;
feat_model *fm = NULL;
FPLM* fplm = new_fplm();
FILE* fplm_file = fopen(ctx->fplm_filename,"r");
FILE* cff = fopen("cff.txt","w");
FILE* code_class = fopen("code_class","w+");
feat_vec *fv = feat_vec_new(10);
dico *dico_features = dico_new("dico_features", 1000);
feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
int fields_nb;
char form[100];
char pos[50];
char lemma[100];
char morpho[50];
FP* fp = malloc(sizeof(FP));
fp->form = malloc(sizeof(char)*100);
fp->pos = malloc(sizeof(char)*50);
fp->morpho = malloc(sizeof(char)*10);
fplm = fopen(ctx->fplm_filename,"r");
if(fplm == NULL)
if(fplm_file == NULL)
{
fprintf(stderr,"Could not open the fplm file.\n");
exit(EXIT_FAILURE);
}
fv = feat_vec_new(10);
dico_features = dico_new("dico_features", 1000);
fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
cff = fopen("../../Files/cff.txt","w");
if(cff==NULL)
{
fprintf(stderr,"Problem with the cff file.\n");
exit(EXIT_FAILURE);
}
code_class = fopen("../../Files/code_class","w+");
if(code_class==NULL)
{
fprintf(stderr,"Problem with the classes_code file.\n");
fprintf(stderr,"Problem with the code_class file.\n");
exit(EXIT_FAILURE);
}
while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1)
while((fields_nb = read_line_fplm(fplm_file, fplm)) != -1)
{
if(fields_nb!=4)
{
if(1)
if(ctx->debug_mode)
{
fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho);
fprintf(stderr, "incorrect fplm entry, skipping it\n");
}
continue;
}
strcpy(fp->form,form);
strcpy(fp->pos,pos);
strcpy(fp->morpho,morpho);
if(ctx->separate_classes)
write_cff_all_classes_separate(cff, code_class, fp, morpho, fv, fm, dico_features);
write_cff_all_classes_separated(cff, code_class, fplm, fv, fm, dico_features);
else
write_cff_all_classes_non_separate(cff, code_class, fp, morpho, fv, fm, dico_features);
write_cff_all_classes_not_separated(cff, code_class, fplm, fv, fm, dico_features);
}
if(ctx->features_filename)
dico_print(ctx->features_filename, dico_features);
free(fp->form);
free(fp->pos);
free(fp);
fclose(fplm);
free_fplm(fplm);
fclose(fplm_file);
fclose(cff);
fclose(code_class);
}
void write_cff_all_classes_non_separate(FILE *cff, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
void write_cff_all_classes_not_separated(FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features)
{
fprintf(cff,"%d", associate_number_to_classes(code_class,morpho,1));
form2fv(fp, fv, fm, dico_features, ADD_MODE);
fprintf(cff,"%d", associate_number_to_classes(code_class,fplm->morpho,1));
form2fv(fplm, fv, fm, dico_features, ADD_MODE);
feat_vec_print(cff, fv);
}
void write_cff_all_classes_separate(FILE *cff, FILE* code_class_big, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
void write_cff_all_classes_separated(FILE *cff, FILE* code_class, FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features)
{
char tense_class[10];
char person_class[10];
char gender_class[10];
char number_class[10];
char all_target[20];
int i,j;
extract_classes_from_morpho(TENSE, tense_class, morpho);
extract_classes_from_morpho(PERSON, person_class, morpho);
extract_classes_from_morpho(GENDER, gender_class, morpho);
extract_classes_from_morpho(NUMBER, number_class, morpho);
if(tense_class[0] == '\0')
int i,j,size=-1;
extract_classes_from_morpho(TENSE, tense_class, fplm->morpho);
extract_classes_from_morpho(PERSON, person_class, fplm->morpho);
extract_classes_from_morpho(GENDER, gender_class, fplm->morpho);
extract_classes_from_morpho(NUMBER, number_class, fplm->morpho);
for(i=0; i<(int)strlen(tense_class); i++)
{
tense_class[0]='#';
tense_class[1]='\0';
}
if(person_class[0] == '\0')
for(j=0; j<(int)strlen(person_class); j++)
{
person_class[0]='#';
person_class[1]='\0';
}
if(gender_class[0] == '\0')
size=-1;
if(tense_class[i]!='#')
{
gender_class[0]='#';
gender_class[1]='\0';
size++;
all_target[size] = tense_class[i];
}
if(number_class[0] == '\0')
size++;
all_target[size]='#';
if(person_class[j]!='#')
{
number_class[0]='#';
number_class[1]='\0';
size++;
all_target[size] = person_class[j];
}
for(i=0; i<(int)strlen(tense_class); i++)
size++;
all_target[size]='#';
if(gender_class[0]!='#')
{
for(j=0; j<(int)strlen(person_class); j++)
size++;
all_target[size] = gender_class[0];
}
size++;
all_target[size]='#';
if(number_class[0]!='#')
{
all_target[0] = tense_class[i];
all_target[1] = person_class[j];
all_target[2] = gender_class[0];
all_target[3] = number_class[0];
all_target[4] = '\0';
size++;
all_target[size] = number_class[0];
}
size++;
all_target[size]='#';
size++;
all_target[size] = '\0';
fprintf(cff,"%d", associate_number_to_classes(code_class_big, all_target, 1));
form2fv(fp, fv, fm, dico_features, ADD_MODE);
fprintf(cff,"%d", associate_number_to_classes(code_class, all_target, 1));
form2fv(fplm, fv, fm, dico_features, ADD_MODE);
feat_vec_print(cff, fv);
}
}
}
/*Predict one morpho feature */
/**Functions to create cff with one target (tense, person, gender or number)**/
void create_cff(context* ctx)
{
FILE* fplm = NULL;
FILE* cff = NULL;
FILE* fplm_file = fopen(ctx->fplm_filename,"r");
FILE* cff = fopen("cff.txt","w");
FILE* code_class = NULL;
feat_vec *fv = NULL;
dico *dico_features = NULL;
feat_model *fm = NULL;
feat_vec *fv = feat_vec_new(10);
dico *dico_features = dico_new("dico_features", 1000);
feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
int fields_nb;
char form[100];
char pos[50];
char lemma[100];
char morpho[50];
char code_class_name[30];
FP* fp = malloc(sizeof(FP));
fp->form = malloc(sizeof(char)*100);
fp->pos = malloc(sizeof(char)*50);
fp->morpho = malloc(sizeof(char)*10);
FPLM* fplm = new_fplm();
TARGET target = choose_target(ctx->target_name);
fplm = fopen(ctx->fplm_filename,"r");
if(fplm == NULL)
if(fplm_file == NULL)
{
fprintf(stderr,"Could not open input file.\nThe fplm file is in the Files directory.\n");
exit(EXIT_FAILURE);
}
fv = feat_vec_new(10);
dico_features = dico_new("dico_features", 1000);
fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
cff = fopen("../../Files/cff.txt","w");
if(cff==NULL)
{
fprintf(stderr,"Problem with the cff file.\n");
exit(EXIT_FAILURE);
}
strcpy(code_class_name,"../../Files/code_class_");
strcpy(code_class_name,"code_class_");
strcat(code_class_name,ctx->target_name);
code_class = fopen(code_class_name,"w+");
if(code_class==NULL)
......@@ -185,68 +164,58 @@ void create_cff(context* ctx)
exit(EXIT_FAILURE);
}
fprintf(code_class,"%s\n",ctx->target_name);
while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1)
fprintf(code_class,"0 #\n");
while((fields_nb = read_line_fplm(fplm_file, fplm)) != -1)
{
if(fields_nb!=4)
{
if(1)
if(ctx->debug_mode)
{
fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho);
fprintf(stderr, "incorrect fplm entry, skipping it\n");
}
continue;
}
strcpy(fp->form,form);
strcpy(fp->pos,pos);
strcpy(fp->morpho,morpho);
if(ctx->separate_classes)
write_cff_separate(cff, target, code_class, fp, morpho, fv, fm, dico_features);
write_cff_separated(cff, code_class, fplm, target, fv, fm, dico_features);
else
write_cff_non_separate(cff, target, code_class, fp, morpho, fv, fm, dico_features);
write_cff_not_separated(cff, code_class, fplm, target, fv, fm, dico_features);
}
if(ctx->features_filename)
dico_print(ctx->features_filename, dico_features);
free(fp->form);
free(fp->pos);
free(fp);
fclose(fplm);
free_fplm(fplm);
fclose(fplm_file);
fclose(cff);
fclose(code_class);
}
void write_cff_non_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
void write_cff_not_separated(FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features)
{
char target_class[10];
extract_classes_from_morpho(target, target_class, morpho);
extract_classes_from_morpho(target, target_class, fplm->morpho);
/*write the class' code in cff*/
if(target_class[0] == '\0')
fprintf(cff,"0");
else
fprintf(cff,"%d", associate_number_to_classes(code_class,target_class,0));
form2fv(fp, fv, fm, dico_features, ADD_MODE);
/*write the feature vector in cff*/
form2fv(fplm, fv, fm, dico_features, ADD_MODE);
feat_vec_print(cff, fv);
}
void write_cff_separate(FILE *cff, TARGET target, FILE* code_class, FP* fp, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
void write_cff_separated(FILE *cff, FILE* code_class, FPLM* fplm, TARGET target, feat_vec *fv, feat_model *fm, dico *dico_features)
{
int i=0;
char target_class[10];
extract_classes_from_morpho(target, target_class, morpho);
extract_classes_from_morpho(target, target_class, fplm->morpho);
/*write the class' code in cff*/
if(target_class[0] == '\0')
{
fprintf(cff,"0");
form2fv(fp, fv, fm, dico_features, ADD_MODE);
feat_vec_print(cff, fv);
return;
}
for(i=0; i<(int)strlen(target_class); i++)
{
fprintf(cff,"%d", associate_number_to_classes_separate(code_class,target_class,i));
form2fv(fp, fv, fm, dico_features, ADD_MODE);
fprintf(cff,"%d", associate_number_to_classes_separated(code_class,target_class,i));
/*write the feature vector in cff*/
form2fv(fplm, fv, fm, dico_features, ADD_MODE);
feat_vec_print(cff, fv);
}
}
......@@ -15,7 +15,7 @@ int main(int argc, char** argv)
if(ctx->help)
fplm2traintest_help_message(ctx);
generate_train_test(ctx);
printf("fplm_train.txt and fplm_test.txt have been generated in the Files directory.\n");
printf("fplm_train.txt and fplm_test.txt have been generated.\n");
return 0;
}
......
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fplm.h"
/**
 * Allocate an FPLM record together with its four string buffers.
 *
 * Buffer capacities in bytes: form 100, pos 20, lemma 100, morpho 20.
 * Every buffer is zero-filled, so each field is a valid empty string until
 * something is written into it (callers print all four fields even when a
 * line could only be partially parsed).
 *
 * Running out of memory is treated as fatal: a message is written to stderr
 * and the process exits with EXIT_FAILURE, so the result is never NULL.
 *
 * @return a newly allocated record, to be released with free_fplm().
 */
FPLM* new_fplm(void)
{
    FPLM* fplm = malloc(sizeof *fplm);

    if(fplm != NULL)
    {
        fplm->form = calloc(100, sizeof(char));
        fplm->pos = calloc(20, sizeof(char));
        fplm->lemma = calloc(100, sizeof(char));
        fplm->morpho = calloc(20, sizeof(char));
    }
    if(fplm == NULL || fplm->form == NULL || fplm->pos == NULL
       || fplm->lemma == NULL || fplm->morpho == NULL)
    {
        fprintf(stderr,"Could not allocate memory for an fplm entry.\n");
        exit(EXIT_FAILURE);
    }
    return fplm;
}
/**
 * Release an FPLM record and the four string buffers it owns.
 *
 * @param fplm record to destroy; NULL is accepted and ignored, mirroring
 *             the behaviour of free().
 */
void free_fplm(FPLM* fplm)
{
    if(fplm == NULL)
        return;

    free(fplm->form);
    free(fplm->pos);
    free(fplm->lemma);
    free(fplm->morpho);
    free(fplm);
}
/** Read a line from the fplm file and extract the form/pos/lemma/morpho.
* Return -1 if there are no more lines to read, else the number of strings read**/
int read_line_fplm(FILE* fplm, char* form, char* pos, char* lemma, char* morpho)
int read_line_fplm(FILE* fplm_file, FPLM* fplm)
{
int fields_nb;
char buffer[10000];
if(fgets(buffer, 10000, fplm)==NULL)
if(fgets(buffer, 10000, fplm_file)==NULL)
return -1;
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", fplm->form, fplm->pos, fplm->lemma, fplm->morpho);
return fields_nb;
}
......@@ -48,7 +65,7 @@ int choose_target(char* target)
return NUMBER;
else
{
fprintf(stderr,"-t argument must be \"tense\", \"person\", \"gender\" or \"number\"\n");
fprintf(stderr,"-t argument must be \"tense\", \"person\", \"gender\", \"number\" or \"all\".\n");
exit(EXIT_FAILURE);
}
return -1;
......@@ -86,11 +103,16 @@ void extract_classes_from_morpho(TARGET target, char* target_class, char* morpho
}
target_class[j] = '\0';
}
if(target_class[0] == '\0')
{
target_class[0] = '#';
target_class[1] = '\0';
}
}
/** Write the code_class file (used in predict to know the real class)
* Return the class' code**/
int associate_number_to_classes_separate(FILE* code_class, char* target_class, int current_target_class)
int associate_number_to_classes_separated(FILE* code_class, char* target_class, int current_target_class)
{
int code = 0;
char tmp[20];
......
......@@ -29,6 +29,7 @@ context *context_new(void)
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->separate_classes = 0;
ctx->cascade = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->language = strdup("fr");
......@@ -93,6 +94,10 @@ void context_separate_classes_help_message(context *ctx){
fprintf(stderr, "\t-s --separate <int> : separate the classes for tense and person if you enter 1\n");
}
/* Write the usage line for the -c / --cascade option to stderr. */
void context_cascade_help_message(context *ctx)
{
    (void)ctx; /* the message does not depend on the context */
    fputs("\t-c --cascade <int> : activate the cascade mode if you enter 1 (output fplm file)\n", stderr);
}
context *context_read_options(int argc, char *argv[])
{
int c;
......@@ -101,7 +106,7 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[14] =
static struct option long_options[15] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
......@@ -116,12 +121,13 @@ context *context_read_options(int argc, char *argv[])
{"target", required_argument, 0, 't'},
{"percent", required_argument, 0, 'p'},
{"code_class", required_argument, 0, 'y'},
{"separate", required_argument, 0, 's'}
{"separate", required_argument, 0, 's'},
{"cascade", required_argument, 0, 'c'}
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:y:x:w:t:p:s:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:y:x:w:t:p:s:c:", long_options, &option_index)) != -1){
switch (c)
{
case 'd':
......@@ -162,6 +168,9 @@ context *context_read_options(int argc, char *argv[])
break;
case 's':
ctx->separate_classes = atoi(optarg);
break;
case 'c':
ctx->cascade = atoi(optarg);
break;
}
}
......
......@@ -14,6 +14,7 @@ typedef struct {
int debug_mode;
int fplm_test_percent;
int separate_classes;
int cascade;
char *program_name;
char *fplm_filename;
char *language;
......@@ -42,4 +43,5 @@ void context_target_help_message(context *ctx);
void context_fplm_test_percent_help_message(context *ctx);
void context_code_class_help_message(context* ctx);
void context_separate_classes_help_message(context *ctx);
void context_cascade_help_message(context *ctx);
#endif
......@@ -5,118 +5,164 @@
#include <limits.h>
#include "char16.h"
#include "fplm.h"
/* Known part-of-speech tags; a tag's index in this table is its code. */
char* all_pos[23] = {"np","adj","nc","adv","prep","poncts","csu","v","vprespart","vppart",
"vinf","pres","ponctw","clr","det","coo","cln","pro","pri","prorel","clo","advneg","titre"};
/* Known person classes when classes are not separated; index = code. */
char* all_person_non_separated[5] = {"1","2","3","12","13"};

/* Return the index of key in table[0..count-1], or -1 when key is NULL or
 * not present. */
static int lookup_code(char* const* table, size_t count, const char* key)
{
    size_t i;

    if(key == NULL)
        return -1;
    for(i = 0; i < count; i++)
        if(strcmp(key, table[i]) == 0)
            return (int)i;
    return -1;
}

/**
 * Map a part-of-speech tag to its numeric code.
 *
 * @param pos tag to look up (may be NULL)
 * @return the tag's index in all_pos, or -1 if it is NULL or unknown
 */
int code_pos(char* pos)
{
    /* the bound is derived from the table so both cannot drift apart */
    return lookup_code(all_pos, sizeof all_pos / sizeof all_pos[0], pos);
}

/**
 * Map a non-separated person class ("1", "2", "3", "12", "13") to its code.
 *
 * @param class class string to look up (may be NULL)
 * @return the class's index in all_person_non_separated, or -1 if it is
 *         NULL or unknown
 */
int code_person(char* class)
{
    return lookup_code(all_person_non_separated,
                       sizeof all_person_non_separated / sizeof all_person_non_separated[0],
                       class);
}
/*patterns feature*/
int s1(FP* fp)
int s1(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 1 < 0)
return -1;
return tmp[size - 1];
}
int s2(FP* fp)
int s2(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 2 < 0)
return -1;
return tmp[size - 2];
}
int s3(FP* fp)
int s3(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 3 < 0)
return -1;
return tmp[size - 3];
}
int s4(FP* fp)
int s4(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 4 < 0)
return -1;
return tmp[size - 4];
}
int s5(FP* fp)
int s5(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 5 < 0)
return -1;
return tmp[size - 5];
}
int s6(FP* fp)
int s6(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 6 < 0)
return -1;
return tmp[size - 6];
}
int s7(FP* fp)
int s7(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 7 < 0)
return -1;
return tmp[size - 7];
}
int s8(FP* fp)
int s8(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 8 < 0)
return -1;
return tmp[size - 8];
}
int s9(FP* fp)
int s9(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 9 < 0)
return -1;
return tmp[size - 9];
}
int s10(FP* fp)
int s10(FPLM* fplm)
{
char16* tmp = utf8tochar16(fp->form);
char16* tmp = utf8tochar16(fplm->form);
int size = char16_strlen(tmp);
if(tmp == NULL || size - 10 < 0)
return -1;
return tmp[size - 10];
}
int feat_person(FP* fp)
int feat_person(FPLM* fplm)
{
char class[10];
extract_classes_from_morpho(PERSON, class, fp->morpho);
return code_person(class);
extract_classes_from_morpho(PERSON, class, fplm->morpho);
if((int)strlen(class)==1)
return class[0];
/*To concatenate ASCII code of 2 or more numbers*/
FILE* tmp = fopen("tmp.txt","w+");
if(tmp==NULL)
{
fprintf(stderr,"error tmp file -- maca_morpho_feat_fct.c\n");
return -1;
}
int code;
for(int i=0; i<(int)strlen(class); i++)
fprintf(tmp,"%d",class[i]);
rewind(tmp);
fscanf(tmp,"%d",&code);
fclose(tmp);
remove("tmp.txt");
return code;
}
int feat_pos(FP* fp)
int feat_tense(FPLM* fplm)
{
return code_pos(fp->pos);
char class[10];
extract_classes_from_morpho(TENSE, class, fplm->morpho);
if((int)strlen(class)==1)
return class[0];
/*To concatenate ASCII code of 2 or more letters*/
FILE* tmp = fopen("tmp.txt","w+");
if(tmp==NULL)
{
fprintf(stderr,"error tmp file -- maca_morpho_feat_fct.c\n");
return -1;
}
int code;
for(int i=0; i<(int)strlen(class); i++)
fprintf(tmp,"%d",class[i]);
rewind(tmp);
fscanf(tmp,"%d",&code);
fclose(tmp);
remove("tmp.txt");
return code;
}
/* Gender feature: the first character of the GENDER class extracted from
 * the entry's morpho string, returned as its character code
 * ('f', 'm' or '#'). */
int feat_gender(FPLM* fplm)
{
    char gender[10];

    extract_classes_from_morpho(GENDER, gender, fplm->morpho);
    return *gender;
}
/* Number feature: the first character of the NUMBER class extracted from
 * the entry's morpho string, returned as its character code
 * ('s', 'p' or '#'). */
int feat_number(FPLM* fplm)
{
    char number[10];

    extract_classes_from_morpho(NUMBER, number, fplm->morpho);
    return *number;
}
/**
 * Part-of-speech feature: turn the pos string of an entry into an integer.
 *
 * The code is the decimal concatenation of the byte values of the tag
 * (e.g. "np" -> 110 then 112 -> 110112). It is now computed in memory
 * instead of being written to and re-read from a "tmp.txt" scratch file.
 *
 * When the concatenation does not fit in an int (tags of roughly four
 * letters or more), re-reading it with "%d" was undefined behaviour; such
 * tags now get a deterministic non-negative hash of their bytes instead.
 * NOTE(review): codes for those long tags therefore differ from whatever
 * the previous binary produced -- regenerate any data built with it.
 *
 * @param fplm entry whose pos field is encoded
 * @return the code described above, or -1 if fplm or its pos is NULL or
 *         the pos string is empty
 */
int feat_pos(FPLM* fplm)
{
    const unsigned char* p;
    unsigned long hash = 5381; /* fallback used only when the digits overflow */
    int code = 0;
    int fits = 1;

    if(fplm == NULL || fplm->pos == NULL || fplm->pos[0] == '\0')
        return -1;

    for(p = (const unsigned char*)fplm->pos; *p != '\0'; p++)
    {
        int byte = *p;
        /* appending a byte value shifts the code by its number of digits */
        int scale = (byte >= 100) ? 1000 : (byte >= 10) ? 100 : 10;

        hash = hash * 33u + (unsigned long)byte;

        /* check before multiplying so signed overflow never happens */
        if(fits && code <= (INT_MAX - byte) / scale)
            code = code * scale + byte;
        else
            fits = 0;
    }

    if(fits)
        return code;
    return (int)(hash & (unsigned long)INT_MAX);
}
feat_lib *feat_lib_build(void) {
......@@ -135,6 +181,8 @@ feat_lib *feat_lib_build(void) {
feat_lib_add(fl, 1, (char *)"s10", s10);
feat_lib_add(fl, 1, (char *)"feat_pos", feat_pos);
feat_lib_add(fl, 1, (char *)"feat_person", feat_person);
feat_lib_add(fl, 1, (char *)"feat_number", feat_number);
feat_lib_add(fl, 1, (char *)"feat_gender", feat_gender);
feat_lib_add(fl, 1, (char *)"feat_tense", feat_tense);
return fl;
}
......@@ -10,33 +10,66 @@
#include "feature_table.h"
#include "fplm.h"
/* Bundle of FILE handles that are handed around together as one
 * Output_files argument (see new_output_files / init_output_files /
 * free_output_files declared in this header).
 * NOTE(review): the per-field roles below are inferred from the field and
 * parameter names only -- confirm against the implementation. */
typedef struct
{
FILE* f_error;           /* presumably the error report (cf. error_name) */
FILE* f_predict;         /* presumably the predictions (cf. predict_name) */
FILE* new_fplm;          /* presumably the fplm rewritten in cascade mode (cf. write_new_fplm) */
FILE* morpho_predicted;  /* presumably the predicted morpho strings (cf. morpho_name) */
}Output_files;
/* Bundle of FILE handles that are handed around together as one
 * Input_files argument (see new_input_files / init_input_files /
 * free_input_files declared in this header).
 * NOTE(review): the grouping comments below are inferred from the field
 * names only -- confirm against the implementation. */
typedef struct
{
FILE* f_fplm;
FILE* code_class;
FILE* all_real_morphos;
/* one code_class handle per target: tense, person, gender, number */
FILE* code_class_tense;
FILE* code_class_person;
FILE* code_class_gender;
FILE* code_class_number;
/* one predict handle per target: tense, person, gender, number */
FILE* predict_tense;
FILE* predict_person;
FILE* predict_gender;
FILE* predict_number;
}Input_files;
void predict_help_message(context *ctx);
void new_input_files (Input_files* in_files);
void init_input_files (Input_files* in_files, char* fplm_name, char* code, char* all, char* c1, char* c2, char* c3, char* c4, char* p1, char* p2, char* p3, char* p4);
void new_output_files (Output_files* out_files);
void init_output_files (Output_files* out_files, char* error_name, char* predict_name, char* fplm_name, char* morpho_name);
void free_input_files (Input_files* in_files);
void free_output_files (Output_files* out_files);
/*Predict all classes in one*/
void predict_all_classes (context* ctx);
void make_prediction_all_classes (context* ctx, FILE* error_file, FILE* code_class, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
/*classes are not separated*/
void make_prediction_all_classes_non_separate(FILE* error_file, FILE* code_class, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
void errors_nb_all_classes_non_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho);
int extract_real_class_non_separate(FILE* code_class, char* target_class, int all_classes);
void errors_nb_all_classes_not_separated (FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors);
int extract_real_class_not_separated (FILE* code_class, char* target_class, int all_classes);
/*classes are separated*/
void make_prediction_all_classes_separate(FILE* predictions, FILE* code_class_big, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
void errors_nb_all_classes_separate(FILE* error_file, FILE* code_class, FP* fp, int class_predicted, int* errors, char* morpho);
int extract_real_class_all_classes_separate(FILE* code_class, int* real_class, char* morpho);
void errors_nb_all_classes_separated (FILE* error_file, FILE* code_class, FPLM* fplm, int class_predicted, int* errors);
int extract_real_class_all_classes_separated (FILE* code_class, int* real_class, char* morpho);
/*Predict a target*/
void predict_target (context* ctx);
void make_prediction (context* ctx, Output_files* out_files, FILE* code_class, TARGET target, int* errors, FPLM* fplm, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
/*classes are separated*/
void make_prediction_separate(FILE* error_file, FILE* predict_file, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
void errors_nb_separate(FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, char* morpho, FP* fp);
int extract_real_class_separate(FILE* code_class, char* target_class, int* real_class);
void errors_nb_separated (FILE* error_file, FILE* code_class, TARGET target, int class_predicted, int* errors, FPLM* fplm);
int extract_real_class_separated (FILE* code_class, char* target_class, int* real_class);
/*classes are not separated*/
void make_prediction_non_separate(FILE* error_file, FILE* y, FILE* code_class, TARGET target, int* errors, FP* fp, char* morpho, feature_table *cfw, feat_vec *fv, dico *dico_features, feat_model *fm);
void errors_nb_non_separate(FILE* error_file, FILE* code_class, TARGET target, FP* fp, int class_predicted, int* errors, char* morpho);
void errors_nb_not_separated (FILE* error_file, FILE* code_class, TARGET target, FPLM* fplm, int class_predicted, int* errors);
/*Cascade*/
void write_new_fplm (FPLM* fplm, TARGET target, FILE* new_fplm, FILE* code_class, int class_predicted);
/*Predict each targets then predict all classes in one*/
void predict_each_and_all_targets (context* ctx);
void calculate_global_success_rate(context* ctx, FILE* y, FILE* y2, FILE* y3, FILE* y4,int* global_error, int* class_predicted_array,FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho);
void put_in_array_real_classes(int* real_classes_array, FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho);
void compare_predicted_and_real_class(FILE* ycode_class,FILE* y2code_class,FILE* y3code_class,FILE* y4code_class,char* morpho, int* global_error, int* class_predicted_array);
void calculate_global_success_rate (context* ctx, Input_files* in_files, FILE* morpho_predicted, int* global_error, int* class_predicted_array,char* morpho);
void create_morpho_predicted_file (FILE* morpho_predicted,int* class_predicted_array,Input_files* in_files);
void compare_morpho_predicted_and_real_morphos (FILE* all_real_morphos, FILE* morpho_predicted, int line_nb);
void put_in_array_real_classes (int* real_classes_array, Input_files* in_files, char* morpho);
void compare_predicted_and_real_class (Input_files* in_files,char* morpho, int* global_error, int* class_predicted_array);
#endif
This diff is collapsed.
......@@ -3,7 +3,7 @@
#include<string.h>
#include"vectorize.h"
int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int mode)
int get_feat_value(feat_model *fm, FPLM* fplm, dico *dico_features, int feat_nb, int mode)
{
feat_desc *fd = fm->array[feat_nb];
int i;
......@@ -14,7 +14,7 @@ int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int
fm->string[0] = '\0';
for(i=0; i < fd->nbelem; i++){
strcat(fm->string, fd->array[i]->name);
feat_val = fd->array[i]->fct(fp);
feat_val = fd->array[i]->fct(fplm);
sprintf(str, "%d", feat_val);
strcat(fm->string, str);
......@@ -28,11 +28,11 @@ int get_feat_value(feat_model *fm, FP* fp, dico *dico_features, int feat_nb, int
}
feat_vec *form2fv(FP* fp, feat_vec *fv, feat_model *fm, dico *dico_features, int mode)
feat_vec *form2fv(FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features, int mode)
{
int i;
feat_vec_empty(fv);
for(i=0; i < fm->nbelem; i++)
feat_vec_add(fv, get_feat_value(fm, fp, dico_features, i, mode));
feat_vec_add(fv, get_feat_value(fm, fplm, dico_features, i, mode));
return fv;
}
......@@ -9,6 +9,6 @@
#define ADD_MODE 2
feat_vec *form2fv(FP* fp, feat_vec *fv, feat_model *fm, dico *dico_features, int mode);
feat_vec *form2fv(FPLM* fplm, feat_vec *fv, feat_model *fm, dico *dico_features, int mode);
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment