/*
 * fplm2train_test.c
 * Author: Ibtissem Benchikh LeHocine
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "maca_morpho_context.h"
#define MAX 100
/* Calls the context help-message helpers, then terminates the program. */
void fplm2traintest_help_message(context *ctx);
/* Returns the number of lines in fplm and rewinds the stream. */
int count_line_number_fplm(FILE* fplm);
/* Splits the file named by ctx->fplm_filename into fplm_train.txt and fplm_test.txt. */
void generate_train_test(context* ctx);
/*
 * Program entry point.
 *
 * Builds the context from the command line, shows the help text when the
 * help flag is set, then produces the train/test split and reports it on
 * standard output.
 */
int main(int argc, char** argv)
{
    context *options = context_read_options(argc, argv);

    if (options->help) {
        /* Does not return: the help routine finishes with exit(). */
        fplm2traintest_help_message(options);
    }

    generate_train_test(options);
    fputs("fplm_train.txt and fplm_test.txt have been generated.\n", stdout);

    return 0;
}
/*
 * Runs the five context help-message helpers in a fixed order and then
 * terminates the process with exit status 1, so this function never
 * returns to its caller.
 *
 * ctx: the program context, forwarded unchanged to every helper.
 *
 * NOTE(review): judging by their names, each helper presumably prints the
 * usage text for one option (general, language, fplm file, maca data path,
 * test percentage) -- confirm against maca_morpho_context.h.
 */
void fplm2traintest_help_message(context *ctx)
{
context_general_help_message(ctx);
context_language_help_message(ctx);
context_fplm_help_message(ctx);
context_maca_data_path_help_message(ctx);
context_fplm_test_percent_help_message(ctx);
/* Exit status is 1 even though help was explicitly requested. */
exit(1);
}
/*
 * Count the lines of an open text stream.
 *
 * fplm: stream opened for reading, positioned where counting should start
 *       (the caller passes a freshly opened file). May be NULL.
 *
 * Returns the number of lines read: every newline-terminated line counts
 * once, and a non-empty last line without a trailing newline counts too.
 * Returns 0 when fplm is NULL. On return the stream has been rewound to its
 * beginning so it can be read again.
 *
 * Lines are counted by their newline characters rather than by the number
 * of fixed-size reads, so a line of any length is counted exactly once.
 */
int count_line_number_fplm(FILE* fplm)
{
    int line_nb = 0;
    int c;
    int last = '\n'; /* pretend a newline precedes the file: an empty file has 0 lines */

    if (fplm == NULL)
        return 0;

    while ((c = getc(fplm)) != EOF)
    {
        if (c == '\n')
            line_nb++;
        last = c;
    }

    /* Unterminated final line. */
    if (last != '\n')
        line_nb++;

    rewind(fplm);
    return line_nb;
}
/*
 * Split the fplm file named by ctx->fplm_filename into two files created in
 * the current working directory: "fplm_train.txt" and "fplm_test.txt".
 *
 * ctx->fplm_test_percent is the share of lines, in percent (0..100), that
 * goes to the test file; all remaining lines go to the train file. Exactly
 * line_nb * percent / 100 lines (integer division) are written to the test
 * file, where line_nb is the value returned by count_line_number_fplm().
 *
 * The test lines are picked by selection sampling: each line is taken with
 * probability (test lines still needed) / (lines still to be read). This
 * yields the exact requested size while giving every line the same chance,
 * wherever it sits in the file.
 *
 * The random generator is seeded from the current time, so two runs give
 * different splits.
 *
 * Terminates the program with EXIT_FAILURE when the percentage is out of
 * range, when a file cannot be opened, or when writing fails.
 */
void generate_train_test(context* ctx)
{
    FILE* train = NULL;
    FILE* test = NULL;
    FILE* fplm = NULL;
    FILE* dest = NULL;
    int percent = ctx->fplm_test_percent;
    int line_nb;
    int lines_left;          /* lines not yet assigned to a file */
    int test_left;           /* test lines still to be picked */
    int at_line_start = 1;   /* 0 while inside a line longer than the buffer */
    int write_failed;
    char buffer[10000];

    if(percent < 0 || percent > 100)
    {
        fprintf(stderr,"The test percentage must be between 0 and 100.\n");
        exit(EXIT_FAILURE);
    }

    /* Open the input first so that a bad input path does not truncate
       existing output files. */
    if(ctx->fplm_filename != NULL)
        fplm = fopen(ctx->fplm_filename, "r");
    if(fplm==NULL)
    {
        fprintf(stderr,"Problem with the fplm file.\n");
        exit(EXIT_FAILURE);
    }
    line_nb = count_line_number_fplm(fplm);

    train = fopen("fplm_train.txt", "w");
    if(train==NULL)
    {
        fprintf(stderr,"Problem with the train file.\n");
        exit(EXIT_FAILURE);
    }
    test = fopen("fplm_test.txt","w");
    if(test==NULL)
    {
        fprintf(stderr,"Problem with the test file.\n");
        exit(EXIT_FAILURE);
    }

    /* Widen before multiplying: line_nb * percent can exceed INT_MAX. */
    test_left = (int)((long long)line_nb * percent / 100);
    lines_left = line_nb;

    srand((unsigned)time(NULL));

    while(fgets(buffer, sizeof buffer, fplm))
    {
        /* Only decide at the start of a line; the following pieces of an
           over-long line follow the first piece into the same file. */
        if(at_line_start)
        {
            /* rand()/(RAND_MAX+1.0) lies in [0,1), so the test is always
               true when test_left == lines_left and never true when
               test_left == 0. */
            if(lines_left > 0 &&
               rand() / (RAND_MAX + 1.0) * lines_left < test_left)
            {
                dest = test;
                test_left--;
            }
            else
            {
                dest = train;
            }
            if(lines_left > 0)
                lines_left--;
        }
        fputs(buffer, dest);
        /* fgets keeps the newline, so its absence means the line continues
           in the next read (or the file ended without one). */
        at_line_start = (strchr(buffer, '\n') != NULL);
    }

    /* fclose flushes buffered data, so its result matters for the outputs. */
    write_failed = ferror(train) || ferror(test);
    if(fclose(train) != 0)
        write_failed = 1;
    if(fclose(test) != 0)
        write_failed = 1;
    fclose(fplm);

    if(write_failed)
    {
        fprintf(stderr,"Problem while writing the train or test file.\n");
        exit(EXIT_FAILURE);
    }
}