diff --git a/maca_morpho/src/fplm2train_test.c b/maca_morpho/src/fplm2train_test.c new file mode 100644 index 0000000000000000000000000000000000000000..dfa8a42ee06bc571f51bf5004ed60395f97ccce8 --- /dev/null +++ b/maca_morpho/src/fplm2train_test.c @@ -0,0 +1,106 @@ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "maca_morpho_context.h" +#define MAX 100 + +void fplm2traintest_help_message(context *ctx); +int count_line_number_fplm(FILE* fplm); +void generate_train_test(context* ctx); + + +int main(int argc, char** argv) +{ + context *ctx = context_read_options(argc, argv); + if(ctx->help) + fplm2traintest_help_message(ctx); + generate_train_test(ctx); + printf("fplm_train.txt and fplm_test.txt have been generated in the Files directory.\n"); + return 0; +} + +void fplm2traintest_help_message(context *ctx) +{ + context_general_help_message(ctx); + context_language_help_message(ctx); + context_fplm_help_message(ctx); + context_maca_data_path_help_message(ctx); + context_fplm_test_percent_help_message(ctx); + exit(1); +} + +int count_line_number_fplm(FILE* fplm) +{ + int line_nb=0; + char buffer[10000]; + while(fgets(buffer, 10000, fplm)) + { + line_nb++; + } + rewind(fplm); + return line_nb; +} + +void generate_train_test(context* ctx) +{ + FILE* train = NULL; + FILE* test = NULL; + FILE* fplm = NULL; + int percent = ctx->fplm_test_percent; + int test_size; + int choice; + int line_nb = 0; + int line_number_test = 0; + int line_number_train = 0; + int write_test = 1; + int write_train = 1; + char buffer[10000]; + + fplm = fopen(ctx->fplm_filename, "r"); + line_nb = count_line_number_fplm(fplm); + train = fopen("../../Files/fplm_train.txt", "w"); + if(train==NULL) + { + fprintf(stderr,"Problem with the train file.\n"); + exit(EXIT_FAILURE); + } + test = fopen("../../Files/fplm_test.txt","w"); + if(test==NULL) + { + fprintf(stderr,"Problem with the test file.\n"); + exit(EXIT_FAILURE); + } + test_size = line_nb*percent/100; + srand(time(NULL)); + while(fgets(buffer, 10000, fplm)) + { + if(write_test && write_train) + { + choice = rand()%(MAX + 1); + if(choice > MAX-percent) + { + fprintf(test,"%s",buffer); + line_number_test++; + } + else + { + fprintf(train,"%s",buffer); + line_number_train++; + } + if(line_number_test == test_size) + write_test = 0; + if(line_number_train == (line_nb - test_size)) + write_train = 0; + } + else + { + if(!write_test) + fprintf(train,"%s",buffer); + else + fprintf(test,"%s",buffer); + } + } + fclose(train); + fclose(test); + fclose(fplm); +}