Skip to content
Snippets Groups Projects
Select Git revision
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
20 results

fplm2train_test.c

Blame
  • fplm2train_test.c 2.13 KiB
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include "maca_morpho_context.h"
    #define MAX 100
    
    /* Calls the context_*_help_message helpers for this tool, then exits. */
    void fplm2traintest_help_message(context *ctx);
    /* Returns the number of lines in an open fplm stream and rewinds it. */
    int count_line_number_fplm(FILE* fplm);
    /* Splits the file ctx->fplm_filename into fplm_train.txt and fplm_test.txt,
     * using ctx->fplm_test_percent as the share of lines for the test file. */
    void generate_train_test(context* ctx);
    
    
    /*
     * Program entry point.
     *
     * Reads the command-line options into a context, hands over to the help
     * routine when help was requested, then splits the fplm file into its
     * train and test parts and reports that both files were written.
     */
    int main(int argc, char** argv)
    {
    	context *options = context_read_options(argc, argv);

    	if(options->help)
    	{
    		fplm2traintest_help_message(options);
    	}

    	generate_train_test(options);
    	fputs("fplm_train.txt and fplm_test.txt have been generated.\n", stdout);

    	return 0;
    }
    
    /*
     * Emits the help for this tool by calling, in order, the general, language,
     * fplm, maca data path and fplm test percent help helpers on ctx.
     *
     * This function does not return: it ends the program with exit status 1.
     */
    void fplm2traintest_help_message(context *ctx)
    {
    	context_general_help_message(ctx);
    	context_language_help_message(ctx);
    	context_fplm_help_message(ctx);
    	context_maca_data_path_help_message(ctx);
    	context_fplm_test_percent_help_message(ctx);
    	/* Terminate here so the caller never continues after showing help. */
    	exit(1);
    }
    
    /*
     * Count the lines of an open stream and rewind it.
     *
     * A line is counted each time a character is found at the start of the
     * stream or right after a '\n', so lines of any length count exactly once
     * and a final line without a trailing newline is counted too.
     *
     * fplm: stream opened for reading; NULL is accepted and yields 0.
     * Returns the number of lines. The stream is rewound to its beginning
     * before returning (not attempted when fplm is NULL).
     */
    int count_line_number_fplm(FILE* fplm)
    {
    	int line_nb=0;
    	int previous = '\n';	/* makes the very first character start a line */
    	int current;

    	if(fplm == NULL)
    		return 0;

    	/* Reading character by character avoids counting one long line
    	 * several times, as a fixed-size fgets buffer would. */
    	while((current = getc(fplm)) != EOF)
    	{
    		if(previous == '\n')
    			line_nb++;
    		previous = current;
    	}
    	rewind(fplm);
    	return line_nb;
    }
    
    /*
     * Split the fplm file named by ctx->fplm_filename into two files written
     * in the current directory: fplm_train.txt and fplm_test.txt.
     *
     * ctx->fplm_test_percent (clamped to 0..MAX) is the share of lines sent to
     * the test file. Target sizes are derived from the line count returned by
     * count_line_number_fplm(). While both files are below their target, each
     * line is assigned at random; once one file has reached its target, every
     * remaining line goes to the other one.
     *
     * The input is copied character by character and the destination is chosen
     * only at the start of a line, so a line is never split between the two
     * files whatever its length.
     *
     * Exits with EXIT_FAILURE when a file cannot be opened or when a read,
     * write or close error is detected.
     */
    void generate_train_test(context* ctx)
    {
    	FILE* train = NULL;
    	FILE* test = NULL;
    	FILE* fplm = NULL;
    	FILE* target = NULL;
    	int percent = ctx->fplm_test_percent;
    	int test_size;
    	int train_size;
    	int line_nb = 0;
    	int line_number_test = 0;
    	int line_number_train = 0;
    	int at_line_start = 1;
    	int io_error = 0;
    	int c;

    	/* Keep the percentage meaningful even if the option is out of range. */
    	if(percent < 0)
    		percent = 0;
    	if(percent > MAX)
    		percent = MAX;

    	fplm = fopen(ctx->fplm_filename, "r");
    	if(fplm==NULL)
    	{
    		fprintf(stderr,"Problem with the fplm file.\n");
    		exit(EXIT_FAILURE);
    	}
    	line_nb = count_line_number_fplm(fplm);
    	train = fopen("fplm_train.txt", "w");
    	if(train==NULL)
    	{
    		fprintf(stderr,"Problem with the train file.\n");
    		exit(EXIT_FAILURE);
    	}
    	test = fopen("fplm_test.txt","w");
    	if(test==NULL)
    	{
    		fprintf(stderr,"Problem with the test file.\n");
    		exit(EXIT_FAILURE);
    	}

    	/* Same value as line_nb*percent/MAX, computed without the large
    	 * intermediate product that could overflow an int. */
    	test_size = (line_nb/MAX)*percent + (line_nb%MAX)*percent/MAX;
    	train_size = line_nb - test_size;

    	srand((unsigned int)time(NULL));
    	while((c = getc(fplm)) != EOF)
    	{
    		if(at_line_start)
    		{
    			/* Quotas are checked before writing so that a quota of
    			 * zero is honoured as well. */
    			if(line_number_test >= test_size)
    				target = train;
    			else if(line_number_train >= train_size)
    				target = test;
    			else if(rand()%MAX < percent)	/* probability percent/MAX */
    				target = test;
    			else
    				target = train;

    			if(target == test)
    				line_number_test++;
    			else
    				line_number_train++;
    		}
    		putc(c, target);
    		at_line_start = (c == '\n');
    	}

    	if(ferror(fplm) || ferror(train) || ferror(test))
    		io_error = 1;
    	/* fclose flushes buffered data, so its result matters for output files. */
    	if(fclose(train) != 0)
    		io_error = 1;
    	if(fclose(test) != 0)
    		io_error = 1;
    	fclose(fplm);

    	if(io_error)
    	{
    		fprintf(stderr,"Problem while reading the fplm file or writing the train and test files.\n");
    		exit(EXIT_FAILURE);
    	}
    }