Skip to content
Snippets Groups Projects
Commit 34a0feca authored by Franck Dary's avatar Franck Dary
Browse files

Created a class to represent a fann file in memory

parent 833a2f51
No related branches found
No related tags found
No related merge requests found
...@@ -18,6 +18,7 @@ set(SOURCES src/util.c ...@@ -18,6 +18,7 @@ set(SOURCES src/util.c
src/l_rule.c src/l_rule.c
src/json_parser.c src/json_parser.c
src/json_tree.c src/json_tree.c
src/Fann_file.c
) )
#compiling library #compiling library
......
#ifndef __FANN_FILE__
#define __FANN_FILE__
#include <stdio.h>
#include <string>
#include <vector>
/*
Represents a fann file formatted like this :
# 0 b0f b0p
# 0 FORM POS
# 1 b0m
# 1 FEATS
0 1 63 13
1 12 4
0 2 3 11
Where the first number of each line is the id of the classifier.
Lines that start with # are header lines, there are 2 per classifier,
they describe the features used by this classifier.
Other lines are training examples, where the second number is the class and
all the remaining numbers are the feature vector.
*/
/* In-memory view of the part of a fann file that concerns one classifier. */
/* The examples are stored once and are served through a separate index */
/* permutation, so they can be iterated in batches and reshuffled cheaply. */
class Fann_file
{
  public :

  /* One training example : its class and a reference to its feature vector. */
  /* The reference points into this object's storage. */
  using Example = std::pair< int, std::vector<int>& >;

  /* Load the headers and examples of classifier classifier_id from filename. */
  Fann_file(const char * filename, unsigned int classifier_id);

  /* Randomly permute the order in which examples are served. */
  void shuffle();

  /* Restart the iteration from the first example of the current order. */
  void rewind();

  /* True once every example has been served since the last rewind. */
  bool finished();

  /* Serve up to batch_size examples, fewer if not enough remain. */
  std::vector<Example> get_batch(unsigned int batch_size);

  private :

  /* Field separator used when splitting a line of the file. */
  static constexpr const char *separator = "\t";
  /* Size of the buffer a line of the file is read into. */
  static constexpr unsigned int max_line_size = 1000;

  /* Permutation of example indexes, it defines the serving order. */
  std::vector<unsigned int> order;
  /* Position inside order of the next example to serve. */
  unsigned int next_line;
  /* features[i] is the feature vector of example i. */
  std::vector< std::vector<int> > features;
  /* classes[i] is the class of example i. */
  std::vector<int> classes;
  /* Tokens of the first header line of the classifier. */
  std::vector<std::string> feature_names;
  /* Tokens of the second header line of the classifier. */
  std::vector<std::string> feature_types;
};
#endif
#include "Fann_file.h"
#include "util.h"
#include "string.h"
#include <algorithm>
#include <cstdlib>
#include <stdexcept>
#include <utility>
/* Load from filename everything that belongs to classifier classifier_id. */
/* */
/* The file starts with header lines (prefixed by '#') that come in pairs : */
/* feature names then feature types, both starting with the classifier id. */
/* They are followed by training examples : classifier id, class, features. */
/* Lines that belong to another classifier are ignored. */
/* */
/* Any format error is reported on stderr and terminates the program with */
/* exit(1). The file is closed before returning. */
Fann_file::Fann_file(const char * filename, unsigned int classifier_id)
{
  FILE * f = myfopen(filename, "r");
  /* One extra byte : a line of max_line_size characters or more is then */
  /* detected and rejected instead of overflowing the buffer. */
  /* Zero-initialised so that it can always be printed by close_and_exit. */
  char buffer[max_line_size + 1] = "";
  const int expected_id = static_cast<int>(classifier_id);

  auto close_and_exit = [&](const char * infos)
  {
    if(f)
      fclose(f);
    fprintf(stderr, "ERROR : fann file '%s' bad format\n", filename);
    fprintf(stderr, "infos : %s\n", infos);
    fprintf(stderr, "buffer = <%s>\n", buffer);
    exit(1);
  };

  /* NOTE(review): myfopen is defined elsewhere, it may already abort on */
  /* failure ; this guard only matters if it can return NULL. */
  if(!f)
    close_and_exit("could not open file");

  /* Same formats as before, but with an explicit field width so that */
  /* fscanf can never write past the end of buffer. */
  char header_format[32];
  char example_format[32];
  snprintf(header_format, sizeof(header_format), "#%%%u[^\n]\n", max_line_size);
  snprintf(example_format, sizeof(example_format), "%%%u[^\n]\n", max_line_size);

  /* Read one line into buffer, return false when nothing matches format. */
  auto read_line = [&](const char * format) -> bool
  {
    if(fscanf(f, format, buffer) != 1)
      return false;
    if(strlen(buffer) >= max_line_size)
      close_and_exit("line is too long");
    return true;
  };

  /* std::stoi throws on a malformed or out of range number ; turn that */
  /* into the same error report as every other format problem. */
  auto parse_int = [&](const char * token, const char * infos) -> int
  {
    try
    {
      return std::stoi(token);
    }
    catch(const std::logic_error &)
    {
      close_and_exit(infos);
    }
    return 0; //Never reached, close_and_exit does not return
  };

  while(read_line(header_format)) //While we read header lines
  {
    char * token = strtok(buffer, separator);
    if(!token)
      close_and_exit("empty header line (features names)");
    if(parse_int(token, "classifier id is not a valid number (features names)") != expected_id)
      continue;
    while((token = strtok(nullptr, separator)))
      feature_names.emplace_back(token);

    if(!read_line(header_format)) //Header lines come in pair
      close_and_exit("missing features types in header");
    token = strtok(buffer, separator);
    if(!token)
      close_and_exit("empty header line (features types)");
    if(parse_int(token, "classifier id is not a valid number (features types)") != expected_id)
      close_and_exit("header line (features types) does not correspond to the same classifier");
    while((token = strtok(nullptr, separator)))
      feature_types.emplace_back(token);
  } //Finished reading header

  if(feature_names.size() != feature_types.size())
    close_and_exit("number of features names and features types are differents");

  while(read_line(example_format)) //While we read training examples
  {
    char * token = strtok(buffer, separator);
    if(!token)
      close_and_exit("empty line (training examples)");
    if(parse_int(token, "classifier id is not a valid number (training examples)") != expected_id)
      continue;

    token = strtok(nullptr, separator);
    if(!token)
      close_and_exit("class is missing in training example");
    classes.emplace_back(parse_int(token, "class is not a valid number in training example"));

    features.emplace_back();
    while((token = strtok(nullptr, separator)))
      features.back().emplace_back(parse_int(token, "feature is not a valid number in training example"));
    if(features.back().size() != feature_types.size())
      close_and_exit("feature vector of training example don't have the right number of features");
  } //Finished reading training examples

  /* Everything is in memory now, the file is not needed anymore. */
  fclose(f);

  /* Start with the examples in file order. */
  order.reserve(classes.size());
  for(unsigned int i = 0; i < classes.size(); i++)
    order.emplace_back(i);
  next_line = 0;
}
/* Randomly permute the order in which the examples are served. */
/* */
/* Written as an explicit Fisher-Yates loop because std::random_shuffle has */
/* been removed from the standard library in C++17. The random source is */
/* still std::rand(), so the result remains controlled by std::srand(), and */
/* the swap partner is still drawn with std::rand() % (i + 1). */
void Fann_file::shuffle()
{
  for(unsigned int i = 1; i < order.size(); i++)
  {
    /* Pick a partner among positions [0, i], i itself included. */
    const unsigned int j = static_cast<unsigned int>(std::rand() % static_cast<int>(i + 1));
    std::swap(order[i], order[j]);
  }
}
void Fann_file::rewind()
{
next_line = 0;
}
/* Tell whether every example has been served since the last rewind. */
bool Fann_file::finished()
{
  const bool examples_left = next_line < classes.size();
  return !examples_left;
}
/* Return a batch of training examples of size batch_size or less if we don't have */
/* enough remaining examples */
std::vector<Fann_file::Example> Fann_file::get_batch(unsigned int batch_size)
{
std::vector<Example> batch;
for (unsigned int i = 0; i < batch_size && !finished(); i++)
{
unsigned int example_index = order[next_line];
next_line++;
batch.emplace_back(classes[example_index], features[example_index]);
}
return batch;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment