Skip to content
Snippets Groups Projects
Commit 10297e05 authored by Franck Dary's avatar Franck Dary
Browse files

Added a way to delete duplicate training examples

parent 6145de69
No related branches found
No related tags found
No related merge requests found
...@@ -52,6 +52,7 @@ class Fann_file ...@@ -52,6 +52,7 @@ class Fann_file
unsigned int get_nb_examples(); unsigned int get_nb_examples();
unsigned int get_nb_classes(); unsigned int get_nb_classes();
void over_sample(float ratio); void over_sample(float ratio);
void delete_duplicates();
}; };
#endif #endif
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "string.h" #include "string.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <set>
Fann_file::Fann_file(const char * filename, unsigned int classifier_id) Fann_file::Fann_file(const char * filename, unsigned int classifier_id)
{ {
...@@ -162,3 +163,25 @@ void Fann_file::over_sample(float ratio) ...@@ -162,3 +163,25 @@ void Fann_file::over_sample(float ratio)
} }
} }
/// Removes duplicated training examples from the iteration order.
///
/// Two examples are treated as duplicates when they have the same class and
/// the same sequence of feature values. For each group of duplicates only the
/// first index met while walking `order` is kept, so the relative order of the
/// surviving examples is preserved. Only `order` is rewritten; `classes` and
/// `features` are left untouched.
void Fann_file::delete_duplicates()
{
    std::vector<unsigned int> kept;
    kept.reserve(order.size());
    std::set<std::string> seen;

    for(unsigned int index : order)
    {
        // Serialize the example as "class,feat1,feat2,..." to use it as a set key.
        std::string key = std::to_string(classes[index]);
        // NOTE(review): every feature is converted to int before being
        // serialized; if `features` stores non-integer values, examples that
        // differ only by a fractional part collapse to the same key — confirm.
        for(int feat : features[index])
        {
            key += ',';
            key += std::to_string(feat);
        }

        // insert() reports whether the key was new, so a single lookup is
        // enough to both test for and record the example.
        if(seen.insert(key).second)
            kept.push_back(index);
    }

    // Hand the filtered indices over without copying them.
    order.swap(kept);
}
...@@ -158,6 +158,7 @@ void train_nn(context * ctx) ...@@ -158,6 +158,7 @@ void train_nn(context * ctx)
Fann_file fann_train(ctx->fann_filename, classif_nb); Fann_file fann_train(ctx->fann_filename, classif_nb);
Fann_file fann_dev(ctx->fann_dev_filename, classif_nb); Fann_file fann_dev(ctx->fann_dev_filename, classif_nb);
fann_train.delete_duplicates();
fann_train.over_sample(0.0); //The ratio between the nb of examples of the majority class and of the minority class will not be inferior to this value fann_train.over_sample(0.0); //The ratio between the nb of examples of the majority class and of the minority class will not be inferior to this value
auto first_example_batch = fann_train.get_batch(1); auto first_example_batch = fann_train.get_batch(1);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment