Commit fba42b89 authored by Franck Dary

Added minor functionalities to multi-layer perceptron

parent 052c00fc
@@ -105,7 +105,7 @@ std::vector<Layer> create_layers_lemmatizer(unsigned int nb_inputs, unsigned int
 std::vector<Layer> create_layers_parser(unsigned int nb_inputs, unsigned int nb_classes)
 {
   std::vector<Layer> layers{
-    {nb_inputs, 500, 0.5, RELU},
+    {nb_inputs, 500, 0.2, RELU},
     {500, nb_classes, 0.0, LINEAR}
   };
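The hidden layer's dropout rate drops from 0.5 to 0.2 here. As a rough illustration of what an {input_dim, output_dim, dropout, activation} entry typically drives at run time (a minimal sketch only, not this repository's Mlp code; the helper name and the boolean flags are assumptions, while affine_transform, rectify and dropout are real dynet operations):

    // Hypothetical forward pass for one layer description.
    dynet::Expression apply_layer(const dynet::Expression & x,
                                  const dynet::Expression & W,
                                  const dynet::Expression & b,
                                  float dropout_rate,
                                  bool is_relu,
                                  bool training)
    {
      dynet::Expression h = dynet::affine_transform({b, W, x}); // W*x + b
      if(is_relu)
        h = dynet::rectify(h);                                  // ReLU activation
      if(training && dropout_rate > 0.0f)
        h = dynet::dropout(h, dropout_rate);                    // e.g. 0.2 for the parser's hidden layer
      return h;
    }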
@@ -175,7 +175,7 @@ void train_nn(context * ctx)
   unsigned int nb_classes = fann_train.get_nb_classes();
   unsigned int nb_inputs = first_example_dnn[0].size();
-  unsigned int batch_size = 16;
+  unsigned int batch_size = 256;
   unsigned int nb_iter_max = ctx->iteration_nb;
   std::vector<Layer> layers;
@@ -208,7 +208,8 @@ void train_nn(context * ctx)
     classifier_get_mlp_struct_filename(classif));
   bool shuffle_training = true;
-  classif->mlp->train(nb_iter_max, fann_train, fann_dev, convert_batch_to_dnn, shuffle_training);
+  bool use_score = true; // whether we use score or loss to choose if we want to save a model
+  classif->mlp->train(nb_iter_max, fann_train, fann_dev, convert_batch_to_dnn, shuffle_training, use_score);
   classifier_print_desc_file(classif->filename, classif);
@@ -76,7 +76,7 @@ class Mlp{
     void train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
                std::function<void(std::vector< std::vector<float> >&,
                                   std::vector<Fann_file::Example> &)>
-               convert_batch_to_dnn, bool must_shuffle);
+               convert_batch_to_dnn, bool must_shuffle, bool use_score);
     void save();
     void set_filenames(char * model_filename, char * struct_filename);
     void *get_vcode_array();
@@ -94,8 +94,11 @@ class Mlp{
     void enable_dropout();
     void disable_dropout();
     unsigned int predict(dynet::Expression x);
-    double get_loss_on_set(std::vector< std::vector<float> > & x_dev,
-                           std::vector<int> & y_dev);
+    double get_loss_on_set(Fann_file & fann_set,
+                           std::function<void(std::vector< std::vector<float> >&,
+                                              std::vector<Fann_file::Example> &)>
+                           convert_batch_to_dnn);
     double get_score_on_set(Fann_file & fann_set,
                             std::function<void(std::vector< std::vector<float> >&,
@@ -183,23 +183,31 @@ unsigned int Mlp::predict(float * features, int nb_features){
   return predict(x);
 }
-double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
-                            std::vector<int> & y_dev){
+double Mlp::get_loss_on_set(Fann_file & fann_set,
+                            std::function<void(std::vector< std::vector<float> >&,
+                                               std::vector<Fann_file::Example> &)>
+                            convert_batch_to_dnn)
+{
   unsigned int nb_batches;
   if(batch_size_test == 0)
-    batch_size_test = x_dev.size();
-  nb_batches = (x_dev.size() % batch_size_test != 0 ? 1 : 0) + x_dev.size() / batch_size_test;
+    batch_size_test = fann_set.get_nb_examples();
+  nb_batches = 1 + fann_set.get_nb_examples() / batch_size_test;
+  nb_batches = (fann_set.get_nb_examples() % batch_size_test != 0 ? 1 : 0) + fann_set.get_nb_examples() / batch_size_test;
   std::vector<dynet::Expression> cur_batch;
   std::vector< std::vector<float> > cur_batch_dnn;
   std::vector<unsigned int> cur_labels;
-  double loss = 0.0;
+  fann_set.rewind();
+  double total_loss = 0.0;
   for(unsigned int si = 0; si < nb_batches; si++){
     computation_graph.clear();
-    int id = si * batch_size_test;
-    unsigned int cur_batch_size =
-      std::min((unsigned int)x_dev.size() - id, batch_size_test);
+    auto batch = fann_set.get_batch(batch_size_test);
+    convert_batch_to_dnn(cur_batch_dnn, batch);
+    unsigned int cur_batch_size = batch.size();
     cur_batch.clear();
     cur_labels.clear();
@@ -209,11 +217,15 @@ double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
     unsigned int nb_inputs = layers[0].input_dim;
     for(unsigned int idx = 0; idx < cur_batch_size; idx++){
-      cur_batch[idx] = input(computation_graph, {nb_inputs}, x_dev[id+idx]);
-      unsigned int gold = y_dev[id+idx];
-      if(gold >= layers.back().output_dim)
-        gold = 0;
-      cur_labels[idx] = gold;
+      if(cur_batch_dnn[idx].size() != nb_inputs)
+      {
+        fprintf(stderr, "ERROR (%s) : example size=%lu nb_inputs=%u mismatch\n",
+                __func__, cur_batch_dnn[idx].size(), nb_inputs);
+        exit(1);
+      }
+      cur_batch[idx] = input(computation_graph, {nb_inputs}, cur_batch_dnn[idx]);
+      cur_labels[idx] = batch[idx].first;
     }
     dynet::Expression x_batch = reshape(concatenate_cols(cur_batch),
@@ -221,10 +233,10 @@ double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
     dynet::Expression loss_expr = get_loss(x_batch, cur_labels);
-    loss += as_scalar(computation_graph.forward(loss_expr));
+    total_loss += as_scalar(computation_graph.forward(loss_expr));
   }
-  return loss;
+  return total_loss / fann_set.get_nb_examples();
 }
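With the new interface, get_loss_on_set pulls its batches straight from the Fann_file through convert_batch_to_dnn and divides the summed batch losses by the number of examples, so the function now returns a per-example average rather than a raw sum. A rough worked example of the batch arithmetic above (the numbers are illustrative only, not values from the repository):

    // Suppose the dev set holds 1000 examples and batch_size_test is 256.
    unsigned int nb_examples = 1000;   // assumed value for illustration
    unsigned int batch_size_test = 256;
    unsigned int nb_batches = (nb_examples % batch_size_test != 0 ? 1 : 0)
                            + nb_examples / batch_size_test;   // 1 + 3 = 4 batches
    // Batches of size 256, 256, 256 and 232; the summed losses are divided by 1000.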
double Mlp::get_score_on_set(Fann_file & fann_set,
@@ -297,7 +309,7 @@ double Mlp::get_score_on_set(Fann_file & fann_set,
 void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
                 std::function<void(std::vector< std::vector<float> >&,
                                    std::vector<Fann_file::Example> &)>
-                convert_batch_to_dnn, bool must_shuffle)
+                convert_batch_to_dnn, bool must_shuffle, bool use_score)
 {
   std::unique_ptr<dynet::Timer> timer(new dynet::Timer("Training finished in"));
   if(layers.back().output_dim == 1){
@@ -305,7 +317,9 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     return;
   }
-  std::vector<double> losses;
+  int nb_examples_with_batch_size_1 = 0;
+  std::vector<double> dev_losses;
+  std::vector<double> train_losses;
   std::vector<double> dev_scores;
   std::vector<double> train_scores;
   auto has_converged = []
@@ -328,11 +342,6 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
   double best_score = 0.0;
   int best_epoch = 0;
-  unsigned int nb_batches;
-  if(batch_size_train == 0)
-    batch_size_train = fann_train.get_nb_examples();
-  nb_batches = (fann_train.get_nb_examples() % batch_size_train != 0 ? 1 : 0) + fann_train.get_nb_examples() / batch_size_train;
   std::vector<dynet::Expression> cur_batch;
   std::vector< std::vector<float> > cur_batch_dnn;
   std::vector<unsigned int> cur_labels;
@@ -342,7 +351,9 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     if(nb_iter_max > 0 && epoch > nb_iter_max)
       break;
-    if(has_converged(5, 0.005, train_scores))
+    if(use_score && has_converged(5, 0.005, train_scores))
       break;
+    else if(!use_score && has_converged(5, 0.005, train_losses))
+      break;
     fann_train.rewind();
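The body of has_converged is collapsed in this view; the call sites only show that it takes a window size, a threshold and the monitored series (train_scores or train_losses). A plausible reading of such a check, as a hypothetical sketch rather than the code hidden behind the fold:

    // Hypothetical convergence test: true if the last `window` values of the
    // series all lie within `threshold` of one another, i.e. progress has stalled.
    auto has_converged = [](unsigned int window, double threshold,
                            const std::vector<double> & values)
    {
      if(values.size() < window)
        return false;
      double lo = values[values.size() - window];
      double hi = lo;
      for(unsigned int i = values.size() - window + 1; i < values.size(); i++)
      {
        if(values[i] < lo) lo = values[i];
        if(values[i] > hi) hi = values[i];
      }
      return hi - lo < threshold;
    };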
@@ -352,11 +363,24 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     double loss = 0.0;
     double nb_samples = 0.0;
-    for(unsigned int si = 0; si < nb_batches; si++){
-      computation_graph.clear();
+    while(true)
+    {
+      unsigned int batch_size_train = this->batch_size_train;
+      if(nb_examples_with_batch_size_1 > 0)
+        batch_size_train = 1;
+      if(batch_size_train == 0)
+        batch_size_train = fann_train.get_nb_examples();
       auto batch = fann_train.get_batch(batch_size_train);
+      if(batch.empty())
+        break;
+      computation_graph.clear();
       unsigned int cur_batch_size = batch.size();
+      nb_examples_with_batch_size_1 -= cur_batch_size;
       convert_batch_to_dnn(cur_batch_dnn, batch);
       cur_batch.clear();
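The new while(true) loop pulls batches until the training file is exhausted, and nb_examples_with_batch_size_1 lets the trainer temporarily fall back to single-example updates. A small trace of the counter logic as written above (illustrative values; the counter is only ever initialised to 0 in the hunks shown here, so how it becomes positive is outside this diff):

    // Suppose nb_examples_with_batch_size_1 == 3 and batch_size_train == 256.
    // Iteration 1: counter > 0  -> batch size forced to 1, counter becomes 2.
    // Iteration 2: counter > 0  -> batch size 1, counter becomes 1.
    // Iteration 3: counter > 0  -> batch size 1, counter becomes 0.
    // Iteration 4: counter == 0 -> normal batch of 256; counter drops to -256 and stays <= 0.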
@@ -391,12 +415,13 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     }
     loss /= nb_samples;
     disable_dropout();
-    // double dev_loss = get_loss_on_set(x_dev, y_dev);
-    // dev_loss /= x_dev.size();
+    if(use_score)
+    {
       double dev_score = get_score_on_set(fann_dev, convert_batch_to_dnn);
       double train_score = get_score_on_set(fann_train, convert_batch_to_dnn);
       enable_dropout();
       if(dev_score > best_score || epoch == 1){
         best_score = dev_score;
         best_epoch = epoch;
@@ -404,14 +429,34 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
       }
       fprintf(stderr, "[%d(%.2lf->%.2lf)%s]", epoch, train_score, dev_score,
               best_epoch == epoch ? "!" : "");
-      losses.emplace_back(loss);
       train_scores.emplace_back(train_score);
       dev_scores.emplace_back(dev_score);
+    }
+    else
+    {
+      double dev_loss = get_loss_on_set(fann_dev, convert_batch_to_dnn);
+      if(dev_loss < best_score || epoch == 1){
+        best_score = dev_loss;
+        best_epoch = epoch;
+        save();
+      }
+      fprintf(stderr, "[%d(%.2lf->%.2lf)%s]", epoch, loss, dev_loss,
+              best_epoch == epoch ? "!" : "");
+      train_losses.emplace_back(loss);
+      dev_losses.emplace_back(dev_loss);
+    }
+    enable_dropout();
     epoch++;
   }
   fprintf(stderr, "\nBest epoch = %d\n", best_epoch);
+  if(use_score)
     fprintf(stderr, "Best dev score = %0.2lf\n", best_score);
+  else
+    fprintf(stderr, "Best dev loss = %0.2lf\n", best_score);
 }
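The net effect of use_score is to switch the checkpoint criterion: with scores, the saved model is the one that maximises the dev score; with losses, it is the one that minimises the average dev loss, where best_score doubles as "best dev loss" and is seeded by the forced save at epoch 1. A condensed sketch of that selection rule (variable names follow the diff; the helper itself is illustrative only, not a function in this repository):

    // Illustrative checkpoint rule mirroring the two branches above.
    bool should_save(bool use_score, double dev_metric, double best_so_far, int epoch)
    {
      if(epoch == 1)
        return true;                              // always checkpoint the first epoch
      return use_score ? dev_metric > best_so_far // higher dev score is better
                       : dev_metric < best_so_far;// lower average dev loss is better
    }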
dynet::Expression Mlp::get_loss(dynet::Expression & x,