Commit fba42b89 authored by Franck Dary

Added minor functionalities to multi-layer perceptron

parent 052c00fc
@@ -105,7 +105,7 @@ std::vector<Layer> create_layers_lemmatizer(unsigned int nb_inputs, unsigned int
 std::vector<Layer> create_layers_parser(unsigned int nb_inputs, unsigned int nb_classes)
 {
   std::vector<Layer> layers{
-    {nb_inputs, 500, 0.5, RELU},
+    {nb_inputs, 500, 0.2, RELU},
     {500, nb_classes, 0.0, LINEAR}
   };
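The hidden layer's dropout rate drops from 0.5 to 0.2 here. As a rough illustration of what an {input_dim, output_dim, dropout, activation} entry typically drives at run time (a minimal sketch only, not this repository's Mlp code; the helper name and the boolean flags are assumptions, while affine_transform, rectify and dropout are real dynet operations):

    // Hypothetical forward pass for one layer description.
    dynet::Expression apply_layer(const dynet::Expression & x,
                                  const dynet::Expression & W,
                                  const dynet::Expression & b,
                                  float dropout_rate,
                                  bool is_relu,
                                  bool training)
    {
      dynet::Expression h = dynet::affine_transform({b, W, x}); // W*x + b
      if(is_relu)
        h = dynet::rectify(h);                                  // ReLU activation
      if(training && dropout_rate > 0.0f)
        h = dynet::dropout(h, dropout_rate);                    // e.g. 0.2 for the parser's hidden layer
      return h;
    }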
@@ -175,7 +175,7 @@ void train_nn(context * ctx)
   unsigned int nb_classes = fann_train.get_nb_classes();
   unsigned int nb_inputs = first_example_dnn[0].size();
-  unsigned int batch_size = 16;
+  unsigned int batch_size = 256;
   unsigned int nb_iter_max = ctx->iteration_nb;
   std::vector<Layer> layers;
@@ -208,7 +208,8 @@ void train_nn(context * ctx)
     classifier_get_mlp_struct_filename(classif));
   bool shuffle_training = true;
-  classif->mlp->train(nb_iter_max, fann_train, fann_dev, convert_batch_to_dnn, shuffle_training);
+  bool use_score = true; // whether we use score or loss to choose if we want to save a model
+  classif->mlp->train(nb_iter_max, fann_train, fann_dev, convert_batch_to_dnn, shuffle_training, use_score);
   classifier_print_desc_file(classif->filename, classif);
@@ -76,7 +76,7 @@ class Mlp{
     void train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
                std::function<void(std::vector< std::vector<float> >&,
                                   std::vector<Fann_file::Example> &)>
-               convert_batch_to_dnn, bool must_shuffle);
+               convert_batch_to_dnn, bool must_shuffle, bool use_score);
     void save();
     void set_filenames(char * model_filename, char * struct_filename);
     void *get_vcode_array();
@@ -94,8 +94,11 @@ class Mlp{
     void enable_dropout();
     void disable_dropout();
     unsigned int predict(dynet::Expression x);
-    double get_loss_on_set(std::vector< std::vector<float> > & x_dev,
-                           std::vector<int> & y_dev);
+    double get_loss_on_set(Fann_file & fann_set,
+                           std::function<void(std::vector< std::vector<float> >&,
+                                              std::vector<Fann_file::Example> &)>
+                           convert_batch_to_dnn);
     double get_score_on_set(Fann_file & fann_set,
                             std::function<void(std::vector< std::vector<float> >&,
@@ -183,23 +183,31 @@ unsigned int Mlp::predict(float * features, int nb_features){
   return predict(x);
 }
-double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
-                            std::vector<int> & y_dev){
+double Mlp::get_loss_on_set(Fann_file & fann_set,
+                            std::function<void(std::vector< std::vector<float> >&,
+                                               std::vector<Fann_file::Example> &)>
+                            convert_batch_to_dnn)
+{
   unsigned int nb_batches;
   if(batch_size_test == 0)
-    batch_size_test = x_dev.size();
-  nb_batches = (x_dev.size() % batch_size_test != 0 ? 1 : 0) + x_dev.size() / batch_size_test;
+    batch_size_test = fann_set.get_nb_examples();
+  nb_batches = 1 + fann_set.get_nb_examples() / batch_size_test;
+  nb_batches = (fann_set.get_nb_examples() % batch_size_test != 0 ? 1 : 0) + fann_set.get_nb_examples() / batch_size_test;
   std::vector<dynet::Expression> cur_batch;
   std::vector< std::vector<float> > cur_batch_dnn;
   std::vector<unsigned int> cur_labels;
-  double loss = 0.0;
+  fann_set.rewind();
+  double total_loss = 0.0;
   for(unsigned int si = 0; si < nb_batches; si++){
     computation_graph.clear();
-    int id = si * batch_size_test;
-    unsigned int cur_batch_size =
-      std::min((unsigned int)x_dev.size() - id, batch_size_test);
+    auto batch = fann_set.get_batch(batch_size_test);
+    convert_batch_to_dnn(cur_batch_dnn, batch);
+    unsigned int cur_batch_size = batch.size();
     cur_batch.clear();
     cur_labels.clear();
@@ -209,11 +217,15 @@ double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
     unsigned int nb_inputs = layers[0].input_dim;
     for(unsigned int idx = 0; idx < cur_batch_size; idx++){
-      cur_batch[idx] = input(computation_graph, {nb_inputs}, x_dev[id+idx]);
-      unsigned int gold = y_dev[id+idx];
-      if(gold >= layers.back().output_dim)
-        gold = 0;
-      cur_labels[idx] = gold;
+      if(cur_batch_dnn[idx].size() != nb_inputs)
+      {
+        fprintf(stderr, "ERROR (%s) : example size=%lu nb_inputs=%u mismatch\n",
+                __func__, cur_batch_dnn[idx].size(), nb_inputs);
+        exit(1);
+      }
+      cur_batch[idx] = input(computation_graph, {nb_inputs}, cur_batch_dnn[idx]);
+      cur_labels[idx] = batch[idx].first;
     }
     dynet::Expression x_batch = reshape(concatenate_cols(cur_batch),
@@ -221,10 +233,10 @@ double Mlp::get_loss_on_set(std::vector< std::vector<float> > & x_dev,
     dynet::Expression loss_expr = get_loss(x_batch, cur_labels);
-    loss += as_scalar(computation_graph.forward(loss_expr));
+    total_loss += as_scalar(computation_graph.forward(loss_expr));
   }
-  return loss;
+  return total_loss / fann_set.get_nb_examples();
 }
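With the new interface, get_loss_on_set pulls its batches straight from the Fann_file through convert_batch_to_dnn and divides the summed batch losses by the number of examples, so the function now returns a per-example average rather than a raw sum. A rough worked example of the batch arithmetic above (the numbers are illustrative only, not values from the repository):

    // Suppose the dev set holds 1000 examples and batch_size_test is 256.
    unsigned int nb_examples = 1000;   // assumed value for illustration
    unsigned int batch_size_test = 256;
    unsigned int nb_batches = (nb_examples % batch_size_test != 0 ? 1 : 0)
                            + nb_examples / batch_size_test;   // 1 + 3 = 4 batches
    // Batches of size 256, 256, 256 and 232; the summed losses are divided by 1000.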
double Mlp::get_score_on_set(Fann_file & fann_set,
@@ -297,7 +309,7 @@ double Mlp::get_score_on_set(Fann_file & fann_set,
 void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
                 std::function<void(std::vector< std::vector<float> >&,
                                    std::vector<Fann_file::Example> &)>
-                convert_batch_to_dnn, bool must_shuffle)
+                convert_batch_to_dnn, bool must_shuffle, bool use_score)
 {
   std::unique_ptr<dynet::Timer> timer(new dynet::Timer("Training finished in"));
   if(layers.back().output_dim == 1){
@@ -305,7 +317,9 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     return;
   }
-  std::vector<double> losses;
+  int nb_examples_with_batch_size_1 = 0;
+  std::vector<double> dev_losses;
+  std::vector<double> train_losses;
   std::vector<double> dev_scores;
   std::vector<double> train_scores;
   auto has_converged = []
@@ -328,11 +342,6 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
   double best_score = 0.0;
   int best_epoch = 0;
-  unsigned int nb_batches;
-  if(batch_size_train == 0)
-    batch_size_train = fann_train.get_nb_examples();
-  nb_batches = (fann_train.get_nb_examples() % batch_size_train != 0 ? 1 : 0) + fann_train.get_nb_examples() / batch_size_train;
   std::vector<dynet::Expression> cur_batch;
   std::vector< std::vector<float> > cur_batch_dnn;
   std::vector<unsigned int> cur_labels;
@@ -342,7 +351,9 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     if(nb_iter_max > 0 && epoch > nb_iter_max)
       break;
-    if(has_converged(5, 0.005, train_scores))
+    if(use_score && has_converged(5, 0.005, train_scores))
       break;
+    else if(!use_score && has_converged(5, 0.005, train_losses))
+      break;
     fann_train.rewind();
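The body of has_converged is collapsed in this view; the call sites only show that it takes a window size, a threshold and the monitored series (train_scores or train_losses). A plausible reading of such a check, as a hypothetical sketch rather than the code hidden behind the fold:

    // Hypothetical convergence test: true if the last `window` values of the
    // series all lie within `threshold` of one another, i.e. progress has stalled.
    auto has_converged = [](unsigned int window, double threshold,
                            const std::vector<double> & values)
    {
      if(values.size() < window)
        return false;
      double lo = values[values.size() - window];
      double hi = lo;
      for(unsigned int i = values.size() - window + 1; i < values.size(); i++)
      {
        if(values[i] < lo) lo = values[i];
        if(values[i] > hi) hi = values[i];
      }
      return hi - lo < threshold;
    };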
@@ -352,11 +363,24 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     double loss = 0.0;
     double nb_samples = 0.0;
-    for(unsigned int si = 0; si < nb_batches; si++){
-      computation_graph.clear();
+    while(true)
+    {
+      unsigned int batch_size_train = this->batch_size_train;
+      if(nb_examples_with_batch_size_1 > 0)
+        batch_size_train = 1;
+      if(batch_size_train == 0)
+        batch_size_train = fann_train.get_nb_examples();
       auto batch = fann_train.get_batch(batch_size_train);
+      if(batch.empty())
+        break;
+      computation_graph.clear();
       unsigned int cur_batch_size = batch.size();
+      nb_examples_with_batch_size_1 -= cur_batch_size;
       convert_batch_to_dnn(cur_batch_dnn, batch);
       cur_batch.clear();
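The new while(true) loop pulls batches until the training file is exhausted, and nb_examples_with_batch_size_1 lets the trainer temporarily fall back to single-example updates. A small trace of the counter logic as written above (illustrative values; the counter is only ever initialised to 0 in the hunks shown here, so how it becomes positive is outside this diff):

    // Suppose nb_examples_with_batch_size_1 == 3 and batch_size_train == 256.
    // Iteration 1: counter > 0  -> batch size forced to 1, counter becomes 2.
    // Iteration 2: counter > 0  -> batch size 1, counter becomes 1.
    // Iteration 3: counter > 0  -> batch size 1, counter becomes 0.
    // Iteration 4: counter == 0 -> normal batch of 256; counter drops to -256 and stays <= 0.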
@@ -391,12 +415,13 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
     }
     loss /= nb_samples;
     disable_dropout();
-    // double dev_loss = get_loss_on_set(x_dev, y_dev);
-    // dev_loss /= x_dev.size();
+    if(use_score)
+    {
       double dev_score = get_score_on_set(fann_dev, convert_batch_to_dnn);
       double train_score = get_score_on_set(fann_train, convert_batch_to_dnn);
       enable_dropout();
       if(dev_score > best_score || epoch == 1){
         best_score = dev_score;
         best_epoch = epoch;
@@ -404,14 +429,34 @@ void Mlp::train(int nb_iter_max, Fann_file & fann_train, Fann_file & fann_dev,
       }
       fprintf(stderr, "[%d(%.2lf->%.2lf)%s]", epoch, train_score, dev_score,
               best_epoch == epoch ? "!" : "");
-      losses.emplace_back(loss);
       train_scores.emplace_back(train_score);
       dev_scores.emplace_back(dev_score);
+    }
+    else
+    {
+      double dev_loss = get_loss_on_set(fann_dev, convert_batch_to_dnn);
+      if(dev_loss < best_score || epoch == 1){
+        best_score = dev_loss;
+        best_epoch = epoch;
+        save();
+      }
+      fprintf(stderr, "[%d(%.2lf->%.2lf)%s]", epoch, loss, dev_loss,
+              best_epoch == epoch ? "!" : "");
+      train_losses.emplace_back(loss);
+      dev_losses.emplace_back(dev_loss);
+    }
+    enable_dropout();
     epoch++;
   }
   fprintf(stderr, "\nBest epoch = %d\n", best_epoch);
+  if(use_score)
     fprintf(stderr, "Best dev score = %0.2lf\n", best_score);
+  else
+    fprintf(stderr, "Best dev loss = %0.2lf\n", best_score);
 }
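The net effect of use_score is to switch the checkpoint criterion: with scores, the saved model is the one that maximises the dev score; with losses, it is the one that minimises the average dev loss, where best_score doubles as "best dev loss" and is seeded by the forced save at epoch 1. A condensed sketch of that selection rule (variable names follow the diff; the helper itself is illustrative only, not a function in this repository):

    // Illustrative checkpoint rule mirroring the two branches above.
    bool should_save(bool use_score, double dev_metric, double best_so_far, int epoch)
    {
      if(epoch == 1)
        return true;                              // always checkpoint the first epoch
      return use_score ? dev_metric > best_so_far // higher dev score is better
                       : dev_metric < best_so_far;// lower average dev loss is better
    }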
dynet::Expression Mlp::get_loss(dynet::Expression & x,