From c17a1abf624da8ecdf78df6a7f3a58f86e08086a Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Thu, 14 Mar 2019 14:08:00 +0100
Subject: [PATCH] Improve training speed (2x)

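Replace the sentinel 0.0 loss with control flow: MLPBase::update now throws
BatchNotFull while the current batch holds fewer than
ProgramParameters::batchSize examples, and MLP::update catches it and skips
the optimizer step, so the dynet computation graph is only built and run on
full batches. The progress-display cutoff in Decoder and Trainer is likewise
derived from the batch size (2*batchSize) instead of being hard-coded to 200,
so the display refresh rate scales with the update frequency.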
---
 decoder/src/Decoder.cpp                  |  8 ++++----
 neural_network/include/NeuralNetwork.hpp |  8 ++++++++
 neural_network/src/MLP.cpp               | 14 +++++++++-----
 neural_network/src/MLPBase.cpp           |  2 +-
 trainer/src/Trainer.cpp                  |  8 ++++----
 5 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/decoder/src/Decoder.cpp b/decoder/src/Decoder.cpp
index 81b9fb2..8e2d471 100644
--- a/decoder/src/Decoder.cpp
+++ b/decoder/src/Decoder.cpp
@@ -58,13 +58,13 @@ void checkAndRecordError(Config & config, Classifier * classifier, Classifier::W
     }
 }
 
-void printAdvancement(Config & config, float currentSpeed)
+void printAdvancement(Config & config, float currentSpeed, int nbActionsCutoff)
 {
   if (ProgramParameters::interactive)
   {
     int totalSize = ProgramParameters::tapeSize;
     int steps = config.getHead();
-    if (steps && (steps % 200 == 0 || totalSize-steps < 200))
+    if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff))
       fprintf(stderr, "Decode : %.2f%%  speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
   }
 }
@@ -213,7 +213,7 @@ void Decoder::decodeNoBeam()
 
     auto weightedActions = tm.getCurrentClassifier()->weightActions(config);
 
-    printAdvancement(config, currentSpeed);
+    printAdvancement(config, currentSpeed, nbActionsCutoff);
     printDebugInfos(stderr, config, tm, weightedActions);
 
     std::pair<float,std::string> predictedAction;
@@ -343,7 +343,7 @@ void Decoder::decodeBeam()
 
       node->weightedActions = node->tm.getCurrentClassifier()->weightActions(node->config);
 
-      printAdvancement(node->config, currentSpeed);
+      printAdvancement(node->config, currentSpeed, nbActionsCutoff);
 
       unsigned int nbActionsMax = std::min(std::max(node->tm.getCurrentClassifier()->getNbActions(),(unsigned int)1),(unsigned int)ProgramParameters::nbChilds);
       for (unsigned int actionIndex = 0; actionIndex < nbActionsMax; actionIndex++)
diff --git a/neural_network/include/NeuralNetwork.hpp b/neural_network/include/NeuralNetwork.hpp
index 28c2ef1..9842338 100644
--- a/neural_network/include/NeuralNetwork.hpp
+++ b/neural_network/include/NeuralNetwork.hpp
@@ -10,6 +10,14 @@
 #include <string>
 #include "FeatureModel.hpp"
 
+struct BatchNotFull : public std::exception
+{
+  const char * what() const noexcept override
+  {
+    return "Current batch is not full; no update needed.";
+  }
+};
+
 class NeuralNetwork
 {
   public :
diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp
index 47dc945..3dae489 100644
--- a/neural_network/src/MLP.cpp
+++ b/neural_network/src/MLP.cpp
@@ -49,11 +49,15 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
 
 float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
-  float loss = mlp.update(fd, gold);
-
-  trainer->update();
-
-  return loss;
+  try
+  {
+    float loss = mlp.update(fd, gold);
+    trainer->update();
+    return loss;
+  } catch (BatchNotFull &)
+  {
+    return 0.0;
+  }
 }
 
 void MLP::save(const std::string & filename)
diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp
index 39b57b1..aa06c1d 100644
--- a/neural_network/src/MLPBase.cpp
+++ b/neural_network/src/MLPBase.cpp
@@ -95,7 +95,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   golds.emplace_back(gold);
 
   if ((int)fds.size() < ProgramParameters::batchSize)
-    return 0.0;
+    throw BatchNotFull();
 
   std::vector<dynet::Expression> inputs;
   dynet::ComputationGraph cg;
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index ca8ef45..bb8ff40 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -27,7 +27,7 @@ void Trainer::computeScoreOnDev()
   float entropyAccumulator = 0.0;
   bool justFlipped = false;
   int nbActions = 0;
-  int nbActionsCutoff = 200;
+  int nbActionsCutoff = 2*ProgramParameters::batchSize;
   float currentSpeed = 0.0;
   auto pastTime = std::chrono::high_resolution_clock::now();
   std::vector<float> entropies;
@@ -56,7 +56,7 @@ void Trainer::computeScoreOnDev()
       {
         int totalSize = ProgramParameters::devTapeSize;
         int steps = devConfig->getHead();
-        if (steps && (steps % 200 == 0 || totalSize-steps < 200))
+        if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff))
         {
           fprintf(stderr, "                                                      \r");
           fprintf(stderr, "Eval on dev : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
@@ -162,7 +162,7 @@ void Trainer::train()
 
   int nbSteps = 0;
   int nbActions = 0;
-  int nbActionsCutoff = 200;
+  int nbActionsCutoff = 2*ProgramParameters::batchSize;
   float currentSpeed = 0.0;
   auto pastTime = std::chrono::high_resolution_clock::now();
   while (TI.getEpoch() <= ProgramParameters::nbIter)
@@ -204,7 +204,7 @@ void Trainer::train()
         {
           int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize;
           int steps = ProgramParameters::iterationSize == -1 ? trainConfig.getHead() : nbSteps;
-          if (steps % 200 == 0 || totalSize-steps < 200)
+          if (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff)
           {
             fprintf(stderr, "                                                      \r");
             fprintf(stderr, "Current Iteration : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
-- 
GitLab
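
For readers who want the pattern above in isolation, here is a minimal,
self-contained sketch of the control flow this patch introduces: buffer
examples, throw BatchNotFull on a partial batch, and let the caller skip the
optimizer step. MiniBatch, BATCH_SIZE, and the fake average loss are
hypothetical stand-ins for MLPBase, ProgramParameters::batchSize, and the
real dynet forward/backward pass; only the exception mirrors the patch.

#include <cstdio>
#include <exception>
#include <vector>

// Mirrors the BatchNotFull exception added to NeuralNetwork.hpp.
struct BatchNotFull : public std::exception
{
  const char * what() const noexcept override
  {
    return "Current batch is not full; no update needed.";
  }
};

constexpr int BATCH_SIZE = 4; // stand-in for ProgramParameters::batchSize

// Stand-in for MLPBase::update: buffer examples and only "train"
// (here: compute a fake average loss) once a full batch is available.
struct MiniBatch
{
  std::vector<int> golds;

  float update(int gold)
  {
    golds.emplace_back(gold);
    if ((int)golds.size() < BATCH_SIZE)
      throw BatchNotFull(); // signal the caller that nothing was computed

    float loss = 0.0f;
    for (int g : golds)     // placeholder for the real forward/backward pass
      loss += (float)g;
    loss /= golds.size();
    golds.clear();
    return loss;
  }
};

int main()
{
  MiniBatch batch;
  for (int example = 0; example < 10; example++)
  {
    // Mirrors MLP::update: a partial batch is not an error, just a no-op,
    // so the optimizer step (trainer->update() in the patch) is skipped.
    try
    {
      float loss = batch.update(example);
      printf("full batch, loss = %f (optimizer step would run here)\n", loss);
    }
    catch (BatchNotFull &)
    {
      // fewer than BATCH_SIZE examples buffered: skip the update
    }
  }
  return 0;
}

The nbActionsCutoff change follows the same reasoning: since work now happens
once per full batch, refreshing the progress line every 2*batchSize actions
keeps the display cost proportional to the update frequency instead of tied
to the old fixed interval of 200.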