Commit 613f04a9 authored by Franck Dary

Added devLoss option, allowing the dev loss to be computed

parent 408336d8
Showing 144 additions and 6 deletions
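
Summary of the change: a new boolean program parameter devLoss is added; the NeuralNetwork hierarchy (GeneticAlgorithm, MLP via MLPBase) and Classifier gain a getLoss method that computes the loss of an example without updating the weights; TrainInfos accumulates per-classifier train and dev losses and prints them next to the accuracies; Trainer records the train loss returned by trainOnExample and, when devLoss is enabled, the loss of every dev example. A minimal usage sketch, assuming the training tool is launched as in other macaon training runs (the binary name below is a placeholder, not taken from this commit):

    macaon_train [usual training options] --devLoss true

Since the option is declared with po::value<bool>()->default_value(false), omitting it keeps the previous behaviour: dev examples are counted but their loss is not computed.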
@@ -72,6 +72,7 @@ struct ProgramParameters
   static float maskRate;
   static bool featureExtraction;
   static bool devEvalOnGold;
+  static bool devLoss;
   private :
......
@@ -66,4 +66,5 @@ std::string ProgramParameters::tapeToMask;
 float ProgramParameters::maskRate;
 bool ProgramParameters::featureExtraction;
 bool ProgramParameters::devEvalOnGold;
+bool ProgramParameters::devLoss;
@@ -105,6 +105,14 @@ class GeneticAlgorithm : public NeuralNetwork
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
   /// @brief Save the GeneticAlgorithm to a file.
   ///
   /// @param filename The file to write the GeneticAlgorithm to.
......
@@ -54,6 +54,13 @@ class MLP : public NeuralNetwork
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
......
@@ -123,6 +123,13 @@ class MLPBase
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold);
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold);
   /// @brief Print the topology (Layers) of the MLP.
   ///
   /// @param output Where the topology will be printed.
......
@@ -145,6 +145,14 @@ class NeuralNetwork
   /// @return The loss.
   virtual float update(FeatureModel::FeatureDescription & fd, int gold) = 0;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  virtual float getLoss(FeatureModel::FeatureDescription & fd, int gold) = 0;
   /// @brief Save the NeuralNetwork to a file.
   ///
   /// @param filename The file to write the NeuralNetwork to.
......
@@ -61,6 +61,16 @@ std::vector<float> GeneticAlgorithm::predict(FeatureModel::FeatureDescription &
   return prediction;
 }
+float GeneticAlgorithm::getLoss(FeatureModel::FeatureDescription &, int)
+{
+  float loss = 0.0;
+  for (auto & individual : generation)
+    loss += individual->loss;
+  return loss;
+}
 float GeneticAlgorithm::update(FeatureModel::FeatureDescription & fd, int gold)
 {
   bool haveBeenUpdated = false;
......
@@ -68,6 +68,11 @@ float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
   }
 }
+float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  return mlp.getLoss(fd, gold);
+}
 void MLP::save(const std::string & filename)
 {
   File * file = new File(filename, "w");
......
@@ -143,6 +143,46 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   return as_scalar(batchedLoss.value());
 }
+float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  std::vector<dynet::Expression> inputs;
+  std::vector<unsigned int> goldss;
+  goldss.emplace_back(gold);
+  dynet::ComputationGraph cg;
+  std::vector<dynet::Expression> expressions;
+  for (auto & featValue : fd.values)
+    expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue));
+  dynet::Expression input = dynet::concatenate(expressions);
+  inputs.emplace_back(input);
+  dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
+  dynet::Expression output = run(cg, batchedInput);
+  dynet::Expression batchedLoss;
+  if (ProgramParameters::loss == "neglogsoftmax")
+  {
+    batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, goldss));
+  }
+  else if (ProgramParameters::loss == "weighted")
+  {
+    batchedLoss = weightedLoss(output, goldss);
+  }
+  else if (ProgramParameters::loss == "errorCorrection")
+  {
+    batchedLoss = errorCorrectionLoss(cg, output, goldss);
+  }
+  else
+  {
+    fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
+    exit(1);
+  }
+  return as_scalar(batchedLoss.value());
+}
 void MLPBase::checkGradients()
 {
   bool printGradients = false;
......
@@ -24,6 +24,8 @@ class TrainInfos
   std::map< std::string, std::vector<bool> > mustSavePerClassifierPerEpoch;
   std::map< std::string, std::pair<int,int> > trainCounter;
+  std::map<std::string, float> trainLossCounter;
+  std::map<std::string, float> devLossCounter;
   std::map< std::string, std::pair<int,int> > devCounter;
   std::map<std::string, bool> topologyPrinted;
@@ -42,8 +44,9 @@ class TrainInfos
   TrainInfos();
   void addTrainLoss(const std::string & classifier, float loss);
   void addDevLoss(const std::string & classifier, float loss);
-  void addTrainExample(const std::string & classifier);
+  void addTrainExample(const std::string & classifier, float loss);
   void addDevExample(const std::string & classifier);
+  void addDevExample(const std::string & classifier, float loss);
   void addTrainSuccess(const std::string & classifier);
   void addDevSuccess(const std::string & classifier);
   void resetCounters();
......
@@ -155,9 +155,10 @@ float TrainInfos::computeDevScore(const std::string & classifier)
   return 100.0*devCounter[classifier].first / devCounter[classifier].second;
 }
-void TrainInfos::addTrainExample(const std::string & classifier)
+void TrainInfos::addTrainExample(const std::string & classifier, float loss)
 {
   trainCounter[classifier].second++;
+  trainLossCounter[classifier] += loss;
 }
 void TrainInfos::addDevExample(const std::string & classifier)
@@ -165,6 +166,12 @@ void TrainInfos::addDevExample(const std::string & classifier)
   devCounter[classifier].second++;
 }
+void TrainInfos::addDevExample(const std::string & classifier, float loss)
+{
+  devCounter[classifier].second++;
+  devLossCounter[classifier] += loss;
+}
 void TrainInfos::addTrainSuccess(const std::string & classifier)
 {
   trainCounter[classifier].first++;
@@ -184,13 +191,24 @@ void TrainInfos::resetCounters()
 void TrainInfos::computeTrainScores()
 {
   for (auto & it : trainCounter)
+  {
     addTrainScore(it.first, computeTrainScore(it.first));
+    addTrainLoss(it.first, trainLossCounter[it.first]);
+    trainLossCounter[it.first] = 0.0;
+  }
 }
 void TrainInfos::computeDevScores()
 {
   for (auto & it : devCounter)
+  {
     addDevScore(it.first, computeDevScore(it.first));
+    if (devLossCounter.count(it.first))
+    {
+      addDevLoss(it.first, devLossCounter[it.first]);
+      devLossCounter[it.first] = 0.0;
+    }
+  }
 }
 int TrainInfos::getEpoch()
@@ -246,7 +264,9 @@ void TrainInfos::printScores(FILE * output)
   std::vector<std::string> names;
   std::vector<std::string> acc;
   std::vector<std::string> train;
+  std::vector<std::string> lossTrain;
   std::vector<std::string> dev;
+  std::vector<std::string> lossDev;
   std::vector<std::string> savedStr;
   for (auto & it : trainScoresPerClassifierPerEpoch)
@@ -254,7 +274,9 @@ void TrainInfos::printScores(FILE * output)
     names.emplace_back(it.first);
     acc.emplace_back("accuracy");
     train.emplace_back(": train(" + float2str(it.second.back(), "%.2f") + "%)");
+    lossTrain.emplace_back(trainLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(trainLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     dev.emplace_back(devScoresPerClassifierPerEpoch.empty() ? "" : "dev(" +float2str(devScoresPerClassifierPerEpoch[it.first].back(), "%.2f") + "%)");
+    lossDev.emplace_back(devLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(devLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     savedStr.emplace_back(mustSavePerClassifierPerEpoch[it.first].back() ? "SAVED" : "");
   }
@@ -264,7 +286,7 @@ void TrainInfos::printScores(FILE * output)
   fprintf(output, "[%s] ", getTime().c_str());
   fprintf(output, "Iteration %d/%d : \n", getEpoch(), ProgramParameters::nbIter);
-  printColumns(output, {names, acc, train, dev, savedStr});
+  printColumns(output, {names, acc, train, lossTrain, dev, lossDev, savedStr});
 }
 bool TrainInfos::mustSave(const std::string & classifier)
......
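With the TrainInfos changes above, the per-epoch summary printed by printScores gains a loss(...) column after the train accuracy and another after the dev accuracy, falling back to loss(?) when no losses have been recorded for a classifier. Purely as an illustration (the classifier name and numbers are made up, and the exact spacing depends on printColumns, which is not part of this diff), one line of the summary would look roughly like:

    Parser accuracy : train(93.12%) loss(245.80) dev(91.47%) loss(261.33) SAVED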
@@ -82,7 +82,16 @@ void Trainer::computeScoreOnDev()
   bool pActionIsZeroCost = tm.getCurrentClassifier()->getActionCost(*devConfig, pAction) == 0;
+  if (ProgramParameters::devLoss)
+  {
+    float loss = tm.getCurrentClassifier()->getLoss(*devConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+    TI.addDevExample(tm.getCurrentClassifier()->name, loss);
+  }
+  else
+  {
     TI.addDevExample(tm.getCurrentClassifier()->name);
+  }
   if (((!ProgramParameters::devEvalOnGold) && pActionIsZeroCost) || (pAction == oAction))
     TI.addDevSuccess(tm.getCurrentClassifier()->name);
@@ -273,10 +282,11 @@ void Trainer::train()
     exit(1);
   }
+  float loss = 0.0;
   if (!ProgramParameters::featureExtraction)
-    tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+    loss = tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
-  TI.addTrainExample(tm.getCurrentClassifier()->name);
+  TI.addTrainExample(tm.getCurrentClassifier()->name, loss);
   if (pActionIsZeroCost)
     TI.addTrainSuccess(tm.getCurrentClassifier()->name);
......
@@ -88,6 +88,8 @@ po::options_description getOptionsDescription()
     ("printTime", "Print time on stderr.")
     ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.")
     ("devEvalOnGold", "If true, dev accuracy will be computed on gold configurations.")
+    ("devLoss", po::value<bool>()->default_value(false),
+      "Compute and print total loss on dev for every epoch.")
     ("shuffle", po::value<bool>()->default_value(true),
      "Shuffle examples after each iteration");
@@ -283,6 +285,7 @@ int main(int argc, char * argv[])
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
   ProgramParameters::shuffleExamples = vm["shuffle"].as<bool>();
+  ProgramParameters::devLoss = vm["devLoss"].as<bool>();
   ProgramParameters::randomEmbeddings = vm["randomEmbeddings"].as<bool>();
   ProgramParameters::randomParameters = vm["randomParameters"].as<bool>();
   ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
......
@@ -156,6 +156,13 @@ class Classifier
   ///
   /// @return The loss.
   float trainOnExample(Config & config, int gold);
+  /// @brief Get the loss of the classifier on a training example.
+  ///
+  /// @param config The Config to work with.
+  /// @param gold The gold class of the Config.
+  ///
+  /// @return The loss.
+  float getLoss(Config & config, int gold);
   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
......
@@ -273,6 +273,12 @@ float Classifier::trainOnExample(Config & config, int gold)
   return nn->update(fd, gold);
 }
+float Classifier::getLoss(Config & config, int gold)
+{
+  auto & fd = fm->getFeatureDescription(config);
+  return nn->getLoss(fd, gold);
+}
 void Classifier::explainCostOfActions(FILE * output, Config & config)
 {
   for (Action & a : as->actions)
......