Commit c9a28275 authored by Franck Dary

Allow specifying the batchsize in cla files, and change the error detection loss function to L1

parent 006cdfc9
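In short: each Classifier can now carry its own batchSize, read from its cla file; the value is pushed down into the underlying NeuralNetwork and MLPBase, where a non-zero value overrides the global ProgramParameters::batchSize. Independently, the loss used for continuous (vector-valued) gold outputs in MLPBase changes from dynet::squared_distance to dynet::l1_distance.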
@@ -38,6 +38,7 @@ class MLPBase
     std::vector<FeatureModel::FeatureDescription> fdsContinuous;
     /// @brief gold outputs of the current minibatch.
     std::vector< std::vector<float> > goldsContinuous;
+    int batchSize;

   private :
@@ -144,6 +145,7 @@ class MLPBase
     void printTopology(FILE * output);
     /// @brief Clear the current batch.
     void endOfIteration();
+    void setBatchSize(int batchSize);
 };

 #endif
@@ -103,6 +103,9 @@ class NeuralNetwork
     /// @brief The dynet model containing the parameters to be trained.
     dynet::ParameterCollection model;
+    /// @brief The size of batches. If not specified, the global batchsize is used.
+    int batchSize;
+
   protected :

     /// @brief Set dynet and srand() seeds.
@@ -192,6 +195,12 @@ class NeuralNetwork
     ///
     /// \return The number of input neurons taken by fv.
     static unsigned int featureSize(const FeatureModel::FeatureValue & fv);
+
+    /// @brief Set the batchSize.
+    void setBatchSize(int batchSize);
+
+    /// @brief Get the batchSize.
+    int getBatchSize();
 };

 #endif
@@ -19,6 +19,7 @@ MLP::MLP(const std::string & filename) : mlp("MLP")
 void MLP::init(int nbInputs, const std::string & topology, int nbOutputs)
 {
+  setBatchSize(0);
   mlp.init(model, nbInputs, topology, nbOutputs);
 }
@@ -57,6 +58,7 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
 float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.update(fd, gold);
@@ -70,6 +72,7 @@ float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 float MLP::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.update(fd, gold);
@@ -83,6 +86,7 @@ float MLP::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.getLoss(fd, gold);
@@ -95,6 +99,7 @@ float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
 float MLP::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.getLoss(fd, gold);
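Each of the four MLP entry points above now pushes the configured batch size into the wrapped MLPBase before delegating, so a per-classifier value set through setBatchSize() takes effect on every update and getLoss call.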
@@ -4,6 +4,7 @@ MLPBase::MLPBase(std::string name)
 {
   this->name = name;
   dropoutActive = true;
+  batchSize = 0;
 }

 void MLPBase::init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs)
@@ -94,7 +95,9 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   fdsOneHot.emplace_back(fd);
   goldsOneHot.emplace_back(gold);

-  if ((int)fdsOneHot.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsOneHot.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
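As a standalone illustration of the fallback above, a batchSize of 0 means "not specified" and defers to the global value. This is only a minimal sketch: globalBatchSize stands in for ProgramParameters::batchSize and is an assumption of the example.

#include <cstdio>

// globalBatchSize stands in for ProgramParameters::batchSize (illustrative only).
static int globalBatchSize = 32;

// Same fallback pattern as in the hunk above: 0 means "not specified".
int effectiveBatchSize(int localBatchSize)
{
  return localBatchSize ? localBatchSize : globalBatchSize;
}

int main()
{
  printf("%d\n", effectiveBatchSize(0)); // prints 32: falls back to the global batchsize
  printf("%d\n", effectiveBatchSize(8)); // prints 8: the per-classifier value wins
  return 0;
}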
@@ -140,7 +143,9 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   fdsContinuous.emplace_back(fd);
   goldsContinuous.emplace_back(gold);

-  if ((int)fdsContinuous.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsContinuous.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -165,7 +170,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({(unsigned int)gold.size()}), gold));

   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));

   cg.backward(batchedLoss);
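For reference, squared_distance sums squared componentwise differences while l1_distance sums absolute differences, so large errors are penalized less sharply. A minimal standalone sketch (assuming a standard DyNet C++ setup; the toy vectors are illustrative):

#include <dynet/dynet.h>
#include <dynet/expr.h>
#include <cstdio>
#include <vector>

int main(int argc, char ** argv)
{
  dynet::initialize(argc, argv);
  dynet::ComputationGraph cg;

  // Toy prediction/gold pair (illustrative values).
  std::vector<float> predicted = {0.0f, 2.0f};
  std::vector<float> target = {1.0f, 0.0f};
  dynet::Expression output = dynet::input(cg, dynet::Dim({2}), predicted);
  dynet::Expression gold = dynet::input(cg, dynet::Dim({2}), target);

  // Old loss: (0-1)^2 + (2-0)^2 = 5
  float l2 = dynet::as_scalar(cg.forward(dynet::squared_distance(output, gold)));
  // New loss: |0-1| + |2-0| = 3
  float l1 = dynet::as_scalar(cg.forward(dynet::l1_distance(output, gold)));

  printf("squared_distance: %f, l1_distance: %f\n", l2, l1);
  return 0;
}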
@@ -182,7 +187,9 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, int gold)
   fdsOneHot.emplace_back(fd);
   goldsOneHot.emplace_back(gold);

-  if ((int)fdsOneHot.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsOneHot.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -226,7 +233,9 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   fdsContinuous.emplace_back(fd);
   goldsContinuous.emplace_back(gold);

-  if ((int)fdsContinuous.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsContinuous.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -251,7 +260,7 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({1,(unsigned int)gold.size()}), gold));

   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));

   checkGradients();
@@ -482,3 +491,8 @@ void MLPBase::endOfIteration()
   goldsContinuous.clear();
 }
+
+void MLPBase::setBatchSize(int batchSize)
+{
+  this->batchSize = batchSize;
+}
@@ -188,3 +188,13 @@ unsigned int NeuralNetwork::featureSize(const FeatureModel::FeatureValue & fv)
   return res;
 }
+
+void NeuralNetwork::setBatchSize(int batchSize)
+{
+  this->batchSize = batchSize;
+}
+
+int NeuralNetwork::getBatchSize()
+{
+  return batchSize;
+}
@@ -54,6 +54,8 @@ class Classifier
     std::shared_ptr<NeuralNetwork> nn;
     /// @brief A string describing the topology of the underlying neural network.
     std::string topology;
+    /// @brief The size of a batch. If not specified, the global batchsize is used.
+    int batchSize;
     /// @brief The oracle being used by this Classifier.
     ///
     /// For Classifier of type Prediction, the Oracle is used in train mode, to find
@@ -73,6 +73,10 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
     badFormatAndAbort(ERRINFO);

   topology = buffer;
+
+  int batchsizeRead = 0;
+  if(fscanf(fd, "Batchsize : %d\n", &batchsizeRead) == 1)
+    batchSize = batchsizeRead;
 }

 Classifier::Type Classifier::str2type(const std::string & s)
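Given the fscanf format above, the optional batchsize line in a cla file is expected to look like the following (the value 64 is illustrative):

Batchsize : 64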
@@ -158,6 +162,8 @@ void Classifier::initClassifier(Config & config)
     nbInputs += NeuralNetwork::featureSize(feat);

   nn->init(nbInputs, topology, nbOutputs);
+
+  if (batchSize)
+    nn->setBatchSize(batchSize);
 }

 FeatureModel::FeatureDescription Classifier::getFeatureDescription(Config & config)