From 7438dcd363b5af387ddcf5b1078da3d3feea8e3e Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Tue, 16 Jul 2019 15:36:45 +0200
Subject: [PATCH] Added noNeuralNetwork option to macaon_decode and added size
 limit to the stack

---
 decoder/src/Decoder.cpp                     |  6 ++++++
 decoder/src/macaon_decode.cpp               |  5 +++++
 maca_common/include/ProgramParameters.hpp   |  1 +
 maca_common/src/ProgramParameters.cpp       |  1 +
 maca_common/src/programOptionsTemplates.cpp |  3 +++
 transition_machine/src/ActionBank.cpp       |  4 ++--
 transition_machine/src/Classifier.cpp       | 16 +++++++++-------
 7 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/decoder/src/Decoder.cpp b/decoder/src/Decoder.cpp
index 3ca314a..5766d64 100644
--- a/decoder/src/Decoder.cpp
+++ b/decoder/src/Decoder.cpp
@@ -83,6 +83,12 @@ void printDebugInfos(FILE * output, Config & config, TransitionMachine & tm, Cla
 
 std::pair<float,std::string> getClassifierAction(Config & config, Classifier::WeightedActions & weightedActions, Classifier * classifier, unsigned int index)
 {
+    if (weightedActions.empty())
+    {
+      fprintf(stderr, "ERROR (%s) : weightedActions is empty. Aborting.\n", ERRINFO);
+      exit(1);
+    }
+
     std::string & predictedAction = weightedActions[0].second.second;
     float proba = weightedActions[0].second.first;
     Action * action = classifier->getAction(predictedAction);
diff --git a/decoder/src/macaon_decode.cpp b/decoder/src/macaon_decode.cpp
index a253cc0..dbe947f 100644
--- a/decoder/src/macaon_decode.cpp
+++ b/decoder/src/macaon_decode.cpp
@@ -39,6 +39,7 @@ po::options_description getOptionsDescription()
     ("debug,d", "Print infos on stderr")
     ("delayedOutput", "Print the output only at the end")
     ("showActions", "Print actions predicted by each classifier")
+    ("noNeuralNetwork", "Don't use any neural network, useful to speed up debug")
     ("dicts", po::value<std::string>()->default_value(""),
       "The .dict file describing all the dictionaries to be used in the experiement. By default the filename specified in the .tm file will be used")
     ("featureModels", po::value<std::string>()->default_value(""),
@@ -55,6 +56,8 @@ po::options_description getOptionsDescription()
       "The number of lines of input that will be read and stored in memory at once.")
     ("dictCapacity", po::value<int>()->default_value(50000),
       "The maximal size of each Dict (number of differents embeddings).")
+    ("maxStackSize", po::value<int>()->default_value(200),
+      "The maximal size of the stack (dependency parsing).")
     ("interactive", po::value<bool>()->default_value(true),
       "Is the shell interactive ? Display advancement informations")
     ("tapeToMask", po::value<std::string>()->default_value("FORM"),
@@ -143,6 +146,7 @@ int main(int argc, char * argv[])
   ProgramParameters::debug = vm.count("debug") == 0 ? false : true;
   ProgramParameters::delayedOutput = vm.count("delayedOutput") == 0 ? false : true;
   ProgramParameters::showActions = vm.count("showActions") == 0 ? false : true;
+  ProgramParameters::noNeuralNetwork = vm.count("noNeuralNetwork") == 0 ? false : true;
   ProgramParameters::interactive = vm["interactive"].as<bool>();
   ProgramParameters::errorAnalysis = vm.count("errorAnalysis") == 0 ? false : true;
   ProgramParameters::nbErrorsToShow = vm["nbErrorsToShow"].as<int>();
@@ -160,6 +164,7 @@ int main(int argc, char * argv[])
   if (ProgramParameters::readSize == 0)
     ProgramParameters::readSize = ProgramParameters::tapeSize;
   ProgramParameters::dictCapacity = vm["dictCapacity"].as<int>();
+  ProgramParameters::maxStackSize = vm["maxStackSize"].as<int>();
   ProgramParameters::beamSize = vm["beamSize"].as<int>();
   ProgramParameters::nbChilds = vm["nbChilds"].as<int>();
   ProgramParameters::tapeToMask = vm["tapeToMask"].as<std::string>();
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 8d6df77..8cd22a7 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -79,6 +79,7 @@ struct ProgramParameters
   static bool noNeuralNetwork;
   static bool showActions;
   static bool delayedOutput;
+  static int maxStackSize;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index e1a8721..c6fc86f 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -73,4 +73,5 @@ bool ProgramParameters::alwaysSave;
 bool ProgramParameters::noNeuralNetwork;
 bool ProgramParameters::showActions;
 bool ProgramParameters::delayedOutput;
+int ProgramParameters::maxStackSize;
 
diff --git a/maca_common/src/programOptionsTemplates.cpp b/maca_common/src/programOptionsTemplates.cpp
index 85eabc5..0e97371 100644
--- a/maca_common/src/programOptionsTemplates.cpp
+++ b/maca_common/src/programOptionsTemplates.cpp
@@ -87,6 +87,8 @@ po::options_description getTrainOptionsDescription()
       "The size of each minibatch (in number of taining examples)")
     ("dictCapacity", po::value<int>()->default_value(50000),
       "The maximal size of each Dict (number of differents embeddings).")
+    ("maxStackSize", po::value<int>()->default_value(200),
+      "The maximal size of the stack (transition based parsing).")
     ("tapeToMask", po::value<std::string>()->default_value("FORM"),
       "The name of the Tape for which some of the elements will be masked.")
     ("maskRate", po::value<float>()->default_value(0.0),
@@ -284,6 +286,7 @@ void loadTrainProgramParameters(int argc, char * argv[])
   ProgramParameters::seed = vm["seed"].as<int>();
   ProgramParameters::batchSize = vm["batchSize"].as<int>();
   ProgramParameters::dictCapacity = vm["dictCapacity"].as<int>();
+  ProgramParameters::maxStackSize = vm["maxStackSize"].as<int>();
   ProgramParameters::nbTrain = vm["nbTrain"].as<int>();
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
diff --git a/transition_machine/src/ActionBank.cpp b/transition_machine/src/ActionBank.cpp
index cadee4f..e255379 100644
--- a/transition_machine/src/ActionBank.cpp
+++ b/transition_machine/src/ActionBank.cpp
@@ -177,7 +177,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na
     auto undo = [](Config & c, Action::BasicAction &)
       {c.stackPop();};
     auto appliable = [](Config & c, Action::BasicAction &)
-      {return !c.endOfTapes();};
+      {return !(c.stackSize() >= ProgramParameters::maxStackSize) && (!c.endOfTapes());}; // Not appliable once c.stackSize() has reached ProgramParameters::maxStackSize, or when c.endOfTapes() is true.
     Action::BasicAction basicAction =
       {Action::BasicAction::Type::Push, "", apply, undo, appliable};
 
@@ -323,7 +323,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na
       };
     auto appliable3 = [](Config & c, Action::BasicAction &)
       {
-        return !c.isFinal() && !c.endOfTapes();
+        return (!c.isFinal()) && (!c.endOfTapes()) && !(c.stackSize() >= ProgramParameters::maxStackSize); // Not appliable when c.isFinal() or c.endOfTapes() is true, or once c.stackSize() has reached ProgramParameters::maxStackSize.
       };
     Action::BasicAction basicAction3 =
       {Action::BasicAction::Type::Push, b1, apply3, undo3, appliable3};
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index 319c5ac..89f9a93 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -94,16 +94,18 @@ Classifier::WeightedActions Classifier::weightActions(Config & config)
 {
   WeightedActions result;
 
-  if (ProgramParameters::noNeuralNetwork)
+  if(type == Type::Prediction)
   {
-    for (unsigned int i = 0; i < as->actions.size(); i++)
-      result.emplace_back(as->actions[i].appliable(config), std::pair<float, std::string>(1.0, as->actions[i].name));
+    if (ProgramParameters::noNeuralNetwork)
+    {
+      for (unsigned int i = 0; i < as->actions.size(); i++)
+        result.emplace_back(as->actions[i].appliable(config), std::pair<float, std::string>(1.0, as->actions[i].name));
 
-    return result;
-  }
+      std::random_shuffle(result.begin(), result.end());
+
+      return result;
+    }
 
-  if(type == Type::Prediction)
-  {
     initClassifier(config);
 
     auto & fd = fm->getFeatureDescription(config);
-- 
GitLab