From 8118cf69ca1caa62062f98688152c05c09ecb03d Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 22 Mar 2019 20:52:29 +0100
Subject: [PATCH] Added featureExtraction option

---
 maca_common/include/ProgramParameters.hpp   |  1 +
 maca_common/src/ProgramParameters.cpp       |  1 +
 trainer/src/Trainer.cpp                     | 48 ++++++++++++++-------
 trainer/src/macaon_train.cpp                |  4 +-
 transition_machine/include/Classifier.hpp   |  2 +
 transition_machine/include/FeatureModel.hpp |  4 ++
 transition_machine/src/Classifier.cpp       |  5 +++
 transition_machine/src/FeatureModel.cpp     | 14 ++++++
 8 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 07f5455..274d397 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -70,6 +70,7 @@ struct ProgramParameters
   static bool printOutputEntropy;
   static std::string tapeToMask;
   static float maskRate;
+  static bool featureExtraction;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index 4fab231..ac15fa8 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -64,4 +64,5 @@ bool ProgramParameters::printOutputEntropy;
 int ProgramParameters::dictCapacity;
 std::string ProgramParameters::tapeToMask;
 float ProgramParameters::maskRate;
+bool ProgramParameters::featureExtraction;
 
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index bb8ff40..cf0c844 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -52,7 +52,7 @@ void Trainer::computeScoreOnDev()
     else
     {
       // Print current iter advancement in percentage
-      if (ProgramParameters::interactive)
+      if (ProgramParameters::interactive && !ProgramParameters::featureExtraction)
       {
         int totalSize = ProgramParameters::devTapeSize;
         int steps = devConfig->getHead();
@@ -200,7 +200,7 @@ void Trainer::train()
         }
 
         // Print current iter advancement in percentage
-        if (ProgramParameters::interactive)
+        if (ProgramParameters::interactive && !ProgramParameters::featureExtraction)
         {
           int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize;
           int steps = ProgramParameters::iterationSize == -1 ? trainConfig.getHead() : nbSteps;
@@ -211,27 +211,35 @@ void Trainer::train()
           }
         }
 
-        auto weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig);
         std::string pAction = "";
         std::string oAction = "";
-
         bool pActionIsZeroCost = false;
 
-        for (auto & it : weightedActions)
-          if (it.first)
-          {
-            if (pAction == "")
-              pAction = it.second.second;
+        Classifier::WeightedActions weightedActions;
+        if (!ProgramParameters::featureExtraction)
+        {
+          weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig);
 
-            if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0)
+          for (auto & it : weightedActions)
+            if (it.first)
             {
-              oAction = it.second.second;
-              break;
+              if (pAction == "")
+                pAction = it.second.second;
+
+              if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0)
+              {
+                oAction = it.second.second;
+                break;
+              }
             }
-          }
 
-        if (pAction == oAction)
-          pActionIsZeroCost = true;
+          if (pAction == oAction)
+            pActionIsZeroCost = true;
+        }
+        else
+        {
+          oAction = tm.getCurrentClassifier()->getZeroCostActions(trainConfig)[0];
+        }
 
         if (oAction.empty())
           oAction = tm.getCurrentClassifier()->getDefaultAction();
@@ -252,7 +260,8 @@ void Trainer::train()
           exit(1);
         }
 
-        tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+        if (!ProgramParameters::featureExtraction)
+          tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
 
         TI.addTrainExample(tm.getCurrentClassifier()->name);
         if (pActionIsZeroCost)
@@ -262,6 +271,13 @@ void Trainer::train()
 
         std::string actionName = "";
 
+        // In feature-extraction mode, print oAction and the tab-separated feature values to stdout.
+        if (ProgramParameters::featureExtraction)
+        {
+          auto features = tm.getCurrentClassifier()->getFeatureModel()->getFeatureDescription(trainConfig).featureValues();
+          fprintf(stdout, "%s\t%s\n", oAction.c_str(), features.c_str());
+        }
+
         if (TI.getEpoch() >= k && choiceWithProbability(ProgramParameters::dynamicProbability))
         {
           actionName = pAction;
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index 6062288..7885ac4 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -85,7 +85,8 @@ po::options_description getOptionsDescription()
       "The name of the Tape for which some of the elements will be masked.")
     ("maskRate", po::value<float>()->default_value(0.0),
       "The rate of elements of the Tape that will be masked.")
-    ("printTime", "Print time on stderr")
+    ("printTime", "Print time on stderr.")
+    ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.")
     ("shuffle", po::value<bool>()->default_value(true),
       "Shuffle examples after each iteration");
 
@@ -268,6 +269,7 @@ int main(int argc, char * argv[])
   ProgramParameters::debug = vm.count("debug") == 0 ? false : true;
   ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true;
   ProgramParameters::printTime = vm.count("printTime") == 0 ? false : true;
+  ProgramParameters::featureExtraction = vm.count("featureExtraction") == 0 ? false : true;
   ProgramParameters::trainName = vm["train"].as<std::string>();
   ProgramParameters::devName = vm["dev"].as<std::string>();
   ProgramParameters::lang = vm["lang"].as<std::string>();
diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp
index 338325e..7c35d04 100644
--- a/transition_machine/include/Classifier.hpp
+++ b/transition_machine/include/Classifier.hpp
@@ -197,6 +197,8 @@ class Classifier
   ///
   /// @return The number of actions.
   unsigned int getNbActions();
+  /// @brief Get a pointer to the FeatureModel.
+  FeatureModel * getFeatureModel();
 };
 
 #endif
diff --git a/transition_machine/include/FeatureModel.hpp b/transition_machine/include/FeatureModel.hpp
index f83c8d2..052caf8 100644
--- a/transition_machine/include/FeatureModel.hpp
+++ b/transition_machine/include/FeatureModel.hpp
@@ -64,6 +64,10 @@ class FeatureModel
     ///
     /// @return The string representing this FeatureDescription
     std::string toString();
+    /// @brief Return a string representing the values of the features
+    ///
+    /// @return The string representing the values of the features
+    std::string featureValues();
   };
 
   private :
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index b5999d4..d1945e7 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -328,3 +328,8 @@ unsigned int Classifier::getNbActions()
   return as->size();
 }
 
+FeatureModel * Classifier::getFeatureModel()
+{
+  return fm.get(); // hands out the raw pointer held by fm; presumably fm is a smart pointer that keeps ownership — TODO confirm
+}
+
diff --git a/transition_machine/src/FeatureModel.cpp b/transition_machine/src/FeatureModel.cpp
index c65b6b4..ec6ebc3 100644
--- a/transition_machine/src/FeatureModel.cpp
+++ b/transition_machine/src/FeatureModel.cpp
@@ -133,3 +133,17 @@ FeatureModel::FeatureValue::FeatureValue()
 {
 }
 
+std::string FeatureModel::FeatureDescription::featureValues()
+{
+  std::string joined; // all values of all features, tab-separated, in order
+  for (auto & featureValue : values)
+    for (auto & text : featureValue.values)
+    {
+      joined += text;
+      joined += "\t";
+    }
+  if (!joined.empty()) // strip the separator left after the last value
+    joined.pop_back();
+  return joined;
+}
+
-- 
GitLab