From 8118cf69ca1caa62062f98688152c05c09ecb03d Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Fri, 22 Mar 2019 20:52:29 +0100 Subject: [PATCH] Added featureExtraction option --- maca_common/include/ProgramParameters.hpp | 1 + maca_common/src/ProgramParameters.cpp | 1 + trainer/src/Trainer.cpp | 48 ++++++++++++++------- trainer/src/macaon_train.cpp | 4 +- transition_machine/include/Classifier.hpp | 2 + transition_machine/include/FeatureModel.hpp | 4 ++ transition_machine/src/Classifier.cpp | 5 +++ transition_machine/src/FeatureModel.cpp | 14 ++++++ 8 files changed, 62 insertions(+), 17 deletions(-) diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp index 07f5455..274d397 100644 --- a/maca_common/include/ProgramParameters.hpp +++ b/maca_common/include/ProgramParameters.hpp @@ -70,6 +70,7 @@ struct ProgramParameters static bool printOutputEntropy; static std::string tapeToMask; static float maskRate; + static bool featureExtraction; private : diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp index 4fab231..ac15fa8 100644 --- a/maca_common/src/ProgramParameters.cpp +++ b/maca_common/src/ProgramParameters.cpp @@ -64,4 +64,5 @@ bool ProgramParameters::printOutputEntropy; int ProgramParameters::dictCapacity; std::string ProgramParameters::tapeToMask; float ProgramParameters::maskRate; +bool ProgramParameters::featureExtraction; diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp index bb8ff40..cf0c844 100644 --- a/trainer/src/Trainer.cpp +++ b/trainer/src/Trainer.cpp @@ -52,7 +52,7 @@ void Trainer::computeScoreOnDev() else { // Print current iter advancement in percentage - if (ProgramParameters::interactive) + if (ProgramParameters::interactive && !ProgramParameters::featureExtraction) { int totalSize = ProgramParameters::devTapeSize; int steps = devConfig->getHead(); @@ -200,7 +200,7 @@ void Trainer::train() } // Print current iter advancement in percentage - if (ProgramParameters::interactive) + if (ProgramParameters::interactive && !ProgramParameters::featureExtraction) { int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize; int steps = ProgramParameters::iterationSize == -1 ? trainConfig.getHead() : nbSteps; @@ -211,27 +211,35 @@ void Trainer::train() } } - auto weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig); std::string pAction = ""; std::string oAction = ""; - bool pActionIsZeroCost = false; - for (auto & it : weightedActions) - if (it.first) - { - if (pAction == "") - pAction = it.second.second; + Classifier::WeightedActions weightedActions; + if (!ProgramParameters::featureExtraction) + { + weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig); - if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0) + for (auto & it : weightedActions) + if (it.first) { - oAction = it.second.second; - break; + if (pAction == "") + pAction = it.second.second; + + if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0) + { + oAction = it.second.second; + break; + } } - } - if (pAction == oAction) - pActionIsZeroCost = true; + if (pAction == oAction) + pActionIsZeroCost = true; + } + else + { + oAction = tm.getCurrentClassifier()->getZeroCostActions(trainConfig)[0]; + } if (oAction.empty()) oAction = tm.getCurrentClassifier()->getDefaultAction(); @@ -252,7 +260,8 @@ void Trainer::train() exit(1); } - tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction)); + if (!ProgramParameters::featureExtraction) + tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction)); TI.addTrainExample(tm.getCurrentClassifier()->name); if (pActionIsZeroCost) @@ -262,6 +271,13 @@ void Trainer::train() std::string actionName = ""; + //ici + if (ProgramParameters::featureExtraction) + { + auto features = tm.getCurrentClassifier()->getFeatureModel()->getFeatureDescription(trainConfig).featureValues(); + fprintf(stdout, "%s\t%s\n", oAction.c_str(), features.c_str()); + } + if (TI.getEpoch() >= k && choiceWithProbability(ProgramParameters::dynamicProbability)) { actionName = pAction; diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp index 6062288..7885ac4 100644 --- a/trainer/src/macaon_train.cpp +++ b/trainer/src/macaon_train.cpp @@ -85,7 +85,8 @@ po::options_description getOptionsDescription() "The name of the Tape for which some of the elements will be masked.") ("maskRate", po::value<float>()->default_value(0.0), "The rate of elements of the Tape that will be masked.") - ("printTime", "Print time on stderr") + ("printTime", "Print time on stderr.") + ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.") ("shuffle", po::value<bool>()->default_value(true), "Shuffle examples after each iteration"); @@ -268,6 +269,7 @@ int main(int argc, char * argv[]) ProgramParameters::debug = vm.count("debug") == 0 ? false : true; ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true; ProgramParameters::printTime = vm.count("printTime") == 0 ? false : true; + ProgramParameters::featureExtraction = vm.count("featureExtraction") == 0 ? false : true; ProgramParameters::trainName = vm["train"].as<std::string>(); ProgramParameters::devName = vm["dev"].as<std::string>(); ProgramParameters::lang = vm["lang"].as<std::string>(); diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp index 338325e..7c35d04 100644 --- a/transition_machine/include/Classifier.hpp +++ b/transition_machine/include/Classifier.hpp @@ -197,6 +197,8 @@ class Classifier /// /// @return The number of actions. unsigned int getNbActions(); + /// @brief Get a pointer to the FeatureModel. + FeatureModel * getFeatureModel(); }; #endif diff --git a/transition_machine/include/FeatureModel.hpp b/transition_machine/include/FeatureModel.hpp index f83c8d2..052caf8 100644 --- a/transition_machine/include/FeatureModel.hpp +++ b/transition_machine/include/FeatureModel.hpp @@ -64,6 +64,10 @@ class FeatureModel /// /// @return The string representing this FeatureDescription std::string toString(); + /// @brief Return a string representing the values of the features + /// + /// @return The string representing the values of the features + std::string featureValues(); }; private : diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp index b5999d4..d1945e7 100644 --- a/transition_machine/src/Classifier.cpp +++ b/transition_machine/src/Classifier.cpp @@ -328,3 +328,8 @@ unsigned int Classifier::getNbActions() return as->size(); } +FeatureModel * Classifier::getFeatureModel() +{ + return fm.get(); +} + diff --git a/transition_machine/src/FeatureModel.cpp b/transition_machine/src/FeatureModel.cpp index c65b6b4..ec6ebc3 100644 --- a/transition_machine/src/FeatureModel.cpp +++ b/transition_machine/src/FeatureModel.cpp @@ -133,3 +133,17 @@ FeatureModel::FeatureValue::FeatureValue() { } +std::string FeatureModel::FeatureDescription::featureValues() +{ + std::string res; + + for (auto & feature : values) + for (auto & value : feature.values) + res += value + "\t"; + + if (!res.empty()) + res.pop_back(); + + return res; +} + -- GitLab