// Patch summary (git unified diff; the hunks below are whitespace-collapsed and kept verbatim).
//
// Purpose: adds a "featureExtraction" mode to macaon_train.
//
// Files touched:
//   - maca_common ProgramParameters.hpp/.cpp: declares and defines the new static flag
//     `bool ProgramParameters::featureExtraction`.
//   - trainer/src/macaon_train.cpp: registers a value-less `featureExtraction` option, sets the flag
//     to true when the option is present on the command line, and adds a trailing period to the
//     `printTime` help text.
//   - trainer/src/Trainer.cpp:
//       * computeScoreOnDev() and train() print the progress percentage only when
//         `interactive` is set and `featureExtraction` is not.
//       * train(): when the flag is set, weightActions() is not called, oAction is taken from the
//         first element of getZeroCostActions(trainConfig), trainOnExample() is skipped, and one line
//         "<oAction>\t<feature values>\n" is written to stdout for the current configuration.
//         When the flag is not set, the previous prediction/oracle selection logic is kept.
//   - transition_machine Classifier.hpp/.cpp: adds Classifier::getFeatureModel(), which returns fm.get().
//   - transition_machine FeatureModel.hpp/.cpp: adds FeatureDescription::featureValues(), which
//     concatenates every value of every feature, each followed by a tab, then removes the final tab
//     if the result is not empty.
//
// NOTE(review): `getZeroCostActions(trainConfig)[0]` indexes element 0 with no visible emptiness
//   check; the later `if (oAction.empty())` fallback to getDefaultAction() would not protect against
//   an empty container. Confirm getZeroCostActions() can never return an empty result here.
// NOTE(review): the added `//ici` comment (French for "here") looks like a leftover marker.
// NOTE(review): the help text "Use macaon only a feature extractor" is presumably missing "as".
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp index 07f54550a207903db231e58bc10dfe93018c7039..274d3970a2ba18163dd504e96ae5245aba13462f 100644 --- a/maca_common/include/ProgramParameters.hpp +++ b/maca_common/include/ProgramParameters.hpp @@ -70,6 +70,7 @@ struct ProgramParameters static bool printOutputEntropy; static std::string tapeToMask; static float maskRate; + static bool featureExtraction; private : diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp index 4fab2314fb8dd9690e2b8ecb33fa3a3171728084..ac15fa8de85c340709f1b62400de2e7858553f2f 100644 --- a/maca_common/src/ProgramParameters.cpp +++ b/maca_common/src/ProgramParameters.cpp @@ -64,4 +64,5 @@ bool ProgramParameters::printOutputEntropy; int ProgramParameters::dictCapacity; std::string ProgramParameters::tapeToMask; float ProgramParameters::maskRate; +bool ProgramParameters::featureExtraction; diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp index bb8ff4030165efd99ce6716851c5be13ab27a67c..cf0c8449326f64b5cd70744bc1ba985c604d1a99 100644 --- a/trainer/src/Trainer.cpp +++ b/trainer/src/Trainer.cpp @@ -52,7 +52,7 @@ void Trainer::computeScoreOnDev() else { // Print current iter advancement in percentage - if (ProgramParameters::interactive) + if (ProgramParameters::interactive && !ProgramParameters::featureExtraction) { int totalSize = ProgramParameters::devTapeSize; int steps = devConfig->getHead(); @@ -200,7 +200,7 @@ void Trainer::train() } // Print current iter advancement in percentage - if (ProgramParameters::interactive) + if (ProgramParameters::interactive && !ProgramParameters::featureExtraction) { int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize; int steps = ProgramParameters::iterationSize == -1 ? 
trainConfig.getHead() : nbSteps; @@ -211,27 +211,35 @@ void Trainer::train() } } - auto weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig); std::string pAction = ""; std::string oAction = ""; - bool pActionIsZeroCost = false; - for (auto & it : weightedActions) - if (it.first) - { - if (pAction == "") - pAction = it.second.second; + Classifier::WeightedActions weightedActions; + if (!ProgramParameters::featureExtraction) + { + weightedActions = tm.getCurrentClassifier()->weightActions(trainConfig); - if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0) + for (auto & it : weightedActions) + if (it.first) { - oAction = it.second.second; - break; + if (pAction == "") + pAction = it.second.second; + + if (tm.getCurrentClassifier()->getActionCost(trainConfig, it.second.second) == 0) + { + oAction = it.second.second; + break; + } } - } - if (pAction == oAction) - pActionIsZeroCost = true; + if (pAction == oAction) + pActionIsZeroCost = true; + } + else + { + oAction = tm.getCurrentClassifier()->getZeroCostActions(trainConfig)[0]; + } if (oAction.empty()) oAction = tm.getCurrentClassifier()->getDefaultAction(); @@ -252,7 +260,8 @@ void Trainer::train() exit(1); } - tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction)); + if (!ProgramParameters::featureExtraction) + tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction)); TI.addTrainExample(tm.getCurrentClassifier()->name); if (pActionIsZeroCost) @@ -262,6 +271,13 @@ void Trainer::train() std::string actionName = ""; + //ici + if (ProgramParameters::featureExtraction) + { + auto features = tm.getCurrentClassifier()->getFeatureModel()->getFeatureDescription(trainConfig).featureValues(); + fprintf(stdout, "%s\t%s\n", oAction.c_str(), features.c_str()); + } + if (TI.getEpoch() >= k && choiceWithProbability(ProgramParameters::dynamicProbability)) { actionName = pAction; diff 
--git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp index 60622887416b509bf37be04b254478455b6911ff..7885ac4ce7846a636006618aa4b89c41ff350c23 100644 --- a/trainer/src/macaon_train.cpp +++ b/trainer/src/macaon_train.cpp @@ -85,7 +85,8 @@ po::options_description getOptionsDescription() "The name of the Tape for which some of the elements will be masked.") ("maskRate", po::value<float>()->default_value(0.0), "The rate of elements of the Tape that will be masked.") - ("printTime", "Print time on stderr") + ("printTime", "Print time on stderr.") + ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.") ("shuffle", po::value<bool>()->default_value(true), "Shuffle examples after each iteration"); @@ -268,6 +269,7 @@ int main(int argc, char * argv[]) ProgramParameters::debug = vm.count("debug") == 0 ? false : true; ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true; ProgramParameters::printTime = vm.count("printTime") == 0 ? false : true; + ProgramParameters::featureExtraction = vm.count("featureExtraction") == 0 ? false : true; ProgramParameters::trainName = vm["train"].as<std::string>(); ProgramParameters::devName = vm["dev"].as<std::string>(); ProgramParameters::lang = vm["lang"].as<std::string>(); diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp index 338325e7af07ebde1b3fc7c80ac94ddf26a76b7f..7c35d048c7c10e105fd3aa6b25f06abe379bc0a3 100644 --- a/transition_machine/include/Classifier.hpp +++ b/transition_machine/include/Classifier.hpp @@ -197,6 +197,8 @@ class Classifier /// /// @return The number of actions. unsigned int getNbActions(); + /// @brief Get a pointer to the FeatureModel. 
+ FeatureModel * getFeatureModel(); }; #endif diff --git a/transition_machine/include/FeatureModel.hpp b/transition_machine/include/FeatureModel.hpp index f83c8d248717f798d806149b0619263e840647bf..052caf8fd04e066234797048652beed02dd07d4b 100644 --- a/transition_machine/include/FeatureModel.hpp +++ b/transition_machine/include/FeatureModel.hpp @@ -64,6 +64,10 @@ class FeatureModel /// /// @return The string representing this FeatureDescription std::string toString(); + /// @brief Return a string representing the values of the features + /// + /// @return The string representing the values of the features + std::string featureValues(); }; private : diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp index b5999d48ff305833e799bfd5362b0a71d195febb..d1945e79a158e9a6c4497dce67eebc63adc9c6e2 100644 --- a/transition_machine/src/Classifier.cpp +++ b/transition_machine/src/Classifier.cpp @@ -328,3 +328,8 @@ unsigned int Classifier::getNbActions() return as->size(); } +FeatureModel * Classifier::getFeatureModel() +{ + return fm.get(); +} + diff --git a/transition_machine/src/FeatureModel.cpp b/transition_machine/src/FeatureModel.cpp index c65b6b4220f5f8cc9f43dad96c9575a3717c0bc4..ec6ebc3a600b6a7ce55e43b9ee9828124484776d 100644 --- a/transition_machine/src/FeatureModel.cpp +++ b/transition_machine/src/FeatureModel.cpp @@ -133,3 +133,17 @@ FeatureModel::FeatureValue::FeatureValue() { } +std::string FeatureModel::FeatureDescription::featureValues() +{ + std::string res; + + for (auto & feature : values) + for (auto & value : feature.values) + res += value + "\t"; + + if (!res.empty()) + res.pop_back(); + + return res; +} +