From 3f47e8a0ce47fccc85c58b7cb3e5b50da8f41ce0 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Sun, 17 Nov 2019 14:45:33 +0100
Subject: [PATCH] Fixed a bug in dev eval for tokenization, and removed program
 options incompatible with tokenization

---
 maca_common/include/ProgramParameters.hpp   |  2 --
 maca_common/src/ProgramParameters.cpp       |  2 --
 maca_common/src/programOptionsTemplates.cpp |  5 ----
 trainer/src/TrainInfos.cpp                  |  6 -----
 trainer/src/Trainer.cpp                     | 26 ++++----------------------
 5 files changed, 4 insertions(+), 37 deletions(-)

diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index c2efe89..ec8411c 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -78,8 +78,6 @@ struct ProgramParameters
   static std::string tapeToMask;
   static float maskRate;
   static bool featureExtraction;
-  static bool devEvalOnGold;
-  static bool devLoss;
   static bool randomDebug;
   static float randomDebugProbability;
   static bool alwaysSave;
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index fa4ca4c..feedefe 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -72,8 +72,6 @@ int ProgramParameters::dictCapacity;
 std::string ProgramParameters::tapeToMask;
 float ProgramParameters::maskRate;
 bool ProgramParameters::featureExtraction;
-bool ProgramParameters::devEvalOnGold;
-bool ProgramParameters::devLoss;
 bool ProgramParameters::randomDebug;
 float ProgramParameters::randomDebugProbability;
 bool ProgramParameters::alwaysSave;
diff --git a/maca_common/src/programOptionsTemplates.cpp b/maca_common/src/programOptionsTemplates.cpp
index d08163d..72045ae 100644
--- a/maca_common/src/programOptionsTemplates.cpp
+++ b/maca_common/src/programOptionsTemplates.cpp
@@ -102,9 +102,6 @@ po::options_description getTrainOptionsDescription()
       "The rate of elements of the Tape that will be masked.")
     ("printTime", "Print time on stderr.")
     ("featureExtraction", "Use macaon only as a feature extractor, print corpus to stdout.")
-    ("devEvalOnGold", "If true, dev accuracy will be computed on gold configurations.")
-    ("devLoss", po::value<bool>()->default_value(false),
-      "Compute and print total loss on dev for every epoch.")
     ("shuffle", po::value<bool>()->default_value(true),
       "Shuffle examples after each iteration");
 
@@ -296,7 +293,6 @@ void loadTrainProgramParameters(int argc, char * argv[])
   ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true;
   ProgramParameters::printTime = vm.count("printTime") == 0 ? false : true;
   ProgramParameters::featureExtraction = vm.count("featureExtraction") == 0 ? false : true;
-  ProgramParameters::devEvalOnGold = vm.count("devEvalOnGold") == 0 ? false : true;
   ProgramParameters::trainName = vm["train"].as<std::string>();
   ProgramParameters::devName = vm["dev"].as<std::string>();
   ProgramParameters::lang = vm["lang"].as<std::string>();
@@ -309,7+305,6 @@ void loadTrainProgramParameters(int argc, char * argv[])
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
   ProgramParameters::shuffleExamples = vm["shuffle"].as<bool>();
-  ProgramParameters::devLoss = vm["devLoss"].as<bool>();
   ProgramParameters::randomEmbeddings = vm["randomEmbeddings"].as<bool>();
   ProgramParameters::randomParameters = vm["randomParameters"].as<bool>();
   ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
diff --git a/trainer/src/TrainInfos.cpp b/trainer/src/TrainInfos.cpp
index ae1d6ee..ab7e106 100644
--- a/trainer/src/TrainInfos.cpp
+++ b/trainer/src/TrainInfos.cpp
@@ -300,8 +300,6 @@ void TrainInfos::setTopologyPrinted(const std::string & classifier)
 {
   topologyPrinted[classifier] = true;
   trainLossesPerClassifierPerEpoch[classifier].emplace_back(0.0);
-  if (ProgramParameters::devLoss)
-    devLossesPerClassifierPerEpoch[classifier].emplace_back(0.0);
 }
 
 void TrainInfos::nextEpoch()
@@ -309,11 +307,7 @@ void TrainInfos::nextEpoch()
   lastEpoch++;
   saveToFilename();
   for (auto & it : topologyPrinted)
-  {
     trainLossesPerClassifierPerEpoch[it.first].emplace_back(0.0);
-    if (ProgramParameters::devLoss)
-      devLossesPerClassifierPerEpoch[it.first].emplace_back(0.0);
-  }
 }
 
 void TrainInfos::computeMustSaves()
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index 1c1b0b7..406c33e 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -112,19 +112,12 @@ void Trainer::computeScoreOnDev()
       auto weightedActions = tm.getCurrentClassifier()->weightActions(*devConfig);
 
       std::string pAction = "";
-      std::string oAction = "";
 
       for (auto & it : weightedActions)
         if (it.first)
         {
-          if (pAction.empty())
-            pAction = it.second.second;
-
-          if (tm.getCurrentClassifier()->getActionCost(*devConfig, it.second.second) == 0)
-          {
-            oAction = it.second.second;
-            break;
-          }
+          pAction = it.second.second;
+          break;
         }
 
       if (pAction.empty())
@@ -134,18 +127,7 @@ void Trainer::computeScoreOnDev()
        break;
       }
 
-      if (ProgramParameters::devLoss)
-      {
-        float loss = tm.getCurrentClassifier()->getLoss(*devConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
-        TI.addDevLoss(tm.getCurrentClassifier()->name, loss);
-      }
-
-      std::string actionName;
-
-      if (ProgramParameters::devEvalOnGold)
-        actionName = oAction;
-      else
-        actionName = pAction;
+      std::string actionName = pAction;
 
       Action * action = tm.getCurrentClassifier()->getAction(actionName);
 
@@ -158,7 +140,7 @@
       TransitionMachine::Transition * transition = tm.getTransition(actionName);
 
       action->setInfos(tm.getCurrentClassifier()->name);
-      devConfig->addToActionsHistory(tm.getCurrentClassifier()->name, actionName, tm.getCurrentClassifier()->getActionCost(*devConfig, actionName));
+      devConfig->addToActionsHistory(tm.getCurrentClassifier()->name, actionName, 0);
 
       action->apply(*devConfig);
       tm.takeTransition(transition);
-- 
GitLab