diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 7534930a2ea9c717c791acd2b266cb2e63dbb3ac..a149a50196c277e2e79c0fa282ce8b735346cdec 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -72,6 +72,7 @@ struct ProgramParameters
   static float maskRate;
   static bool featureExtraction;
   static bool devEvalOnGold;
+  static bool devLoss;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index 988120f137cdabec72c2887a5f2733e7d2a99541..07be5b4422bb0c37d52a272046e1edcdbcf0add5 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -66,4 +66,5 @@
 std::string ProgramParameters::tapeToMask;
 float ProgramParameters::maskRate;
 bool ProgramParameters::featureExtraction;
 bool ProgramParameters::devEvalOnGold;
+bool ProgramParameters::devLoss;
diff --git a/neural_network/include/GeneticAlgorithm.hpp b/neural_network/include/GeneticAlgorithm.hpp
index 0af8e2e103a93a4f041fffbe23aa10e23912d44b..83ce2d1997f3c54414e9ea706a6b086253897c58 100644
--- a/neural_network/include/GeneticAlgorithm.hpp
+++ b/neural_network/include/GeneticAlgorithm.hpp
@@ -105,6 +105,14 @@ class GeneticAlgorithm : public NeuralNetwork
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
 
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
+
   /// @brief Save the GeneticAlgorithm to a file.
   ///
   /// @param filename The file to write the GeneticAlgorithm to.
diff --git a/neural_network/include/MLP.hpp b/neural_network/include/MLP.hpp
index ab7b3c8bcbf0246f7f1a83a4de7ff15121be95eb..47bc80e05c7a3e14e571315e180bbc98d83e95a6 100644
--- a/neural_network/include/MLP.hpp
+++ b/neural_network/include/MLP.hpp
@@ -54,6 +54,13 @@ class MLP : public NeuralNetwork
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
diff --git a/neural_network/include/MLPBase.hpp b/neural_network/include/MLPBase.hpp
index 2d047e1056025c65408e51810221b417ac74a531..06c02ad3c7c0dafcef7cbda00e4fd66816f2a6fc 100644
--- a/neural_network/include/MLPBase.hpp
+++ b/neural_network/include/MLPBase.hpp
@@ -123,6 +123,13 @@ class MLPBase
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold);
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold);
   /// @brief Print the topology (Layers) of the MLP.
   ///
   /// @param output Where the topology will be printed.
diff --git a/neural_network/include/NeuralNetwork.hpp b/neural_network/include/NeuralNetwork.hpp
index 19751c44bb190da97df7d343682ed794034009cb..4b88540886d481d539518bc09f14243d93ee762a 100644
--- a/neural_network/include/NeuralNetwork.hpp
+++ b/neural_network/include/NeuralNetwork.hpp
@@ -145,6 +145,14 @@ class NeuralNetwork
   /// @return The loss.
   virtual float update(FeatureModel::FeatureDescription & fd, int gold) = 0;
 
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  virtual float getLoss(FeatureModel::FeatureDescription & fd, int gold) = 0;
+
   /// @brief Save the NeuralNetwork to a file.
   ///
   /// @param filename The file to write the NeuralNetwork to.
diff --git a/neural_network/src/GeneticAlgorithm.cpp b/neural_network/src/GeneticAlgorithm.cpp
index a0c971679f35c2ef9402a1e643ee72d3093ba745..6d206e93b81e11345e211c5d72de83026fd835ad 100644
--- a/neural_network/src/GeneticAlgorithm.cpp
+++ b/neural_network/src/GeneticAlgorithm.cpp
@@ -61,6 +61,16 @@ std::vector<float> GeneticAlgorithm::predict(FeatureModel::FeatureDescription &
   return prediction;
 }
 
+float GeneticAlgorithm::getLoss(FeatureModel::FeatureDescription &, int)
+{
+  float loss = 0.0;
+
+  for (auto & individual : generation)
+    loss += individual->loss;
+
+  return loss;
+}
+
 float GeneticAlgorithm::update(FeatureModel::FeatureDescription & fd, int gold)
 {
   bool haveBeenUpdated = false;
diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp
index c9388422080c59b92c7c4b8821ddc19f8a03eeef..15b5742a874ade9d297051b0dfbe6baf192532d3 100644
--- a/neural_network/src/MLP.cpp
+++ b/neural_network/src/MLP.cpp
@@ -68,6 +68,11 @@ float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
   }
 }
 
+float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  return mlp.getLoss(fd, gold);
+}
+
 void MLP::save(const std::string & filename)
 {
   File * file = new File(filename, "w");
diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp
index 60236bd2870496fe1aa2df6aac57a1925380d712..b88147c23bb6b90ecb2d0f0d0e765341f705883e 100644
--- a/neural_network/src/MLPBase.cpp
+++ b/neural_network/src/MLPBase.cpp
@@ -143,6 +143,46 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   return as_scalar(batchedLoss.value());
 }
 
+float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  std::vector<dynet::Expression> inputs;
+  std::vector<unsigned int> goldss;
+  goldss.emplace_back(gold);
+  dynet::ComputationGraph cg;
+
+  std::vector<dynet::Expression> expressions;
+
+  for (auto & featValue : fd.values)
+    expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue));
+
+  dynet::Expression input = dynet::concatenate(expressions);
+  inputs.emplace_back(input);
+
+  dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
+  dynet::Expression output = run(cg, batchedInput);
+  dynet::Expression batchedLoss;
+
+  if (ProgramParameters::loss == "neglogsoftmax")
+  {
+    batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, goldss));
+  }
+  else if (ProgramParameters::loss == "weighted")
+  {
+    batchedLoss = weightedLoss(output, goldss);
+  }
+  else if (ProgramParameters::loss == "errorCorrection")
+  {
+    batchedLoss = errorCorrectionLoss(cg, output, goldss);
+  }
+  else
+  {
+    fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
+    exit(1);
+  }
+
+  return as_scalar(batchedLoss.value());
+}
+
 void MLPBase::checkGradients()
 {
   bool printGradients = false;
diff --git a/trainer/include/TrainInfos.hpp b/trainer/include/TrainInfos.hpp
index cd4d0b262d8714db93f9dec76b4dbbfaa78b7c9b..2fa6adf4e990e3cb7a4cc1760a081f08acab476c 100644
--- a/trainer/include/TrainInfos.hpp
+++ b/trainer/include/TrainInfos.hpp
@@ -24,6 +24,8 @@ class TrainInfos
   std::map< std::string, std::vector<bool> > mustSavePerClassifierPerEpoch;
   std::map< std::string, std::pair<int,int> > trainCounter;
+  std::map<std::string, float> trainLossCounter;
+  std::map<std::string, float> devLossCounter;
   std::map< std::string, std::pair<int,int> > devCounter;
 
   std::map<std::string, bool> topologyPrinted;
 
@@ -42,8 +44,9 @@
   TrainInfos();
   void addTrainLoss(const std::string & classifier, float loss);
   void addDevLoss(const std::string & classifier, float loss);
-  void addTrainExample(const std::string & classifier);
+  void addTrainExample(const std::string & classifier, float loss);
   void addDevExample(const std::string & classifier);
+  void addDevExample(const std::string & classifier, float loss);
   void addTrainSuccess(const std::string & classifier);
   void addDevSuccess(const std::string & classifier);
   void resetCounters();
diff --git a/trainer/src/TrainInfos.cpp b/trainer/src/TrainInfos.cpp
index ee4ee2fed983850f9052ec9b9f9eb28f450bb283..d754cd6577f713d74efe0217f83d6c3046242a7f 100644
--- a/trainer/src/TrainInfos.cpp
+++ b/trainer/src/TrainInfos.cpp
@@ -155,9 +155,10 @@ float TrainInfos::computeDevScore(const std::string & classifier)
   return 100.0*devCounter[classifier].first / devCounter[classifier].second;
 }
 
-void TrainInfos::addTrainExample(const std::string & classifier)
+void TrainInfos::addTrainExample(const std::string & classifier, float loss)
 {
   trainCounter[classifier].second++;
+  trainLossCounter[classifier] += loss;
 }
 
 void TrainInfos::addDevExample(const std::string & classifier)
@@ -165,6 +166,12 @@ void TrainInfos::addDevExample(const std::string & classifier)
   devCounter[classifier].second++;
 }
 
+void TrainInfos::addDevExample(const std::string & classifier, float loss)
+{
+  devCounter[classifier].second++;
+  devLossCounter[classifier] += loss;
+}
+
 void TrainInfos::addTrainSuccess(const std::string & classifier)
 {
   trainCounter[classifier].first++;
@@ -184,13 +191,24 @@ void TrainInfos::resetCounters()
 void TrainInfos::computeTrainScores()
 {
   for (auto & it : trainCounter)
+  {
     addTrainScore(it.first, computeTrainScore(it.first));
+    addTrainLoss(it.first, trainLossCounter[it.first]);
+    trainLossCounter[it.first] = 0.0;
+  }
 }
 
 void TrainInfos::computeDevScores()
 {
   for (auto & it : devCounter)
+  {
     addDevScore(it.first, computeDevScore(it.first));
+    if (devLossCounter.count(it.first))
+    {
+      addDevLoss(it.first, devLossCounter[it.first]);
+      devLossCounter[it.first] = 0.0;
+    }
+  }
 }
 
 int TrainInfos::getEpoch()
@@ -246,7 +264,9 @@ void TrainInfos::printScores(FILE * output)
   std::vector<std::string> names;
   std::vector<std::string> acc;
   std::vector<std::string> train;
+  std::vector<std::string> lossTrain;
   std::vector<std::string> dev;
+  std::vector<std::string> lossDev;
   std::vector<std::string> savedStr;
 
   for (auto & it : trainScoresPerClassifierPerEpoch)
@@ -254,7 +274,9 @@
   {
     names.emplace_back(it.first);
     acc.emplace_back("accuracy");
     train.emplace_back(": train(" + float2str(it.second.back(), "%.2f") + "%)");
+    lossTrain.emplace_back(trainLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(trainLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     dev.emplace_back(devScoresPerClassifierPerEpoch.empty() ? "" : "dev(" +float2str(devScoresPerClassifierPerEpoch[it.first].back(), "%.2f") + "%)");
+    lossDev.emplace_back(devLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(devLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     savedStr.emplace_back(mustSavePerClassifierPerEpoch[it.first].back() ? "SAVED" : "");
   }
@@ -264,7 +286,7 @@
   fprintf(output, "[%s] ", getTime().c_str());
   fprintf(output, "Iteration %d/%d : \n", getEpoch(), ProgramParameters::nbIter);
 
-  printColumns(output, {names, acc, train, dev, savedStr});
+  printColumns(output, {names, acc, train, lossTrain, dev, lossDev, savedStr});
 }
 
 bool TrainInfos::mustSave(const std::string & classifier)
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index 1fcbe952b6245f1b2d8ef414e44d412ba4a24e46..ed834948f5c28224f2e9d8a6daef7d22a2cc5246 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -82,7 +82,16 @@ void Trainer::computeScoreOnDev()
       bool pActionIsZeroCost = tm.getCurrentClassifier()->getActionCost(*devConfig, pAction) == 0;
 
-      TI.addDevExample(tm.getCurrentClassifier()->name);
+      if (ProgramParameters::devLoss)
+      {
+        float loss = tm.getCurrentClassifier()->getLoss(*devConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+        TI.addDevExample(tm.getCurrentClassifier()->name, loss);
+      }
+      else
+      {
+        TI.addDevExample(tm.getCurrentClassifier()->name);
+      }
+
       if (((!ProgramParameters::devEvalOnGold) && pActionIsZeroCost) || (pAction == oAction))
         TI.addDevSuccess(tm.getCurrentClassifier()->name);
@@ -273,10 +282,11 @@
         exit(1);
       }
 
+      float loss = 0.0;
       if (!ProgramParameters::featureExtraction)
-        tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+        loss = tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
 
-      TI.addTrainExample(tm.getCurrentClassifier()->name);
+      TI.addTrainExample(tm.getCurrentClassifier()->name, loss);
 
       if (pActionIsZeroCost)
         TI.addTrainSuccess(tm.getCurrentClassifier()->name);
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index e3036854e7a1d555a21cd8a7bbcd4d02d2feb3dc..e15580770a0c21f5871a606485a39f172531b0b1 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -88,6 +88,8 @@ po::options_description getOptionsDescription()
     ("printTime", "Print time on stderr.")
     ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.")
    ("devEvalOnGold", "If true, dev accuracy will be computed on gold configurations.")
+    ("devLoss", po::value<bool>()->default_value(false),
+      "Compute and print total loss on dev for every epoch.")
     ("shuffle", po::value<bool>()->default_value(true),
       "Shuffle examples after each iteration");
 
@@ -283,6 +285,7 @@ int main(int argc, char * argv[])
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
   ProgramParameters::shuffleExamples = vm["shuffle"].as<bool>();
+  ProgramParameters::devLoss = vm["devLoss"].as<bool>();
   ProgramParameters::randomEmbeddings = vm["randomEmbeddings"].as<bool>();
   ProgramParameters::randomParameters = vm["randomParameters"].as<bool>();
   ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp
index 7c35d048c7c10e105fd3aa6b25f06abe379bc0a3..0ece5edfea89dc02971ded3f3885105a7514fc66 100644
--- a/transition_machine/include/Classifier.hpp
+++ b/transition_machine/include/Classifier.hpp
@@ -156,6 +156,13 @@ class Classifier
   ///
   /// @return The loss.
   float trainOnExample(Config & config, int gold);
+  /// @brief Get the loss of the classifier on a training example.
+  ///
+  /// @param config The Config to work with.
+  /// @param gold The gold class of the Config.
+  ///
+  /// @return The loss.
+  float getLoss(Config & config, int gold);
   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index 06641a9eb56727d25d5a701e4b2406ea4252630b..6059fbac526f1fcce6dc3e96bcbcf60c87d7d4ea 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -273,6 +273,12 @@ float Classifier::trainOnExample(Config & config, int gold)
   return nn->update(fd, gold);
 }
 
+float Classifier::getLoss(Config & config, int gold)
+{
+  auto & fd = fm->getFeatureDescription(config);
+  return nn->getLoss(fd, gold);
+}
+
 void Classifier::explainCostOfActions(FILE * output, Config & config)
 {
   for (Action & a : as->actions)