From 1f6407c2bf3a8edf1528b91b362cb29c61fa28be Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@etu.univ-amu.fr>
Date: Tue, 11 Dec 2018 17:21:10 +0100
Subject: [PATCH] Added a way to change the loss function used during training

---
 MLP/include/MLP.hpp                        | 13 +++-
 MLP/src/MLP.cpp                            | 68 +++++++++++++++++--
 .../src/macaon_train_error_detector.cpp    | 14 ++--
 maca_common/include/ProgramParameters.hpp  |  1 +
 maca_common/src/ProgramParameters.cpp      |  1 +
 trainer/src/macaon_train.cpp               |  3 +
 transition_machine/include/Classifier.hpp  |  4 +-
 transition_machine/src/Classifier.cpp      |  4 +-
 8 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 00e55f6..958741e 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -169,6 +169,15 @@ class MLP
   ///
   /// Must be called only once, and before any call to dynet functions.
   void initDynet();
+  /// @brief Get the loss expression
+  ///
+  /// @param output Output from the neural network
+  /// @param oneHotGolds Indexes of gold classes (batched form)
+  ///
+  /// @return The loss expression
+  dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
+
+  dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);

   public :

@@ -205,7 +214,9 @@ class MLP
   ///
   /// @param fd The input to use.
   /// @param gold The gold class of this input.
-  void update(FeatureModel::FeatureDescription & fd, int gold);
+  ///
+  /// @return The loss.
+  float update(FeatureModel::FeatureDescription & fd, int gold);

   /// @brief Save the MLP to a file.
   ///
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 409b054..e25b86a 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -202,13 +202,13 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
   return as_vector(cg.forward(output));
 }

-void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
+float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
   fds.emplace_back(fd);
   golds.emplace_back(gold);

   if ((int)fds.size() < ProgramParameters::batchSize)
-    return;
+    return 0.0;

   std::vector<dynet::Expression> inputs;
   dynet::ComputationGraph cg;
@@ -226,13 +226,73 @@ void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
   dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
   dynet::Expression output = run(cg, batchedInput);

-  dynet::Expression batchedLoss = pickneglogsoftmax(output, golds);
+  dynet::Expression batchedLoss;
+
+  if (ProgramParameters::loss == "neglogsoftmax")
+  {
+    batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds));
+  }
+  else if (ProgramParameters::loss == "weighted")
+  {
+    batchedLoss = weightedLoss(output, golds);
+  }
+  else if (ProgramParameters::loss == "errorCorrection")
+  {
+    batchedLoss = errorCorrectionLoss(cg,output, golds);
+  }
+  else
+  {
+    fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
+    exit(1);
+  }

-  cg.backward(dynet::sum_batches(batchedLoss));
+  cg.backward(batchedLoss);
   trainer->update();

   fds.clear();
   golds.clear();
+
+  return as_scalar(batchedLoss.value());
+}
+
+dynet::Expression MLP::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
+{
+  std::vector<dynet::Expression> lossExpr;
+  for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
+  {
+    lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i]));
+    auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value());
+    int prediction = 0;
+    for (unsigned int j = 1; j < outputVect.size(); j++)
+      if(outputVect[j] > outputVect[prediction])
+        prediction = j;
+    int gold = oneHotGolds[i];
+    if (prediction == 1 && gold == 0)
+    {
+      lossExpr.back() = lossExpr.back() * 100.0;
+    }
+  }
+
+  return dynet::sum(lossExpr);
+}
+
+dynet::Expression MLP::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
+{
+  std::vector<dynet::Expression> lossExpr;
+  for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
+  {
+    unsigned int u = 0;
+    dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u);
+    dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u);
+    lossExpr.emplace_back(pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i]) + c-a*c);
+    if (ProgramParameters::debug)
+    {
+      cg.forward(lossExpr.back());
+      fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value()));
+    }
+  }
+
+  return dynet::sum(lossExpr);
 }

 dynet::DynetParams & MLP::getDefaultParams()
diff --git a/error_correction/src/macaon_train_error_detector.cpp b/error_correction/src/macaon_train_error_detector.cpp
index e1f87e8..3bb6651 100644
--- a/error_correction/src/macaon_train_error_detector.cpp
+++ b/error_correction/src/macaon_train_error_detector.cpp
@@ -45,6 +45,8 @@ po::options_description getOptionsDescription()
     ("printEntropy", "Print mean entropy and standard deviation accross sequences")
     ("optimizer", po::value<std::string>()->default_value("amsgrad"),
       "The learning algorithm to use : amsgrad | adam | sgd")
+    ("loss", po::value<std::string>()->default_value("neglogsoftmax"),
+      "The loss function to use : neglogsoftmax | weighted")
     ("lang", po::value<std::string>()->default_value("fr"),
       "Language you are working with")
     ("nbiter,n", po::value<int>()->default_value(5),
@@ -290,7 +292,7 @@ std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev
   return scores;
 }

-void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes, File & devFile, Config & config)
+void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes, File & devFile, Config & config, float totalLoss)
 {
   for (auto & it : trainCounter)
     scores[it.first] = 100.0 * it.second.second / it.second.first;
@@ -338,6 +340,7 @@ void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int
     savedStr.emplace_back(saved[it.first] ? "SAVED" : "");
     if (ProgramParameters::printEntropy)
       savedStr.back() += " Entropy[" + float2str(devScores[it.first].second.first, "%.2f") + "\u00B1" + float2str(devScores[it.first].second.second, "%.2f") + "]";
+    savedStr.back() += " Loss[" + float2str(totalLoss, "%.2f") + "]";
   }

   if (ProgramParameters::interactive)
@@ -399,13 +402,15 @@ void launchTraining()
   }
   fprintf(stderr, " done !\n");

-  auto resetAndShuffle = [&trainCounter,&train,&dev,&trainPtr]()
+  float totalLoss = 0.0;
+  auto resetAndShuffle = [&trainCounter,&train,&dev,&trainPtr,&totalLoss]()
   {
     train.rewind();
     dev.rewind();
     trainPtr = train.getDescriptor();
     for (auto & it : trainCounter)
       it.second.first = it.second.second = 0;
+    totalLoss = 0.0;
   };

   Config trainConfig(trainBD);
@@ -457,13 +462,13 @@ void launchTraining()

       std::string oAction = isError ? "ERROR" : "CORRECT";

-      classifier->trainOnExample(trainConfig, classifier->getActionIndex(oAction));
+      totalLoss += classifier->trainOnExample(trainConfig, classifier->getActionIndex(oAction));

       trainCounter[classifier->name].first++;
       trainCounter[classifier->name].second += pAction == oAction ? 1 : 0;
     }

-    printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devIsErrors, devErrorIndexes, dev, config);
+    printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devIsErrors, devErrorIndexes, dev, config, totalLoss);
     curIter++;
   }
 }
@@ -521,6 +526,7 @@ int main(int argc, char * argv[])
   ProgramParameters::bias = vm["bias"].as<float>();
   ProgramParameters::optimizer = vm["optimizer"].as<std::string>();
   ProgramParameters::dynamicEpoch = vm["epochd"].as<int>();
+  ProgramParameters::loss = vm["loss"].as<std::string>();
   ProgramParameters::dynamicProbability = vm["proba"].as<float>();
   ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
   ProgramParameters::iterationSize = vm["iterationSize"].as<int>();
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 0bc4074..e3aa19f 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -52,6 +52,7 @@ struct ProgramParameters
   static std::string sequenceDelimiter;
   static std::string classifierName;
   static int batchSize;
+  static std::string loss;

   private :

diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index c5c8328..c7c56d7 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -47,3 +47,4 @@ std::string ProgramParameters::sequenceDelimiterTape;
 std::string ProgramParameters::sequenceDelimiter;
 std::string ProgramParameters::classifierName;
 int ProgramParameters::batchSize;
+std::string ProgramParameters::loss;
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index 3585999..62d1e07 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -43,6 +43,8 @@ po::options_description getOptionsDescription()
     ("printEntropy", "Print mean entropy and standard deviation accross sequences")
     ("optimizer", po::value<std::string>()->default_value("amsgrad"),
       "The learning algorithm to use : amsgrad | adam | sgd")
+    ("loss", po::value<std::string>()->default_value("neglogsoftmax"),
+      "The loss function to use : neglogsoftmax | weighted")
     ("dev", po::value<std::string>()->default_value(""),
       "Development corpus formated according to the MCD")
     ("lang", po::value<std::string>()->default_value("fr"),
@@ -263,6 +265,7 @@ int main(int argc, char * argv[])
   ProgramParameters::beta2 = vm["b2"].as<float>();
   ProgramParameters::bias = vm["bias"].as<float>();
   ProgramParameters::optimizer = vm["optimizer"].as<std::string>();
+  ProgramParameters::loss = vm["loss"].as<std::string>();
   ProgramParameters::dynamicEpoch = vm["epochd"].as<int>();
   ProgramParameters::dynamicProbability = vm["proba"].as<float>();
   ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp
index 667f6a1..b672702 100644
--- a/transition_machine/include/Classifier.hpp
+++ b/transition_machine/include/Classifier.hpp
@@ -128,7 +128,9 @@ class Classifier
   ///
   /// @param config The Config to work with.
   /// @param gold The gold class of the Config.
-  void trainOnExample(Config & config, int gold);
+  ///
+  /// @return The loss.
+  float trainOnExample(Config & config, int gold);
   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index b54e3aa..88b1cd6 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -245,10 +245,10 @@ std::vector<std::string> Classifier::getZeroCostActions(Config & config)
   return result;
 }

-void Classifier::trainOnExample(Config & config, int gold)
+float Classifier::trainOnExample(Config & config, int gold)
 {
   auto fd = fm->getFeatureDescription(config);
-  mlp->update(fd, gold);
+  return mlp->update(fd, gold);
 }

 void Classifier::explainCostOfActions(FILE * output, Config & config)
--
GitLab
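
For reference, the standalone sketch below is not part of the commit; it restates, for a single example and from plain softmax probabilities instead of dynet expressions, what the three loss variants selectable through ProgramParameters::loss compute. The mapping of class 0 to CORRECT and class 1 to ERROR is an assumption inferred from the false-positive penalty in MLP::weightedLoss; the patch itself does not fix these indexes.

// Illustrative sketch (not part of the patch) of the three losses that
// MLP::update can now select via ProgramParameters::loss, computed here
// for one example from plain softmax probabilities.
// Assumption: class 0 = CORRECT, class 1 = ERROR (inferred from weightedLoss).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// "neglogsoftmax": negative log-likelihood of the gold class.
float neglogsoftmaxLoss(const std::vector<float> & probs, unsigned int gold)
{
  return -std::log(probs[gold]);
}

// "weighted": same NLL, but a false positive (predicting class 1 while the
// gold class is 0) is penalized 100 times more, as in MLP::weightedLoss.
float weightedLoss(const std::vector<float> & probs, unsigned int gold)
{
  unsigned int prediction = std::max_element(probs.begin(), probs.end()) - probs.begin();
  float loss = -std::log(probs[gold]);
  if (prediction == 1 && gold == 0)
    loss *= 100.0f;
  return loss;
}

// "errorCorrection": NLL plus c - a*c = c*(1-a), where c is 1 iff the gold
// class is 0 and a is the predicted probability of class 0, mirroring
// MLP::errorCorrectionLoss.
float errorCorrectionLoss(const std::vector<float> & probs, unsigned int gold)
{
  float c = (gold == 0) ? 1.0f : 0.0f;
  float a = probs[0];
  return -std::log(probs[gold]) + c - a * c;
}

int main()
{
  std::vector<float> probs = {0.3f, 0.7f}; // P(class 0), P(class 1)
  unsigned int gold = 0;                   // the example's gold class is 0
  std::printf("neglogsoftmax   : %.4f\n", neglogsoftmaxLoss(probs, gold));
  std::printf("weighted        : %.4f\n", weightedLoss(probs, gold));
  std::printf("errorCorrection : %.4f\n", errorCorrectionLoss(probs, gold));
  return 0;
}

At training time the variant is chosen with the new --loss option (default neglogsoftmax), which both macaon_train and macaon_train_error_detector now expose.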