diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 00e55f6b67d131450424e9866c564abac5ebaa2a..958741e25f2e4b9294cae3496d9294cd085895ea 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -169,6 +169,15 @@ class MLP
   ///
   /// Must be called only once, and before any call to dynet functions.
   void initDynet();
+  /// @brief Get the loss expression
+  ///
+  /// @param output Output from the neural network
+  /// @param oneHotGolds Indexes of gold classes (batched form)
+  ///
+  /// @return The loss expression
+  dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
+
+  dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
 
   public :
 
@@ -205,7 +214,9 @@ class MLP
   ///
   /// @param fd The input to use.
   /// @param gold The gold class of this input.
-  void update(FeatureModel::FeatureDescription & fd, int gold);
+  ///
+  /// @return The loss.
+  float update(FeatureModel::FeatureDescription & fd, int gold);
 
   /// @brief Save the MLP to a file.
   ///
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 409b054d287d985755e70980bb4fd90a7e57e566..e25b86a611381f3d402c68f180d73862ee97f537 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -202,13 +202,13 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
   return as_vector(cg.forward(output));
 }
 
-void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
+float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
   fds.emplace_back(fd);
   golds.emplace_back(gold);
 
   if ((int)fds.size() < ProgramParameters::batchSize)
-    return;
+    return 0.0;
 
   std::vector<dynet::Expression> inputs;
   dynet::ComputationGraph cg;
@@ -226,13 +226,73 @@ void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
   dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
   dynet::Expression output = run(cg, batchedInput);
 
-  dynet::Expression batchedLoss = pickneglogsoftmax(output, golds);
+  dynet::Expression batchedLoss;
+
+  if (ProgramParameters::loss == "neglogsoftmax")
+  {
+    batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds));
+  }
+  else if (ProgramParameters::loss == "weighted")
+  {
+    batchedLoss = weightedLoss(output, golds);
+  }
+  else if (ProgramParameters::loss == "errorCorrection")
+  {
+    batchedLoss = errorCorrectionLoss(cg, output, golds);
+  }
+  else
+  {
Aborting.\n", ERRINFO, ProgramParameters::loss.c_str()); + exit(1); + } - cg.backward(dynet::sum_batches(batchedLoss)); + cg.backward(batchedLoss); trainer->update(); fds.clear(); golds.clear(); + + return as_scalar(batchedLoss.value()); +} + +dynet::Expression MLP::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) +{ + std::vector<dynet::Expression> lossExpr; + for (unsigned int i = 0; i < output.dim().batch_elems(); i++) + { + lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i])); + auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value()); + int prediction = 0; + for (unsigned int j = 1; j < outputVect.size(); j++) + if(outputVect[j] > outputVect[prediction]) + prediction = j; + int gold = oneHotGolds[i]; + if (prediction == 1 && gold == 0) + { + lossExpr.back() = lossExpr.back() * 100.0; + } + } + + return dynet::sum(lossExpr); +} + +dynet::Expression MLP::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) +{ + std::vector<dynet::Expression> lossExpr; + for (unsigned int i = 0; i < output.dim().batch_elems(); i++) + { + unsigned int u = 0; + dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u); + dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u); + lossExpr.emplace_back(pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i]) + c-a*c); + if (ProgramParameters::debug) + { + cg.forward(lossExpr.back()); + fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value())); + } + } + + return dynet::sum(lossExpr); } dynet::DynetParams & MLP::getDefaultParams() diff --git a/error_correction/src/macaon_train_error_detector.cpp b/error_correction/src/macaon_train_error_detector.cpp index e1f87e87db3afd78d0ff638cee24be9d1f634817..3bb665138ba11e60355dace2e15ecc29af6a231c 100644 --- a/error_correction/src/macaon_train_error_detector.cpp +++ b/error_correction/src/macaon_train_error_detector.cpp @@ -45,6 +45,8 @@ po::options_description getOptionsDescription() ("printEntropy", "Print mean entropy and standard deviation accross sequences") ("optimizer", po::value<std::string>()->default_value("amsgrad"), "The learning algorithm to use : amsgrad | adam | sgd") + ("loss", po::value<std::string>()->default_value("neglogsoftmax"), + "The loss function to use : neglogsoftmax | weighted") ("lang", po::value<std::string>()->default_value("fr"), "Language you are working with") ("nbiter,n", po::value<int>()->default_value(5), @@ -290,7 +292,7 @@ std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev return scores; } -void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes, File & devFile, Config & config) +void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes, File & devFile, Config & config, float totalLoss) { for (auto & it : trainCounter) scores[it.first] = 100.0 * it.second.second / 
     scores[it.first] = 100.0 * it.second.second / it.second.first;
@@ -338,6 +340,7 @@ void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int
     savedStr.emplace_back(saved[it.first] ? "SAVED" : "");
     if (ProgramParameters::printEntropy)
       savedStr.back() += " Entropy[" + float2str(devScores[it.first].second.first, "%.2f") + "\u00B1" + float2str(devScores[it.first].second.second, "%.2f") + "]";
+    savedStr.back() += " Loss[" + float2str(totalLoss, "%.2f") + "]";
   }
 
   if (ProgramParameters::interactive)
@@ -399,13 +402,15 @@ void launchTraining()
   }
   fprintf(stderr, " done !\n");
 
-  auto resetAndShuffle = [&trainCounter,&train,&dev,&trainPtr]()
+  float totalLoss = 0.0;
+  auto resetAndShuffle = [&trainCounter,&train,&dev,&trainPtr,&totalLoss]()
   {
     train.rewind();
     dev.rewind();
     trainPtr = train.getDescriptor();
     for (auto & it : trainCounter)
      it.second.first = it.second.second = 0;
+    totalLoss = 0.0;
   };
 
   Config trainConfig(trainBD);
@@ -457,13 +462,13 @@ void launchTraining()
 
         std::string oAction = isError ? "ERROR" : "CORRECT";
 
-        classifier->trainOnExample(trainConfig, classifier->getActionIndex(oAction));
+        totalLoss += classifier->trainOnExample(trainConfig, classifier->getActionIndex(oAction));
 
         trainCounter[classifier->name].first++;
         trainCounter[classifier->name].second += pAction == oAction ? 1 : 0;
       }
 
-      printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devIsErrors, devErrorIndexes, dev, config);
+      printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devIsErrors, devErrorIndexes, dev, config, totalLoss);
       curIter++;
     }
   }
@@ -521,6 +526,7 @@ int main(int argc, char * argv[])
   ProgramParameters::bias = vm["bias"].as<float>();
   ProgramParameters::optimizer = vm["optimizer"].as<std::string>();
   ProgramParameters::dynamicEpoch = vm["epochd"].as<int>();
+  ProgramParameters::loss = vm["loss"].as<std::string>();
   ProgramParameters::dynamicProbability = vm["proba"].as<float>();
   ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
   ProgramParameters::iterationSize = vm["iterationSize"].as<int>();
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 0bc40747e9e3d9b575ff3c3f03863f3fc03f4c7b..e3aa19f178fb68535d11f28f521b0ffc4566fdc9 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -52,6 +52,7 @@ struct ProgramParameters
   static std::string sequenceDelimiter;
   static std::string classifierName;
   static int batchSize;
+  static std::string loss;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index c5c8328d0d9a671ecf1e1be9548ca20e9f02dda1..c7c56d73a45e4c43993f64927188256a22b9be0e 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -47,3 +47,4 @@ std::string ProgramParameters::sequenceDelimiterTape;
 std::string ProgramParameters::sequenceDelimiter;
 std::string ProgramParameters::classifierName;
 int ProgramParameters::batchSize;
+std::string ProgramParameters::loss;
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index 358599950f44e82268186ac214d198d858958114..62d1e0728f3275af7591e07bf8088e42540470b5 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -43,6 +43,8 @@ po::options_description getOptionsDescription()
     ("printEntropy", "Print mean entropy and standard deviation accross sequences")
     ("optimizer", po::value<std::string>()->default_value("amsgrad"),
       "The learning algorithm to use : amsgrad | adam | sgd")
("loss", po::value<std::string>()->default_value("neglogsoftmax"), + "The loss function to use : neglogsoftmax | weighted") ("dev", po::value<std::string>()->default_value(""), "Development corpus formated according to the MCD") ("lang", po::value<std::string>()->default_value("fr"), @@ -263,6 +265,7 @@ int main(int argc, char * argv[]) ProgramParameters::beta2 = vm["b2"].as<float>(); ProgramParameters::bias = vm["bias"].as<float>(); ProgramParameters::optimizer = vm["optimizer"].as<std::string>(); + ProgramParameters::loss = vm["loss"].as<std::string>(); ProgramParameters::dynamicEpoch = vm["epochd"].as<int>(); ProgramParameters::dynamicProbability = vm["proba"].as<float>(); ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>(); diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp index 667f6a1770e767630c90560ebf622aa68e0fe444..b672702ebcac4b81fc39191ec8240f332d10c686 100644 --- a/transition_machine/include/Classifier.hpp +++ b/transition_machine/include/Classifier.hpp @@ -128,7 +128,9 @@ class Classifier /// /// @param config The Config to work with. /// @param gold The gold class of the Config. - void trainOnExample(Config & config, int gold); + /// + /// @return The loss. + float trainOnExample(Config & config, int gold); /// @brief Get the name of an Action from its index. /// /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network. diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp index b54e3aa4834ae72c13e804b0e56986bc2cbd10c9..88b1cd690087e1817a2077de137b616370877e6b 100644 --- a/transition_machine/src/Classifier.cpp +++ b/transition_machine/src/Classifier.cpp @@ -245,10 +245,10 @@ std::vector<std::string> Classifier::getZeroCostActions(Config & config) return result; } -void Classifier::trainOnExample(Config & config, int gold) +float Classifier::trainOnExample(Config & config, int gold) { auto fd = fm->getFeatureDescription(config); - mlp->update(fd, gold); + return mlp->update(fd, gold); } void Classifier::explainCostOfActions(FILE * output, Config & config)