Commit 613f04a9 authored by Franck Dary

Added devLoss option, allowing the dev loss to be computed

parent 408336d8
Showing 144 additions and 6 deletions
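
Summary of the change: a new boolean program parameter devLoss is added; the NeuralNetwork hierarchy (GeneticAlgorithm, MLP via MLPBase) and Classifier gain a getLoss method that computes the loss of an example without updating the weights; TrainInfos accumulates per-classifier train and dev losses and prints them next to the accuracies; Trainer records the train loss returned by trainOnExample and, when devLoss is enabled, the loss of every dev example. A minimal usage sketch, assuming the training tool is launched as in other macaon training runs (the binary name below is a placeholder, not taken from this commit):

    macaon_train [usual training options] --devLoss true

Since the option is declared with po::value<bool>()->default_value(false), omitting it keeps the previous behaviour: dev examples are counted but their loss is not computed.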
@@ -72,6 +72,7 @@ struct ProgramParameters
   static float maskRate;
   static bool featureExtraction;
   static bool devEvalOnGold;
+  static bool devLoss;
   private :
......
@@ -66,4 +66,5 @@ std::string ProgramParameters::tapeToMask;
 float ProgramParameters::maskRate;
 bool ProgramParameters::featureExtraction;
 bool ProgramParameters::devEvalOnGold;
+bool ProgramParameters::devLoss;
@@ -105,6 +105,14 @@ class GeneticAlgorithm : public NeuralNetwork
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
   /// @brief Save the GeneticAlgorithm to a file.
   ///
   /// @param filename The file to write the GeneticAlgorithm to.
......
@@ -54,6 +54,13 @@ class MLP : public NeuralNetwork
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
......
@@ -123,6 +123,13 @@ class MLPBase
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold);
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float getLoss(FeatureModel::FeatureDescription & fd, int gold);
   /// @brief Print the topology (Layers) of the MLP.
   ///
   /// @param output Where the topology will be printed.
......
@@ -145,6 +145,14 @@ class NeuralNetwork
   /// @return The loss.
   virtual float update(FeatureModel::FeatureDescription & fd, int gold) = 0;
+  /// @brief Get the loss according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  virtual float getLoss(FeatureModel::FeatureDescription & fd, int gold) = 0;
   /// @brief Save the NeuralNetwork to a file.
   ///
   /// @param filename The file to write the NeuralNetwork to.
......
@@ -61,6 +61,16 @@ std::vector<float> GeneticAlgorithm::predict(FeatureModel::FeatureDescription &
   return prediction;
 }
+float GeneticAlgorithm::getLoss(FeatureModel::FeatureDescription &, int)
+{
+  float loss = 0.0;
+  for (auto & individual : generation)
+    loss += individual->loss;
+  return loss;
+}
 float GeneticAlgorithm::update(FeatureModel::FeatureDescription & fd, int gold)
 {
   bool haveBeenUpdated = false;
......
@@ -68,6 +68,11 @@ float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
   }
 }
+float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  return mlp.getLoss(fd, gold);
+}
 void MLP::save(const std::string & filename)
 {
   File * file = new File(filename, "w");
......
@@ -143,6 +143,46 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   return as_scalar(batchedLoss.value());
 }
+float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, int gold)
+{
+  std::vector<dynet::Expression> inputs;
+  std::vector<unsigned int> goldss;
+  goldss.emplace_back(gold);
+  dynet::ComputationGraph cg;
+  std::vector<dynet::Expression> expressions;
+  for (auto & featValue : fd.values)
+    expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue));
+  dynet::Expression input = dynet::concatenate(expressions);
+  inputs.emplace_back(input);
+  dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
+  dynet::Expression output = run(cg, batchedInput);
+  dynet::Expression batchedLoss;
+  if (ProgramParameters::loss == "neglogsoftmax")
+  {
+    batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, goldss));
+  }
+  else if (ProgramParameters::loss == "weighted")
+  {
+    batchedLoss = weightedLoss(output, goldss);
+  }
+  else if (ProgramParameters::loss == "errorCorrection")
+  {
+    batchedLoss = errorCorrectionLoss(cg, output, goldss);
+  }
+  else
+  {
+    fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
+    exit(1);
+  }
+  return as_scalar(batchedLoss.value());
+}
 void MLPBase::checkGradients()
 {
   bool printGradients = false;
......
@@ -24,6 +24,8 @@ class TrainInfos
   std::map< std::string, std::vector<bool> > mustSavePerClassifierPerEpoch;
   std::map< std::string, std::pair<int,int> > trainCounter;
+  std::map<std::string, float> trainLossCounter;
+  std::map<std::string, float> devLossCounter;
   std::map< std::string, std::pair<int,int> > devCounter;
   std::map<std::string, bool> topologyPrinted;
@@ -42,8 +44,9 @@ class TrainInfos
   TrainInfos();
   void addTrainLoss(const std::string & classifier, float loss);
   void addDevLoss(const std::string & classifier, float loss);
-  void addTrainExample(const std::string & classifier);
+  void addTrainExample(const std::string & classifier, float loss);
   void addDevExample(const std::string & classifier);
+  void addDevExample(const std::string & classifier, float loss);
   void addTrainSuccess(const std::string & classifier);
   void addDevSuccess(const std::string & classifier);
   void resetCounters();
......
@@ -155,9 +155,10 @@ float TrainInfos::computeDevScore(const std::string & classifier)
   return 100.0*devCounter[classifier].first / devCounter[classifier].second;
 }
-void TrainInfos::addTrainExample(const std::string & classifier)
+void TrainInfos::addTrainExample(const std::string & classifier, float loss)
 {
   trainCounter[classifier].second++;
+  trainLossCounter[classifier] += loss;
 }
 void TrainInfos::addDevExample(const std::string & classifier)
@@ -165,6 +166,12 @@ void TrainInfos::addDevExample(const std::string & classifier)
   devCounter[classifier].second++;
 }
+void TrainInfos::addDevExample(const std::string & classifier, float loss)
+{
+  devCounter[classifier].second++;
+  devLossCounter[classifier] += loss;
+}
 void TrainInfos::addTrainSuccess(const std::string & classifier)
 {
   trainCounter[classifier].first++;
@@ -184,13 +191,24 @@ void TrainInfos::resetCounters()
 void TrainInfos::computeTrainScores()
 {
   for (auto & it : trainCounter)
+  {
     addTrainScore(it.first, computeTrainScore(it.first));
+    addTrainLoss(it.first, trainLossCounter[it.first]);
+    trainLossCounter[it.first] = 0.0;
+  }
 }
 void TrainInfos::computeDevScores()
 {
   for (auto & it : devCounter)
+  {
     addDevScore(it.first, computeDevScore(it.first));
+    if (devLossCounter.count(it.first))
+    {
+      addDevLoss(it.first, devLossCounter[it.first]);
+      devLossCounter[it.first] = 0.0;
+    }
+  }
 }
 int TrainInfos::getEpoch()
@@ -246,7 +264,9 @@ void TrainInfos::printScores(FILE * output)
   std::vector<std::string> names;
   std::vector<std::string> acc;
   std::vector<std::string> train;
+  std::vector<std::string> lossTrain;
   std::vector<std::string> dev;
+  std::vector<std::string> lossDev;
   std::vector<std::string> savedStr;
   for (auto & it : trainScoresPerClassifierPerEpoch)
@@ -254,7 +274,9 @@ void TrainInfos::printScores(FILE * output)
     names.emplace_back(it.first);
     acc.emplace_back("accuracy");
     train.emplace_back(": train(" + float2str(it.second.back(), "%.2f") + "%)");
+    lossTrain.emplace_back(trainLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(trainLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     dev.emplace_back(devScoresPerClassifierPerEpoch.empty() ? "" : "dev(" +float2str(devScoresPerClassifierPerEpoch[it.first].back(), "%.2f") + "%)");
+    lossDev.emplace_back(devLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(devLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")");
     savedStr.emplace_back(mustSavePerClassifierPerEpoch[it.first].back() ? "SAVED" : "");
   }
@@ -264,7 +286,7 @@ void TrainInfos::printScores(FILE * output)
   fprintf(output, "[%s] ", getTime().c_str());
   fprintf(output, "Iteration %d/%d : \n", getEpoch(), ProgramParameters::nbIter);
-  printColumns(output, {names, acc, train, dev, savedStr});
+  printColumns(output, {names, acc, train, lossTrain, dev, lossDev, savedStr});
 }
 bool TrainInfos::mustSave(const std::string & classifier)
......
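With the TrainInfos changes above, the per-epoch summary printed by printScores gains a loss(...) column after the train accuracy and another after the dev accuracy, falling back to loss(?) when no losses have been recorded for a classifier. Purely as an illustration (the classifier name and numbers are made up, and the exact spacing depends on printColumns, which is not part of this diff), one line of the summary would look roughly like:

    Parser accuracy : train(93.12%) loss(245.80) dev(91.47%) loss(261.33) SAVED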
@@ -82,7 +82,16 @@ void Trainer::computeScoreOnDev()
   bool pActionIsZeroCost = tm.getCurrentClassifier()->getActionCost(*devConfig, pAction) == 0;
+  if (ProgramParameters::devLoss)
+  {
+    float loss = tm.getCurrentClassifier()->getLoss(*devConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+    TI.addDevExample(tm.getCurrentClassifier()->name, loss);
+  }
+  else
+  {
     TI.addDevExample(tm.getCurrentClassifier()->name);
+  }
   if (((!ProgramParameters::devEvalOnGold) && pActionIsZeroCost) || (pAction == oAction))
     TI.addDevSuccess(tm.getCurrentClassifier()->name);
@@ -273,10 +282,11 @@ void Trainer::train()
     exit(1);
   }
+  float loss = 0.0;
   if (!ProgramParameters::featureExtraction)
-    tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
+    loss = tm.getCurrentClassifier()->trainOnExample(trainConfig, tm.getCurrentClassifier()->getActionIndex(oAction));
-  TI.addTrainExample(tm.getCurrentClassifier()->name);
+  TI.addTrainExample(tm.getCurrentClassifier()->name, loss);
   if (pActionIsZeroCost)
     TI.addTrainSuccess(tm.getCurrentClassifier()->name);
......
@@ -88,6 +88,8 @@ po::options_description getOptionsDescription()
     ("printTime", "Print time on stderr.")
     ("featureExtraction", "Use macaon only a feature extractor, print corpus to stdout.")
     ("devEvalOnGold", "If true, dev accuracy will be computed on gold configurations.")
+    ("devLoss", po::value<bool>()->default_value(false),
+      "Compute and print total loss on dev for every epoch.")
     ("shuffle", po::value<bool>()->default_value(true),
      "Shuffle examples after each iteration");
@@ -283,6 +285,7 @@ int main(int argc, char * argv[])
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
   ProgramParameters::shuffleExamples = vm["shuffle"].as<bool>();
+  ProgramParameters::devLoss = vm["devLoss"].as<bool>();
   ProgramParameters::randomEmbeddings = vm["randomEmbeddings"].as<bool>();
   ProgramParameters::randomParameters = vm["randomParameters"].as<bool>();
   ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
......
@@ -156,6 +156,13 @@ class Classifier
   ///
   /// @return The loss.
   float trainOnExample(Config & config, int gold);
+  /// @brief Get the loss of the classifier on a training example.
+  ///
+  /// @param config The Config to work with.
+  /// @param gold The gold class of the Config.
+  ///
+  /// @return The loss.
+  float getLoss(Config & config, int gold);
   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
......
@@ -273,6 +273,12 @@ float Classifier::trainOnExample(Config & config, int gold)
   return nn->update(fd, gold);
 }
+float Classifier::getLoss(Config & config, int gold)
+{
+  auto & fd = fm->getFeatureDescription(config);
+  return nn->getLoss(fd, gold);
+}
 void Classifier::explainCostOfActions(FILE * output, Config & config)
 {
   for (Action & a : as->actions)
......