diff --git a/decoder/src/Decoder.cpp b/decoder/src/Decoder.cpp
index 81b9fb25069ff3abf2c7fef3d56d28ff98a7ac54..8e2d471d0b300dc56a2046e36acd4766729278ec 100644
--- a/decoder/src/Decoder.cpp
+++ b/decoder/src/Decoder.cpp
@@ -58,13 +58,13 @@ void checkAndRecordError(Config & config, Classifier * classifier, Classifier::W
   }
 }
 
-void printAdvancement(Config & config, float currentSpeed)
+void printAdvancement(Config & config, float currentSpeed, int nbActionsCutoff)
 {
   if (ProgramParameters::interactive)
   {
     int totalSize = ProgramParameters::tapeSize;
     int steps = config.getHead();
-    if (steps && (steps % 200 == 0 || totalSize-steps < 200))
+    if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff))
       fprintf(stderr, "Decode : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
   }
 }
@@ -213,7 +213,7 @@ void Decoder::decodeNoBeam()
 
     auto weightedActions = tm.getCurrentClassifier()->weightActions(config);
 
-    printAdvancement(config, currentSpeed);
+    printAdvancement(config, currentSpeed, nbActionsCutoff);
     printDebugInfos(stderr, config, tm, weightedActions);
 
     std::pair<float,std::string> predictedAction;
@@ -343,7 +343,7 @@ void Decoder::decodeBeam()
 
       node->weightedActions = node->tm.getCurrentClassifier()->weightActions(node->config);
 
-      printAdvancement(node->config, currentSpeed);
+      printAdvancement(node->config, currentSpeed, nbActionsCutoff);
 
       unsigned int nbActionsMax = std::min(std::max(node->tm.getCurrentClassifier()->getNbActions(),(unsigned int)1),(unsigned int)ProgramParameters::nbChilds);
       for (unsigned int actionIndex = 0; actionIndex < nbActionsMax; actionIndex++)
diff --git a/neural_network/include/NeuralNetwork.hpp b/neural_network/include/NeuralNetwork.hpp
index 28c2ef1ef5fe894103c30135ed908456cb272ddd..984233801c0dbc7c60d6be0d987ef8ab4d711256 100644
--- a/neural_network/include/NeuralNetwork.hpp
+++ b/neural_network/include/NeuralNetwork.hpp
@@ -10,6 +10,14 @@
 #include <string>
 #include "FeatureModel.hpp"
 
+struct BatchNotFull : public std::exception
+{
+  const char * what() const throw()
+  {
+    return "Current batch is not full, no need to update.";
+  }
+};
+
 class NeuralNetwork
 {
   public :
diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp
index 47dc9453a24af6dc2a0bba5d0f0380f13e7a29e3..3dae489df88984fb618976525c683ee8eb416f62 100644
--- a/neural_network/src/MLP.cpp
+++ b/neural_network/src/MLP.cpp
@@ -49,11 +49,15 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
 
 float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
-  float loss = mlp.update(fd, gold);
-
-  trainer->update();
-
-  return loss;
+  try
+  {
+    float loss = mlp.update(fd, gold);
+    trainer->update();
+    return loss;
+  } catch (BatchNotFull &)
+  {
+    return 0.0;
+  }
 }
 
 void MLP::save(const std::string & filename)
diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp
index 39b57b1a679b9959e1aaa4f1fbc36a2b785836a6..aa06c1dd3534e138d35fee0735807fb34d887aca 100644
--- a/neural_network/src/MLPBase.cpp
+++ b/neural_network/src/MLPBase.cpp
@@ -95,7 +95,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   golds.emplace_back(gold);
 
   if ((int)fds.size() < ProgramParameters::batchSize)
-    return 0.0;
+    throw BatchNotFull();
 
   std::vector<dynet::Expression> inputs;
   dynet::ComputationGraph cg;
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index ca8ef4588ba78e0fc47e6dadaf298462e06cceeb..bb8ff4030165efd99ce6716851c5be13ab27a67c 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -27,7 +27,7 @@ void Trainer::computeScoreOnDev()
   float entropyAccumulator = 0.0;
   bool justFlipped = false;
   int nbActions = 0;
-  int nbActionsCutoff = 200;
+  int nbActionsCutoff = 2*ProgramParameters::batchSize;
   float currentSpeed = 0.0;
   auto pastTime = std::chrono::high_resolution_clock::now();
   std::vector<float> entropies;
@@ -56,7 +56,7 @@ void Trainer::computeScoreOnDev()
     {
       int totalSize = ProgramParameters::devTapeSize;
       int steps = devConfig->getHead();
-      if (steps && (steps % 200 == 0 || totalSize-steps < 200))
+      if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff))
       {
         fprintf(stderr, " \r");
         fprintf(stderr, "Eval on dev : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
@@ -162,7 +162,7 @@ void Trainer::train()
   int nbSteps = 0;
   int nbActions = 0;
-  int nbActionsCutoff = 200;
+  int nbActionsCutoff = 2*ProgramParameters::batchSize;
   float currentSpeed = 0.0;
   auto pastTime = std::chrono::high_resolution_clock::now();
 
   while (TI.getEpoch() <= ProgramParameters::nbIter)
@@ -204,7 +204,7 @@
       {
         int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize;
         int steps = ProgramParameters::iterationSize == -1 ? trainConfig.getHead() : nbSteps;
-        if (steps % 200 == 0 || totalSize-steps < 200)
+        if (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff)
        {
           fprintf(stderr, " \r");
           fprintf(stderr, "Current Iteration : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str());
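
The core change above is the two-layer batching protocol: MLPBase::update buffers examples and throws BatchNotFull until ProgramParameters::batchSize of them have accumulated, and MLP::update absorbs the exception so callers simply see a loss of 0.0 on steps where no parameter update happened. The sketch below is a minimal, self-contained illustration of that control flow, not code from the repository; the names MiniBatcher and batchSize are invented for the example, and the "loss" is a stand-in for the real batched forward/backward pass.

#include <cstdio>
#include <exception>
#include <vector>

struct BatchNotFull : public std::exception
{
  const char * what() const throw()
  {
    return "Current batch is not full, no need to update.";
  }
};

class MiniBatcher
{
  private :

  std::vector<int> golds;
  int batchSize;

  public :

  MiniBatcher(int batchSize) : batchSize(batchSize) {}

  // Plays the role of MLPBase::update : buffer the example and refuse to
  // compute anything until the batch is full, then consume the batch.
  float updateBatch(int gold)
  {
    golds.emplace_back(gold);

    if ((int)golds.size() < batchSize)
      throw BatchNotFull();

    float loss = (float)golds.size(); // stand-in for the real batched update
    golds.clear();

    return loss;
  }

  // Plays the role of MLP::update : absorb BatchNotFull so callers just
  // see a zero loss on the steps where no update happened.
  float update(int gold)
  {
    try
    {
      return updateBatch(gold);
    } catch (BatchNotFull &)
    {
      return 0.0;
    }
  }
};

int main()
{
  MiniBatcher batcher(3);

  for (int i = 0; i < 7; i++)
    printf("step %d -> loss %.1f\n", i, batcher.update(i));

  return 0;
}

The point of signaling with an exception rather than a 0.0 sentinel, as the MLP.cpp hunk shows, is that trainer->update() is now only reached when a batch was actually processed; before the patch it ran on every call, full batch or not.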