diff --git a/neural_network/include/MLPBase.hpp b/neural_network/include/MLPBase.hpp
index b8bafcd08115227c4fbb28503b27c29e01ffa292..2d047e1056025c65408e51810221b417ac74a531 100644
--- a/neural_network/include/MLPBase.hpp
+++ b/neural_network/include/MLPBase.hpp
@@ -35,6 +35,11 @@ class MLPBase
   /// @brief gold classes of the current minibatch.
   std::vector<unsigned int> golds;
 
+  private :
+
+  /// @brief Check gradient values, for debugging purposes.
+  void checkGradients();
+
   public :
 
   /// @brief Add the parameters of a layer into the dynet model.
diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp
index 3dae489df88984fb618976525c683ee8eb416f62..c9388422080c59b92c7c4b8821ddc19f8a03eeef 100644
--- a/neural_network/src/MLP.cpp
+++ b/neural_network/src/MLP.cpp
@@ -26,15 +26,23 @@ dynet::Trainer * MLP::createTrainer()
 {
   auto optimizer = noAccentLower(ProgramParameters::optimizer);
 
+  dynet::Trainer * trainer = nullptr;
+
   if (optimizer == "amsgrad")
-    return new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
+    trainer = new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
   else if (optimizer == "adam")
-    return new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
+    trainer = new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
   else if (optimizer == "sgd")
-    return new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate);
+    trainer = new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate);
   else if (optimizer == "none")
     return nullptr;
 
+  if (trainer)
+  {
+    trainer->sparse_updates_enabled = true;
+    return trainer;
+  }
+
   fprintf(stderr, "ERROR (%s) : unknown optimizer \'%s\'. Aborting.\n", ERRINFO, optimizer.c_str());
   exit(1);
 
diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp
index aa06c1dd3534e138d35fee0735807fb34d887aca..60236bd2870496fe1aa2df6aac57a1925380d712 100644
--- a/neural_network/src/MLPBase.cpp
+++ b/neural_network/src/MLPBase.cpp
@@ -135,12 +135,37 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
 
   cg.backward(batchedLoss);
 
+  checkGradients();
+
   fds.clear();
   golds.clear();
 
   return as_scalar(batchedLoss.value());
 }
 
+void MLPBase::checkGradients()
+{
+  bool printGradients = false;
+
+  if (printGradients)
+  {
+    fprintf(stderr, "Gradients :\n");
+    for (auto & layer : parameters)
+      for (auto & param : layer)
+      {
+        auto dim = param.dim();
+        auto gradients = param.gradients()->v;
+        fprintf(stderr, "Parameter's gradients :\n");
+        int nbRows = dim.rows();
+        int nbCols = dim.cols();
+
+        for (int i = 0; i < nbRows; i++)
+          for (int j = 0; j < nbCols; j++)
+            fprintf(stderr, "%8.5f%s", gradients[i + j*nbRows], j == nbCols-1 ? "\n" : " ");
+      }
+  }
+}
+
 dynet::Expression MLPBase::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
 {
   std::vector<dynet::Expression> lossExpr;
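
For context, the sketch below (not part of the patch) shows how the two changes are meant to be used together: `sparse_updates_enabled` is a public flag on `dynet::Trainer`, set here the same way the reworked `createTrainer()` sets it, and the gradient inspection goes through the same `Parameter::dim()` / `Parameter::gradients()->v` accessors that the new `checkGradients()` relies on. The toy model, its dimensions, and the max-absolute-gradient summary are illustrative assumptions, not code from this repository.

#include <dynet/dynet.h>
#include <dynet/init.h>
#include <dynet/expr.h>
#include <dynet/model.h>
#include <dynet/training.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main(int argc, char ** argv)
{
  dynet::initialize(argc, argv);

  // Toy 3 -> 2 linear model, enough to produce non-trivial gradients.
  dynet::ParameterCollection model;
  dynet::Parameter W = model.add_parameters({2, 3});
  dynet::Parameter b = model.add_parameters({2});

  // Same flag the patched createTrainer() enables before returning the trainer.
  dynet::SimpleSGDTrainer trainer(model, 0.01);
  trainer.sparse_updates_enabled = true;

  // One forward/backward pass so the parameters' gradient tensors are filled.
  dynet::ComputationGraph cg;
  std::vector<float> xVal = {1.f, 2.f, 3.f};
  std::vector<float> yVal = {0.f, 1.f};
  dynet::Expression x = dynet::input(cg, {3}, xVal);
  dynet::Expression y = dynet::input(cg, {2}, yVal);
  dynet::Expression out = dynet::parameter(cg, W) * x + dynet::parameter(cg, b);
  dynet::Expression loss = dynet::squared_distance(out, y);
  cg.forward(loss);
  cg.backward(loss);

  // Gradient check in the spirit of checkGradients(): walk the raw gradient
  // buffer of each parameter. Iterating dim().size() elements linearly avoids
  // depending on dynet's memory layout (column-major, following Eigen), which
  // is what per-element (row, column) addressing has to account for.
  std::vector<dynet::Parameter> params = {W, b};
  for (auto & p : params)
  {
    auto dim = p.dim();
    const float * g = p.gradients()->v;
    float maxAbs = 0.f;
    for (unsigned k = 0; k < dim.size(); k++)
      maxAbs = std::max(maxAbs, std::fabs(g[k]));
    fprintf(stderr, "max |gradient| = %8.5f for a %ux%u parameter\n",
            maxAbs, dim.rows(), dim.cols());
  }

  trainer.update();
  return 0;
}

Dumping a per-parameter summary (here the largest absolute gradient) rather than every value is usually enough to spot vanishing or exploding gradients, which is the kind of inspection the hard-coded printGradients flag is there to switch on.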