diff --git a/neural_network/include/MLPBase.hpp b/neural_network/include/MLPBase.hpp
index b8bafcd08115227c4fbb28503b27c29e01ffa292..2d047e1056025c65408e51810221b417ac74a531 100644
--- a/neural_network/include/MLPBase.hpp
+++ b/neural_network/include/MLPBase.hpp
@@ -35,6 +35,11 @@ class MLPBase
   /// @brief gold classes of the current minibatch.
   std::vector<unsigned int> golds;
 
+  private :
+
+  /// @brief Check gradient values for debugging purposes.
+  void checkGradients();
+
   public :
 
   /// @brief Add the parameters of a layer into the dynet model.
diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp
index 3dae489df88984fb618976525c683ee8eb416f62..c9388422080c59b92c7c4b8821ddc19f8a03eeef 100644
--- a/neural_network/src/MLP.cpp
+++ b/neural_network/src/MLP.cpp
@@ -26,15 +26,24 @@ dynet::Trainer * MLP::createTrainer()
 {
   auto optimizer = noAccentLower(ProgramParameters::optimizer);
 
+  dynet::Trainer * trainer = nullptr;
+
   if (optimizer == "amsgrad")
-    return new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
+    trainer = new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
   else if (optimizer == "adam")
-    return new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
+    trainer = new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
   else if (optimizer == "sgd")
-    return new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate);
+    trainer = new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate);
   else if (optimizer == "none")
     return nullptr;
 
+  if (trainer)
+  {
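+    // Sparse updates restrict lookup-parameter updates to the rows actually used in the batch.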
+    trainer->sparse_updates_enabled = true;
+    return trainer;
+  }
+
   fprintf(stderr, "ERROR (%s) : unknown optimizer \'%s\'. Aborting.\n", ERRINFO, optimizer.c_str());
 
   exit(1);
diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp
index aa06c1dd3534e138d35fee0735807fb34d887aca..60236bd2870496fe1aa2df6aac57a1925380d712 100644
--- a/neural_network/src/MLPBase.cpp
+++ b/neural_network/src/MLPBase.cpp
@@ -135,12 +135,39 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
 
   cg.backward(batchedLoss);
 
+  checkGradients();
+
   fds.clear();
   golds.clear();
 
   return as_scalar(batchedLoss.value());
 }
 
+void MLPBase::checkGradients()
+{
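+  // Debug switch: set to true to dump every parameter's gradients to stderr after the backward pass.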
+  bool printGradients = false;
+
+  if (printGradients)
+  {
+    fprintf(stderr, "Gradients :\n");
+    for (auto & layer : parameters)
+      for (auto & param : layer)
+      {
+        auto dim = param.dim();
+        auto gradients = param.gradients()->v;
+        fprintf(stderr, "Parameter's gradients :\n");
+        int nbRows = dim.rows();
+        int nbCols = dim.cols();
+
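+        // dynet tensors use Eigen's column-major layout, so element (i, j) lives at v[i + j*nbRows].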
+        for (int i = 0; i < nbRows; i++)
+          for (int j = 0; j < nbCols; j++)
+            fprintf(stderr, "%8.5f%s", gradients[i + j*nbRows], j == nbCols-1 ? "\n" : " ");
+      }
+  }
+}
+
 dynet::Expression MLPBase::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
 {
   std::vector<dynet::Expression> lossExpr;