From 2996bbfd4b8759283e5b93b18189cfcdf38790ca Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Mon, 27 Apr 2020 15:25:55 +0200
Subject: [PATCH] added safety check on number of examples extracted to avoid
 disk filling

---
 trainer/include/Trainer.hpp | 2 ++
 trainer/src/Trainer.cpp     | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/trainer/include/Trainer.hpp b/trainer/include/Trainer.hpp
index c087469..c2099fd 100644
--- a/trainer/include/Trainer.hpp
+++ b/trainer/include/Trainer.hpp
@@ -9,6 +9,8 @@ class Trainer
 {
   private :
 
+  static constexpr std::size_t safetyNbExamplesMax = 10*1000*1000;
+
   struct Examples
   {
     std::vector<torch::Tensor> contexts;
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index c19ca2c..5306a2c 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -122,6 +122,8 @@ void Trainer::extractExamples(SubConfig & config, bool debug, std::filesystem::p
     int goldIndex = machine.getTransitionSet().getTransitionIndex(goldTransition);
 
     totalNbExamples += context.size();
+    if (totalNbExamples >= (int)safetyNbExamplesMax)
+      util::myThrow(fmt::format("Trying to extract more examples than the limit ({})", util::int2HumanStr(safetyNbExamplesMax)));
 
     examplesPerState[config.getState()].addContext(context);
     examplesPerState[config.getState()].addClass(goldIndex);
-- 
GitLab