From 43456222fd2db3ad509986255e3751abdb5fd15d Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 20 Feb 2020 13:53:37 +0100
Subject: [PATCH] Add traceback tracking in HPS

---
 .../utils/hyper_parameter_search.py           | 62 ++++++++++++-------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
index df22a183..7d6b2ca4 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
@@ -3,6 +3,7 @@ import sys
 
 import matplotlib.pyplot as plt
 import numpy as np
+import traceback
 from scipy.stats import randint, uniform
 from sklearn.model_selection import RandomizedSearchCV
 
@@ -181,7 +182,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
         elif self.framework == "multiview":
             return self.fit_multiview(X, y=y, groups=groups,**fit_params)
 
-    def fit_multiview(self, X, y=None, groups=None, **fit_params):
+    def fit_multiview(self, X, y=None, groups=None, track_tracebacks=True,
+                      **fit_params):
         n_splits = self.cv.get_n_splits(self.available_indices, y[self.available_indices])
         folds = list(self.cv.split(self.available_indices, y[self.available_indices]))
         if self.equivalent_draws:
@@ -195,30 +197,44 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
         results = {}
         self.cv_results_ = dict(("param_"+param_name, []) for param_name in candidate_params[0].keys())
         self.cv_results_["mean_test_score"] = []
+        n_failed = 0
+        tracebacks = []
         for candidate_param_idx, candidate_param in enumerate(candidate_params):
             test_scores = np.zeros(n_splits)+1000
-            for fold_idx, (train_indices, test_indices) in enumerate(folds):
-                current_estimator = clone(base_estimator)
-                current_estimator.set_params(**candidate_param)
-                current_estimator.fit(X, y,
-                                      train_indices=self.available_indices[train_indices],
-                                      view_indices=self.view_indices)
-                test_prediction = current_estimator.predict(
-                    X,
-                    self.available_indices[test_indices],
-                    view_indices=self.view_indices)
-                test_score = self.scoring._score_func(y[self.available_indices[test_indices]],
-                                                      test_prediction,
-                                                     **self.scoring._kwargs)
-                test_scores[fold_idx] = test_score
-            for param_name, param in candidate_param.items():
-                self.cv_results_["param_"+param_name].append(param)
-            cross_validation_score = np.mean(test_scores)
-            self.cv_results_["mean_test_score"].append(cross_validation_score)
-            results[candidate_param_idx] = cross_validation_score
-            if cross_validation_score <= min(results.values()):
-                self.best_params_ = candidate_params[candidate_param_idx]
-                self.best_score_ = cross_validation_score
+            try:
+                for fold_idx, (train_indices, test_indices) in enumerate(folds):
+                    current_estimator = clone(base_estimator)
+                    current_estimator.set_params(**candidate_param)
+                    current_estimator.fit(X, y,
+                                          train_indices=self.available_indices[train_indices],
+                                          view_indices=self.view_indices)
+                    test_prediction = current_estimator.predict(
+                        X,
+                        self.available_indices[test_indices],
+                        view_indices=self.view_indices)
+                    test_score = self.scoring._score_func(y[self.available_indices[test_indices]],
+                                                          test_prediction,
+                                                         **self.scoring._kwargs)
+                    test_scores[fold_idx] = test_score
+                for param_name, param in candidate_param.items():
+                    self.cv_results_["param_"+param_name].append(param)
+                cross_validation_score = np.mean(test_scores)
+                self.cv_results_["mean_test_score"].append(cross_validation_score)
+                results[candidate_param_idx] = cross_validation_score
+                if cross_validation_score <= min(results.values()):
+                    self.best_params_ = candidate_params[candidate_param_idx]
+                    self.best_score_ = cross_validation_score
+            except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
+                if track_tracebacks:
+                    n_failed += 1
+                    tracebacks.append(traceback.format_exc())
+                else:
+                    raise
+        if n_failed == self.n_iter:
+            raise ValueError(
+                'No fits were performed. All HP combinations returned errors:\n\n' + '\n'.join(
+                    tracebacks))
+
         if self.refit:
             self.best_estimator_ = clone(base_estimator).set_params(**self.best_params_)
             self.best_estimator_.fit(X, y, **fit_params)
-- 
GitLab
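
The pattern this patch introduces, in isolation: each hyper-parameter candidate is evaluated inside a try/except so that one failing draw does not abort the whole search; tracebacks are collected and only raised together once every candidate has failed. Below is a minimal standalone sketch of that pattern, assuming a per-candidate fit-and-score callable; the names `search_candidates` and `evaluate` are hypothetical and not part of the project's API.

import traceback

def search_candidates(candidate_params, evaluate, track_tracebacks=True):
    # `evaluate` stands in for the fit-and-score work done per candidate
    # in fit_multiview; it is expected to raise on failure.
    results = {}
    tracebacks = []
    for idx, params in enumerate(candidate_params):
        try:
            results[idx] = evaluate(params)
        except Exception:
            if track_tracebacks:
                # Record the failure and move on to the next candidate.
                tracebacks.append(traceback.format_exc())
            else:
                raise
    if len(tracebacks) == len(candidate_params):
        # Every candidate failed: report all collected tracebacks at once.
        raise ValueError(
            'No fits were performed. All HP combinations returned errors:\n\n'
            + '\n'.join(tracebacks))
    return results

The design trade-off is the same as in the patch: with track_tracebacks enabled, a partially failing search still yields results for the candidates that succeeded, while a total failure surfaces every traceback in one error instead of only the first.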