From 369a053aaabd12d05d194dfbdacb5b0cedfb2afd Mon Sep 17 00:00:00 2001
From: Luc Giffon <luc.giffon@lis-lab.fr>
Date: Sun, 29 Mar 2020 17:35:52 +0200
Subject: [PATCH] Fix bug when the solution is found too fast

---
 code/bolsonaro/models/nn_omp.py                  | 8 ++++++++
 code/bolsonaro/models/nn_omp_forest_regressor.py | 1 +
 2 files changed, 9 insertions(+)

diff --git a/code/bolsonaro/models/nn_omp.py b/code/bolsonaro/models/nn_omp.py
index aeb3868..af8a11a 100644
--- a/code/bolsonaro/models/nn_omp.py
+++ b/code/bolsonaro/models/nn_omp.py
@@ -72,6 +72,9 @@ class NonNegativeOrthogonalMatchingPursuit:
             tmp_T = T[:, bool_arr_selected_indexes]
             sol = nnls(tmp_T, y)[0]  # non negative least square
             residual = y - tmp_T @ sol
+            int_used_atoms = np.sum(sol.astype(bool))
+            if  int_used_atoms != i+1:
+                self._logger.warning("Atom found but not used. {} < {}".format(int_used_atoms, i+1))
 
             if i + 1 == next_solution:
                 final_vec = np.zeros(T.shape[1])
@@ -81,6 +84,11 @@ class NonNegativeOrthogonalMatchingPursuit:
 
             i += 1
 
+        if len(lst_intermediate_solutions) == 0 and np.isclose(np.linalg.norm(residual), 0):
+            final_vec = np.zeros(T.shape[1])
+            final_vec[bool_arr_selected_indexes] = sol  # solution is full of zero but on selected indices
+            lst_intermediate_solutions.append(final_vec)
+
         nb_missing_solutions = len(self.requested_intermediate_solutions_sizes) - len(lst_intermediate_solutions)
 
         if nb_missing_solutions > 0:
diff --git a/code/bolsonaro/models/nn_omp_forest_regressor.py b/code/bolsonaro/models/nn_omp_forest_regressor.py
index c66742d..067401d 100644
--- a/code/bolsonaro/models/nn_omp_forest_regressor.py
+++ b/code/bolsonaro/models/nn_omp_forest_regressor.py
@@ -115,6 +115,7 @@ if __name__ == "__main__":
     print("Score full forest on test", nn_ompforest.score_base_estimator(X_test, y_test))
     print("Size full forest", nnmodel_params.hyperparameters["n_estimators"])
     print("Size extracted forests", intermediate_solutions)
+    print("Actual size extracted forest", [np.sum(coef.astype(bool)) for coef in nn_ompforest._omp.get_coef()])
     print("Score non negative omp on train", nn_ompforest.score(X_train, y_train))
     print("Score non negative omp on test", nn_ompforest.score(X_test, y_test))
     print("Score omp on train", omp_forest.score(X_train, y_train))
-- 
GitLab