From 95f543a14563816200eb438e4492a893f8dec63e Mon Sep 17 00:00:00 2001
From: Charly Lamothe <charly.lamothe@univ-amu.fr>
Date: Wed, 25 Mar 2020 15:20:18 +0100
Subject: [PATCH] Fix omp_wo_weights when coupled with normalize_D. Fix missing
 saving of selected trees for omp and random. Update compute results (not done
 yet). Fix omp_distillation.

---
 code/bolsonaro/models/omp_forest.py            |  7 +------
 code/bolsonaro/models/omp_forest_classifier.py |  7 ++-----
 code/bolsonaro/trainer.py                      | 15 +++++----------
 code/compute_results.py                        | 13 ++++++++-----
 4 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/code/bolsonaro/models/omp_forest.py b/code/bolsonaro/models/omp_forest.py
index e4830f0..5918eea 100644
--- a/code/bolsonaro/models/omp_forest.py
+++ b/code/bolsonaro/models/omp_forest.py
@@ -41,7 +41,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         # print(set([type(y) for y in y_forest]))
         self._base_forest_estimator.fit(X_forest, y_forest)
         self._extract_subforest(X_omp,
-            self.predict_base_estimator(X_forest) if use_distillation else y_omp) # type: OrthogonalMatchingPursuit
+            self.predict_base_estimator(X_omp) if use_distillation else y_omp) # type: OrthogonalMatchingPursuit
         return self
 
     def _extract_subforest(self, X, y):
@@ -153,11 +153,6 @@ class SingleOmpForest(OmpForest):
         """
         forest_predictions = np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_])
 
-        if self._models_parameters.normalize_D:
-            forest_predictions = forest_predictions.T
-            forest_predictions /= self._forest_norms
-            forest_predictions = forest_predictions.T
-
         weights = self._omp.coef_
         select_trees = np.mean(forest_predictions[weights != 0], axis=0)
         return select_trees
diff --git a/code/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py
index 4255eeb..002dd93 100644
--- a/code/bolsonaro/models/omp_forest_classifier.py
+++ b/code/bolsonaro/models/omp_forest_classifier.py
@@ -19,11 +19,11 @@ class OmpForestBinaryClassifier(SingleOmpForest):
     def _check_classes(self, y):
         assert len(set(y).difference({-1, 1})) == 0, "Classes for binary classifier must be {-1, +1}"
 
-    def fit(self, X_forest, y_forest, X_omp, y_omp):
+    def fit(self, X_forest, y_forest, X_omp, y_omp, use_distillation=False):
         self._check_classes(y_forest)
         self._check_classes(y_omp)
 
-        return super().fit(X_forest, y_forest, X_omp, y_omp)
+        return super().fit(X_forest, y_forest, X_omp, y_omp, use_distillation=use_distillation)
 
     def _base_estimator_predictions(self, X):
         predictions_0_1 = super()._base_estimator_predictions(X)
@@ -42,9 +42,6 @@ class OmpForestBinaryClassifier(SingleOmpForest):
 
         forest_predictions = self._base_estimator_predictions(X)
 
-        if self._models_parameters.normalize_D:
-            forest_predictions /= self._forest_norms
-
         weights = self._omp.coef_
         omp_trees_predictions = forest_predictions[:, weights != 0]
 
diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py
index 0986200..7761c53 100644
--- a/code/bolsonaro/trainer.py
+++ b/code/bolsonaro/trainer.py
@@ -155,16 +155,7 @@ class Trainer(object):
         return result
 
     def _evaluate_predictions(self, model, X, aggregation_function):
-        if type(model) in [OmpForestRegressor, OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
-            estimators = model.forest
-            estimators = np.asarray(estimators)[model._omp.coef_ != 0]
-        elif type(model) in [SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor, 
-            SimilarityForestClassifier, KMeansForestClassifier, EnsembleSelectionForestClassifier]:
-            estimators = model.selected_trees
-        elif type(model) in [RandomForestRegressor, RandomForestClassifier]:
-            estimators = model.estimators_
-
-        predictions = np.array([tree.predict(X) for tree in estimators])
+        predictions = np.array([tree.predict(X) for tree in self._selected_trees])
 
         predictions = normalize(predictions)
 
@@ -187,6 +178,10 @@ class Trainer(object):
         if type(model) in [SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor, 
             SimilarityForestClassifier, KMeansForestClassifier, EnsembleSelectionForestClassifier]:
             self._selected_trees = model.selected_trees
+        elif type(model) in [OmpForestRegressor, OmpForestMulticlassClassifier, OmpForestBinaryClassifier]:
+            self._selected_trees = np.asarray(model.forest)[model._omp.coef_ != 0]
+        elif type(model) in [RandomForestRegressor, RandomForestClassifier]:
+            self._selected_trees = model.estimators_
 
         if len(self._selected_trees) > 0:
             with open(os.path.join(models_dir, 'selected_trees.pickle'), 'wb') as output_file:
diff --git a/code/compute_results.py b/code/compute_results.py
index ab90b85..28b08ac 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -473,9 +473,10 @@ if __name__ == "__main__":
             30 + 1,
             endpoint=True)[1:]).astype(np.int)).tolist()"""
 
-        extracted_forest_sizes = [4, 7, 11, 14, 18, 22, 25, 29, 32, 36, 40, 43, 47, 50, 54, 58, 61, 65, 68, 72, 76, 79, 83, 86, 90, 94, 97, 101, 104, 108]
+        #extracted_forest_sizes = [4, 7, 11, 14, 18, 22, 25, 29, 32, 36, 40, 43, 47, 50, 54, 58, 61, 65, 68, 72, 76, 79, 83, 86, 90, 94, 97, 101, 104, 108]
 
-        extracted_forest_sizes = [str(forest_size) for forest_size in extracted_forest_sizes]
+        #extracted_forest_sizes = [str(forest_size) for forest_size in extracted_forest_sizes]
+        extracted_forest_sizes= list()
 
         # base_with_params
         logger.info('Loading base_with_params experiment scores...')
@@ -508,8 +509,10 @@ if __name__ == "__main__":
         for i in range(3, len(args.experiment_ids)):
             if 'kmeans' in args.experiment_ids[i]:
                 label = 'kmeans'
-            elif 'similarity' in args.experiment_ids[i]:
-                label = 'similarity'
+            elif 'similarity_similarities' in args.experiment_ids[i]:
+                label = 'similarity_similarities'
+            elif 'similarity_predictions' in args.experiment_ids[i]:
+                label = 'similarity_predictions'
             elif 'ensemble' in args.experiment_ids[i]:
                 label = 'ensemble'
             else:
@@ -528,7 +531,7 @@ if __name__ == "__main__":
         pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
 
         Plotter.plot_stage2_losses(
-            file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}_test.png",
+            file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}_test_train,dev.png",
             all_experiment_scores=all_scores,
             all_labels=all_labels,
             x_value=with_params_extracted_forest_sizes,
-- 
GitLab