From 4d4c084882d21f87cf69c681b63ff4dae2f4f5f3 Mon Sep 17 00:00:00 2001
From: Charly Lamothe <charly.lamothe@univ-amu.fr>
Date: Fri, 13 Mar 2020 12:33:14 +0100
Subject: [PATCH] Merge from master

---
 code/bolsonaro/models/kmeans_forest_regressor.py | 6 +++---
 code/train.py                                    | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/code/bolsonaro/models/kmeans_forest_regressor.py b/code/bolsonaro/models/kmeans_forest_regressor.py
index ad1de68..d0d6412 100644
--- a/code/bolsonaro/models/kmeans_forest_regressor.py
+++ b/code/bolsonaro/models/kmeans_forest_regressor.py
@@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
     def __init__(self, models_parameters, score_metric=mean_squared_error):
         self._models_parameters = models_parameters
         self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
-            random_state=self._models_parameters.seed, n_jobs=-1)
+            random_state=self._models_parameters.seed, n_jobs=2)
         self._extracted_forest_size = self._models_parameters.extracted_forest_size
         self._score_metric = score_metric
         self._selected_trees = list()
@@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
         # For each cluster select the best tree on the validation set
         extracted_forest_sizes = list(range(self._extracted_forest_size))
         with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb:
-            pruned_forest = Parallel(n_jobs=-1)(delayed(self._prune_forest_job)(prune_forest_job_pb,
+            pruned_forest = Parallel(n_jobs=2)(delayed(self._prune_forest_job)(prune_forest_job_pb,
                 extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric)
                 for i in range(self._extracted_forest_size))
 
@@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
     def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric):
         index = np.where(labels == c)[0]
         with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb:
-            cluster = Parallel(n_jobs=-1)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, 
+            cluster = Parallel(n_jobs=2)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, 
                 y_val, score_metric) for i in range(len(index)))
         best_tree_index = np.argmax(cluster)
         prune_forest_job_pb.update()
diff --git a/code/train.py b/code/train.py
index ff0e9e0..95498cd 100644
--- a/code/train.py
+++ b/code/train.py
@@ -283,6 +283,8 @@ if __name__ == "__main__":
         parameters['extracted_forest_size_samples'] + 1,
         endpoint=True)[1:]).astype(np.int)).tolist()
 
+    logger.info(f"extracted forest sizes: {parameters['extracted_forest_size']}")
+
     if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
         logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    
 
-- 
GitLab