diff --git a/code/bolsonaro/models/kmeans_forest_regressor.py b/code/bolsonaro/models/kmeans_forest_regressor.py index ad1de681bb613fece320fdb12d770a3407d33662..d0d64120d1c391ae31d107d73ed22b1a2306e8c9 100644 --- a/code/bolsonaro/models/kmeans_forest_regressor.py +++ b/code/bolsonaro/models/kmeans_forest_regressor.py @@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): def __init__(self, models_parameters, score_metric=mean_squared_error): self._models_parameters = models_parameters self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters, - random_state=self._models_parameters.seed, n_jobs=-1) + random_state=self._models_parameters.seed, n_jobs=2) self._extracted_forest_size = self._models_parameters.extracted_forest_size self._score_metric = score_metric self._selected_trees = list() @@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): # For each cluster select the best tree on the validation set extracted_forest_sizes = list(range(self._extracted_forest_size)) with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb: - pruned_forest = Parallel(n_jobs=-1)(delayed(self._prune_forest_job)(prune_forest_job_pb, + pruned_forest = Parallel(n_jobs=2)(delayed(self._prune_forest_job)(prune_forest_job_pb, extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric) for i in range(self._extracted_forest_size)) @@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric): index = np.where(labels == c)[0] with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb: - cluster = Parallel(n_jobs=-1)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, + cluster = Parallel(n_jobs=2)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, y_val, score_metric) for i in range(len(index))) best_tree_index = np.argmax(cluster) prune_forest_job_pb.update() diff --git a/code/train.py b/code/train.py index ff0e9e0d4bd99e41421d19eb102d060a8b6b625d..95498cdf03a894ca8c8cf91d6702acc6aef1a799 100644 --- a/code/train.py +++ b/code/train.py @@ -283,6 +283,8 @@ if __name__ == "__main__": parameters['extracted_forest_size_samples'] + 1, endpoint=True)[1:]).astype(np.int)).tolist() + logger.info(f"extracted forest sizes: {parameters['extracted_forest_size']}") + if parameters['seeds'] != None and parameters['random_seed_number'] > 1: logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')