Skip to content
Snippets Groups Projects

Resolve "integration-sota"

Merged Charly Lamothe requested to merge 15-integration-sota into master
2 files
+ 82
71
Compare changes
  • Side-by-side
  • Inline
Files
2
from bolsonaro.utils import tqdm_joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from abc import abstractmethod, ABCMeta
import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm
@@ -34,35 +31,40 @@ class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
selected_trees = list()
tree_list = list(self._estimator.estimators_)
for _ in range(self._extracted_forest_size):
best_similarity = 100000
found_index = 0
for i in range(len(tree_list)):
lonely_tree = tree_list[i]
del tree_list[i]
begin_time = time.time()
with tqdm_joblib(tqdm(total=len(tree_list), disable=True)) as job_pb:
val_list = Parallel(n_jobs=-1)(delayed(self._tree_predict_job)(
job_pb, tree_list[i], X_val)
for i in range(len(tree_list)))
val_list = np.array(val_list)
val_mean = np.mean(val_list, axis=0)
val_score = self._score_metric(val_mean, y_val)
temp_similarity = abs(forest_pred - val_score)
if (temp_similarity < best_similarity):
found_index = i
best_similarity = temp_similarity
tree_list.insert(i, lonely_tree)
selected_trees.append(tree_list[found_index])
del tree_list[found_index]
val_scores = list()
with tqdm(tree_list) as tree_pred_bar:
tree_pred_bar.set_description('[Initial tree predictions]')
for tree in tree_pred_bar:
val_scores.append(tree.predict(X_val))
tree_pred_bar.update(1)
with tqdm(range(self._extracted_forest_size), disable=False) as pruning_forest_bar:
pruning_forest_bar.set_description(f'[Pruning forest s={self._extracted_forest_size}]')
for i in pruning_forest_bar:
best_similarity = 100000
found_index = 0
with tqdm(range(len(tree_list)), disable=False) as tree_list_bar:
tree_list_bar.set_description(f'[Tree selection s={self._extracted_forest_size} #{i}]')
for j in tree_list_bar:
lonely_tree = tree_list[j]
del tree_list[j]
val_mean = np.mean(np.asarray(val_scores), axis=0)
val_score = self._score_metric(val_mean, y_val)
temp_similarity = abs(forest_pred - val_score)
if (temp_similarity < best_similarity):
found_index = j
best_similarity = temp_similarity
tree_list.insert(j, lonely_tree)
val_scores.insert(j, lonely_tree.predict(X_val))
tree_list_bar.update(1)
selected_trees.append(tree_list[found_index])
del tree_list[found_index]
del val_scores[found_index]
pruning_forest_bar.update(1)
pruned_forest = list(set(forest) - set(selected_trees))
self._estimator.estimators_ = pruned_forest
def _tree_predict_job(self, job_pb, tree, X_val):
val_pred = tree.predict(X_val)
return val_pred
def score(self, X, y):
test_list = list()
for mod in self._estimator.estimators_:
Loading