Resolve "integration-sota"

Merged: Charly Lamothe requested to merge 15-integration-sota into master.
5 files changed, +148 −74
 import time
+from bolsonaro.models.utils import score_metric_mse, score_metric_indicator, aggregation_classification, aggregation_regression
 from bolsonaro.utils import tqdm_joblib
 from sklearn.ensemble import RandomForestRegressor
@@ -53,72 +54,85 @@ class KmeansForest(BaseEstimator, metaclass=ABCMeta):
             lst_pruned_forest.append(self._estimator.estimators_[index_trees_cluster[best_tree_index]])
 
         self._selected_trees = lst_pruned_forest
-        self._estimator.estimators_ = lst_pruned_forest
+        # self._estimator.estimators_ = lst_pruned_forest
 
     def score(self, X, y):
-        predictions = np.empty((len(self._estimator.estimators_), X.shape[0]))
-        for idx_tree, tree in enumerate(self._estimator.estimators_):
-            predictions[idx_tree, :] = tree.predict(X)
-        final_predictions = self._aggregate(predictions)
+        final_predictions = self.predict(X)
         score = self._score_metric(final_predictions, y)[0]
         return score
 
     def predict(self, X):
-        return self._estimator.predict(X)
+        predictions = np.empty((len(self._selected_trees), X.shape[0]))
+        for idx_tree, tree in enumerate(self._selected_trees):
+            predictions[idx_tree, :] = tree.predict(X)
+        final_predictions = self._aggregate(predictions)
+        return final_predictions
+
+    def predict_base_estimator(self, X):
+        return self._estimator.predict(X)
+
+    def _get_best_tree_index(self, y_preds, y_true):
+        score = self._score_metric(y_preds, y_true)
+        best_tree_index = self._best(score)  # best scoring tree (lowest MSE or highest accuracy, depending on the subclass)
+        return best_tree_index
     @abstractmethod
     def _score_metric(self, y_preds, y_true):
         """
         Return the score of each predictor in y_preds.
         y_preds.shape == (nb_trees, nb_samples)
         y_true.shape == (1, nb_samples)
         :param y_preds:
         :param y_true:
         :return:
         """
         pass
 
+    @staticmethod
     @abstractmethod
-    def _get_best_tree_index(self, y_preds, y_true):
+    def _best(array):
+        """
+        Return the index of the best element in array.
+        :param array:
+        :return:
+        """
         pass
 
     @abstractmethod
     def _aggregate(self, predictions):
         """
         Aggregate the votes of the predictors in predictions.
         predictions shape: (nb_trees, nb_samples)
         :param predictions:
         :return:
         """
         pass
 
 class KMeansForestRegressor(KmeansForest, metaclass=ABCMeta):
 
     def _aggregate(self, predictions):
-        return np.mean(predictions, axis=0)
+        return aggregation_regression(predictions)
 
     def _score_metric(self, y_preds, y_true):
         if len(y_true.shape) == 1:
             y_true = y_true[np.newaxis, :]
         if len(y_preds.shape) == 1:
             y_preds = y_preds[np.newaxis, :]
         assert y_preds.shape[1] == y_true.shape[1], "Number of examples to compare should be the same in y_preds and y_true"
-        diff = y_preds - y_true
-        squared_diff = diff ** 2
-        mean_squared_diff = np.mean(squared_diff, axis=1)
-        return mean_squared_diff
+        return score_metric_mse(y_preds, y_true)
 
-    def _get_best_tree_index(self, y_preds, y_true):
-        score = self._score_metric(y_preds, y_true)
-        best_tree_index = np.argmin(score)  # get best scoring tree (the one with lowest mse)
-        return best_tree_index
+    @staticmethod
+    def _best(array):
+        return np.argmin(array)
 
 class KMeansForestClassifier(KmeansForest, metaclass=ABCMeta):
 
     def _aggregate(self, predictions):
-        return np.sign(np.sum(predictions, axis=0))
+        return aggregation_classification(predictions)
 
     def _score_metric(self, y_preds, y_true):
         if len(y_true.shape) == 1:
             y_true = y_true[np.newaxis, :]
         if len(y_preds.shape) == 1:
             y_preds = y_preds[np.newaxis, :]
         assert y_preds.shape[1] == y_true.shape[1], "Number of examples to compare should be the same in y_preds and y_true"
-        bool_arr_correct_predictions = y_preds == y_true
-        return np.average(bool_arr_correct_predictions, axis=1)
+        return score_metric_indicator(y_preds, y_true)
 
-    def _get_best_tree_index(self, y_preds, y_true):
-        score = self._score_metric(y_preds, y_true)
-        best_tree_index = np.argmax(score)  # get best scoring tree (the one with highest accuracy)
-        return best_tree_index
\ No newline at end of file
+    @staticmethod
+    def _best(array):
+        return np.argmax(array)
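
For context, a standalone sketch of the selection-and-aggregation scheme these abstract methods encode, in the regression setting. It is illustrative only: clustering the trees by their prediction vectors and every name below are assumptions for the sketch (the fitting/clustering code is not part of this hunk, and this is not the bolsonaro API).

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
forest = RandomForestRegressor(n_estimators=30, random_state=0).fit(X, y)

# predictions shape: (nb_trees, nb_samples), matching the _aggregate docstring
predictions = np.array([tree.predict(X) for tree in forest.estimators_])

# Regression _score_metric: one MSE per tree (lower is better)
per_tree_mse = np.mean((predictions - y[np.newaxis, :]) ** 2, axis=1)

# Group the trees; k-means on their prediction vectors is an assumption here
n_clusters = 5
labels = KMeans(n_clusters=n_clusters, random_state=0).fit_predict(predictions)

# Per cluster, keep the best tree: _best is argmin for MSE
# (the classifier variant uses argmax over per-tree accuracy)
selected_trees = []
for c in range(n_clusters):
    index_trees_cluster = np.flatnonzero(labels == c)
    best_tree_index = np.argmin(per_tree_mse[index_trees_cluster])
    selected_trees.append(forest.estimators_[index_trees_cluster[best_tree_index]])

# Regression _aggregate: average the selected trees' predictions
# (the classifier variant votes with np.sign(np.sum(...)))
pruned_predictions = np.mean([tree.predict(X) for tree in selected_trees], axis=0)
print(pruned_predictions.shape)  # (200,)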