WIP: Resolve "coherence of prediction trees"

Open · Charly Lamothe requested to merge 20-coherence-des-arbres-de-predictions into master
1 file  +28  −2
@@ -77,7 +77,7 @@ class Trainer(object):
         else:
             raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
 
-    def train(self, model, extracted_forest_size=None):
+    def train(self, model, extracted_forest_size=None, seed=None):
         """
         :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
             OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
@@ -88,6 +88,7 @@ class Trainer(object):
         if type(model) in [RandomForestRegressor, RandomForestClassifier]:
             if extracted_forest_size is not None:
                 estimators_index = np.arange(len(model.estimators_))
+                np.random.seed(seed)  # seed the RNG so the extracted subset is reproducible
                 np.random.shuffle(estimators_index)
                 chosen_estimators = estimators_index[:extracted_forest_size]
                 model.estimators_ = np.array(model.estimators_)[chosen_estimators]
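
Seeding NumPy's global RNG immediately before the shuffle makes the extracted subset deterministic for a given seed. A minimal standalone sketch of the same idea (the helper name and sizes are illustrative, not part of this MR):

    import numpy as np

    def extract_subset_indices(n_estimators, extracted_forest_size, seed=None):
        # Seeding the legacy global RNG makes the shuffle reproducible.
        np.random.seed(seed)
        indices = np.arange(n_estimators)
        np.random.shuffle(indices)
        return indices[:extracted_forest_size]

    # Same seed, same subset of trees; different seeds give different subsets.
    assert np.array_equal(extract_subset_indices(100, 10, seed=42),
                          extract_subset_indices(100, 10, seed=42))

Note that np.random.seed mutates global state; a local np.random.default_rng(seed) would avoid that side effect, but the diff uses the legacy API.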
@@ -141,6 +142,30 @@ class Trainer(object):
             result = self._base_regression_score_metric(y_true, y_pred)
         return result
 
+    def compute_preds_coherence(self, model, X):
+        from sklearn.preprocessing import normalize
+
+        if type(model) in [OmpForestRegressor, SimilarityForestRegressor, KMeansForestRegressor,
+                           EnsembleSelectionForestRegressor, OmpForestBinaryClassifier,
+                           OmpForestMulticlassClassifier]:
+            # Keep only the trees that OMP selected (non-zero weights).
+            estimators = np.asarray(model.forest)[model._omp.coef_ != 0]
+        elif type(model) in [RandomForestRegressor, RandomForestClassifier]:
+            estimators = model.estimators_
+        else:
+            raise ValueError("Unsupported model type '{}'".format(type(model)))
+
+        # One prediction vector per tree, each normalized to unit L2 norm.
+        predictions = normalize(np.array([tree.predict(X) for tree in estimators]))
+        # Mutual coherence: largest absolute inner product between two distinct
+        # prediction vectors; subtracting the identity zeroes the unit diagonal.
+        coherence = np.max(np.abs(predictions @ predictions.T - np.eye(len(predictions))))
+        return coherence
 
     def compute_results(self, model, models_dir):
         """
         :param model: Object with
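
Because each tree's prediction vector is normalized to unit L2 norm, predictions @ predictions.T is the Gram matrix of cosine similarities, and subtracting the identity zeroes its diagonal of ones, so the returned value is the mutual coherence max_{i != j} |<p_i, p_j>|. A minimal sketch of the same computation on a plain scikit-learn forest (the dataset and sizes are illustrative):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.preprocessing import normalize

    X, y = make_regression(n_samples=200, n_features=10, random_state=0)
    forest = RandomForestRegressor(n_estimators=20, random_state=0).fit(X, y)

    # One unit-norm prediction vector per tree (normalize scales rows by default).
    predictions = normalize(np.array([tree.predict(X) for tree in forest.estimators_]))

    # Mutual coherence: the largest |cosine similarity| between two distinct trees.
    coherence = np.max(np.abs(predictions @ predictions.T - np.eye(len(predictions))))
    print(coherence)  # approaches 1.0 when two trees predict almost identically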
@@ -173,7 +198,8 @@ class Trainer(object):
             dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
             test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
             score_metric=self._score_metric_name,
-            base_score_metric=self._base_score_metric_name
+            base_score_metric=self._base_score_metric_name,
+            coherence=self.compute_preds_coherence(model, self._dataset.X_train)
         )
         results.save(models_dir)
         self._logger.info("Base performance on test: {}".format(results.test_score_base))