Skip to content
Snippets Groups Projects

Luc manage normalization

Merged Luc Giffon requested to merge luc_manage_normalization into wip_clean_scripts
3 files
+ 48
8
Compare changes
  • Side-by-side
  • Inline
Files
3
@@ -34,6 +34,9 @@ class OmpForestRegressor(BaseEstimator):
def models_parameters(self):
    """
    Give access to the model parameters held by this estimator.

    :return: the object stored in ``self._models_parameters``
    """
    params = self._models_parameters
    return params
def score_regressor(self, X, y):
    """
    Score the wrapped regressor on (`X`, `y`).

    The call is forwarded unchanged to ``self._regressor.score``.

    :param X: samples handed to the wrapped regressor
    :param y: targets handed to the wrapped regressor
    :return: whatever ``self._regressor.score(X, y)`` returns
    """
    base_regressor = self._regressor
    return base_regressor.score(X, y)
def _train_forest(self, X_train, y_train):
self._regressor.fit(X_train, y_train)
forest = self._regressor.estimators_
@@ -51,24 +54,57 @@ class OmpForestRegressor(BaseEstimator):
:return:
"""
self._logger.debug("Forest make prediction on X_train")
D = np.array([tree.predict(X_train) for tree in self._forest]).T
D = self._forest_prediction(X_train)
if self._models_parameters.normalize:
self._logger.debug("Compute norm of predicted vectors on X_train")
self._forest_norms = np.linalg.norm(D, axis=0)
D /= self._forest_norms
omp = OrthogonalMatchingPursuit(
n_nonzero_coefs=self._models_parameters.extracted_forest_size,
fit_intercept=False, normalize=False)
self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
.format(self._models_parameters.extracted_forest_size))
omp.fit(D, y_train)
weights = omp.coef_ # why not to use directly the omp estimator and bypass it using the coefs?
weights = omp.coef_
# question: why not use the omp estimator directly instead of bypassing it through its coefs?
return weights
def predict(self):
    """
    Placeholder for the prediction step; it always raises.

    :raises NotImplementedError: unconditionally
    """
    # TODO: handle the normalize parameter.
    # Open question: should the norms computed on the training data or norms
    # recomputed on the new data be used for normalization?
    raise NotImplementedError("TODO: implement predict function")
def _forest_prediction(self, X):
    """
    Collect the prediction of every tree in ``self._forest`` on X.

    :param X: samples passed to each tree's ``predict``
    :return: transpose of the stacked per-tree predictions; when each tree
        returns a 1-D vector this has one row per sample and one column per tree
    """
    per_tree_outputs = [tree.predict(X) for tree in self._forest]
    stacked = np.array(per_tree_outputs)
    return np.transpose(stacked)
def predict(self, X):
    """
    Apply the OMPForestRegressor to X.

    :param X: samples forwarded to ``self._forest_prediction``
    :return: product of the (optionally normalized) forest predictions
        with ``self.weights``
    """
    forest_outputs = self._forest_prediction(X)

    # When normalization is enabled, rescale in place with the norms stored
    # in ``self._forest_norms``.
    if self._models_parameters.normalize:
        forest_outputs /= self._forest_norms

    return forest_outputs @ self.weights
def score(self, X, y, metric="mse"):
    """
    Evaluate OMPForestRegressor on (`X`, `y`) using `metric`

    :param X: samples passed to ``self.predict``
    :param y: target values compared with the predictions
    :param metric: name of the evaluation metric; only ``"mse"``
        (mean of the squared differences) is handled
    :return: the value of the metric
    :raises ValueError: if `metric` is not a known metric name
    """
    predictions = self.predict(X)

    if metric == "mse":
        return np.mean(np.square(predictions - y))

    # Interpolate the offending value; without it the message would show a
    # literal "{}" instead of the metric name.
    raise ValueError("Metric value {} is not known.".format(metric))
\ No newline at end of file
Loading