Luc Giffon · Charly Lamothe
--- a/code/bolsonaro/models/omp_forest_regressor.py

+ 43

− 7
+++ b/code/bolsonaro/models/omp_forest_regressor.py

+ 43

− 7
 @@ -34,6 +34,9 @@ class OmpForestRegressor(BaseEstimator):
    def models_parameters(self):
        return self._models_parameters

+    def score_regressor(self, X, y):  # delegates to the wrapped self._regressor
+        return self._regressor.score(X, y)
+
    def _train_forest(self, X_train, y_train):
        self._regressor.fit(X_train, y_train)
        forest = self._regressor.estimators_
 @@ -51,24 +54,57 @@ class OmpForestRegressor(BaseEstimator):
        :return:
        """
        self._logger.debug("Forest make prediction on X_train")
-        D = np.array([tree.predict(X_train) for tree in self._forest]).T
+        D = self._forest_prediction(X_train)

        if self._models_parameters.normalize:
            self._logger.debug("Compute norm of predicted vectors on X_train")
            self._forest_norms = np.linalg.norm(D, axis=0)
            D /= self._forest_norms

-
        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=self._models_parameters.extracted_forest_size,
            fit_intercept=False, normalize=False)
        self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
                           .format(self._models_parameters.extracted_forest_size))
        omp.fit(D, y_train)
-        weights = omp.coef_  # why not to use directly the omp estimator and bypass it using the coefs?
+        weights = omp.coef_
+        # question: why not use the omp estimator directly instead of bypassing it through its coefs?
        return weights

-    def predict(self):
-        raise NotImplementedError("TODO: implement predict function")
-        # todo don't forget to deal with the normalize parameter
-        # should the norm used on train or the new norms be used for normalization?
+    def _forest_prediction(self, X):  # stack every tree's predict(X), then transpose
+        return np.transpose(np.array([estimator.predict(X) for estimator in self._forest]))
+
+    def predict(self, X):
+        """
+        Apply the OMPForestRegressor to X.
+
+        :param X: samples handed to every tree of the forest.
+        :return: per-tree predictions on X combined linearly with `self.weights`.
+        """
+        tree_outputs = self._forest_prediction(X)
+
+        # Reuse the norms stored during training so X is scaled like X_train was.
+        if self._models_parameters.normalize:
+            tree_outputs /= self._forest_norms
+
+        # Matrix product applies one weight to each tree's prediction.
+        return tree_outputs @ self.weights
+
+
+    def score(self, X, y, metric="mse"):
+        """
+        Evaluate OMPForestRegressor on (`X`, `y`) using `metric`.
+
+        :param X: samples passed to `predict` to obtain the predictions.
+        :param y: target values, subtracted element-wise from the predictions.
+        :param metric: name of the metric; only "mse" is supported.
+        :return: mean of the squared differences between predictions and `y`.
+        :raises ValueError: if `metric` is not a known metric name.
+        """
+        if metric != "mse":
+            # Fail before running the forest, and name the offending metric.
+            raise ValueError("Metric value {} is not known.".format(metric))
+
+        predictions = self.predict(X)
+
+        return np.mean(np.square(predictions - y))
+\ No newline at end of file