Luc Giffon
--- a/code/bolsonaro/models/omp_forest_regressor.py

+ 13

− 3
+++ b/code/bolsonaro/models/omp_forest_regressor.py

+ 13

− 3
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import OrthogonalMatchingPursuit
 from sklearn.base import BaseEstimator
-
+import numpy as np

 class OmpForestRegressor(BaseEstimator):

 @@ -35,10 +35,20 @@ class OmpForestRegressor(BaseEstimator):
        return forest
    
    def _extract_subforest(self, X_train, y_train):
-        D = [[tree.predict([elem])[0] for tree in self._forest] for elem in X_train]
+        """
+        Given an already fitted forest, apply OMP to compute the weight of each tree.
+
+        The X_train data is used to query every tree in the forest for its predictions. The y_train
+        data is the target that OMP fits the tree weights against.
+
+        :param X_train: (n_sample, n_features) array
+        :param y_train: (n_sample,) array
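+        :return: the OMP coefficients, one weight per tree in the forest (at most extracted_forest_size are non-zero)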
+        """
+        D = np.array([tree.predict(X_train) for tree in self._forest]).T
        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=self._models_parameters.extracted_forest_size,
            fit_intercept=False, normalize=False)
        omp.fit(D, y_train)
-        weights = omp.coef_
+        weights = omp.coef_  # TODO: why not use the fitted omp estimator directly instead of extracting its coefs?
        return weights