Skip to content
Snippets Groups Projects
Commit b62b7df7 authored by Luc Giffon
Browse files

support for normalize parameter + optimisation on train (wtf was that for loop)

parent c9dff280
No related branches found
No related tags found
2 merge requests: !3 clean scripts, !2 Luc manage normalization
...@@ -4,10 +4,11 @@ import os ...@@ -4,10 +4,11 @@ import os
class ModelParameters(object): class ModelParameters(object):
def __init__(self, forest_size, extracted_forest_size, seed=None): def __init__(self, forest_size, extracted_forest_size, normalize, seed=None):
self._forest_size = forest_size self._forest_size = forest_size
self._extracted_forest_size = extracted_forest_size self._extracted_forest_size = extracted_forest_size
self._seed = seed self._seed = seed
self._normalize = normalize
@property @property
def forest_size(self): def forest_size(self):
...@@ -21,12 +22,17 @@ class ModelParameters(object): ...@@ -21,12 +22,17 @@ class ModelParameters(object):
def seed(self): def seed(self):
return self._seed return self._seed
@property
def normalize(self):
return self._normalize
def save(self, directory_path, experiment_id): def save(self, directory_path, experiment_id):
with open(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id), 'w') as output_file: with open(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id), 'w') as output_file:
json.dump({ json.dump({
'forest_size': self._forest_size, 'forest_size': self._forest_size,
'extracted_forest_size': self._extracted_forest_size, 'extracted_forest_size': self._extracted_forest_size,
'seed': self._seed 'seed': self._seed,
'normalize': self._normalize
}, },
output_file, output_file,
indent=4) indent=4)
...@@ -3,12 +3,17 @@ from sklearn.linear_model import OrthogonalMatchingPursuit ...@@ -3,12 +3,17 @@ from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
import numpy as np import numpy as np
from bolsonaro import LOG_PATH
from bolsonaro.error_handling.logger_factory import LoggerFactory
class OmpForestRegressor(BaseEstimator): class OmpForestRegressor(BaseEstimator):
def __init__(self, models_parameters): def __init__(self, models_parameters):
self._regressor = RandomForestRegressor(n_estimators=models_parameters.forest_size, self._regressor = RandomForestRegressor(n_estimators=models_parameters.forest_size,
random_state=models_parameters.seed) random_state=models_parameters.seed)
self._models_parameters = models_parameters self._models_parameters = models_parameters
self._logger = LoggerFactory.create(LOG_PATH, __name__)
def fit(self, X_train, y_train): def fit(self, X_train, y_train):
self._forest = self._train_forest(X_train, y_train) self._forest = self._train_forest(X_train, y_train)
...@@ -45,10 +50,25 @@ class OmpForestRegressor(BaseEstimator): ...@@ -45,10 +50,25 @@ class OmpForestRegressor(BaseEstimator):
:param y_train: (n_sample,) array :param y_train: (n_sample,) array
:return: :return:
""" """
self._logger.debug("Forest make prediction on X_train")
D = np.array([tree.predict(X_train) for tree in self._forest]).T D = np.array([tree.predict(X_train) for tree in self._forest]).T
if self._models_parameters.normalize:
self._logger.debug("Compute norm of predicted vectors on X_train")
self._forest_norms = np.linalg.norm(D, axis=0)
D /= self._forest_norms
omp = OrthogonalMatchingPursuit( omp = OrthogonalMatchingPursuit(
n_nonzero_coefs=self._models_parameters.extracted_forest_size, n_nonzero_coefs=self._models_parameters.extracted_forest_size,
fit_intercept=False, normalize=False) fit_intercept=False, normalize=False)
self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
.format(self._models_parameters.extracted_forest_size))
omp.fit(D, y_train) omp.fit(D, y_train)
weights = omp.coef_ # why not to use directly the omp estimator and bypass it using the coefs? weights = omp.coef_ # why not to use directly the omp estimator and bypass it using the coefs?
return weights return weights
def predict(self):
raise NotImplementedError("TODO: implement predict function")
# todo don't forget to deal with the normalize parameter
# should the norm used on train or the new norms be used for normalization?
...@@ -92,7 +92,8 @@ if __name__ == "__main__": ...@@ -92,7 +92,8 @@ if __name__ == "__main__":
model_parameters = ModelParameters( model_parameters = ModelParameters(
forest_size=args.forest_size, forest_size=args.forest_size,
extracted_forest_size=extracted_forest_size, extracted_forest_size=extracted_forest_size,
seed=random_seed seed=random_seed,
normalize=args.normalize
) )
model_parameters.save(sub_models_dir, experiment_id) model_parameters.save(sub_models_dir, experiment_id)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment