Skip to content
Snippets Groups Projects
Commit b62b7df7 authored by Luc Giffon
Browse files

support for normalize parameter + optimisation on train (wtf was that for loop)

parent c9dff280
No related branches found
No related tags found
2 merge requests: !3 "clean scripts", !2 "Luc manage normalization"
This commit is part of merge request !2. Comments created here will be created in the context of that merge request.
......@@ -4,10 +4,11 @@ import os
class ModelParameters(object):
def __init__(self, forest_size, extracted_forest_size, seed=None):
def __init__(self, forest_size, extracted_forest_size, normalize, seed=None):
self._forest_size = forest_size
self._extracted_forest_size = extracted_forest_size
self._seed = seed
self._normalize = normalize
@property
def forest_size(self):
......@@ -21,12 +22,17 @@ class ModelParameters(object):
def seed(self):
return self._seed
@property
def normalize(self):
return self._normalize
def save(self, directory_path, experiment_id):
    """Write the parameters to ``model_parameters_<experiment_id>.json``.

    :param directory_path: directory in which the JSON file is created.
    :param experiment_id: identifier inserted into the file name.
    """
    file_name = 'model_parameters_{}.json'.format(experiment_id)
    # os.path.join copes with a trailing separator or an empty directory
    # path, which plain string concatenation with os.sep does not.
    with open(os.path.join(directory_path, file_name), 'w') as output_file:
        json.dump(
            {
                'forest_size': self._forest_size,
                'extracted_forest_size': self._extracted_forest_size,
                'seed': self._seed,
                'normalize': self._normalize,
            },
            output_file,
            indent=4)
......@@ -3,12 +3,17 @@ from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.base import BaseEstimator
import numpy as np
from bolsonaro import LOG_PATH
from bolsonaro.error_handling.logger_factory import LoggerFactory
class OmpForestRegressor(BaseEstimator):
def __init__(self, models_parameters):
    """Build the underlying random forest and the logger.

    :param models_parameters: object providing ``forest_size`` and ``seed``;
        it is also kept for later use by the other methods.
    """
    n_trees = models_parameters.forest_size
    seed = models_parameters.seed
    self._regressor = RandomForestRegressor(
        n_estimators=n_trees,
        random_state=seed,
    )
    self._models_parameters = models_parameters
    self._logger = LoggerFactory.create(LOG_PATH, __name__)
def fit(self, X_train, y_train):
self._forest = self._train_forest(X_train, y_train)
......@@ -45,10 +50,25 @@ class OmpForestRegressor(BaseEstimator):
:param y_train: (n_sample,) array
:return:
"""
self._logger.debug("Forest make prediction on X_train")
D = np.array([tree.predict(X_train) for tree in self._forest]).T
if self._models_parameters.normalize:
self._logger.debug("Compute norm of predicted vectors on X_train")
self._forest_norms = np.linalg.norm(D, axis=0)
D /= self._forest_norms
omp = OrthogonalMatchingPursuit(
n_nonzero_coefs=self._models_parameters.extracted_forest_size,
fit_intercept=False, normalize=False)
self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
.format(self._models_parameters.extracted_forest_size))
omp.fit(D, y_train)
weights = omp.coef_ # why not to use directly the omp estimator and bypass it using the coefs?
return weights
def predict(self):
    """Prediction entry point; not implemented yet.

    :raise NotImplementedError: always.
    """
    # TODO: handle the normalize parameter here. Open question: should the
    # norms computed on the training data, or norms recomputed on the new
    # data, be used for normalization?
    message = "TODO: implement predict function"
    raise NotImplementedError(message)
......@@ -92,7 +92,8 @@ if __name__ == "__main__":
# Bundle the hyper-parameters of this run and save them into sub_models_dir.
# The stale pre-commit `seed=random_seed` line (no trailing comma, repeated
# keyword) has been dropped so the call is valid.
model_parameters = ModelParameters(
    forest_size=args.forest_size,
    extracted_forest_size=extracted_forest_size,
    seed=random_seed,
    normalize=args.normalize
)
model_parameters.save(sub_models_dir, experiment_id)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment