Skip to content
Snippets Groups Projects
Commit d7865314 authored by Luc Giffon's avatar Luc Giffon
Browse files

now the model can make predictions: todo: manage result recording

parent b62b7df7
No related branches found
No related tags found
2 merge requests!3clean scripts,!2Luc manage normalization
......@@ -34,6 +34,9 @@ class OmpForestRegressor(BaseEstimator):
def models_parameters(self):
return self._models_parameters
def score_regressor(self, X, y):
    """
    Score the wrapped regressor (`self._regressor`) on (`X`, `y`).

    :param X: Samples forwarded unchanged to the regressor's `score` method.
    :param y: Targets forwarded unchanged to the regressor's `score` method.
    :return: Whatever `self._regressor.score(X, y)` returns.
    """
    base_regressor = self._regressor
    regressor_score = base_regressor.score(X, y)
    return regressor_score
def _train_forest(self, X_train, y_train):
self._regressor.fit(X_train, y_train)
forest = self._regressor.estimators_
......@@ -51,24 +54,57 @@ class OmpForestRegressor(BaseEstimator):
:return:
"""
self._logger.debug("Forest make prediction on X_train")
D = np.array([tree.predict(X_train) for tree in self._forest]).T
D = self._forest_prediction(X_train)
if self._models_parameters.normalize:
self._logger.debug("Compute norm of predicted vectors on X_train")
self._forest_norms = np.linalg.norm(D, axis=0)
D /= self._forest_norms
omp = OrthogonalMatchingPursuit(
n_nonzero_coefs=self._models_parameters.extracted_forest_size,
fit_intercept=False, normalize=False)
self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
.format(self._models_parameters.extracted_forest_size))
omp.fit(D, y_train)
weights = omp.coef_ # why not to use directly the omp estimator and bypass it using the coefs?
weights = omp.coef_
# question: why not use the omp estimator directly instead of bypassing it through its coefs?
return weights
def predict(self):
raise NotImplementedError("TODO: implement predict function")
# todo don't forget to deal with the normalize parameter
# should the norm used on train or the new norms be used for normalization?
def _forest_prediction(self, X):
return np.array([tree.predict(X) for tree in self._forest]).T
def predict(self, X):
    """
    Apply the OMPForestRegressor to X.

    The per-tree predictions on `X` are combined linearly using
    `self.weights`. When `self._models_parameters.normalize` is set, the
    per-tree predictions are first divided by `self._forest_norms`.

    :param X: Samples forwarded to `self._forest_prediction`.
    :return: Result of the product between the (optionally normalised)
        per-tree predictions and `self.weights`.
    """
    tree_outputs = self._forest_prediction(X)

    normalize = self._models_parameters.normalize
    if normalize:
        # In-place division, performed on the array returned just above.
        tree_outputs /= self._forest_norms

    return tree_outputs @ self.weights
def score(self, X, y, metric="mse"):
    """
    Evaluate OMPForestRegressor on (`X`, `y`) using `metric`.

    :param X: Samples passed unchanged to `self.predict`.
    :param y: Target values compared element-wise with the predictions.
    :param metric: Name of the evaluation metric. Only "mse" (mean of the
        squared differences between the predictions and `y`) is handled.
    :return: The value of the metric computed on the predictions for `X`.
    :raises ValueError: If `metric` is not a known metric name.
    """
    predictions = self.predict(X)

    if metric == "mse":
        evaluation = np.mean(np.square(predictions - y))
    else:
        # The placeholder was previously never filled in, so the error did
        # not say which metric name had been rejected.
        raise ValueError("Metric value {} is not known.".format(metric))

    return evaluation
\ No newline at end of file
......@@ -17,6 +17,7 @@ class Trainer(object):
# why is this function named iterate?
self._logger.info('Training model using train set...')
begin_time = time.time()
# todo: OMP may be running with X_dev or Y_dev
model.fit(self._dataset.X_train, self._dataset.y_train)
end_time = time.time()
......
......@@ -20,7 +20,7 @@ if __name__ == "__main__":
load_dotenv(find_dotenv())
default_dataset_name = 'boston'
default_normalize = False
default_normalize = True
default_forest_size = 100
default_extracted_forest_size = 10
# the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
......@@ -100,3 +100,6 @@ if __name__ == "__main__":
model = ModelFactory.build(dataset.task, model_parameters)
trainer.iterate(model, sub_models_dir)
print(model.score(dataset.X_test, dataset.y_test))
print(model.score_regressor(dataset.X_test, dataset.y_test))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment