Commit 2a265135 authored by Charly Lamothe
Browse files

Fix hyperparams bugs in base and random. Fix extracted forest size used in...

Fix hyperparams bugs in base and random. Fix extracted forest size used in random. Factorize random fitting
parent 86c4cf10
......@@ -53,14 +53,14 @@ class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
def score(self, X, y):
predictions = self._predict_base_estimator(X)
mean_predictions = np.mean(predictions, axis=0)
return self._score_metric(mean_predictions, y)
return self._score_metric(predictions, y)
def predict_base_estimator(self, X):
predictions = list()
for tree in self._ensemble_selected:
predictions.append(tree.predict(X))
return np.array(predictions)
mean_predictions = np.mean(np.array(predictions), axis=0)
return mean_predictions
@staticmethod
def generate_library(X_train, y_train, random_state=None):
......
......@@ -22,10 +22,10 @@ class ModelFactory(object):
if model_parameters.extraction_strategy == 'omp':
return OmpForestBinaryClassifier(model_parameters)
elif model_parameters.extraction_strategy == 'random':
return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
return RandomForestClassifier(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
elif model_parameters.extraction_strategy == 'none':
return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
return RandomForestClassifier(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
......@@ -33,7 +33,7 @@ class ModelFactory(object):
if model_parameters.extraction_strategy == 'omp':
return OmpForestRegressor(model_parameters)
elif model_parameters.extraction_strategy == 'random':
return RandomForestRegressor(n_estimators=model_parameters.extracted_forest_size,
return RandomForestRegressor(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
elif model_parameters.extraction_strategy == 'similarity':
return SimilarityForestRegressor(model_parameters)
......@@ -42,7 +42,7 @@ class ModelFactory(object):
elif model_parameters.extraction_strategy == 'ensemble':
return EnsembleSelectionForestRegressor(model_parameters, library=library)
elif model_parameters.extraction_strategy == 'none':
return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
return RandomForestRegressor(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
......@@ -50,10 +50,10 @@ class ModelFactory(object):
if model_parameters.extraction_strategy == 'omp':
return OmpForestMulticlassClassifier(model_parameters)
elif model_parameters.extraction_strategy == 'random':
return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
return RandomForestClassifier(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
elif model_parameters.extraction_strategy == 'none':
return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
return RandomForestClassifier(**model_parameters.hyperparameters,
random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
......@@ -145,5 +145,4 @@ class SingleOmpForest(OmpForest):
omp_trees_indices = np.nonzero(weights)[0]
select_trees = np.mean(forest_predictions[omp_trees_indices], axis=0)
print(len(omp_trees_indices))
return select_trees
......@@ -3,6 +3,7 @@ from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.data.task import Task
from . import LOG_PATH
......@@ -73,7 +74,7 @@ class Trainer(object):
else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model):
def train(self, model, extracted_forest_size=None):
"""
:param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
......@@ -83,10 +84,13 @@ class Trainer(object):
self._logger.debug('Training model using train set...')
self._begin_time = time.time()
if type(model) in [RandomForestRegressor, RandomForestClassifier]:
model.fit(
X=self._X_forest,
y=self._y_forest
)
if extracted_forest_size is not None:
model.estimators_ = np.random.choice(model.estimators_, extracted_forest_size)
else:
model.fit(
X=self._X_forest,
y=self._y_forest
)
else:
model.fit(
self._X_forest,
......@@ -112,12 +116,12 @@ class Trainer(object):
y_pred = np.sign(y_pred)
y_pred = np.where(y_pred == 0, 1, y_pred)
result = self._classification_score_metric(y_true, y_pred)
elif type(model) in [SimilarityForestRegressor, KMeansForestRegressor]:
elif type(model) in [SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor]:
result = model.score(X, y_true)
return result
def __score_func_base(self, model, X, y_true):
if type(model) in [OmpForestRegressor, SimilarityForestRegressor, KMeansForestRegressor]:
if type(model) in [OmpForestRegressor, SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor]:
y_pred = model.predict_base_estimator(X)
result = self._base_regression_score_metric(y_true, y_pred)
elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
......
......@@ -59,10 +59,26 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
else:
library = None
if parameters['extraction_strategy'] == 'random':
pretrained_model_parameters = ModelParameters(
extracted_forest_size=parameters['forest_size'],
normalize_D=parameters['normalize_D'],
subsets_used=parameters['subsets_used'],
normalize_weights=parameters['normalize_weights'],
seed=seed,
hyperparameters=hyperparameters,
extraction_strategy=parameters['extraction_strategy']
)
pretrained_estimator = ModelFactory.build(dataset.task, pretrained_model_parameters, library=library)
else:
pretrained_estimator = None
pretrained_model_parameters = None
if parameters['extraction_strategy'] != 'none':
with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer, library)
models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer, library,
pretrained_estimator=pretrained_estimator, pretrained_model_parameters=pretrained_model_parameters)
for i in range(len(parameters['extracted_forest_size'])))
else:
forest_size = hyperparameters['n_estimators']
......@@ -103,7 +119,8 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
seed_job_pb.update(1)
def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
seed, parameters, dataset, hyperparameters, experiment_id, trainer, library):
seed, parameters, dataset, hyperparameters, experiment_id, trainer, library,
pretrained_estimator=None, pretrained_model_parameters=None):
logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
seed, extracted_forest_size, threading.get_ident()))
......@@ -127,21 +144,24 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
model_parameters = ModelParameters(
extracted_forest_size=extracted_forest_size,
normalize_D=parameters['normalize_D'],
subsets_used=parameters['subsets_used'],
normalize_weights=parameters['normalize_weights'],
seed=seed,
hyperparameters=hyperparameters,
extraction_strategy=parameters['extraction_strategy']
)
model_parameters.save(sub_models_dir, experiment_id)
model = ModelFactory.build(dataset.task, model_parameters, library=library)
if not pretrained_estimator:
model_parameters = ModelParameters(
extracted_forest_size=extracted_forest_size,
normalize_D=parameters['normalize_D'],
subsets_used=parameters['subsets_used'],
normalize_weights=parameters['normalize_weights'],
seed=seed,
hyperparameters=hyperparameters,
extraction_strategy=parameters['extraction_strategy']
)
model_parameters.save(sub_models_dir, experiment_id)
model = ModelFactory.build(dataset.task, model_parameters, library=library)
else:
model = pretrained_estimator
pretrained_model_parameters.save(sub_models_dir, experiment_id)
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model)
trainer.train(model, extracted_forest_size=extracted_forest_size)
trainer.compute_results(model, sub_models_dir)
"""
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment