Skip to content
Snippets Groups Projects
Commit c66d117d authored by Léo Bouscarrat
Browse files

When training, check whether Bayesian search results exist; if so, use them....

When training, check whether Bayesian search results exist; if so, use them. Exception: forest_size uses the value given to the parser, if one is provided.
parent bf5803b6
No related branches found
No related tags found
2 merge requests: !6 Resolve "Gridsearching of the base forest", !3 clean scripts
This commit is part of merge request !6. Comments created here will be created in the context of that merge request.
models/*
results/*
experiments/*
*/.kile/*
*.kilepr
......
......@@ -45,7 +45,7 @@ class DatasetLoader(object):
elif name == '20newsgroups_vectorized':
dataset_loading_func = fetch_20newsgroups_vectorized
task = Task.CLASSIFICATION
elif name == 'lfw_people':
elif name == 'lfw_people': # needs PIL (image dataset)
dataset_loading_func = fetch_lfw_people
task = Task.CLASSIFICATION
elif name == 'lfw_pairs':
......
......@@ -5,16 +5,13 @@ import os
class ModelParameters(object):
def __init__(self, forest_size, extracted_forest_size, normalize_D, subsets_used, seed=None):
self._forest_size = forest_size
def __init__(self, extracted_forest_size, normalize_D, subsets_used, hyperparameters, seed=None):
self._extracted_forest_size = extracted_forest_size
self._normalize_D = normalize_D
self._subsets_used = subsets_used
self._seed = seed
self._hyperparameters = hyperparameters
@property
def forest_size(self):
return self._forest_size
@property
def extracted_forest_size(self):
......@@ -32,6 +29,10 @@ class ModelParameters(object):
def seed(self):
return self._seed
@property
def hyperparameters(self):
return self._hyperparameters
def save(self, directory_path, experiment_id):
save_obj_to_json(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id),
self.__dict__)
......
......@@ -12,7 +12,7 @@ class OmpForestRegressor(BaseEstimator):
DEFAULT_SCORE_METRIC = 'mse'
def __init__(self, models_parameters):
self._regressor = RandomForestRegressor(n_estimators=models_parameters.forest_size,
self._regressor = RandomForestRegressor(**models_parameters.hyperparameters,
random_state=models_parameters.seed)
self._models_parameters = models_parameters
self._logger = LoggerFactory.create(LOG_PATH, __name__)
......
......@@ -35,7 +35,7 @@ if __name__ == "__main__":
load_dotenv(find_dotenv('.env.example'))
DEFAULT_CV = 3
DEFAULT_N_ITER = 30
DEFAULT_N_ITER = 50
DICT_PARAM_SPACE = {'n_estimators': Integer(10, 1000),
'min_samples_leaf': Integer(1, 1000),
'max_depth': Integer(1, 20),
......@@ -56,7 +56,7 @@ if __name__ == "__main__":
begin_random_seed_range = 1
end_random_seed_range = 2000
if args.seed is not None:
if args.seed is None:
random_seed = random.randint(begin_random_seed_range, end_random_seed_range)
else:
random_seed = args.seed
......
......@@ -9,6 +9,7 @@ from bolsonaro.error_handling.logger_factory import LoggerFactory
from dotenv import find_dotenv, load_dotenv
import argparse
import json
import pathlib
import random
import os
......@@ -22,7 +23,6 @@ if __name__ == "__main__":
DEFAULT_DATASET_NAME = 'boston'
DEFAULT_NORMALIZE_D = False
DEFAULT_DATASET_NORMALIZER = None
DEFAULT_FOREST_SIZE = 100
DEFAULT_EXTRACTED_FOREST_SIZE = 10
# the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
......@@ -39,7 +39,7 @@ if __name__ == "__main__":
parser.add_argument('--dataset_name', nargs='?', type=str, default=DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
parser.add_argument('--normalize_D', action='store_true', default=DEFAULT_NORMALIZE_D, help='Specify if we want to normalize the prediction of the forest by doing the L2 division of the pred vectors.')
parser.add_argument('--dataset_normalizer', nargs='?', type=str, default=DEFAULT_DATASET_NORMALIZER, help='Specify which dataset normalizer use (either standard, minmax, robust or normalizer).')
parser.add_argument('--forest_size', nargs='?', type=int, default=DEFAULT_FOREST_SIZE, help='The number of trees of the random forest.')
parser.add_argument('--forest_size', nargs='?', type=int, default=None, help='The number of trees of the random forest.')
parser.add_argument('--extracted_forest_size', nargs='+', type=int, default=DEFAULT_EXTRACTED_FOREST_SIZE, help='The number of trees selected by OMP.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
parser.add_argument('--dev_size', nargs='?', type=float, default=DEFAULT_DEV_SIZE, help='Dev subset ratio.')
......@@ -62,9 +62,20 @@ if __name__ == "__main__":
logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')
seeds = args.seeds if args.seeds is not None \
else [random.randint(begin_random_seed_range, end_random_seed_range) \
else [random.randint(begin_random_seed_range, end_random_seed_range)
for i in range(args.random_seed_number)]
path_hyperparameter = os.path.join('experiments', args.dataset_name, 'stage1', 'params.json')
if os.path.exists(path_hyperparameter):
with open(path_hyperparameter, 'r+') as file_hyperparameter:
hyperparameters = json.load(file_hyperparameter)['best_parameters']
else:
hyperparameters = {}
if args.forest_size is not None:
hyperparameters['n_estimators'] = args.forest_size
experiment_id = resolve_experiment_id(args.models_dir)
experiment_id_str = str(experiment_id)
......@@ -98,11 +109,11 @@ if __name__ == "__main__":
pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
model_parameters = ModelParameters(
forest_size=args.forest_size,
extracted_forest_size=extracted_forest_size,
normalize_D=args.normalize_D,
subsets_used=args.subsets_used,
seed=seed
seed=seed,
hyperparameters=hyperparameters
)
model_parameters.save(sub_models_dir, experiment_id)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment