Commit 880ff78f authored by Charly Lamothe

- Add an option to not use the best hyperparameters file;

- Always use the correct forest size (either the one from the best hyperparameters or the one specified as a parameter);
- Use a number of extracted forest sizes proportional to the forest size instead of a fixed number;
- Add an option to save the current command line under a specified name instead of using the unnamed directory;
- Add best hyperparameters for the new california housing dataset, and convert all numeric values from string to int/float in the other best hyperparameter files;
- Remove useless code from compute_results.py in anticipation of the coming changes;
- When saving best hyperparameters, store numbers as int or float instead of string;
- Add a job_number option for parallelisation in both the train.py and compute_hyperparameters.py scripts;
- Clean up the TODO list.
parent 11017545
1 merge request: !9 Resolve "Experiment pipeline"
Showing changed files with 219 additions and 175 deletions
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Check that the OMP multiclass classifier works as expected.
* In the bayesian search computation, output a different file name depending on the task of the trained model.
* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Prepare the json experiment files to run.
\ No newline at end of file
* Fix the dataset fetcher error when job_number > 1.
\ No newline at end of file
......@@ -19,7 +19,7 @@ class DatasetLoader(object):
DEFAULT_NORMALIZE_D = False
DEFAULT_DATASET_NORMALIZER = 'standard'
DEFAULT_FOREST_SIZE = 100
DEFAULT_EXTRACTED_FOREST_SIZE = 10
DEFAULT_EXTRACTED_FOREST_SIZE_SAMPLES = 4
DEFAULT_DEV_SIZE = 0.2
DEFAULT_TEST_SIZE = 0.2
DEFAULT_RANDOM_SEED_NUMBER = 1
......
......@@ -79,7 +79,6 @@ def change_binary_func_load(base_load_function):
return X, y
return func_load
@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
"""Context manager to patch joblib to report into tqdm progress bar given as argument"""
......@@ -100,3 +99,17 @@ def tqdm_joblib(tqdm_object):
finally:
joblib.parallel.BatchCompletionCallBack = old_batch_callback
tqdm_object.close()
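# Usage sketch (illustration, not part of this diff): wrap a joblib.Parallel
# call so that each completed task advances the tqdm bar, as done in the
# train.py and compute_hyperparameters.py scripts below ('work' is a
# hypothetical task function):
#
#     with tqdm_joblib(tqdm(total=len(seeds))) as progress_bar:
#         Parallel(n_jobs=-1)(delayed(work)(seed) for seed in seeds)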
def is_int(value):
try:
int(value)
return True
except ValueError:
return False
def is_float(value):
try:
float(value)
return True
except ValueError:
return False
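# Sanity note (not part of this diff): is_float('1000') is also True, which
# is why the caller in compute_hyperparameters.py below tests is_int first
# and only falls back to is_float, so that '1000' becomes the int 1000 and
# '0.2' becomes the float 0.2.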
......@@ -4,7 +4,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.task import Task
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.hyperparameter_searcher import HyperparameterSearcher
from bolsonaro.utils import save_obj_to_json, tqdm_joblib
from bolsonaro.utils import save_obj_to_json, tqdm_joblib, is_int, is_float
import argparse
import os
......@@ -68,7 +68,7 @@ def process_job(dataset_name, seed, param_space, args):
def run_hyperparameter_search_jobs(seeds, dataset_name, param_space, args):
# Run one hyperparameter search job per seed
with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)) as progress_bar:
opt_results = Parallel(n_jobs=-1)(delayed(process_job)(
opt_results = Parallel(n_jobs=args.job_number)(delayed(process_job)(
dataset_name, seeds[i], param_space, args) for i in range(len(seeds)))
return opt_results
......@@ -108,6 +108,10 @@ def compute_best_params_over_seeds(seeds, dataset_name, param_space, args):
split = element.split('_')
param, value = '_'.join(split[:-1]), split[-1]
if param not in best_params:
if is_int(value):
value = int(value)
elif is_float(value):
value = float(value)
best_params[param] = value
if len(best_params) == len(all_param_names):
break
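# Worked example (sketch): an element named 'max_depth_20' splits into
# param='max_depth' and value='20'; is_int('20') is True, so the stored best
# parameter becomes the integer 20 rather than the string '20', as reflected
# in the converted best-hyperparameter json files below.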
......@@ -128,6 +132,7 @@ if __name__ == "__main__":
DEFAULT_CV = 3
DEFAULT_N_ITER = 50
DEFAULT_VERBOSE = False
DEFAULT_JOB_NUMBER = -1
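# Note: joblib interprets n_jobs=-1 as "use all available cores", so this
# default preserves the previously hard-coded Parallel(n_jobs=-1) behaviour.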
DICT_PARAM_SPACE = {'n_estimators': Integer(10, 1000),
'min_samples_leaf': Integer(1, 1000),
'max_depth': Integer(1, 20),
......@@ -144,6 +149,7 @@ if __name__ == "__main__":
parser.add_argument('--use_variable_seed_number', action='store_true', default=DEFAULT_USE_VARIABLE_SEED_NUMBER, help='Compute the number of random seeds depending on the dataset.')
parser.add_argument('--datasets', nargs='+', type=str, default=DatasetLoader.dataset_names, help='Specify the dataset used by the estimator.')
parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.')
parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of jobs used during the parallelisation across seeds.')
args = parser.parse_args()
logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
......
......@@ -16,147 +16,31 @@ if __name__ == "__main__":
DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
DEFAULT_EXPERIMENT_IDS = None
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--stage_number', nargs='?', type=int, required=True, help='Specify the stage number among [1, 4].')
parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).')
parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
parser.add_argument('--experiment_ids', nargs='+', type=int, default=DEFAULT_EXPERIMENT_IDS, help='Compute the results of the specified experiment id(s)')
args = parser.parse_args()
if int(args.stage_number) not in list(range(1, 5)):
raise ValueError('stage_number must be a supported stage id (i.e. [1, 4]).')
# Create recursively the results dir tree
pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
"""
Use specified list of experiments ids if availabe.
Otherwise, list all existing experiment ids from
the specified models directory.
"""
experiments_ids = [str(experiment_id) for experiment_id in args.experiment_ids] \
if args.experiment_ids is not None \
else os.listdir(args.models_dir)
"""
Raise an error if there's no experiments ids found both
in parameter or in models directory.
"""
if experiments_ids is None or len(experiments_ids) == 0:
raise ValueError("No experiment id was found or specified.")
# Compute the plots for each experiment id
for experiment_id in experiments_ids:
experiment_id_path = args.models_dir + os.sep + experiment_id # models/{experiment_id}
# Create recursively the tree results/{experiment_id}
pathlib.Path(args.results_dir + os.sep + experiment_id).mkdir(parents=True, exist_ok=True)
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
"""
Dictionaries to temporarly store the scalar results with the following structure:
{seed_1: [score_1, ..., score_m], ... seed_n: [score_1, ..., score_k]}
TODO: to complete to retreive more results
"""
experiment_train_scores = dict()
experiment_dev_scores = dict()
experiment_test_scores = dict()
experiment_weights = dict()
# Used to check if all losses were computed using the same metric (it should be the case)
experiment_score_metrics = list()
# For each seed results stored in models/{experiment_id}/seeds
for seed in os.listdir(experiment_seed_root_path):
experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id) # Load the dataset parameters of this experiment, with this specific seed
dataset = DatasetLoader.load(dataset_parameters) # Load the dataset using the previously loaded dataset parameters
extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size' # models/{experiment_id}/seeds/{seed}/extracted_forest_size
# {{seed}:[]}
experiment_train_scores[seed] = list()
experiment_dev_scores[seed] = list()
experiment_test_scores[seed] = list()
experiment_weights[seed] = list()
# List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_size
extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
for extracted_forest_size in extracted_forest_sizes:
# models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}
extracted_forest_size_path = extracted_forest_size_root_path + os.sep + extracted_forest_size
# Load models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}/model_raw_results.pickle file
model_raw_results = ModelRawResults.load(extracted_forest_size_path)
# Load [...]/model_parameters.json file and build the model using these parameters and the weights and forest from model_raw_results.pickle
model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
# Temporarily save some raw results (TODO: to complete to retrieve more results)
# Save the scores
experiment_train_scores[seed].append(model_raw_results.train_score)
experiment_dev_scores[seed].append(model_raw_results.dev_score)
experiment_test_scores[seed].append(model_raw_results.test_score)
# Save the weights
experiment_weights[seed].append(model_raw_results.weights)
# Save the metric
experiment_score_metrics.append(model_raw_results.score_metric)
if len(set(experiment_score_metrics)) > 1:
raise ValueError("The metrics used to compute the dev score aren't the same everytime")
"""
Example of plot that just plots the losses computed
on the train, dev and test subsets using a trained
model, with the CI, and depending on the extracted
forest size.
"""
Plotter.plot_losses(
file_path=args.results_dir + os.sep + experiment_id + os.sep + 'losses.png',
all_experiment_scores=[experiment_train_scores, experiment_dev_scores, experiment_test_scores],
x_value=extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel=experiment_score_metrics[0],
all_labels=['train', 'dev', 'test'],
title='Loss values of the trained model'
)
"""
TODO:
For each dataset:
Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
Stage 2) A figure for the selection of the best dataset normalization method
Stage 3) A figure for the selection of the best combination of dataset: normalization vs D normalization vs weights normalization
Stage 4) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
Stage 5) A figure for the selection of the best extracted forest size?
Stage 6) A figure to finally compare the perf of our approach using the previous selected parameters vs the baseline vs other papers
Stage 3)
In all axis:
- untrained forest
- trained base forest (straight line cause it doesn't depend on the number of extracted trees)
Axis 1:
- test with forest on train+dev and OMP on train+dev
- test with forest on train+dev and OMP on train+dev with dataset normalization
- test with forest on train+dev and OMP on train+dev with dataset normalization + D normalization
- test with forest on train+dev and OMP on train+dev with dataset normalization + weights normalization
- test with forest on train+dev and OMP on train+dev with dataset normalization + D normalization + weights normalization
Stage 2) A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
Stage 3) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
Stage 4) A figure to finally compare the perf of our approach using the previous selected
parameters vs the baseline vs other papers using different extracted forest size
(percentage of the tree size found previously in best hyperparams search) on the abscissa.
Axis 2:
- test with forest on train and OMP on dev
- test with forest on train and OMP on dev with dataset normalization
- test with forest on train and OMP on dev with dataset normalization + D normalization
- test with forest on train and OMP on dev with dataset normalization + weights normalization
- test with forest on train and OMP on dev with dataset normalization + D normalization + weights normalization
Axis 3:
- test with forest on train and OMP train+dev
- test with forest on train and OMP train+dev with dataset normalization
- test with forest on train and OMP train+dev with dataset normalization + D normalization
- test with forest on train and OMP train+dev with dataset normalization + weights normalization
- test with forest on train and OMP train+dev with dataset normalization + D normalization + weights normalization
IMPORTANT: Same seeds used in all axis.
IMPORTANT: Compare experiments that used the same seeds among them (except for stage 1).
"""
# Plot the density of the weights
Plotter.weight_density(
file_path=args.results_dir + os.sep + experiment_id + os.sep + 'density_weight.png',
all_experiment_weights=experiment_weights
)
......@@ -17,6 +17,7 @@ from joblib import Parallel, delayed
import threading
import json
from tqdm import tqdm
import numpy as np
def process_job(seed, parameters, experiment_id, hyperparameters):
......@@ -82,6 +83,8 @@ if __name__ == "__main__":
# the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
DEFAULT_MODELS_DIR = os.environ['project_dir'] + os.sep + 'models'
DEFAULT_VERBOSE = False
DEFAULT_SKIP_BEST_HYPERPARAMS = False
DEFAULT_JOB_NUMBER = -1
begin_random_seed_range = 1
end_random_seed_range = 2000
......@@ -92,8 +95,8 @@ if __name__ == "__main__":
parser.add_argument('--dataset_name', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
parser.add_argument('--normalize_D', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_D, help='Normalize the predictions of the forest by dividing the prediction vectors by their L2 norm.')
parser.add_argument('--dataset_normalizer', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NORMALIZER, help='Specify which dataset normalizer to use (either standard, minmax, robust or normalizer).')
parser.add_argument('--forest_size', nargs='?', type=int, default=DatasetLoader.DEFAULT_FOREST_SIZE, help='The number of trees of the random forest.')
parser.add_argument('--extracted_forest_size', nargs='+', type=int, default=DatasetLoader.DEFAULT_EXTRACTED_FOREST_SIZE, help='The number of trees selected by OMP.')
parser.add_argument('--forest_size', nargs='?', type=int, default=None, help='The number of trees of the random forest.')
parser.add_argument('--extracted_forest_size_samples', nargs='?', type=int, default=DatasetLoader.DEFAULT_EXTRACTED_FOREST_SIZE_SAMPLES, help='The number of extracted forest sizes (proportional to the forest size) selected by OMP.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
parser.add_argument('--dev_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_DEV_SIZE, help='Dev subset ratio.')
parser.add_argument('--test_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_TEST_SIZE, help='Test subset ratio.')
......@@ -102,6 +105,9 @@ if __name__ == "__main__":
parser.add_argument('--subsets_used', nargs='+', type=str, default=DatasetLoader.DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train, OMP on train+dev.')
parser.add_argument('--normalize_weights', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.')
parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.')
parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if they exist.')
parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of jobs used during the parallelisation across seeds.')
args = parser.parse_args()
if args.experiment_configuration:
......@@ -115,22 +121,31 @@ if __name__ == "__main__":
logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
# The number of trees to extract from the forest (K)
parameters['extracted_forest_size'] = parameters['extracted_forest_size'] \
if type(parameters['extracted_forest_size']) == list \
else [parameters['extracted_forest_size']]
hyperparameters_path = os.path.join('experiments', args.dataset_name, 'stage1', 'params.json')
if os.path.exists(hyperparameters_path):
if os.path.exists(hyperparameters_path) and not args.skip_best_hyperparams:
logger.info("Hyperparameters found for this dataset at '{}'".format(hyperparameters_path))
with open(hyperparameters_path, 'r+') as file_hyperparameter:
hyperparameters = json.load(file_hyperparameter)['best_parameters']
else:
hyperparameters = {}
if parameters['forest_size'] is not None:
"""
First case: no best hyperparameters are specified and no forest_size parameter
specified in argument, so use the DEFAULT_FOREST_SIZE.
Second case: no matter if hyperparameters are specified, the forest_size parameter
will override it.
Third implicit case: use the number of estimators found in specified hyperparameters.
"""
if len(hyperparameters) == 0 and parameters['forest_size'] is None:
hyperparameters['n_estimators'] = DatasetLoader.DEFAULT_FOREST_SIZE
elif parameters['forest_size'] is not None:
hyperparameters['n_estimators'] = parameters['forest_size']
# The number of trees to extract from the forest (K)
parameters['extracted_forest_size'] = [int(hyperparameters['n_estimators'] * coeff) \
for coeff in np.linspace(0, 1, parameters['extracted_forest_size_samples'] + 1,
endpoint=False)[1:]]
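# Worked example (sketch): with n_estimators=1000 and
# extracted_forest_size_samples=4, np.linspace(0, 1, 5, endpoint=False)[1:]
# gives [0.2, 0.4, 0.6, 0.8], hence extracted_forest_size=[200, 400, 600, 800]
# (experiment 16 below); with the default forest size of 100 it gives
# [20, 40, 60, 80] (experiment 17 below).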
if parameters['seeds'] is not None and parameters['random_seed_number'] > 1:
logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')
......@@ -142,15 +157,29 @@ if __name__ == "__main__":
# Resolve the next experiment id number (last id + 1)
experiment_id = resolve_experiment_id(parameters['models_dir'])
logger.info('Experiment id: {}'.format(experiment_id))
parameters['experiment_id'] = experiment_id
"""
If the experiment configuration isn't coming from
an already existing file, save it to a json file to
keep trace of it.
keep trace of it (either a specified path, either in 'unnamed' dir.).
"""
if args.experiment_configuration is None:
with open(args.experiment_configuration_path + os.sep + 'unnamed' + os.sep + 'unnamed_{}.json'.format(
experiment_id), 'w') as output_file:
if args.save_experiment_configuration:
if len(args.save_experiment_configuration) != 2:
raise ValueError('save_experiment_configuration must have two parameters.')
elif int(args.save_experiment_configuration[0]) not in list(range(1, 5)):
raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 4]).')
output_experiment_configuration_path = os.path.join(args.experiment_configuration_path,
args.dataset_name, 'stage' + args.save_experiment_configuration[0],
args.save_experiment_configuration[1] + '_{}.json'.format(
experiment_id))
else:
pathlib.Path(os.path.join(args.experiment_configuration_path, 'unnamed')).mkdir(parents=True, exist_ok=True)
output_experiment_configuration_path = os.path.join(
args.experiment_configuration_path, 'unnamed', 'unnamed_{}.json'.format(
experiment_id))
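# For example (sketch): with '--save_experiment_configuration 1 with_best_params'
# and experiment id 16, the configuration is written to
# experiments/iris/stage1/with_best_params_16.json; without the option it falls
# back to experiments/unnamed/unnamed_16.json (cf. the two json files below).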
with open(output_experiment_configuration_path, 'w') as output_file:
json.dump(
parameters,
output_file,
......@@ -159,5 +188,5 @@ if __name__ == "__main__":
# Run as many jobs as there are seeds
with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)) as progress_bar:
Parallel(n_jobs=-1)(delayed(process_job)(seeds[i],
Parallel(n_jobs=args.job_number)(delayed(process_job)(seeds[i],
parameters, experiment_id, hyperparameters) for i in range(len(seeds)))
......@@ -4,9 +4,9 @@
"best_score_test": -13.650326577972058,
"best_parameters": {
"max_features": "auto",
"min_samples_leaf": "1",
"max_depth": "20",
"n_estimators": "1000"
"min_samples_leaf": 1,
"max_depth": 20,
"n_estimators": 1000
},
"random_seed": [
1812,
......
......@@ -3,9 +3,9 @@
"best_score_train": 0.9562271062271059,
"best_score_test": 0.9514619883040936,
"best_parameters": {
"max_depth": "20",
"min_samples_leaf": "1",
"n_estimators": "1000",
"max_depth": 20,
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_features": "log2"
},
"random_seed": [
......
{
"scorer": "neg_mean_squared_error",
"best_score_train": -0.2535049905518054,
"best_score_test": -0.24128661227361273,
"best_parameters": {
"max_features": "log2",
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_depth": 18
},
"random_seed": [
1012,
529,
42
]
}
\ No newline at end of file
......@@ -4,9 +4,9 @@
"best_score_test": -3305.635542701523,
"best_parameters": {
"max_features": "auto",
"min_samples_leaf": "1",
"max_depth": "15",
"n_estimators": "108"
"min_samples_leaf": 1,
"max_depth": 15,
"n_estimators": 108
},
"random_seed": [
661,
......
......@@ -4,9 +4,9 @@
"best_score_test": 0.9738888888888889,
"best_parameters": {
"max_features": "sqrt",
"min_samples_leaf": "1",
"n_estimators": "1000",
"max_depth": "20"
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_depth": 20
},
"random_seed": [
1,
......
......@@ -4,9 +4,9 @@
"best_score_test": 0.9155555555555556,
"best_parameters": {
"max_features": "sqrt",
"min_samples_leaf": "1",
"max_depth": "1",
"n_estimators": "1000"
"min_samples_leaf": 1,
"max_depth": 1,
"n_estimators": 1000
},
"random_seed": [
771,
......
{
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "iris",
"normalize_D": false,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 4,
"models_dir": ".\\models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": false,
"verbose": false,
"skip_best_hyperparams": false,
"save_experiment_configuration": [
"1",
"with_best_params"
],
"job_number": -1,
"extracted_forest_size": [
200,
400,
600,
800
],
"experiment_id": 16
}
\ No newline at end of file
{
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "iris",
"normalize_D": false,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 4,
"models_dir": ".\\models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": false,
"verbose": false,
"skip_best_hyperparams": true,
"save_experiment_configuration": [
"1",
"wo_best_params"
],
"job_number": -1,
"extracted_forest_size": [
20,
40,
60,
80
],
"experiment_id": 17
}
\ No newline at end of file
......@@ -3,10 +3,10 @@
"best_score_train": -223.81438159498393,
"best_score_test": -262.4415311793658,
"best_parameters": {
"max_depth": "1",
"min_samples_leaf": "1",
"max_depth": 1,
"min_samples_leaf": 1,
"max_features": "sqrt",
"n_estimators": "1000"
"n_estimators": 1000
},
"random_seed": [
1109,
......
{
"scorer": "accuracy",
"best_score_train": 0.8890625,
"best_score_test": 0.89,
"best_parameters": {
"max_features": "log2",
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_depth": 18
},
"random_seed": [
899,
249,
1367,
942,
846,
1576,
285,
839,
1974,
1216,
540,
1292,
1642,
712,
1511
]
}
\ No newline at end of file
......@@ -3,9 +3,9 @@
"best_score_train": 0.9846607669616517,
"best_score_test": 0.9796296296296295,
"best_parameters": {
"max_depth": "20",
"min_samples_leaf": "1",
"n_estimators": "1000",
"max_depth": 20,
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_features": "log2"
},
"random_seed": [
......