Commit 1194ee2f authored by Charly Lamothe


Integrate Paolo's code of method 'Ensemble selection from libraries of models' by Rich Caruana et al
parent 0363926f
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from sklearn.tree import DecisionTreeRegressor
from abc import abstractmethod, ABCMeta
import numpy as np
from tqdm import tqdm
class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
"""
'Ensemble selection from libraries of models' by Rich Caruana et al
"""
def __init__(self, models_parameters, library, score_metric=mean_squared_error):
self._models_parameters = models_parameters
self._library = library
self._extracted_forest_size = self._models_parameters.extracted_forest_size
self._score_metric = score_metric
@property
def models_parameters(self):
return self._models_parameters
@property
def library(self):
return self._library
def fit(self, X_train, y_train, X_val, y_val):
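        # Score every candidate model of the library on the validation set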
scores_list = list()
for estimator in self._library:
val_score = self._score_metric(estimator.predict(X_val), y_val)
scores_list.append(val_score)
        class_list = list(self._library)
        # The best model minimizes the validation error (MSE), hence argmin
        m = np.argmin(np.asarray(scores_list))
self._ensemble_selected = [class_list[m]]
temp_pred = class_list[m].predict(X_val)
del class_list[m]
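        # Greedy forward selection: at each step, add the candidate whose inclusion
        # most reduces the validation error of the averaged ensemble prediction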
for k in range(self._extracted_forest_size - 1):
candidate_index = 0
            best_score = np.inf
for j in range(len(class_list)):
temp_pred = np.vstack((temp_pred, class_list[j].predict(X_val)))
temp_mean = np.mean(temp_pred, axis=0)
temp_score = self._score_metric(temp_mean, y_val)
if (temp_score < best_score):
candidate_index = j
                    best_score = temp_score
temp_pred = np.delete(temp_pred, -1, 0)
self._ensemble_selected.append(class_list[candidate_index])
temp_pred = np.vstack((temp_pred, class_list[candidate_index].predict(X_val)))
del class_list[candidate_index]
def score(self, X, y):
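        # Average the predictions of the selected models and score them against y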
predictions = self._predict_base_estimator(X)
mean_predictions = np.mean(predictions, axis=0)
return self._score_metric(mean_predictions, y)
    def _predict_base_estimator(self, X):
predictions = list()
for tree in self._ensemble_selected:
predictions.append(tree.predict(X))
return np.array(predictions)
@staticmethod
def generate_library(X_train, y_train, random_state=None):
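        # Train one decision tree per hyperparameter combination to build the candidate library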
criterion_arr = ["mse"]#, "friedman_mse", "mae"]
splitter_arr = ["best"]#, "random"]
depth_arr = [i for i in range(5, 20, 1)]
min_samples_split_arr = [i for i in range(2, 20, 1)]
min_samples_leaf_arr = [i for i in range(2, 20, 1)]
max_features_arr = ["sqrt"]#["auto", "sqrt", "log2"]
library = list()
with tqdm(total=len(criterion_arr) * len(splitter_arr) * \
len(depth_arr) * len(min_samples_split_arr) * len(min_samples_leaf_arr) * \
len(max_features_arr)) as bar:
bar.set_description('Generating library')
for criterion in criterion_arr:
for splitter in splitter_arr:
for depth in depth_arr:
for min_samples_split in min_samples_split_arr:
for min_samples_leaf in min_samples_leaf_arr:
for max_features in max_features_arr:
t = DecisionTreeRegressor(criterion=criterion, splitter=splitter, max_depth=depth, min_samples_split=min_samples_split,
min_samples_leaf=min_samples_leaf, max_features=max_features, random_state=random_state)
t.fit(X_train, y_train)
library.append(t)
bar.update(1)
return library
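A minimal usage sketch of the class above (illustrative only, not part of the commit: SimpleParams stands in for the project's ModelParameters object, and a small hand-built library replaces the full generate_library grid):

from collections import namedtuple
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Hypothetical stand-in for ModelParameters; only extracted_forest_size is read by the class
SimpleParams = namedtuple('SimpleParams', ['extracted_forest_size'])

X, y = make_regression(n_samples=200, n_features=8, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Small hand-built library of fitted regressors; generate_library(X_train, y_train) would build a much larger grid
library = [DecisionTreeRegressor(max_depth=d, random_state=0).fit(X_train, y_train) for d in range(2, 12)]

model = EnsembleSelectionForestRegressor(SimpleParams(extracted_forest_size=5), library)
model.fit(X_train, y_train, X_val, y_val)
print(model.score(X_val, y_val))  # mean squared error of the averaged selected trees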
......@@ -3,6 +3,7 @@ from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.data.task import Task
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
......@@ -13,7 +14,7 @@ import pickle
class ModelFactory(object):
@staticmethod
def build(task, model_parameters):
def build(task, model_parameters, library=None):
if task not in [Task.BINARYCLASSIFICATION, Task.REGRESSION, Task.MULTICLASSIFICATION]:
raise ValueError("Unsupported task '{}'".format(task))
......@@ -38,6 +39,8 @@ class ModelFactory(object):
return SimilarityForestRegressor(model_parameters)
elif model_parameters.extraction_strategy == 'kmeans':
return KMeansForestRegressor(model_parameters)
elif model_parameters.extraction_strategy == 'ensemble':
return EnsembleSelectionForestRegressor(model_parameters, library=library)
elif model_parameters.extraction_strategy == 'none':
return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
random_state=model_parameters.seed)
......
......@@ -133,10 +133,11 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
parser.add_argument('--experiment_ids', nargs='+', type=str, required=True, help='Compute the results of the specified experiment id(s).' + \
'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
'stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}')
'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}' + \
        'stage=5: {{base_with_params}} {{random_with_params}} {{omp_with_params}} [ensemble={{id}}] [similarity={{id}}] [kmeans={{id}}]')
parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
......@@ -159,7 +160,7 @@ if __name__ == "__main__":
raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
        # Retrieve the number of extracted forest sizes used, so the base forest axis is as long as necessary
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
# Experiments that used the best hyperparameters found for this dataset
......@@ -167,18 +168,18 @@ if __name__ == "__main__":
logger.info('Loading base_with_params experiment scores...')
base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
base_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
extracted_forest_sizes_number)
# random_with_params
logger.info('Loading random_with_params experiment scores...')
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
# omp_with_params
logger.info('Loading omp_with_params experiment scores...')
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2])
args.models_dir, args.results_dir, int(args.experiment_ids[2]))
# Experiments that didn't use the best hyperparameters found for this dataset
......@@ -186,19 +187,19 @@ if __name__ == "__main__":
logger.info('Loading base_wo_params experiment scores...')
base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
args.models_dir, args.results_dir, args.experiment_ids[3],
args.models_dir, args.results_dir, int(args.experiment_ids[3]),
extracted_forest_sizes_number)
# random_wo_params
logger.info('Loading random_wo_params experiment scores...')
random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[4])
args.models_dir, args.results_dir, int(args.experiment_ids[4]))
        # omp_wo_params
        logger.info('Loading omp_wo_params experiment scores...')
omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[5])
args.models_dir, args.results_dir, int(args.experiment_ids[5]))
        # Sanity check on the metrics retrieved
if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
......@@ -243,25 +244,25 @@ if __name__ == "__main__":
logger.info('Loading no_normalization experiment scores...')
_, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[0])
int(args.experiment_ids[0]))
# normalize_D
logger.info('Loading normalize_D experiment scores...')
_, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[1])
int(args.experiment_ids[1]))
# normalize_weights
logger.info('Loading normalize_weights experiment scores...')
_, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[2])
int(args.experiment_ids[2]))
# normalize_D_and_weights
logger.info('Loading normalize_D_and_weights experiment scores...')
_, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[3])
int(args.experiment_ids[3]))
        # Sanity check on the metrics retrieved
if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
......@@ -290,21 +291,21 @@ if __name__ == "__main__":
train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[0])
int(args.experiment_ids[0]))
# train-dev_train-dev_subset
logger.info('Loading train-dev_train-dev_subset experiment scores...')
train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
_, train_dev_train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[1])
int(args.experiment_ids[1]))
# train-train-dev_subset
logger.info('Loading train-train-dev_subset experiment scores...')
train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
_, train_train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[2])
int(args.experiment_ids[2]))
        # Sanity check on the metrics retrieved
if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
......@@ -349,13 +350,13 @@ if __name__ == "__main__":
logger.info('Loading base_with_params experiment scores...')
base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
base_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
extracted_forest_sizes_number)
# random_with_params
logger.info('Loading random_with_params experiment scores...')
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
# omp_with_params
logger.info('Loading omp_with_params experiment scores...')
"""omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
......@@ -363,12 +364,12 @@ if __name__ == "__main__":
args.models_dir, args.results_dir, args.experiment_ids[2])"""
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2])
args.models_dir, args.results_dir, int(args.experiment_ids[2]))
        # omp_with_params_without_weights
        logger.info('Loading omp_with_params_without_weights experiment scores...')
omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)
"""# base_with_params
logger.info('Loading base_with_params experiment scores 2...')
......@@ -402,57 +403,63 @@ if __name__ == "__main__":
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
elif args.stage == 5:
        # Retrieve the number of extracted forest sizes used, so the base forest axis is as long as necessary
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
all_labels = list()
all_scores = list()
# base_with_params
logger.info('Loading base_with_params experiment scores...')
base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
base_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
extracted_forest_sizes_number)
# random_with_params
logger.info('Loading random_with_params experiment scores...')
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
# omp_with_params
logger.info('Loading omp_with_params experiment scores...')
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2])
args.models_dir, args.results_dir, int(args.experiment_ids[2]))
        # omp_with_params_without_weights
        logger.info('Loading omp_with_params_without_weights experiment scores...')
omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
# kmeans_with_params
logger.info('Loading kmeans_with_params experiment scores...')
kmeans_with_params_train_scores, kmeans_with_params_dev_scores, kmeans_with_params_test_scores, _, \
kmeans_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[3])
# similarity_with_params
logger.info('Loading similarity_with_params experiment scores...')
similarity_with_params_train_scores, similarity_with_params_dev_scores, similarity_with_params_test_scores, _, \
similarity_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[4])
args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)
all_labels = ['base', 'random', 'omp', 'omp_without_weights']
all_scores = [base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
omp_with_params_without_weights_test_scores]
for i in range(3, len(args.experiment_ids)):
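            # Extra stage-5 experiments are passed as 'strategy=id' strings (see the --experiment_ids help); derive the label from the prefix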
if 'kmeans' in args.experiment_ids[i]:
label = 'kmeans'
elif 'similarity' in args.experiment_ids[i]:
label = 'similarity'
elif 'ensemble' in args.experiment_ids[i]:
label = 'ensemble'
else:
logger.error('Invalid value encountered')
continue
# Sanity check on the metrics retreived
if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric
== omp_with_params_experiment_score_metric == kmeans_with_params_experiment_score_metric):
raise ValueError('Score metrics of all experiments must be the same.')
experiments_score_metric = base_with_params_experiment_score_metric
logger.info(f'Loading {label} experiment scores...')
_, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, int(args.experiment_ids[i].split('=')[1]))
all_labels.append(label)
all_scores.append(current_test_scores)
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
Plotter.plot_stage2_losses(
file_path=output_path + os.sep + 'losses.png',
all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
omp_with_params_without_weights_test_scores, kmeans_with_params_test_scores, similarity_with_params_test_scores],
all_labels=['base', 'random', 'omp', 'omp_without_weights', 'kmeans', 'similarity'],
file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}.png",
all_experiment_scores=all_scores,
all_labels=all_labels,
x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel=experiments_score_metric,
ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
else:
raise ValueError('This stage number is not supported yet, but it will be!')
......
......@@ -2,6 +2,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.trainer import Trainer
from bolsonaro.utils import resolve_experiment_id, tqdm_joblib
from bolsonaro import LOG_PATH
......@@ -53,10 +54,15 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
trainer = Trainer(dataset)
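    # For the 'ensemble' strategy, build the library of candidate trees once per seed and reuse it for every extracted forest size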
if parameters['extraction_strategy'] == 'ensemble':
library = EnsembleSelectionForestRegressor.generate_library(dataset.X_train, dataset.y_train, random_state=seed)
else:
library = None
if parameters['extraction_strategy'] != 'none':
with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer)
models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer, library)
for i in range(len(parameters['extracted_forest_size'])))
else:
forest_size = hyperparameters['n_estimators']
......@@ -88,7 +94,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
)
model_parameters.save(sub_models_dir, experiment_id)
model = ModelFactory.build(dataset.task, model_parameters)
model = ModelFactory.build(dataset.task, model_parameters, library=library)
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model)
......@@ -97,7 +103,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
seed_job_pb.update(1)
def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
seed, parameters, dataset, hyperparameters, experiment_id, trainer):
seed, parameters, dataset, hyperparameters, experiment_id, trainer, library):
logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
seed, extracted_forest_size, threading.get_ident()))
......@@ -132,7 +138,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
)
model_parameters.save(sub_models_dir, experiment_id)
model = ModelFactory.build(dataset.task, model_parameters)
model = ModelFactory.build(dataset.task, model_parameters, library=library)
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model)
......@@ -202,7 +208,7 @@ if __name__ == "__main__":
parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans.')
parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans, ensemble.')
parser.add_argument('--overwrite', action='store_true', default=DEFAULT_OVERWRITE, help='Overwrite the experiment id')
args = parser.parse_args()
......@@ -213,7 +219,7 @@ if __name__ == "__main__":
else:
parameters = args.__dict__
if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans']:
if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans', 'ensemble']:
        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters['extraction_strategy']))
pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
......
#!/bin/bash
core_number=1
walltime=5:00
seeds='1 2 3'
for dataset in california_housing # kin8nm kr-vs-kp spambase steel-plates diabetes diamonds boston california_housing
do
#oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
#oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
#oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=omp --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=ensemble --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=4 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
done