Skip to content
Snippets Groups Projects
Commit 3c309a5c authored by Charly Lamothe's avatar Charly Lamothe
Browse files

Set the random-baseline results to a linear style to ease reading of the coherence-predictions plot.

parent 347b7f5d
Branches
No related tags found
1 merge request: !22 WIP: Resolve "coherence des arbres de predictions"
......@@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
def __init__(self, models_parameters, score_metric=mean_squared_error):
    """Build a KMeansForestRegressor wrapper around a scikit-learn random forest.

    :param models_parameters: model-parameter object; must expose
        ``hyperparameters`` (dict of RandomForestRegressor kwargs),
        ``seed`` (int, forwarded as ``random_state``) and
        ``extracted_forest_size`` (number of trees to keep after pruning).
    :param score_metric: callable ``(y_true, y_pred) -> float`` used to score
        trees on the validation set; defaults to ``mean_squared_error``.
    """
    self._models_parameters = models_parameters
    # n_jobs=1: parallelism is handled by the outer joblib.Parallel calls,
    # so the inner forest fit stays single-threaded to avoid oversubscription.
    self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
        random_state=self._models_parameters.seed, n_jobs=1)
    self._extracted_forest_size = self._models_parameters.extracted_forest_size
    self._score_metric = score_metric
    # Populated during pruning with the best tree of each k-means cluster.
    self._selected_trees = list()
......@@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
# For each cluster select the best tree on the validation set
extracted_forest_sizes = list(range(self._extracted_forest_size))
with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb:
pruned_forest = Parallel(n_jobs=2)(delayed(self._prune_forest_job)(prune_forest_job_pb,
pruned_forest = Parallel(n_jobs=1)(delayed(self._prune_forest_job)(prune_forest_job_pb,
extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric)
for i in range(self._extracted_forest_size))
......@@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric):
index = np.where(labels == c)[0]
with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb:
cluster = Parallel(n_jobs=2)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val,
cluster = Parallel(n_jobs=1)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val,
y_val, score_metric) for i in range(len(index)))
best_tree_index = np.argmax(cluster)
prune_forest_job_pb.update()
......
......@@ -77,7 +77,7 @@ class Trainer(object):
else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model, extracted_forest_size=None):
def train(self, model, extracted_forest_size=None, seed=None):
"""
:param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
......@@ -88,6 +88,7 @@ class Trainer(object):
if type(model) in [RandomForestRegressor, RandomForestClassifier]:
if extracted_forest_size is not None:
estimators_index = np.arange(len(model.estimators_))
np.random.seed(seed)
np.random.shuffle(estimators_index)
choosen_estimators = estimators_index[:extracted_forest_size]
model.estimators_ = np.array(model.estimators_)[choosen_estimators]
......
......@@ -521,7 +521,7 @@ if __name__ == "__main__":
ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
if args.plot_weight_density or args.plot_preds_coherence:
"""if args.plot_weight_density or args.plot_preds_coherence:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}')
if args.stage == 1:
......@@ -551,28 +551,28 @@ if __name__ == "__main__":
continue
current_experiment_id = int(args.experiment_ids[i].split('=')[1])
omp_experiment_ids.append((label, current_experiment_id))
omp_experiment_ids.append((label, current_experiment_id))"""
for (experiment_label, experiment_id) in omp_experiment_ids:
#for (experiment_label, experiment_id) in omp_experiment_ids:
if args.plot_weight_density:
logger.info(f'Computing weight density plot for experiment {experiment_label}...')
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
if args.plot_preds_coherence:
all_labels = ['random', 'omp']
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage4')
all_labels = ['random', 'omp', 'omp_normalize_D']
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in range(2, 4)]
print(coherence_values[1])
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 4]]
Plotter.plot_stage2_losses(
file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png",
file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}_30_all.png",
all_experiment_scores=coherence_values,
all_labels=all_labels,
x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel='Coherence',
title='Coherence values of {}'.format(args.dataset_name))
logger.info(f'Computing preds coherence plot for experiment {experiment_label}...')
logger.info(f'Computing preds coherence plot...')
logger.info('Done.')
......@@ -169,7 +169,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
pretrained_model_parameters.save(sub_models_dir, experiment_id)
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model, extracted_forest_size=extracted_forest_size)
trainer.train(model, extracted_forest_size=extracted_forest_size, seed=seed)
#trainer.compute_preds_coherence(model)
trainer.compute_results(model, sub_models_dir)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment