Skip to content
Snippets Groups Projects
Commit 3c309a5c authored by Charly Lamothe's avatar Charly Lamothe
Browse files

Set the random-baseline results to linear to make the coherence-predictions plot easier to read

parent 347b7f5d
Branches
No related tags found
1 merge request!22WIP: Resolve "coherence des arbres de predictions"
...@@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): ...@@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
def __init__(self, models_parameters, score_metric=mean_squared_error): def __init__(self, models_parameters, score_metric=mean_squared_error):
self._models_parameters = models_parameters self._models_parameters = models_parameters
self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters, self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
random_state=self._models_parameters.seed, n_jobs=2) random_state=self._models_parameters.seed, n_jobs=1)
self._extracted_forest_size = self._models_parameters.extracted_forest_size self._extracted_forest_size = self._models_parameters.extracted_forest_size
self._score_metric = score_metric self._score_metric = score_metric
self._selected_trees = list() self._selected_trees = list()
...@@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): ...@@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
# For each cluster select the best tree on the validation set # For each cluster select the best tree on the validation set
extracted_forest_sizes = list(range(self._extracted_forest_size)) extracted_forest_sizes = list(range(self._extracted_forest_size))
with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb: with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb:
pruned_forest = Parallel(n_jobs=2)(delayed(self._prune_forest_job)(prune_forest_job_pb, pruned_forest = Parallel(n_jobs=1)(delayed(self._prune_forest_job)(prune_forest_job_pb,
extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric) extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric)
for i in range(self._extracted_forest_size)) for i in range(self._extracted_forest_size))
...@@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): ...@@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric): def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric):
index = np.where(labels == c)[0] index = np.where(labels == c)[0]
with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb: with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb:
cluster = Parallel(n_jobs=2)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, cluster = Parallel(n_jobs=1)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val,
y_val, score_metric) for i in range(len(index))) y_val, score_metric) for i in range(len(index)))
best_tree_index = np.argmax(cluster) best_tree_index = np.argmax(cluster)
prune_forest_job_pb.update() prune_forest_job_pb.update()
......
...@@ -77,7 +77,7 @@ class Trainer(object): ...@@ -77,7 +77,7 @@ class Trainer(object):
else: else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used)) raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model, extracted_forest_size=None): def train(self, model, extracted_forest_size=None, seed=None):
""" """
:param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor, :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
OmpForestBinaryClassifier, OmpForestMulticlassClassifier. OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
...@@ -88,6 +88,7 @@ class Trainer(object): ...@@ -88,6 +88,7 @@ class Trainer(object):
if type(model) in [RandomForestRegressor, RandomForestClassifier]: if type(model) in [RandomForestRegressor, RandomForestClassifier]:
if extracted_forest_size is not None: if extracted_forest_size is not None:
estimators_index = np.arange(len(model.estimators_)) estimators_index = np.arange(len(model.estimators_))
np.random.seed(seed)
np.random.shuffle(estimators_index) np.random.shuffle(estimators_index)
choosen_estimators = estimators_index[:extracted_forest_size] choosen_estimators = estimators_index[:extracted_forest_size]
model.estimators_ = np.array(model.estimators_)[choosen_estimators] model.estimators_ = np.array(model.estimators_)[choosen_estimators]
......
...@@ -521,7 +521,7 @@ if __name__ == "__main__": ...@@ -521,7 +521,7 @@ if __name__ == "__main__":
ylabel=base_with_params_experiment_score_metric, ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
if args.plot_weight_density or args.plot_preds_coherence: """if args.plot_weight_density or args.plot_preds_coherence:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}') root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}')
if args.stage == 1: if args.stage == 1:
...@@ -551,28 +551,28 @@ if __name__ == "__main__": ...@@ -551,28 +551,28 @@ if __name__ == "__main__":
continue continue
current_experiment_id = int(args.experiment_ids[i].split('=')[1]) current_experiment_id = int(args.experiment_ids[i].split('=')[1])
omp_experiment_ids.append((label, current_experiment_id)) omp_experiment_ids.append((label, current_experiment_id))"""
for (experiment_label, experiment_id) in omp_experiment_ids: #for (experiment_label, experiment_id) in omp_experiment_ids:
if args.plot_weight_density: if args.plot_weight_density:
logger.info(f'Computing weight density plot for experiment {experiment_label}...') logger.info(f'Computing weight density plot for experiment {experiment_label}...')
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id) experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png')) Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
if args.plot_preds_coherence: if args.plot_preds_coherence:
all_labels = ['random', 'omp'] root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage4')
all_labels = ['random', 'omp', 'omp_normalize_D']
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \ random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \ with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1])) extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in range(2, 4)] coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 4]]
print(coherence_values[1])
Plotter.plot_stage2_losses( Plotter.plot_stage2_losses(
file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png", file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}_30_all.png",
all_experiment_scores=coherence_values, all_experiment_scores=coherence_values,
all_labels=all_labels, all_labels=all_labels,
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel='Coherence', ylabel='Coherence',
title='Coherence values of {}'.format(args.dataset_name)) title='Coherence values of {}'.format(args.dataset_name))
logger.info(f'Computing preds coherence plot for experiment {experiment_label}...') logger.info(f'Computing preds coherence plot...')
logger.info('Done.') logger.info('Done.')
...@@ -169,7 +169,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz ...@@ -169,7 +169,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
pretrained_model_parameters.save(sub_models_dir, experiment_id) pretrained_model_parameters.save(sub_models_dir, experiment_id)
trainer.init(model, subsets_used=parameters['subsets_used']) trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model, extracted_forest_size=extracted_forest_size) trainer.train(model, extracted_forest_size=extracted_forest_size, seed=seed)
#trainer.compute_preds_coherence(model) #trainer.compute_preds_coherence(model)
trainer.compute_results(model, sub_models_dir) trainer.compute_results(model, sub_models_dir)
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment