From 3c309a5cc8dfc39ffe604916b4757588ab102c7f Mon Sep 17 00:00:00 2001 From: Charly Lamothe <charly.lamothe@univ-amu.fr> Date: Sun, 22 Mar 2020 02:54:28 +0100 Subject: [PATCH] set random results linear to ease reading of the coherence preds plot --- .../models/kmeans_forest_regressor.py | 6 +-- code/bolsonaro/trainer.py | 3 +- code/compute_results.py | 48 +++++++++---------- code/train.py | 2 +- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/code/bolsonaro/models/kmeans_forest_regressor.py b/code/bolsonaro/models/kmeans_forest_regressor.py index d0d6412..81801ef 100644 --- a/code/bolsonaro/models/kmeans_forest_regressor.py +++ b/code/bolsonaro/models/kmeans_forest_regressor.py @@ -19,7 +19,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): def __init__(self, models_parameters, score_metric=mean_squared_error): self._models_parameters = models_parameters self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters, - random_state=self._models_parameters.seed, n_jobs=2) + random_state=self._models_parameters.seed, n_jobs=1) self._extracted_forest_size = self._models_parameters.extracted_forest_size self._score_metric = score_metric self._selected_trees = list() @@ -46,7 +46,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): # For each cluster select the best tree on the validation set extracted_forest_sizes = list(range(self._extracted_forest_size)) with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb: - pruned_forest = Parallel(n_jobs=2)(delayed(self._prune_forest_job)(prune_forest_job_pb, + pruned_forest = Parallel(n_jobs=1)(delayed(self._prune_forest_job)(prune_forest_job_pb, extracted_forest_sizes[i], labels, X_val, y_val, self._score_metric) for i in range(self._extracted_forest_size)) @@ -56,7 +56,7 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta): def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, 
score_metric): index = np.where(labels == c)[0] with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb: - cluster = Parallel(n_jobs=2)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, + cluster = Parallel(n_jobs=1)(delayed(self._cluster_job)(cluster_job_pb, index[i], X_val, y_val, score_metric) for i in range(len(index))) best_tree_index = np.argmax(cluster) prune_forest_job_pb.update() diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py index 9d4911e..5cea2bd 100644 --- a/code/bolsonaro/trainer.py +++ b/code/bolsonaro/trainer.py @@ -77,7 +77,7 @@ class Trainer(object): else: raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used)) - def train(self, model, extracted_forest_size=None): + def train(self, model, extracted_forest_size=None, seed=None): """ :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor, OmpForestBinaryClassifier, OmpForestMulticlassClassifier. 
@@ -88,6 +88,7 @@ class Trainer(object): if type(model) in [RandomForestRegressor, RandomForestClassifier]: if extracted_forest_size is not None: estimators_index = np.arange(len(model.estimators_)) + np.random.seed(seed) np.random.shuffle(estimators_index) choosen_estimators = estimators_index[:extracted_forest_size] model.estimators_ = np.array(model.estimators_)[choosen_estimators] diff --git a/code/compute_results.py b/code/compute_results.py index e05fff6..9e276e6 100644 --- a/code/compute_results.py +++ b/code/compute_results.py @@ -521,7 +521,7 @@ if __name__ == "__main__": ylabel=base_with_params_experiment_score_metric, title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) - if args.plot_weight_density or args.plot_preds_coherence: + """if args.plot_weight_density or args.plot_preds_coherence: root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}') if args.stage == 1: @@ -551,28 +551,28 @@ if __name__ == "__main__": continue current_experiment_id = int(args.experiment_ids[i].split('=')[1]) - omp_experiment_ids.append((label, current_experiment_id)) - - for (experiment_label, experiment_id) in omp_experiment_ids: - if args.plot_weight_density: - logger.info(f'Computing weight density plot for experiment {experiment_label}...') - experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id) - Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png')) - if args.plot_preds_coherence: - all_labels = ['random', 'omp'] - random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \ - with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \ - extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1])) - coherence_values = [extract_coherences_across_seeds(args.models_dir, 
args.results_dir, i) for i in range(2, 4)] - print(coherence_values[1]) - Plotter.plot_stage2_losses( - file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png", - all_experiment_scores=coherence_values, - all_labels=all_labels, - x_value=with_params_extracted_forest_sizes, - xlabel='Number of trees extracted', - ylabel='Coherence', - title='Coherence values of {}'.format(args.dataset_name)) - logger.info(f'Computing preds coherence plot for experiment {experiment_label}...') + omp_experiment_ids.append((label, current_experiment_id))""" + + #for (experiment_label, experiment_id) in omp_experiment_ids: + if args.plot_weight_density: + logger.info(f'Computing weight density plot for experiment {experiment_label}...') + experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id) + Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png')) + if args.plot_preds_coherence: + root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage4') + all_labels = ['random', 'omp', 'omp_normalize_D'] + random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \ + with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \ + extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2) + coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 4]] + Plotter.plot_stage2_losses( + file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}_30_all.png", + all_experiment_scores=coherence_values, + all_labels=all_labels, + x_value=with_params_extracted_forest_sizes, + xlabel='Number of trees extracted', + ylabel='Coherence', + title='Coherence values of {}'.format(args.dataset_name)) + logger.info(f'Computing preds coherence plot...') logger.info('Done.') diff --git a/code/train.py b/code/train.py 
index ad80795..9708b71 100644 --- a/code/train.py +++ b/code/train.py @@ -169,7 +169,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz pretrained_model_parameters.save(sub_models_dir, experiment_id) trainer.init(model, subsets_used=parameters['subsets_used']) - trainer.train(model, extracted_forest_size=extracted_forest_size) + trainer.train(model, extracted_forest_size=extracted_forest_size, seed=seed) #trainer.compute_preds_coherence(model) trainer.compute_results(model, sub_models_dir) -- GitLab