diff --git a/code/bolsonaro/visualization/plotter.py b/code/bolsonaro/visualization/plotter.py
index 354344c24f5481f39334322fb86c91b72dfda146..e82743db1386b1aa7a51d46fbc29818794ce64cb 100644
--- a/code/bolsonaro/visualization/plotter.py
+++ b/code/bolsonaro/visualization/plotter.py
@@ -57,10 +57,14 @@ class Plotter(object):
         ax.plot(x_value, mean, c=color_mean, label=label)
 
     @staticmethod
-    def plot_losses(file_path, all_experiment_scores_1, all_experiment_scores_2, x_value, xlabel, ylabel, all_labels, title):
-        fig, axes = plt.subplots(nrows=1, ncols=2)
+    def plot_stage1_losses(file_path, all_experiment_scores_with_params,
+        all_experiment_scores_wo_params, x_value, xlabel, ylabel, all_labels, title):
+        fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)
 
-        n = len(all_experiment_scores_1)
+        n = len(all_experiment_scores_with_params)
+
+        if n != len(all_experiment_scores_wo_params):
+            raise ValueError('all_experiment_scores_with_params and all_experiment_scores_wo_params must have the same len to be compared.')
 
         """
         Get as many different colors from the specified cmap (here nipy_spectral)
@@ -68,7 +72,8 @@ class Plotter(object):
         """
         colors = Plotter.get_colors_from_cmap(n)
 
-        for j, all_experiment_scores in enumerate([all_experiment_scores_1, all_experiment_scores_2]):
+        for j, all_experiment_scores in enumerate([all_experiment_scores_with_params,
+            all_experiment_scores_wo_params]):
             # For each curve to plot
             for i in range(n):
                 # Retreive the scores in a list for each seed
@@ -88,11 +93,13 @@ class Plotter(object):
                     label=all_labels[i]
                 )
 
-        plt.xlabel(xlabel)
-        plt.ylabel(ylabel)
-        plt.title(title)
-        plt.legend(loc='upper right')
-        fig.savefig(file_path, dpi=fig.dpi)
+        axes[0].set_xlabel(xlabel)
+        axes[1].set_xlabel(xlabel)
+        axes[0].set_ylabel(ylabel)
+        plt.suptitle(title)
+        handles, labels = axes[0].get_legend_handles_labels()
+        legend = axes[0].legend(handles, labels, loc='upper center', bbox_to_anchor=(1.1, -0.15))
+        fig.savefig(file_path, dpi=fig.dpi, bbox_extra_artists=(legend,), bbox_inches='tight')
         plt.close(fig)
 
     @staticmethod
diff --git a/code/compute_results.py b/code/compute_results.py
index 7902b2b4c90f1aa7f36a40c5970687dadee7dc14..2db767586436f1d7e0fe44209fab953e80da91d6 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -11,8 +11,6 @@ import os
 
 def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
     experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
-    # Create recursively the tree results/{experiment_id}
-    pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
     experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
 
     """
@@ -65,8 +63,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
 
 def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
     experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
-    # Create recursively the tree results/{experiment_id}
-    pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
     experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
 
     """
@@ -122,6 +118,7 @@ if __name__ == "__main__":
     parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
         'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}')
     parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
+    parser.add_argument('--extracted_forest_sizes_number', nargs='?', type=int, required=True, help='Specify the number of extracted forest sizes. TODO: read it from models dir directly.')
     parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
     parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
     args = parser.parse_args()
@@ -135,7 +132,6 @@ if __name__ == "__main__":
     pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
 
     if args.stage == 1:
-        extracted_forest_sizes_number = 5 # TODO: hardcoded
         if len(args.experiment_ids) != 6:
             raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
 
@@ -145,7 +141,7 @@ if __name__ == "__main__":
         logger.info('Loading base_with_params experiment scores...')
         base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores = \
             extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
-            extracted_forest_sizes_number)
+            args.extracted_forest_sizes_number)
         # random_with_params
         logger.info('Loading random_with_params experiment scores...')
         random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
@@ -161,7 +157,7 @@ if __name__ == "__main__":
         logger.info('Loading base_wo_params experiment scores...')
         base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores = \
             extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[3],
-            extracted_forest_sizes_number)
+            args.extracted_forest_sizes_number)
         # random_wo_params
         logger.info('Loading random_wo_params experiment scores...')
         random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
@@ -174,21 +170,29 @@ if __name__ == "__main__":
         output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1')
         pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
 
-        Plotter.plot_losses(
-            file_path=output_path + os.sep + 'losses.png',
-            all_experiment_scores_1=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
+        """all_experiment_scores_with_params=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
                 random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores,
                 omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores],
-            all_experiment_scores_2=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
+            all_experiment_scores_wo_params=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
                 random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores,
                 omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores],
+            all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
+                'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
+                'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],"""
+
+        Plotter.plot_stage1_losses(
+            file_path=output_path + os.sep + 'losses.png',
+            all_experiment_scores_with_params=[base_with_params_test_scores,
+                random_with_params_test_scores,
+                omp_with_params_test_scores],
+            all_experiment_scores_wo_params=[base_wo_params_test_scores,
+                random_wo_params_test_scores,
+                omp_wo_params_test_scores],
+            all_labels=['base', 'random', 'omp'],
             x_value=with_params_extracted_forest_sizes,
             xlabel='Number of trees extracted',
             ylabel='MSE', # TODO: hardcoded
-            all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
-                'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
-                'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],
-            title='Loss values of {} using the best hyperparams'.format(args.dataset_name)
+            title='Loss values of {} using best and default hyperparameters'.format(args.dataset_name)
         )
     else:
         raise ValueError('This stage number is not supported yet, but it will be!')
@@ -196,7 +200,7 @@ if __name__ == "__main__":
     """
     TODO:
     For each dataset:
-    Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
+    [ALMOST DONE] Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
     Stage 2) A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
     Stage 3) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
     Stage 4) A figure to finally compare the perf of our approach using the previous selected
diff --git a/results/california_housing/stage1/losses.png b/results/california_housing/stage1/losses.png
new file mode 100644
index 0000000000000000000000000000000000000000..902c5b74488875024b925388edac21bd749c10b0
Binary files /dev/null and b/results/california_housing/stage1/losses.png differ
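For reference, a minimal sketch (not part of the patch) of how the renamed Plotter.plot_stage1_losses could be exercised in isolation, assuming, as in compute_results.py above, that each scores argument holds one entry per method and, per entry, one list of losses per seed; the import path, dummy values and output file name below are hypothetical.

from bolsonaro.visualization.plotter import Plotter  # assumes code/ is on PYTHONPATH

# Hypothetical scores: 3 methods (base, random, omp), 2 seeds, 4 extracted forest sizes each.
with_params_scores = [
    [[1.20, 1.00, 0.90, 0.85], [1.30, 1.10, 0.95, 0.90]],  # base
    [[1.25, 1.05, 0.95, 0.90], [1.35, 1.15, 1.00, 0.95]],  # random
    [[1.10, 0.95, 0.85, 0.80], [1.20, 1.00, 0.90, 0.85]],  # omp
]
wo_params_scores = [
    [[1.40, 1.20, 1.10, 1.00], [1.50, 1.30, 1.15, 1.05]],  # base
    [[1.45, 1.25, 1.15, 1.05], [1.55, 1.35, 1.20, 1.10]],  # random
    [[1.30, 1.15, 1.05, 0.95], [1.40, 1.20, 1.10, 1.00]],  # omp
]

Plotter.plot_stage1_losses(
    file_path='losses_example.png',    # hypothetical output file
    all_experiment_scores_with_params=with_params_scores,
    all_experiment_scores_wo_params=wo_params_scores,
    x_value=[10, 20, 30, 40],          # extracted forest sizes
    xlabel='Number of trees extracted',
    ylabel='MSE',
    all_labels=['base', 'random', 'omp'],
    title='Loss values of california_housing using best and default hyperparameters'
)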