diff --git a/code/bolsonaro/models/omp_forest.py b/code/bolsonaro/models/omp_forest.py index 0e2a109a9745b3d94021eb523ad7fc3129fb8793..86359fa38e325726d49b1f48c84e3fded723282e 100644 --- a/code/bolsonaro/models/omp_forest.py +++ b/code/bolsonaro/models/omp_forest.py @@ -123,9 +123,7 @@ class SingleOmpForest(OmpForest): forest_predictions = self._base_estimator_predictions(X) if self._models_parameters.normalize_D: - forest_predictions = forest_predictions.T forest_predictions /= self._forest_norms - forest_predictions = forest_predictions.T return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights) diff --git a/code/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py index 9dfabb7d9c2836b116ccce97aa090a67cd134403..30012c05f066cc9452e92f7976c143cb2dcf967f 100644 --- a/code/bolsonaro/models/omp_forest_classifier.py +++ b/code/bolsonaro/models/omp_forest_classifier.py @@ -119,9 +119,7 @@ class OmpForestMulticlassClassifier(OmpForest): forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T if self._models_parameters.normalize_D: - forest_predictions = forest_predictions.T forest_predictions /= self._forest_norms - forest_predictions = forest_predictions.T label_names = [] preds = [] @@ -149,7 +147,9 @@ class OmpForestMulticlassClassifier(OmpForest): forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T if self._models_parameters.normalize_D: + forest_predictions = forest_predictions.T forest_predictions /= self._forest_norms + forest_predictions = forest_predictions.T label_names = [] preds = [] diff --git a/code/compute_results.py b/code/compute_results.py index 477bcf6796b6cdaa5b6df862356db5edd6425a29..7d80b4c69308263530566704a8c6747033e99245 100644 --- a/code/compute_results.py +++ b/code/compute_results.py @@ -156,6 +156,7 @@ if __name__ == "__main__": 
DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results' DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models' DEFAULT_PLOT_WEIGHT_DENSITY = False + DEFAULT_WO_LOSS_PLOTS = False parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].') @@ -168,6 +169,7 @@ if __name__ == "__main__": parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.') parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.') parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.') + parser.add_argument('--wo_loss_plots', action='store_true', default=DEFAULT_WO_LOSS_PLOTS, help='Do not compute the loss plots.') args = parser.parse_args() if args.stage not in list(range(1, 6)): @@ -181,7 +183,7 @@ if __name__ == "__main__": # Create recursively the results dir tree pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True) - if args.stage == 1: + if args.stage == 1 and not args.wo_loss_plots: if len(args.experiment_ids) != 6: raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.') @@ -221,8 +223,8 @@ if __name__ == "__main__": wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \ extract_scores_across_seeds_and_extracted_forest_sizes( args.models_dir, args.results_dir, int(args.experiment_ids[4])) - # base_wo_params - logger.info('Loading base_wo_params experiment scores...') + # omp_wo_params + logger.info('Loading omp_wo_params experiment scores...') omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \ omp_wo_params_experiment_score_metric = 
extract_scores_across_seeds_and_extracted_forest_sizes( args.models_dir, args.results_dir, int(args.experiment_ids[5])) @@ -262,7 +264,7 @@ if __name__ == "__main__": ylabel=experiments_score_metric, title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name) ) - elif args.stage == 2: + elif args.stage == 2 and not args.wo_loss_plots: if len(args.experiment_ids) != 4: raise ValueError('In the case of stage 2, the number of specified experiment ids must be 4.') @@ -308,7 +310,7 @@ if __name__ == "__main__": xlabel='Number of trees extracted', ylabel=experiments_score_metric, title='Loss values of {}\nusing different normalizations'.format(args.dataset_name)) - elif args.stage == 3: + elif args.stage == 3 and not args.wo_loss_plots: if len(args.experiment_ids) != 3: raise ValueError('In the case of stage 3, the number of specified experiment ids must be 3.') @@ -365,7 +367,7 @@ if __name__ == "__main__": xlabel='Number of trees extracted', ylabel=experiments_score_metric, title='Loss values of {}\nusing different training subsets'.format(args.dataset_name))""" - elif args.stage == 4: + elif args.stage == 4 and not args.wo_loss_plots: if len(args.experiment_ids) != 3: raise ValueError('In the case of stage 4, the number of specified experiment ids must be 3.') @@ -427,11 +429,7 @@ if __name__ == "__main__": xlabel='Number of trees extracted', ylabel=experiments_score_metric, title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) - - experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, args.experiment_ids[2]) - - Plotter.weight_density(experiment_weights, os.path.join(output_path, 'weight_density.png')) - elif args.stage == 5: + elif args.stage == 5 and not args.wo_loss_plots: # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, 
int(args.experiment_ids[1])) all_labels = list() @@ -475,8 +473,9 @@ if __name__ == "__main__": continue logger.info(f'Loading {label} experiment scores...') + current_experiment_id = int(args.experiment_ids[i].split('=')[1]) _, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes( - args.models_dir, args.results_dir, int(args.experiment_ids[i].split('=')[1])) + args.models_dir, args.results_dir, current_experiment_id) all_labels.append(label) all_scores.append(current_test_scores) @@ -491,7 +490,42 @@ if __name__ == "__main__": xlabel='Number of trees extracted', ylabel=base_with_params_experiment_score_metric, title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) - else: - raise ValueError('This stage number is not supported yet, but it will be!') + + if args.plot_weight_density: + root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}') + + if args.stage == 1: + omp_experiment_ids = [('omp_with_params', args.experiment_ids[2]), ('omp_wo_params', args.experiment_ids[5])] + elif args.stage == 2: + omp_experiment_ids = [('no_normalization', args.experiment_ids[0]), + ('normalize_D', args.experiment_ids[1]), + ('normalize_weights', args.experiment_ids[2]), + ('normalize_D_and_weights', args.experiment_ids[3])] + elif args.stage == 3: + omp_experiment_ids = [('train-dev_subset', args.experiment_ids[0]), + ('train-dev_train-dev_subset', args.experiment_ids[1]), + ('train-train-dev_subset', args.experiment_ids[2])] + elif args.stage == 4: + omp_experiment_ids = [('omp_with_params', args.experiment_ids[2])] + elif args.stage == 5: + omp_experiment_ids = [('omp_with_params', args.experiment_ids[2])] + for i in range(3, len(args.experiment_ids)): + if 'kmeans' in args.experiment_ids[i]: + label = 'kmeans' + elif 'similarity' in args.experiment_ids[i]: + label = 'similarity' + elif 'ensemble' in args.experiment_ids[i]: + label = 'ensemble' + else: + logger.error('Invalid 
value encountered') + continue + + current_experiment_id = int(args.experiment_ids[i].split('=')[1]) + omp_experiment_ids.append((label, current_experiment_id)) + + for (experiment_label, experiment_id) in omp_experiment_ids: + logger.info(f'Computing weight density plot for experiment {experiment_label}...') + experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id) + Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png')) logger.info('Done.') diff --git a/results/boston/stage1/weight_density_omp_with_params.png b/results/boston/stage1/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..240db037616a0659d6f90203f0cb49552843016e Binary files /dev/null and b/results/boston/stage1/weight_density_omp_with_params.png differ diff --git a/results/boston/stage1/weight_density_omp_wo_params.png b/results/boston/stage1/weight_density_omp_wo_params.png new file mode 100644 index 0000000000000000000000000000000000000000..240db037616a0659d6f90203f0cb49552843016e Binary files /dev/null and b/results/boston/stage1/weight_density_omp_wo_params.png differ diff --git a/results/boston/stage3/weight_density_train-dev_subset.png b/results/boston/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..04f1bfadf06b0ddf871feef1dca0a3b06be0c85b Binary files /dev/null and b/results/boston/stage3/weight_density_train-dev_subset.png differ diff --git a/results/boston/stage3/weight_density_train-dev_train-dev_subset.png b/results/boston/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..b9fc4215663404ec30b2440c786412b9b7f80a53 Binary files /dev/null and b/results/boston/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/boston/stage3/weight_density_train-train-dev_subset.png 
b/results/boston/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..53883f9ea5facac2afd88de2153f680970700e8c Binary files /dev/null and b/results/boston/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/boston/stage4/weight_density_omp_with_params.png b/results/boston/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..c2363f31823c49231e2a1438f652db75c08b294b Binary files /dev/null and b/results/boston/stage4/weight_density_omp_with_params.png differ diff --git a/results/breast_cancer/stage4/weight_density_omp_with_params.png b/results/breast_cancer/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..e5a815138aee0af48e2f04420a48f485063893fc Binary files /dev/null and b/results/breast_cancer/stage4/weight_density_omp_with_params.png differ diff --git a/results/california_housing/stage4/weight_density_omp_with_params.png b/results/california_housing/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..e98b27a0881aad8335c21b4f78958c89f2e67644 Binary files /dev/null and b/results/california_housing/stage4/weight_density_omp_with_params.png differ diff --git a/results/diabetes/stage1/weight_density_omp_with_params.png b/results/diabetes/stage1/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..31a2060e1d0d4028d12ecf50804581a26c1897a8 Binary files /dev/null and b/results/diabetes/stage1/weight_density_omp_with_params.png differ diff --git a/results/diabetes/stage1/weight_density_omp_wo_params.png b/results/diabetes/stage1/weight_density_omp_wo_params.png new file mode 100644 index 0000000000000000000000000000000000000000..31a2060e1d0d4028d12ecf50804581a26c1897a8 Binary files /dev/null and b/results/diabetes/stage1/weight_density_omp_wo_params.png differ 
diff --git a/results/diabetes/stage3/weight_density_train-dev_subset.png b/results/diabetes/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..c7263c93c5ebaa12ed76e84731229b570efb0118 Binary files /dev/null and b/results/diabetes/stage3/weight_density_train-dev_subset.png differ diff --git a/results/diabetes/stage3/weight_density_train-dev_train-dev_subset.png b/results/diabetes/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..64baf308e19e31ccce9cfd588b09bc86757a100f Binary files /dev/null and b/results/diabetes/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/diabetes/stage3/weight_density_train-train-dev_subset.png b/results/diabetes/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..6d21ba6f21a9e44001934372d6b9e5538272c479 Binary files /dev/null and b/results/diabetes/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/diabetes/stage4/weight_density_omp_with_params.png b/results/diabetes/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..64baf308e19e31ccce9cfd588b09bc86757a100f Binary files /dev/null and b/results/diabetes/stage4/weight_density_omp_with_params.png differ diff --git a/results/diamonds/stage3/weight_density_train-dev_subset.png b/results/diamonds/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..deda1734a26c87d22ff645882dc5682b78e22bb7 Binary files /dev/null and b/results/diamonds/stage3/weight_density_train-dev_subset.png differ diff --git a/results/diamonds/stage3/weight_density_train-dev_train-dev_subset.png b/results/diamonds/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 
0000000000000000000000000000000000000000..946fbddf649201b865b8977d4636f55f31c0dbef Binary files /dev/null and b/results/diamonds/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/diamonds/stage3/weight_density_train-train-dev_subset.png b/results/diamonds/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..607a7271c0783b942656f8907ab7f80829c926ea Binary files /dev/null and b/results/diamonds/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/diamonds/stage4/weight_density_omp_with_params.png b/results/diamonds/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..946fbddf649201b865b8977d4636f55f31c0dbef Binary files /dev/null and b/results/diamonds/stage4/weight_density_omp_with_params.png differ diff --git a/results/kin8nm/stage1/weight_density_omp_with_params.png b/results/kin8nm/stage1/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..5a6eb55598f4366054b8bfb6fc706bad243ccbda Binary files /dev/null and b/results/kin8nm/stage1/weight_density_omp_with_params.png differ diff --git a/results/kin8nm/stage1/weight_density_omp_wo_params.png b/results/kin8nm/stage1/weight_density_omp_wo_params.png new file mode 100644 index 0000000000000000000000000000000000000000..5a6eb55598f4366054b8bfb6fc706bad243ccbda Binary files /dev/null and b/results/kin8nm/stage1/weight_density_omp_wo_params.png differ diff --git a/results/kin8nm/stage3/weight_density_train-dev_subset.png b/results/kin8nm/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..5a6eb55598f4366054b8bfb6fc706bad243ccbda Binary files /dev/null and b/results/kin8nm/stage3/weight_density_train-dev_subset.png differ diff --git a/results/kin8nm/stage3/weight_density_train-dev_train-dev_subset.png 
b/results/kin8nm/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..b02a9435091efc7ee6481f67b94d6e9819115412 Binary files /dev/null and b/results/kin8nm/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/kin8nm/stage3/weight_density_train-train-dev_subset.png b/results/kin8nm/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..af588dfe92b3af80754e279b7e50be2f4e319757 Binary files /dev/null and b/results/kin8nm/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/kin8nm/stage4/weight_density_omp_with_params.png b/results/kin8nm/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..b02a9435091efc7ee6481f67b94d6e9819115412 Binary files /dev/null and b/results/kin8nm/stage4/weight_density_omp_with_params.png differ diff --git a/results/kr-vs-kp/stage1/weight_density_omp_with_params.png b/results/kr-vs-kp/stage1/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..a81fc2db8baebce903778fd1830cc7d94e0cbd57 Binary files /dev/null and b/results/kr-vs-kp/stage1/weight_density_omp_with_params.png differ diff --git a/results/kr-vs-kp/stage1/weight_density_omp_wo_params.png b/results/kr-vs-kp/stage1/weight_density_omp_wo_params.png new file mode 100644 index 0000000000000000000000000000000000000000..a81fc2db8baebce903778fd1830cc7d94e0cbd57 Binary files /dev/null and b/results/kr-vs-kp/stage1/weight_density_omp_wo_params.png differ diff --git a/results/kr-vs-kp/stage3/weight_density_train-dev_subset.png b/results/kr-vs-kp/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..a81fc2db8baebce903778fd1830cc7d94e0cbd57 Binary files /dev/null and b/results/kr-vs-kp/stage3/weight_density_train-dev_subset.png differ diff --git 
a/results/kr-vs-kp/stage3/weight_density_train-dev_train-dev_subset.png b/results/kr-vs-kp/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..030dfa3b9bafba398e3eadecd5a3c0769f90a5b1 Binary files /dev/null and b/results/kr-vs-kp/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/kr-vs-kp/stage3/weight_density_train-train-dev_subset.png b/results/kr-vs-kp/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..16dd729e58f8785bedd7bfcc349f7a7d947f14ac Binary files /dev/null and b/results/kr-vs-kp/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/kr-vs-kp/stage4/weight_density_omp_with_params.png b/results/kr-vs-kp/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..030dfa3b9bafba398e3eadecd5a3c0769f90a5b1 Binary files /dev/null and b/results/kr-vs-kp/stage4/weight_density_omp_with_params.png differ diff --git a/results/spambase/stage1/weight_density_omp_with_params.png b/results/spambase/stage1/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..537828f4a189c1b2d3bbcced166dbfa77274c114 Binary files /dev/null and b/results/spambase/stage1/weight_density_omp_with_params.png differ diff --git a/results/spambase/stage1/weight_density_omp_wo_params.png b/results/spambase/stage1/weight_density_omp_wo_params.png new file mode 100644 index 0000000000000000000000000000000000000000..537828f4a189c1b2d3bbcced166dbfa77274c114 Binary files /dev/null and b/results/spambase/stage1/weight_density_omp_wo_params.png differ diff --git a/results/spambase/stage3/weight_density_train-dev_subset.png b/results/spambase/stage3/weight_density_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..06867df07f03a150e22fdf87ef14d34f39dd3e4e Binary files /dev/null and 
b/results/spambase/stage3/weight_density_train-dev_subset.png differ diff --git a/results/spambase/stage3/weight_density_train-dev_train-dev_subset.png b/results/spambase/stage3/weight_density_train-dev_train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..28d04759330153f7d641e785c93c0a3507d8a37b Binary files /dev/null and b/results/spambase/stage3/weight_density_train-dev_train-dev_subset.png differ diff --git a/results/spambase/stage3/weight_density_train-train-dev_subset.png b/results/spambase/stage3/weight_density_train-train-dev_subset.png new file mode 100644 index 0000000000000000000000000000000000000000..390efa211548a5eb5abd3f7380bbe95124002ff5 Binary files /dev/null and b/results/spambase/stage3/weight_density_train-train-dev_subset.png differ diff --git a/results/spambase/stage4/weight_density_omp_with_params.png b/results/spambase/stage4/weight_density_omp_with_params.png new file mode 100644 index 0000000000000000000000000000000000000000..28d04759330153f7d641e785c93c0a3507d8a37b Binary files /dev/null and b/results/spambase/stage4/weight_density_omp_with_params.png differ diff --git a/scripts/run_compute_results_density.sh b/scripts/run_compute_results_density.sh new file mode 100755 index 0000000000000000000000000000000000000000..acbeb76c59d744a820a382d7411c67dce089503e --- /dev/null +++ b/scripts/run_compute_results_density.sh @@ -0,0 +1,8 @@ +seeds='1 2 3 4 5' +for dataset in kin8nm kr-vs-kp spambase steel-plates california_housing boston iris diabetes digits wine breast_cancer olivetti_faces diamonds +do + python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=$dataset --models_dir=models/$dataset/stage1 --plot_weight_density --wo_loss_plots + python code/compute_results.py --stage=2 --experiment_ids 1 2 3 4 --dataset_name=$dataset --models_dir=models/$dataset/stage2 --plot_weight_density --wo_loss_plots + python code/compute_results.py --stage=3 --experiment_ids 1 2 3 
--dataset_name=$dataset --models_dir=models/$dataset/stage3 --plot_weight_density --wo_loss_plots + python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4 --plot_weight_density --wo_loss_plots +done