diff --git a/code/compute_results.py b/code/compute_results.py
index 23e3db3ad7c95e5f5732b4d09e945ce53dfd4467..ef5477433a22e966c238ecacb3130263097cc6c7 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -211,7 +211,7 @@ def extract_correlations_across_seeds(models_dir, results_dir, experiment_id):
             extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
             # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
             model_raw_results = ModelRawResults.load(extracted_forest_size_path)
-            experiment_correlations[seed].append(model_raw_results.correlation)
+            experiment_correlations[seed].append(model_raw_results.train_correlation)
 
     return experiment_correlations
 
@@ -239,10 +239,38 @@ def extract_coherences_across_seeds(models_dir, results_dir, experiment_id):
             extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
             # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
             model_raw_results = ModelRawResults.load(extracted_forest_size_path)
-            experiment_coherences[seed].append(model_raw_results.coherence)
+            experiment_coherences[seed].append(model_raw_results.train_coherence)
 
     return experiment_coherences
 
+def extract_strengths_across_seeds(models_dir, results_dir, experiment_id):
+    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
+    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
+    experiment_strengths = dict()
+
+    # For each seed results stored in models/{experiment_id}/seeds
+    seeds = os.listdir(experiment_seed_root_path)
+    seeds.sort(key=int)
+    for seed in seeds:
+        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
+        extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+
+        # {{seed}:[]}
+        experiment_strengths[seed] = list()
+
+        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
+        extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if 'no_weights' not in nb_tree]
+        extracted_forest_sizes.sort(key=int)
+        for extracted_forest_size in extracted_forest_sizes:
+            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
+            extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
+            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
+            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
+            experiment_strengths[seed].append(model_raw_results.test_strength)
+
+    return experiment_strengths
+
 def extract_selected_trees_scores_across_seeds(models_dir, results_dir, experiment_id, weighted=False):
     experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
     experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
@@ -784,14 +812,14 @@ if __name__ == "__main__":
         experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
         Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
     if args.plot_preds_coherence:
-        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_new')
+        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_27-03-20')
         pathlib.Path(root_output_path).mkdir(parents=True, exist_ok=True)
         all_labels = ['random', 'omp', 'kmeans', 'similarity_similarities', 'similarity_predictions', 'ensemble']
         _, _, _, with_params_extracted_forest_sizes, _ = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
         coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in args.experiment_ids]
         Plotter.plot_stage2_losses(
-            file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png",
+            file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}_train.png",
             all_experiment_scores=coherence_values,
             all_labels=all_labels,
             x_value=with_params_extracted_forest_sizes,
@@ -801,14 +829,14 @@ if __name__ == "__main__":
         logger.info(f'Computing preds coherence plot...')
 
     if args.plot_preds_correlation:
-        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_new')
+        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_27-03-20')
         pathlib.Path(root_output_path).mkdir(parents=True, exist_ok=True)
         all_labels = ['none', 'random', 'omp', 'kmeans', 'similarity_similarities', 'similarity_predictions', 'ensemble']
         _, _, _, with_params_extracted_forest_sizes, _ = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
         correlation_values = [extract_correlations_across_seeds(args.models_dir, args.results_dir, i) for i in args.experiment_ids]
         Plotter.plot_stage2_losses(
-            file_path=root_output_path + os.sep + f"correlations_{'-'.join(all_labels)}.png",
+            file_path=root_output_path + os.sep + f"correlations_{'-'.join(all_labels)}_train.png",
             all_experiment_scores=correlation_values,
             all_labels=all_labels,
             x_value=with_params_extracted_forest_sizes,
@@ -818,7 +846,7 @@ if __name__ == "__main__":
         logger.info(f'Computing preds correlation plot...')
 
     if args.plot_forest_strength:
-        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_strength')
+        root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_27-03-20')
         pathlib.Path(root_output_path).mkdir(parents=True, exist_ok=True)
 
         _, _, _, with_params_extracted_forest_sizes, _ = \
@@ -837,7 +865,7 @@ if __name__ == "__main__":
         #random_selected_trees_scores = extract_selected_trees_scores_across_seeds(
         #    args.models_dir, args.results_dir, 2, weighted=True)
 
-        omp_selected_trees_scores = extract_selected_trees_scores_across_seeds(
+        """omp_selected_trees_scores = extract_selected_trees_scores_across_seeds(
             args.models_dir, args.results_dir, 3, weighted=True)
 
         similarity_similarities_selected_trees_scores = extract_selected_trees_scores_across_seeds(
@@ -847,27 +875,25 @@ if __name__ == "__main__":
         #    args.models_dir, args.results_dir, 7)
 
         ensemble_selected_trees_scores = extract_selected_trees_scores_across_seeds(
-            args.models_dir, args.results_dir, 8, weighted=True)
+            args.models_dir, args.results_dir, 8, weighted=True)"""
 
         # kmeans=5
         # similarity_similarities=6
         # similarity_predictions=7
         # ensemble=8
 
-        all_selected_trees_scores = [random_selected_trees_scores, omp_selected_trees_scores, similarity_similarities_selected_trees_scores,
-            ensemble_selected_trees_scores]
+        all_labels = ['random', 'omp', 'kmeans', 'similarity_similarities', 'similarity_predictions', 'ensemble']
+        strengths_values = [extract_strengths_across_seeds(args.models_dir, args.results_dir, i) for i in args.experiment_ids]
 
-        with open('california_housing_forest_strength_scores.pickle', 'wb') as file:
-            pickle.dump(all_selected_trees_scores, file)
+        """with open('california_housing_forest_strength_scores.pickle', 'wb') as file:
+            pickle.dump(all_selected_trees_scores, file)"""
 
         """with open('forest_strength_scores.pickle', 'rb') as file:
             all_selected_trees_scores = pickle.load(file)"""
 
-        all_labels = ['random', 'omp', 'similarity_similarities', 'ensemble']
-
         Plotter.plot_stage2_losses(
-            file_path=root_output_path + os.sep + f"forest_strength_{'-'.join(all_labels)}_v2_sota.png",
-            all_experiment_scores=all_selected_trees_scores,
+            file_path=root_output_path + os.sep + f"forest_strength_{'-'.join(all_labels)}.png",
+            all_experiment_scores=strengths_values,
             all_labels=all_labels,
             x_value=with_params_extracted_forest_sizes,
             xlabel='Number of trees extracted',