diff --git a/code/compute_results.py b/code/compute_results.py
index 5f7fac2c7718cf887d3d83a5b3a7eb9cdebfb9d9..cff7ba253acfa67d7b4b8eabadcd69a19522e2df 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -28,7 +28,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
     experiment_train_scores = dict()
     experiment_dev_scores = dict()
     experiment_test_scores = dict()
-    experiment_weights = dict()
     all_extracted_forest_sizes = list()
 
     # Used to check if all losses were computed using the same metric (it should be the case)
@@ -45,7 +44,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
         experiment_train_scores[seed] = list()
         experiment_dev_scores[seed] = list()
         experiment_test_scores[seed] = list()
-        experiment_weights[seed] = list()
 
         # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
         extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
@@ -66,8 +64,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
             experiment_test_scores[seed].append(model_raw_results.test_score)
             # Save the metric
             experiment_score_metrics.append(model_raw_results.score_metric)
-            # Save the weights
-            #experiment_weights[seed].append(model_raw_results.model_weights)
 
     # Sanity checks
     if len(set(experiment_score_metrics)) > 1:
@@ -76,7 +72,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
         raise ValueError("The extracted forest sizes aren't the sames across seeds.")
 
     return experiment_train_scores, experiment_dev_scores, experiment_test_scores, \
-        all_extracted_forest_sizes[0], experiment_score_metrics[0]#, experiment_weights
+        all_extracted_forest_sizes[0], experiment_score_metrics[0]
 
 def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
     experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
@@ -123,6 +119,36 @@ def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experim
 
     return experiment_train_scores, experiment_dev_scores, experiment_test_scores, experiment_score_metrics[0]
 
+def extract_weights_across_seeds(models_dir, results_dir, experiment_id):
+    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
+    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
+    experiment_weights = dict()
+
+    # For each seed results stored in models/{experiment_id}/seeds
+    seeds = os.listdir(experiment_seed_root_path)
+    seeds.sort(key=int)
+    for seed in seeds:
+        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
+        extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+
+        # {{seed}:[]}
+        experiment_weights[seed] = list()
+
+        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
+        extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if 'no_weights' not in nb_tree]
+        extracted_forest_sizes.sort(key=int)
+        for extracted_forest_size in extracted_forest_sizes:
+            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
+            extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
+            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
+            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
+            # Save the weights
+            experiment_weights[seed].append(model_raw_results.model_weights)
+
+    return experiment_weights
+
+
 if __name__ == "__main__":
     # get environment variables in .env
     load_dotenv(find_dotenv('.env'))
@@ -400,6 +426,10 @@ if __name__ == "__main__":
             xlabel='Number of trees extracted',
             ylabel=experiments_score_metric,
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
+
+        experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, args.experiment_ids[2])
+
+        Plotter.weight_density(experiment_weights, os.path.join(output_path, 'weight_density.png'))
     elif args.stage == 5:
         # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
         extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
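
Note: Plotter.weight_density is called in the stage-4 block above but its implementation is not part of this diff. The sketch below is only an assumption about what such a helper could look like; it flattens the {seed: [model_weights, ...]} dictionary returned by extract_weights_across_seeds and writes a density plot, assuming matplotlib is available and each model_weights entry is array-like. The actual Plotter class in the repository may work differently.

# Hypothetical sketch only -- not the repository's Plotter.weight_density.
import matplotlib
matplotlib.use('Agg')  # render to a file, no display needed
import matplotlib.pyplot as plt
import numpy as np

def weight_density_sketch(experiment_weights, file_path):
    # experiment_weights: {seed: [model_weights, ...]}, one weight vector per
    # extracted forest size, as returned by extract_weights_across_seeds.
    all_weights = np.concatenate([np.asarray(weights).ravel()
                                  for weight_list in experiment_weights.values()
                                  for weights in weight_list])
    fig, ax = plt.subplots()
    ax.hist(all_weights, bins=100, density=True)
    ax.set_xlabel('weight value')
    ax.set_ylabel('density')
    ax.set_title('Density of model weights across seeds and extracted forest sizes')
    fig.savefig(file_path)
    plt.close(fig)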