Commit ec55d270 authored by Charly Lamothe
Browse files

Starting to add a first version of weight density plot for stage4

parent 1db36b5d
...@@ -28,7 +28,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -28,7 +28,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
experiment_train_scores = dict() experiment_train_scores = dict()
experiment_dev_scores = dict() experiment_dev_scores = dict()
experiment_test_scores = dict() experiment_test_scores = dict()
experiment_weights = dict()
all_extracted_forest_sizes = list() all_extracted_forest_sizes = list()
# Used to check if all losses were computed using the same metric (it should be the case) # Used to check if all losses were computed using the same metric (it should be the case)
...@@ -45,7 +44,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -45,7 +44,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
experiment_train_scores[seed] = list() experiment_train_scores[seed] = list()
experiment_dev_scores[seed] = list() experiment_dev_scores[seed] = list()
experiment_test_scores[seed] = list() experiment_test_scores[seed] = list()
experiment_weights[seed] = list()
# List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path) extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
...@@ -66,8 +64,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -66,8 +64,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
experiment_test_scores[seed].append(model_raw_results.test_score) experiment_test_scores[seed].append(model_raw_results.test_score)
# Save the metric # Save the metric
experiment_score_metrics.append(model_raw_results.score_metric) experiment_score_metrics.append(model_raw_results.score_metric)
# Save the weights
#experiment_weights[seed].append(model_raw_results.model_weights)
# Sanity checks # Sanity checks
if len(set(experiment_score_metrics)) > 1: if len(set(experiment_score_metrics)) > 1:
...@@ -76,7 +72,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -76,7 +72,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
raise ValueError("The extracted forest sizes aren't the sames across seeds.") raise ValueError("The extracted forest sizes aren't the sames across seeds.")
return experiment_train_scores, experiment_dev_scores, experiment_test_scores, \ return experiment_train_scores, experiment_dev_scores, experiment_test_scores, \
all_extracted_forest_sizes[0], experiment_score_metrics[0]#, experiment_weights all_extracted_forest_sizes[0], experiment_score_metrics[0]
def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number): def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id} experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
...@@ -123,6 +119,37 @@ def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experim ...@@ -123,6 +119,37 @@ def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experim
return experiment_train_scores, experiment_dev_scores, experiment_test_scores, experiment_score_metrics[0] return experiment_train_scores, experiment_dev_scores, experiment_test_scores, experiment_score_metrics[0]
def extract_weights_across_seeds(models_dir, results_dir, experiment_id):
    """Collect the stored model weights of an experiment, grouped by seed.

    Walks ``{models_dir}/{experiment_id}/seeds/{seed}/extracted_forest_sizes``
    and, for every extracted forest size directory whose name does not
    contain ``'no_weights'``, loads the raw results through
    ``ModelRawResults.load`` and keeps their ``model_weights`` attribute.

    :param models_dir: root directory that contains one sub-directory per
        experiment id.
    :param results_dir: not used by this function; kept so the signature
        matches the other ``extract_*`` helpers of this file.
    :param experiment_id: identifier of the experiment to read (converted
        with ``str`` to build the path).
    :return: dict mapping each seed directory name to the list of
        ``model_weights``, ordered by increasing extracted forest size.
    :raises ValueError: if a seed directory name, or a kept forest size
        directory name, cannot be converted to ``int`` for sorting.
    """
    # models/{experiment_id}/seeds
    experiment_seed_root_path = os.path.join(models_dir, str(experiment_id), 'seeds')

    # {{seed}:[]}
    experiment_weights = dict()

    # Seed directory names are sorted numerically, not lexicographically.
    seeds = sorted(os.listdir(experiment_seed_root_path), key=int)
    for seed in seeds:
        # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
        extracted_forest_sizes_root_path = os.path.join(
            experiment_seed_root_path, seed, 'extracted_forest_sizes')

        # Directories flagged with 'no_weights' are skipped: only the
        # remaining ones are loaded for their weights.
        extracted_forest_sizes = sorted(
            (nb_tree for nb_tree in os.listdir(extracted_forest_sizes_root_path)
             if 'no_weights' not in nb_tree),
            key=int)

        # NOTE: the original body also appended these sizes to
        # `all_extracted_forest_sizes`, a name that is never defined in this
        # function (NameError at run time) and that is not part of the
        # returned value, so that statement has been removed.

        seed_weights = list()
        for extracted_forest_size in extracted_forest_sizes:
            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
            extracted_forest_size_path = os.path.join(
                extracted_forest_sizes_root_path, extracted_forest_size)
            # Load the raw results stored in that directory and keep the weights.
            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
            seed_weights.append(model_raw_results.model_weights)
        experiment_weights[seed] = seed_weights

    return experiment_weights
if __name__ == "__main__": if __name__ == "__main__":
# get environment variables in .env # get environment variables in .env
load_dotenv(find_dotenv('.env')) load_dotenv(find_dotenv('.env'))
...@@ -400,6 +427,10 @@ if __name__ == "__main__": ...@@ -400,6 +427,10 @@ if __name__ == "__main__":
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, args.experiment_ids[2])
Plotter.weight_density(experiment_weights, os.path.join(output_path, 'weight_density.png'))
elif args.stage == 5: elif args.stage == 5:
# Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1]) extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment