Skip to content
Snippets Groups Projects
Commit eb638666 authored by Charly Lamothe
Browse files

Last changes in compute_results. TODO: clean the code

parent 95f543a1
No related branches found
No related tags found
1 merge request: !23 Resolve "integration-sota"
...@@ -51,6 +51,7 @@ class Plotter(object): ...@@ -51,6 +51,7 @@ class Plotter(object):
@staticmethod @staticmethod
def plot_mean_and_CI(ax, mean, lb, ub, x_value, color_mean=None, facecolor=None, label=None): def plot_mean_and_CI(ax, mean, lb, ub, x_value, color_mean=None, facecolor=None, label=None):
#print(x_value, mean, lb, ub)
# plot the shaded range of the confidence intervals # plot the shaded range of the confidence intervals
ax.fill_between(x_value, ub, lb, facecolor=facecolor, alpha=.5) ax.fill_between(x_value, ub, lb, facecolor=facecolor, alpha=.5)
# plot the mean on top # plot the mean on top
...@@ -105,7 +106,7 @@ class Plotter(object): ...@@ -105,7 +106,7 @@ class Plotter(object):
@staticmethod @staticmethod
def plot_stage2_losses(file_path, all_experiment_scores, x_value, def plot_stage2_losses(file_path, all_experiment_scores, x_value,
xlabel, ylabel, all_labels, title): xlabel, ylabel, all_labels, title, filter_num=-1):
fig, ax = plt.subplots() fig, ax = plt.subplots()
...@@ -124,13 +125,14 @@ class Plotter(object): ...@@ -124,13 +125,14 @@ class Plotter(object):
# Compute the mean and the std for the CI # Compute the mean and the std for the CI
mean_experiment_scores = np.average(experiment_scores, axis=0) mean_experiment_scores = np.average(experiment_scores, axis=0)
std_experiment_scores = np.std(experiment_scores, axis=0) std_experiment_scores = np.std(experiment_scores, axis=0)
# Plot the score curve with the CI # Plot the score curve with the CI
Plotter.plot_mean_and_CI( Plotter.plot_mean_and_CI(
ax=ax, ax=ax,
mean=mean_experiment_scores, mean=mean_experiment_scores,
lb=mean_experiment_scores + std_experiment_scores, lb=mean_experiment_scores + std_experiment_scores,
ub=mean_experiment_scores - std_experiment_scores, ub=mean_experiment_scores - std_experiment_scores,
x_value=x_value, x_value=x_value[:filter_num] if len(mean_experiment_scores) == filter_num else x_value,
color_mean=colors[i], color_mean=colors[i],
facecolor=colors[i], facecolor=colors[i],
label=all_labels[i] label=all_labels[i]
......
...@@ -150,6 +150,35 @@ def extract_weights_across_seeds(models_dir, results_dir, experiment_id): ...@@ -150,6 +150,35 @@ def extract_weights_across_seeds(models_dir, results_dir, experiment_id):
return experiment_weights return experiment_weights
def extract_correlations_across_seeds(models_dir, results_dir, experiment_id):
    """Collect the stored correlation of every extracted forest size, per seed.

    Walks ``{models_dir}/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{size}``
    and loads each ``{size}`` directory through ``ModelRawResults.load``.

    Args:
        models_dir: Root directory holding one sub-directory per experiment id.
        results_dir: Not used by this function; kept so the signature matches
            the sibling ``extract_weights_across_seeds`` and
            ``extract_coherences_across_seeds`` helpers.
        experiment_id: Experiment identifier; converted with ``str`` to build
            the experiment directory name.

    Returns:
        dict mapping each seed directory name (a ``str``) to the list of
        ``correlation`` attributes of the loaded results, ordered by
        increasing forest size. Seeds are inserted in increasing numeric order.

    Raises:
        ValueError: If a seed directory name, or a forest-size entry whose
            name does not contain ``'no_weights'``, is not an integer string
            (both listings are sorted with ``key=int``).
        OSError: If one of the expected directories cannot be listed.
    """
    # models/{experiment_id}/seeds
    seeds_root = os.path.join(models_dir, str(experiment_id), 'seeds')

    experiment_correlations = dict()
    # Seed directories are named after integers; sort numerically, not lexically.
    for seed in sorted(os.listdir(seeds_root), key=int):
        # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
        sizes_root = os.path.join(seeds_root, seed, 'extracted_forest_sizes')

        # Entries tagged 'no_weights' are skipped; the remaining names must be
        # integers so they can be ordered by forest size.
        forest_sizes = sorted(
            (name for name in os.listdir(sizes_root) if 'no_weights' not in name),
            key=int,
        )

        # Load each .../extracted_forest_sizes/{size} entry and keep only the
        # correlation stored on the loaded results object.
        experiment_correlations[seed] = [
            ModelRawResults.load(os.path.join(sizes_root, size)).correlation
            for size in forest_sizes
        ]

    return experiment_correlations
def extract_coherences_across_seeds(models_dir, results_dir, experiment_id): def extract_coherences_across_seeds(models_dir, results_dir, experiment_id):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id} experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
...@@ -179,7 +208,6 @@ def extract_coherences_across_seeds(models_dir, results_dir, experiment_id): ...@@ -179,7 +208,6 @@ def extract_coherences_across_seeds(models_dir, results_dir, experiment_id):
return experiment_coherences return experiment_coherences
if __name__ == "__main__": if __name__ == "__main__":
# get environment variables in .env # get environment variables in .env
load_dotenv(find_dotenv('.env')) load_dotenv(find_dotenv('.env'))
...@@ -203,6 +231,7 @@ if __name__ == "__main__": ...@@ -203,6 +231,7 @@ if __name__ == "__main__":
parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.') parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.')
parser.add_argument('--wo_loss_plots', action='store_true', default=DEFAULT_WO_LOSS_PLOTS, help='Do not compute the loss plots.') parser.add_argument('--wo_loss_plots', action='store_true', default=DEFAULT_WO_LOSS_PLOTS, help='Do not compute the loss plots.')
parser.add_argument('--plot_preds_coherence', action='store_true', default=DEFAULT_PLOT_PREDS_COHERENCE, help='Plot the coherence of the prediction trees.') parser.add_argument('--plot_preds_coherence', action='store_true', default=DEFAULT_PLOT_PREDS_COHERENCE, help='Plot the coherence of the prediction trees.')
parser.add_argument('--plot_preds_correlation', action='store_true', default=DEFAULT_PLOT_PREDS_COHERENCE, help='Plot the correlation of the prediction trees.')
args = parser.parse_args() args = parser.parse_args()
if args.stage not in list(range(1, 6)): if args.stage not in list(range(1, 6)):
...@@ -501,8 +530,20 @@ if __name__ == "__main__": ...@@ -501,8 +530,20 @@ if __name__ == "__main__":
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False, extracted_forest_sizes=extracted_forest_sizes) args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False, extracted_forest_sizes=extracted_forest_sizes)
"""print(omp_with_params_dev_scores)
import sys
sys.exit(0)"""
#all_labels = ['base', 'random', 'omp', 'omp_wo_weights']
all_labels = ['base', 'random', 'omp'] all_labels = ['base', 'random', 'omp']
all_scores = [base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores] omp_with_params_test_scores_new = dict()
filter_num = -1
"""filter_num = 9
for key, value in omp_with_params_test_scores.items():
omp_with_params_test_scores_new[key] = value[:filter_num]"""
#all_scores = [base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
# omp_with_params_without_weights_test_scores]
all_scores = [base_with_params_dev_scores, random_with_params_dev_scores, omp_with_params_dev_scores]
#all_scores = [base_with_params_train_scores, random_with_params_train_scores, omp_with_params_train_scores, #all_scores = [base_with_params_train_scores, random_with_params_train_scores, omp_with_params_train_scores,
# omp_with_params_without_weights_train_scores] # omp_with_params_without_weights_train_scores]
...@@ -515,29 +556,32 @@ if __name__ == "__main__": ...@@ -515,29 +556,32 @@ if __name__ == "__main__":
label = 'similarity_predictions' label = 'similarity_predictions'
elif 'ensemble' in args.experiment_ids[i]: elif 'ensemble' in args.experiment_ids[i]:
label = 'ensemble' label = 'ensemble'
elif 'omp_distillation' in args.experiment_ids[i]:
label = 'omp_distillation'
else: else:
logger.error('Invalid value encountered') logger.error('Invalid value encountered')
continue continue
logger.info(f'Loading {label} experiment scores...') logger.info(f'Loading {label} experiment scores...')
current_experiment_id = int(args.experiment_ids[i].split('=')[1]) current_experiment_id = int(args.experiment_ids[i].split('=')[1])
current_train_scores, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes( current_train_scores, current_dev_scores, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, current_experiment_id) args.models_dir, args.results_dir, current_experiment_id)
all_labels.append(label) all_labels.append(label)
all_scores.append(current_test_scores) #all_scores.append(current_test_scores)
#all_scores.append(current_train_scores) #all_scores.append(current_train_scores)
all_scores.append(current_dev_scores)
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5') output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5_new')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True) pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
Plotter.plot_stage2_losses( Plotter.plot_stage2_losses(
file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}_test_train,dev.png", file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}_dev_clean.png",
all_experiment_scores=all_scores, all_experiment_scores=all_scores,
all_labels=all_labels, all_labels=all_labels,
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=base_with_params_experiment_score_metric, ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name), filter_num=filter_num)
"""if args.plot_weight_density: """if args.plot_weight_density:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}') root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}')
...@@ -581,14 +625,14 @@ if __name__ == "__main__": ...@@ -581,14 +625,14 @@ if __name__ == "__main__":
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id) experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png')) Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
if args.plot_preds_coherence: if args.plot_preds_coherence:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5') root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_new')
all_labels = ['random', 'omp', 'omp_normalize_D'] pathlib.Path(root_output_path).mkdir(parents=True, exist_ok=True)
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \ all_labels = ['random', 'omp', 'kmeans', 'similarity_similarities', 'similarity_predictions', 'ensemble']
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \ _, _, _, with_params_extracted_forest_sizes, _ = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2) extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 4]] coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 5, 6, 7, 8]]
Plotter.plot_stage2_losses( Plotter.plot_stage2_losses(
file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}_30_all.png", file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png",
all_experiment_scores=coherence_values, all_experiment_scores=coherence_values,
all_labels=all_labels, all_labels=all_labels,
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
...@@ -596,5 +640,21 @@ if __name__ == "__main__": ...@@ -596,5 +640,21 @@ if __name__ == "__main__":
ylabel='Coherence', ylabel='Coherence',
title='Coherence values of {}'.format(args.dataset_name)) title='Coherence values of {}'.format(args.dataset_name))
logger.info(f'Computing preds coherence plot...') logger.info(f'Computing preds coherence plot...')
if args.plot_preds_correlation:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage5_new')
pathlib.Path(root_output_path).mkdir(parents=True, exist_ok=True)
all_labels = ['random', 'omp', 'kmeans', 'similarity_similarities', 'similarity_predictions', 'ensemble']
_, _, _, with_params_extracted_forest_sizes, _ = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, 2)
correlation_values = [extract_correlations_across_seeds(args.models_dir, args.results_dir, i) for i in [2, 3, 5, 6, 7, 8]]
Plotter.plot_stage2_losses(
file_path=root_output_path + os.sep + f"correlations_{'-'.join(all_labels)}.png",
all_experiment_scores=correlation_values,
all_labels=all_labels,
x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel='correlation',
title='correlation values of {}'.format(args.dataset_name))
logger.info(f'Computing preds correlation plot...')
logger.info('Done.') logger.info('Done.')
import pathlib
import glob2
import os
import shutil
from tqdm import tqdm
if __name__ == "__main__":
    # One-off export: copy every 'model_raw_results.pickle' found under
    # {models_source_path}/{dataset}/stage5_new/ into
    # {models_destination_path}/{dataset}/, preserving the sub-directory
    # layout below 'stage5_new'.
    models_source_path = 'models'
    models_destination_path = 'bolsonaro_models_25-03-20'
    datasets = ['boston', 'diabetes', 'linnerud', 'breast_cancer', 'california_housing', 'diamonds',
        'steel-plates', 'kr-vs-kp', 'kin8nm', 'spambase', 'gamma', 'lfw_pairs']

    pathlib.Path(models_destination_path).mkdir(parents=True, exist_ok=True)

    # Iterating over a tqdm wrapper advances the bar by itself, so no manual
    # update(1) calls are made (they would count every item twice).
    with tqdm(datasets) as dataset_bar:
        for dataset in dataset_bar:
            dataset_bar.set_description(dataset)

            # Directory the copied paths are made relative to.
            stage_root = os.path.join(models_source_path, dataset, 'stage5_new')
            found_paths = glob2.glob(os.path.join(stage_root, '**', 'model_raw_results.pickle'),
                recursive=True)

            pathlib.Path(os.path.join(models_destination_path, dataset)).mkdir(parents=True, exist_ok=True)

            with tqdm(found_paths) as found_paths_bar:
                for path in found_paths_bar:
                    found_paths_bar.set_description(path)

                    # Strip the '{source}/{dataset}/stage5_new' prefix using the
                    # real stage root instead of a hard-coded 'models/...' string
                    # with '/' separators.
                    relative_dir, filename = os.path.split(os.path.relpath(path, stage_root))

                    new_path = os.path.join(models_destination_path, dataset, relative_dir)
                    pathlib.Path(new_path).mkdir(parents=True, exist_ok=True)
                    shutil.copyfile(src=path, dst=os.path.join(new_path, filename))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment