Skip to content
Snippets Groups Projects
Commit e9d1cefa authored by Charly Lamothe
Browse files

Add missing files in previous commit...

parent 51ba8a0e
No related branches found
No related tags found
1 merge request: !9 Resolve "Experiment pipeline"
......@@ -57,10 +57,14 @@ class Plotter(object):
ax.plot(x_value, mean, c=color_mean, label=label)
@staticmethod
def plot_losses(file_path, all_experiment_scores_1, all_experiment_scores_2, x_value, xlabel, ylabel, all_labels, title):
fig, axes = plt.subplots(nrows=1, ncols=2)
def plot_stage1_losses(file_path, all_experiment_scores_with_params,
all_experiment_scores_wo_params, x_value, xlabel, ylabel, all_labels, title):
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)
n = len(all_experiment_scores_1)
n = len(all_experiment_scores_with_params)
if n != len(all_experiment_scores_wo_params):
raise ValueError('all_experiment_scores_with_params and all_experiment_scores_wo_params must have the same len to be compared.')
"""
Get as many different colors from the specified cmap (here nipy_spectral)
......@@ -68,7 +72,8 @@ class Plotter(object):
"""
colors = Plotter.get_colors_from_cmap(n)
for j, all_experiment_scores in enumerate([all_experiment_scores_1, all_experiment_scores_2]):
for j, all_experiment_scores in enumerate([all_experiment_scores_with_params,
all_experiment_scores_wo_params]):
# For each curve to plot
for i in range(n):
# Retrieve the scores in a list for each seed
......@@ -88,11 +93,13 @@ class Plotter(object):
label=all_labels[i]
)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.title(title)
plt.legend(loc='upper right')
fig.savefig(file_path, dpi=fig.dpi)
axes[0].set_xlabel(xlabel)
axes[1].set_xlabel(xlabel)
axes[0].set_ylabel(ylabel)
plt.suptitle(title)
handles, labels = axes[0].get_legend_handles_labels()
legend = axes[0].legend(handles, labels, loc='upper center', bbox_to_anchor=(1.1, -0.15))
fig.savefig(file_path, dpi=fig.dpi, bbox_extra_artists=(legend,), bbox_inches='tight')
plt.close(fig)
@staticmethod
......
......@@ -11,8 +11,6 @@ import os
def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
# Create recursively the tree results/{experiment_id}
pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
"""
......@@ -65,8 +63,6 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
# Create recursively the tree results/{experiment_id}
pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
"""
......@@ -122,6 +118,7 @@ if __name__ == "__main__":
parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}')
parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
parser.add_argument('--extracted_forest_sizes_number', nargs='?', type=int, required=True, help='Specify the number of extracted forest sizes. TODO: read it from models dir directly.')
parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
args = parser.parse_args()
......@@ -135,7 +132,6 @@ if __name__ == "__main__":
pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
if args.stage == 1:
extracted_forest_sizes_number = 5 # TODO: hardcoded
if len(args.experiment_ids) != 6:
raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
......@@ -145,7 +141,7 @@ if __name__ == "__main__":
logger.info('Loading base_with_params experiment scores...')
base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
extracted_forest_sizes_number)
args.extracted_forest_sizes_number)
# random_with_params
logger.info('Loading random_with_params experiment scores...')
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
......@@ -161,7 +157,7 @@ if __name__ == "__main__":
logger.info('Loading base_wo_params experiment scores...')
base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[3],
extracted_forest_sizes_number)
args.extracted_forest_sizes_number)
# random_wo_params
logger.info('Loading random_wo_params experiment scores...')
random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
......@@ -174,21 +170,29 @@ if __name__ == "__main__":
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
Plotter.plot_losses(
file_path=output_path + os.sep + 'losses.png',
all_experiment_scores_1=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
"""all_experiment_scores_with_params=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores,
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores],
all_experiment_scores_2=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
all_experiment_scores_wo_params=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores,
omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores],
all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],"""
Plotter.plot_stage1_losses(
file_path=output_path + os.sep + 'losses.png',
all_experiment_scores_with_params=[base_with_params_test_scores,
random_with_params_test_scores,
omp_with_params_test_scores],
all_experiment_scores_wo_params=[base_wo_params_test_scores,
random_wo_params_test_scores,
omp_wo_params_test_scores],
all_labels=['base', 'random', 'omp'],
x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel='MSE', # TODO: hardcoded
all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],
title='Loss values of {} using the best hyperparams'.format(args.dataset_name)
title='Loss values of {} using best and default hyperparameters'.format(args.dataset_name)
)
else:
raise ValueError('This stage number is not supported yet, but it will be!')
......@@ -196,7 +200,7 @@ if __name__ == "__main__":
"""
TODO:
For each dataset:
Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
[ALMOST DONE] Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
Stage 2) A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
Stage 3) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
Stage 4) A figure to finally compare the perf of our approach using the previous selected
......
results/california_housing/stage1/losses.png

70.1 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment