Commit 5e990b2b authored by Charly LAMOTHE

- Finish plot_losses implementation;

- Fix bug in resolve_experiment_id once there are 10 or more experiment ids;
- Display the new experiment id at the beginning of training;
- For now there is only a simple losses plot in compute_results.
parent 00ed2453
Merge request !3: clean scripts
@@ -14,5 +14,5 @@ def resolve_experiment_id(models_dir):
if os.path.isdir(models_dir + os.sep + x)]
if len(ids) > 0:
ids.sort(key=int)
return int(max(ids)) + 1
return int(max([int(i) for i in ids])) + 1
return 1
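The fix above addresses a string-comparison bug: the experiment-id directory names come back from os.listdir as strings, and Python's max on strings is lexicographic, so '9' is considered greater than '10'. A minimal standalone sketch of the failure and of the fixed behaviour (the id list is a made-up example, not an actual directory listing):

ids = [str(i) for i in range(1, 11)]          # '1' .. '10', as os.listdir would return them
print(max(ids))                               # '9'  -> lexicographic max, the old bug
print(int(max(ids)) + 1)                      # 10   -> would collide with the existing id 10
print(int(max([int(i) for i in ids])) + 1)    # 11   -> numeric max, the fixed behaviour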
@@ -6,17 +6,14 @@ from sklearn.neighbors.kde import KernelDensity
class Plotter(object):
@staticmethod
def weight_density(weights):
"""
TODO: to complete
"""
def weight_density(weights, X, file_path):
X_plot = [np.exp(elem) for elem in weights]
fig, ax = plt.subplots()
for kernel in ['gaussian', 'tophat', 'epanechnikov']:
kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X_plot)
log_dens = kde.score_samples(X_plot)
ax.plot(X_plot[:, 0], np.exp(log_dens), '-',
ax.plot(X_plot, np.exp(log_dens), '-',
label="kernel = '{0}'".format(kernel))
ax.legend(loc='upper left')
@@ -24,4 +21,46 @@ class Plotter(object):
ax.set_xlim(-4, 9)
ax.set_ylim(-0.02, 0.4)
plt.show()
fig.savefig(file_path, dpi=fig.dpi)
plt.close(fig)
@staticmethod
def plot_mean_and_CI(ax, mean, lb, ub, x_value, color_mean=None, facecolor=None, label=None):
# plot the shaded range of the confidence intervals
ax.fill_between(x_value, ub, lb, facecolor=facecolor, alpha=.5)
# plot the mean on top
ax.plot(x_value, mean, c=color_mean, label=label)
@staticmethod
def plot_losses(file_path, all_experiment_scores, x_value, xlabel, ylabel, all_labels, title):
fig, ax = plt.subplots()
n = len(all_experiment_scores)
colors = Plotter.get_colors_from_cmap(n)
for i in range(n):
experiment_scores = list(all_experiment_scores[i].values())
mean_experiment_scores = np.average(experiment_scores, axis=0)
std_experiment_scores = np.std(experiment_scores, axis=0)
Plotter.plot_mean_and_CI(
ax=ax,
mean=mean_experiment_scores,
lb=mean_experiment_scores + std_experiment_scores,
ub=mean_experiment_scores - std_experiment_scores,
x_value=x_value,
color_mean=colors[i],
facecolor=colors[i],
label=all_labels[i]
)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.title(title)
plt.legend(loc='upper right')
fig.savefig(file_path, dpi=fig.dpi)
plt.close(fig)
@staticmethod
def get_colors_from_cmap(n_colors, colormap_name='nipy_spectral'):
return [plt.get_cmap(colormap_name)(1. * i/n_colors) for i in range(n_colors)]
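For reference, plot_losses expects one dict per curve, keyed by seed, with one score per extracted forest size; it averages over seeds and draws the mean with a +/- one standard deviation band via plot_mean_and_CI. A minimal self-contained sketch of that pattern, with dummy seeds, scores and output file name (only numpy and matplotlib are assumed):

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical train scores: {seed: [score per extracted forest size]}.
experiment_train_scores = {'1': [0.9, 0.7, 0.5], '2': [1.0, 0.8, 0.6]}
x_value = [10, 20, 30]  # extracted forest sizes

scores = list(experiment_train_scores.values())
mean = np.average(scores, axis=0)  # mean over seeds, per forest size
std = np.std(scores, axis=0)

fig, ax = plt.subplots()
ax.fill_between(x_value, mean - std, mean + std, facecolor='blue', alpha=.5)  # shaded band
ax.plot(x_value, mean, c='blue', label='train')  # mean curve on top
ax.legend(loc='upper right')
fig.savefig('losses_sketch.png', dpi=fig.dpi)
plt.close(fig)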
@@ -2,6 +2,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.visualization.plotter import Plotter
import argparse
import pathlib
@@ -34,15 +35,43 @@ if __name__ == "__main__":
for experiment_id in experiments_ids:
experiment_id_path = args.models_dir + os.sep + experiment_id
pathlib.Path(args.results_dir + os.sep + experiment_id).mkdir(parents=True, exist_ok=True)
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds'
experiment_train_scores = dict()
experiment_dev_scores = dict()
experiment_test_scores = dict()
experiment_score_metrics = list()
for seed in os.listdir(experiment_seed_root_path):
experiment_seed_path = experiment_seed_root_path + os.sep + seed
dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id)
dataset = DatasetLoader.load(dataset_parameters)
extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size'
for extracted_forest_size in os.listdir(extracted_forest_size_root_path):
experiment_train_scores[seed] = list()
experiment_dev_scores[seed] = list()
experiment_test_scores[seed] = list()
extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
for extracted_forest_size in extracted_forest_sizes:
extracted_forest_size_path = extracted_forest_size_root_path + os.sep + extracted_forest_size
model_raw_results = ModelRawResults.load(extracted_forest_size_path)
model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
experiment_train_scores[seed].append(model_raw_results.train_score)
experiment_dev_scores[seed].append(model_raw_results.dev_score)
experiment_test_scores[seed].append(model_raw_results.test_score)
experiment_score_metrics.append(model_raw_results.score_metric)
if len(set(experiment_score_metrics)) > 1:
raise ValueError("The metrics used to compute the dev score aren't the same everytime")
Plotter.plot_losses(
file_path=args.results_dir + os.sep + experiment_id + os.sep + 'losses.png',
all_experiment_scores=[experiment_train_scores, experiment_dev_scores, experiment_test_scores],
x_value=extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel=experiment_score_metrics[0],
all_labels=['train', 'dev', 'test'],
title='Loss values of the trained model'
)
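The loops above assume a fixed on-disk layout, models_dir/<experiment_id>/seeds/<seed>/extracted_forest_size/<size>, with one ModelRawResults dump per leaf directory. A small sketch that builds such a tree with made-up ids, seeds and sizes, just to make the traversal explicit:

import pathlib

models_dir = pathlib.Path('models')
for experiment_id in ['1']:                      # made-up experiment id
    for seed in ['42', '43']:                    # made-up seeds
        for size in ['10', '20', '30']:          # made-up extracted forest sizes
            leaf = models_dir / experiment_id / 'seeds' / seed / 'extracted_forest_size' / size
            leaf.mkdir(parents=True, exist_ok=True)  # each leaf would hold one ModelRawResults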
@@ -66,6 +66,8 @@ if __name__ == "__main__":
experiment_id = resolve_experiment_id(args.models_dir)
experiment_id_str = str(experiment_id)
logger.info('Experiment id: {}'.format(experiment_id_str))
with tqdm(seeds) as seed_bar:
for seed in seed_bar:
seed_bar.set_description('seed={}'.format(seed))
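The training loop now wraps the seeds in tqdm so the progress bar shows which seed is currently running. A minimal standalone version of the same pattern (the seed values here are arbitrary examples):

from tqdm import tqdm

seeds = [1, 2, 3]  # arbitrary example seeds
with tqdm(seeds) as seed_bar:
    for seed in seed_bar:
        seed_bar.set_description('seed={}'.format(seed))
        # ... model training for this seed would go here ...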