"""Aggregate per-experiment model results and plot their loss curves.

For each experiment id (passed via --experiment_ids, or discovered by
listing the models directory), this script walks the on-disk layout

    <models_dir>/<experiment_id>/seeds/<seed>/extracted_forest_size/<size>/

loads the raw train/dev/test scores saved for every seed and extracted
forest size, and writes a 'losses.png' plot per experiment into
<results_dir>/<experiment_id>/.
"""
from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.visualization.plotter import Plotter

from dotenv import find_dotenv, load_dotenv
import argparse
import os
import pathlib


if __name__ == "__main__":
    # Get environment variables from .env (project_dir is required below).
    load_dotenv(find_dotenv('.env.example'))
    DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
    DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
    DEFAULT_EXPERIMENT_IDS = None

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
    parser.add_argument('--experiment_ids', nargs='+', type=int, default=DEFAULT_EXPERIMENT_IDS, help='Compute the results of the specified experiment id(s)')
    args = parser.parse_args()

    pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)

    # Use the ids given on the command line, else every directory found
    # under models_dir (each subdirectory name is an experiment id).
    experiments_ids = [str(experiment_id) for experiment_id in args.experiment_ids] \
        if args.experiment_ids is not None \
        else os.listdir(args.models_dir)

    if experiments_ids is None or len(experiments_ids) == 0:
        raise ValueError("No experiment id was found or specified.")

    for experiment_id in experiments_ids:
        experiment_id_path = args.models_dir + os.sep + experiment_id
        # Mirror the experiment directory on the results side.
        pathlib.Path(args.results_dir + os.sep + experiment_id).mkdir(parents=True, exist_ok=True)
        experiment_seed_root_path = experiment_id_path + os.sep + 'seeds'

        # seed -> list of scores, one entry per extracted forest size.
        experiment_train_scores = dict()
        experiment_dev_scores = dict()
        experiment_test_scores = dict()
        # Collected to verify every run used the same score metric.
        experiment_score_metrics = list()

        for seed in os.listdir(experiment_seed_root_path):
            experiment_seed_path = experiment_seed_root_path + os.sep + seed
            dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id)
            dataset = DatasetLoader.load(dataset_parameters)
            extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size'
            experiment_train_scores[seed] = list()
            experiment_dev_scores[seed] = list()
            experiment_test_scores[seed] = list()
            # NOTE(review): os.listdir order is arbitrary; the sizes are
            # plotted in this order — consider sorting numerically.
            extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
            for extracted_forest_size in extracted_forest_sizes:
                extracted_forest_size_path = extracted_forest_size_root_path + os.sep + extracted_forest_size
                model_raw_results = ModelRawResults.load(extracted_forest_size_path)
                # NOTE(review): the loaded model is never used afterwards;
                # kept because ModelFactory.load reads from disk and may be
                # relied on for validation — confirm before removing.
                model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
                experiment_train_scores[seed].append(model_raw_results.train_score)
                experiment_dev_scores[seed].append(model_raw_results.dev_score)
                experiment_test_scores[seed].append(model_raw_results.test_score)
                experiment_score_metrics.append(model_raw_results.score_metric)

        # All runs of one experiment must share a single score metric,
        # otherwise a common y-axis label would be meaningless.
        if len(set(experiment_score_metrics)) > 1:
            raise ValueError("The metrics used to compute the dev score aren't the same everytime")

        # NOTE(review): x_value is the size list from the LAST seed iterated;
        # this assumes every seed has the same extracted forest sizes — verify.
        Plotter.plot_losses(
            file_path=args.results_dir + os.sep + experiment_id + os.sep + 'losses.png',
            all_experiment_scores=[experiment_train_scores, experiment_dev_scores, experiment_test_scores],
            x_value=extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiment_score_metrics[0],
            all_labels=['train', 'dev', 'test'],
            title='Loss values of the trained model'
        )