from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.visualization.plotter import Plotter
from bolsonaro import LOG_PATH
from bolsonaro.error_handling.logger_factory import LoggerFactory

import argparse
import pathlib
from dotenv import find_dotenv, load_dotenv
import os


def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
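    """
    Collect the train/dev/test scores of every extracted forest size, for every seed, by walking
    models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}.

    :return: the per-seed train, dev and test score dictionaries, the list of extracted forest sizes
        (shared by all seeds) and the score metric name.
    """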
    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds

    """
    Dictionaries to temporarily store the scalar results with the following structure:
    {seed_1: [score_1, ..., score_m], ... seed_n: [score_1, ..., score_k]}
    """
    experiment_train_scores = dict()
    experiment_dev_scores = dict()
    experiment_test_scores = dict()
    all_extracted_forest_sizes = list()

    # Used to check if all losses were computed using the same metric (it should be the case)
    experiment_score_metrics = list()

    # For each seed results stored in models/{experiment_id}/seeds
    seeds = os.listdir(experiment_seed_root_path)
    seeds.sort(key=int)
    for seed in seeds:
        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
        extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes

        # {{seed}:[]}
        experiment_train_scores[seed] = list()
        experiment_dev_scores[seed] = list()
        experiment_test_scores[seed] = list()

        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
        extracted_forest_sizes.sort(key=int)
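        # Keep this seed's sizes so we can later check that they are identical across seeds
        # and use them as the x-axis values of the plots.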
        all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
        for extracted_forest_size in extracted_forest_sizes:
            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
            extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
            # Save the scores
            experiment_train_scores[seed].append(model_raw_results.train_score)
            experiment_dev_scores[seed].append(model_raw_results.dev_score)
            experiment_test_scores[seed].append(model_raw_results.test_score)
            # Save the metric
            experiment_score_metrics.append(model_raw_results.score_metric)

    # Sanity checks
    if len(set(experiment_score_metrics)) > 1:
        raise ValueError("The metrics used to compute the scores aren't the same across seeds.")
    if len(set(tuple(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes)) != 1:
        raise ValueError("The extracted forest sizes aren't the same across seeds.")

    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0], experiment_score_metrics[0]

def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
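    """
    Load the score of the single baseline forest stored under
    models/{experiment_id}/seeds/{seed}/forest_size/{forest_size} for every seed. The score is repeated
    extracted_forest_sizes_number times so it can be plotted as a constant baseline against the
    extracted forest sizes.

    :return: the per-seed train, dev and test score dictionaries and the score metric name.
    """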
    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds

    """
    Dictionaries to temporarily store the scalar results with the following structure:
    {seed_1: [score_1, ..., score_m], ... seed_n: [score_1, ..., score_k]}
    """
    experiment_train_scores = dict()
    experiment_dev_scores = dict()
    experiment_test_scores = dict()

    # Used to check if all losses were computed using the same metric (it should be the case)
    experiment_score_metrics = list()

    # For each seed results stored in models/{experiment_id}/seeds
    seeds = os.listdir(experiment_seed_root_path)
    seeds.sort(key=int)
    for seed in seeds:
        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
        forest_size_root_path = experiment_seed_path + os.sep + 'forest_size' # models/{experiment_id}/seeds/{seed}/forest_size

        # {{seed}:[]}
        experiment_train_scores[seed] = list()
        experiment_dev_scores[seed] = list()
        experiment_test_scores[seed] = list()

        forest_size = os.listdir(forest_size_root_path)[0]
        # models/{experiment_id}/seeds/{seed}/forest_size/{forest_size}
        forest_size_path = forest_size_root_path + os.sep + forest_size
        # Load models/{experiment_id}/seeds/{seed}/forest_size/{forest_size}/model_raw_results.pickle file
        model_raw_results = ModelRawResults.load(forest_size_path)
        for _ in range(extracted_forest_sizes_number):
            # Save the scores
            experiment_train_scores[seed].append(model_raw_results.train_score)
            experiment_dev_scores[seed].append(model_raw_results.dev_score)
            experiment_test_scores[seed].append(model_raw_results.test_score)
            # Save the metric
            experiment_score_metrics.append(model_raw_results.score_metric)

    if len(set(experiment_score_metrics)) > 1:
        raise ValueError("The metrics used to compute the scores aren't the same everytime")

    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, experiment_score_metrics[0]

if __name__ == "__main__":
    # Get the environment variables defined in .env
    load_dotenv(find_dotenv('.env'))

    DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
    DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 4].')
    parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
        ' stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
        ' stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
        ' stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}')
    parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
    parser.add_argument('--extracted_forest_sizes_number', nargs='?', type=int, required=True, help='Specify the number of extracted forest sizes. TODO: read it from models dir directly.')
    parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
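
    # Example invocation (illustrative values; the experiment ids depend on how the models were trained):
    #   python compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 \
    #       --dataset_name california_housing --extracted_forest_sizes_number 5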
    args = parser.parse_args()

    if args.stage not in list(range(1, 5)):
        raise ValueError('stage must be a supported stage id (i.e. [1, 4]).')

    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))

    # Recursively create the results directory tree
    pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)

    if args.stage == 1:
        if len(args.experiment_ids) != 6:
            raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')

        # Experiments that used the best hyperparameters found for this dataset

        # base_with_params
        logger.info('Loading base_with_params experiment scores...')
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
            base_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
            args.extracted_forest_sizes_number)
        # random_with_params
        logger.info('Loading random_with_params experiment scores...')
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])

        # Experiments that didn't use the best hyperparameters found for this dataset

        # base_wo_params
        logger.info('Loading base_wo_params experiment scores...')
        base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
            base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
                args.models_dir, args.results_dir, args.experiment_ids[3],
            args.extracted_forest_sizes_number)
        # random_wo_params
        logger.info('Loading random_wo_params experiment scores...')
        random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
            wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
                extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[4])
        # omp_wo_params
        logger.info('Loading omp_wo_params experiment scores...')
        omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
            omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[5])

        # Sanity check on the retrieved metrics
        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
            omp_with_params_experiment_score_metric == base_wo_params_experiment_score_metric ==
            random_wo_params_experiment_score_metric ==
            omp_wo_params_experiment_score_metric):
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = base_with_params_experiment_score_metric

        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

        """all_experiment_scores_with_params=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
191
192
                random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores,
                omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores],
193
            all_experiment_scores_wo_params=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
194
195
                random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores,
                omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores],
196
197
198
199
200
201
202
203
204
205
206
207
208
            all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
                'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
                'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],"""

        Plotter.plot_stage1_losses(
            file_path=output_path + os.sep + 'losses.png',
            all_experiment_scores_with_params=[base_with_params_test_scores,
                random_with_params_test_scores,
                omp_with_params_test_scores],
            all_experiment_scores_wo_params=[base_wo_params_test_scores,
                random_wo_params_test_scores,
                omp_wo_params_test_scores],
            all_labels=['base', 'random', 'omp'],
            x_value=with_params_extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
            title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name)
        )
    elif args.stage == 2:
        if len(args.experiment_ids) != 4:
            raise ValueError('In the case of stage 2, the number of specified experiment ids must be 4.')

        # no_normalization
        logger.info('Loading no_normalization experiment scores...')
        _, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[0])

        # normalize_D
        logger.info('Loading normalize_D experiment scores...')
        _, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[1])

        # normalize_weights
        logger.info('Loading normalize_weights experiment scores...')
        _, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[2])

        # normalize_D_and_weights
        logger.info('Loading normalize_D_and_weights experiment scores...')
        _, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[3])

        # Sanity check on the retrieved metrics
        if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
            == normalize_weights_experiment_score_metric == normalize_D_and_weights_experiment_score_metric):
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = no_normalization_experiment_score_metric

        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage2')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

        Plotter.plot_stage2_losses(
            file_path=output_path + os.sep + 'losses.png',
            all_experiment_scores=[no_normalization_test_scores, normalize_D_test_scores,
                normalize_weights_test_scores, normalize_D_and_weights_test_scores],
            all_labels=['no_normalization', 'normalize_D', 'normalize_weights', 'normalize_D_and_weights'],
            x_value=extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
            title='Loss values of {}\nusing different normalizations'.format(args.dataset_name))
    elif args.stage == 3:
        if len(args.experiment_ids) != 3:
            raise ValueError('In the case of stage 3, the number of specified experiment ids must be 3.')

        # train-dev_subset
        logger.info('Loading train-dev_subset experiment scores...')
        train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
            extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[0])

        # train-dev_train-dev_subset
        logger.info('Loading train-dev_train-dev_subset experiment scores...')
        train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
            _, train_dev_train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[1])

        # train-train-dev_subset
        logger.info('Loading train-train-dev_subset experiment scores...')
        train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
            _, train_train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[2])

        # Sanity check on the retrieved metrics
        if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
            == train_train_dev_subset_experiment_score_metric):
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = train_dev_subset_experiment_score_metric

        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage3')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

        Plotter.plot_stage2_losses(
            file_path=output_path + os.sep + 'losses.png',
            all_experiment_scores=[train_dev_subset_train_scores, train_dev_train_dev_subset_train_scores,
                train_train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_train_dev_subset_dev_scores,
                train_train_dev_subset_dev_scores, train_dev_subset_test_scores, train_dev_train_dev_subset_test_scores,
                train_train_dev_subset_test_scores],
            all_labels=['train,dev - train', 'train+dev,train+dev - train', 'train,train+dev - train',
                'train,dev - dev', 'train+dev,train+dev - dev', 'train,train+dev - dev',
                'train,dev - test', 'train+dev,train+dev - test', 'train,train+dev - test'],
            x_value=extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
            title='Loss values of {}\nusing different training subsets'.format(args.dataset_name))
    else:
        raise ValueError('This stage number is not supported yet, but it will be!')

    """
311
312
    TODO:
    For each dataset:
313
314
315
    Stage 1) [DONE for california_housing] A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
    Stage 2) [DONE for california_housing] A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
    Stage 3) [DONE for california_housing] A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
316
317
318
319
320
    Stage 4) A figure to finally compare the perf of our approach using the previous selected
        parameters vs the baseline vs other papers using different extracted forest size
        (percentage of the tree size found previously in best hyperparams search) on the abscissa.

    IMPORTANT: Compare experiments that used the same seeds among them (except for stage 1).
321
    """