Skip to content
Snippets Groups Projects
Commit 8de5e96a authored by Charly Lamothe's avatar Charly Lamothe
Browse files

- Add code for stages 2 and 3 results;

- Add command lines example for stage 3;
- Add experiment_id option that is useful sometimes;
- Fix subsets_used param;
- Remove experiment_id in config experiment file names;
- Add config experiment files for stages 2 and 3;
- Add results for stages 2 and 3 (california_housing).
parent 58061ea4
No related branches found
No related tags found
1 merge request!9Resolve "Experiment pipeline"
Showing
with 346 additions and 36 deletions
...@@ -59,6 +59,7 @@ class Plotter(object): ...@@ -59,6 +59,7 @@ class Plotter(object):
@staticmethod @staticmethod
def plot_stage1_losses(file_path, all_experiment_scores_with_params, def plot_stage1_losses(file_path, all_experiment_scores_with_params,
all_experiment_scores_wo_params, x_value, xlabel, ylabel, all_labels, title): all_experiment_scores_wo_params, x_value, xlabel, ylabel, all_labels, title):
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True) fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)
n = len(all_experiment_scores_with_params) n = len(all_experiment_scores_with_params)
...@@ -102,6 +103,46 @@ class Plotter(object): ...@@ -102,6 +103,46 @@ class Plotter(object):
fig.savefig(file_path, dpi=fig.dpi, bbox_extra_artists=(legend,), bbox_inches='tight') fig.savefig(file_path, dpi=fig.dpi, bbox_extra_artists=(legend,), bbox_inches='tight')
plt.close(fig) plt.close(fig)
@staticmethod
def plot_stage2_losses(file_path, all_experiment_scores, x_value,
    xlabel, ylabel, all_labels, title):
    """
    Plot the mean score curve of several experiments on a single axis,
    each with a +/- one standard deviation confidence band, then save
    the figure to `file_path`.

    :param file_path: Output image path of the saved figure.
    :param all_experiment_scores: List (one entry per curve) of dicts
        mapping a seed to its list of scores.
    :param x_value: Shared x-axis values.
    :param xlabel: Label of the x axis.
    :param ylabel: Label of the y axis.
    :param all_labels: One legend label per curve.
    :param title: Figure title.
    """
    fig, ax = plt.subplots()
    n = len(all_experiment_scores)
    # Get as many different colors from the cmap (nipy_spectral) as
    # there are curves to plot.
    colors = Plotter.get_colors_from_cmap(n)
    # For each curve to plot
    for i in range(n):
        # Retrieve the scores in a list for each seed
        experiment_scores = list(all_experiment_scores[i].values())
        # Compute the mean and the std for the CI
        mean_experiment_scores = np.average(experiment_scores, axis=0)
        std_experiment_scores = np.std(experiment_scores, axis=0)
        # Plot the score curve with the CI.
        # NOTE(review): lb/ub were previously swapped (lb got mean+std).
        # Harmless if plot_mean_and_CI only fills between them, but the
        # names now match their meaning: lb = lower bound, ub = upper.
        Plotter.plot_mean_and_CI(
            ax=ax,
            mean=mean_experiment_scores,
            lb=mean_experiment_scores - std_experiment_scores,
            ub=mean_experiment_scores + std_experiment_scores,
            x_value=x_value,
            color_mean=colors[i],
            facecolor=colors[i],
            label=all_labels[i]
        )
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(loc='upper right')
    fig.savefig(file_path, dpi=fig.dpi, bbox_inches='tight')
    plt.close(fig)
@staticmethod @staticmethod
def get_colors_from_cmap(n_colors, colormap_name='nipy_spectral'): def get_colors_from_cmap(n_colors, colormap_name='nipy_spectral'):
return [plt.get_cmap(colormap_name)(1. * i/n_colors) for i in range(n_colors)] return [plt.get_cmap(colormap_name)(1. * i/n_colors) for i in range(n_colors)]
...@@ -59,7 +59,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -59,7 +59,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
if len(set([sum(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes])) != 1: if len(set([sum(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes])) != 1:
raise ValueError("The extracted forest sizes aren't the sames across seeds.") raise ValueError("The extracted forest sizes aren't the sames across seeds.")
return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0] return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0], experiment_score_metrics[0]
def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number): def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id} experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
...@@ -104,7 +104,7 @@ def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experim ...@@ -104,7 +104,7 @@ def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experim
if len(set(experiment_score_metrics)) > 1: if len(set(experiment_score_metrics)) > 1:
raise ValueError("The metrics used to compute the scores aren't the same everytime") raise ValueError("The metrics used to compute the scores aren't the same everytime")
return experiment_train_scores, experiment_dev_scores, experiment_test_scores return experiment_train_scores, experiment_dev_scores, experiment_test_scores, experiment_score_metrics[0]
if __name__ == "__main__": if __name__ == "__main__":
# get environment variables in .env # get environment variables in .env
...@@ -116,7 +116,9 @@ if __name__ == "__main__": ...@@ -116,7 +116,9 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 4].') parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 4].')
parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \ parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}') 'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
'stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}')
parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.') parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
parser.add_argument('--extracted_forest_sizes_number', nargs='?', type=int, required=True, help='Specify the number of extracted forest sizes. TODO: read it from models dir directly.') parser.add_argument('--extracted_forest_sizes_number', nargs='?', type=int, required=True, help='Specify the number of extracted forest sizes. TODO: read it from models dir directly.')
parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.') parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
...@@ -139,33 +141,48 @@ if __name__ == "__main__": ...@@ -139,33 +141,48 @@ if __name__ == "__main__":
# base_with_params # base_with_params
logger.info('Loading base_with_params experiment scores...') logger.info('Loading base_with_params experiment scores...')
base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores = \ base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
base_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0], extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
args.extracted_forest_sizes_number) args.extracted_forest_sizes_number)
# random_with_params # random_with_params
logger.info('Loading random_with_params experiment scores...') logger.info('Loading random_with_params experiment scores...')
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \ random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes = extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1]) with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
# omp_with_params # omp_with_params
logger.info('Loading omp_with_params experiment scores...') logger.info('Loading omp_with_params experiment scores...')
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _ = \ omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[2]) omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2])
# Experiments that didn't use the best hyperparameters found for this dataset # Experiments that didn't use the best hyperparameters found for this dataset
# base_wo_params # base_wo_params
logger.info('Loading base_wo_params experiment scores...') logger.info('Loading base_wo_params experiment scores...')
base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores = \ base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[3], base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
args.models_dir, args.results_dir, args.experiment_ids[3],
args.extracted_forest_sizes_number) args.extracted_forest_sizes_number)
# random_wo_params # random_wo_params
logger.info('Loading random_wo_params experiment scores...') logger.info('Loading random_wo_params experiment scores...')
random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \ random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
wo_params_extracted_forest_sizes = extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[4]) wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[4])
# base_wo_params # base_wo_params
logger.info('Loading base_wo_params experiment scores...') logger.info('Loading base_wo_params experiment scores...')
omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _ = \ omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[5]) omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[5])
# Sanity check on the retrieved metrics
if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
omp_with_params_experiment_score_metric == base_wo_params_experiment_score_metric ==
random_wo_params_experiment_score_metric ==
omp_wo_params_experiment_score_metric):
raise ValueError('Score metrics of all experiments must be the same.')
experiments_score_metric = base_with_params_experiment_score_metric
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1') output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True) pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
...@@ -191,18 +208,111 @@ if __name__ == "__main__": ...@@ -191,18 +208,111 @@ if __name__ == "__main__":
all_labels=['base', 'random', 'omp'], all_labels=['base', 'random', 'omp'],
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel='MSE', # TODO: hardcoded ylabel=experiments_score_metric,
title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name) title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name)
) )
elif args.stage == 2:
# Stage 2 compares the four normalization combinations (D and/or weights).
if len(args.experiment_ids) != 4:
raise ValueError('In the case of stage 2, the number of specified experiment ids must be 4.')
# no_normalization
logger.info('Loading no_normalization experiment scores...')
_, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[0])
# normalize_D
logger.info('Loading normalize_D experiment scores...')
_, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[1])
# normalize_weights
logger.info('Loading normalize_weights experiment scores...')
_, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[2])
# normalize_D_and_weights
logger.info('Loading normalize_D_and_weights experiment scores...')
_, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[3])
# Sanity check on the retrieved metrics: all four experiments must have
# been scored with the same metric for the curves to be comparable.
if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
== normalize_weights_experiment_score_metric == normalize_D_and_weights_experiment_score_metric):
raise ValueError('Score metrics of all experiments must be the same.')
experiments_score_metric = no_normalization_experiment_score_metric
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage2')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
# Plot the four test-score curves on one figure, y axis labelled with
# the (shared) score metric name.
Plotter.plot_stage2_losses(
file_path=output_path + os.sep + 'losses.png',
all_experiment_scores=[no_normalization_test_scores, normalize_D_test_scores,
normalize_weights_test_scores, normalize_D_and_weights_test_scores],
all_labels=['no_normalization', 'normalize_D', 'normalize_weights', 'normalize_D_and_weights'],
x_value=extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel=experiments_score_metric,
title='Loss values of {}\nusing different normalizations'.format(args.dataset_name))
elif args.stage == 3:
if len(args.experiment_ids) != 3:
raise ValueError('In the case of stage 3, the number of specified experiment ids must be 3.')
# train-dev_subset
logger.info('Loading train-dev_subset experiment scores...')
train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[0])
# train-dev_train-dev_subset
logger.info('Loading train-dev_train-dev_subset experiment scores...')
train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
_, train_dev_train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[1])
# train-train-dev_subset
logger.info('Loading train-train-dev_subset experiment scores...')
train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
_, train_train_dev_subset_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
args.experiment_ids[2])
# Sanity check on the metrics retreived
if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
== train_train_dev_subset_experiment_score_metric):
raise ValueError('Score metrics of all experiments must be the same.')
experiments_score_metric = train_dev_subset_experiment_score_metric
output_path = os.path.join(args.results_dir, args.dataset_name, 'stage3')
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
Plotter.plot_stage2_losses(
file_path=output_path + os.sep + 'losses.png',
all_experiment_scores=[train_dev_subset_train_scores, train_train_dev_subset_train_scores,
train_train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_train_dev_subset_dev_scores,
train_train_dev_subset_dev_scores, train_dev_subset_test_scores, train_dev_train_dev_subset_test_scores,
train_train_dev_subset_test_scores],
all_labels=['train,dev - train', 'train+dev,train+dev - train', 'train,train+dev - train',
'train,dev - dev', 'train+dev,train+dev - dev', 'train,train+dev - dev',
'train,dev - test', 'train+dev,train+dev - test', 'train,train+dev - test'],
x_value=extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel=experiments_score_metric,
title='Loss values of {}\nusing different training subsets'.format(args.dataset_name))
else: else:
raise ValueError('This stage number is not supported yet, but it will be!') raise ValueError('This stage number is not supported yet, but it will be!')
""" """
TODO: TODO:
For each dataset: For each dataset:
[ALMOST DONE] Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams) Stage 1) [DONE for california_housing] A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
Stage 2) A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations) Stage 2) [DONE for california_housing] A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
Stage 3) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev Stage 3) [DONE for california_housing] A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
Stage 4) A figure to finally compare the perf of our approach using the previous selected Stage 4) A figure to finally compare the perf of our approach using the previous selected
parameters vs the baseline vs other papers using different extracted forest size parameters vs the baseline vs other papers using different extracted forest size
(percentage of the tree size found previously in best hyperparams search) on the abscissa. (percentage of the tree size found previously in best hyperparams search) on the abscissa.
......
...@@ -18,6 +18,7 @@ import threading ...@@ -18,6 +18,7 @@ import threading
import json import json
from tqdm import tqdm from tqdm import tqdm
import numpy as np import numpy as np
import shutil
def process_job(seed, parameters, experiment_id, hyperparameters): def process_job(seed, parameters, experiment_id, hyperparameters):
...@@ -100,7 +101,7 @@ def process_job(seed, parameters, experiment_id, hyperparameters): ...@@ -100,7 +101,7 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
logger.info('Training done') logger.info('Training done')
""" """
Example for stage 1: Command lines example for stage 1:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05
...@@ -109,12 +110,18 @@ python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extra ...@@ -109,12 +110,18 @@ python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extra
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5 python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5
Example for stage 2: Command lines example for stage 2:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05 python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing --extracted_forest_sizes_number=5 python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing --extracted_forest_sizes_number=5
Command lines example for stage 3:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train,dev
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-dev_train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train+dev,train+dev
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train,train+dev
python code/compute_results.py --stage 3 --experiment_ids 11 12 13 --dataset_name=california_housing --extracted_forest_sizes_number=5
""" """
if __name__ == "__main__": if __name__ == "__main__":
load_dotenv(find_dotenv('.env')) load_dotenv(find_dotenv('.env'))
...@@ -130,6 +137,7 @@ if __name__ == "__main__": ...@@ -130,6 +137,7 @@ if __name__ == "__main__":
end_random_seed_range = 2000 end_random_seed_range = 2000
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--experiment_id', nargs='?', type=int, default=None, help='Specify an experiment id. Remove already existing model with this specified experiment id.')
parser.add_argument('--experiment_configuration', nargs='?', type=str, default=None, help='Specify an experiment configuration file name. Overload all other parameters.') parser.add_argument('--experiment_configuration', nargs='?', type=str, default=None, help='Specify an experiment configuration file name. Overload all other parameters.')
parser.add_argument('--experiment_configuration_path', nargs='?', type=str, default=DEFAULT_EXPERIMENT_CONFIGURATION_PATH, help='Specify the experiment configuration directory path.') parser.add_argument('--experiment_configuration_path', nargs='?', type=str, default=DEFAULT_EXPERIMENT_CONFIGURATION_PATH, help='Specify the experiment configuration directory path.')
parser.add_argument('--dataset_name', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.') parser.add_argument('--dataset_name', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
...@@ -143,7 +151,7 @@ if __name__ == "__main__": ...@@ -143,7 +151,7 @@ if __name__ == "__main__":
parser.add_argument('--test_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_TEST_SIZE, help='Test subset ratio.') parser.add_argument('--test_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_TEST_SIZE, help='Test subset ratio.')
parser.add_argument('--random_seed_number', nargs='?', type=int, default=DatasetLoader.DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.') parser.add_argument('--random_seed_number', nargs='?', type=int, default=DatasetLoader.DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.')
parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly') parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly')
parser.add_argument('--subsets_used', nargs='+', type=str, default=DatasetLoader.DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train+dev and OMP on dev.') parser.add_argument('--subsets_used', nargs='?', type=str, default=DatasetLoader.DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train+dev and OMP on dev.')
parser.add_argument('--normalize_weights', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.') parser.add_argument('--normalize_weights', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.')
parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.') parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.')
parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.') parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
...@@ -204,10 +212,13 @@ if __name__ == "__main__": ...@@ -204,10 +212,13 @@ if __name__ == "__main__":
else [random.randint(begin_random_seed_range, end_random_seed_range) \ else [random.randint(begin_random_seed_range, end_random_seed_range) \
for i in range(parameters['random_seed_number'])] for i in range(parameters['random_seed_number'])]
if args.experiment_id:
experiment_id = args.experiment_id
shutil.rmtree(os.path.join(parameters['models_dir'], str(experiment_id)), ignore_errors=True)
else:
# Resolve the next experiment id number (last id + 1) # Resolve the next experiment id number (last id + 1)
experiment_id = resolve_experiment_id(parameters['models_dir']) experiment_id = resolve_experiment_id(parameters['models_dir'])
logger.info('Experiment id: {}'.format(experiment_id)) logger.info('Experiment id: {}'.format(experiment_id))
parameters['experiment_id'] = experiment_id
""" """
If the experiment configuration isn't coming from If the experiment configuration isn't coming from
...@@ -224,8 +235,7 @@ if __name__ == "__main__": ...@@ -224,8 +235,7 @@ if __name__ == "__main__":
args.dataset_name, 'stage' + args.save_experiment_configuration[0]) args.dataset_name, 'stage' + args.save_experiment_configuration[0])
pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True) pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True)
output_experiment_configuration_path = os.path.join(output_experiment_stage_path, output_experiment_configuration_path = os.path.join(output_experiment_stage_path,
args.save_experiment_configuration[1] + '_{}.json'.format( args.save_experiment_configuration[1] + '.json')
experiment_id))
else: else:
pathlib.Path(os.path.join(args.experiment_configuration_path, 'unnamed')).mkdir(parents=True, exist_ok=True) pathlib.Path(os.path.join(args.experiment_configuration_path, 'unnamed')).mkdir(parents=True, exist_ok=True)
output_experiment_configuration_path = os.path.join( output_experiment_configuration_path = os.path.join(
......
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 1
} }
\ No newline at end of file
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 4
} }
\ No newline at end of file
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 3
} }
\ No newline at end of file
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 6
} }
\ No newline at end of file
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 2
} }
\ No newline at end of file
...@@ -32,6 +32,5 @@ ...@@ -32,6 +32,5 @@
50, 50,
66, 66,
83 83
], ]
"experiment_id": 5
} }
\ No newline at end of file
{
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "california_housing",
"normalize_D": false,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 5,
"extracted_forest_size_stop": 0.05,
"models_dir": "./models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": false,
"verbose": false,
"skip_best_hyperparams": false,
"save_experiment_configuration": [
"2",
"no_normalization"
],
"job_number": -1,
"extraction_strategy": "omp",
"extracted_forest_size": [
8,
17,
25,
33,
42
]
}
\ No newline at end of file
{
"experiment_id": 8,
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "california_housing",
"normalize_D": true,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 5,
"extracted_forest_size_stop": 0.05,
"models_dir": "./models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": false,
"verbose": false,
"skip_best_hyperparams": false,
"save_experiment_configuration": [
"2",
"normalize_D"
],
"job_number": -1,
"extraction_strategy": "omp",
"extracted_forest_size": [
8,
17,
25,
33,
42
]
}
\ No newline at end of file
{
"experiment_id": 10,
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "california_housing",
"normalize_D": true,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 5,
"extracted_forest_size_stop": 0.05,
"models_dir": "./models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": true,
"verbose": false,
"skip_best_hyperparams": false,
"save_experiment_configuration": [
"2",
"normalize_D_and_weights"
],
"job_number": -1,
"extraction_strategy": "omp",
"extracted_forest_size": [
8,
17,
25,
33,
42
]
}
\ No newline at end of file
{
"experiment_id": 9,
"experiment_configuration": null,
"experiment_configuration_path": "experiments",
"dataset_name": "california_housing",
"normalize_D": false,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 5,
"extracted_forest_size_stop": 0.05,
"models_dir": "./models",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
],
"subsets_used": "train,dev",
"normalize_weights": true,
"verbose": false,
"skip_best_hyperparams": false,
"save_experiment_configuration": [
"2",
"normalize_weights"
],
"job_number": -1,
"extraction_strategy": "omp",
"extracted_forest_size": [
8,
17,
25,
33,
42
]
}
\ No newline at end of file
results/california_housing/stage2/losses.png

54.8 KiB

results/california_housing/stage3/stage3_losses_all.png

65.5 KiB

results/california_housing/stage3/stage3_losses_test.png

54.9 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment