From 649b4e64702a373b136565c970291987b4b2cb09 Mon Sep 17 00:00:00 2001
From: Charly Lamothe <charly.lamothe@univ-amu.fr>
Date: Thu, 19 Dec 2019 12:56:04 +0100
Subject: [PATCH] - Finish first stable version of stage1 plotting; - Fix some
 variable names; - Add stage1 experiment files for california_housing

---
 code/compute_results.py                       | 235 ++++++++++++------
 code/train.py                                 |   8 +-
 .../stage1/none_with_params_1.json            |  37 +++
 .../stage1/none_wo_params_4.json              |  37 +++
 .../stage1/omp_with_params_3.json             |  37 +++
 .../stage1/omp_wo_params_6.json               |  37 +++
 .../stage1/random_with_params_2.json          |  37 +++
 .../stage1/random_wo_params_5.json            |  37 +++
 8 files changed, 390 insertions(+), 75 deletions(-)
 create mode 100644 experiments/california_housing/stage1/none_with_params_1.json
 create mode 100644 experiments/california_housing/stage1/none_wo_params_4.json
 create mode 100644 experiments/california_housing/stage1/omp_with_params_3.json
 create mode 100644 experiments/california_housing/stage1/omp_wo_params_6.json
 create mode 100644 experiments/california_housing/stage1/random_with_params_2.json
 create mode 100644 experiments/california_housing/stage1/random_wo_params_5.json

diff --git a/code/compute_results.py b/code/compute_results.py
index f0e8cb3..45b5ca7 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -1,8 +1,7 @@
-from bolsonaro.data.dataset_parameters import DatasetParameters
-from bolsonaro.data.dataset_loader import DatasetLoader
 from bolsonaro.models.model_raw_results import ModelRawResults
-from bolsonaro.models.model_factory import ModelFactory
 from bolsonaro.visualization.plotter import Plotter
+from bolsonaro import LOG_PATH
+from bolsonaro.error_handling.logger_factory import LoggerFactory
 
 import argparse
 import pathlib
@@ -10,6 +9,107 @@ from dotenv import find_dotenv, load_dotenv
 import os
 
 
+def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
+    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
+    # Recursively create the results/{experiment_id} directory tree
+    pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
+    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
+
+    """
+    Dictionaries to temporarily store the scalar results with the following structure:
+    {seed_1: [score_1, ..., score_m], ..., seed_n: [score_1, ..., score_m]}
+    """
+    experiment_train_scores = dict()
+    experiment_dev_scores = dict()
+    experiment_test_scores = dict()
+    all_extracted_forest_sizes = list()
+
+    # Used to check if all losses were computed using the same metric (it should be the case)
+    experiment_score_metrics = list()
+
+    # For each seed whose results are stored in models/{experiment_id}/seeds
+    seeds = os.listdir(experiment_seed_root_path)
+    seeds.sort(key=int)
+    for seed in seeds:
+        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
+        extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+
+        # {{seed}:[]}
+        experiment_train_scores[seed] = list()
+        experiment_dev_scores[seed] = list()
+        experiment_test_scores[seed] = list()
+
+        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
+        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
+        extracted_forest_sizes.sort(key=int)
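+        # Keep this seed's list of sizes to verify below that all seeds used the same extracted forest sizes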
+        all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
+        for extracted_forest_size in extracted_forest_sizes:
+            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
+            extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
+            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
+            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
+            # Save the scores
+            experiment_train_scores[seed].append(model_raw_results.train_score)
+            experiment_dev_scores[seed].append(model_raw_results.dev_score)
+            experiment_test_scores[seed].append(model_raw_results.test_score)
+            # Save the metric
+            experiment_score_metrics.append(model_raw_results.score_metric)
+
+    # Sanity checks
+    if len(set(experiment_score_metrics)) > 1:
+        raise ValueError("The metrics used to compute the scores aren't the sames across seeds.")
+    # Compare the full (sorted) lists rather than their sums, so differing lists with equal totals are caught
+    if len(set(tuple(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes)) != 1:
+        raise ValueError("The extracted forest sizes aren't the same across seeds.")
+
+    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0]
+
+def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
+    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
+    # Recursively create the results/{experiment_id} directory tree
+    pathlib.Path(results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
+    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
+
+    """
+    Dictionaries to temporarily store the scalar results with the following structure:
+    {seed_1: [score_1, ..., score_m], ..., seed_n: [score_1, ..., score_m]}
+    """
+    experiment_train_scores = dict()
+    experiment_dev_scores = dict()
+    experiment_test_scores = dict()
+
+    # Used to check if all losses were computed using the same metric (it should be the case)
+    experiment_score_metrics = list()
+
+    # For each seed whose results are stored in models/{experiment_id}/seeds
+    seeds = os.listdir(experiment_seed_root_path)
+    seeds.sort(key=int)
+    for seed in seeds:
+        experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
+        forest_size_root_path = experiment_seed_path + os.sep + 'forest_size' # models/{experiment_id}/seeds/{seed}/forest_size
+
+        # {{seed}:[]}
+        experiment_train_scores[seed] = list()
+        experiment_dev_scores[seed] = list()
+        experiment_test_scores[seed] = list()
+
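+        # The base experiment trains a single forest of fixed size, so each seed has exactly one forest_size directory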
+        forest_size = os.listdir(forest_size_root_path)[0]
+        # models/{experiment_id}/seeds/{seed}/forest_size/{forest_size}
+        forest_size_path = forest_size_root_path + os.sep + forest_size
+        # Load models/{experiment_id}/seeds/{seed}/forest_size/{forest_size}/model_raw_results.pickle file
+        model_raw_results = ModelRawResults.load(forest_size_path)
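+        # Repeat the base scores once per extracted forest size so they align with the x-axis of the stage 1 plots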
+        for _ in range(extracted_forest_sizes_number):
+            # Save the scores
+            experiment_train_scores[seed].append(model_raw_results.train_score)
+            experiment_dev_scores[seed].append(model_raw_results.dev_score)
+            experiment_test_scores[seed].append(model_raw_results.test_score)
+            # Save the metric
+            experiment_score_metrics.append(model_raw_results.score_metric)
+
+    if len(set(experiment_score_metrics)) > 1:
+        raise ValueError("The metrics used to compute the scores aren't the same everytime")
+
+    return experiment_train_scores, experiment_dev_scores, experiment_test_scores
+
 if __name__ == "__main__":
     # get environment variables in .env
     load_dotenv(find_dotenv('.env'))
@@ -21,6 +121,7 @@ if __name__ == "__main__":
     parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 4].')
     parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
         'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}')
+    parser.add_argument('--dataset_name', nargs='?', type=str, help='Specify the dataset name. TODO: read it from models dir directly.')
     parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
     parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
     args = parser.parse_args()
@@ -28,85 +129,77 @@ if __name__ == "__main__":
     if args.stage not in list(range(1, 5)):
         raise ValueError('stage must be a supported stage id (i.e. [1, 4]).')
 
+    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
+
     # Create recursively the results dir tree
     pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
 
     if args.stage == 1:
-        # First axis:
+        extracted_forest_sizes_number = 5 # TODO: hardcoded
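+        # NOTE: this must match the 'extracted_forest_size_samples' value of the stage 1 experiment configurations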
+        if len(args.experiment_ids) != 6:
+            raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
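+        # --dataset_name is needed here to build the stage 1 output path (see the TODO on the argument definition)
+        if args.dataset_name is None:
+            raise ValueError('In the case of stage 1, the dataset name must be specified.')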
+
+        # Experiments that used the best hyperparameters found for this dataset
+
         # base_with_params
+        logger.info('Loading base_with_params experiment scores...')
+        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores = \
+            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
+            extracted_forest_sizes_number)
         # random_with_params
+        logger.info('Loading random_with_params experiment scores...')
+        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
+            with_params_extracted_forest_sizes = extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
         # omp_with_params
+        logger.info('Loading omp_with_params experiment scores...')
+        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _ = \
+            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[2])
+
+        # Experiments that didn't use the best hyperparameters found for this dataset
 
-        # Second axis:
         # base_wo_params
+        logger.info('Loading base_wo_params experiment scores...')
+        base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores = \
+            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[3],
+            extracted_forest_sizes_number)
         # random_wo_params
+        logger.info('Loading random_wo_params experiment scores...')
+        random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
+            wo_params_extracted_forest_sizes = extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[4])
         # omp_wo_params
-        for experiment_id in args.experiment_ids:
-            experiment_id_path = args.models_dir + os.sep + str(experiment_id) # models/{experiment_id}
-            # Create recursively the tree results/{experiment_id}
-            pathlib.Path(args.results_dir + os.sep + str(experiment_id)).mkdir(parents=True, exist_ok=True)
-            experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
-
-            """
-            Dictionaries to temporarly store the scalar results with the following structure:
-            {seed_1: [score_1, ..., score_m], ... seed_n: [score_1, ..., score_k]}
-            TODO: to complete to retreive more results
-            """
-            experiment_train_scores = dict()
-            experiment_dev_scores = dict()
-            experiment_test_scores = dict()
-
-            # Used to check if all losses were computed using the same metric (it should be the case)
-            experiment_score_metrics = list()
-
-            # For each seed results stored in models/{experiment_id}/seeds
-            seeds = os.listdir(experiment_seed_root_path)
-            seeds.sort(key=int)
-            for seed in seeds:
-                experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
-                dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id) # Load the dataset parameters of this experiment, with this specific seed
-                dataset = DatasetLoader.load(dataset_parameters) # Load the dataset using the previously loaded dataset parameters
-                extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size' # models/{experiment_id}/seeds/{seed}/extracted_forest_size
-
-                # {{seed}:[]}
-                experiment_train_scores[seed] = list()
-                experiment_dev_scores[seed] = list()
-                experiment_test_scores[seed] = list()
-
-                # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_size
-                extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
-                extracted_forest_sizes.sort(key=int)
-                for extracted_forest_size in extracted_forest_sizes:
-                    # models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}
-                    extracted_forest_size_path = extracted_forest_size_root_path + os.sep + extracted_forest_size
-                    # Load models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}/model_raw_results.pickle file
-                    model_raw_results = ModelRawResults.load(extracted_forest_size_path)
-                    # Save temporarly some raw results (TODO: to complete to retreive more results)
-                    # Save the scores
-                    experiment_train_scores[seed].append(model_raw_results.train_score)
-                    experiment_dev_scores[seed].append(model_raw_results.dev_score)
-                    experiment_test_scores[seed].append(model_raw_results.test_score)
-                    # Save the metric
-                    experiment_score_metrics.append(model_raw_results.score_metric)
-
-            if len(set(experiment_score_metrics)) > 1:
-                raise ValueError("The metrics used to compute the dev score aren't the same everytime")
-
-            """
-            Example of plot that just plots the losses computed
-            on the train, dev and test subsets using a trained
-            model, with the CI, and depending on the extracted
-            forest size.
-            """
-            Plotter.plot_losses(
-                file_path=args.results_dir + os.sep + str(experiment_id) + os.sep + 'losses.png',
-                all_experiment_scores=[experiment_train_scores, experiment_dev_scores, experiment_test_scores],
-                x_value=extracted_forest_sizes,
-                xlabel='Number of trees extracted',
-                ylabel=experiment_score_metrics[0],
-                all_labels=['train', 'dev', 'test'],
-                title='Loss values of the trained model'
-            )
+        logger.info('Loading omp_wo_params experiment scores...')
+        omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _ = \
+            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[5])
+
+        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage1')
+        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
+
+        Plotter.plot_losses(
+            file_path=output_path + os.sep + 'losses_with_params.png',
+            all_experiment_scores=[base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores,
+                random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores,
+                omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores],
+            x_value=with_params_extracted_forest_sizes,
+            xlabel='Number of trees extracted',
+            ylabel='MSE', # TODO: hardcoded
+            all_labels=['base_with_params_train', 'base_with_params_dev', 'base_with_params_test',
+                'random_with_params_train', 'random_with_params_dev', 'random_with_params_test',
+                'omp_with_params_train', 'omp_with_params_dev', 'omp_with_params_test'],
+            title='Loss values of {} using the best hyperparams'.format(args.dataset_name)
+        )
+        Plotter.plot_losses(
+            file_path=output_path + os.sep + 'losses_wo_params.png',
+            all_experiment_scores=[base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores,
+                random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores,
+                omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores],
+            x_value=wo_params_extracted_forest_sizes,
+            xlabel='Number of trees extracted',
+            ylabel='MSE', # TODO: hardcoded
+            all_labels=['base_wo_params_train', 'base_wo_params_dev', 'base_wo_params_test',
+                'random_wo_params_train', 'random_wo_params_dev', 'random_wo_params_test',
+                'omp_wo_params_train', 'omp_wo_params_dev', 'omp_wo_params_test'],
+            title='Loss values of {} without using the best hyperparams'.format(args.dataset_name)
+        )
     else:
         raise ValueError('This stage number is not supported yet, but it will be!')
 
diff --git a/code/train.py b/code/train.py
index 815eac0..b922fb5 100644
--- a/code/train.py
+++ b/code/train.py
@@ -56,7 +56,7 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
     if parameters['extraction_strategy'] != 'none':
         for extracted_forest_size in parameters['extracted_forest_size']:
             logger.info('extracted_forest_size={}'.format(extracted_forest_size))
-            sub_models_dir = models_dir + os.sep + 'extracted_forest_size' + os.sep + str(extracted_forest_size)
+            sub_models_dir = models_dir + os.sep + 'extracted_forest_sizes' + os.sep + str(extracted_forest_size)
             pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
 
             model_parameters = ModelParameters(
@@ -78,7 +78,7 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
     else:
         forest_size = hyperparameters['n_estimators']
         logger.info('Base forest training with fixed forest size of {}'.format(forest_size))
-        sub_models_dir = models_dir + os.sep + str(forest_size)
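+        # Mirror the 'extracted_forest_sizes' directory level so compute_results.py can locate the base forest results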
+        sub_models_dir = models_dir + os.sep + 'forest_size' + os.sep + str(forest_size)
         pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
 
         model_parameters = ModelParameters(
@@ -107,12 +107,12 @@ python code/train.py --dataset_name=california_housing --seeds 1 2 3 --save_expe
 python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params
 python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params
 python code/train.py --dataset_name=california_housing --seeds 1 2 3 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params
-python code/compute_results.py --stage_number 1 --experiment_ids 1 2 3 4 5 6
+python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing
 """
 if __name__ == "__main__":
     load_dotenv(find_dotenv('.env'))
     DEFAULT_EXPERIMENT_CONFIGURATION_PATH = 'experiments'
-    # the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
+    # the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_sizes/{extracted_forest_size}
     DEFAULT_MODELS_DIR = os.environ['project_dir'] + os.sep + 'models'
     DEFAULT_VERBOSE = False
     DEFAULT_SKIP_BEST_HYPERPARAMS = False
diff --git a/experiments/california_housing/stage1/none_with_params_1.json b/experiments/california_housing/stage1/none_with_params_1.json
new file mode 100644
index 0000000..7b36180
--- /dev/null
+++ b/experiments/california_housing/stage1/none_with_params_1.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": false,
+    "save_experiment_configuration": [
+        "1",
+        "none_with_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "none",
+    "extracted_forest_size": [
+        16,
+        33,
+        50,
+        66,
+        83
+    ],
+    "experiment_id": 1
+}
\ No newline at end of file
diff --git a/experiments/california_housing/stage1/none_wo_params_4.json b/experiments/california_housing/stage1/none_wo_params_4.json
new file mode 100644
index 0000000..d7aa207
--- /dev/null
+++ b/experiments/california_housing/stage1/none_wo_params_4.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": true,
+    "save_experiment_configuration": [
+        "1",
+        "none_wo_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "none",
+    "extracted_forest_size": [
+        1,
+        3,
+        5,
+        6,
+        8
+    ],
+    "experiment_id": 4
+}
\ No newline at end of file
diff --git a/experiments/california_housing/stage1/omp_with_params_3.json b/experiments/california_housing/stage1/omp_with_params_3.json
new file mode 100644
index 0000000..5b09f67
--- /dev/null
+++ b/experiments/california_housing/stage1/omp_with_params_3.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": false,
+    "save_experiment_configuration": [
+        "1",
+        "omp_with_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "omp",
+    "extracted_forest_size": [
+        16,
+        33,
+        50,
+        66,
+        83
+    ],
+    "experiment_id": 3
+}
\ No newline at end of file
diff --git a/experiments/california_housing/stage1/omp_wo_params_6.json b/experiments/california_housing/stage1/omp_wo_params_6.json
new file mode 100644
index 0000000..94a4452
--- /dev/null
+++ b/experiments/california_housing/stage1/omp_wo_params_6.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": true,
+    "save_experiment_configuration": [
+        "1",
+        "omp_wo_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "omp",
+    "extracted_forest_size": [
+        1,
+        3,
+        5,
+        6,
+        8
+    ],
+    "experiment_id": 6
+}
\ No newline at end of file
diff --git a/experiments/california_housing/stage1/random_with_params_2.json b/experiments/california_housing/stage1/random_with_params_2.json
new file mode 100644
index 0000000..4239862
--- /dev/null
+++ b/experiments/california_housing/stage1/random_with_params_2.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": false,
+    "save_experiment_configuration": [
+        "1",
+        "random_with_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "random",
+    "extracted_forest_size": [
+        16,
+        33,
+        50,
+        66,
+        83
+    ],
+    "experiment_id": 2
+}
\ No newline at end of file
diff --git a/experiments/california_housing/stage1/random_wo_params_5.json b/experiments/california_housing/stage1/random_wo_params_5.json
new file mode 100644
index 0000000..22d6bc7
--- /dev/null
+++ b/experiments/california_housing/stage1/random_wo_params_5.json
@@ -0,0 +1,37 @@
+{
+    "experiment_configuration": null,
+    "experiment_configuration_path": "experiments",
+    "dataset_name": "california_housing",
+    "normalize_D": false,
+    "dataset_normalizer": "standard",
+    "forest_size": null,
+    "extracted_forest_size_samples": 5,
+    "extracted_forest_size_stop": 0.1,
+    "models_dir": ".\\models",
+    "dev_size": 0.2,
+    "test_size": 0.2,
+    "random_seed_number": 1,
+    "seeds": [
+        1,
+        2,
+        3
+    ],
+    "subsets_used": "train,dev",
+    "normalize_weights": false,
+    "verbose": false,
+    "skip_best_hyperparams": true,
+    "save_experiment_configuration": [
+        "1",
+        "random_wo_params"
+    ],
+    "job_number": -1,
+    "extraction_strategy": "random",
+    "extracted_forest_size": [
+        1,
+        3,
+        5,
+        6,
+        8
+    ],
+    "experiment_id": 5
+}
\ No newline at end of file
-- 
GitLab