diff --git a/code/bolsonaro/models/ensemble_selection_forest_regressor.py b/code/bolsonaro/models/ensemble_selection_forest_regressor.py
new file mode 100644
index 0000000000000000000000000000000000000000..a20d8ec8bee5a124a10bf708ae176cf48796458e
--- /dev/null
+++ b/code/bolsonaro/models/ensemble_selection_forest_regressor.py
@@ -0,0 +1,102 @@
+from sklearn.metrics import mean_squared_error
+from sklearn.base import BaseEstimator
+from sklearn.tree import DecisionTreeRegressor
+from abc import abstractmethod, ABCMeta
+import numpy as np
+from tqdm import tqdm
+
+
+class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
+    """
+    'Ensemble selection from libraries of models' by Rich Caruana et al
+    """
+
+    def __init__(self, models_parameters, library, score_metric=mean_squared_error):
+        self._models_parameters = models_parameters
+        self._library = library
+        self._extracted_forest_size = self._models_parameters.extracted_forest_size
+        self._score_metric = score_metric
+
+    @property
+    def models_parameters(self):
+        return self._models_parameters
+
+    @property
+    def library(self):
+        return self._library
+
+    def fit(self, X_train, y_train, X_val, y_val):
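+        # Score every tree of the library individually on the validation set.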
+        scores_list = list()
+        for estimator in self._library:
+            val_score = self._score_metric(estimator.predict(X_val), y_val)
+            scores_list.append(val_score)
+
+        class_list = list(self._library)
+        # The score metric is an error (MSE by default): the best tree is the one with the lowest score.
+        m = np.argmin(np.asarray(scores_list))
+        self._ensemble_selected = [class_list[m]]
+        temp_pred = class_list[m].predict(X_val)
+        del class_list[m]
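+        # Greedily grow the ensemble: at each step, add the library tree whose
+        # inclusion yields the lowest validation error of the averaged prediction.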
+        for k in range(self._extracted_forest_size - 1):
+            candidate_index = 0
+            best_score = float('inf')
+            for j in range(len(class_list)):
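+                # Tentatively stack candidate j's predictions and evaluate the averaged ensemble.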
+                temp_pred = np.vstack((temp_pred, class_list[j].predict(X_val)))
+                temp_mean = np.mean(temp_pred, axis=0)
+                temp_score = self._score_metric(temp_mean, y_val)
+                if temp_score < best_score:
+                    candidate_index = j
+                    best_score = temp_score
+                temp_pred = np.delete(temp_pred, -1, 0)
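+            # Permanently add the best candidate of this round to the ensemble.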
+            self._ensemble_selected.append(class_list[candidate_index])
+            temp_pred = np.vstack((temp_pred, class_list[candidate_index].predict(X_val)))
+            del class_list[candidate_index]
+
+    def score(self, X, y):
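+        # The ensemble prediction is the unweighted mean of the selected trees' predictions.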
+        predictions = self.predict_base_estimator(X)
+        mean_predictions = np.mean(predictions, axis=0)
+        return self._score_metric(mean_predictions, y)
+
+    def predict_base_estimator(self, X):
+        predictions = list()
+        for tree in self._ensemble_selected:
+            predictions.append(tree.predict(X))
+        return np.array(predictions)
+
+    @staticmethod
+    def generate_library(X_train, y_train, random_state=None):
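+        """
+        Train one decision tree per hyperparameter combination of the grid
+        below and return the resulting list of fitted trees.
+        """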
+        criterion_arr = ["mse"]#, "friedman_mse", "mae"]
+        splitter_arr = ["best"]#, "random"]
+        depth_arr = [i for i in range(5, 20, 1)]
+        min_samples_split_arr = [i for i in range(2, 20, 1)]
+        min_samples_leaf_arr = [i for i in range(2, 20, 1)]
+        max_features_arr = ["sqrt"]#["auto", "sqrt", "log2"]
+
+        library = list()
+        with tqdm(total=len(criterion_arr) * len(splitter_arr) * \
+            len(depth_arr) * len(min_samples_split_arr) * len(min_samples_leaf_arr) * \
+            len(max_features_arr)) as bar:
+            bar.set_description('Generating library')
+            for criterion in criterion_arr:
+                for splitter in splitter_arr:
+                    for depth in depth_arr:
+                        for min_samples_split in min_samples_split_arr:
+                            for min_samples_leaf in min_samples_leaf_arr:
+                                for max_features in max_features_arr:
+                                    t = DecisionTreeRegressor(criterion=criterion, splitter=splitter, max_depth=depth, min_samples_split=min_samples_split,
+                                        min_samples_leaf=min_samples_leaf, max_features=max_features, random_state=random_state)
+                                    t.fit(X_train, y_train)
+                                    library.append(t)
+                                    bar.update(1)
+        return library
diff --git a/code/bolsonaro/models/model_factory.py b/code/bolsonaro/models/model_factory.py
index bbda6cae89d218c7831780f71b9fc6a7bc022d54..eb3e8b50d7411a2beee8e79bf7da46f6561558a2 100644
--- a/code/bolsonaro/models/model_factory.py
+++ b/code/bolsonaro/models/model_factory.py
@@ -3,6 +3,7 @@ from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
 from bolsonaro.models.model_parameters import ModelParameters
 from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
 from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
+from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
 from bolsonaro.data.task import Task
 
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
@@ -13,7 +14,7 @@ import pickle
 class ModelFactory(object):
 
     @staticmethod
-    def build(task, model_parameters):
+    def build(task, model_parameters, library=None):
         if task not in [Task.BINARYCLASSIFICATION, Task.REGRESSION, Task.MULTICLASSIFICATION]:
             raise ValueError("Unsupported task '{}'".format(task))
 
@@ -38,6 +39,8 @@ class ModelFactory(object):
                 return SimilarityForestRegressor(model_parameters)
             elif model_parameters.extraction_strategy == 'kmeans':
                 return KMeansForestRegressor(model_parameters)
+            elif model_parameters.extraction_strategy == 'ensemble':
+                return EnsembleSelectionForestRegressor(model_parameters, library=library)
             elif model_parameters.extraction_strategy == 'none':
                 return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
                     random_state=model_parameters.seed)
diff --git a/code/compute_results.py b/code/compute_results.py
index 3bd2f8a22cab9fe5dc224bd36fbc90e154b9f558..406f0e0636111add9f3cb6075fe26fd91ff3cf4d 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -133,10 +133,11 @@ if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
-    parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
+    parser.add_argument('--experiment_ids', nargs='+', type=str, required=True, help='Compute the results of the specified experiment id(s).' + \
         'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
         'stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
-        'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}')
+        'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}' + \
+        'stage=5: {{base_with_params}} {{random_with_params}} {{omp_with_params}} [ensemble={{id}}] [similarity={{id}}] [kmeans={{id}}]')
     parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
     parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
     parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
@@ -159,7 +160,7 @@ if __name__ == "__main__":
             raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
 
         # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
-        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
+        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
 
         # Experiments that used the best hyperparameters found for this dataset
 
@@ -167,18 +168,18 @@ if __name__ == "__main__":
         logger.info('Loading base_with_params experiment scores...')
         base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
             base_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
+            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
             extracted_forest_sizes_number)
         # random_with_params
         logger.info('Loading random_with_params experiment scores...')
         random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
             with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
+            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
         # omp_with_params
         logger.info('Loading omp_with_params experiment scores...')
         omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[2])
+                args.models_dir, args.results_dir, int(args.experiment_ids[2]))
 
         # Experiments that didn't use the best hyperparameters found for this dataset
 
@@ -186,19 +187,19 @@ if __name__ == "__main__":
         logger.info('Loading base_wo_params experiment scores...')
         base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
             base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
-                args.models_dir, args.results_dir, args.experiment_ids[3],
+                args.models_dir, args.results_dir, int(args.experiment_ids[3]),
             extracted_forest_sizes_number)
         # random_wo_params
         logger.info('Loading random_wo_params experiment scores...')
         random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
             wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
                 extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[4])
+                args.models_dir, args.results_dir, int(args.experiment_ids[4]))
         # base_wo_params
         logger.info('Loading base_wo_params experiment scores...')
         omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
             omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[5])
+                args.models_dir, args.results_dir, int(args.experiment_ids[5]))
 
         # Sanity check on the metrics retreived
         if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
@@ -243,25 +244,25 @@ if __name__ == "__main__":
         logger.info('Loading no_normalization experiment scores...')
         _, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[0])
+            int(args.experiment_ids[0]))
 
         # normalize_D
         logger.info('Loading normalize_D experiment scores...')
         _, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[1])
+            int(args.experiment_ids[1]))
 
         # normalize_weights
         logger.info('Loading normalize_weights experiment scores...')
         _, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[2])
+            int(args.experiment_ids[2]))
 
         # normalize_D_and_weights
         logger.info('Loading normalize_D_and_weights experiment scores...')
         _, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[3])
+            int(args.experiment_ids[3]))
 
         # Sanity check on the metrics retreived
         if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
@@ -290,21 +291,21 @@ if __name__ == "__main__":
         train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
             extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[0])
+            int(args.experiment_ids[0]))
 
         # train-dev_train-dev_subset
         logger.info('Loading train-dev_train-dev_subset experiment scores...')
         train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
             _, train_dev_train_dev_subset_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[1])
+            int(args.experiment_ids[1]))
 
         # train-train-dev_subset
         logger.info('Loading train-train-dev_subset experiment scores...')
         train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
             _, train_train_dev_subset_experiment_score_metric = \
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
-            args.experiment_ids[2])
+            int(args.experiment_ids[2]))
 
         # Sanity check on the metrics retreived
         if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
@@ -349,13 +350,13 @@ if __name__ == "__main__":
         logger.info('Loading base_with_params experiment scores...')
         base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
             base_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
+            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
             extracted_forest_sizes_number)
         # random_with_params
         logger.info('Loading random_with_params experiment scores...')
         random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
             with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
+            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
         # omp_with_params
         logger.info('Loading omp_with_params experiment scores...')
         """omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
@@ -363,12 +364,12 @@ if __name__ == "__main__":
                 args.models_dir, args.results_dir, args.experiment_ids[2])"""
         omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[2])
+                args.models_dir, args.results_dir, int(args.experiment_ids[2]))
         #omp_with_params_without_weights
         logger.info('Loading omp_with_params experiment scores...')
         omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
+                args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)
 
         """# base_with_params
         logger.info('Loading base_with_params experiment scores 2...')
@@ -402,57 +403,64 @@ if __name__ == "__main__":
             title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
     elif args.stage == 5:
         # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
-        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
+        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
+        all_labels = list()
+        all_scores = list()
 
         # base_with_params
         logger.info('Loading base_with_params experiment scores...')
         base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
             base_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
+            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
             extracted_forest_sizes_number)
         # random_with_params
         logger.info('Loading random_with_params experiment scores...')
         random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
             with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
-            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
+            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
         # omp_with_params
         logger.info('Loading omp_with_params experiment scores...')
         omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[2])
+                args.models_dir, args.results_dir, int(args.experiment_ids[2]))
         #omp_with_params_without_weights
         logger.info('Loading omp_with_params experiment scores...')
         omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
-        # kmeans_with_params
-        logger.info('Loading kmeans_with_params experiment scores...')
-        kmeans_with_params_train_scores, kmeans_with_params_dev_scores, kmeans_with_params_test_scores, _, \
-            kmeans_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[3])
-        # similarity_with_params
-        logger.info('Loading similarity_with_params experiment scores...')
-        similarity_with_params_train_scores, similarity_with_params_dev_scores, similarity_with_params_test_scores, _, \
-            similarity_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
-                args.models_dir, args.results_dir, args.experiment_ids[4])
+                args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)
+
+        all_labels = ['base', 'random', 'omp', 'omp_without_weights']
+        all_scores = [base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
+            omp_with_params_without_weights_test_scores]
+
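+        # The remaining experiment ids are passed as 'strategy=id' pairs (e.g. ensemble=4); parse the label and load the matching scores.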
+        for i in range(3, len(args.experiment_ids)):
+            if 'kmeans' in args.experiment_ids[i]:
+                label = 'kmeans'
+            elif 'similarity' in args.experiment_ids[i]:
+                label = 'similarity'
+            elif 'ensemble' in args.experiment_ids[i]:
+                label = 'ensemble'
+            else:
+                logger.error(f'Invalid experiment id {args.experiment_ids[i]}: expected an ensemble=, similarity= or kmeans= prefix')
+                continue
 
-        # Sanity check on the metrics retreived
-        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric
-            == omp_with_params_experiment_score_metric == kmeans_with_params_experiment_score_metric):
-            raise ValueError('Score metrics of all experiments must be the same.')
-        experiments_score_metric = base_with_params_experiment_score_metric
+            logger.info(f'Loading {label} experiment scores...')
+            _, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes(
+                args.models_dir, args.results_dir, int(args.experiment_ids[i].split('=')[1]))
+            all_labels.append(label)
+            all_scores.append(current_test_scores)
 
         output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5')
         pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
 
         Plotter.plot_stage2_losses(
-            file_path=output_path + os.sep + 'losses.png',
-            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
-                omp_with_params_without_weights_test_scores, kmeans_with_params_test_scores, similarity_with_params_test_scores],
-            all_labels=['base', 'random', 'omp', 'omp_without_weights', 'kmeans', 'similarity'],
+            file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}.png",
+            all_experiment_scores=all_scores,
+            all_labels=all_labels,
             x_value=with_params_extracted_forest_sizes,
             xlabel='Number of trees extracted',
-            ylabel=experiments_score_metric,
+            ylabel=base_with_params_experiment_score_metric,
             title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
     else:
         raise ValueError('This stage number is not supported yet, but it will be!')
diff --git a/code/train.py b/code/train.py
index 1d75e98b9044165abb075a346761a910d8479a83..b7ea2a77ab08cac3e544c12e2d0c3036ae83f379 100644
--- a/code/train.py
+++ b/code/train.py
@@ -2,6 +2,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
 from bolsonaro.data.dataset_loader import DatasetLoader
 from bolsonaro.models.model_factory import ModelFactory
 from bolsonaro.models.model_parameters import ModelParameters
+from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
 from bolsonaro.trainer import Trainer
 from bolsonaro.utils import resolve_experiment_id, tqdm_joblib
 from bolsonaro import LOG_PATH
@@ -53,10 +54,16 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
 
     trainer = Trainer(dataset)
 
+    # The ensemble selection strategy needs a library of candidate trees, generated once per seed.
+    if parameters['extraction_strategy'] == 'ensemble':
+        library = EnsembleSelectionForestRegressor.generate_library(dataset.X_train, dataset.y_train, random_state=seed)
+    else:
+        library = None
+
     if parameters['extraction_strategy'] != 'none':
         with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
             Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
-                models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer)
+                models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer, library)
                 for i in range(len(parameters['extracted_forest_size'])))
     else:
         forest_size = hyperparameters['n_estimators']
@@ -88,7 +94,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
             )
             model_parameters.save(sub_models_dir, experiment_id)
 
-            model = ModelFactory.build(dataset.task, model_parameters)
+            model = ModelFactory.build(dataset.task, model_parameters, library=library)
 
             trainer.init(model, subsets_used=parameters['subsets_used'])
             trainer.train(model)
@@ -97,7 +103,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
     seed_job_pb.update(1)
 
 def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
-    seed, parameters, dataset, hyperparameters, experiment_id, trainer):
+    seed, parameters, dataset, hyperparameters, experiment_id, trainer, library):
 
     logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
         seed, extracted_forest_size, threading.get_ident()))
@@ -132,7 +138,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
     )
     model_parameters.save(sub_models_dir, experiment_id)
 
-    model = ModelFactory.build(dataset.task, model_parameters)
+    model = ModelFactory.build(dataset.task, model_parameters, library=library)
 
     trainer.init(model, subsets_used=parameters['subsets_used'])
     trainer.train(model)
@@ -202,7 +208,7 @@ if __name__ == "__main__":
     parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
     parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
     parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
-    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans.')
+    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans, ensemble.')
     parser.add_argument('--overwrite', action='store_true', default=DEFAULT_OVERWRITE, help='Overwrite the experiment id')
     args = parser.parse_args()
 
@@ -213,7 +219,7 @@ if __name__ == "__main__":
     else:
         parameters = args.__dict__
 
-    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans']:
+    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans', 'ensemble']:
         raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters.extraction_strategy))
 
     pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
diff --git a/scripts/run_stage5_experiments_ensemble.sh b/scripts/run_stage5_experiments_ensemble.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7387a97abae531c34a47a038ccc964a787eebcfe
--- /dev/null
+++ b/scripts/run_stage5_experiments_ensemble.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+core_number=1
+walltime=5:00
+seeds='1 2 3'
+
+for dataset in california_housing # kin8nm kr-vs-kp spambase steel-plates diabetes diamonds boston california_housing
+do
+    #oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
+    #oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
+    #oarsub -p "(gpu is null)" -l /core=5,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=omp --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
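+    # Launch the ensemble selection experiment (experiment_id=4); the base/random/omp jobs above are kept commented out.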
+    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=ensemble --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=4 --models_dir=models/$dataset/stage5_ensemble --subsets_used train+dev,train+dev"
+done