diff --git a/.env.example b/.env.example
deleted file mode 100644
index 9ca543b382b4889be1d93ed2065ef234b789153c..0000000000000000000000000000000000000000
--- a/.env.example
+++ /dev/null
@@ -1,12 +0,0 @@
-# Environment variables go here, can be read by `python-dotenv` package:
-#
-#   `src/script.py`
-#   ----------------------------------------------------------------
-#    import dotenv
-#
-#    project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
-#    dotenv_path = os.path.join(project_dir, '.env')
-#    dotenv.load_dotenv(dotenv_path)
-#   ----------------------------------------------------------------
-
-project_dir = "."
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 5d8638d3bfb7c3a28756fe87a9cc84aed2cdb5e7..ed07278aa03dbf293f143b22d927fa9f08876edb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -371,3 +371,6 @@ TSWLatexianTemp*
 *.lpz
 
 reports/*.pdf
+
+# Images
+*.png
diff --git a/TODO.md b/TODO.md
index bfb32e8a131b5147b36c9ccba729a6e13e04e5b7..5ea6cc5cf2c933eed2e7ffbf2567d4fe812412cf 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,8 +1,7 @@
-* Find relevant datasets
-* Train and test forests of different sizes
-* Train and test on regression and classification
-* Train and test on different modalities (not only image datasets)
-* Train with different hyperparameters (d, depth, epsilon)
-* Apply OMP with different values of k (notably a small k)
-* Produce figures
-* Implement and compare competing systems
\ No newline at end of file
+* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
+* Fix the ModelFactory.load function.
+* Fix model results loading in compute_results.py.
+* Check that the OMP multiclass classifier works as expected.
+* In the Bayesian search computation, output a different file name depending on the task of the trained model.
+* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
+* Prepare the json experiment files to run.
\ No newline at end of file
diff --git a/code/bolsonaro/data/__init__.py b/code/bolsonaro/data/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ce8e424d0cc66e3f349fcafd744216d818bacaa5 100644
--- a/code/bolsonaro/data/__init__.py
+++ b/code/bolsonaro/data/__init__.py
@@ -0,0 +1,3 @@
+import os
+
+LOG_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'log'))
diff --git a/code/bolsonaro/data/dataset.py b/code/bolsonaro/data/dataset.py
index b2f67c489be94791397649ba292a49f65dd604d8..7108eb5781e1ef2b69926e9fd2239deaa81f44e2 100644
--- a/code/bolsonaro/data/dataset.py
+++ b/code/bolsonaro/data/dataset.py
@@ -1,9 +1,8 @@
 class Dataset(object):
 
-    def __init__(self, task, dataset_parameters, X_train, X_dev, X_test, y_train,
+    def __init__(self, task, X_train, X_dev, X_test, y_train,
         y_dev, y_test):
         self._task = task
-        self._dataset_parameters = dataset_parameters
         self._X_train = X_train
         self._X_dev = X_dev
         self._X_test = X_test
diff --git a/code/bolsonaro/data/dataset_loader.py b/code/bolsonaro/data/dataset_loader.py
index d706b7a07751f715b6398b1f451ec9f337d00f60..b22ecaa83f6e69610c82796e068e91db23e83646 100644
--- a/code/bolsonaro/data/dataset_loader.py
+++ b/code/bolsonaro/data/dataset_loader.py
@@ -1,7 +1,9 @@
 from bolsonaro.data.dataset import Dataset
 from bolsonaro.data.task import Task
+from bolsonaro.utils import change_binary_func_load
 
-from sklearn.datasets import load_boston, load_iris, load_diabetes, load_digits, load_linnerud, load_wine, load_breast_cancer
+from sklearn.datasets import load_boston, load_iris, load_diabetes, \
+    load_digits, load_linnerud, load_wine, load_breast_cancer
 from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
     fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
     fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
@@ -19,45 +21,46 @@ class DatasetLoader(object):
             task = Task.REGRESSION
         elif name == 'iris':
             dataset_loading_func = load_iris
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'diabetes':
             dataset_loading_func = load_diabetes
             task = Task.REGRESSION
         elif name == 'digits':
             dataset_loading_func = load_digits
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'linnerud':
             dataset_loading_func = load_linnerud
             task = Task.REGRESSION
         elif name == 'wine':
             dataset_loading_func = load_wine
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'breast_cancer':
-            dataset_loading_func = load_breast_cancer
-            task = Task.CLASSIFICATION
-        elif name == 'olivetti_faces':
+            dataset_loading_func = change_binary_func_load(load_breast_cancer)
+            task = Task.BINARYCLASSIFICATION
+        elif name == 'olivetti_faces':  # bug (no return_X_y parameter)
             dataset_loading_func = fetch_olivetti_faces
-            task = Task.CLASSIFICATION
-        elif name == '20newsgroups':
+            task = Task.MULTICLASSIFICATION
+        elif name == '20newsgroups':  # bug (no return_X_y parameter)
             dataset_loading_func = fetch_20newsgroups
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == '20newsgroups_vectorized':
             dataset_loading_func = fetch_20newsgroups_vectorized
-            task = Task.CLASSIFICATION
-        elif name == 'lfw_people':
+            task = Task.MULTICLASSIFICATION
+        elif name == 'lfw_people':  # needs PIL (image dataset)
             dataset_loading_func = fetch_lfw_people
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'lfw_pairs':
             dataset_loading_func = fetch_lfw_pairs
+            task = Task.MULTICLASSIFICATION
         elif name == 'covtype':
             dataset_loading_func = fetch_covtype
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'rcv1':
             dataset_loading_func = fetch_rcv1
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'kddcup99':
             dataset_loading_func = fetch_kddcup99
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'california_housing':
             dataset_loading_func = fetch_california_housing
             task = Task.REGRESSION
@@ -87,5 +90,5 @@ class DatasetLoader(object):
             X_dev = scaler.transform(X_dev)
             X_test = scaler.transform(X_test)
 
-        return Dataset(task, dataset_parameters, X_train,
+        return Dataset(task, X_train,
             X_dev, X_test, y_train, y_dev, y_test)
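A quick way to sanity-check the new task routing and the breast_cancer binarization is to load a dataset through the public API. A minimal sketch, assuming it is run from the `code/` directory so that `bolsonaro` is importable:

```python
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.data.dataset_parameters import DatasetParameters

# 'breast_cancer' exercises the new BINARYCLASSIFICATION branch and the
# change_binary_func_load wrapper, so y_train should only contain -1 and +1
params = DatasetParameters(name='breast_cancer', test_size=0.2, dev_size=0.2,
                           random_state=1, dataset_normalizer=None)
dataset = DatasetLoader.load(params)
print(dataset.task, dataset.X_train.shape, sorted(set(dataset.y_train)))
```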
diff --git a/code/bolsonaro/data/dataset_parameters.py b/code/bolsonaro/data/dataset_parameters.py
index 9854a75eb27ec83990b8ae032c58e7dec52a5e8c..88054257d4241ae0426c16ebaca1cb4985c3b65f 100644
--- a/code/bolsonaro/data/dataset_parameters.py
+++ b/code/bolsonaro/data/dataset_parameters.py
@@ -15,11 +15,11 @@ class DatasetParameters(object):
     @property
     def name(self):
         return self._name
-    
+
     @property
     def test_size(self):
         return self._test_size
-    
+
     @property
     def dev_size(self):
         return self._dev_size
diff --git a/code/bolsonaro/data/task.py b/code/bolsonaro/data/task.py
index 2f47fa22f472f769c075f40e1c25a7bf3de45f0d..f1214a64a27873e49f5dbbcb853e4f65f9b07f68 100644
--- a/code/bolsonaro/data/task.py
+++ b/code/bolsonaro/data/task.py
@@ -2,5 +2,6 @@ from enum import Enum
 
 
 class Task(Enum):
-    CLASSIFICATION = 1
+    BINARYCLASSIFICATION = 1
     REGRESSION = 2
+    MULTICLASSIFICATION = 3
diff --git a/code/bolsonaro/hyperparameter_searcher.py b/code/bolsonaro/hyperparameter_searcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..7884d2d4271203e9ebee1e804baa7c1e94a76770
--- /dev/null
+++ b/code/bolsonaro/hyperparameter_searcher.py
@@ -0,0 +1,47 @@
+'''
+This module is used to find the best hyperparameters for a given dataset.
+'''
+
+from bolsonaro.data.dataset_parameters import DatasetParameters
+from bolsonaro.data.dataset_loader import DatasetLoader
+from bolsonaro.data.task import Task
+from bolsonaro.error_handling.logger_factory import LoggerFactory
+from . import LOG_PATH
+
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from skopt import BayesSearchCV
+
+
+class HyperparameterSearcher(object):
+
+    def __init__(self):
+        self._logger = LoggerFactory.create(LOG_PATH, __name__)
+
+    def search(self, dataset, hyperparameter_space, n_iter, cv,
+               random_seed, scorer, verbose=False):
+        '''
+        For a given dataset and hyperparameter space, performs a
+        Bayesian hyperparameter search.
+        :param dataset: a Dataset object
+        :param hyperparameter_space: a dictionary mapping hyperparameter
+        names to their skopt search spaces
+        :param n_iter: the number of iterations of the Bayesian search
+        :param cv: the number of cross-validation folds
+        :param random_seed: int, the seed for the Bayesian search
+        :param scorer: str, the name of the scorer
+        :param verbose: bool, print the progress of the search
+        :return: a fitted skopt.searchcv.BayesSearchCV object
+        '''
+
+        if dataset.task == Task.REGRESSION:
+            estimator = RandomForestRegressor(n_jobs=-1, random_state=random_seed)
+        else:
+            estimator = RandomForestClassifier(n_jobs=-1, random_state=random_seed)
+
+        opt = BayesSearchCV(estimator, hyperparameter_space, n_iter=n_iter,
+                            cv=cv, n_jobs=-1, random_state=random_seed,
+                            scoring=scorer, verbose=verbose)
+
+        opt.fit(dataset.X_train, dataset.y_train)
+
+        return opt
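For reference, a self-contained sketch of what `search` does under the hood, on a toy regression problem; `load_boston` and the search spaces here are only illustrative:

```python
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from skopt import BayesSearchCV
from skopt.space import Integer

X, y = load_boston(return_X_y=True)

# Bayesian search over two forest hyperparameters, as in HyperparameterSearcher
opt = BayesSearchCV(RandomForestRegressor(random_state=1),
                    {'n_estimators': Integer(10, 100),
                     'max_depth': Integer(1, 20)},
                    n_iter=10, cv=3, scoring='neg_mean_squared_error',
                    random_state=1)
opt.fit(X, y)
print(opt.best_params_, opt.best_score_)
```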
diff --git a/code/bolsonaro/models/__init__.py b/code/bolsonaro/models/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ce8e424d0cc66e3f349fcafd744216d818bacaa5 100644
--- a/code/bolsonaro/models/__init__.py
+++ b/code/bolsonaro/models/__init__.py
@@ -0,0 +1,3 @@
+import os
+
+LOG_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'log'))
diff --git a/code/bolsonaro/models/model_factory.py b/code/bolsonaro/models/model_factory.py
index fb6b32cb26727d2221367f208598f04e1a19dfb1..a93e6090e253dc9bdb3aacfc53e1c99a1f9ef120 100644
--- a/code/bolsonaro/models/model_factory.py
+++ b/code/bolsonaro/models/model_factory.py
@@ -1,4 +1,4 @@
-from bolsonaro.models.omp_forest_classifier import OmpForestClassifier
+from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
 from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
 from bolsonaro.data.task import Task
 from bolsonaro.models.model_parameters import ModelParameters
@@ -11,18 +11,22 @@ class ModelFactory(object):
 
     @staticmethod
     def build(task, model_parameters):
-        if task == Task.CLASSIFICATION:
-            model_func = OmpForestClassifier
+        if task == Task.BINARYCLASSIFICATION:
+            model_func = OmpForestBinaryClassifier
         elif task == Task.REGRESSION:
             model_func = OmpForestRegressor
+        elif task == Task.MULTICLASSIFICATION:
+            model_func = OmpForestMulticlassClassifier
         else:
             raise ValueError("Unsupported task '{}'".format(task))
         return model_func(model_parameters)
 
     @staticmethod
     def load(task, directory_path, experiment_id, model_raw_results):
+        raise NotImplementedError  # loading is still broken, see TODO.md
         model_parameters = ModelParameters.load(directory_path, experiment_id)
         model = ModelFactory.build(task, model_parameters)
-        model.set_forest(model_raw_results.forest)
-        model.set_weights(model_raw_results.weights)
+        # TODO: do what is needed here to restore the model correctly
+        model.set_forest(model_raw_results.model_object.forest)
+        model.set_weights(model_raw_results.model_object.weights)
         return model
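Hedged usage sketch of the factory (run from `code/`); the hyperparameters shown are illustrative, not the tuned stage-1 values:

```python
from bolsonaro.data.task import Task
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_parameters import ModelParameters

# the hyperparameters dict feeds the underlying RandomForest* constructor
params = ModelParameters(extracted_forest_size=10, normalize_D=False,
                         subsets_used='train,dev', normalize_weights=False,
                         seed=1, hyperparameters={'n_estimators': 100})
model = ModelFactory.build(Task.REGRESSION, params)
print(type(model).__name__)  # OmpForestRegressor
```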
diff --git a/code/bolsonaro/models/model_parameters.py b/code/bolsonaro/models/model_parameters.py
index 450d97bc01fe8f02ccd399d2c47a0c7397b4cb13..31a451b70578835fe4663508de9e15f99bf6cc19 100644
--- a/code/bolsonaro/models/model_parameters.py
+++ b/code/bolsonaro/models/model_parameters.py
@@ -5,16 +5,13 @@ import os
 
 class ModelParameters(object):
 
-    def __init__(self, forest_size, extracted_forest_size, normalize_D, use_dev_subset, seed=None):
-        self._forest_size = forest_size
+    def __init__(self, extracted_forest_size, normalize_D, subsets_used, normalize_weights, seed, hyperparameters):
         self._extracted_forest_size = extracted_forest_size
         self._normalize_D = normalize_D
-        self._use_dev_subset = use_dev_subset
+        self._subsets_used = subsets_used
+        self._normalize_weights = normalize_weights
         self._seed = seed
-
-    @property
-    def forest_size(self):
-        return self._forest_size
+        self._hyperparameters = hyperparameters
 
     @property
     def extracted_forest_size(self):
@@ -25,13 +22,21 @@ class ModelParameters(object):
         return self._normalize_D
 
     @property
-    def use_dev_subset(self):
-        return self._use_dev_subset
+    def subsets_used(self):
+        return self._subsets_used
+
+    @property
+    def normalize_weights(self):
+        return self._normalize_weights
 
     @property
     def seed(self):
         return self._seed
 
+    @property
+    def hyperparameters(self):
+        return self._hyperparameters
+
     def save(self, directory_path, experiment_id):
         save_obj_to_json(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id),
             self.__dict__)
diff --git a/code/bolsonaro/models/model_raw_results.py b/code/bolsonaro/models/model_raw_results.py
index 673cb0fc65b7378e95c03b186d246cb70b384a07..df8b2ec0b10704a8a8c397b9012298e8b901e14b 100644
--- a/code/bolsonaro/models/model_raw_results.py
+++ b/code/bolsonaro/models/model_raw_results.py
@@ -6,13 +6,12 @@ import datetime
 
 class ModelRawResults(object):
 
-    def __init__(self, forest, weights, training_time,
+    def __init__(self, model_object, training_time,
         datetime, train_score, dev_score, test_score,
         score_metric, train_score_regressor, dev_score_regressor,
         test_score_regressor):
 
-        self._forest = forest
-        self._weights = weights
+        self._model_object = model_object
         self._training_time = training_time
         self._datetime = datetime
         self._train_score = train_score
@@ -24,12 +23,8 @@ class ModelRawResults(object):
         self._test_score_regressor = test_score_regressor
     
     @property
-    def forest(self):
-        return self._forest
-
-    @property
-    def weights(self):
-        return self._weights
+    def model_object(self):
+        return self._model_object
 
     @property
     def training_time(self):
diff --git a/code/bolsonaro/models/omp_forest.py b/code/bolsonaro/models/omp_forest.py
new file mode 100644
index 0000000000000000000000000000000000000000..2da0beab64ef5361efbde6d6197f957fe627886c
--- /dev/null
+++ b/code/bolsonaro/models/omp_forest.py
@@ -0,0 +1,121 @@
+from bolsonaro import LOG_PATH
+from bolsonaro.error_handling.logger_factory import LoggerFactory
+
+from abc import abstractmethod, ABCMeta
+import numpy as np
+from sklearn.linear_model import OrthogonalMatchingPursuit
+from sklearn.base import BaseEstimator
+
+
+class OmpForest(BaseEstimator, metaclass=ABCMeta):
+    def __init__(self, models_parameters, base_forest_estimator):
+        self._base_forest_estimator = base_forest_estimator
+        self._models_parameters = models_parameters
+        self._logger = LoggerFactory.create(LOG_PATH, __name__)
+
+    @property
+    def models_parameters(self):
+        return self._models_parameters
+
+    def score_base_estimator(self, X, y):
+        return self._base_forest_estimator.score(X, y)
+
+    def _base_estimator_predictions(self, X):
+        return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
+
+    @property
+    def forest(self):
+        return self._base_forest_estimator.estimators_
+
+    # sklearn BaseEstimator API methods
+    def fit(self, X_forest, y_forest, X_omp, y_omp):
+        self._base_forest_estimator.fit(X_forest, y_forest)
+        self._extract_subforest(X_omp, y_omp)
+        return self
+
+    def _extract_subforest(self, X, y):
+        """
+        Given an already estimated regressor: apply OMP to get the weight of each tree.
+
+        The X data is used for interrogation of every tree in the forest. The y data
+        is used for finding the weights in OMP.
+
+        :param X: (n_sample, n_features) array
+        :param y: (n_sample,) array
+        :return:
+        """
+        self._logger.debug("Forest make prediction on X")
+        D = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            # question: maybe consider other kinds of normalization.. centering?
+            self._logger.debug("Compute norm of predicted vectors on X")
+            self._forest_norms = np.linalg.norm(D, axis=0)
+            D /= self._forest_norms
+
+        self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
+                           .format(self._models_parameters.extracted_forest_size))
+
+        self.fit_omp(D, y)
+
+    @staticmethod
+    def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
+        if normalize_weights:
+            # we can normalize weights (by their sum) so that they sum to 1
+            # and they can be interpreted as impact percentages for interpretability.
+            # this requires removing the (-) sign from the weights, i.e. moving it to the predictions (use unsigned_coef)
+
+            # question: I don't understand the nonzero trick here?
+            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
+            coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add axis to make sure it will be broadcasted line-wise (there might be a confusion when forest_prediction is square)
+            unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
+            intercept = omp_obj.intercept_
+
+            adjusted_forest_predictions = base_predictions * coef_signs
+            predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept
+
+        else:
+            predictions = omp_obj.predict(base_predictions)
+
+        return predictions
+
+    @abstractmethod
+    def fit_omp(self, atoms, objective):
+        pass
+
+    @abstractmethod
+    def predict(self, X):
+        pass
+
+    @abstractmethod
+    def score(self, X, y):
+        pass
+
+class SingleOmpForest(OmpForest):
+    def __init__(self, models_parameters, base_forest_estimator):
+        # fit_intercept shouldn't be set to False as the data isn't necessarily centered here
+        # normalization is handled outside OMP
+        self._omp = OrthogonalMatchingPursuit(
+            n_nonzero_coefs=models_parameters.extracted_forest_size,
+            fit_intercept=True, normalize=False)
+
+        super().__init__(models_parameters, base_forest_estimator)
+
+    def fit_omp(self, atoms, objective):
+        self._omp.fit(atoms, objective)
+
+    def predict(self, X):
+        """
+        Apply the SingleOmpForest to X.
+
+        Make all the base tree predictions then apply the OMP weights for pruning.
+
+        :param X:
+        :return:
+        """
+        forest_predictions = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
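A self-contained sketch of the pruning idea these classes implement: each tree's predictions form one column ("atom") of the dictionary D, and OMP selects a sparse weight vector over trees. This deliberately bypasses the repo's classes to show just the math:

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import OrthogonalMatchingPursuit

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)

# D has shape (n_samples, n_trees): one prediction column per tree
D = np.array([tree.predict(X) for tree in forest.estimators_]).T

omp = OrthogonalMatchingPursuit(n_nonzero_coefs=10, fit_intercept=True).fit(D, y)
kept = np.flatnonzero(omp.coef_)
print('kept {} of {} trees'.format(len(kept), len(forest.estimators_)))
```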
diff --git a/code/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py
index 12cc23fab69fc0b79ff40b1d6957db5532a8c452..128347aa61caf79dc908397ef0588d646d8b0dee 100644
--- a/code/bolsonaro/models/omp_forest_classifier.py
+++ b/code/bolsonaro/models/omp_forest_classifier.py
@@ -1,11 +1,106 @@
-from sklearn.base import BaseEstimator
+from bolsonaro.models.omp_forest import OmpForest, SingleOmpForest
+from bolsonaro.utils import binarize_class_data
+
+import numpy as np
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import OrthogonalMatchingPursuit
+
+
+class OmpForestBinaryClassifier(SingleOmpForest):
+
+    DEFAULT_SCORE_METRIC = 'indicator'
+
+    def __init__(self, models_parameters):
+        estimator = RandomForestClassifier(**models_parameters.hyperparameters,
+                                           random_state=models_parameters.seed, n_jobs=-1)
+        super().__init__(models_parameters, estimator)
+
+    def _check_classes(self, y):
+        assert len(set(y).difference({-1, 1})) == 0, "Classes for binary classifier must be {-1, +1}"
+
+    def fit(self, X_forest, y_forest, X_omp, y_omp):
+        self._check_classes(y_forest)
+        self._check_classes(y_omp)
+
+        return super().fit(X_forest, y_forest, X_omp, y_omp)
+
+    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
+        """
+        Evaluate the OmpForestBinaryClassifier on (`X`, `y`) using `metric`
+
+        :param X:
+        :param y:
+        :param metric: might be "indicator"
+        :return:
+        """
+        predictions = self.predict(X)
+
+        if metric == 'indicator':
+            # proportion of correctly classified samples (labels are in {-1, +1})
+            evaluation = np.mean(np.sign(predictions) == y)
+        else:
+            raise ValueError("Unsupported metric '{}'.".format(metric))
+
+        return evaluation
+
+
+class OmpForestMulticlassClassifier(OmpForest):
+
+    DEFAULT_SCORE_METRIC = 'indicator'
+
+    def __init__(self, models_parameters):
+        estimator = RandomForestClassifier(**models_parameters.hyperparameters,
+                                           random_state=models_parameters.seed, n_jobs=-1)
+        super().__init__(models_parameters, estimator)
+        # question: maybe initialize the OMPs in __init__, as in SingleOmpForest?
+        self._dct_class_omp = {}
+
+    def fit_omp(self, atoms, objective):
+        assert len(self._dct_class_omp) == 0, "fit_omp can be called only once on {}".format(self.__class__.__name__)
+        possible_classes = sorted(set(objective))
+        for class_label in possible_classes:
+            atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
+            objective_binary = binarize_class_data(objective, class_label, inplace=False)
+            # TODO: maybe treat the forest size as global, so that only a fraction of it is available to each OMP...
+            omp_class = OrthogonalMatchingPursuit(
+                n_nonzero_coefs=self.models_parameters.extracted_forest_size,
+                fit_intercept=True, normalize=False)
+            omp_class.fit(atoms_binary, objective_binary)
+            self._dct_class_omp[class_label] = omp_class
+        return self._dct_class_omp
+
+    def predict(self, X):
+        forest_predictions = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        label_names = []
+        preds = []
+        for class_label, omp_class in self._dct_class_omp.items():
+            label_names.append(class_label)
+            atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
+            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
+
+        # TODO: check that this isn't buggy
+
+        preds = np.array(preds).T
+        max_preds = np.argmax(preds, axis=1)
+        return np.array(label_names)[max_preds]
+
+    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
+        predictions = self.predict(X)
 
+        if metric == 'indicator':
+            evaluation = np.mean(predictions == y)
+        else:
+            raise ValueError("Unsupported metric '{}'.".format(metric))
 
-class OmpForestClassifier(BaseEstimator):
+        return evaluation
 
-    def __init__(self):
-        raise ValueError('Classification tasks are not supported for now')
 
-    def fit(self, X, y):
-        pass
+if __name__ == "__main__":
+    forest = RandomForestClassifier(n_estimators=10)
+    X = np.random.rand(10, 5)
+    y = np.random.choice([-1, +1], 10)
+    forest.fit(X, y)
+    print(forest.predict(np.random.rand(10, 5)))
\ No newline at end of file
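To make the one-vs-rest scheme in OmpForestMulticlassClassifier concrete, here is a stripped-down sketch with fake atoms; unlike the class above, it binarizes only the labels, not the atoms:

```python
import numpy as np
from sklearn.linear_model import OrthogonalMatchingPursuit

rng = np.random.RandomState(0)
D = rng.rand(100, 20)                # fake tree predictions (atoms)
y = rng.choice([0, 1, 2], size=100)  # three classes

# one OMP per class, fit against {-1, +1} targets
omps = {}
for label in sorted(set(y)):
    y_bin = np.where(y == label, 1.0, -1.0)
    omps[label] = OrthogonalMatchingPursuit(n_nonzero_coefs=5,
                                            fit_intercept=True).fit(D, y_bin)

# prediction: the class whose OMP responds most strongly wins
scores = np.column_stack([omps[label].predict(D) for label in sorted(omps)])
pred = np.array(sorted(omps))[np.argmax(scores, axis=1)]
print('train accuracy:', np.mean(pred == y))
```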
diff --git a/code/bolsonaro/models/omp_forest_regressor.py b/code/bolsonaro/models/omp_forest_regressor.py
index 50754246abc13e0282e4cbd6aa1e917a0e3544ed..a0c8b4708d52336bf39544ffd0b66c527466620a 100644
--- a/code/bolsonaro/models/omp_forest_regressor.py
+++ b/code/bolsonaro/models/omp_forest_regressor.py
@@ -1,64 +1,18 @@
-from bolsonaro import LOG_PATH
-from bolsonaro.error_handling.logger_factory import LoggerFactory
+from bolsonaro.models.omp_forest import SingleOmpForest
 
 from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import OrthogonalMatchingPursuit
-from sklearn.base import BaseEstimator
 import numpy as np
 
 
-class OmpForestRegressor(BaseEstimator):
+class OmpForestRegressor(SingleOmpForest):
 
     DEFAULT_SCORE_METRIC = 'mse'
 
     def __init__(self, models_parameters):
-        self._regressor = RandomForestRegressor(n_estimators=models_parameters.forest_size,
-            random_state=models_parameters.seed)
-        self._models_parameters = models_parameters
-        self._logger = LoggerFactory.create(LOG_PATH, __name__)
+        estimator = RandomForestRegressor(**models_parameters.hyperparameters,
+                                          random_state=models_parameters.seed, n_jobs=-1)
 
-    @property
-    def forest(self):
-        return self._forest
-
-    def set_forest(self, forest):
-        self._forest = forest
-        self._regressor.estimators_ = forest
-
-    @property
-    def weights(self):
-        return self._weights
-
-    def set_weights(self, weights):
-        self._weights = weights
-
-    @property
-    def models_parameters(self):
-        return self._models_parameters
-
-    def fit(self, X_forest, y_forest, X_omp, y_omp):
-        self._forest = self._train_forest(X_forest, y_forest)
-        self._weights = self._extract_subforest(X_omp, y_omp)
-        return self
-
-    def score_regressor(self, X, y):
-        return self._regressor.score(X, y)
-
-    def predict(self, X):
-        """
-        Apply the OMPForestRegressor to X.
-
-        :param X:
-        :return:
-        """
-        D = self._forest_prediction(X)
-
-        if self._models_parameters.normalize_D:
-            D /= self._forest_norms
-
-        predictions = D @ self.weights
-
-        return predictions
+        super().__init__(models_parameters, estimator)
 
     def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
         """
@@ -77,41 +31,3 @@ class OmpForestRegressor(BaseEstimator):
             raise ValueError("Unsupported metric '{}'.".format(metric))
 
         return evaluation
-
-    def _train_forest(self, X, y):
-        self._regressor.fit(X, y)
-        forest = self._regressor.estimators_
-        return forest
-    
-    def _extract_subforest(self, X, y):
-        """
-        Given an already estimated regressor: apply OMP to get the weight of each tree.
-
-        The X data is used for interrogation of every tree in the forest. The y data
-        is used for finding the weights in OMP.
-
-        :param X: (n_sample, n_features) array
-        :param y: (n_sample,) array
-        :return:
-        """
-        self._logger.debug("Forest make prediction on X")
-        D = self._forest_prediction(X)
-
-        if self._models_parameters.normalize_D:
-            # question: maybe consider other kinds of normalization
-            self._logger.debug("Compute norm of predicted vectors on X")
-            self._forest_norms = np.linalg.norm(D, axis=0)
-            D /= self._forest_norms
-
-        omp = OrthogonalMatchingPursuit(
-            n_nonzero_coefs=self._models_parameters.extracted_forest_size,
-            fit_intercept=False, normalize=False)
-        self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
-                           .format(self._models_parameters.extracted_forest_size))
-        omp.fit(D, y)
-        weights = omp.coef_
-        # question: why not to use directly the omp estimator instead of bypassing it using the coefs?
-        return weights
-
-    def _forest_prediction(self, X):
-        return np.array([tree.predict(X) for tree in self._forest]).T
diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py
index 01d0a036c8058cb55f947c0d80645b5619da0cdb..4a32bffb8c7a2e129bf6f010c8a2c3a339a53a4b 100644
--- a/code/bolsonaro/trainer.py
+++ b/code/bolsonaro/trainer.py
@@ -8,46 +8,78 @@ import numpy as np
 
 
 class Trainer(object):
+    """
+    Class capable of fitting any model object to some prepared data, then evaluating and saving the results through the `train` and `compute_results` methods.
+    """
 
     def __init__(self, dataset):
+        """
+        :param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and y_test attributes
+        """
         self._dataset = dataset
         self._logger = LoggerFactory.create(LOG_PATH, __name__)
 
-    def train(self, model, models_dir):
-        self._logger.debug('Training model using train set...')
-        begin_time = time.time()
-
-        if model.models_parameters.use_dev_subset:
-            X_forest = self._dataset.X_train
-            y_forest = self._dataset.y_train
-            X_omp = self._dataset.X_dev
-            y_omp = self._dataset.y_dev
+    def init(self, model):
+        if model.models_parameters.subsets_used == 'train,dev':
+            self._X_forest = self._dataset.X_train
+            self._y_forest = self._dataset.y_train
+            self._X_omp = self._dataset.X_dev
+            self._y_omp = self._dataset.y_dev
             self._logger.debug('Fitting the forest on train subset and OMP on dev subset.')
-        else:
-            X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
-            X_omp = X_forest
-            y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
-            y_omp = y_forest
+        elif model.models_parameters.subsets_used == 'train+dev,train+dev':
+            self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
+            self._X_omp = self._X_forest
+            self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
+            self._y_omp = self._y_forest
             self._logger.debug('Fitting both the forest and OMP on train+dev subsets.')
+        elif model.models_parameters.subsets_used == 'train,train+dev':
+            self._X_forest = self._dataset.X_train
+            self._y_forest = self._dataset.y_train
+            self._X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
+            self._y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
+            self._logger.debug('Fitting the forest on train subset and OMP on train+dev subsets.')
+        else:
+            raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
 
+    def train(self, model):
+        """
+        :param model: model object to fit; must expose fit(X_forest, y_forest, X_omp, y_omp)
+        :return:
+        """
+
+        self._logger.debug('Training model using train set...')
+        self._begin_time = time.time()
         model.fit(
-            X_forest=X_forest,
-            y_forest=y_forest,
-            X_omp=X_omp,
-            y_omp=y_omp
+            X_forest=self._X_forest,
+            y_forest=self._y_forest,
+            X_omp=self._X_omp,
+            y_omp=self._y_omp
         )
-        end_time = time.time()
+        self._end_time = time.time()
 
-        ModelRawResults(
-            forest=model.forest,
-            weights=model.weights,
-            training_time=end_time - begin_time,
+    def compute_results(self, model, models_dir):
+        """
+        :param model: trained model object exposing score and score_base_estimator
+        :param models_dir: Where the results will be saved
+        """
+        results = ModelRawResults(
+            model_object=model,
+            training_time=self._end_time - self._begin_time,
             datetime=datetime.datetime.now(),
             train_score=model.score(self._dataset.X_train, self._dataset.y_train),
             dev_score=model.score(self._dataset.X_dev, self._dataset.y_dev),
             test_score=model.score(self._dataset.X_test, self._dataset.y_test),
             score_metric=model.DEFAULT_SCORE_METRIC, # TODO: resolve the used metric in a proper way
-            train_score_regressor=model.score_regressor(self._dataset.X_train, self._dataset.y_train),
-            dev_score_regressor=model.score_regressor(self._dataset.X_dev, self._dataset.y_dev),
-            test_score_regressor=model.score_regressor(self._dataset.X_test, self._dataset.y_test)
-        ).save(models_dir)
+            train_score_regressor=model.score_base_estimator(self._dataset.X_train, self._dataset.y_train),
+            dev_score_regressor=model.score_base_estimator(self._dataset.X_dev, self._dataset.y_dev),
+            test_score_regressor=model.score_base_estimator(self._dataset.X_test, self._dataset.y_test)
+        )
+        results.save(models_dir)
+        self._logger.info("Base performance on test: {}".format(results.test_score_regressor))
+        self._logger.info("Performance on test: {}".format(results.test_score))
+
+        self._logger.info("Base performance on train: {}".format(results.train_score_regressor))
+        self._logger.info("Performance on train: {}".format(results.train_score))
+
+        self._logger.info("Base performance on dev: {}".format(results.dev_score_regressor))
+        self._logger.info("Performance on dev: {}".format(results.dev_score))
diff --git a/code/bolsonaro/utils.py b/code/bolsonaro/utils.py
index 82e501878ba06320914230096213d2d28548e4dc..d7509ad9e85cde3cc0c649f85cfb5b60ead9a854 100644
--- a/code/bolsonaro/utils.py
+++ b/code/bolsonaro/utils.py
@@ -1,6 +1,7 @@
 import os
 import json
 import pickle
+from copy import deepcopy
 
 
 def resolve_experiment_id(models_dir):
@@ -45,3 +46,30 @@ def load_obj_from_pickle(file_path, constructor):
     with open(file_path, 'rb') as input_file:
         parameters = pickle.load(input_file)
     return constructor(**parameters)
+
+def binarize_class_data(data, class_pos, inplace=True):
+    """
+    Replace class_pos by +1 and ~class_pos by -1.
+
+    :param data: an array of classes
+    :param class_pos: the positive class to be replaced by +1
+    :param inplace: If True, modify data in place (still return it, also)
+    :return:
+    """
+    if not inplace:
+        data = deepcopy(data)
+
+    position_class_labels = (data == class_pos)
+    data[~(position_class_labels)] = -1
+    data[(position_class_labels)] = +1
+
+    return data
+
+def change_binary_func_load(base_load_function):
+    def func_load(return_X_y):
+        X, y = base_load_function(return_X_y=return_X_y)
+        possible_classes = sorted(set(y))
+        assert len(possible_classes) == 2, "change_binary_func_load only works for binary classification"
+        y = binarize_class_data(y, possible_classes[-1])
+        return X, y
+    return func_load
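Quick usage check of the two helpers (run from `code/` so that `bolsonaro` is importable):

```python
import numpy as np
from sklearn.datasets import load_breast_cancer
from bolsonaro.utils import binarize_class_data, change_binary_func_load

y = np.array([0, 2, 2, 0])
print(binarize_class_data(y, class_pos=2, inplace=False))  # [-1  1  1 -1]

# wrap a sklearn loader so its labels come back as {-1, +1}
X, y = change_binary_func_load(load_breast_cancer)(return_X_y=True)
print(sorted(set(y)))  # [-1, 1]
```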
diff --git a/code/bolsonaro/visualization/plotter.py b/code/bolsonaro/visualization/plotter.py
index 9d7058732970fb5981ef04ce7a56e022ee68d5a9..0d5706bc27cb0745fe065456231b7e3023707ac9 100644
--- a/code/bolsonaro/visualization/plotter.py
+++ b/code/bolsonaro/visualization/plotter.py
@@ -1,26 +1,51 @@
 import matplotlib.pyplot as plt
 import numpy as np
-from sklearn.neighbors.kde import KernelDensity
+import pandas as pd
 
 
 class Plotter(object):
 
     @staticmethod
-    def weight_density(weights, X, file_path):
-        X_plot = [np.exp(elem) for elem in weights]
-        fig, ax = plt.subplots()
+    def weight_density(all_experiment_weights, file_path):
+        """
+        Create the figure showing the density of the weights.
+        :param all_experiment_weights: The weights for the different experiments
+        :param file_path: str, path where the figure will be saved
+        TODO: color by seed number or not?
+        TODO: represent both the seed AND the extracted tree information in the legend
+        """
 
-        for kernel in ['gaussian', 'tophat', 'epanechnikov']:
-            kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X_plot)
-            log_dens = kde.score_samples(X_plot)
-            ax.plot(X_plot, np.exp(log_dens), '-',
-                    label="kernel = '{0}'".format(kernel))
+        """
+        Convert a dictionary of structure
+        {seed_1: [M x W], ..., seed_K: [M x W]}
+        to a numpy.ndarray of shape [K x M x W],
+        where K is the number of seeds, M the
+        number of extracted-forest sizes and W the
+        number of weights.
+        """
+        all_experiment_weights = np.array(list(all_experiment_weights.values()))
+
+        n = len(all_experiment_weights)
 
-        ax.legend(loc='upper left')
-        ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k')
+        """
+        Get as many different colors from the specified cmap (here nipy_spectral)
+        as there are seeds used.
+        """
+        colors = Plotter.get_colors_from_cmap(n)
+
+        fig, ax = plt.subplots()
+        # For each seed
+        for i in range(n):
+            # For each weight set of a given extracted tree number
+            for weights in all_experiment_weights[i]:
+                """
+                Plot the series of weights that aren't zero,
+                colored by seed number.
+                """
+                pd.Series(weights[np.nonzero(weights)]).plot.kde(
+                    figsize=(15, 10), ax=ax, color=colors[i])
 
-        ax.set_xlim(-4, 9)
-        ax.set_ylim(-0.02, 0.4)
+        ax.set_title('Density of the OMP weights')
         fig.savefig(file_path, dpi=fig.dpi)
         plt.close(fig)
 
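The switch from sklearn's KernelDensity to pandas' `plot.kde` can be tried in isolation; a minimal sketch with fake sparse weights (pandas' kde needs scipy):

```python
import matplotlib
matplotlib.use('Agg')  # headless backend so the example runs without a display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

weights = np.random.RandomState(0).randn(200)
weights[np.abs(weights) < 1.0] = 0  # OMP weight vectors are mostly zero

fig, ax = plt.subplots()
pd.Series(weights[np.nonzero(weights)]).plot.kde(figsize=(15, 10), ax=ax)
ax.set_title('Density of the OMP weights')
fig.savefig('density_weight_example.png', dpi=fig.dpi)
plt.close(fig)
```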
diff --git a/code/compute_hyperparameters.py b/code/compute_hyperparameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f7aa3a666d61618a3a5d50b1de8e996c235034c
--- /dev/null
+++ b/code/compute_hyperparameters.py
@@ -0,0 +1,97 @@
+from bolsonaro import LOG_PATH
+from bolsonaro.data.dataset_loader import DatasetLoader
+from bolsonaro.data.dataset_parameters import DatasetParameters
+from bolsonaro.data.task import Task
+from bolsonaro.error_handling.logger_factory import LoggerFactory
+from bolsonaro.hyperparameter_searcher import HyperparameterSearcher
+from bolsonaro.utils import save_obj_to_json
+
+import argparse
+import os
+import pathlib
+import random
+
+import numpy as np
+from dotenv import find_dotenv, load_dotenv
+
+"""
+I had to install skopt from this repository
+https://github.com/darenr/scikit-optimize that handles
+the issue described here https://github.com/scikit-optimize/scikit-optimize/issues/762.
+"""
+from skopt.space import Categorical, Integer, Real
+
+
+def clean_numpy_int_dict(dictionary):
+    # json cannot serialize numpy integers, so cast them to Python ints
+    return dict([a, int(x)] if isinstance(x, np.integer) else
+                [a, clean_numpy_int_dict(x)] if type(x) == dict else
+                [a, clean_numpy_int_list(x)] if type(x) == list else [a, x]
+                for a, x in dictionary.items())
+
+
+def clean_numpy_int_list(list_n):
+    return [int(elem) if isinstance(elem, np.integer) else
+            clean_numpy_int_dict(elem) if type(elem) == dict else
+            clean_numpy_int_list(elem) if type(elem) == list else elem
+            for elem in list_n]
+
+
+if __name__ == "__main__":
+    # get environment variables in .env
+    load_dotenv(find_dotenv('.env'))
+
+    DEFAULT_CV = 3
+    DEFAULT_N_ITER = 50
+    DICT_PARAM_SPACE = {'n_estimators': Integer(10, 1000),
+                        'min_samples_leaf': Integer(1, 1000),
+                        'max_depth': Integer(1, 20),
+                        'max_features': Categorical(['auto', 'sqrt', 'log2'], [0.5, 0.25, 0.25])}
+    DATASET_LIST = ['boston', 'iris', 'diabetes']
+    # , 'digits', 'linnerud', 'wine']
+
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--cv', nargs='?', type=int, default=DEFAULT_CV, help='Specify the size of the cross-validation.')
+    parser.add_argument('--n_iter', nargs='?', type=int, default=DEFAULT_N_ITER, help='Specify the number of iterations for the bayesian search.')
+    parser.add_argument('--seed', nargs='?', type=int, default=None, help='Specify a seed instead of generate it randomly.')
+    parser.add_argument('--datasets', nargs='+', type=str, default=DATASET_LIST, help='Specify the dataset used by the estimator.')
+    parser.add_argument('--verbose', action='store_true', default=False, help='Print information during the bayesian search.')
+
+    args = parser.parse_args()
+
+    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
+
+    begin_random_seed_range = 1
+    end_random_seed_range = 2000
+
+    if args.seed is None:
+        random_seed = random.randint(begin_random_seed_range, end_random_seed_range)
+    else:
+        random_seed = args.seed
+
+    for dataset_name in args.datasets:
+
+        dataset_dir = os.path.join('experiments', dataset_name, 'stage1')
+
+        pathlib.Path(dataset_dir).mkdir(parents=True, exist_ok=True)
+
+        logger.info('Bayesian search on dataset {}'.format(dataset_name))
+
+        dataset_parameters = DatasetParameters(dataset_name, test_size=0.2, dev_size=0.01, random_state=random_seed, dataset_normalizer=None)
+        dataset = DatasetLoader.load(dataset_parameters)
+
+        if dataset.task == Task.REGRESSION:
+            scorer = 'neg_mean_squared_error'
+        else:
+            scorer = 'accuracy'
+
+        bayesian_searcher = HyperparameterSearcher()
+        opt = bayesian_searcher.search(dataset, DICT_PARAM_SPACE, args.n_iter,
+            args.cv, random_seed, scorer, args.verbose)
+
+        dict_results = {'_scorer': scorer,
+                        '_best_score_train': opt.best_score_,
+                        '_best_score_test': opt.score(dataset.X_test, dataset.y_test),
+                        '_best_parameters': clean_numpy_int_dict(opt.best_params_),
+                        '_random_seed': random_seed
+                        }
+
+        save_obj_to_json(os.path.join(dataset_dir, 'params.json'), dict_results)
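Why the cleaning helpers at the top of this file exist: the standard json module refuses numpy integer types, which is what BayesSearchCV returns for Integer dimensions. A minimal demonstration:

```python
import json
import numpy as np

best_params = {'n_estimators': np.int64(1000), 'max_features': 'sqrt'}

try:
    json.dumps(best_params)
except TypeError as e:
    print('raw dump fails:', e)

cleaned = {k: int(v) if isinstance(v, np.integer) else v
           for k, v in best_params.items()}
print(json.dumps(cleaned))  # {"n_estimators": 1000, "max_features": "sqrt"}
```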
diff --git a/code/compute_results.py b/code/compute_results.py
index fb09e42d3fc6829358b622ad4291bd064a9c65a3..64124af70954cc6af6a923f03f5a122a75f453fb 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -12,7 +12,7 @@ import os
 
 if __name__ == "__main__":
     # get environment variables in .env
-    load_dotenv(find_dotenv('.env.example'))
+    load_dotenv(find_dotenv('.env'))
 
     DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
     DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
@@ -59,6 +59,8 @@ if __name__ == "__main__":
         experiment_dev_scores = dict()
         experiment_test_scores = dict()
 
+        experiment_weights = dict()
+
         # Used to check if all losses were computed using the same metric (it should be the case)
         experiment_score_metrics = list()
 
@@ -74,6 +76,8 @@ if __name__ == "__main__":
             experiment_dev_scores[seed] = list()
             experiment_test_scores[seed] = list()
 
+            experiment_weights[seed] = list()
+
             # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_size
             extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
             for extracted_forest_size in extracted_forest_sizes:
@@ -84,9 +88,13 @@ if __name__ == "__main__":
                 # Load [...]/model_parameters.json file and build the model using these parameters and the weights and forest from model_raw_results.pickle
                 model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
                 # Save temporarly some raw results (TODO: to complete to retreive more results)
+                # Save the scores
                 experiment_train_scores[seed].append(model_raw_results.train_score)
                 experiment_dev_scores[seed].append(model_raw_results.dev_score)
                 experiment_test_scores[seed].append(model_raw_results.test_score)
+                # Save the weights
+                experiment_weights[seed].append(model_raw_results.weights)  # broken: ModelRawResults now stores model_object, not weights (see TODO.md)
+                # Save the metric
                 experiment_score_metrics.append(model_raw_results.score_metric)
 
         if len(set(experiment_score_metrics)) > 1:
@@ -107,3 +115,48 @@ if __name__ == "__main__":
             all_labels=['train', 'dev', 'test'],
             title='Loss values of the trained model'
         )
+
+        """
+        TODO:
+        For each dataset:
+        Stage 1) A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
+        Stage 2) A figure for the selection of the best dataset normalization method
+        Stage 3) A figure for the selection of the best combination of dataset: normalization vs D normalization vs weights normalization
+        Stage 4) A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
+        Stage 5) A figure for the selection of the best extracted forest size?
+        Stage 6) A figure to finally compare the perf of our approach using the previously selected parameters vs the baseline vs other papers
+
+        Stage 3)
+        In all axis:
+        - untrained forest
+        - trained base forest (straight line cause it doesn't depend on the number of extracted trees)
+
+        Axis 1:
+        - test with forest on train+dev and OMP on train+dev
+        - test with forest on train+dev and OMP on train+dev with dataset normalization
+        - test with forest on train+dev and OMP on train+dev with dataset normalization + D normalization
+        - test with forest on train+dev and OMP on train+dev with dataset normalization + weights normalization
+        - test with forest on train+dev and OMP on train+dev with dataset normalization + D normalization + weights normalization
+
+        Axis 2:
+        - test with forest on train and OMP on dev
+        - test with forest on train and OMP on dev with dataset normalization
+        - test with forest on train and OMP on dev with dataset normalization + D normalization
+        - test with forest on train and OMP on dev with dataset normalization + weights normalization
+        - test with forest on train and OMP on dev with dataset normalization + D normalization + weights normalization
+
+        Axis 3:
+        - test with forest on train and OMP train+dev
+        - test with forest on train and OMP train+dev with dataset normalization
+        - test with forest on train and OMP train+dev with dataset normalization + D normalization
+        - test with forest on train and OMP train+dev with dataset normalization + weights normalization
+        - test with forest on train and OMP train+dev with dataset normalization + D normalization + weights normalization
+
+        IMPORTANT: Same seeds used in all axis.
+        """
+
+        # Plot the density of the weights
+        Plotter.weight_density(
+            file_path=args.results_dir + os.sep + experiment_id + os.sep + 'density_weight.png',
+            all_experiment_weights=experiment_weights
+        )
diff --git a/code/train.py b/code/train.py
index 5783fef4bd32d75f62f5dcf05c9812e82f9c0338..0d9713252b0e5e2345331952edaca6adfa5424c0 100644
--- a/code/train.py
+++ b/code/train.py
@@ -9,16 +9,75 @@ from bolsonaro.error_handling.logger_factory import LoggerFactory
 
 from dotenv import find_dotenv, load_dotenv
 import argparse
+import json
 import pathlib
 import random
 import os
-from tqdm import tqdm
+from concurrent import futures
+import threading
 
 
+def process_job(seed, parameters, experiment_id, hyperparameters):
+    """
+    Experiment function.
+
+    Will be used as the base function for workers in the multiprocess application.
+
+    :param seed: random seed of the run
+    :param parameters: dictionary of experiment parameters
+    :param experiment_id: id of the current experiment
+    :param hyperparameters: hyperparameters of the base forest (from the stage-1 search)
+    :return:
+    """
+    logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_ti{}'.format(
+        seed, threading.get_ident()))
+    logger.info('seed={}'.format(seed))
+
+    seed_str = str(seed)
+    experiment_id_str = str(experiment_id)
+    models_dir = parameters['models_dir'] + os.sep + experiment_id_str + os.sep + 'seeds' + \
+        os.sep + seed_str
+    pathlib.Path(models_dir).mkdir(parents=True, exist_ok=True)
+
+    dataset_parameters = DatasetParameters(
+        name=parameters['dataset_name'],
+        test_size=parameters['test_size'],
+        dev_size=parameters['dev_size'],
+        random_state=seed,
+        dataset_normalizer=parameters['dataset_normalizer']
+    )
+    dataset_parameters.save(models_dir, experiment_id_str)
+    dataset = DatasetLoader.load(dataset_parameters)
+
+    trainer = Trainer(dataset)
+
+    for extracted_forest_size in parameters['extracted_forest_size']:
+        # question: if training takes too long, one may also split experiments for different forest sizes into different workers
+        logger.info('extracted_forest_size={}'.format(extracted_forest_size))
+        sub_models_dir = models_dir + os.sep + 'extracted_forest_size' + os.sep + str(extracted_forest_size)
+        pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
+
+        model_parameters = ModelParameters(
+            extracted_forest_size=extracted_forest_size,
+            normalize_D=parameters['normalize_D'],
+            subsets_used=parameters['subsets_used'],
+            normalize_weights=parameters['normalize_weights'],
+            seed=seed,
+            hyperparameters=hyperparameters
+        )
+        model_parameters.save(sub_models_dir, experiment_id)
+
+        model = ModelFactory.build(dataset.task, model_parameters)
+
+        trainer.init(model)
+        trainer.train(model)
+        trainer.compute_results(model, sub_models_dir)
+    logger.info('Training done')
+
 if __name__ == "__main__":
-    # get environment variables in .env
-    load_dotenv(find_dotenv('.env.example'))
+    load_dotenv(find_dotenv('.env'))
 
+    DEFAULT_EXPERIMENT_CONFIGURATION_PATH = 'experiments'
     DEFAULT_DATASET_NAME = 'boston'
     DEFAULT_NORMALIZE_D = False
     DEFAULT_DATASET_NORMALIZER = None
@@ -29,13 +88,15 @@ if __name__ == "__main__":
     DEFAULT_DEV_SIZE = 0.2
     DEFAULT_TEST_SIZE = 0.2
     DEFAULT_RANDOM_SEED_NUMBER = 1
-    DEFAULT_USE_DEV_SUBSET = False
-    DEFAULT_DISABLE_PROGRESS = False
+    DEFAULT_SUBSETS_USED = 'train,dev'
+    DEFAULT_NORMALIZE_WEIGHTS = False
 
     begin_random_seed_range = 1
     end_random_seed_range = 2000
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--experiment_configuration', nargs='?', type=str, default=None, help='Specify an experiment configuration file name. Overload all other parameters.')
+    parser.add_argument('--experiment_configuration_path', nargs='?', type=str, default=DEFAULT_EXPERIMENT_CONFIGURATION_PATH, help='Specify the experiment configuration directory path.')
     parser.add_argument('--dataset_name', nargs='?', type=str, default=DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
     parser.add_argument('--normalize_D', action='store_true', default=DEFAULT_NORMALIZE_D, help='Specify if we want to normalize the prediction of the forest by doing the L2 division of the pred vectors.')
     parser.add_argument('--dataset_normalizer', nargs='?', type=str, default=DEFAULT_DATASET_NORMALIZER, help='Specify which dataset normalizer use (either standard, minmax, robust or normalizer).')
@@ -46,66 +107,64 @@ if __name__ == "__main__":
     parser.add_argument('--test_size', nargs='?', type=float, default=DEFAULT_TEST_SIZE, help='Test subset ratio.')
     parser.add_argument('--random_seed_number', nargs='?', type=int, default=DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.')
     parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly')
-    parser.add_argument('--use_dev_subset', action='store_true', default=DEFAULT_USE_DEV_SUBSET, help='If specify the forest will be trained on train subset and OMP on dev subset. Otherwise both the forest and OMP will be trained on train+dev subsets.')
-    parser.add_argument('--disable_progress', action='store_true', default=DEFAULT_DISABLE_PROGRESS, help='Disable the progress bars.')
+    parser.add_argument('--subsets_used', nargs='?', type=str, default=DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train, OMP on train+dev.')
+    parser.add_argument('--normalize_weights', action='store_true', default=DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.')
     args = parser.parse_args()
 
-    pathlib.Path(args.models_dir).mkdir(parents=True, exist_ok=True)
+    if args.experiment_configuration:
+        with open(args.experiment_configuration_path + os.sep + \
+            args.experiment_configuration + '.json', 'r') as input_file:
+            parameters = json.load(input_file)
+    else:
+        parameters = args.__dict__
+
+    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
 
     logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
 
-    args.extracted_forest_size = args.extracted_forest_size \
-        if type(args.extracted_forest_size) == list \
-        else [args.extracted_forest_size]
+    # The number of tree to extract from forest (K)
+    parameters['extracted_forest_size'] = parameters['extracted_forest_size'] \
+        if type(parameters['extracted_forest_size']) == list \
+        else [parameters['extracted_forest_size']]
 
-    if args.seeds != None and args.random_seed_number > 1:
-        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    
+    hyperparameters_path = os.path.join('experiments', parameters['dataset_name'], 'stage1', 'params.json')
+    if os.path.exists(hyperparameters_path):
+        logger.info("Hyperparameters found for this dataset at '{}'".format(hyperparameters_path))
+        with open(hyperparameters_path, 'r') as file_hyperparameter:
+            hyperparameters = json.load(file_hyperparameter)['best_parameters']
+    else:
+        hyperparameters = {}
 
-    seeds = args.seeds if args.seeds is not None \
-        else [random.randint(begin_random_seed_range, end_random_seed_range) \
-        for i in range(args.random_seed_number)]
+    if parameters['forest_size'] is not None:
+        hyperparameters['n_estimators'] = parameters['forest_size']
 
-    experiment_id = resolve_experiment_id(args.models_dir)
-    experiment_id_str = str(experiment_id)
+    if parameters['seeds'] is not None and parameters['random_seed_number'] > 1:
+        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')
 
-    logger.info('Experiment id: {}'.format(experiment_id_str))
-
-    with tqdm(seeds, disable=args.disable_progress) as seed_bar:
-        for seed in seed_bar:
-            seed_bar.set_description('seed={}'.format(seed))
-            seed_str = str(seed)
-            models_dir = args.models_dir + os.sep + experiment_id_str + os.sep + 'seeds' + \
-                os.sep + seed_str
-            pathlib.Path(models_dir).mkdir(parents=True, exist_ok=True)
-
-            dataset_parameters = DatasetParameters(
-                name=args.dataset_name,
-                test_size=args.test_size,
-                dev_size=args.dev_size,
-                random_state=seed,
-                dataset_normalizer=args.dataset_normalizer
+    # Seeds are either provided as parameters or generated at random
+    seeds = parameters['seeds'] if parameters['seeds'] is not None \
+        else [random.randint(begin_random_seed_range, end_random_seed_range) \
+        for i in range(parameters['random_seed_number'])]
+
+    # Resolve the next experiment id number (last id + 1)
+    experiment_id = resolve_experiment_id(parameters['models_dir'])
+    logger.info('Experiment id: {}'.format(experiment_id))
+
+    """
+    If the experiment configuration isn't coming from
+    an already existing file, save it to a json file to
+    keep trace of it.
+    """
+    if args.experiment_configuration is None:
+        with open(args.experiment_configuration_path + os.sep + 'unnamed_{}.json'.format(
+            experiment_id), 'w') as output_file:
+            json.dump(
+                parameters,
+                output_file,
+                indent=4
             )
-            dataset_parameters.save(models_dir, experiment_id_str)
-
-            dataset = DatasetLoader.load(dataset_parameters)
-
-            trainer = Trainer(dataset)
-
-            with tqdm(args.extracted_forest_size, disable=args.disable_progress) as extracted_forest_size_bar:
-                for extracted_forest_size in extracted_forest_size_bar:
-                    extracted_forest_size_bar.set_description('extracted_forest_size={}'.format(extracted_forest_size))
-                    sub_models_dir = models_dir + os.sep + 'extracted_forest_size' + os.sep + str(extracted_forest_size)
-                    pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
-
-                    model_parameters = ModelParameters(
-                        forest_size=args.forest_size,
-                        extracted_forest_size=extracted_forest_size,
-                        normalize_D=args.normalize_D,
-                        use_dev_subset=args.use_dev_subset,
-                        seed=seed
-                    )
-                    model_parameters.save(sub_models_dir, experiment_id)
-
-                    model = ModelFactory.build(dataset.task, model_parameters)
 
-                    trainer.train(model, sub_models_dir)
+    # Run as many training jobs as there are seeds
+    with futures.ProcessPoolExecutor(len(seeds)) as executor:
+        jobs = [executor.submit(process_job, seed, parameters, experiment_id,
+                                hyperparameters) for seed in seeds]
+        for job in futures.as_completed(jobs):
+            job.result()  # re-raise any exception raised in a worker
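The fan-out at the end of train.py follows the standard executor pattern; a stand-alone stub version, with a trivial job in place of a full training run:

```python
from concurrent import futures


def process_job(seed):
    return seed * seed  # stands in for one full training run


if __name__ == '__main__':
    seeds = [1, 2, 3]
    # one worker per seed, mirroring ProcessPoolExecutor(len(seeds)) above
    with futures.ProcessPoolExecutor(len(seeds)) as executor:
        jobs = [executor.submit(process_job, seed) for seed in seeds]
        for job in futures.as_completed(jobs):
            print(job.result())  # .result() re-raises worker exceptions
```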
diff --git a/experiments/.gitkeep b/experiments/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments/boston/stage1/params.json b/experiments/boston/stage1/params.json
index 6a5a1e9a05d8f081af6abe38fa0aadfff1e736b8..ad9a695df71e3bf329b4d7b30094329daf6c6b4f 100644
--- a/experiments/boston/stage1/params.json
+++ b/experiments/boston/stage1/params.json
@@ -1,5 +1,5 @@
 {
     "scorer": "neg_mean_squared_error",
-    "best_score_train": -11.238253315624897,
-    "best_score_test": -7.312532120669678,
+    "best_score_train": -12.900217003727361,
+    "best_score_test": -12.682938620298733,
     "best_parameters": {
@@ -9,4 +9,4 @@
         "n_estimators": 1000
     },
-    "random_seed": 289
+    "random_seed": 883
 }
\ No newline at end of file
diff --git a/experiments/iris/stage1/params.json b/experiments/iris/stage1/params.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd852cace9852ee492649374e915b639fe785b28
--- /dev/null
+++ b/experiments/iris/stage1/params.json
@@ -0,0 +1,12 @@
+{
+    "scorer": "accuracy",
+    "best_score_train": 0.9576271186440678,
+    "best_score_test": 1.0,
+    "best_parameters": {
+        "max_depth": 20,
+        "max_features": "log2",
+        "min_samples_leaf": 1,
+        "n_estimators": 1000
+    },
+    "random_seed": 883
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 437c0178801cca15b39561318f6e244c15b34533..d585159632442f7293f858e1fad391031658d60a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ awscli
 flake8
 python-dotenv>=0.5.1
 scikit-learn
+git+https://github.com/darenr/scikit-optimize@master#egg=scikit-optimize
-python-dotenv
-tqdm
-matplotlib
\ No newline at end of file
+matplotlib
+pandas
\ No newline at end of file