diff --git a/.env.example b/.env.example
deleted file mode 100644
index 9ca543b382b4889be1d93ed2065ef234b789153c..0000000000000000000000000000000000000000
--- a/.env.example
+++ /dev/null
@@ -1,12 +0,0 @@
-# Environment variables go here, can be read by `python-dotenv` package:
-#
-#   `src/script.py`
-#   ----------------------------------------------------------------
-#    import dotenv
-#
-#    project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
-#    dotenv_path = os.path.join(project_dir, '.env')
-#    dotenv.load_dotenv(dotenv_path)
-#   ----------------------------------------------------------------
-
-project_dir = "."
\ No newline at end of file
diff --git a/code/bolsonaro/data/dataset_loader.py b/code/bolsonaro/data/dataset_loader.py
index ec1f321f70115542a2164c474193a246faa5639d..f4a6d085f45cfa6580949ad6acfabbe4abe71d8a 100644
--- a/code/bolsonaro/data/dataset_loader.py
+++ b/code/bolsonaro/data/dataset_loader.py
@@ -9,6 +9,17 @@ from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
 from sklearn.model_selection import train_test_split
 from sklearn import preprocessing
 
+from bolsonaro.utils import binarize_class_data
+
+
+def change_binary_func_load(base_load_function):
+    def func_load(return_X_y):
+        X, y = base_load_function(return_X_y=return_X_y)
+        possible_classes = sorted(set(y))
+        assert len(possible_classes) == 2, "change_binary_func_load only works for binary classification"
+        y = binarize_class_data(y, possible_classes[-1])
+        return X, y
+    return func_load
 
 class DatasetLoader(object):
 
@@ -20,45 +31,46 @@ class DatasetLoader(object):
             task = Task.REGRESSION
         elif name == 'iris':
             dataset_loading_func = load_iris
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'diabetes':
             dataset_loading_func = load_diabetes
             task = Task.REGRESSION
         elif name == 'digits':
             dataset_loading_func = load_digits
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'linnerud':
             dataset_loading_func = load_linnerud
             task = Task.REGRESSION
         elif name == 'wine':
             dataset_loading_func = load_wine
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'breast_cancer':
-            dataset_loading_func = load_breast_cancer
-            task = Task.CLASSIFICATION
+            dataset_loading_func = change_binary_func_load(load_breast_cancer)
+            task = Task.BINARYCLASSIFICATION
         elif name == 'olivetti_faces':  # bug (no return X_y)
             dataset_loading_func = fetch_olivetti_faces
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == '20newsgroups':  # bug (no return X_y)
             dataset_loading_func = fetch_20newsgroups
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == '20newsgroups_vectorized':
             dataset_loading_func = fetch_20newsgroups_vectorized
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'lfw_people':  # needs PIL (image dataset)
             dataset_loading_func = fetch_lfw_people
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'lfw_pairs':
             dataset_loading_func = fetch_lfw_pairs
+            task = Task.MULTICLASSIFICATION
         elif name == 'covtype':
             dataset_loading_func = fetch_covtype
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'rcv1':
             dataset_loading_func = fetch_rcv1
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'kddcup99':
             dataset_loading_func = fetch_kddcup99
-            task = Task.CLASSIFICATION
+            task = Task.MULTICLASSIFICATION
         elif name == 'california_housing':
             dataset_loading_func = fetch_california_housing
             task = Task.REGRESSION
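For review context, a minimal sketch of what the `change_binary_func_load` wrapper changes for `breast_cancer` (it relies on the repo's own `bolsonaro` package; the printed values assume scikit-learn's usual 0/1 labels and are expected outputs, not taken from this diff):

```python
from sklearn.datasets import load_breast_cancer

from bolsonaro.data.dataset_loader import change_binary_func_load

# Plain scikit-learn loader: labels are {0, 1}.
_, y_raw = load_breast_cancer(return_X_y=True)
print(sorted(set(y_raw)))   # [0, 1]

# Wrapped loader: labels are remapped to {-1, +1}, which is what
# OmpForestBinaryClassifier._check_classes expects.
_, y_bin = change_binary_func_load(load_breast_cancer)(return_X_y=True)
print(sorted(set(y_bin)))   # [-1, 1]
```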
diff --git a/code/bolsonaro/data/task.py b/code/bolsonaro/data/task.py
index 2f47fa22f472f769c075f40e1c25a7bf3de45f0d..f1214a64a27873e49f5dbbcb853e4f65f9b07f68 100644
--- a/code/bolsonaro/data/task.py
+++ b/code/bolsonaro/data/task.py
@@ -2,5 +2,6 @@ from enum import Enum
 
 
 class Task(Enum):
-    CLASSIFICATION = 1
+    BINARYCLASSIFICATION = 1
     REGRESSION = 2
+    MULTICLASSIFICATION = 3
diff --git a/code/bolsonaro/hyperparameter_searcher.py b/code/bolsonaro/hyperparameter_searcher.py
index 1f54c84e02f02ab8d62ba1441475cbfe2d572858..7884d2d4271203e9ebee1e804baa7c1e94a76770 100644
--- a/code/bolsonaro/hyperparameter_searcher.py
+++ b/code/bolsonaro/hyperparameter_searcher.py
@@ -33,11 +33,10 @@ class HyperparameterSearcher(object):
         :return: a skopt.searchcv.BayesSearchCV object
         '''
 
-        if dataset.task == Task.CLASSIFICATION:
-            estimator = RandomForestClassifier(n_jobs=-1, random_state=random_seed)
-
         if dataset.task == Task.REGRESSION:
             estimator = RandomForestRegressor(n_jobs=-1, random_state=random_seed)
+        else:
+            estimator = RandomForestClassifier(n_jobs=-1, random_state=random_seed)
 
         opt = BayesSearchCV(estimator, hyperparameter_space, n_iter=n_iter,
                             cv=cv, n_jobs=-1, random_state=random_seed,
diff --git a/code/bolsonaro/models/model_factory.py b/code/bolsonaro/models/model_factory.py
index fb6b32cb26727d2221367f208598f04e1a19dfb1..2dc578cfaacc99f9fea17b9ae8e64cc08e3038dc 100644
--- a/code/bolsonaro/models/model_factory.py
+++ b/code/bolsonaro/models/model_factory.py
@@ -1,4 +1,4 @@
-from bolsonaro.models.omp_forest_classifier import OmpForestClassifier
+from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
 from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
 from bolsonaro.data.task import Task
 from bolsonaro.models.model_parameters import ModelParameters
@@ -11,18 +11,22 @@ class ModelFactory(object):
 
     @staticmethod
     def build(task, model_parameters):
-        if task == Task.CLASSIFICATION:
-            model_func = OmpForestClassifier
+        if task == Task.BINARYCLASSIFICATION:
+            model_func = OmpForestBinaryClassifier
         elif task == Task.REGRESSION:
             model_func = OmpForestRegressor
+        elif task == Task.MULTICLASSIFICATION:
+            model_func = OmpForestMulticlassClassifier
         else:
             raise ValueError("Unsupported task '{}'".format(task))
         return model_func(model_parameters)
 
     @staticmethod
     def load(task, directory_path, experiment_id, model_raw_results):
+        raise NotImplementedError
         model_parameters = ModelParameters.load(directory_path, experiment_id)
         model = ModelFactory.build(task, model_parameters)
-        model.set_forest(model_raw_results.forest)
-        model.set_weights(model_raw_results.weights)
+        # todo: do what is needed here to properly restore the model
+        # model.set_forest(model_raw_results.forest)
+        # model.set_weights(model_raw_results.weights)
         return model
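A hedged usage sketch of the new three-way dispatch. `SimpleNamespace` stands in for `ModelParameters`, whose real constructor is not shown in this diff; only the attributes the constructors in this patch actually read are filled in (hypothetical values):

```python
from types import SimpleNamespace

from bolsonaro.data.task import Task
from bolsonaro.models.model_factory import ModelFactory

# Hypothetical stand-in for ModelParameters (assumption, see above).
params = SimpleNamespace(hyperparameters={'n_estimators': 100}, seed=42,
                         forest_size=100, extracted_forest_size=10,
                         normalize_D=False, normalize_weights=False)

print(type(ModelFactory.build(Task.REGRESSION, params)).__name__)
# OmpForestRegressor
print(type(ModelFactory.build(Task.BINARYCLASSIFICATION, params)).__name__)
# OmpForestBinaryClassifier
print(type(ModelFactory.build(Task.MULTICLASSIFICATION, params)).__name__)
# OmpForestMulticlassClassifier
```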
diff --git a/code/bolsonaro/models/model_raw_results.py b/code/bolsonaro/models/model_raw_results.py
index 673cb0fc65b7378e95c03b186d246cb70b384a07..df8b2ec0b10704a8a8c397b9012298e8b901e14b 100644
--- a/code/bolsonaro/models/model_raw_results.py
+++ b/code/bolsonaro/models/model_raw_results.py
@@ -6,13 +6,12 @@ import datetime
 
 class ModelRawResults(object):
 
-    def __init__(self, forest, weights, training_time,
+    def __init__(self, model_object, training_time,
         datetime, train_score, dev_score, test_score,
         score_metric, train_score_regressor, dev_score_regressor,
         test_score_regressor):
 
-        self._forest = forest
-        self._weights = weights
+        self._model_object = model_object
         self._training_time = training_time
         self._datetime = datetime
         self._train_score = train_score
@@ -24,12 +23,8 @@ class ModelRawResults(object):
         self._test_score_regressor = test_score_regressor
     
     @property
-    def forest(self):
-        return self._forest
-
-    @property
-    def weights(self):
-        return self._weights
+    def model_object(self):
+        return self._model_object
 
     @property
     def training_time(self):
diff --git a/code/bolsonaro/models/omp_forest.py b/code/bolsonaro/models/omp_forest.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c33f09dd07142cfc9f94cee500be3ed8c795fba
--- /dev/null
+++ b/code/bolsonaro/models/omp_forest.py
@@ -0,0 +1,123 @@
+from abc import abstractmethod, ABCMeta
+
+import numpy as np
+from sklearn.linear_model import OrthogonalMatchingPursuit
+
+from bolsonaro import LOG_PATH
+from bolsonaro.error_handling.logger_factory import LoggerFactory
+from sklearn.base import BaseEstimator
+
+
+class OmpForest(BaseEstimator, metaclass=ABCMeta):
+    def __init__(self, models_parameters, base_forest_estimator):
+        self._base_forest_estimator = base_forest_estimator
+        self._models_parameters = models_parameters
+        self._logger = LoggerFactory.create(LOG_PATH, __name__)
+
+    @property
+    def models_parameters(self):
+        return self._models_parameters
+
+    def score_base_estimator(self, X, y):
+        return self._base_forest_estimator.score(X, y)
+
+
+    def _base_estimator_predictions(self, X):
+        return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
+
+    @property
+    def forest(self):
+        return self._base_forest_estimator.estimators_
+
+    # sklearn BaseEstimator API methods
+    def fit(self, X_forest, y_forest, X_omp, y_omp):
+        self._base_forest_estimator.fit(X_forest, y_forest)
+        self._extract_subforest(X_omp, y_omp)  # type: OrthogonalMatchingPursuit
+        return self
+
+    def _extract_subforest(self, X, y):
+        """
+        Given an already fitted base forest, apply OMP to get the weight of each tree.
+
+        The X data is used for interrogation of every tree in the forest. The y data
+        is used for finding the weights in OMP.
+
+        :param X: (n_sample, n_features) array
+        :param y: (n_sample,) array
+        :return:
+        """
+        self._logger.debug("Forest make prediction on X")
+        D = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            # question: maybe consider other kinds of normalization... centering?
+            self._logger.debug("Compute norm of predicted vectors on X")
+            self._forest_norms = np.linalg.norm(D, axis=0)
+            D /= self._forest_norms
+
+        self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
+                           .format(self._models_parameters.extracted_forest_size))
+
+        self.fit_omp(D, y)
+
+    @staticmethod
+    def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
+        if normalize_weights:
+            # we can normalize the weights (by their sum) so that they sum to 1
+            # and can be interpreted as impact percentages for interpretability.
+            # this requires removing the (-) sign from the weights, i.e. moving it to the predictions (use unsigned_coef)
+
+            # question: I don't understand the trick with nonzero?
+            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
+            coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add axis to make sure it will be broadcasted line-wise (there might be a confusion when forest_prediction is square)
+            unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
+            intercept = omp_obj.intercept_
+
+            adjusted_forest_predictions = base_predictions * coef_signs
+            predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept
+
+        else:
+            predictions = omp_obj.predict(base_predictions)
+
+        return predictions
+
+    @abstractmethod
+    def fit_omp(self, atoms, objective):
+        pass
+
+    @abstractmethod
+    def predict(self, X):
+        pass
+
+    @abstractmethod
+    def score(self, X, y):
+        pass
+
+class SingleOmpForest(OmpForest):
+    def __init__(self, models_parameters, base_forest_estimator):
+        # fit_intercept shouldn't be set to False as the data isn't necessarily centered here
+        # normalization is handled outside OMP
+        self._omp = OrthogonalMatchingPursuit(
+            n_nonzero_coefs=models_parameters.extracted_forest_size,
+            fit_intercept=True, normalize=False)
+
+        super().__init__(models_parameters, base_forest_estimator)
+
+    def fit_omp(self, atoms, objective):
+        self._omp.fit(atoms, objective)
+
+    def predict(self, X):
+        """
+        Apply the SingleOmpForest to X.
+
+        Make all the base tree predictions then apply the OMP weights for pruning.
+
+        :param X:
+        :return:
+        """
+        forest_predictions = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
\ No newline at end of file
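The new base class factors out the core trick: regress the target onto the matrix `D` of per-tree predictions with OMP, so only `extracted_forest_size` trees get a nonzero weight. A standalone sketch of that idea (plain scikit-learn, no `bolsonaro` imports; it passes `normalize=False` like the patch, which assumes the scikit-learn version this codebase targets, as that argument was removed in later scikit-learn releases):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import OrthogonalMatchingPursuit

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)

# D has one column per tree: D[i, j] is the prediction of tree j on sample i.
D = np.array([tree.predict(X) for tree in forest.estimators_]).T

extracted_forest_size = 10
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=extracted_forest_size,
                                fit_intercept=True, normalize=False)
omp.fit(D, y)

kept_trees = np.flatnonzero(omp.coef_)   # indices of the extracted trees
print(len(kept_trees))                   # at most 10
```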
diff --git a/code/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py
index 12cc23fab69fc0b79ff40b1d6957db5532a8c452..c0526fbad4da9255b99c88a7c2e1239047c08587 100644
--- a/code/bolsonaro/models/omp_forest_classifier.py
+++ b/code/bolsonaro/models/omp_forest_classifier.py
@@ -1,11 +1,117 @@
+from collections import namedtuple
+from copy import deepcopy
+
 from sklearn.base import BaseEstimator
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import OrthogonalMatchingPursuit
+
+from bolsonaro import LOG_PATH
+from bolsonaro.error_handling.logger_factory import LoggerFactory
+from bolsonaro.models.omp_forest import OmpForest, SingleOmpForest
+import numpy as np
+
+from bolsonaro.utils import binarize_class_data
+
+
+class OmpForestBinaryClassifier(SingleOmpForest):
+
+    DEFAULT_SCORE_METRIC = 'indicator'
+
+    def __init__(self, models_parameters):
+        estimator = RandomForestClassifier(n_estimators=models_parameters.forest_size,
+                                           random_state=models_parameters.seed, n_jobs=-1)
+        super().__init__(models_parameters, estimator)
+
+    def _check_classes(self, y):
+        assert len(set(y).difference({-1, 1})) == 0, "Classes for binary classifier should be {-1, +1}"
+
+    def fit(self, X_forest, y_forest, X_omp, y_omp):
+        self._check_classes(y_forest)
+        self._check_classes(y_omp)
+
+        return super().fit(X_forest, y_forest, X_omp, y_omp)
+
+
+    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
+        """
+        Evaluate OmpForestBinaryClassifier on (`X`, `y`) using `metric`.
+
+        :param X:
+        :param y:
+        :param metric: only "indicator" is supported for now
+        :return:
+        """
+        predictions = self.predict(X)
+
+        if metric == 'indicator':
+            evaluation = np.abs(np.mean(np.abs(np.sign(predictions) - y) - 1))
+        else:
+            raise ValueError("Unsupported metric '{}'.".format(metric))
+
+        return evaluation
+
+
+class OmpForestMulticlassClassifier(OmpForest):
+
+    DEFAULT_SCORE_METRIC = 'indicator'
+
+    def __init__(self, models_parameters):
+        estimator = RandomForestClassifier(n_estimators=models_parameters.forest_size,
+                                           random_state=models_parameters.seed, n_jobs=-1)
+        super().__init__(models_parameters, estimator)
+        # question: maybe initialize the OMPs in __init__? as in SingleOmpForest
+        self._dct_class_omp = {}
+
+    def fit_omp(self, atoms, objective):
+        assert len(self._dct_class_omp) == 0, "fit_omp can be called only once on {}".format(self.__class__.__name__)
+        possible_classes = sorted(set(objective))
+        for class_label in possible_classes:
+            atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
+            objective_binary = binarize_class_data(objective, class_label, inplace=False)
+            # todo: maybe consider the forest size to be global, so that only a fraction is available for each OMP...
+            omp_class = OrthogonalMatchingPursuit(
+                n_nonzero_coefs=self.models_parameters.extracted_forest_size,
+                fit_intercept=True, normalize=False)
+            omp_class.fit(atoms_binary, objective_binary)
+            self._dct_class_omp[class_label] = omp_class
+        return self._dct_class_omp
+
+    def predict(self, X):
+        forest_predictions = self._base_estimator_predictions(X)
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        label_names = []
+        preds = []
+        for class_label, omp_class in self._dct_class_omp.items():
+            label_names.append(class_label)
+            atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
+            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
+
+        # todo: check that this is not buggy here
+
+        preds = np.array(preds).T
+        max_preds = np.argmax(preds, axis=1)
+        return np.array(label_names)[max_preds]
+
+
+    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
+        predictions = self.predict(X)
+
+        if metric == 'indicator':
+            evaluation = np.mean(predictions == y)
+        else:
+            raise ValueError("Unsupported metric '{}'.".format(metric))
+
+        return evaluation
 
 
-class OmpForestClassifier(BaseEstimator):
 
-    def __init__(self):
-        raise ValueError('Classification tasks are not supported for now')
 
-    def fit(self, X, y):
-        pass
+if __name__ == "__main__":
+    forest = RandomForestClassifier(n_estimators=10)
+    X = np.random.rand(10, 5)
+    y = np.random.choice([-1, +1], 10)
+    forest.fit(X, y)
+    print(forest.predict(np.random.rand(10, 5)))
\ No newline at end of file
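The binary classifier's 'indicator' score is compact enough to be misread: for labels in {-1, +1}, `|sign(p) - y|` is 0 on a correct prediction and 2 on a mistake, so the expression evaluates to |1 - 2·error_rate| (1.0 means perfect, 0.0 means chance level). A quick sanity check of that reading (sketch, not part of the patch):

```python
import numpy as np

y           = np.array([1, -1, 1, -1])
predictions = np.array([0.9, -0.2, -0.4, -1.3])  # raw OMP outputs; one mistake

error_rate = np.mean(np.sign(predictions) != y)                    # 0.25
indicator = np.abs(np.mean(np.abs(np.sign(predictions) - y) - 1))  # 0.5
assert np.isclose(indicator, np.abs(1 - 2 * error_rate))
```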
diff --git a/code/bolsonaro/models/omp_forest_regressor.py b/code/bolsonaro/models/omp_forest_regressor.py
index 013a86a2e889d3ebdc1b809b6d0d50ac5a697f26..9e95453df26e9dc5a688b2dd5217276361b5e96d 100644
--- a/code/bolsonaro/models/omp_forest_regressor.py
+++ b/code/bolsonaro/models/omp_forest_regressor.py
@@ -1,67 +1,20 @@
-from bolsonaro import LOG_PATH
-from bolsonaro.error_handling.logger_factory import LoggerFactory
+
 
 from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import OrthogonalMatchingPursuit
-from sklearn.base import BaseEstimator
 import numpy as np
 
+from bolsonaro.models.omp_forest import SingleOmpForest
+
 
-class OmpForestRegressor(BaseEstimator):
+class OmpForestRegressor(SingleOmpForest):
 
     DEFAULT_SCORE_METRIC = 'mse'
 
     def __init__(self, models_parameters):
-        self._regressor = RandomForestRegressor(**models_parameters.hyperparameters,
-            random_state=models_parameters.seed, n_jobs=-1)
-        self._models_parameters = models_parameters
-        self._logger = LoggerFactory.create(LOG_PATH, __name__)
-
-    @property
-    def forest(self):
-        return self._forest
-
-    def set_forest(self, forest):
-        self._forest = forest
-        self._regressor.estimators_ = forest
-
-    @property
-    def weights(self):
-        return self._weights
-
-    def set_weights(self, weights):
-        self._weights = weights
-
-    @property
-    def models_parameters(self):
-        return self._models_parameters
+        estimator = RandomForestRegressor(**models_parameters.hyperparameters,
+                                          random_state=models_parameters.seed, n_jobs=-1)
 
-    def fit(self, X_forest, y_forest, X_omp, y_omp):
-        self._forest = self._train_forest(X_forest, y_forest)
-        self._omp = self._extract_subforest(X_omp, y_omp)
-        self._weights = self._omp.coef_
-        return self
-
-    def score_regressor(self, X, y):
-        return self._regressor.score(X, y)
-
-    def predict(self, X):
-        """
-        Apply the OMPForestRegressor to X.
-
-        :param X:
-        :return:
-        """
-        forest_predictions = self._forest_prediction(X)
-
-        if self._models_parameters.normalize_D:
-            forest_predictions /= self._forest_norms
-
-        predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_)))) \
-            if self._models_parameters.normalize_weights \
-            else self._omp.predict(forest_predictions)
-
-        return predictions
+        super().__init__(models_parameters, estimator)
 
     def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
         """
@@ -80,38 +33,3 @@ class OmpForestRegressor(BaseEstimator):
             raise ValueError("Unsupported metric '{}'.".format(metric))
 
         return evaluation
-
-    def _train_forest(self, X, y):
-        self._regressor.fit(X, y)
-        forest = self._regressor.estimators_
-        return forest
-
-    def _extract_subforest(self, X, y):
-        """
-        Given an already estimated regressor: apply OMP to get the weight of each tree.
-
-        The X data is used for interrogation of every tree in the forest. The y data
-        is used for finding the weights in OMP.
-
-        :param X: (n_sample, n_features) array
-        :param y: (n_sample,) array
-        :return:
-        """
-        self._logger.debug("Forest make prediction on X")
-        D = self._forest_prediction(X)
-
-        if self._models_parameters.normalize_D:
-            # question: maybe consider other kinds of normalization
-            self._logger.debug("Compute norm of predicted vectors on X")
-            self._forest_norms = np.linalg.norm(D, axis=0)
-            D /= self._forest_norms
-
-        omp = OrthogonalMatchingPursuit(
-            n_nonzero_coefs=self._models_parameters.extracted_forest_size,
-            fit_intercept=False, normalize=False)
-        self._logger.debug("Apply orthogonal maching pursuit on forest for {} extracted trees."
-                           .format(self._models_parameters.extracted_forest_size))
-        return omp.fit(D, y)
-
-    def _forest_prediction(self, X):
-        return np.array([tree.predict(X) for tree in self._forest]).T
diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py
index b586914166cf80f274a502d8d44b83f6b6f97484..a9bebe044b68475f5cc0cf6c6a2097ffe986e47c 100644
--- a/code/bolsonaro/trainer.py
+++ b/code/bolsonaro/trainer.py
@@ -8,12 +8,26 @@ import numpy as np
 
 
 class Trainer(object):
+    """
+    Class capable of fitting any model object to some prepared data, then evaluating it and saving the results through the `train` method.
+    """
 
     def __init__(self, dataset):
+        """
+
+        :param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and y_test attributes
+        """
         self._dataset = dataset
         self._logger = LoggerFactory.create(LOG_PATH, __name__)
 
     def train(self, model, models_dir):
+        """
+
+        :param model: Object with fit, score and score_base_estimator methods
+        :param models_dir: Directory where the results will be saved
+        :return:
+        """
+        # todo: this function does more than "train"; it selects the data subsets, trains and evaluates the model -> rename it
         self._logger.debug('Training model using train set...')
         begin_time = time.time()
 
@@ -45,16 +59,24 @@ class Trainer(object):
         )
         end_time = time.time()
 
-        ModelRawResults(
-            forest=model.forest,
-            weights=model.weights,
+        results = ModelRawResults(
+            model_object=model,
             training_time=end_time - begin_time,
             datetime=datetime.datetime.now(),
             train_score=model.score(self._dataset.X_train, self._dataset.y_train),
             dev_score=model.score(self._dataset.X_dev, self._dataset.y_dev),
             test_score=model.score(self._dataset.X_test, self._dataset.y_test),
             score_metric=model.DEFAULT_SCORE_METRIC, # TODO: resolve the used metric in a proper way
-            train_score_regressor=model.score_regressor(self._dataset.X_train, self._dataset.y_train),
-            dev_score_regressor=model.score_regressor(self._dataset.X_dev, self._dataset.y_dev),
-            test_score_regressor=model.score_regressor(self._dataset.X_test, self._dataset.y_test)
-        ).save(models_dir)
+            train_score_regressor=model.score_base_estimator(self._dataset.X_train, self._dataset.y_train),
+            dev_score_regressor=model.score_base_estimator(self._dataset.X_dev, self._dataset.y_dev),
+            test_score_regressor=model.score_base_estimator(self._dataset.X_test, self._dataset.y_test)
+        )
+        results.save(models_dir)
+        self._logger.info("Base performance on test: {}".format(results.test_score_regressor))
+        self._logger.info("Performance on test: {}".format(results.test_score))
+
+        self._logger.info("Base performance on train: {}".format(results.train_score_regressor))
+        self._logger.info("Performance on train: {}".format(results.train_score))
+
+        self._logger.info("Base performance on dev: {}".format(results.dev_score_regressor))
+        self._logger.info("Performance on dev: {}".format(results.dev_score))
diff --git a/code/bolsonaro/utils.py b/code/bolsonaro/utils.py
index 82e501878ba06320914230096213d2d28548e4dc..21c7f72ac9173caf2cf1b5ccbbe6dde61193d1aa 100644
--- a/code/bolsonaro/utils.py
+++ b/code/bolsonaro/utils.py
@@ -1,6 +1,7 @@
 import os
 import json
 import pickle
+from copy import deepcopy
 
 
 def resolve_experiment_id(models_dir):
@@ -45,3 +46,21 @@ def load_obj_from_pickle(file_path, constructor):
     with open(file_path, 'rb') as input_file:
         parameters = pickle.load(input_file)
     return constructor(**parameters)
+
+def binarize_class_data(data, class_pos, inplace=True):
+    """
+    Replace class_pos by +1 and ~class_pos by -1.
+
+    :param data: an array of classes
+    :param class_pos: the positive class to be replaced by +1
+    :param inplace: If True, modify data in place (still return it, also)
+    :return:
+    """
+    if not inplace:
+        data = deepcopy(data)
+
+    position_class_labels = (data == class_pos)
+    data[~(position_class_labels)] = -1
+    data[(position_class_labels)] = +1
+
+    return data
\ No newline at end of file
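A small usage sketch of `binarize_class_data`, mirroring how `OmpForestMulticlassClassifier.fit_omp` builds one one-vs-all problem per class (the printed values are the expected outputs):

```python
import numpy as np

from bolsonaro.utils import binarize_class_data

labels = np.array([0, 2, 2, 1, 0])

# One-vs-all binarization against class 2; inplace=False leaves `labels` untouched.
binary = binarize_class_data(labels, class_pos=2, inplace=False)
print(binary)   # [-1  1  1 -1 -1]
print(labels)   # [0 2 2 1 0] -- unchanged
```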
diff --git a/code/compute_hyperparameters.py b/code/compute_hyperparameters.py
index 199e060f3ee2e3a125a7af05e9205453ae079b83..414a7df007d1c0b020705f29d7481d9453391ab6 100644
--- a/code/compute_hyperparameters.py
+++ b/code/compute_hyperparameters.py
@@ -38,7 +38,7 @@ def clean_numpy_int_list(list_n):
 
 if __name__ == "__main__":
     # get environment variables in .env
-    load_dotenv(find_dotenv('.env.example'))
+    load_dotenv(find_dotenv('.env'))
 
     DEFAULT_CV = 3
     DEFAULT_N_ITER = 50
@@ -79,11 +79,10 @@ if __name__ == "__main__":
         dataset_parameters = DatasetParameters(dataset_name, test_size=0.2, dev_size=0.01, random_state=random_seed, dataset_normalizer=None)
         dataset = DatasetLoader.load(dataset_parameters)
 
-        if dataset.task == Task.CLASSIFICATION:
-            scorer = 'accuracy'
-
         if dataset.task == Task.REGRESSION:
             scorer = 'neg_mean_squared_error'
+        else:
+            scorer = 'accuracy'
 
         bayesian_searcher = HyperparameterSearcher()
         opt = bayesian_searcher.search(dataset, DICT_PARAM_SPACE, args.n_iter,
diff --git a/code/compute_results.py b/code/compute_results.py
index 0f26eb101a1910577593223a166e19b495f73d85..64124af70954cc6af6a923f03f5a122a75f453fb 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -12,7 +12,7 @@ import os
 
 if __name__ == "__main__":
     # get environment variables in .env
-    load_dotenv(find_dotenv('.env.example'))
+    load_dotenv(find_dotenv('.env'))
 
     DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
     DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
diff --git a/code/train.py b/code/train.py
index d58871db980369efa254313b9997f5f9e99c0bbe..34c2003db8aef25d105831989b5c38b4e966f640 100644
--- a/code/train.py
+++ b/code/train.py
@@ -19,9 +19,21 @@
 
 
 def process_job(seed, parameters, experiment_id, hyperparameters):
+    """
+    Experiment function.
+
+    Will be used as base function for worker in multithreaded application.
+
+    :param seed:
+    :param parameters:
+    :param experiment_id:
+    :param hyperparameters:
+    :return:
+    """
     logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_ti{}'.format(
         seed, threading.get_ident()))
     logger.info('seed={}'.format(seed))
+
     seed_str = str(seed)
     experiment_id_str = str(experiment_id)
     models_dir = parameters['models_dir'] + os.sep + experiment_id_str + os.sep + 'seeds' + \
@@ -36,12 +47,12 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
         dataset_normalizer=parameters['dataset_normalizer']
     )
     dataset_parameters.save(models_dir, experiment_id_str)
-
     dataset = DatasetLoader.load(dataset_parameters)
 
     trainer = Trainer(dataset)
 
     for extracted_forest_size in parameters['extracted_forest_size']:
+        # question: if training takes too long, one may also split experiments for different forest sizes across different workers
         logger.info('extracted_forest_size={}'.format(extracted_forest_size))
         sub_models_dir = models_dir + os.sep + 'extracted_forest_size' + os.sep + str(extracted_forest_size)
         pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
@@ -62,8 +73,7 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
     logger.info('Training done')
 
 if __name__ == "__main__":
-    # get environment variables in .env
-    load_dotenv(find_dotenv('.env.example'))
+    load_dotenv(find_dotenv('.env'))
 
     DEFAULT_EXPERIMENT_CONFIGURATION_PATH = 'experiments'
     DEFAULT_DATASET_NAME = 'boston'
@@ -110,6 +120,7 @@ if __name__ == "__main__":
 
     logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
 
+    # The number of trees to extract from the forest (K)
     parameters['extracted_forest_size'] = parameters['extracted_forest_size'] \
         if type(parameters['extracted_forest_size']) == list \
         else [parameters['extracted_forest_size']]
@@ -128,6 +139,7 @@ if __name__ == "__main__":
     if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
         logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    
 
+    # Seeds are either provided as parameters or generated at random
     seeds = parameters['seeds'] if parameters['seeds'] is not None \
         else [random.randint(begin_random_seed_range, end_random_seed_range) \
         for i in range(parameters['random_seed_number'])]
diff --git a/experiments/.gitkeep b/experiments/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev.json b/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev.json
deleted file mode 100644
index b6dd49c4a7f9ef9b8ae97c1ac578d35f0a47c171..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train+dev,train+dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D.json b/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D.json
deleted file mode 100644
index 8d50e1964663c6f4cd88efc2e7c85e4e19b2ced3..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train+dev,train+dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D_weights-normalization.json b/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D_weights-normalization.json
deleted file mode 100644
index 2e7b19ec64d0d36048022df069377e3cb3b0d88e..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_normalize-D_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train+dev,train+dev",
-    "normalize_weights": true
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_weights-normalization.json b/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_weights-normalization.json
deleted file mode 100644
index c0fa623dadbb7a142c9f3916428e225dea94ddba..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train+dev,train+dev/boston_train+dev,train+dev_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train+dev,train+dev",
-    "normalize_weights": true
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,dev/boston_train,dev.json b/experiments/boston/stage3/train,dev/boston_train,dev.json
deleted file mode 100644
index 0ffac35eb43a7568bb14a85010e538b094490b72..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,dev/boston_train,dev.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D.json b/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D.json
deleted file mode 100644
index d7f1c2e8427278615e76b7dc734c8936bef6fe57..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D_weights-normalization.json b/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D_weights-normalization.json
deleted file mode 100644
index 824133af36f3c226799c3d5d025f3cfab9fbd421..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,dev/boston_train,dev_normalize-D_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,dev",
-    "normalize_weights": true
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,dev/boston_train,dev_weights-normalization.json b/experiments/boston/stage3/train,dev/boston_train,dev_weights-normalization.json
deleted file mode 100644
index 45e91739f838f6c1dbcc94e6dd5da136eca08f1d..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,dev/boston_train,dev_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,dev",
-    "normalize_weights": true
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,train+dev/boston_train,train+dev.json b/experiments/boston/stage3/train,train+dev/boston_train,train+dev.json
deleted file mode 100644
index 4da1e6d4b9b10d620b23adee3a6b1719078da01d..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,train+dev/boston_train,train+dev.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,train+dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D.json b/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D.json
deleted file mode 100644
index ccc9befa778ccac3eb5d9efeebaa3fb8f1624c61..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,train+dev",
-    "normalize_weights": false
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D_weights-normalization.json b/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D_weights-normalization.json
deleted file mode 100644
index 93c0082c477841a765b3feb4bda6d4529ee14dcc..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_normalize-D_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": true,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,train+dev",
-    "normalize_weights": true
-}
\ No newline at end of file
diff --git a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_weights-normalization.json b/experiments/boston/stage3/train,train+dev/boston_train,train+dev_weights-normalization.json
deleted file mode 100644
index ed3bf0823c1d2c7b6da82f9554b492820ca9c638..0000000000000000000000000000000000000000
--- a/experiments/boston/stage3/train,train+dev/boston_train,train+dev_weights-normalization.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "dataset_name": "boston",
-    "normalize_D": false,
-    "dataset_normalizer": "standard",
-    "forest_size": 100,
-    "extracted_forest_size": [
-        10,
-        20,
-        30
-    ],
-    "models_dir": ".\\models",
-    "dev_size": 0.2,
-    "test_size": 0.2,
-    "random_seed_number": 3,
-    "seeds": null,
-    "subsets_used": "train,train+dev",
-    "normalize_weights": true
-}
\ No newline at end of file