diff --git a/code/bolsonaro/models/model_factory.py b/code/bolsonaro/models/model_factory.py
index 262d2560054ba4177852d883cafd48eaccbe475d..74993cc0a30b754595a490de40d69e064687bc24 100644
--- a/code/bolsonaro/models/model_factory.py
+++ b/code/bolsonaro/models/model_factory.py
@@ -1,7 +1,8 @@
 from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
 from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
-from bolsonaro.data.task import Task
 from bolsonaro.models.model_parameters import ModelParameters
+from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
+from bolsonaro.data.task import Task
 
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 import os
@@ -30,6 +31,8 @@ class ModelFactory(object):
             elif model_parameters.extraction_strategy == 'random':
                 return RandomForestRegressor(n_estimators=model_parameters.extracted_forest_size,
                     random_state=model_parameters.seed)
+            elif model_parameters.extraction_strategy == 'similarity':
+                return SimilarityForestRegressor(model_parameters)
             else:
                 return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
                     random_state=model_parameters.seed)
diff --git a/code/bolsonaro/models/similarity_forest_regressor.py b/code/bolsonaro/models/similarity_forest_regressor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8d9c3ed349cf8c9e27acbcd7982694a65e11636
--- /dev/null
+++ b/code/bolsonaro/models/similarity_forest_regressor.py
@@ -0,0 +1,93 @@
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import mean_squared_error
+from sklearn.base import BaseEstimator
+from abc import abstractmethod, ABCMeta
+import numpy as np
+
+
+class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
+    """
+    Random forest regressor pruned with a similarity criterion.
+
+    A RandomForestRegressor is trained, then trees are removed one at a time:
+    at each step the tree whose removal leaves the validation score of the
+    averaged prediction closest to the score of the full forest is dropped.
+
+    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2822360/
+    """
+
+    def __init__(self, models_parameters):
+        """
+        :param models_parameters: object exposing ``hyperparameters['n_estimators']``,
+            ``seed`` and ``extracted_forest_size``.
+        """
+        self._models_parameters = models_parameters
+        self._regressor = RandomForestRegressor(n_estimators=self._models_parameters.hyperparameters['n_estimators'],
+            random_state=models_parameters.seed)
+        # Number of trees removed by fit().
+        # NOTE(review): the name suggests the size of the forest to keep - confirm which is intended.
+        self._extracted_forest_size = self._models_parameters.extracted_forest_size
+
+    @property
+    def models_parameters(self):
+        return self._models_parameters
+
+    def fit(self, X_train, y_train, X_val, y_val, score_metric=mean_squared_error):
+        """
+        Train the forest on the training set, then prune it on the validation set.
+
+        :param X_train: training samples used to fit the random forest.
+        :param y_train: training targets.
+        :param X_val: validation samples used to choose which trees to remove.
+        :param y_val: validation targets.
+        :param score_metric: callable ``(y_true, y_pred) -> float`` used to compare
+            the pruned forest with the full one.
+        :return: self.
+        :raises ValueError: if removing ``extracted_forest_size`` trees would leave
+            no tree in the forest.
+        """
+        self._regressor.fit(X_train, y_train)
+        forest_score = score_metric(y_val, self._regressor.predict(X_val))
+
+        tree_list = list(self._regressor.estimators_)
+        if self._extracted_forest_size >= len(tree_list):
+            raise ValueError('Cannot remove {} trees from a forest of {} trees.'.format(
+                self._extracted_forest_size, len(tree_list)))
+
+        # A tree's validation prediction never changes during pruning: compute it once
+        # instead of re-predicting with every remaining tree for every candidate.
+        tree_preds = np.array([tree.predict(X_val) for tree in tree_list])
+
+        for _ in range(self._extracted_forest_size):
+            best_similarity = float('inf')
+            found_index = 0
+            for i in range(len(tree_list)):
+                # Score of the ensemble made of every remaining tree except the i-th.
+                val_mean = np.mean(np.delete(tree_preds, i, axis=0), axis=0)
+                temp_similarity = abs(forest_score - score_metric(y_val, val_mean))
+                if temp_similarity < best_similarity:
+                    found_index = i
+                    best_similarity = temp_similarity
+            del tree_list[found_index]
+            tree_preds = np.delete(tree_preds, found_index, axis=0)
+
+        # tree_list is already the pruned forest, in its original (deterministic) order.
+        self._regressor.estimators_ = tree_list
+        return self
+
+    def predict(self, X):
+        """
+        Predict with the pruned forest.
+
+        :param X: samples to predict.
+        :return: the mean of the predictions of the trees kept by fit().
+        """
+        return np.mean([tree.predict(X) for tree in self._regressor.estimators_], axis=0)
+
+    def score(self, X, y):
+        """
+        :param X: samples to predict.
+        :param y: true targets.
+        :return: the mean squared error of predict(X) against y (lower is better).
+        """
+        return mean_squared_error(y, self.predict(X))
diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py
index 9fea5053f83a774026ac69c5ed7da47a6a36a296..ce233d56c5242166a852922fa5ef3c0ab4ac3f31 100644
--- a/code/bolsonaro/trainer.py
+++ b/code/bolsonaro/trainer.py
@@ -1,6 +1,7 @@
 from bolsonaro.models.model_raw_results import ModelRawResults
 from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
 from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
+from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
 from bolsonaro.error_handling.logger_factory import LoggerFactory
 from bolsonaro.data.task import Task
 from . import LOG_PATH
@@ -87,15 +88,15 @@ class Trainer(object):
             )
         else:
             model.fit(
-                X_forest=self._X_forest,
-                y_forest=self._y_forest,
-                X_omp=self._X_omp,
-                y_omp=self._y_omp
+                self._X_forest,
+                self._y_forest,
+                self._X_omp,
+                self._y_omp
             )
         self._end_time = time.time()
 
     def __score_func(self, model, X, y_true):
-        if type(model) in [OmpForestRegressor, RandomForestRegressor]:
+        if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
             y_pred = model.predict(X)
             result = self._regression_score_metric(y_true, y_pred)
         elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
@@ -115,7 +116,7 @@ class Trainer(object):
         elif type(model) == RandomForestClassifier:
             y_pred = model.predict(X)
             result = self._base_classification_score_metric(y_true, y_pred)
-        elif type(model) == RandomForestRegressor:
+        elif type(model) in [RandomForestRegressor, SimilarityForestRegressor]:
             y_pred = model.predict(X)
             result = self._base_regression_score_metric(y_true, y_pred)
         return result
diff --git a/code/train.py b/code/train.py
index 1131f2bf390f545385654ae59aea65a54e3f9977..e51514cc254ee564993243a676b05d07e3aa7597 100644
--- a/code/train.py
+++ b/code/train.py
@@ -163,7 +163,7 @@ if __name__ == "__main__":
     parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
     parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
     parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
-    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random or none.')
+    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none or similarity.')
     args = parser.parse_args()
 
     if args.experiment_configuration:
@@ -173,7 +173,7 @@ if __name__ == "__main__":
     else:
         parameters = args.__dict__
 
-    if parameters['extraction_strategy'] not in ['omp', 'random', 'none']:
-        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters.extraction_strategy))
+    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity']:  # parameters is a dict: read by key
+        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters['extraction_strategy']))
 
     pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
diff --git a/results/california_housing/stage4/losses_2.png b/results/california_housing/stage4/losses_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..5562fd0076c01cf38e93936a22d69c9e36c53fc5
Binary files /dev/null and b/results/california_housing/stage4/losses_2.png differ
diff --git a/results/california_housing/stage4_backup/losses_2.png b/results/california_housing/stage4_backup/losses_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..5562fd0076c01cf38e93936a22d69c9e36c53fc5
Binary files /dev/null and b/results/california_housing/stage4_backup/losses_2.png differ