Commit 69f71671 authored by Léo Bouscarrat
Browse files

Merge branch '17-adding-new-datasets' of...

Merge branch '17-adding-new-datasets' of https://gitlab.lis-lab.fr/luc.giffon/bolsonaro into 17-adding-new-datasets
parents af068a00 1db36b5d
from bolsonaro.utils import tqdm_joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from sklearn.cluster import KMeans
from abc import abstractmethod, ABCMeta
import numpy as np
from scipy.stats import mode
from joblib import Parallel, delayed
from tqdm import tqdm
class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    Random forest regressor pruned by k-means clustering of its trees.

    Based on 'On extreme pruning of random forest ensembles for real-time
    predictive applications', by Khaled Fawagreh, Mohamed Medhat Gaber and
    Eyad Elyan.

    A full forest is fitted, its trees are clustered according to their
    predictions on the training set, and one tree per cluster (the one with
    the lowest validation error) is kept.
    """

    def __init__(self, models_parameters, score_metric=mean_squared_error):
        """
        :param models_parameters: Object exposing `hyperparameters` (keyword
            arguments forwarded to RandomForestRegressor), `seed` and
            `extracted_forest_size` (number of clusters, i.e. number of trees
            kept after pruning).
        :param score_metric: Callable `metric(y_true, y_pred)`. It is treated
            as an error measure: lower values are considered better.
        """
        self._models_parameters = models_parameters
        self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
            random_state=self._models_parameters.seed, n_jobs=-1)
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric

    @property
    def models_parameters(self):
        return self._models_parameters

    def fit(self, X_train, y_train, X_val, y_val):
        """
        Fit the full forest on the train data, then prune it.

        :param X_train: Data used to fit the forest and to cluster the trees.
        :param y_train: Targets associated with `X_train`.
        :param X_val: Data used to pick the kept tree inside each cluster.
        :param y_val: Targets associated with `X_val`.
        """
        self._estimator.fit(X_train, y_train)

        # One row per tree: the tree's predictions on the training set.
        predictions = np.array([tree.predict(X_train) for tree in self._estimator.estimators_])

        kmeans = KMeans(n_clusters=self._extracted_forest_size,
            random_state=self._models_parameters.seed).fit(predictions)
        labels = np.array(kmeans.labels_)

        # For each cluster select the best tree on the validation set
        with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb:
            pruned_forest = Parallel(n_jobs=-1)(delayed(self._prune_forest_job)(prune_forest_job_pb,
                cluster_label, labels, X_val, y_val, self._score_metric)
                for cluster_label in range(self._extracted_forest_size))

        # The underlying forest now only holds the selected trees.
        self._estimator.estimators_ = pruned_forest

    def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric):
        """
        Return the tree of cluster `c` with the lowest validation error.

        :param prune_forest_job_pb: Progress bar updated once the cluster is processed.
        :param c: Label of the cluster to process.
        :param labels: Cluster label of every tree of the forest.
        :param X_val: Validation data.
        :param y_val: Validation targets.
        :param score_metric: Callable `metric(y_true, y_pred)`, lower is better.
        """
        index = np.where(labels == c)[0]
        with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb:
            cluster = Parallel(n_jobs=-1)(delayed(self._cluster_job)(cluster_job_pb, tree_index, X_val,
                y_val, score_metric) for tree_index in index)
        # The metric is an error (default: mean_squared_error), so the best
        # tree is the one minimising it. argmax would keep the worst tree.
        best_tree_index = np.argmin(cluster)
        prune_forest_job_pb.update()
        return self._estimator.estimators_[index[best_tree_index]]

    def _cluster_job(self, cluster_job_pb, i, X_val, y_val, score_metric):
        """
        Compute the validation error of the `i`-th tree of the forest.

        :param cluster_job_pb: Progress bar updated once the tree is evaluated.
        :param i: Index of the tree in `self._estimator.estimators_`.
        :return: `score_metric(y_val, tree predictions on X_val)`.
        """
        y_val_pred = self._estimator.estimators_[i].predict(X_val)
        tree_pred = score_metric(y_val, y_val_pred)
        cluster_job_pb.update()
        return tree_pred

    def predict(self, X):
        """Predict with the underlying forest (the pruned one once `fit` has run)."""
        return self._estimator.predict(X)

    def score(self, X, y):
        """
        Evaluate the average prediction of the forest's trees.

        :param X: Data to predict on.
        :param y: True targets.
        :return: `score_metric(y, mean of the trees' predictions)`.
        """
        predictions = np.array([tree.predict(X) for tree in self._estimator.estimators_])
        mean_predictions = np.mean(predictions, axis=0)
        # (y_true, y_pred) order, as in _cluster_job; matters for asymmetric metrics.
        return self._score_metric(y, mean_predictions)

    def predict_base_estimator(self, X):
        """Predict with the underlying RandomForestRegressor."""
        return self._estimator.predict(X)
...@@ -2,6 +2,7 @@ from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, Om ...@@ -2,6 +2,7 @@ from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, Om
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.model_parameters import ModelParameters from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.data.task import Task from bolsonaro.data.task import Task
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
...@@ -22,9 +23,11 @@ class ModelFactory(object): ...@@ -22,9 +23,11 @@ class ModelFactory(object):
elif model_parameters.extraction_strategy == 'random': elif model_parameters.extraction_strategy == 'random':
return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size, return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
random_state=model_parameters.seed) random_state=model_parameters.seed)
else: elif model_parameters.extraction_strategy == 'none':
return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'], return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
random_state=model_parameters.seed) random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
elif task == Task.REGRESSION: elif task == Task.REGRESSION:
if model_parameters.extraction_strategy == 'omp': if model_parameters.extraction_strategy == 'omp':
return OmpForestRegressor(model_parameters) return OmpForestRegressor(model_parameters)
...@@ -33,15 +36,21 @@ class ModelFactory(object): ...@@ -33,15 +36,21 @@ class ModelFactory(object):
random_state=model_parameters.seed) random_state=model_parameters.seed)
elif model_parameters.extraction_strategy == 'similarity': elif model_parameters.extraction_strategy == 'similarity':
return SimilarityForestRegressor(model_parameters) return SimilarityForestRegressor(model_parameters)
else: elif model_parameters.extraction_strategy == 'kmeans':
return KMeansForestRegressor(model_parameters)
elif model_parameters.extraction_strategy == 'none':
return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'], return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
random_state=model_parameters.seed) random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
elif task == Task.MULTICLASSIFICATION: elif task == Task.MULTICLASSIFICATION:
if model_parameters.extraction_strategy == 'omp': if model_parameters.extraction_strategy == 'omp':
return OmpForestMulticlassClassifier(model_parameters) return OmpForestMulticlassClassifier(model_parameters)
elif model_parameters.extraction_strategy == 'random': elif model_parameters.extraction_strategy == 'random':
return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size, return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
random_state=model_parameters.seed) random_state=model_parameters.seed)
else: elif model_parameters.extraction_strategy == 'none':
return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'], return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
random_state=model_parameters.seed) random_state=model_parameters.seed)
else:
raise ValueError('Invalid extraction strategy')
...@@ -3,6 +3,7 @@ from sklearn.metrics import mean_squared_error ...@@ -3,6 +3,7 @@ from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
from abc import abstractmethod, ABCMeta from abc import abstractmethod, ABCMeta
import numpy as np import numpy as np
from tqdm import tqdm
class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta): class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
...@@ -10,56 +11,69 @@ class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta): ...@@ -10,56 +11,69 @@ class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2822360/ https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2822360/
""" """
def __init__(self, models_parameters): def __init__(self, models_parameters, score_metric=mean_squared_error):
self._models_parameters = models_parameters self._models_parameters = models_parameters
self._regressor = RandomForestRegressor(n_estimators=self._models_parameters.hyperparameters['n_estimators'], self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
random_state=models_parameters.seed) random_state=self._models_parameters.seed, n_jobs=-1)
self._extracted_forest_size = self._models_parameters.extracted_forest_size self._extracted_forest_size = self._models_parameters.extracted_forest_size
self._score_metric = score_metric
@property @property
def models_parameters(self): def models_parameters(self):
return self._models_parameters return self._models_parameters
def fit(self, X_train, y_train, X_val, y_val, score_metric=mean_squared_error): def fit(self, X_train, y_train, X_val, y_val):
self._estimator.fit(X_train, y_train)
self._regressor.fit(X_train, y_train) y_val_pred = self._estimator.predict(X_val)
forest_pred = self._score_metric(y_val, y_val_pred)
y_val_pred = self._regressor.predict(X_val) forest = self._estimator.estimators_
forest_pred = score_metric(y_val, y_val_pred)
forest = self._regressor.estimators_
selected_trees = list() selected_trees = list()
tree_list = list(self._regressor.estimators_) tree_list = list(self._estimator.estimators_)
val_scores = list()
with tqdm(tree_list) as tree_pred_bar:
tree_pred_bar.set_description('[Initial tree predictions]')
for tree in tree_pred_bar:
val_scores.append(tree.predict(X_val))
tree_pred_bar.update(1)
for _ in range(self._extracted_forest_size): with tqdm(range(self._extracted_forest_size), disable=True) as pruning_forest_bar:
best_similarity = 100000 pruning_forest_bar.set_description(f'[Pruning forest s={self._extracted_forest_size}]')
found_index = 0 for i in pruning_forest_bar:
for i in range(len(tree_list)): best_similarity = 100000
lonely_tree = tree_list[i] found_index = 0
del tree_list[i] with tqdm(range(len(tree_list)), disable=True) as tree_list_bar:
val_list = list() tree_list_bar.set_description(f'[Tree selection s={self._extracted_forest_size} #{i}]')
for tree in tree_list: for j in tree_list_bar:
val_pred = tree.predict(X_val) lonely_tree = tree_list[j]
val_list.append(val_pred) del tree_list[j]
val_list = np.array(val_list) val_mean = np.mean(np.asarray(val_scores), axis=0)
val_mean = np.mean(val_list, axis=0) val_score = self._score_metric(val_mean, y_val)
val_score = score_metric(val_mean, y_val) temp_similarity = abs(forest_pred - val_score)
temp_similarity = abs(forest_pred - val_score) if (temp_similarity < best_similarity):
if (temp_similarity < best_similarity): found_index = j
found_index = i best_similarity = temp_similarity
best_similarity = temp_similarity tree_list.insert(j, lonely_tree)
tree_list.insert(i, lonely_tree) val_scores.insert(j, lonely_tree.predict(X_val))
selected_trees.append(tree_list[found_index]) tree_list_bar.update(1)
del tree_list[found_index] selected_trees.append(tree_list[found_index])
del tree_list[found_index]
del val_scores[found_index]
pruning_forest_bar.update(1)
pruned_forest = list(set(forest) - set(selected_trees)) pruned_forest = list(set(forest) - set(selected_trees))
self._regressor.estimators_ = pruned_forest self._estimator.estimators_ = pruned_forest
def score(self, X, y): def score(self, X, y):
test_list = list() test_list = list()
for mod in self._regressor.estimators_: for mod in self._estimator.estimators_:
test_pred = mod.predict(X) test_pred = mod.predict(X)
test_list.append(test_pred) test_list.append(test_pred)
test_list = np.array(test_list) test_list = np.array(test_list)
test_mean = np.mean(test_list, axis=0) test_mean = np.mean(test_list, axis=0)
score = mean_squared_error(test_mean, y) score = self._score_metric(test_mean, y)
return score return score
def predict_base_estimator(self, X):
return self._estimator.predict(X)
from bolsonaro.models.model_raw_results import ModelRawResults from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.error_handling.logger_factory import LoggerFactory from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.data.task import Task from bolsonaro.data.task import Task
from . import LOG_PATH from . import LOG_PATH
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score from sklearn.metrics import mean_squared_error, accuracy_score
import time import time
import datetime import datetime
import numpy as np import numpy as np
class Trainer(object): class Trainer(object):
""" """
Class capable of fitting any model object to some prepared data then evaluate and save results through the `train` method. Class capable of fitting any model object to some prepared data then evaluate and save results through the `train` method.
""" """
def __init__(self, dataset, regression_score_metric=mean_squared_error, classification_score_metric=accuracy_score, def __init__(self, dataset, regression_score_metric=mean_squared_error, classification_score_metric=accuracy_score,
base_regression_score_metric=mean_squared_error, base_classification_score_metric=accuracy_score): base_regression_score_metric=mean_squared_error, base_classification_score_metric=accuracy_score):
""" """
:param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and Y_test attributes :param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and Y_test attributes
""" """
self._dataset = dataset self._dataset = dataset
self._logger = LoggerFactory.create(LOG_PATH, __name__) self._logger = LoggerFactory.create(LOG_PATH, __name__)
self._regression_score_metric = regression_score_metric self._regression_score_metric = regression_score_metric
self._classification_score_metric = classification_score_metric self._classification_score_metric = classification_score_metric
self._base_regression_score_metric = base_regression_score_metric self._base_regression_score_metric = base_regression_score_metric
self._base_classification_score_metric = base_classification_score_metric self._base_classification_score_metric = base_classification_score_metric
self._score_metric_name = regression_score_metric.__name__ if dataset.task == Task.REGRESSION \ self._score_metric_name = regression_score_metric.__name__ if dataset.task == Task.REGRESSION \
else classification_score_metric.__name__ else classification_score_metric.__name__
self._base_score_metric_name = base_regression_score_metric.__name__ if dataset.task == Task.REGRESSION \ self._base_score_metric_name = base_regression_score_metric.__name__ if dataset.task == Task.REGRESSION \
else base_classification_score_metric.__name__ else base_classification_score_metric.__name__
@property @property
def score_metric_name(self): def score_metric_name(self):
return self._score_metric_name return self._score_metric_name
@property @property
def base_score_metric_name(self): def base_score_metric_name(self):
return self._base_score_metric_name return self._base_score_metric_name
def init(self, model, subsets_used='train,dev'): def init(self, model, subsets_used='train,dev'):
if type(model) in [RandomForestRegressor, RandomForestClassifier]: if type(model) in [RandomForestRegressor, RandomForestClassifier]:
if subsets_used == 'train,dev': if subsets_used == 'train,dev':
self._X_forest = self._dataset.X_train self._X_forest = self._dataset.X_train
self._y_forest = self._dataset.y_train self._y_forest = self._dataset.y_train
else: else:
self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev]) self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev]) self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
self._logger.debug('Fitting the forest on train subset') self._logger.debug('Fitting the forest on train subset')
elif model.models_parameters.subsets_used == 'train,dev': elif model.models_parameters.subsets_used == 'train,dev':
self._X_forest = self._dataset.X_train self._X_forest = self._dataset.X_train
self._y_forest = self._dataset.y_train self._y_forest = self._dataset.y_train
self._X_omp = self._dataset.X_dev self._X_omp = self._dataset.X_dev
self._y_omp = self._dataset.y_dev self._y_omp = self._dataset.y_dev
self._logger.debug('Fitting the forest on train subset and OMP on dev subset.') self._logger.debug('Fitting the forest on train subset and OMP on dev subset.')
elif model.models_parameters.subsets_used == 'train+dev,train+dev': elif model.models_parameters.subsets_used == 'train+dev,train+dev':
self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev]) self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
self._X_omp = self._X_forest self._X_omp = self._X_forest
self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev]) self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
self._y_omp = self._y_forest self._y_omp = self._y_forest
self._logger.debug('Fitting both the forest and OMP on train+dev subsets.') self._logger.debug('Fitting both the forest and OMP on train+dev subsets.')
elif model.models_parameters.subsets_used == 'train,train+dev': elif model.models_parameters.subsets_used == 'train,train+dev':
self._X_forest = self._dataset.X_train self._X_forest = self._dataset.X_train
self._y_forest = self._dataset.y_train self._y_forest = self._dataset.y_train
self._X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev]) self._X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
self._y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev]) self._y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
else: else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used)) raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model): def train(self, model):
""" """
:param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor, :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
OmpForestBinaryClassifier, OmpForestMulticlassClassifier. OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
:return: :return:
""" """
self._logger.debug('Training model using train set...') self._logger.debug('Training model using train set...')
self._begin_time = time.time() self._begin_time = time.time()
if type(model) in [RandomForestRegressor, RandomForestClassifier]: if type(model) in [RandomForestRegressor, RandomForestClassifier]:
model.fit( model.fit(
X=self._X_forest, X=self._X_forest,
y=self._y_forest y=self._y_forest
) )
else: else:
model.fit( model.fit(
self._X_forest, self._X_forest,
self._y_forest, self._y_forest,
self._X_omp, self._X_omp,
self._y_omp self._y_omp
) )
self._end_time = time.time() self._end_time = time.time()
def __score_func(self, model, X, y_true, weights=True): def __score_func(self, model, X, y_true, weights=True):
if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]: if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
if weights: if weights:
y_pred = model.predict(X) y_pred = model.predict(X)
else: else:
y_pred = model.predict_no_weights(X) y_pred = model.predict_no_weights(X)
result = self._regression_score_metric(y_true, y_pred) result = self._regression_score_metric(y_true, y_pred)
elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]: elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
if weights: if weights:
y_pred = model.predict(X) y_pred = model.predict(X)
else: else:
y_pred = model.predict_no_weights(X) y_pred = model.predict_no_weights(X)
if type(model) is OmpForestBinaryClassifier: if type(model) is OmpForestBinaryClassifier:
y_pred = np.sign(y_pred) y_pred = np.sign(y_pred)
y_pred = np.where(y_pred==0, 1, y_pred) y_pred = np.where(y_pred==0, 1, y_pred)
result = self._classification_score_metric(y_true, y_pred) result = self._classification_score_metric(y_true, y_pred)
return result return result
def __score_func_base(self, model, X, y_true): def __score_func_base(self, model, X, y_true):
if type(model) == OmpForestRegressor: if type(model) == OmpForestRegressor:
y_pred = model.predict_base_estimator(X) y_pred = model.predict_base_estimator(X)
result = self._base_regression_score_metric(y_true, y_pred) result = self._base_regression_score_metric(y_true, y_pred)
elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]: elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
y_pred = model.predict_base_estimator(X) y_pred = model.predict_base_estimator(X)
result = self._base_classification_score_metric(y_true, y_pred) result = self._base_classification_score_metric(y_true, y_pred)
elif type(model) == RandomForestClassifier: elif type(model) == RandomForestClassifier:
y_pred = model.predict(X) y_pred = model.predict(X)
result = self._base_classification_score_metric(y_true, y_pred) result = self._base_classification_score_metric(y_true, y_pred)
elif type(model) in [RandomForestRegressor, SimilarityForestRegressor]: elif type(model) in [RandomForestRegressor, SimilarityForestRegressor]:
y_pred = model.predict(X) y_pred = model.predict(X)
result = self._base_regression_score_metric(y_true, y_pred) result = self._base_regression_score_metric(y_true, y_pred)
return result return result
def compute_results(self, model, models_dir): def compute_results(self, model, models_dir):
""" """
:param model: Object with :param model: Object with
:param models_dir: Where the results will be saved :param models_dir: Where the results will be saved
""" """
model_weights = '' model_weights = ''
if type(model) in [OmpForestRegressor, OmpForestBinaryClassifier]: if type(model) in [OmpForestRegressor, OmpForestBinaryClassifier]:
model_weights = model._omp.coef_ model_weights = model._omp.coef_
elif type(model) == OmpForestMulticlassClassifier: elif type(model) == OmpForestMulticlassClassifier:
model_weights = model._dct_class_omp model_weights = model._dct_class_omp
elif type(model) == OmpForestBinaryClassifier: elif type(model) == OmpForestBinaryClassifier:
model_weights = model._omp model_weights = model._omp
results = ModelRawResults( results = ModelRawResults(
model_weights=model_weights, model_weights=model_weights,
training_time=self._end_time - self._begin_time, training_time=self._end_time - self._begin_time,
datetime=datetime.datetime.now(), datetime=datetime.datetime.now(),
train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train), train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train),
dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev), dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev),
test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test), test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test),
train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train), train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train),
dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev), dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test), test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
score_metric=self._score_metric_name, score_metric=self._score_metric_name,
base_score_metric=self._base_score_metric_name base_score_metric=self._base_score_metric_name
) )
results.save(models_dir) results.save(models_dir)
self._logger.info("Base performance on test: {}".format(results.test_score_base)) self._logger.info("Base performance on test: {}".format(results.test_score_base))
self._logger.info("Performance on test: {}".format(results.test_score)) self._logger.info("Performance on test: {}".format(results.test_score))
self._logger.info("Base performance on train: {}".format(results.train_score_base)) self._logger.info("Base performance on train: {}".format(results.train_score_base))
self._logger.info("Performance on train: {}".format(results.train_score)) self._logger.info("Performance on train: {}".format(results.train_score))
self._logger.info("Base performance on dev: {}".format(results.dev_score_base)) self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
self._logger.info("Performance on dev: {}".format(results.dev_score)) self._logger.info("Performance on dev: {}".format(results.dev_score))
if type(model) not in [RandomForestRegressor, RandomForestClassifier]: if type(model) not in [RandomForestRegressor, RandomForestClassifier