# Adaboost.py (3.10 KiB) -- authored by Franck Dary
from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
import numpy as np
from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps
# Author-Info
# Module-level metadata: original author and maturity level of this module.
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def canProbas():
    """Return the constant flag ``True``.

    NOTE(review): judging by the name, callers presumably use this to ask
    whether the classifier can output class probabilities -- confirm
    against the calling code.
    """
    supportsProbas = True
    return supportsProbas
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
    """Train an AdaBoost classifier on the given data and return it.

    Parameters:
        DATASET: training samples, forwarded as-is to ``AdaBoostClassifier.fit``.
        CLASS_LABELS: training targets, forwarded as-is to ``AdaBoostClassifier.fit``.
        randomState: passed to the classifier as ``random_state``.
        NB_CORES: accepted for interface compatibility; not used here.
        **kwargs: ``kwargs['0']`` is the number of boosting estimators
            (converted with ``int``). ``kwargs['1']`` is currently ignored.

    Returns:
        The fitted ``AdaBoostClassifier`` instance.

    Raises:
        KeyError: if ``'0'`` is missing from ``kwargs``.
    """
    nRounds = int(kwargs['0'])
    # The base learner is always a default decision tree; kwargs['1'] is not read.
    weakLearner = DecisionTreeClassifier()
    booster = AdaBoostClassifier(
        n_estimators=nRounds,
        base_estimator=weakLearner,
        random_state=randomState,
    )
    booster.fit(DATASET, CLASS_LABELS)
    return booster
def paramsToSet(nIter, randomState):
    """Build ``nIter`` randomly drawn parameter sets for this classifier.

    Each parameter set is a two-element list: an estimator count obtained
    from ``randomState.randint(1, 15)`` followed by a fresh
    ``DecisionTreeClassifier`` instance.

    Parameters:
        nIter: number of parameter sets to generate.
        randomState: object exposing ``randint(low, high)``
            (presumably a ``numpy.random.RandomState`` -- confirm with callers).

    Returns:
        A list of ``nIter`` ``[n_estimators, base_estimator]`` lists.
    """
    return [
        [randomState.randint(1, 15), DecisionTreeClassifier()]
        for _ in range(nIter)
    ]
def getKWARGS(kwargsList):
    """Translate named Adaboost arguments into the positional-key dict form.

    Parameters:
        kwargsList: iterable of ``(name, value)`` pairs.

    Returns:
        A dict where ``"CL_Adaboost_n_est"`` is stored under ``'0'``
        (converted with ``int``) and ``"CL_Adaboost_b_est"`` is stored
        under ``'1'`` unchanged. Pairs with any other name are ignored;
        a repeated name overwrites the earlier value.
    """
    parsed = {}
    for name, value in kwargsList:
        if name == "CL_Adaboost_n_est":
            parsed['0'] = int(value)
            continue
        if name == "CL_Adaboost_b_est":
            parsed['1'] = value
    return parsed
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30,
                     nbCores=1):
    """Run a randomized hyper-parameter search for AdaBoost and return the best setting.

    Parameters:
        X_train: training samples, forwarded to ``RandomizedSearchCV.fit``.
        y_train: training targets, forwarded to ``RandomizedSearchCV.fit``.
        randomState: passed to ``RandomizedSearchCV`` as ``random_state``.
        outputFileName: forwarded to ``genHeatMaps`` together with the
            sampled parameters and their mean test scores.
        KFolds: passed to ``RandomizedSearchCV`` as ``cv``.
        metric: two-element sequence ``[metricName, metricConfigs]``.
            ``metricName`` is looked up as an attribute of ``Metrics``;
            ``metricConfigs`` is either ``None`` or a sequence whose items
            are passed to that module's ``get_scorer`` as keyword arguments
            named ``"0"``, ``"1"``, ... The default list is only read,
            never mutated.
        nIter: number of sampled parameter settings (``n_iter``).
        nbCores: passed to ``RandomizedSearchCV`` as ``n_jobs``.

    Returns:
        ``[best n_estimators, best base_estimator]`` taken from
        ``best_params_`` of the fitted search.
    """
    pipeline = Pipeline([('classifier', AdaBoostClassifier())])
    param = {"classifier__n_estimators": randint(1, 150),
             "classifier__base_estimator": [DecisionTreeClassifier()]}

    metricModule = getattr(Metrics, metric[0])
    if metric[1] is not None:
        # Keys must be strings: ``f(**mapping)`` raises TypeError on integer
        # keys, so the enumerate index is stringified. This also matches the
        # '0' / '1' key convention used for kwargs elsewhere in this module.
        metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1]))
    else:
        metricKWARGS = {}
    scorer = metricModule.get_scorer(**metricKWARGS)

    grid = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=param, refit=True, n_jobs=nbCores,
                              scoring=scorer, cv=KFolds, random_state=randomState)
    detector = grid.fit(X_train, y_train)
    desc_estimators = [detector.best_params_["classifier__n_estimators"],
                       detector.best_params_["classifier__base_estimator"]]

    # Hand the sampled settings and their mean CV scores to genHeatMaps.
    # The base estimator is always a decision tree, hence the constant label.
    scoresArray = detector.cv_results_['mean_test_score']
    params = [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
              ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
    genHeatMaps(params, scoresArray, outputFileName)
    return desc_estimators
def getConfig(config):
    """Return a one-line, human-readable description of an Adaboost configuration.

    Parameters:
        config: one of
            * an object exposing ``n_estimators`` and ``base_estimator``
              attributes,
            * a list (or list subclass) ``[n_estimators, base_estimator]``,
            * a dict (or dict subclass) keyed by ``0``/``1`` or by
              ``"0"``/``"1"``.

    Returns:
        The description string, starting with a newline and two tabs.

    Raises:
        KeyError / TypeError: if a list or dict ``config`` provides neither
            the integer-indexed nor the string-keyed entries.
        AttributeError: if a non-list, non-dict ``config`` lacks the
            expected attributes.
    """
    # isinstance (rather than an exact type match) lets list/dict subclasses
    # take the indexing path instead of failing on a missing attribute.
    if not isinstance(config, (list, dict)):
        nEstimators, baseEstimator = config.n_estimators, config.base_estimator
    else:
        try:
            nEstimators, baseEstimator = config[0], config[1]
        except (KeyError, IndexError):
            # Only lookup failures trigger the string-key fallback; a bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit.
            nEstimators, baseEstimator = config["0"], config["1"]
    return "\n\t\t- Adaboost with num_esimators : " + str(nEstimators) + ", base_estimators : " + str(
        baseEstimator)