Commit a77543da authored by Baptiste Bauvin
Successfully modified Adaboost for monoview classification

parent 887224c4
@@ -166,7 +166,7 @@ def initMonoviewKWARGS(args, classifiersNames):
Returns
-------
monoviewKWARGS : Dictionary of dictionaries
Dictionary resuming all the specific arguments for the benchmark, oe dictionary for each classifier.
Dictionary resuming all the specific arguments for the benchmark, one dictionary for each classifier.
For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
@@ -178,7 +178,7 @@ def initMonoviewKWARGS(args, classifiersNames):
except AttributeError:
raise AttributeError(classifiersName+" is not implemented in MonoviewClassifiers, "
"please specify the name of the file in MonoviewClassifiers")
monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS(args)
monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(args)
logging.debug("Done:\t Initializing Monoview classifiers arguments")
return monoviewKWARGS
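
To make the dispatch in this hunk easier to follow: initMonoviewKWARGS resolves each classifier name to its module and now asks that module for formatCmdArgs instead of getKWARGS. A minimal, self-contained sketch of that loop, with SimpleNamespace objects standing in for a real MonoviewClassifiers submodule and for the parsed command-line args:

from types import SimpleNamespace

# Stand-in for a MonoviewClassifiers submodule: only formatCmdArgs matters here.
fakeAdaboostModule = SimpleNamespace(
    formatCmdArgs=lambda args: {"n_estimators": args.Ada_n_est})

def initMonoviewKWARGS_sketch(args, classifiersNames, modules):
    """Mirror of the loop around this hunk: one KWARGS dict per classifier."""
    monoviewKWARGS = {}
    for classifiersName in classifiersNames:
        try:
            classifierModule = modules[classifiersName]
        except KeyError:
            raise AttributeError(classifiersName + " is not implemented in MonoviewClassifiers")
        monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(args)
    return monoviewKWARGS

cmdArgs = SimpleNamespace(Ada_n_est=50)  # stand-in for the argparse Namespace
print(initMonoviewKWARGS_sketch(cmdArgs, ["Adaboost"], {"Adaboost": fakeAdaboostModule}))
# {'AdaboostKWARGSInit': {'n_estimators': 50}}
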
@@ -73,19 +73,20 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
logging.debug("Done:\t Generate classifier args")
logging.debug("Start:\t Training")
cl_res = classifierModule.fit(X_train, y_train, randomState, NB_CORES=nbCores, **clKWARGS)
classifier = getattr(classifierModule, CL_type)(randomState, **clKWARGS)
classifier.fit(X_train, y_train) # NB_CORES=nbCores,
logging.debug("Done:\t Training")
logging.debug("Start:\t Predicting")
y_train_pred = cl_res.predict(X_train)
y_test_pred = cl_res.predict(X_test)
y_train_pred = classifier.predict(X_train)
y_test_pred = classifier.predict(X_test)
full_labels_pred = np.zeros(Y.shape, dtype=int)-100
for trainIndex, index in enumerate(classificationIndices[0]):
full_labels_pred[index] = y_train_pred[trainIndex]
for testIndex, index in enumerate(classificationIndices[1]):
full_labels_pred[index] = y_test_pred[testIndex]
if X_test_multiclass != []:
y_test_multiclass_pred = cl_res.predict(X_test_multiclass)
y_test_multiclass_pred = classifier.predict(X_test_multiclass)
else:
y_test_multiclass_pred = []
logging.debug("Done:\t Predicting")
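
The hunk above drops the module-level fit() helper in favour of instantiating the classifier class itself (getattr(classifierModule, CL_type)(randomState, **clKWARGS)) and using the plain sklearn fit/predict API. A minimal sketch of that flow on a toy dataset, with AdaBoostClassifier standing in for the resolved class; the -100 sentinel marks samples that received no prediction:

import numpy as np
from sklearn.ensemble import AdaBoostClassifier

rng = np.random.RandomState(42)
X = rng.rand(30, 4)                       # toy data in place of the benchmark view
Y = rng.randint(0, 2, 30)
trainIndices, testIndices = np.arange(0, 20), np.arange(20, 30)

# New pattern: build the classifier from its class, then use the sklearn API.
classifier = AdaBoostClassifier(random_state=42, n_estimators=10)
classifier.fit(X[trainIndices], Y[trainIndices])

y_train_pred = classifier.predict(X[trainIndices])
y_test_pred = classifier.predict(X[testIndices])

# Reassemble a full-length prediction vector; -100 flags unpredicted samples.
full_labels_pred = np.zeros(Y.shape, dtype=int) - 100
full_labels_pred[trainIndices] = y_train_pred
full_labels_pred[testIndices] = y_test_pred
print(full_labels_pred)
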
@@ -100,7 +101,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
hyperParamSearch, metrics, nIter, feat, CL_type,
clKWARGS, labelsNames, X.shape,
y_train, y_train_pred, y_test, y_test_pred, t_end,
randomState, cl_res, outputFileName)
randomState, classifier, outputFileName)
cl_desc = [value for key, value in sorted(clKWARGS.items())]
logging.debug("Done:\t Getting Results")
@@ -158,7 +159,7 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train,
logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch)
clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, randomState,
outputFileName, classifierModule,
outputFileName, classifierModule, CL_type,
KFolds=KFolds, nbCores=nbCores,
metric=metrics[0], nIter=nIter)
logging.debug("Done:\t " + hyperParamSearch + " best settings")
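
The only change to getHPs is that the classifier name CL_type is now forwarded to the search function. A tiny sketch of the getattr-based dispatch, with a stub standing in for MonoviewUtils (the real randomizedSearch is sketched further below):

from types import SimpleNamespace

# Stub standing in for the MonoviewUtils module: only the new CL_type positional
# argument of randomizedSearch is of interest here.
MonoviewUtils = SimpleNamespace(
    randomizedSearch=lambda X, y, randomState, outputFileName, classifierModule,
                            CL_type, KFolds=4, nbCores=1,
                            metric=("accuracy_score", None), nIter=30:
        ({"n_estimators": 25}, None))

hyperParamSearch = "randomizedSearch"
classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch)
clKWARGS, testFoldsPreds = classifierHPSearch(None, None, None, "outputFile", None,
                                              "Adaboost", KFolds=3, nbCores=1, nIter=5)
print(clKWARGS)  # {'n_estimators': 25}
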
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
from scipy.stats import uniform, randint
from sklearn.pipeline import Pipeline
import numpy as np
from .. import Metrics
from ..utils import HyperParameterSearch
@@ -11,11 +12,11 @@ __status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, KFolds = 4, nbCores = 1,
def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, CL_type, KFolds = 4, nbCores = 1,
metric = ["accuracy_score", None], nIter = 30):
pipeline = classifierModule.genPipeline()
params_dict = classifierModule.genParamsDict(randomState)
estimator = getattr(classifierModule, CL_type)(randomState)
params_dict = estimator.genDistribs()
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
@@ -23,15 +24,15 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
randomSearch = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=params_dict, refit=True,
randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True,
n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState)
detector = randomSearch.fit(X_train, y_train)
bestParams = classifierModule.genBestParams(detector)
# desc_params = {"C": SVMPoly_detector.best_params_["classifier__C"], "degree": SVMPoly_detector.best_params_["classifier__degree"]}
bestParams = estimator.genBestParams(detector)
scoresArray = detector.cv_results_['mean_test_score']
params = classifierModule.genParamsFromDetector(detector)
# params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), ("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))]
params = estimator.genParamsFromDetector(detector)
HyperParameterSearch.genHeatMaps(params, scoresArray, outputFileName)
testFoldsPreds = genTestFoldsPreds(X_train, y_train, KFolds, detector.best_estimator_)
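
This is the heart of the refactor: RandomizedSearchCV now runs directly on an estimator instance, and the parameter distributions come from the estimator's own genDistribs() instead of a module-level dict with 'classifier__' pipeline prefixes. A self-contained sketch under those assumptions, with AdaBoostClassifier and a hand-written distribution dict as stand-ins:

import numpy as np
from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import RandomizedSearchCV

randomState = np.random.RandomState(42)
X_train, y_train = make_classification(n_samples=60, n_features=5, random_state=42)

# Stand-ins for getattr(classifierModule, CL_type)(randomState) and genDistribs():
estimator = AdaBoostClassifier(random_state=42)
params_dict = {"n_estimators": randint(1, 50)}      # note: no 'classifier__' prefix anymore

scorer = make_scorer(accuracy_score)                # stand-in for metricModule.get_scorer()
randomSearch = RandomizedSearchCV(estimator, param_distributions=params_dict, n_iter=5,
                                  refit=True, n_jobs=1, scoring=scorer, cv=3,
                                  random_state=randomState)
detector = randomSearch.fit(X_train, y_train)
print(detector.best_params_)                        # e.g. {'n_estimators': <sampled value>}
print(detector.cv_results_['mean_test_score'])      # the scoresArray used for the heat maps
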
@@ -53,7 +54,10 @@ def genTestFoldsPreds(X_train, y_train, KFolds, estimator):
class CustomRandint:
def __init__(self, low=0, high=0, multiplier="e-"):
"""Used as a distribution returning an integer between low and high-1.
It can be used with a multiplier argument to perform more complex generation,
for example 10 ** -(drawn integer)."""
def __init__(self, low=0, high=0, multiplier=""):
self.randint = randint(low, high)
self.multiplier = multiplier
@@ -61,10 +65,15 @@ class CustomRandint:
randinteger = self.randint.rvs(random_state=random_state)
if self.multiplier == "e-":
return 10 ** -randinteger
else:
return randinteger
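
A short usage sketch for CustomRandint as modified above: the multiplier now defaults to the empty string, and "e-" turns the draw into a negative power of ten. The class is re-declared locally so the snippet runs on its own:

from scipy.stats import randint

class CustomRandint:
    """Local copy of the class above, for demonstration only."""
    def __init__(self, low=0, high=0, multiplier=""):
        self.randint = randint(low, high)
        self.multiplier = multiplier

    def rvs(self, random_state=None):
        randinteger = self.randint.rvs(random_state=random_state)
        if self.multiplier == "e-":
            # int() sidesteps numpy's refusal of negative integer powers
            return 10 ** -int(randinteger)
        return randinteger

print(CustomRandint(low=1, high=5).rvs(random_state=7))                   # a draw in [1, 4]
print(CustomRandint(low=1, high=5, multiplier="e-").rvs(random_state=7))  # 10 ** -(that draw)
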
class CustomUniform:
def __init__(self, loc=0, state=1, multiplier="e-"):
"""Used as a distribution returning a float between loc and loc + scale.
It can be used with a multiplier argument to perform more complex generation,
for example 10 ** -(drawn float)."""
def __init__(self, loc=0, state=1, multiplier=""):
self.uniform = uniform(loc, state)
self.multiplier = multiplier
@@ -72,6 +81,43 @@ class CustomUniform:
unif = self.uniform.rvs(random_state=random_state)
if self.multiplier == 'e-':
return 10 ** -unif
else:
return unif
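
The same kind of usage sketch for CustomUniform (its second constructor argument is scipy's scale, here named state); again a local copy so the snippet is self-contained:

from scipy.stats import uniform

class CustomUniform:
    """Local copy of the class above, for demonstration only."""
    def __init__(self, loc=0, state=1, multiplier=""):
        self.uniform = uniform(loc, state)
        self.multiplier = multiplier

    def rvs(self, random_state=None):
        unif = self.uniform.rvs(random_state=random_state)
        if self.multiplier == "e-":
            return 10 ** -unif
        return unif

print(CustomUniform().rvs(random_state=7))                  # float drawn in [loc, loc + state)
print(CustomUniform(multiplier="e-").rvs(random_state=7))   # 10 ** -(that float)
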
class BaseMonoviewClassifier(object):
def genBestParams(self, detector):
return dict((param_name, detector.best_params_[param_name]) for param_name in self.param_names)
def genParamsFromDetector(self, detector):
if self.classed_params is not None:
classed_dict = dict((classed_param, get_names(detector.cv_results_["param_"+classed_param]))
for classed_param in self.classed_params)
return [(param_name, np.array(detector.cv_results_["param_"+param_name]))
if param_name not in self.classed_params else (param_name, classed_dict[param_name])
for param_name in self.param_names]
def genDistribs(self):
return dict((param_name, distrib) for param_name, distrib in zip(self.param_names, self.distribs))
def getConfig(self):
return "\n\t\t- "+self.__class__.__name__+ "with "+ ", ".join([ param_name+" : " + self.to_str(param_name) for param_name in self.param_names])
def to_str(self, param_name):
if param_name in self.weird_strings:
if self.weird_strings[param_name] == "class_name":
return self.get_params()[param_name].__class__.__name__
else:
return self.weird_strings[param_name](self.get_params()[param_name])
else:
return str(self.get_params()[param_name])
def get_names(classed_list):
return np.array([object_.__class__.__name__ for object_ in classed_list])
# def isUseful(labelSupports, index, CLASS_LABELS, labelDict):
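
A minimal sketch of how a concrete classifier is meant to plug into this new mixin: it declares param_names, classed_params, distribs and weird_strings in its constructor and inherits the generic helpers. The mixin is re-declared here only as far as needed, and DemoTree is a hypothetical subclass used purely to exercise it:

import numpy as np
from sklearn.tree import DecisionTreeClassifier

class BaseMonoviewClassifier(object):
    """Trimmed local copy of the mixin above (genDistribs / getConfig / to_str only)."""
    def genDistribs(self):
        return dict(zip(self.param_names, self.distribs))

    def getConfig(self):
        return "\n\t\t- " + self.__class__.__name__ + " with " + ", ".join(
            [name + " : " + self.to_str(name) for name in self.param_names])

    def to_str(self, param_name):
        if param_name in self.weird_strings:
            if self.weird_strings[param_name] == "class_name":
                return self.get_params()[param_name].__class__.__name__
        return str(self.get_params()[param_name])

class DemoTree(DecisionTreeClassifier, BaseMonoviewClassifier):
    """Hypothetical classifier, only there to exercise the mixin helpers."""
    def __init__(self, random_state=None, max_depth=3):
        super(DemoTree, self).__init__(random_state=random_state, max_depth=max_depth)
        self.param_names = ["max_depth"]
        self.classed_params = []
        self.distribs = [np.arange(1, 10)]
        self.weird_strings = {}

tree = DemoTree(random_state=42)
print(tree.genDistribs())   # {'max_depth': array([1, ..., 9])}
print(tree.getConfig())     # "... DemoTree with max_depth : 3"
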
@@ -16,15 +16,14 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames
return dbConfigString
def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classifier, directory):
classifierModule = getattr(MonoviewClassifiers, CL_type)
def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory):
classifierConfigString = "Classifier configuration : \n"
classifierConfigString += "\t- " + classifierModule.getConfig(clKWARGS)[5:] + "\n"
classifierConfigString += "\t- " + classifier.getConfig(clKWARGS)[5:] + "\n"
classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n"
if gridSearch:
classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n"
classifierConfigString += "\n\n"
classifierInterpretString = classifierModule.getInterpret(classifier, directory)
classifierInterpretString = classifier.getInterpret(classifier, directory)
return classifierConfigString, classifierInterpretString
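
The report code now asks the classifier instance itself for its configuration and interpretation strings instead of going through getattr(MonoviewClassifiers, CL_type). A small sketch of that contract, with a hand-written stand-in for the classifier; the [5:] slice strips the leading newline and tabs that getConfig prepends:

class FakeClassifier:
    """Stand-in exposing the two methods the report code now relies on."""
    def getConfig(self, clKWARGS=None):
        return "\n\t\t- Adaboost with n_estimators : 50"

    def getInterpret(self, classifier, directory):
        return "feature importances would be written under " + directory

classifier = FakeClassifier()
classifierConfigString = "Classifier configuration : \n"
classifierConfigString += "\t- " + classifier.getConfig({})[5:] + "\n"
classifierConfigString += "\t- Executed on 1 core(s) \n"
classifierInterpretString = classifier.getInterpret(classifier, "results/example/")
print(classifierConfigString)
print(classifierInterpretString)
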
@@ -53,7 +52,7 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea
stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + metrics[0][0] + " on test : " + str(
testScore) + "\n\n"
stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds)
classifierConfigString, classifierIntepretString = getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classifier, directory)
classifierConfigString, classifierIntepretString = getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory)
stringAnalysis += classifierConfigString
for metric in metrics:
metricString, metricScore = getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred)
from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
import numpy as np
# import cPickle
# import matplotlib.pyplot as plt
# from matplotlib.ticker import FuncFormatter
# from .. import Metrics
# from ..utils.HyperParameterSearch import genHeatMaps
from ..utils.Interpret import getFeatureImportance
# from ..Monoview.MonoviewUtils import randomizedSearch
from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
class Adaboost(AdaBoostClassifier):
def __init__(self, random_state, **kwargs):
super(AdaBoostClassifier, self).__init__(
n_estimators=kwargs['n_estimators'],
base_estimator=kwargs['base_estimator'],
random_state=random_state)
class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
def __init__(self, random_state, n_estimators=10,
base_estimator=DecisionTreeClassifier(), **kwargs):
super(Adaboost, self).__init__(
random_state=random_state,
n_estimators=n_estimators,
base_estimator=base_estimator,
)
self.param_names = ["n_estimators", "base_estimator"]
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier()]]
self.weird_strings = {"base_estimator":"class_name"}
def canProbas(self):
"""Used to know if the classifier can return label probabilities"""
@@ -37,28 +35,17 @@ class Adaboost(AdaBoostClassifier):
"base_estimator": DecisionTreeClassifier()})
return paramsSet
def getKWARGS(self, args):
"""Used to format kwargs for the parsed args"""
kwargsDict = {}
kwargsDict['n_estimators'] = args.Ada_n_est
kwargsDict['base_estimator'] = DecisionTreeClassifier() # args.Ada_b_est
return kwargsDict
def genPipeline(self):
return Pipeline([('classifier', AdaBoostClassifier())])
def genParamsDict(self, randomState):
return {"classifier__n_estimators": np.arange(150) + 1,
"classifier__base_estimator": [DecisionTreeClassifier()]}
# def genPipeline(self):
# return Pipeline([('classifier', AdaBoostClassifier())])
def genBestParams(self, detector):
return {"n_estimators": detector.best_params_["classifier__n_estimators"],
"base_estimator": detector.best_params_["classifier__base_estimator"]}
# def genDistribs(self,):
# return {"classifier__n_estimators": CustomRandint(low=1, high=500),
# "classifier__base_estimator": [DecisionTreeClassifier()]}
def genParamsFromDetector(self, detector):
nIter = len(detector.cv_results_['param_classifier__n_estimators'])
return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
# def genParamsFromDetector(self, detector):
# nIter = len(detector.cv_results_['param_classifier__n_estimators'])
# return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
# ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
def getConfig(self, config):
if type(config) is not dict: # Used in late fusion when config is a classifier
@@ -68,71 +55,75 @@ class Adaboost(AdaBoostClassifier):
return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str(
config["base_estimator"])
def getInterpret(self, classifier, directory):
interpretString = getFeatureImportance(classifier, directory)
return interpretString
def canProbas():
return True
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
"""Used to fit the monoview classifier with the args stored in kwargs"""
classifier = AdaBoostClassifier(n_estimators=kwargs['n_estimators'],
base_estimator=kwargs['base_estimator'],
random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS)
return classifier
def paramsToSet(nIter, randomState):
"""Used for weighted linear early fusion to generate random search sets"""
paramsSet = []
for _ in range(nIter):
paramsSet.append({"n_estimators": randomState.randint(1, 15),
"base_estimator": DecisionTreeClassifier()})
return paramsSet
def getKWARGS(args):
def formatCmdArgs(args):
"""Used to format kwargs for the parsed args"""
kwargsDict = {}
kwargsDict['n_estimators'] = args.Ada_n_est
kwargsDict['base_estimator'] = DecisionTreeClassifier() #args.Ada_b_est
kwargsDict = {'n_estimators': args.Ada_n_est,
'base_estimator': DecisionTreeClassifier()}
return kwargsDict
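
For completeness, a runnable sketch of the new formatCmdArgs with a hand-built argparse Namespace standing in for the project's parsed command line (args.Ada_n_est comes from the project's own parser):

from argparse import Namespace
from sklearn.tree import DecisionTreeClassifier

def formatCmdArgs(args):
    """Mirror of the function above; args.Ada_n_est comes from the project's parser."""
    kwargsDict = {'n_estimators': args.Ada_n_est,
                  'base_estimator': DecisionTreeClassifier()}
    return kwargsDict

print(formatCmdArgs(Namespace(Ada_n_est=50)))
# {'n_estimators': 50, 'base_estimator': DecisionTreeClassifier()}
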
def genPipeline():
return Pipeline([('classifier', AdaBoostClassifier())])
def genParamsDict(randomState):
return {"classifier__n_estimators": np.arange(150)+1,
"classifier__base_estimator": [DecisionTreeClassifier()]}
def genBestParams(detector):
return {"n_estimators": detector.best_params_["classifier__n_estimators"],
"base_estimator": detector.best_params_["classifier__base_estimator"]}
def genParamsFromDetector(detector):
nIter = len(detector.cv_results_['param_classifier__n_estimators'])
return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
def getConfig(config):
if type(config) is not dict: # Used in late fusion when config is a classifier
return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str(
config.base_estimator)
else:
return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str(
config["base_estimator"])
def getInterpret(classifier, directory):
interpretString = getFeatureImportance(classifier, directory)
return interpretString
\ No newline at end of file
# def canProbas():
# return True
#
#
# def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
# """Used to fit the monoview classifier with the args stored in kwargs"""
# classifier = AdaBoostClassifier(n_estimators=kwargs['n_estimators'],
# base_estimator=kwargs['base_estimator'],
# random_state=randomState)
# classifier.fit(DATASET, CLASS_LABELS)
# return classifier
#
#
# def paramsToSet(nIter, randomState):
# """Used for weighted linear early fusion to generate random search sets"""
# paramsSet = []
# for _ in range(nIter):
# paramsSet.append({"n_estimators": randomState.randint(1, 15),
# "base_estimator": DecisionTreeClassifier()})
# return paramsSet
#
#
# def getKWARGS(args):
# """Used to format kwargs for the parsed args"""
# kwargsDict = {}
# kwargsDict['n_estimators'] = args.Ada_n_est
# kwargsDict['base_estimator'] = DecisionTreeClassifier() #args.Ada_b_est
# return kwargsDict
#
#
# def genPipeline():
# return Pipeline([('classifier', AdaBoostClassifier())])
#
#
# def genParamsDict(randomState):
# return {"classifier__n_estimators": np.arange(150)+1,
# "classifier__base_estimator": [DecisionTreeClassifier()]}
#
#
# def genBestParams(detector):
# return {"n_estimators": detector.best_params_["classifier__n_estimators"],
# "base_estimator": detector.best_params_["classifier__base_estimator"]}
#
#
# def genParamsFromDetector(detector):
# nIter = len(detector.cv_results_['param_classifier__n_estimators'])
# return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
# ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
#
#
# def getConfig(config):
# if type(config) is not dict: # Used in late fusion when config is a classifier
# return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str(
# config.base_estimator)
# else:
# return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str(
# config["base_estimator"])
#
#
# def getInterpret(classifier, directory):
# interpretString = getFeatureImportance(classifier, directory)
# return interpretString
\ No newline at end of file
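
Putting the pieces of the new Adaboost.py together, a self-contained sketch of the wrapper class and of its declared distributions feeding a randomized search. CustomRandint is re-declared locally, the dataset is synthetic, and the demo is trimmed to n_estimators because recent scikit-learn renamed AdaBoost's base_estimator argument to estimator:

from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV

class CustomRandint:
    """Local stand-in for MonoviewUtils.CustomRandint."""
    def __init__(self, low=0, high=0, multiplier=""):
        self.randint, self.multiplier = randint(low, high), multiplier

    def rvs(self, random_state=None):
        value = int(self.randint.rvs(random_state=random_state))
        return 10 ** -value if self.multiplier == "e-" else value

class Adaboost(AdaBoostClassifier):
    """Same constructor shape as the wrapper above, mixin helpers omitted."""
    def __init__(self, random_state=None, n_estimators=10, **kwargs):
        super(Adaboost, self).__init__(random_state=random_state,
                                       n_estimators=n_estimators)
        self.param_names = ["n_estimators"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=100)]
        self.weird_strings = {}

X, y = make_classification(n_samples=60, random_state=42)
clf = Adaboost(random_state=42)
search = RandomizedSearchCV(clf, dict(zip(clf.param_names, clf.distribs)),
                            n_iter=3, cv=3, random_state=42).fit(X, y)
print(search.best_params_)   # e.g. {'n_estimators': <sampled value>}
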
from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import diversity_utils
from ...Multiview.Additions import diversity_utils
from ..DifficultyFusion.DifficultyFusionModule import difficulty
from ..DoubleFaultFusion.DoubleFaultFusionModule import doubleFault