diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py index 6395dc74d88ffe0ddf49335057a26fc91d244f6b..c90496989b1212f70150446729c2133a53c90145 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py +++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py @@ -166,7 +166,7 @@ def initMonoviewKWARGS(args, classifiersNames): Returns ------- monoviewKWARGS : Dictionary of dictionaries - Dictionary resuming all the specific arguments for the benchmark, oe dictionary for each classifier. + Dictionary resuming all the specific arguments for the benchmark, one dictionary for each classifier. For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`""" @@ -178,7 +178,7 @@ def initMonoviewKWARGS(args, classifiersNames): except AttributeError: raise AttributeError(classifiersName+" is not implemented in MonoviewClassifiers, " "please specify the name of the file in MonoviewClassifiers") - monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS(args) + monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(args) logging.debug("Done:\t Initializing Monoview classifiers arguments") return monoviewKWARGS diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index e0d532dc0a563ed2050e90560f236bc78d8f6bab..47621800f7e62dfa2cfe2624f1b9026387dc7b91 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -73,19 +73,20 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Done:\t Generate classifier args") logging.debug("Start:\t Training") - cl_res = classifierModule.fit(X_train, y_train, randomState, NB_CORES=nbCores, **clKWARGS) + classifier = getattr(classifierModule, CL_type)(randomState, **clKWARGS) + classifier.fit(X_train, y_train) # NB_CORES=nbCores, logging.debug("Done:\t Training") logging.debug("Start:\t Predicting") - y_train_pred = cl_res.predict(X_train) - y_test_pred = cl_res.predict(X_test) + y_train_pred = classifier.predict(X_train) + y_test_pred = classifier.predict(X_test) full_labels_pred = np.zeros(Y.shape, dtype=int)-100 for trainIndex, index in enumerate(classificationIndices[0]): full_labels_pred[index] = y_train_pred[trainIndex] for testIndex, index in enumerate(classificationIndices[1]): full_labels_pred[index] = y_test_pred[testIndex] if X_test_multiclass != []: - y_test_multiclass_pred = cl_res.predict(X_test_multiclass) + y_test_multiclass_pred = classifier.predict(X_test_multiclass) else: y_test_multiclass_pred = [] logging.debug("Done:\t Predicting") @@ -100,7 +101,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol hyperParamSearch, metrics, nIter, feat, CL_type, clKWARGS, labelsNames, X.shape, y_train, y_train_pred, y_test, y_test_pred, t_end, - randomState, cl_res, outputFileName) + randomState, classifier, outputFileName) cl_desc = [value for key, value in sorted(clKWARGS.items())] logging.debug("Done:\t Getting Results") @@ -158,7 +159,7 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type) classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch) clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, randomState, - outputFileName, classifierModule, + outputFileName, classifierModule, CL_type, KFolds=KFolds, nbCores=nbCores, metric=metrics[0], nIter=nIter) logging.debug("Done:\t " + hyperParamSearch + " best settings") diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index d2009c5798b39eea6451bd0d4fdabc63208cf748..e5d2049776658a4adef48bd2395bcd0855cf0b20 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -1,6 +1,7 @@ from sklearn.model_selection import RandomizedSearchCV -import numpy as np from scipy.stats import uniform, randint +from sklearn.pipeline import Pipeline +import numpy as np from .. import Metrics from ..utils import HyperParameterSearch @@ -11,11 +12,11 @@ __status__ = "Prototype" # Production, Development, Prototype # __date__ = 2016 - 03 - 25 -def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, KFolds = 4, nbCores = 1, +def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, CL_type, KFolds = 4, nbCores = 1, metric = ["accuracy_score", None], nIter = 30): - pipeline = classifierModule.genPipeline() - params_dict = classifierModule.genParamsDict(randomState) + estimator = getattr(classifierModule, CL_type)(randomState) + params_dict = estimator.genDistribs() metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: @@ -23,15 +24,15 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) - randomSearch = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=params_dict, refit=True, + + randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) detector = randomSearch.fit(X_train, y_train) - bestParams = classifierModule.genBestParams(detector) - # desc_params = {"C": SVMPoly_detector.best_params_["classifier__C"], "degree": SVMPoly_detector.best_params_["classifier__degree"]} + + bestParams = estimator.genBestParams(detector) scoresArray = detector.cv_results_['mean_test_score'] - params = classifierModule.genParamsFromDetector(detector) - # params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), ("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))] + params = estimator.genParamsFromDetector(detector) HyperParameterSearch.genHeatMaps(params, scoresArray, outputFileName) testFoldsPreds = genTestFoldsPreds(X_train, y_train, KFolds, detector.best_estimator_) @@ -53,7 +54,10 @@ def genTestFoldsPreds(X_train, y_train, KFolds, estimator): class CustomRandint: - def __init__(self, low=0, high=0, multiplier="e-"): + """Used as a distribution returning a integer between low and high-1. + It can be used with a multiplier agrument to be able to perform more complex generation + for example 10 e -(randint)""" + def __init__(self, low=0, high=0, multiplier=""): self.randint = randint(low, high) self.multiplier = multiplier @@ -61,10 +65,15 @@ class CustomRandint: randinteger = self.randint.rvs(random_state=random_state) if self.multiplier == "e-": return 10 ** -randinteger + else: + return randinteger class CustomUniform: - def __init__(self, loc=0, state=1, multiplier="e-"): + """Used as a distribution returning a float between loc and loc + scale.. + It can be used with a multiplier agrument to be able to perform more complex generation + for example 10 e -(float)""" + def __init__(self, loc=0, state=1, multiplier=""): self.uniform = uniform(loc, state) self.multiplier = multiplier @@ -72,6 +81,43 @@ class CustomUniform: unif = self.uniform.rvs(random_state=random_state) if self.multiplier == 'e-': return 10 ** -unif + else: + return unif + + +class BaseMonoviewClassifier(object): + + def genBestParams(self, detector): + return dict((param_name, detector.best_params_[param_name]) for param_name in self.param_names) + + def genParamsFromDetector(self, detector): + if self.classed_params is not None: + classed_dict = dict((classed_param, get_names(detector.cv_results_["param_"+classed_param])) + for classed_param in self.classed_params) + return [(param_name, np.array(detector.cv_results_["param_"+param_name])) + if param_name not in self.classed_params else (param_name, classed_dict[param_name]) + for param_name in self.param_names] + + def genDistribs(self): + return dict((param_name, distrib) for param_name, distrib in zip(self.param_names, self.distribs)) + + def getConfig(self): + return "\n\t\t- "+self.__class__.__name__+ "with "+ ", ".join([ param_name+" : " + self.to_str(param_name) for param_name in self.param_names]) + + def to_str(self, param_name): + if param_name in self.weird_strings: + if self.weird_strings[param_name] == "class_name": + return self.get_params()[param_name].__class__.__name__ + else: + return self.weird_strings[param_name](self.get_params()[param_name]) + else: + return str(self.get_params()[param_name]) + + +def get_names(classed_list): + return np.array([object_.__class__.__name__ for object_ in classed_list]) + + # def isUseful(labelSupports, index, CLASS_LABELS, labelDict): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py index 927301aa6d9625f841bca5df73ec1fb3c8482e13..d367589549d7b1571ebc46be4668d444c172a262 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py @@ -16,15 +16,14 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames return dbConfigString -def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classifier, directory): - classifierModule = getattr(MonoviewClassifiers, CL_type) +def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory): classifierConfigString = "Classifier configuration : \n" - classifierConfigString += "\t- " + classifierModule.getConfig(clKWARGS)[5:] + "\n" + classifierConfigString += "\t- " + classifier.getConfig(clKWARGS)[5:] + "\n" classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" if gridSearch: classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n" classifierConfigString += "\n\n" - classifierInterpretString = classifierModule.getInterpret(classifier, directory) + classifierInterpretString = classifier.getInterpret(classifier, directory) return classifierConfigString, classifierInterpretString @@ -53,7 +52,7 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + metrics[0][0] + " on test : " + str( testScore) + "\n\n" stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) - classifierConfigString, classifierIntepretString = getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classifier, directory) + classifierConfigString, classifierIntepretString = getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory) stringAnalysis += classifierConfigString for metric in metrics: metricString, metricScore = getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 50e61763e0cc3c7c5174509a23b2336eb498a07f..1808176c466095342ced5d92ec054c2e6fd4c265 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,29 +1,27 @@ from sklearn.ensemble import AdaBoostClassifier -from sklearn.pipeline import Pipeline -from sklearn.model_selection import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier -from scipy.stats import randint -import numpy as np -# import cPickle -# import matplotlib.pyplot as plt -# from matplotlib.ticker import FuncFormatter -# from .. import Metrics -# from ..utils.HyperParameterSearch import genHeatMaps from ..utils.Interpret import getFeatureImportance -# from ..Monoview.MonoviewUtils import randomizedSearch +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -class Adaboost(AdaBoostClassifier): - def __init__(self, random_state, **kwargs): - super(AdaBoostClassifier, self).__init__( - n_estimators=kwargs['n_estimators'], - base_estimator=kwargs['base_estimator'], - random_state=random_state) +class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): + + def __init__(self, random_state, n_estimators=10, + base_estimator=DecisionTreeClassifier(), **kwargs): + super(Adaboost, self).__init__( + random_state=random_state, + n_estimators=n_estimators, + base_estimator=base_estimator, + ) + self.param_names = ["n_estimators", "base_estimator"] + self.classed_params = ["base_estimator"] + self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier()]] + self.weird_strings = {"base_estimator":"class_name"} def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -37,28 +35,17 @@ class Adaboost(AdaBoostClassifier): "base_estimator": DecisionTreeClassifier()}) return paramsSet - def getKWARGS(self, args): - """Used to format kwargs for the parsed args""" - kwargsDict = {} - kwargsDict['n_estimators'] = args.Ada_n_est - kwargsDict['base_estimator'] = DecisionTreeClassifier() # args.Ada_b_est - return kwargsDict - - def genPipeline(self): - return Pipeline([('classifier', AdaBoostClassifier())]) - - def genParamsDict(self, randomState): - return {"classifier__n_estimators": np.arange(150) + 1, - "classifier__base_estimator": [DecisionTreeClassifier()]} + # def genPipeline(self): + # return Pipeline([('classifier', AdaBoostClassifier())]) - def genBestParams(self, detector): - return {"n_estimators": detector.best_params_["classifier__n_estimators"], - "base_estimator": detector.best_params_["classifier__base_estimator"]} + # def genDistribs(self,): + # return {"classifier__n_estimators": CustomRandint(low=1, high=500), + # "classifier__base_estimator": [DecisionTreeClassifier()]} - def genParamsFromDetector(self, detector): - nIter = len(detector.cv_results_['param_classifier__n_estimators']) - return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), - ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] + # def genParamsFromDetector(self, detector): + # nIter = len(detector.cv_results_['param_classifier__n_estimators']) + # return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), + # ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] def getConfig(self, config): if type(config) is not dict: # Used in late fusion when config is a classifier @@ -68,71 +55,75 @@ class Adaboost(AdaBoostClassifier): return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str( config["base_estimator"]) - def getInterpret(self, classifier, directory): interpretString = getFeatureImportance(classifier, directory) return interpretString -def canProbas(): - return True - - -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): - """Used to fit the monoview classifier with the args stored in kwargs""" - classifier = AdaBoostClassifier(n_estimators=kwargs['n_estimators'], - base_estimator=kwargs['base_estimator'], - random_state=randomState) - classifier.fit(DATASET, CLASS_LABELS) - return classifier - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"n_estimators": randomState.randint(1, 15), - "base_estimator": DecisionTreeClassifier()}) - return paramsSet - - -def getKWARGS(args): +def formatCmdArgs(args): """Used to format kwargs for the parsed args""" - kwargsDict = {} - kwargsDict['n_estimators'] = args.Ada_n_est - kwargsDict['base_estimator'] = DecisionTreeClassifier() #args.Ada_b_est + kwargsDict = {'n_estimators': args.Ada_n_est, + 'base_estimator': DecisionTreeClassifier()} return kwargsDict - - -def genPipeline(): - return Pipeline([('classifier', AdaBoostClassifier())]) - - -def genParamsDict(randomState): - return {"classifier__n_estimators": np.arange(150)+1, - "classifier__base_estimator": [DecisionTreeClassifier()]} - - -def genBestParams(detector): - return {"n_estimators": detector.best_params_["classifier__n_estimators"], - "base_estimator": detector.best_params_["classifier__base_estimator"]} - - -def genParamsFromDetector(detector): - nIter = len(detector.cv_results_['param_classifier__n_estimators']) - return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), - ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] - - -def getConfig(config): - if type(config) is not dict: # Used in late fusion when config is a classifier - return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str( - config.base_estimator) - else: - return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str( - config["base_estimator"]) - - -def getInterpret(classifier, directory): - interpretString = getFeatureImportance(classifier, directory) - return interpretString \ No newline at end of file +# def canProbas(): +# return True +# +# +# def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): +# """Used to fit the monoview classifier with the args stored in kwargs""" +# classifier = AdaBoostClassifier(n_estimators=kwargs['n_estimators'], +# base_estimator=kwargs['base_estimator'], +# random_state=randomState) +# classifier.fit(DATASET, CLASS_LABELS) +# return classifier +# +# +# def paramsToSet(nIter, randomState): +# """Used for weighted linear early fusion to generate random search sets""" +# paramsSet = [] +# for _ in range(nIter): +# paramsSet.append({"n_estimators": randomState.randint(1, 15), +# "base_estimator": DecisionTreeClassifier()}) +# return paramsSet +# +# +# def getKWARGS(args): +# """Used to format kwargs for the parsed args""" +# kwargsDict = {} +# kwargsDict['n_estimators'] = args.Ada_n_est +# kwargsDict['base_estimator'] = DecisionTreeClassifier() #args.Ada_b_est +# return kwargsDict +# +# +# def genPipeline(): +# return Pipeline([('classifier', AdaBoostClassifier())]) +# +# +# def genParamsDict(randomState): +# return {"classifier__n_estimators": np.arange(150)+1, +# "classifier__base_estimator": [DecisionTreeClassifier()]} +# +# +# def genBestParams(detector): +# return {"n_estimators": detector.best_params_["classifier__n_estimators"], +# "base_estimator": detector.best_params_["classifier__base_estimator"]} +# +# +# def genParamsFromDetector(detector): +# nIter = len(detector.cv_results_['param_classifier__n_estimators']) +# return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), +# ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] +# +# +# def getConfig(config): +# if type(config) is not dict: # Used in late fusion when config is a classifier +# return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str( +# config.base_estimator) +# else: +# return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str( +# config["base_estimator"]) +# +# +# def getInterpret(classifier, directory): +# interpretString = getFeatureImportance(classifier, directory) +# return interpretString \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/PseudoCQFusion/PseudoCQFusionModule.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/PseudoCQFusion/PseudoCQFusionModule.py index bddd8c86d2c2e08af5ffb2367380ecc727caae5f..05b1bb381abf0486ba2b8aaf3321343230e48d63 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/PseudoCQFusion/PseudoCQFusionModule.py +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/PseudoCQFusion/PseudoCQFusionModule.py @@ -1,4 +1,4 @@ -from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import diversity_utils +from ...Multiview.Additions import diversity_utils from ..DifficultyFusion.DifficultyFusionModule import difficulty from ..DoubleFaultFusion.DoubleFaultFusionModule import doubleFault