Skip to content
Snippets Groups Projects
Select Git revision
  • 6cdb5e1616064accd231b08df757233f82e848b7
  • master default protected
  • loss
  • producer
4 results

install.md

Blame
  • Adaboost.py 3.10 KiB
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.tree import DecisionTreeClassifier
    from scipy.stats import randint
    import numpy as np
    
    from .. import Metrics
    from ..utils.HyperParameterSearch import genHeatMaps
    
    # Author-Info
    __author__ = "Baptiste Bauvin"
    __status__ = "Prototype"  # Production, Development, Prototype
    
    
    def canProbas():
        """Return the probability-support flag of this classifier module (always True)."""
        supports_probabilities = True
        return supports_probabilities
    
    
    def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
        """Train an AdaBoost classifier on the given data and return it.

        Parameters
        ----------
        DATASET : training samples, passed unchanged to ``AdaBoostClassifier.fit``.
        CLASS_LABELS : training labels, passed unchanged to ``AdaBoostClassifier.fit``.
        randomState : forwarded as the classifier's ``random_state``.
        NB_CORES : accepted for signature compatibility; not used here.
        **kwargs : ``kwargs['0']`` is the number of estimators (converted with
            ``int``). ``kwargs['1']`` is not read: a fresh
            ``DecisionTreeClassifier`` is always used as the base estimator.

        Returns
        -------
        The fitted ``AdaBoostClassifier``.
        """
        booster = AdaBoostClassifier(
            n_estimators=int(kwargs['0']),
            base_estimator=DecisionTreeClassifier(),  # kwargs['1'] is ignored
            random_state=randomState,
        )
        booster.fit(DATASET, CLASS_LABELS)
        return booster
    
    
    def paramsToSet(nIter, randomState):
        """Draw ``nIter`` candidate parameter sets.

        Each set is a two-item list: an integer obtained from
        ``randomState.randint(1, 15)`` followed by a new
        ``DecisionTreeClassifier`` instance. One draw is made per set, in order.
        """
        return [
            [randomState.randint(1, 15), DecisionTreeClassifier()]
            for _ in range(nIter)
        ]
    
    
    def getKWARGS(kwargsList):
        """Translate ``(name, value)`` pairs into the index-keyed kwargs dict.

        ``"CL_Adaboost_n_est"`` is stored under ``'0'`` after conversion with
        ``int``; ``"CL_Adaboost_b_est"`` is stored under ``'1'`` unchanged.
        Pairs with any other name are ignored. When a name occurs several
        times, the last occurrence wins.
        """
        parsed = {}
        for name, value in kwargsList:
            if name == "CL_Adaboost_n_est":
                parsed['0'] = int(value)
                continue
            if name == "CL_Adaboost_b_est":
                parsed['1'] = value
        return parsed
    
    
    def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30,
                         nbCores=1):
        """Run a randomized hyper-parameter search for AdaBoost and return the best setting.

        Parameters
        ----------
        X_train, y_train : training data and labels handed to ``RandomizedSearchCV.fit``.
        randomState : forwarded as ``random_state`` of the search.
        outputFileName : forwarded to ``genHeatMaps`` together with the scores.
        KFolds : forwarded as ``cv`` of the search.
        metric : two-item sequence. ``metric[0]`` names the attribute of
            ``Metrics`` that provides ``get_scorer``; ``metric[1]`` is either
            ``None`` or an iterable of configuration values for that scorer.
            The default list is never mutated here.
        nIter : number of sampled candidates (``n_iter``).
        nbCores : forwarded as ``n_jobs`` of the search.

        Returns
        -------
        list
            ``[best n_estimators, best base_estimator]`` taken from ``best_params_``.
        """
        pipeline = Pipeline([('classifier', AdaBoostClassifier())])

        param = {"classifier__n_estimators": randint(1, 150),
                 "classifier__base_estimator": [DecisionTreeClassifier()]}
        metricModule = getattr(Metrics, metric[0])
        if metric[1] is not None:
            # Keys must be strings: ``**`` unpacking rejects non-string keys with
            # a TypeError, and the rest of this module uses '0', '1', ... as
            # its index-style kwargs keys.
            metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1]))
        else:
            metricKWARGS = {}
        scorer = metricModule.get_scorer(**metricKWARGS)
        grid = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=param, refit=True, n_jobs=nbCores,
                                  scoring=scorer, cv=KFolds, random_state=randomState)
        detector = grid.fit(X_train, y_train)
        desc_estimators = [detector.best_params_["classifier__n_estimators"],
                           detector.best_params_["classifier__base_estimator"]]

        # Per-candidate mean scores plus the sampled parameters go to genHeatMaps.
        # The base estimator is always a decision tree, hence the constant label.
        scoresArray = detector.cv_results_['mean_test_score']
        params = [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
                  ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]

        genHeatMaps(params, scoresArray, outputFileName)
        return desc_estimators
    
    
    def getConfig(config):
        """Build the description line for an Adaboost configuration.

        ``config`` may be:

        * a list, read by position (index 0: number of estimators, index 1:
          base estimator);
        * a dict, read with keys ``0``/``1`` first and ``"0"``/``"1"`` as the
          fallback;
        * any other object, read through its ``n_estimators`` and
          ``base_estimator`` attributes.

        Returns the formatted string. Only lookup failures (``KeyError``,
        ``IndexError``) trigger the fallback to string keys; any other
        exception propagates instead of being silently swallowed.
        """
        prefix = "\n\t\t- Adaboost with num_esimators : "
        separator = ", base_estimators : "
        # Exact type test kept on purpose: subclasses of list/dict keep being
        # treated as estimator-like objects, as before.
        if type(config) not in [list, dict]:
            return prefix + str(config.n_estimators) + separator + str(config.base_estimator)
        try:
            return prefix + str(config[0]) + separator + str(config[1])
        except (KeyError, IndexError):
            # Integer lookup failed: fall back to the '0'/'1' string keys used
            # by the kwargs dictionaries of this module.
            return prefix + str(config["0"]) + separator + str(config["1"])