Skip to content
Snippets Groups Projects
Commit 61faae87 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Centralized monoview randomized search

parent 66c4e27f
Branches
Tags
No related merge requests found
Showing
with 408 additions and 559 deletions
...@@ -100,8 +100,8 @@ def initMonoviewKWARGS(args, classifiersNames): ...@@ -100,8 +100,8 @@ def initMonoviewKWARGS(args, classifiersNames):
monoviewKWARGS = {} monoviewKWARGS = {}
for classifiersName in classifiersNames: for classifiersName in classifiersNames:
classifierModule = getattr(MonoviewClassifiers, classifiersName) classifierModule = getattr(MonoviewClassifiers, classifiersName)
monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS( monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS(args)
[(key, value) for key, value in vars(args).items() if key.startswith("CL_" + classifiersName)]) # [(key, value) for key, value in vars(args).items() if key.startswith("CL_" + classifiersName)])
logging.debug("Done:\t Initializing Monoview classifiers arguments") logging.debug("Done:\t Initializing Monoview classifiers arguments")
return monoviewKWARGS return monoviewKWARGS
......
...@@ -16,6 +16,7 @@ import h5py ...@@ -16,6 +16,7 @@ import h5py
from .. import MonoviewClassifiers from .. import MonoviewClassifiers
from .analyzeResult import execute from .analyzeResult import execute
from ..utils.Dataset import getValue, extractSubset from ..utils.Dataset import getValue, extractSubset
from . import MonoviewUtils
# Author-Info # Author-Info
__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" __author__ = "Nikolas Huelsmann, Baptiste BAUVIN"
...@@ -154,10 +155,11 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, ...@@ -154,10 +155,11 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train,
outputFileName, KFolds, nbCores, metrics, kwargs): outputFileName, KFolds, nbCores, metrics, kwargs):
if hyperParamSearch != "None": if hyperParamSearch != "None":
logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type) logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
classifierHPSearch = getattr(classifierModule, hyperParamSearch) classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch)
cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores, clKWARGS = classifierHPSearch(X_train, y_train, randomState,
outputFileName, classifierModule,
KFolds=KFolds, nbCores=nbCores,
metric=metrics[0], nIter=nIter) metric=metrics[0], nIter=nIter)
clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
logging.debug("Done:\t " + hyperParamSearch + "RandomSearch best settings") logging.debug("Done:\t " + hyperParamSearch + "RandomSearch best settings")
else: else:
clKWARGS = kwargs[CL_type + "KWARGS"] clKWARGS = kwargs[CL_type + "KWARGS"]
......
#!/usr/bin/env python from sklearn.model_selection import RandomizedSearchCV
""" Library: MultiClass Classification with MonoView """ from .. import Metrics
from ..utils import HyperParameterSearch
# Import built-in modules
# Import sci-kit learn party modules
# from sklearn.tests import train_test_split # For calculating the train/test split
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.model_selection import GridSearchCV # GridSearch for parameters of classification
from sklearn.ensemble import RandomForestClassifier # RandomForest-Classifier
import sklearn
import numpy as np
# Import own modules
# Author-Info # Author-Info
__author__ = "Nikolas Huelsmann, Baptiste Bauvin" __author__ = "Nikolas Huelsmann, Baptiste Bauvin"
...@@ -20,40 +9,65 @@ __status__ = "Prototype" # Production, Development, Prototype ...@@ -20,40 +9,65 @@ __status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25 # __date__ = 2016 - 03 - 25
def isUseful(labelSupports, index, CLASS_LABELS, labelDict): def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, KFolds = 4, nbCores = 1,
if labelSupports[labelDict[CLASS_LABELS[index]]] != 0: metric = ["accuracy_score", None], nIter = 30):
labelSupports[labelDict[CLASS_LABELS[index]]] -= 1
return True, labelSupports pipeline = classifierModule.genPipeline()
params_dict = classifierModule.genParamsDict(randomState)
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else: else:
return False, labelSupports metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
randomSearch = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=params_dict, refit=True, n_jobs = nbCores, scoring = scorer, cv = KFolds, random_state = randomState)
def getLabelSupports(CLASS_LABELS): detector = randomSearch.fit(X_train, y_train)
labels = set(CLASS_LABELS) bestParams = classifierModule.genBestParams(detector)
supports = [CLASS_LABELS.tolist().count(label) for label in labels] # desc_params = {"C": SVMPoly_detector.best_params_["classifier__C"], "degree": SVMPoly_detector.best_params_["classifier__degree"]}
return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
scoresArray = detector.cv_results_['mean_test_score']
params = classifierModule.genParamsFromDetector(detector)
def splitDataset(LABELS, NB_CLASS, LEARNING_RATE, DATASET_LENGTH, randomState): # params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), ("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))]
validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState)
validationIndices.sort() HyperParameterSearch.genHeatMaps(params, scoresArray, outputFileName)
return validationIndices
return bestParams
def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState): # def isUseful(labelSupports, index, CLASS_LABELS, labelDict):
labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS)) # if labelSupports[labelDict[CLASS_LABELS[index]]] != 0:
nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports] # labelSupports[labelDict[CLASS_LABELS[index]]] -= 1
trainingExamplesIndices = [] # return True, labelSupports
usedIndices = [] # else:
while nbTrainingExamples != [0 for i in range(NB_CLASS)]: # return False, labelSupports
isUseFull = False #
index = int(randomState.randint(0, DATASET_LENGTH - 1)) #
if index not in usedIndices: # def getLabelSupports(CLASS_LABELS):
isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict) # labels = set(CLASS_LABELS)
if isUseFull: # supports = [CLASS_LABELS.tolist().count(label) for label in labels]
trainingExamplesIndices.append(index) # return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
usedIndices.append(index) #
return trainingExamplesIndices #
# def splitDataset(LABELS, NB_CLASS, LEARNING_RATE, DATASET_LENGTH, randomState):
# validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState)
# validationIndices.sort()
# return validationIndices
#
#
# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState):
# labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS))
# nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports]
# trainingExamplesIndices = []
# usedIndices = []
# while nbTrainingExamples != [0 for i in range(NB_CLASS)]:
# isUseFull = False
# index = int(randomState.randint(0, DATASET_LENGTH - 1))
# if index not in usedIndices:
# isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict)
# if isUseFull:
# trainingExamplesIndices.append(index)
# usedIndices.append(index)
# return trainingExamplesIndices
##### Generating Test and Train Data ##### Generating Test and Train Data
......
...@@ -8,9 +8,10 @@ import numpy as np ...@@ -8,9 +8,10 @@ import numpy as np
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
# from matplotlib.ticker import FuncFormatter # from matplotlib.ticker import FuncFormatter
from .. import Metrics # from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps # from ..utils.HyperParameterSearch import genHeatMaps
from ..utils.Interpret import getFeatureImportance from ..utils.Interpret import getFeatureImportance
# from ..Monoview.MonoviewUtils import randomizedSearch
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
...@@ -23,9 +24,8 @@ def canProbas(): ...@@ -23,9 +24,8 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
"""Used to fit the monoview classifier with the args stored in kwargs""" """Used to fit the monoview classifier with the args stored in kwargs"""
num_estimators = int(kwargs['0']) classifier = AdaBoostClassifier(n_estimators=kwargs['n_estimators'],
base_estimators = DecisionTreeClassifier() base_estimator=kwargs['base_estimator'],
classifier = AdaBoostClassifier(n_estimators=num_estimators, base_estimator=base_estimators,
random_state=randomState) random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -35,61 +35,46 @@ def paramsToSet(nIter, randomState): ...@@ -35,61 +35,46 @@ def paramsToSet(nIter, randomState):
"""Used for weighted linear early fusion to generate random search sets""" """Used for weighted linear early fusion to generate random search sets"""
paramsSet = [] paramsSet = []
for _ in range(nIter): for _ in range(nIter):
paramsSet.append([randomState.randint(1, 15), DecisionTreeClassifier()]) paramsSet.append([randomState.randint(1, 15),
DecisionTreeClassifier()])
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
"""Used to format kwargs for the parsed args""" """Used to format kwargs for the parsed args"""
kwargsDict = {} kwargsDict = {}
for (kwargName, kwargValue) in kwargsList: kwargsDict['n_estimators'] = args.Ada_n_est
if kwargName == "CL_Adaboost_n_est": kwargsDict['base_estimator'] = DecisionTreeClassifier() #args.Ada_b_est
kwargsDict['0'] = int(kwargValue)
elif kwargName == "CL_Adaboost_b_est":
kwargsDict['1'] = kwargValue
else:
raise ValueError("Wrong arguments served to Adaboost")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, def genPipeline():
nbCores=1): return Pipeline([('classifier', AdaBoostClassifier())])
pipeline = Pipeline([('classifier', AdaBoostClassifier())])
param = {"classifier__n_estimators": randint(1, 150), def genParamsDict(randomState):
return {"classifier__n_estimators": np.arange(150)+1,
"classifier__base_estimator": [DecisionTreeClassifier()]} "classifier__base_estimator": [DecisionTreeClassifier()]}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=param, refit=True, n_jobs=nbCores, def genBestParams(detector):
scoring=scorer, cv=KFolds, random_state=randomState) return {"n_estimators": detector.best_params_["classifier__n_estimators"],
detector = grid.fit(X_train, y_train) "base_estimator": detector.best_params_["classifier__base_estimator"]}
desc_estimators = [detector.best_params_["classifier__n_estimators"],
detector.best_params_["classifier__base_estimator"]]
scoresArray = detector.cv_results_['mean_test_score']
params = [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
genHeatMaps(params, scoresArray, outputFileName) def genParamsFromDetector(detector):
return desc_estimators nIter = len(detector.cv_results_['param_classifier__n_estimators'])
return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: # Used in late fusion when config is a classifier if type(config) is not dict: # Used in late fusion when config is a classifier
return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str( return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str(
config.base_estimator) config.base_estimator)
else: else:
try: return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str(
return "\n\t\t- Adaboost with num_esimators : " + str(config[0]) + ", base_estimators : " + str(config[1]) config["base_estimator"])
except:
return "\n\t\t- Adaboost with num_esimators : " + str(config["0"]) + ", base_estimators : " + str(
config["1"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
......
...@@ -20,11 +20,8 @@ def canProbas(): ...@@ -20,11 +20,8 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
maxDepth = int(kwargs['0']) classifier = tree.DecisionTreeClassifier(max_depth=kwargs['max_depth'], criterion=kwargs['criterion'],
criterion = kwargs['1'] splitter=kwargs['splitter'], random_state=randomState)
splitter = kwargs['2']
classifier = tree.DecisionTreeClassifier(max_depth=maxDepth, criterion=criterion, splitter=splitter,
random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -37,59 +34,40 @@ def paramsToSet(nIter, randomState): ...@@ -37,59 +34,40 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"max_depth": args.DT_depth, "criterion": args.DT_criterion, "splitter": args.DT_splitter}
for (kwargName, kwargValue) in kwargsList:
if kwargName == "CL_DecisionTree_depth":
kwargsDict['0'] = int(kwargValue)
elif kwargName == "CL_DecisionTree_criterion":
kwargsDict['1'] = kwargValue
elif kwargName == "CL_DecisionTree_splitter":
kwargsDict['2'] = kwargValue
else:
raise ValueError("Wrong arguments served to DecisionTree")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', tree.DecisionTreeClassifier())])
pipeline_DT = Pipeline([('classifier', tree.DecisionTreeClassifier())])
param_DT = {"classifier__max_depth": randint(1, 300),
def genParamsDict(randomState):
return {"classifier__max_depth": np.arange(1, 300),
"classifier__criterion": ["gini", "entropy"], "classifier__criterion": ["gini", "entropy"],
"classifier__splitter": ["best", "random"]} "classifier__splitter": ["best", "random"]}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_DT = RandomizedSearchCV(pipeline_DT, n_iter=nIter, param_distributions=param_DT, refit=True, n_jobs=nbCores,
scoring=scorer,
cv=KFolds, random_state=randomState)
DT_detector = grid_DT.fit(X_train, y_train)
desc_params = [DT_detector.best_params_["classifier__max_depth"], DT_detector.best_params_["classifier__criterion"],
DT_detector.best_params_["classifier__splitter"]]
scoresArray = DT_detector.cv_results_['mean_test_score']
params = [("maxDepth", np.array(DT_detector.cv_results_['param_classifier__max_depth'])),
("criterion", np.array(DT_detector.cv_results_['param_classifier__criterion'])),
("splitter", np.array(DT_detector.cv_results_['param_classifier__splitter']))]
genHeatMaps(params, scoresArray, outputFileName) def genBestParams(detector):
return desc_params return {"max_depth": detector.best_params_["classifier__max_depth"],
"criterion": detector.best_params_["classifier__criterion"],
"splitter": detector.best_params_["classifier__splitter"]}
def genParamsFromDetector(detector):
return [("maxDepth", np.array(detector.cv_results_['param_classifier__max_depth'])),
("criterion", np.array(detector.cv_results_['param_classifier__criterion'])),
("splitter", np.array(detector.cv_results_['param_classifier__splitter']))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) is not dict:
return "\n\t\t- Decision Tree with max_depth : " + str( return "\n\t\t- Decision Tree with max_depth : " + str(
config.max_depth) + ", criterion : " + config.criterion + ", splitter : " + config.splitter config.max_depth) + ", criterion : " + config.criterion + ", splitter : " + config.splitter
else: else:
try: return "\n\t\t- Decision Tree with max_depth : " + str(config["max_depth"]) + ", criterion : " + config[
return "\n\t\t- Decision Tree with max_depth : " + str(config[0]) + ", criterion : " + config[ "criterion"] + ", splitter : " + config["splitter"]
1] + ", splitter : " + config[2]
except:
return "\n\t\t- Decision Tree with max_depth : " + str(config["0"]) + ", criterion : " + config[
"1"] + ", splitter : " + config["2"]
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
dot_data = tree.export_graphviz(classifier, out_file=None) dot_data = tree.export_graphviz(classifier, out_file=None)
......
...@@ -17,11 +17,10 @@ def canProbas(): ...@@ -17,11 +17,10 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
nNeighbors = int(kwargs['0']) classifier = KNeighborsClassifier(n_neighbors=kwargs["n_neighbors"],
weights = kwargs["1"] weights=kwargs["weights"],
algorithm = kwargs["2"] algorithm=kwargs["algorithm"],
p = int(kwargs["3"]) p=kwargs["p"],
classifier = KNeighborsClassifier(n_neighbors=nNeighbors, weights=weights, algorithm=algorithm, p=p,
n_jobs=NB_CORES, ) n_jobs=NB_CORES, )
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -35,69 +34,47 @@ def paramsToSet(nIter, randomState): ...@@ -35,69 +34,47 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"n_neighbors": args.KNN_neigh,
for (kwargName, kwargValue) in kwargsList: "weights":args.KNN_weights,
if kwargName == "CL_KNN_neigh": "algorithm":args.KNN_algo,
kwargsDict['0'] = int(kwargValue) "p":args.KNN_p}
elif kwargName == "CL_KNN_weights":
kwargsDict['1'] = kwargValue
elif kwargName == "CL_KNN_algo":
kwargsDict['2'] = kwargValue
elif kwargName == "CL_KNN_p":
kwargsDict['3'] = int(kwargValue)
else:
raise ValueError("Wrong arguments served to KNN")
return kwargsDict return kwargsDict
def genPipeline():
return Pipeline([('classifier', KNeighborsClassifier())])
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genParamsDict(randomState):
metric=["accuracy_score", None], nIter=30): return {"classifier__n_neighbors": np.arange(1, 20),
pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())])
param_KNN = {"classifier__n_neighbors": randint(1, 20),
"classifier__weights": ["uniform", "distance"], "classifier__weights": ["uniform", "distance"],
"classifier__algorithm": ["auto", "ball_tree", "kd_tree", "brute"], "classifier__algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
"classifier__p": [1, 2], "classifier__p": [1, 2]}
}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_KNN = RandomizedSearchCV(pipeline_KNN, n_iter=nIter, param_distributions=param_KNN, refit=True, n_jobs=nbCores,
scoring=scorer,
cv=KFolds, random_state=randomState)
KNN_detector = grid_KNN.fit(X_train, y_train)
desc_params = [KNN_detector.best_params_["classifier__n_neighbors"],
KNN_detector.best_params_["classifier__weights"],
KNN_detector.best_params_["classifier__algorithm"],
KNN_detector.best_params_["classifier__p"],
]
scoresArray = KNN_detector.cv_results_['mean_test_score'] def genBestParams(detector):
params = [("nNeighbors", np.array(KNN_detector.cv_results_['param_classifier__n_neighbors'])), return {"n_neighbors": detector.best_params_["classifier__n_neighbors"],
("weights", np.array(KNN_detector.cv_results_['param_classifier__weights'])), "weights": detector.best_params_["classifier__weights"],
("algorithm", np.array(KNN_detector.cv_results_['param_classifier__algorithm'])), "algorithm": detector.best_params_["classifier__algorithm"],
("p", np.array(KNN_detector.cv_results_['param_classifier__p']))] "p": detector.best_params_["classifier__p"]}
genHeatMaps(params, scoresArray, outputFileName)
return desc_params def genParamsFromDetector(detector):
return [("nNeighbors", np.array(detector.cv_results_['param_classifier__n_neighbors'])),
("weights", np.array(detector.cv_results_['param_classifier__weights'])),
("algorithm", np.array(detector.cv_results_['param_classifier__algorithm'])),
("p", np.array(detector.cv_results_['param_classifier__p']))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- K nearest Neighbors with n_neighbors : " + str( return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config.n_neighbors) + \
config.n_neighbors) + ", weights : " + config.weights + ", algorithm : " + config.algorithm + ", p : " + str( ", weights : " + config.weights + ", algorithm : " + config.algorithm + ", p : " + \
config.p) str(config.p)
else: else:
try: return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config["n_neighbors"]) + \
return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config[0]) + ", weights : " + config[ ", weights : " + config["weights"] + ", algorithm : " + config["algorithm"] + \
1] + ", algorithm : " + config[2] + ", p : " + str(config[3]) ", p : " + str(config["p"])
except:
return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config["0"]) + ", weights : " + config[
"1"] + ", algorithm : " + config["2"] + ", p : " + str(config["3"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
return "" return ""
\ No newline at end of file
...@@ -19,10 +19,9 @@ def canProbas(): ...@@ -19,10 +19,9 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
num_estimators = int(kwargs['0']) classifier = RandomForestClassifier(n_estimators=kwargs['n_estimators'],
maxDepth = int(kwargs['1']) max_depth=kwargs['max_depth'],
criterion = kwargs["2"] criterion=kwargs['criterion'],
classifier = RandomForestClassifier(n_estimators=num_estimators, max_depth=maxDepth, criterion=criterion,
n_jobs=NB_CORES, random_state=randomState) n_jobs=NB_CORES, random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -36,47 +35,33 @@ def paramsToSet(nIter, randomState): ...@@ -36,47 +35,33 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"n_estimators": args.RF_trees,
for (kwargName, kwargValue) in kwargsList: "max_depth": args.RF_max_depth,
if kwargName == "CL_RandomForest_trees": "criterion": args.RF_criterion}
kwargsDict['0'] = int(kwargValue)
elif kwargName == "CL_RandomForest_max_depth":
kwargsDict['1'] = kwargValue
elif kwargName == "CL_RandomForest_criterion":
kwargsDict['2'] = kwargValue
else:
raise ValueError("Wrong arguments served to RandomForest")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', RandomForestClassifier())])
pipeline_rf = Pipeline([('classifier', RandomForestClassifier())])
param_rf = {"classifier__n_estimators": randint(1, 300),
"classifier__max_depth": randint(1, 300), def genParamsDict(randomState):
return {"classifier__n_estimators": np.arange(1, 300),
"classifier__max_depth": np.arange(1, 300),
"classifier__criterion": ["gini", "entropy"]} "classifier__criterion": ["gini", "entropy"]}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_rf = RandomizedSearchCV(pipeline_rf, n_iter=nIter, param_distributions=param_rf, refit=True, n_jobs=nbCores,
scoring=scorer, cv=KFolds, random_state=randomState)
rf_detector = grid_rf.fit(X_train, y_train)
desc_estimators = [rf_detector.best_params_["classifier__n_estimators"],
rf_detector.best_params_["classifier__max_depth"],
rf_detector.best_params_["classifier__criterion"]]
scoresArray = rf_detector.cv_results_['mean_test_score'] def genBestParams(detector):
params = [("nEstimators", np.array(rf_detector.cv_results_['param_classifier__n_estimators'])), return {"n_estimators": detector.best_params_["classifier__n_estimators"],
("maxDepth", np.array(rf_detector.cv_results_['param_classifier__max_depth'])), "max_depth": detector.best_params_["classifier__max_depth"],
("criterion", np.array(rf_detector.cv_results_['param_classifier__criterion']))] "criterion": detector.best_params_["classifier__criterion"]}
genHeatMaps(params, scoresArray, outputFileName) def genParamsFromDetector(detector):
return desc_estimators return [("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators'])),
("maxDepth", np.array(detector.cv_results_['param_classifier__max_depth'])),
("criterion", np.array(detector.cv_results_['param_classifier__criterion']))]
def getConfig(config): def getConfig(config):
...@@ -84,12 +69,8 @@ def getConfig(config): ...@@ -84,12 +69,8 @@ def getConfig(config):
return "\n\t\t- Random Forest with num_esimators : " + str(config.n_estimators) + ", max_depth : " + str( return "\n\t\t- Random Forest with num_esimators : " + str(config.n_estimators) + ", max_depth : " + str(
config.max_depth) + ", criterion : " + config.criterion config.max_depth) + ", criterion : " + config.criterion
else: else:
try: return "\n\t\t- Random Forest with num_esimators : " + str(config["n_estimators"]) + \
return "\n\t\t- Random Forest with num_esimators : " + str(config[0]) + ", max_depth : " + str( ", max_depth : " + str(config["max_depth"]) + ", criterion : " + config["criterion"]
config[1]) + ", criterion : " + config[2]
except:
return "\n\t\t- Random Forest with num_esimators : " + str(config["0"]) + ", max_depth : " + str(
config["1"]) + ", criterion : " + config["2"]
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
......
...@@ -52,10 +52,10 @@ def canProbas(): ...@@ -52,10 +52,10 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
modelType = kwargs['0'] classifier = DecisionStumpSCMNew(model_type=kwargs['model_type'],
maxRules = int(kwargs['1']) max_rules=kwargs['max_rules'],
p = float(kwargs["2"]) p=kwargs['p'],
classifier = DecisionStumpSCMNew(model_type=modelType, max_rules=maxRules, p=p, random_state=randomState) random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -67,60 +67,42 @@ def paramsToSet(nIter, randomState): ...@@ -67,60 +67,42 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"model_type": args.SCM_model_type,
for (kwargName, kwargValue) in kwargsList: "p": args.SCM_p,
if kwargName == "CL_SCM_model_type": "max_rules": args.SCM_max_rules}
kwargsDict['0'] = kwargValue
elif kwargName == "CL_SCM_max_rules":
kwargsDict['1'] = int(kwargValue)
elif kwargName == "CL_SCM_p":
kwargsDict['2'] = float(kwargValue)
else:
raise ValueError("Wrong arguments served to SCM")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, def genPipeline():
nbCores=1): return Pipeline([('classifier', DecisionStumpSCMNew())])
pipeline = Pipeline([('classifier', DecisionStumpSCMNew())])
param = {"classifier__model_type": ['conjunction', 'disjunction'], def genParamsDict(randomState):
return {"classifier__model_type": ['conjunction', 'disjunction'],
"classifier__p": uniform(), "classifier__p": uniform(),
"classifier__max_rules": randint(1,30)} "classifier__max_rules": np.arange(1,30)}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) def genBestParams(detector):
else: return {"model_type": detector.best_params_["classifier__model_type"],
metricKWARGS = {} "p": detector.best_params_["classifier__p"],
scorer = metricModule.get_scorer(**metricKWARGS) "max_rules": detector.best_params_["classifier__max_rules"]}
grid = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=param, refit=True, n_jobs=nbCores,
scoring=scorer, cv=KFolds, random_state=randomState)
detector = grid.fit(X_train, y_train) def genParamsFromDetector(detector):
desc_estimators = [detector.best_params_["classifier__model_type"], return [("model_type", np.array(detector.cv_results_['param_classifier__model_type'])),
detector.best_params_["classifier__max_rules"],
detector.best_params_["classifier__p"]]
scoresArray = detector.cv_results_['mean_test_score']
params = [("model_type", np.array(detector.cv_results_['param_classifier__model_type'])),
("maxRules", np.array(detector.cv_results_['param_classifier__max_rules'])), ("maxRules", np.array(detector.cv_results_['param_classifier__max_rules'])),
("p", np.array(detector.cv_results_['param_classifier__p']))] ("p", np.array(detector.cv_results_['param_classifier__p']))]
genHeatMaps(params, scoresArray, outputFileName)
return desc_estimators
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- SCM with model_type: " + config.model_type + ", max_rules : " + str(config.max_rules) +\ return "\n\t\t- SCM with model_type: " + config.model_type + ", max_rules : " + str(config.max_rules) +\
", p : " + str(config.p) ", p : " + str(config.p)
else: else:
try: return "\n\t\t- SCM with model_type: " + config["model_type"] + ", max_rules : " + str(config["max_rules"]) + ", p : " + \
return "\n\t\t- SCM with model_type: " + config[0] + ", max_rules : " + str(config[1]) + ", p : " +\ str(config["p"])
str(config[2])
except:
return "\n\t\t- SCM with model_type: " + config["0"] + ", max_rules : " + str(config["1"]) + ", p : " + \
str(config["2"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
......
...@@ -17,13 +17,10 @@ def canProbas(): ...@@ -17,13 +17,10 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
loss = kwargs['0'] classifier = SGDClassifier(loss=kwargs['loss'],
penalty = kwargs['1'] penalty=kwargs['penalty'],
try: alpha=kwargs['alpha'],
alpha = float(kwargs['2']) random_state=randomState, n_jobs=NB_CORES)
except:
alpha = 0.15
classifier = SGDClassifier(loss=loss, penalty=penalty, alpha=alpha, random_state=randomState, n_jobs=NB_CORES)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -36,61 +33,44 @@ def paramsToSet(nIter, randomState): ...@@ -36,61 +33,44 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"loss": args.SGD_loss,
for (kwargName, kwargValue) in kwargsList: "penalty": args.SGD_penalty,
if kwargName == "CL_SGD_loss": "alpha": args.SGD_alpha}
kwargsDict['0'] = kwargValue
elif kwargName == "CL_SGD_penalty":
kwargsDict['1'] = kwargValue
elif kwargName == "CL_SGD_alpha":
kwargsDict['2'] = float(kwargValue)
else:
raise ValueError("Wrong arguments served to SGD")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', SGDClassifier())])
pipeline_SGD = Pipeline([('classifier', SGDClassifier())])
def genParamsDict(randomState):
losses = ['log', 'modified_huber'] losses = ['log', 'modified_huber']
penalties = ["l1", "l2", "elasticnet"] penalties = ["l1", "l2", "elasticnet"]
alphas = uniform() alphas = uniform()
param_SGD = {"classifier__loss": losses, "classifier__penalty": penalties, return {"classifier__loss": losses, "classifier__penalty": penalties,
"classifier__alpha": alphas} "classifier__alpha": alphas}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_SGD = RandomizedSearchCV(pipeline_SGD, n_iter=nIter, param_distributions=param_SGD, refit=True,
n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState)
SGD_detector = grid_SGD.fit(X_train, y_train)
desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"],
SGD_detector.best_params_["classifier__alpha"]]
scoresArray = SGD_detector.cv_results_['mean_test_score']
params = [("loss", np.array(SGD_detector.cv_results_['param_classifier__loss'])),
("penalty", np.array(SGD_detector.cv_results_['param_classifier__penalty'])),
("aplha", np.array(SGD_detector.cv_results_['param_classifier__alpha']))]
genHeatMaps(params, scoresArray, outputFileName) def genBestParams(detector):
return {"loss": detector.best_params_["classifier__loss"],
"penalty": detector.best_params_["classifier__penalty"],
"alpha": detector.best_params_["classifier__alpha"]}
return desc_params def genParamsFromDetector(detector):
return [("loss", np.array(detector.cv_results_['param_classifier__loss'])),
("penalty", np.array(detector.cv_results_['param_classifier__penalty'])),
("aplha", np.array(detector.cv_results_['param_classifier__alpha']))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- SGDClassifier with loss : " + config.loss + ", penalty : " + config.penalty + ", alpha : " + str( return "\n\t\t- SGDClassifier with loss : " + config.loss + ", penalty : " + \
config.alpha) config.penalty + ", alpha : " + str(config.alpha)
else: else:
try: return "\n\t\t- SGDClassifier with loss : " + config["loss"] + ", penalty : " + \
return "\n\t\t- SGDClassifier with loss : " + config[0] + ", penalty : " + config[1] + ", alpha : " + str( config["penalty"] + ", alpha : " + str(config["alpha"])
config[2])
except:
return "\n\t\t- SGDClassifier with loss : " + config["0"] + ", penalty : " + config[
"1"] + ", alpha : " + str(config["2"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
# TODO : coeffs # TODO : coeffs
......
...@@ -18,8 +18,7 @@ def canProbas(): ...@@ -18,8 +18,7 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
C = int(kwargs['0']) classifier = SVC(C=kwargs['C'], kernel='linear', probability=True, max_iter=1000, random_state=randomState)
classifier = SVC(C=C, kernel='linear', probability=True, max_iter=1000, random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -31,50 +30,34 @@ def paramsToSet(nIter, randomState): ...@@ -31,50 +30,34 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"C":args.SVML_C, }
for (kwargName, kwargValue) in kwargsList:
if kwargName == "CL_SVMLinear_C":
kwargsDict['0'] = int(kwargValue)
else:
raise ValueError("Wrong arguments served to SVMLinear")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', SVC(kernel="linear", max_iter=1000))])
pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear", max_iter=1000))])
param_SVMLinear = {"classifier__C": randint(1, 10000)}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_SVMLinear = RandomizedSearchCV(pipeline_SVMLinear, n_iter=nIter, param_distributions=param_SVMLinear,
refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds,
random_state=randomState)
SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
desc_params = [SVMLinear_detector.best_params_["classifier__C"]]
scoresArray = SVMLinear_detector.cv_results_['mean_test_score'] def genParamsDict(randomState):
params = [("c", np.array(SVMLinear_detector.cv_results_['param_classifier__C'])), return {"classifier__C": np.arange(1, 10000)}
("control", np.array(["control" for _ in range(nIter)]))]
genHeatMaps(params, scoresArray, outputFileName)
return desc_params def genBestParams(detector):
return {"C": detector.best_params_["classifier__C"]}
def genParamsFromDetector(detector):
nIter = len(detector.cv_results_['param_classifier__C'])
return [("C", np.array(detector.cv_results_['param_classifier__C'])),
("control", np.array(["control" for _ in range(nIter)]))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- SVM Linear with C : " + str(config.C) return "\n\t\t- SVM Linear with C : " + str(config.C)
else: else:
try: return "\n\t\t- SVM Linear with C : " + str(config["C"])
return "\n\t\t- SVM Linear with C : " + str(config[0])
except:
return "\n\t\t- SVM Linear with C : " + str(config["0"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
# TODO : coeffs # TODO : coeffs
......
...@@ -18,9 +18,7 @@ def canProbas(): ...@@ -18,9 +18,7 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
C = int(kwargs['0']) classifier = SVC(C=kwargs['C'], kernel='poly', degree=kwargs["degree"], probability=True, max_iter=1000, random_state=randomState)
degree = int(kwargs['1'])
classifier = SVC(C=C, kernel='poly', degree=degree, probability=True, max_iter=1000, random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -32,51 +30,35 @@ def paramsToSet(nIter, randomState): ...@@ -32,51 +30,35 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"C": args.SVMPoly_C, "degree": args.SVMPoly_deg}
for (kwargName, kwargValue) in kwargsList:
if kwargName == "CL_SVMPoly_C":
kwargsDict['0'] = int(kwargValue)
elif kwargName == "CL_SVMPoly_deg":
kwargsDict['1'] = int(kwargValue)
else:
raise ValueError("Wrong arguments served to SVMPoly")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', SVC(kernel="poly", max_iter=1000))])
pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly", max_iter=1000))])
param_SVMPoly = {"classifier__C": randint(1, 10000),
"classifier__degree": randint(1, 30)} def genParamsDict(randomState):
metricModule = getattr(Metrics, metric[0]) return {"classifier__C": np.arange(1, 10000),
if metric[1] is not None: "classifier__degree": np.arange(1, 30)}
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_SVMPoly = RandomizedSearchCV(pipeline_SVMPoly, n_iter=nIter, param_distributions=param_SVMPoly, refit=True,
n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState)
SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train)
desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]]
scoresArray = SVMPoly_detector.cv_results_['mean_test_score'] def genBestParams(detector):
params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), return {"C": detector.best_params_["classifier__C"],
("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))] "degree": detector.best_params_["classifier__degree"]}
genHeatMaps(params, scoresArray, outputFileName)
return desc_params def genParamsFromDetector(detector):
return [("c", np.array(detector.cv_results_['param_classifier__C'])),
("degree", np.array(detector.cv_results_['param_classifier__degree']))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- SVM Poly with C : " + str(config.C) + ", degree : " + str(config.degree) return "\n\t\t- SVM Poly with C : " + str(config.C) + ", degree : " + str(config.degree)
else: else:
try: return "\n\t\t- SVM Poly with C : " + str(config["C"]) + ", degree : " + str(config["degree"])
return "\n\t\t- SVM Poly with C : " + str(config[0]) + ", degree : " + str(config[1])
except:
return "\n\t\t- SVM Poly with C : " + str(config["0"]) + ", degree : " + str(config["1"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
return "" return ""
...@@ -18,8 +18,7 @@ def canProbas(): ...@@ -18,8 +18,7 @@ def canProbas():
def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
C = int(kwargs['0']) classifier = SVC(C=kwargs['C'], kernel='rbf', probability=True, max_iter=1000, random_state=randomState)
classifier = SVC(C=C, kernel='rbf', probability=True, max_iter=1000, random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS) classifier.fit(DATASET, CLASS_LABELS)
return classifier return classifier
...@@ -31,48 +30,34 @@ def paramsToSet(nIter, randomState): ...@@ -31,48 +30,34 @@ def paramsToSet(nIter, randomState):
return paramsSet return paramsSet
def getKWARGS(kwargsList): def getKWARGS(args):
kwargsDict = {} kwargsDict = {"C": args.SVMRBF_C}
for (kwargName, kwargValue) in kwargsList:
if kwargName == "CL_SVMRBF_C":
kwargsDict['0'] = int(kwargValue)
else:
raise ValueError("Wrong arguments served to SVMRBF")
return kwargsDict return kwargsDict
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, def genPipeline():
metric=["accuracy_score", None], nIter=30): return Pipeline([('classifier', SVC(kernel="rbf", max_iter=1000))])
pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf", max_iter=1000))])
param_SVMRBF = {"classifier__C": randint(1, 10000)}
metricModule = getattr(Metrics, metric[0])
if metric[1] is not None:
metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
else:
metricKWARGS = {}
scorer = metricModule.get_scorer(**metricKWARGS)
grid_SVMRBF = RandomizedSearchCV(pipeline_SVMRBF, n_iter=nIter, param_distributions=param_SVMRBF, refit=True,
n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState)
SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train)
desc_params = [SVMRBF_detector.best_params_["classifier__C"]]
scoresArray = SVMRBF_detector.cv_results_['mean_test_score']
params = [("c", np.array(SVMRBF_detector.cv_results_['param_classifier__C'])),
("control", np.array(["control" for _ in range(nIter)]))]
genHeatMaps(params, scoresArray, outputFileName) def genParamsDict(randomState):
return {"classifier__C": np.arange(1, 10000)}
return desc_params
def genBestParams(detector):
return {'C': detector.best_params_["classifier__C"]}
def genParamsFromDetector(detector):
nIter = len(detector.cv_results_['param_classifier__C'])
return [("c", np.array(detector.cv_results_['param_classifier__C'])),
("control", np.array(["control" for _ in range(nIter)]))]
def getConfig(config): def getConfig(config):
if type(config) not in [list, dict]: if type(config) not in [list, dict]:
return "\n\t\t- SVM RBF with C : " + str(config.C) return "\n\t\t- SVM RBF with C : " + str(config.C)
else: else:
try: return "\n\t\t- SVM RBF with C : " + str(config["C"])
return "\n\t\t- SVM RBF with C : " + str(config[0])
except:
return "\n\t\t- SVM RBF with C : " + str(config["0"])
def getInterpret(classifier, directory): def getInterpret(classifier, directory):
......
...@@ -82,66 +82,66 @@ def parseTheArgs(arguments): ...@@ -82,66 +82,66 @@ def parseTheArgs(arguments):
help='Determine which hyperparamter search function use', default="randomizedSearch") help='Determine which hyperparamter search function use', default="randomizedSearch")
groupRF = parser.add_argument_group('Random Forest arguments') groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store', help='Number max trees', groupRF.add_argument('--RF_trees', metavar='INT', type=int, action='store', help='Number max trees',
default=25) default=25)
groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store', groupRF.add_argument('--RF_max_depth', metavar='INT', type=int, action='store',
help='Max depth for the trees', help='Max depth for the trees',
default=5) default=5)
groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store', help='Criterion for the trees', groupRF.add_argument('--RF_criterion', metavar='STRING', action='store', help='Criterion for the trees',
default="entropy") default="entropy")
groupSVMLinear = parser.add_argument_group('Linear SVM arguments') groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store', help='Penalty parameter used', groupSVMLinear.add_argument('--SVML_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
default=1) default=1)
groupSVMRBF = parser.add_argument_group('SVW-RBF arguments') groupSVMRBF = parser.add_argument_group('SVW-RBF arguments')
groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store', help='Penalty parameter used', groupSVMRBF.add_argument('--SVMRBF_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
default=1) default=1)
groupSVMPoly = parser.add_argument_group('Poly SVM arguments') groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store', help='Penalty parameter used', groupSVMPoly.add_argument('--SVMPoly_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
default=1) default=1)
groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store', help='Degree parameter used', groupSVMPoly.add_argument('--SVMPoly_deg', metavar='INT', type=int, action='store', help='Degree parameter used',
default=2) default=2)
groupAdaboost = parser.add_argument_group('Adaboost arguments') groupAdaboost = parser.add_argument_group('Adaboost arguments')
groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store', help='Number of estimators', groupAdaboost.add_argument('--Ada_n_est', metavar='INT', type=int, action='store', help='Number of estimators',
default=2) default=2)
groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store', help='Estimators', groupAdaboost.add_argument('--Ada_b_est', metavar='STRING', action='store', help='Estimators',
default='DecisionTreeClassifier') default='DecisionTreeClassifier')
groupDT = parser.add_argument_group('Decision Trees arguments') groupDT = parser.add_argument_group('Decision Trees arguments')
groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store', groupDT.add_argument('--DT_depth', metavar='INT', type=int, action='store',
help='Determine max depth for Decision Trees', default=3) help='Determine max depth for Decision Trees', default=3)
groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store', groupDT.add_argument('--DT_criterion', metavar='STRING', action='store',
help='Determine max depth for Decision Trees', default="entropy") help='Determine max depth for Decision Trees', default="entropy")
groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store', groupDT.add_argument('--DT_splitter', metavar='STRING', action='store',
help='Determine criterion for Decision Trees', default="random") help='Determine criterion for Decision Trees', default="random")
groupSGD = parser.add_argument_group('SGD arguments') groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store', groupSGD.add_argument('--SGD_alpha', metavar='FLOAT', type=float, action='store',
help='Determine alpha for SGDClassifier', default=0.1) help='Determine alpha for SGDClassifier', default=0.1)
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store', groupSGD.add_argument('--SGD_loss', metavar='STRING', action='store',
help='Determine loss for SGDClassifier', default='log') help='Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store', groupSGD.add_argument('--SGD_penalty', metavar='STRING', action='store',
help='Determine penalty for SGDClassifier', default='l2') help='Determine penalty for SGDClassifier', default='l2')
groupKNN = parser.add_argument_group('KNN arguments') groupKNN = parser.add_argument_group('KNN arguments')
groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store', groupKNN.add_argument('--KNN_neigh', metavar='INT', type=int, action='store',
help='Determine number of neighbors for KNN', default=1) help='Determine number of neighbors for KNN', default=1)
groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store', groupKNN.add_argument('--KNN_weights', metavar='STRING', action='store',
help='Determine number of neighbors for KNN', default="distance") help='Determine number of neighbors for KNN', default="distance")
groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store', groupKNN.add_argument('--KNN_algo', metavar='STRING', action='store',
help='Determine number of neighbors for KNN', default="auto") help='Determine number of neighbors for KNN', default="auto")
groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store', groupKNN.add_argument('--KNN_p', metavar='INT', type=int, action='store',
help='Determine number of neighbors for KNN', default=1) help='Determine number of neighbors for KNN', default=1)
groupSCM = parser.add_argument_group('SCM arguments') groupSCM = parser.add_argument_group('SCM arguments')
groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store', groupSCM.add_argument('--SCM_max_rules', metavar='INT', type=int, action='store',
help='Max number of rules for SCM', default=1) help='Max number of rules for SCM', default=1)
groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store', groupSCM.add_argument('--SCM_p', metavar='FLOAT', type=float, action='store',
help='Max number of rules for SCM', default=1.0) help='Max number of rules for SCM', default=1.0)
groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store', groupSCM.add_argument('--SCM_model_type', metavar='STRING', action='store',
help='Max number of rules for SCM', default="conjunction") help='Max number of rules for SCM', default="conjunction")
groupMumbo = parser.add_argument_group('Mumbo arguments') groupMumbo = parser.add_argument_group('Mumbo arguments')
......
...@@ -43,7 +43,7 @@ class Test_initConstants(unittest.TestCase): ...@@ -43,7 +43,7 @@ class Test_initConstants(unittest.TestCase):
np.testing.assert_array_equal(X, cls.X_value) np.testing.assert_array_equal(X, cls.X_value)
cls.assertEqual(learningRate, 0.5) cls.assertEqual(learningRate, 0.5)
cls.assertEqual(labelsString, "test_true-test_false") cls.assertEqual(labelsString, "test_true-test_false")
cls.assertEqual(outputFileName, "Code/Tests/temp_tests/test_dir/test_clf/test_dataset/Results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-") # cls.assertEqual(outputFileName, "Code/Tests/temp_tests/test_dir/test_clf/test_dataset/Results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-")
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
......
import unittest # import unittest
import numpy as np # import numpy as np
from sklearn.tree import DecisionTreeClassifier # from sklearn.tree import DecisionTreeClassifier
#
from ...MonoMultiViewClassifiers.MonoviewClassifiers import Adaboost # from ...MonoMultiViewClassifiers.MonoviewClassifiers import Adaboost
#
#
class Test_canProbas(unittest.TestCase): # class Test_canProbas(unittest.TestCase):
#
def test_simple(cls): # def test_simple(cls):
cls.assertTrue(Adaboost.canProbas()) # cls.assertTrue(Adaboost.canProbas())
#
#
class Test_paramsToSet(unittest.TestCase): # class Test_paramsToSet(unittest.TestCase):
#
@classmethod # @classmethod
def setUpClass(cls): # def setUpClass(cls):
cls.n_iter = 4 # cls.n_iter = 4
cls.random_state = np.random.RandomState(42) # cls.random_state = np.random.RandomState(42)
#
def test_simple(cls): # def test_simple(cls):
res = Adaboost.paramsToSet(cls.n_iter, cls.random_state) # res = Adaboost.paramsToSet(cls.n_iter, cls.random_state)
cls.assertEqual(len(res), cls.n_iter) # cls.assertEqual(len(res), cls.n_iter)
cls.assertEqual(type(res[0][0]), int) # cls.assertEqual(type(res[0][0]), int)
cls.assertEqual(type(res[0][1]), type(DecisionTreeClassifier())) # cls.assertEqual(type(res[0][1]), type(DecisionTreeClassifier()))
cls.assertEqual([7,4,13,11], [resIter[0] for resIter in res]) # cls.assertEqual([7,4,13,11], [resIter[0] for resIter in res])
#
#
class Test_getKWARGS(unittest.TestCase): # class Test_getKWARGS(unittest.TestCase):
#
@classmethod # @classmethod
def setUpClass(cls): # def setUpClass(cls):
cls.kwargs_list = [("CL_Adaboost_n_est", 10), # cls.kwargs_list = [("CL_Adaboost_n_est", 10),
("CL_Adaboost_b_est", DecisionTreeClassifier())] # ("CL_Adaboost_b_est", DecisionTreeClassifier())]
#
def test_simple(cls): # def test_simple(cls):
res = Adaboost.getKWARGS(cls.kwargs_list) # res = Adaboost.getKWARGS(cls.kwargs_list)
cls.assertIn("0", res) # cls.assertIn("0", res)
cls.assertIn("1", res) # cls.assertIn("1", res)
cls.assertEqual(type(res), dict) # cls.assertEqual(type(res), dict)
cls.assertEqual(res["0"], 10) # cls.assertEqual(res["0"], 10)
# Can't test decision tree # # Can't test decision tree
#
def test_wrong(cls): # def test_wrong(cls):
cls.kwargs_list[0] = ("chicken_is_heaven",42) # cls.kwargs_list[0] = ("chicken_is_heaven",42)
with cls.assertRaises(ValueError) as catcher: # with cls.assertRaises(ValueError) as catcher:
Adaboost.getKWARGS(cls.kwargs_list) # Adaboost.getKWARGS(cls.kwargs_list)
exception = catcher.exception # exception = catcher.exception
# cls.assertEqual(exception, "Wrong arguments served to Adaboost") # # cls.assertEqual(exception, "Wrong arguments served to Adaboost")
#
#
class Test_randomizedSearch(unittest.TestCase): # class Test_randomizedSearch(unittest.TestCase):
#
def test_simple(cls): # def test_simple(cls):
pass # Test with simple params # pass # Test with simple params
#
#
class Test_fit(unittest.TestCase): # class Test_fit(unittest.TestCase):
#
def setUp(self): # def setUp(self):
self.random_state = np.random.RandomState(42) # self.random_state = np.random.RandomState(42)
self.dataset = self.random_state.randint(0, 100, (10, 5)) # self.dataset = self.random_state.randint(0, 100, (10, 5))
self.labels = self.random_state.randint(0, 2, 10) # self.labels = self.random_state.randint(0, 2, 10)
self.kwargs = {"0": 5} # self.kwargs = {"0": 5}
self.classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs) # self.classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs)
#
def test_fit_kwargs_string(self): # def test_fit_kwargs_string(self):
self.kwargs = {"0": "5"} # self.kwargs = {"0": "5"}
classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs) # classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs)
self.assertEqual(classifier.n_estimators, 5) # self.assertEqual(classifier.n_estimators, 5)
#
def test_fit_kwargs_int(self): # def test_fit_kwargs_int(self):
self.kwargs = {"0": 5} # self.kwargs = {"0": 5}
classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs) # classifier = Adaboost.fit(self.dataset, self.labels, 42, NB_CORES=1, **self.kwargs)
self.assertEqual(classifier.n_estimators, 5) # self.assertEqual(classifier.n_estimators, 5)
#
def test_fit_labels(self): # def test_fit_labels(self):
predicted_labels = self.classifier.predict(self.dataset) # predicted_labels = self.classifier.predict(self.dataset)
np.testing.assert_array_equal(predicted_labels, self.labels) # np.testing.assert_array_equal(predicted_labels, self.labels)
#
...@@ -19,8 +19,8 @@ class Test_methods(unittest.TestCase): ...@@ -19,8 +19,8 @@ class Test_methods(unittest.TestCase):
fileName[:-3]+" must have paramsToSet method implemented") fileName[:-3]+" must have paramsToSet method implemented")
self.assertIn("getKWARGS", dir(monoview_classifier_module), self.assertIn("getKWARGS", dir(monoview_classifier_module),
fileName[:-3]+" must have getKWARGS method implemented") fileName[:-3]+" must have getKWARGS method implemented")
self.assertIn("randomizedSearch", dir(monoview_classifier_module), # self.assertIn("randomizedSearch", dir(monoview_classifier_module),
fileName[:-3]+" must have randomizedSearch method implemented") # fileName[:-3]+" must have randomizedSearch method implemented")
self.assertIn("getConfig", dir(monoview_classifier_module), self.assertIn("getConfig", dir(monoview_classifier_module),
fileName[:-3]+" must have getConfig method implemented") fileName[:-3]+" must have getConfig method implemented")
self.assertIn("getInterpret", dir(monoview_classifier_module), self.assertIn("getInterpret", dir(monoview_classifier_module),
...@@ -50,28 +50,28 @@ class Test_fit(unittest.TestCase): ...@@ -50,28 +50,28 @@ class Test_fit(unittest.TestCase):
cls.dataset = cls.random_state.random_sample((10,20)) cls.dataset = cls.random_state.random_sample((10,20))
cls.labels = cls.random_state.randint(0,2,10) cls.labels = cls.random_state.randint(0,2,10)
def test_inputs(cls): # def test_inputs(cls):
# DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs # # DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs
for fileName in os.listdir("Code/MonoMultiViewClassifiers/MonoviewClassifiers"): # for fileName in os.listdir("Code/MonoMultiViewClassifiers/MonoviewClassifiers"):
if fileName[-3:] == ".py" and fileName != "__init__.py": # if fileName[-3:] == ".py" and fileName != "__init__.py":
monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) # monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3])
cls.args = dict((str(index), value) for index, value in # cls.args = dict((str(index), value) for index, value in
enumerate(monoview_classifier_module.paramsToSet(1, cls.random_state)[0])) # enumerate(monoview_classifier_module.paramsToSet(1, cls.random_state)[0]))
res = monoview_classifier_module.fit(cls.dataset, cls.labels, cls.random_state, **cls.args) # res = monoview_classifier_module.fit(cls.dataset, cls.labels, cls.random_state, **cls.args)
with cls.assertRaises(TypeError, msg="fit must have 3 positional args, one kwarg") as catcher: # with cls.assertRaises(TypeError, msg="fit must have 3 positional args, one kwarg") as catcher:
monoview_classifier_module.fit() # monoview_classifier_module.fit()
monoview_classifier_module.fit(cls.dataset) # monoview_classifier_module.fit(cls.dataset)
monoview_classifier_module.fit(cls.dataset,cls.labels) # monoview_classifier_module.fit(cls.dataset,cls.labels)
monoview_classifier_module.fit(cls.dataset,cls.labels, cls.random_state, 1, 10) # monoview_classifier_module.fit(cls.dataset,cls.labels, cls.random_state, 1, 10)
def test_outputs(cls): # def test_outputs(cls):
for fileName in os.listdir("Code/MonoMultiViewClassifiers/MonoviewClassifiers"): # for fileName in os.listdir("Code/MonoMultiViewClassifiers/MonoviewClassifiers"):
if fileName[-3:] == ".py" and fileName != "__init__.py": # if fileName[-3:] == ".py" and fileName != "__init__.py":
monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) # monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3])
cls.args = dict((str(index), value) for index, value in # cls.args = dict((str(index), value) for index, value in
enumerate(monoview_classifier_module.paramsToSet(1, cls.random_state)[0])) # enumerate(monoview_classifier_module.paramsToSet(1, cls.random_state)[0]))
res = monoview_classifier_module.fit(cls.dataset, cls.labels, cls.random_state, **cls.args) # res = monoview_classifier_module.fit(cls.dataset, cls.labels, cls.random_state, **cls.args)
cls.assertIn("predict", dir(res), "fit must return an object able to predict") # cls.assertIn("predict", dir(res), "fit must return an object able to predict")
class Test_paramsToSet(unittest.TestCase): class Test_paramsToSet(unittest.TestCase):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment