Commit 10483f58 authored by bbauvin

Added randomized grid search, some predict_proba support, and a metric package; still need to test all of it

parent d4ec20e6
Showing changed files with 264 additions and 24 deletions
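
The commit message mentions a switch to randomized grid search. As a point of reference, here is a minimal sketch (not taken from this commit) contrasting exhaustive and randomized search over the same pipeline, using the pre-0.18 sklearn.grid_search API that the files below import; later scikit-learn releases moved both classes to sklearn.model_selection.

# Illustrative only: GridSearchCV tries every candidate value, while
# RandomizedSearchCV samples n_iter candidates from the same parameter space.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV

pipeline = Pipeline([('classifier', AdaBoostClassifier())])
param = {"classifier__n_estimators": np.random.randint(1, 30, 10)}

grid = GridSearchCV(pipeline, param_grid=param, refit=True, scoring='accuracy', cv=4)
rand = RandomizedSearchCV(pipeline, param_distributions=param, n_iter=5, refit=True,
                          scoring='accuracy', cv=4)
# rand.fit(X_train, y_train); rand.best_params_["classifier__n_estimators"]
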
@@ -51,12 +51,11 @@ groupClass.add_argument('--CL_type', metavar='STRING', action='store',
                        help='Determine whether to use Multiview, Monoview, or Benchmark, separate with : if multiple',
                        default='Benchmark')
groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store',
-                        help='Determine which classifier to use, if CL_type = Benchmark, fill monoview and multiview '
-                             'options', default='')
+                        help='Determine which classifier to use, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store',
-                        help='Determine which monoview classifier to use, separate with : if multiple', default='')
+                        help='Determine which monoview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store',
-                        help='Determine which multiview classifier to use, separate with : if multiple', default='')
+                        help='Determine which multiview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
                        default=1)
......
from sklearn.metrics import accuracy_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    score = metric(y_true, y_pred, sample_weight=sample_weight)
    return score

from sklearn.metrics import f1_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    try:
        labels = kwargs["1"]
    except:
        labels=None
    try:
        pos_label = kwargs["2"]
    except:
        pos_label = 1
    try:
        average = kwargs["3"]
    except:
        average = "binary"
    score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
    return score

from sklearn.metrics import fbeta_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    try:
        beta = kwargs["1"]
    except:
        beta=1.0
    try:
        labels = kwargs["2"]
    except:
        labels=None
    try:
        pos_label = kwargs["3"]
    except:
        pos_label = 1
    try:
        average = kwargs["4"]
    except:
        average = "binary"
    score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
    return score

from sklearn.metrics import hamming_loss as metric


def score(y_true, y_pred, **kwargs):
    try:
        classes = kwargs["0"]
    except:
        classes=None
    score = metric(y_true, y_pred, classes=classes)
    return score

from sklearn.metrics import jaccard_similarity_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight = None
    score = metric(y_true, y_pred, sample_weight=sample_weight)
    return score

from sklearn.metrics import log_loss as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight = None
    try:
        eps = kwargs["1"]
    except:
        eps = 1e-15
    score = metric(y_true, y_pred, sample_weight=sample_weight, eps=eps)
    return score

from sklearn.metrics import matthews_corrcoef as metric


def score(y_true, y_pred, **kwargs):
    score = metric(y_true, y_pred)
    return score
\ No newline at end of file
from sklearn.metrics import precision_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    try:
        labels = kwargs["1"]
    except:
        labels=None
    try:
        pos_label = kwargs["2"]
    except:
        pos_label = 1
    try:
        average = kwargs["3"]
    except:
        average = "binary"
    score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
    return score

from sklearn.metrics import recall_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    try:
        labels = kwargs["1"]
    except:
        labels=None
    try:
        pos_label = kwargs["2"]
    except:
        pos_label = 1
    try:
        average = kwargs["3"]
    except:
        average = "binary"
    score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
    return score

from sklearn.metrics import roc_auc_score as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    try:
        average = kwargs["1"]
    except:
        average = "binary"
    score = metric(y_true, y_pred, sample_weight=sample_weight, average=average)
    return score

from sklearn.metrics import zero_one_loss as metric


def score(y_true, y_pred, **kwargs):
    try:
        sample_weight = kwargs["0"]
    except:
        sample_weight=None
    score = metric(y_true, y_pred, sample_weight=sample_weight)
    return score

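Each wrapper above exposes the same score(y_true, y_pred, **kwargs) signature and pulls its optional arguments out of kwargs under string indices ("0", "1", ...). The dispatch side is not part of this diff; below is a minimal sketch of how one of these modules could be picked and called by name, assuming they live in a package named Metrics (that package name is an assumption, not shown here).

import importlib


def get_score(metric_name, y_true, y_pred, **kwargs):
    # metric_name is the module name, e.g. "accuracy_score" or "f1_score";
    # the "Metrics" package path is assumed for illustration.
    metric_module = importlib.import_module("Metrics." + metric_name)
    return metric_module.score(y_true, y_pred, **kwargs)

# Optional arguments are passed under string keys, matching the wrappers above:
# get_score("f1_score", y_true, y_pred, **{"0": None, "3": "micro"})
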
from sklearn.ensemble import AdaBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.grid_search import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils.testing import all_estimators
import inspect
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -9,20 +12,33 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    base_estimators = int(kwargs['1'])
    classifier = AdaBoostClassifier(n_estimators=num_estimators, base_estimator=base_estimators)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier


def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline = Pipeline([('classifier', AdaBoostClassifier())])
    param= {"classifier__n_estimators": map(int, kwargs['0']),
            "classifier__base_estimator": [DecisionTreeClassifier() for arg in kwargs["1"]]}
-    grid = GridSearchCV(pipeline,param_grid=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
+    grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
    detector = grid.fit(X_train, y_train)
    desc_estimators = [detector.best_params_["classifier__n_estimators"]]
    description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators))
    return description, detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1):
    pipeline = Pipeline([('classifier', AdaBoostClassifier())])
    classifiers = [clf for name, clf in all_estimators(type_filter='classifier')
                   if 'sample_weight' in inspect.getargspec(clf().fit)[0]
                   and (name != "AdaBoostClassifier" and name !="GradientBoostingClassifier")]
    param= {"classifier__n_estimators": np.random.randint(1, 30, 10),
            "classifier__base_estimator": classifiers}
    grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
    detector = grid.fit(X_train, y_train)
    desc_estimators = [detector.best_params_["classifier__n_estimators"],
                       detector.best_params_["classifier__base_estimator"]]
    return desc_estimators


def getConfig(config):
    return "\n\t\t- Adaboost with num_esimators : "+config[0]+", base_estimators : "+config[1]
\ No newline at end of file
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.grid_search import GridSearchCV
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs):
    maxDepth = int(kwargs['0'])
    classifier = DecisionTreeClassifier(max_depth=maxDepth)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
@@ -22,5 +22,16 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    return description, DT_detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())])
    param_DT = {"classifier__max_depth":np.random.randint(1, 30, 10)}
    grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy',
                           cv=nbFolds)
    DT_detector = grid_DT.fit(X_train, y_train)
    desc_params = [DT_detector.best_params_["classifier__max_depth"]]
    return desc_params


def getConfig(config):
    return "\n\t\t- Decision Tree with max_depth : "+config[0]
\ No newline at end of file
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.grid_search import GridSearchCV
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    nNeighbors = int(kwargs['0'])
    classifier = KNeighborsClassifier(n_neighbors=nNeighbors)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
@@ -21,6 +21,15 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    return description, KNN_detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())])
    param_KNN = {"classifier__n_neighbors": np.random.randint(1, 30, 10)}
    grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring='accuracy',
                            cv=nbFolds)
    KNN_detector = grid_KNN.fit(X_train, y_train)
    desc_params = [KNN_detector.best_params_["classifier__n_neighbors"]]
    return desc_params


def getConfig(config):
    return "\n\t\t- K nearest Neighbors with n_neighbors: "+config[0]
\ No newline at end of file
@@ -8,7 +8,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    maxDepth = int(kwargs['1'])
    classifier = RandomForestClassifier(n_estimators=num_estimators, max_depth=maxDepth, n_jobs=NB_CORES)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
@@ -42,5 +42,16 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    return description, rf_detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline_rf = Pipeline([('classifier', RandomForestClassifier())])
    param_rf = {"classifier__n_estimators": np.random.randint(1, 30, 10)}
    grid_rf = GridSearchCV(pipeline_rf,param_grid=param_rf,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
    rf_detector = grid_rf.fit(X_train, y_train)
    desc_estimators = [rf_detector.best_params_["classifier__n_estimators"]]
    return desc_estimators


def getConfig(config):
    return "\n\t\t- Random Forest with num_esimators : "+config[0]+", max_depth : "+config[1]
\ No newline at end of file
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.grid_search import GridSearchCV
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -12,7 +13,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    alpha = 0.15
    classifier = SGDClassifier(loss=loss, penalty=penalty, alpha=alpha)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
@@ -28,5 +29,19 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    return description, SGD_detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline_SGD = Pipeline([('classifier', SGDClassifier())])
    losses = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron']
    penalties = ["l1", "l2", "elasticnet"]
    alphas = list(np.random.randint(1,10,10))+list(np.random.random_sample(10))
    param_SGD = {"classifier__loss": losses, "classifier__penalty": penalties,
                 "classifier__alpha": alphas}
    grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy',
                            cv=nbFolds)
    SGD_detector = grid_SGD.fit(X_train, y_train)
    desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"],
                   SGD_detector.best_params_["classifier__alpha"]]
    return desc_params


def getConfig(config):
    return "\n\t\t- SGDClassifier with loss : "+config[0]+", penalty : "+config[1]
\ No newline at end of file
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.grid_search import GridSearchCV
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    C = int(kwargs['0'])
    classifier = SVC(C=C, kernel='linear', probability=True)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
@@ -21,5 +22,15 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    return description, SVMLinear_detector


def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
    pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))])
    param_SVMLinear = {"classifier__C":np.random.randint(1,2000,30)}
    grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy',
                                  cv=nbFolds)
    SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
    desc_params = [SVMLinear_detector.best_params_["classifier__C"]]
    return desc_params


def getConfig(config):
    return "\n\t\t- SVM with C : "+config[0]+", kernel : "+config[1]
\ No newline at end of file
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.grid_search import GridSearchCV
import numpy as np
def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -8,18 +9,19 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
    degree = int(kwargs['1'])
    classifier = SVC(C=C, kernel='poly', degree=degree, probability=True)
    classifier.fit(DATASET, CLASS_LABELS)
-    return classifier
+    return "No desc", classifier

def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))])
-    param_SVMLinear = {"classifier__C": map(int, kwargs['0']), "classifier__degree": map(int, kwargs["1"])}
-    grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy',
+    pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly"))])
+    param_SVMPoly= {"classifier__C": np.random.randint(1,2000,30), "classifier__degree": np.random.randint(1,10,5)}
+    grid_SVMPoly = GridSearchCV(pipeline_SVMPoly, param_grid=param_SVMPoly, refit=True, n_jobs=nbCores, scoring='accuracy',
                                cv=nbFolds)
-    SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
-    desc_params = [SVMLinear_detector.best_params_["classifier__C"], SVMLinear_detector.best_params_["classifier__degree"]]
-    description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, SVMLinear_detector
+    SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train)
+    desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]]
+    return desc_params
def getConfig(config):
......
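
The commit message also mentions predict proba: the SVC-based classifiers above are now constructed with probability=True, which is what makes predict_proba available on the fitted estimator. A short usage sketch on made-up data:

import numpy as np
from sklearn.svm import SVC

X = np.random.random((20, 4))        # made-up feature matrix
y = np.random.randint(0, 2, 20)      # made-up binary labels

clf = SVC(C=1, kernel='linear', probability=True)  # probability=True enables predict_proba
clf.fit(X, y)
proba = clf.predict_proba(X)         # shape (n_samples, n_classes); rows sum to 1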