diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py
index 4f52c31c050776fb9542b0e8c82743ffd7a3ff0e..54b27ba1b47815260f518a09ee8b6ea2a44d19a5 100644
--- a/Code/MonoMutliViewClassifiers/ExecClassif.py
+++ b/Code/MonoMutliViewClassifiers/ExecClassif.py
@@ -58,6 +58,8 @@ groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store'
                         help='Determine which multiview classifier to use, separate with : if multiple, if empty, considering all', default='')
 groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all',
                         type=int, default=1)
+groupClass.add_argument('--CL_metrics', metavar='STRING', action='store',
+                        help='Determine which metric to use, separate with ":" if multiple, if empty, considering all', default='')
 
 groupRF = parser.add_argument_group('Random Forest arguments')
 groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees',
@@ -198,9 +200,9 @@ if "Monoview" in args.CL_type.strip(":"):
 
 fusionClassifierConfig = "a"
 fusionMethodConfig = "a"
-mumboNB_ITER = 2
 mumboClassifierConfig = "a"
 mumboclassifierNames = "a"
+metrics = args.CL_metrics.split(":")
 
 RandomForestKWARGS = {"0":map(int, args.CL_RF_trees.split())}
 SVMLinearKWARGS = {"0":map(int, args.CL_SVML_C.split(":"))}
@@ -230,7 +232,7 @@ for viewIndex, viewArguments in enumerate(argumentDictionaries["Monoview"].value
     resultsMonoview += (Parallel(n_jobs=nbCores)(
         delayed(ExecMonoview)(DATASET.get("View"+str(viewIndex)), DATASET.get("labels").value, args.name,
                               args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=True,
-                              **arguments)
+                              metrics=[metrics[viewIndex]], **arguments)
         for arguments in viewArguments))
 
     accuracies = [result[1] for result in resultsMonoview[viewIndex]]
@@ -283,7 +285,7 @@ for viewIndex, viewArguments in enumerate(argumentDictionaries["Monoview"].value
 
 # resultsMultiview = Parallel(n_jobs=nbCores)(
 #     delayed(ExecMultiview)(DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
-#                            LABELS_DICTIONARY, gridSearch=True, **arguments)
+#                            LABELS_DICTIONARY, gridSearch=True, metrics=metrics, **arguments)
 #     for arguments in argumentDictionaries["Multiview"])
 resultsMultiview = []
 results = (resultsMonoview, resultsMultiview)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py
index 08d5c30d78216e48e4d8002988d19192e6f09625..226326f9b1b74d7c4c00b6aa393f19d560afcf27 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import accuracy_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -8,3 +9,11 @@ def score(y_true, y_pred, **kwargs):
         sample_weight=None
     score = metric(y_true, y_pred, sample_weight=sample_weight)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight)
\ No newline at end of file
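Note on the Metrics package (this applies to every metric module in this patch): each module pairs the raw score(y_true, y_pred, **kwargs) function with a get_scorer(**kwargs) that wraps the underlying sklearn metric in make_scorer, so a grid search can optimize it directly; greater_is_better=False marks the loss-type metrics, and configuration values are keyed positionally as strings ("0", "1", ...). A minimal usage sketch, outside the patch (the classifier and parameter values are only examples):

    import Metrics
    from sklearn.grid_search import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.tree import DecisionTreeClassifier

    metricModule = getattr(Metrics, "accuracy_score")  # resolve a module from its CLI name
    scorer = metricModule.get_scorer()                 # sklearn scorer, optimization direction included
    grid = GridSearchCV(Pipeline([('classifier', DecisionTreeClassifier())]),
                        param_grid={"classifier__max_depth": [1, 5, 10]},
                        scoring=scorer, cv=4)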
diff --git a/Code/MonoMutliViewClassifiers/Metrics/f1_score.py b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py
index 3dc8c3ae10c51a44c476fb0e39e8870a76e365b7..d563d7dc8deded58e8d3cad28dc124d8e46459a7 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/f1_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import f1_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -20,3 +21,24 @@ def score(y_true, y_pred, **kwargs):
         average = "binary"
     score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        labels = kwargs["1"]
+    except KeyError:
+        labels = None
+    try:
+        pos_label = kwargs["2"]
+    except KeyError:
+        pos_label = 1
+    try:
+        average = kwargs["3"]
+    except KeyError:
+        average = "binary"
+    return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels,
+                       pos_label=pos_label, average=average)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py
index 74f1e24138178bfec9308ddc51cfce51909a2c8e..73d834e2651387a66b39d3450ff9fee00baaddf8 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import fbeta_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -24,3 +25,28 @@ def score(y_true, y_pred, **kwargs):
         average = "binary"
     score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        beta = kwargs["1"]
+    except KeyError:
+        beta = 1.0
+    try:
+        labels = kwargs["2"]
+    except KeyError:
+        labels = None
+    try:
+        pos_label = kwargs["3"]
+    except KeyError:
+        pos_label = 1
+    try:
+        average = kwargs["4"]
+    except KeyError:
+        average = "binary"
+    return make_scorer(metric, greater_is_better=True, beta=beta, sample_weight=sample_weight, labels=labels,
+                       pos_label=pos_label, average=average)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py
index fe35066d783cfea89eb27f89bc3c15b77e090026..8bc33bc25190ad01766610dd3c74b0adfa9368f9 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import hamming_loss as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -8,3 +9,11 @@ def score(y_true, y_pred, **kwargs):
         classes=None
     score = metric(y_true, y_pred, classes=classes)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        classes = kwargs["0"]
+    except KeyError:
+        classes = None
+    return make_scorer(metric, greater_is_better=False, classes=classes)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py
index c6d134ad15497130aa88eacb905d19aefa22074e..18e0b484c339398d47df421feb658bb6917a9c87 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import jaccard_similarity_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -8,3 +9,11 @@ def score(y_true, y_pred, **kwargs):
         sample_weight = None
     score = metric(y_true, y_pred, sample_weight=sample_weight)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/log_loss.py b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py
index a6dbf8a3c295ae7bd06d19360939fb731b0a7b3a..cb14bee9a13bb6565faef32ebeee9d438f8c472c 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/log_loss.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import log_loss as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -12,3 +13,15 @@ def score(y_true, y_pred, **kwargs):
         eps = 1e-15
     score = metric(y_true, y_pred, sample_weight=sample_weight, eps=eps)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        eps = kwargs["1"]
+    except KeyError:
+        eps = 1e-15
+    return make_scorer(metric, greater_is_better=False, needs_proba=True, sample_weight=sample_weight, eps=eps)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py
index b1ec857aa2937d437bcc7953b5ceaa3927143ddc..2a3c444765d81d740f27b2e025588b1d33429e7f 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py
@@ -1,6 +1,11 @@
 from sklearn.metrics import matthews_corrcoef as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
     score = metric(y_true, y_pred)
-    return score
\ No newline at end of file
+    return score
+
+
+def get_scorer(**kwargs):
+    return make_scorer(metric, greater_is_better=True)
\ No newline at end of file
diff --git a/Code/MonoMutliViewClassifiers/Metrics/precision_score.py b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py
index 2a6cccad17d45dc1cbecf039044266e3f713675a..9839c817350bb6c9cbf2d6fa3fa16d72879123e8 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/precision_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import precision_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -20,3 +21,24 @@ def score(y_true, y_pred, **kwargs):
         average = "binary"
     score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        labels = kwargs["1"]
+    except KeyError:
+        labels = None
+    try:
+        pos_label = kwargs["2"]
+    except KeyError:
+        pos_label = 1
+    try:
+        average = kwargs["3"]
+    except KeyError:
+        average = "binary"
+    return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels, pos_label=pos_label,
+                       average=average)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/recall_score.py b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py
index a4d10284db2ee2a1bb3a477c204c8dbd28299fbd..f4d3846581e99dc6a1385489a167c9a9e07f4602 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/recall_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import recall_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -20,3 +21,24 @@ def score(y_true, y_pred, **kwargs):
         average = "binary"
     score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        labels = kwargs["1"]
+    except KeyError:
+        labels = None
+    try:
+        pos_label = kwargs["2"]
+    except KeyError:
+        pos_label = 1
+    try:
+        average = kwargs["3"]
+    except KeyError:
+        average = "binary"
+    return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels, pos_label=pos_label,
+                       average=average)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py
index 5a9f7cfb9561dbcd99acee59bbfe653cc8ea149c..04fb96cb11f68da4fcf60163255332d0c24e96d8 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import roc_auc_score as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -12,3 +13,15 @@ def score(y_true, y_pred, **kwargs):
         average = "binary"
     score = metric(y_true, y_pred, sample_weight=sample_weight, average=average)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    try:
+        average = kwargs["1"]
+    except KeyError:
+        average = "binary"
+    return make_scorer(metric, greater_is_better=True, needs_threshold=True, sample_weight=sample_weight, average=average)
diff --git a/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py
index a741825bec49fd400f4a8b0fa60423352062d5f6..833d2402f22e8c548a18f76ef1e8bef5dac42054 100644
--- a/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py
+++ b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py
@@ -1,4 +1,5 @@
 from sklearn.metrics import zero_one_loss as metric
+from sklearn.metrics import make_scorer
 
 
 def score(y_true, y_pred, **kwargs):
@@ -8,3 +9,11 @@ def score(y_true, y_pred, **kwargs):
         sample_weight=None
     score = metric(y_true, y_pred, sample_weight=sample_weight)
     return score
+
+
+def get_scorer(**kwargs):
+    try:
+        sample_weight = kwargs["0"]
+    except KeyError:
+        sample_weight = None
+    return make_scorer(metric, greater_is_better=False, sample_weight=sample_weight)
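Note: the positional-string convention in get_scorer mirrors the argument order of the matching score function. For f1_score, "0" is sample_weight, "1" is labels, "2" is pos_label and "3" is average. A hypothetical configuration example (not in the patch):

    import Metrics

    f1 = getattr(Metrics, "f1_score")
    scorer = f1.get_scorer(**{"2": 1, "3": "micro"})  # pos_label=1, average="micro"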
diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
index dda9e5d70d22521d5a12d88410346135acffd89a..9e2a187ce60d9edfa5d6c69d3ebbac2a59e7072c 100644
--- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -20,6 +20,7 @@ import h5py
 import ClassifMonoView # Functions for classification
 import ExportResults   # Functions to render results
 import MonoviewClassifiers
+import Metrics
 
 # Author-Info
 __author__ = "Nikolas Huelsmann, Baptiste BAUVIN"
@@ -30,7 +31,8 @@ __date__ = 2016-03-25
 
 
 ### Argument Parser
 
-def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path, gridSearch=True, **kwargs):
+def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path, gridSearch=True,
+                 metrics=["accuracy_score"], **kwargs):
 
     t_start = time.time()
     directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
@@ -41,6 +43,7 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
     CL_type = kwargs["CL_type"]
     classifierKWARGS = kwargs[CL_type+"KWARGS"]
     X = X.value
+    metricModules = [getattr(Metrics, metricName) for metricName in metrics]
 
     # Determine the Database to extract features
     logging.debug("### Main Programm for Classification MonoView")
@@ -61,9 +64,11 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
 
     classifierModule = getattr(MonoviewClassifiers, CL_type)
-    classifierFunction = getattr(classifierModule, "fit_gridsearch")
+    classifierGridSearch = getattr(classifierModule, "gridSearch")
 
-    cl_desc, cl_res = classifierFunction(X_train, y_train, nbFolds=nbFolds, nbCores=nbCores,**classifierKWARGS)
+    cl_desc = classifierGridSearch(X_train, y_train, nbFolds=nbFolds, nbCores=nbCores, metric=[metrics[0], None])
+    cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores,
+                                  **dict((str(index), param) for index, param in enumerate(cl_desc)))
     t_end = time.time() - t_start
 
     # Add result to Results DF
@@ -149,6 +153,8 @@ if __name__=='__main__':
     groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, default=10)
     groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=1)
     groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test', type=float, default=0.9)
+    groupClass.add_argument('--CL_metrics', metavar='STRING', action='store',
+                            help='Determine which metric to use, separate with ":" if multiple, if empty, considering all', default='')
 
 
     groupClassifier = parser.add_argument_group('Classifier Config')
@@ -217,4 +223,5 @@ if __name__=='__main__':
     arguments = {args.CL_type+"KWARGS": classifierKWARGS, "feat":args.feat,"fileFeat": args.fileFeat,
                  "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type}
 
-    ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, **arguments)
+    ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF,
+                 metrics=args.CL_metrics.split(":"), **arguments)
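Note (interface assumption, not enforced anywhere in the patch yet): ExecMonoview receives a list of metric names, while the per-classifier gridSearch functions below take a single metric=[metricName, metricConfig] pair, so only the first name is forwarded to the grid search. With an empty --CL_metrics the split yields [''], and the advertised "considering all" behaviour is not implemented yet. A sketch of the expected call chain, with hypothetical values:

    metricNames = "f1_score:accuracy_score".split(":")  # as parsed from --CL_metrics
    mainMetric = [metricNames[0], None]                 # no per-metric config comes from the CLI yet
    # cl_desc = classifierGridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=mainMetric)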
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py
index 6eba0119ae9bbff786240f614125e57c071597cf..df9269d6f9f2c54fbec649af768a13a79f00933d 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py
@@ -5,6 +5,7 @@ from sklearn.tree import DecisionTreeClassifier
 from sklearn.utils.testing import all_estimators
 import inspect
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -14,25 +15,30 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     classifier.fit(DATASET, CLASS_LABELS)
     return "No desc", classifier
 
-
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline = Pipeline([('classifier', AdaBoostClassifier())])
-    param= {"classifier__n_estimators": map(int, kwargs['0']),
-            "classifier__base_estimator": [DecisionTreeClassifier() for arg in kwargs["1"]]}
-    grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
-    detector = grid.fit(X_train, y_train)
-    desc_estimators = [detector.best_params_["classifier__n_estimators"]]
-    description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators))
-    return description, detector
-
-
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1):
+#
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline = Pipeline([('classifier', AdaBoostClassifier())])
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     param= {"classifier__n_estimators": map(int, kwargs['0']),
+#             "classifier__base_estimator": [DecisionTreeClassifier() for arg in kwargs["1"]]}
+#     grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring=scorer,cv=nbFolds)
+#     detector = grid.fit(X_train, y_train)
+#     desc_estimators = [detector.best_params_["classifier__n_estimators"]]
+#     description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators))
+#     return description, detector
+
+
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline = Pipeline([('classifier', AdaBoostClassifier())])
     classifiers = [clf for name, clf in all_estimators(type_filter='classifier')
                    if 'sample_weight' in inspect.getargspec(clf().fit)[0]
                    and (name != "AdaBoostClassifier" and name !="GradientBoostingClassifier")]
     param= {"classifier__n_estimators": np.random.randint(1, 30, 10),
            "classifier__base_estimator": classifiers}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
+    grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring=scorer,cv=nbFolds)
     detector = grid.fit(X_train, y_train)
     desc_estimators = [detector.best_params_["classifier__n_estimators"],
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py
index c9c86c357f15ad84fbc0453e5544a8851224f62b..ce7e739b5e3210eb1ceab70c17618e1b84d5d07b 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py
@@ -2,6 +2,7 @@ from sklearn.tree import DecisionTreeClassifier
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs):
@@ -10,22 +11,25 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs):
     classifier.fit(DATASET, CLASS_LABELS)
     return "No desc", classifier
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())])
-    param_DT = {"classifier__max_depth":map(int, kwargs['0'])}
-
-    grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy',
-                           cv=nbFolds)
-    DT_detector = grid_DT.fit(X_train, y_train)
-    desc_params = [DT_detector.best_params_["classifier__max_depth"]]
-    description = "Classif_" + "DT" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, DT_detector
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())])
+#     param_DT = {"classifier__max_depth":map(int, kwargs['0'])}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                            cv=nbFolds)
+#     DT_detector = grid_DT.fit(X_train, y_train)
+#     desc_params = [DT_detector.best_params_["classifier__max_depth"]]
+#     description = "Classif_" + "DT" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
+#     return description, DT_detector
 
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())])
     param_DT = {"classifier__max_depth":np.random.randint(1, 30, 10)}
-
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy',
+    grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring=scorer,
                            cv=nbFolds)
     DT_detector = grid_DT.fit(X_train, y_train)
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py
index 60cc21c1a2a884e7c675f56c0ea800161c38afd8..5e51332539c0f7d446a7906b5b73f3d14c9c1c6d 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py
@@ -2,6 +2,7 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -10,20 +11,25 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     classifier.fit(DATASET, CLASS_LABELS)
     return "No desc", classifier
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())])
-    param_KNN = {"classifier__n_neighbors": map(int, kwargs['0'])}
-    grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring='accuracy',
-                            cv=nbFolds)
-    KNN_detector = grid_KNN.fit(X_train, y_train)
-    desc_params = [KNN_detector.best_params_["classifier__n_neighbors"]]
-    description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, KNN_detector
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())])
+#     param_KNN = {"classifier__n_neighbors": map(int, kwargs['0'])}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                             cv=nbFolds)
+#     KNN_detector = grid_KNN.fit(X_train, y_train)
+#     desc_params = [KNN_detector.best_params_["classifier__n_neighbors"]]
+#     description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
+#     return description, KNN_detector
 
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())])
     param_KNN = {"classifier__n_neighbors": np.random.randint(1, 30, 10)}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring='accuracy',
+    grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring=scorer,
                             cv=nbFolds)
     KNN_detector = grid_KNN.fit(X_train, y_train)
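Note: Adaboost, DecisionTree and KNN above, and the RandomForest, SGD and SVM modules below, all repeat the same scaffold. A distilled template, illustrative only (no module in the patch actually imports it):

    import Metrics
    from sklearn.grid_search import GridSearchCV
    from sklearn.pipeline import Pipeline

    def gridSearchTemplate(X_train, y_train, estimator, paramGrid,
                           nbFolds=4, nbCores=1, metric=["accuracy_score", None]):
        pipeline = Pipeline([('classifier', estimator)])
        metricModule = getattr(Metrics, metric[0])
        metricKWARGS = dict((str(index), config) for index, config in
                            enumerate(metric[1])) if metric[1] else {}
        scorer = metricModule.get_scorer(**metricKWARGS)
        grid = GridSearchCV(pipeline, param_grid=paramGrid, refit=True,
                            n_jobs=nbCores, scoring=scorer, cv=nbFolds)
        return grid.fit(X_train, y_train)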
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py
index 1b3a1f415d74c959e9f82aadf7abbea634fa89e0..445fdfec6d30e8b49cd58ba261b44d30c444f588 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py
@@ -1,6 +1,8 @@
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.pipeline import Pipeline
 from sklearn.grid_search import GridSearchCV
+import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -11,41 +12,44 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     return "No desc", classifier
 
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-
-    # PipeLine with RandomForest classifier
-    pipeline_rf = Pipeline([('classifier', RandomForestClassifier())])
-
-    # Parameters for GridSearch: Number of Trees
-    # can be extended with: oob_score, min_samples_leaf, max_features
-    param_rf = {"classifier__n_estimators": map(int, kwargs['0'])}
-
-    # pipeline: Gridsearch avec le pipeline comme estimator
-    # param: pour obtenir le meilleur model il va essayer tous les possiblites
-    # refit: pour utiliser le meilleur model apres girdsearch
-    # n_jobs: Nombre de CPU (Mon ordi a des problemes avec -1 (Bug Python 2.7 sur Windows))
-    # scoring: scoring...
-    # cv: Nombre de K-Folds pour CV
-    grid_rf = GridSearchCV(
-        pipeline_rf,
-        param_grid=param_rf,
-        refit=True,
-        n_jobs=nbCores,
-        scoring='accuracy',
-        cv=nbFolds,
-    )
-
-    rf_detector = grid_rf.fit(X_train, y_train)
-
-    desc_estimators = [rf_detector.best_params_["classifier__n_estimators"]]
-    description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators))
-    return description, rf_detector
-
-
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#
+#     # PipeLine with RandomForest classifier
+#     pipeline_rf = Pipeline([('classifier', RandomForestClassifier())])
+#
+#     # Parameters for GridSearch: Number of Trees
+#     # can be extended with: oob_score, min_samples_leaf, max_features
+#     param_rf = {"classifier__n_estimators": map(int, kwargs['0'])}
+#
+#     # pipeline: grid search with the pipeline as estimator
+#     # param: to find the best model it will try every possibility
+#     # refit: to use the best model after the grid search
+#     # n_jobs: number of CPUs (my machine has issues with -1 (Python 2.7 bug on Windows))
+#     # scoring: scoring...
+#     # cv: number of K-folds for CV
+#     grid_rf = GridSearchCV(
+#         pipeline_rf,
+#         param_grid=param_rf,
+#         refit=True,
+#         n_jobs=nbCores,
+#         scoring='accuracy',
+#         cv=nbFolds,
+#     )
+#
+#     rf_detector = grid_rf.fit(X_train, y_train)
+#
+#     desc_estimators = [rf_detector.best_params_["classifier__n_estimators"]]
+#     description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators))
+#     return description, rf_detector
+
+
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_rf = Pipeline([('classifier', RandomForestClassifier())])
     param_rf = {"classifier__n_estimators": np.random.randint(1, 30, 10)}
-
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_rf = GridSearchCV(pipeline_rf,param_grid=param_rf,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds)
+    grid_rf = GridSearchCV(pipeline_rf,param_grid=param_rf,refit=True,n_jobs=nbCores,scoring=scorer,cv=nbFolds)
     rf_detector = grid_rf.fit(X_train, y_train)
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py
index 9ad1f292683875633ba718003d0557c9d271887f..156277031304ae373c6545dd44fcb6da93f4ab93 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py
@@ -2,6 +2,7 @@ from sklearn.linear_model import SGDClassifier
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -16,26 +17,32 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     return "No desc", classifier
 
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_SGD = Pipeline([('classifier', SGDClassifier())])
-    param_SGD = {"classifier__loss": kwargs['1'], "classifier__penalty": kwargs['2'],
-                 "classifier__alpha": map(float, kwargs['0'])}
-    grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy',
-                            cv=nbFolds)
-    SGD_detector = grid_SGD.fit(X_train, y_train)
-    desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"],
-                   SGD_detector.best_params_["classifier__alpha"]]
-    description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, SGD_detector
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_SGD = Pipeline([('classifier', SGDClassifier())])
+#     param_SGD = {"classifier__loss": kwargs['1'], "classifier__penalty": kwargs['2'],
+#                  "classifier__alpha": map(float, kwargs['0'])}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                             cv=nbFolds)
+#     SGD_detector = grid_SGD.fit(X_train, y_train)
+#     desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"],
+#                    SGD_detector.best_params_["classifier__alpha"]]
+#     description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
+#     return description, SGD_detector
 
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_SGD = Pipeline([('classifier', SGDClassifier())])
     losses = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron']
     penalties = ["l1", "l2", "elasticnet"]
     alphas = list(np.random.randint(1,10,10))+list(np.random.random_sample(10))
     param_SGD = {"classifier__loss": losses, "classifier__penalty": penalties,
                  "classifier__alpha": alphas}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy',
+    grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring=scorer,
                             cv=nbFolds)
     SGD_detector = grid_SGD.fit(X_train, y_train)
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py
index 6f59cb29a4da54eb2b88fb2e0c87becdef0070e3..43619432619f8e12c3757e15baac61ad38d7a815 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py
@@ -2,6 +2,7 @@ from sklearn.svm import SVC
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -11,22 +12,28 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     return "No desc", classifier
 
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))])
-    param_SVMLinear = {"classifier__C": map(int, kwargs['0'])}
-    grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy',
-                                  cv=nbFolds)
-    SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
-    desc_params = [SVMLinear_detector.best_params_["classifier__C"]]
-    description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, SVMLinear_detector
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))])
+#     param_SVMLinear = {"classifier__C": map(int, kwargs['0'])}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                                   cv=nbFolds)
+#     SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
+#     desc_params = [SVMLinear_detector.best_params_["classifier__C"]]
+#     description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
+#     return description, SVMLinear_detector
 
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))])
     param_SVMLinear = {"classifier__C":np.random.randint(1,2000,30)}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy',
+    grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring=scorer,
                                   cv=nbFolds)
+
     SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train)
     desc_params = [SVMLinear_detector.best_params_["classifier__C"]]
     return desc_params
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py
index 1c7ee599a855e26681e76b1bb35c3e082e57c1d5..7db4dd564420cf5b438736f039c317492e87c0b9 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py
@@ -2,6 +2,7 @@ from sklearn.svm import SVC
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -12,17 +13,30 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     return "No desc", classifier
 
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly"))])
-    param_SVMPoly= {"classifier__C": np.random.randint(1,2000,30), "classifier__degree": np.random.randint(1,10,5)}
-    grid_SVMPoly = GridSearchCV(pipeline_SVMPoly, param_grid=param_SVMPoly, refit=True, n_jobs=nbCores, scoring='accuracy',
-                                cv=nbFolds)
-    SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train)
-    desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]]
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly"))])
+#     param_SVMPoly= {"classifier__C": np.random.randint(1,2000,30), "classifier__degree": np.random.randint(1,10,5)}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_SVMPoly = GridSearchCV(pipeline_SVMPoly, param_grid=param_SVMPoly, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                                 cv=nbFolds)
+#     SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train)
+#     desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]]
+#     return desc_params
+
+
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+    pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly"))])
+    param_SVMPoly = {"classifier__C": np.random.randint(1,2000,30), "classifier__degree": np.random.randint(1,10,5)}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
+    grid_SVMPoly = GridSearchCV(pipeline_SVMPoly, param_grid=param_SVMPoly, refit=True, n_jobs=nbCores, scoring=scorer,
+                                cv=nbFolds)
+    SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train)
+    desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]]
     return desc_params
-
-
 def getConfig(config):
     return "\n\t\t- SVM with C : "+config[0]+", kernel : "+config[1]
\ No newline at end of file
diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py
index ae527e05634010dff000fd4046aa39ceb38717d4..7c2e92767aa55e46651a3a6b5261cab2b6241aaa 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py
@@ -2,6 +2,7 @@ from sklearn.svm import SVC
 from sklearn.pipeline import Pipeline # Pipelining in classification
 from sklearn.grid_search import GridSearchCV
 import numpy as np
+import Metrics
 
 
 def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
@@ -11,20 +12,25 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
     return "No desc", classifier
 
 
-def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
-    pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf"))])
-    param_SVMRBF = {"classifier__C": map(int, kwargs['0'])}
-    grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring='accuracy',
-                               cv=nbFolds)
-    SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train)
-    desc_params = [SVMRBF_detector.best_params_["classifier__C"]]
-    description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
-    return description, SVMRBF_detector
+# def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
+#     pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf"))])
+#     param_SVMRBF = {"classifier__C": map(int, kwargs['0'])}
+#     metricModule = getattr(Metrics, metric[0])
+#     scorer = metricModule.get_scorer(dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])))
+#     grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring='accuracy',
+#                                cv=nbFolds)
+#     SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train)
+#     desc_params = [SVMRBF_detector.best_params_["classifier__C"]]
+#     description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params))
+#     return description, SVMRBF_detector
 
-def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs):
+def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], **kwargs):
     pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf"))])
     param_SVMRBF = {"classifier__C": np.random.randint(1,2000,30)}
+    metricModule = getattr(Metrics, metric[0])
+    metricKWARGS = dict((str(index), metricConfig) for index, metricConfig in enumerate(metric[1])) if metric[1] else {}
+    scorer = metricModule.get_scorer(**metricKWARGS)
-    grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring='accuracy',
+    grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring=scorer,
                                cv=nbFolds)
     SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train)
diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
index 75dceac7ae3c2baa3e2444918bea5b6855a3f3a9..450624ec0d8f0ee1f1fdf628329b32687a377a95 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
@@ -16,12 +16,15 @@ import time
 
 
-def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, gridSearch=False, **kwargs):
+def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY,
+                  gridSearch=False, metrics=None, **kwargs):
 
     datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
     NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
     views = [str(DATASET.get("View"+str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
     NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
+    if not metrics:
+        metrics = ["accuracy_score" for view in range(NB_VIEW)]
 
     CL_type = kwargs["CL_type"]
     views = kwargs["views"]
@@ -78,7 +81,8 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
 
     if gridSearch:
         logging.info("Start:\t Gridsearching best settings for monoview classifiers")
-        bestSettings, fusionConfig = classifierGridSearch(DATASET, classificationKWARGS, learningIndices)
+        bestSettings, fusionConfig = classifierGridSearch(DATASET, classificationKWARGS, learningIndices,
+                                                          metrics=metrics)
         classificationKWARGS["classifiersConfigs"] = bestSettings
         try:
             classificationKWARGS["fusionMethodConfig"] = fusionConfig
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py
index 899350e0da04bd08fce7bf66f4c78d1fce759d8b..73d820402423c0fd1f2135c5a503327f9a466ec1 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py
@@ -1,8 +1,22 @@
 from Methods import *
 import MonoviewClassifiers
+import numpy as np
 
 
-def gridSearch_hdf5(DATASET, classificationKWARGS):
+def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None):
+    if not usedIndices:
+        usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
+    NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
+    if weights is None:
+        weights = np.array([1.0/NB_VIEW for i in range(NB_VIEW)])
+    if sum(weights) != 1:
+        weights = weights/sum(weights)
+    monoviewData = np.concatenate([weights[viewIndex]*DATASET.get("View"+str(viewIndex))[usedIndices, :]
+                                   for viewIndex in np.arange(NB_VIEW)], axis=1)
+    return monoviewData
+
+
+def gridSearch_hdf5(DATASET, classificationKWARGS, learningIndices, metrics=None):
     fusionTypeName = classificationKWARGS["fusionType"]
     fusionTypePackage = globals()[fusionTypeName+"Package"]
     fusionMethodModuleName = classificationKWARGS["fusionMethod"]
@@ -12,10 +26,14 @@ def gridSearch_hdf5(DATASET, classificationKWARGS):
     for classifierIndex, classifierName in enumerate(classifiersNames):
         classifierModule = getattr(MonoviewClassifiers, classifierName)
         classifierMethod = getattr(classifierModule, "gridSearch")
-        bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[...],
-                                             DATASET.get("labels")[...]))
+        if fusionTypeName == "LateFusion":
+            bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[learningIndices],
+                                                 DATASET.get("labels")[learningIndices], metric=[metrics[classifierIndex], None]))
+        else:
+            bestSettings.append(classifierMethod(makeMonoviewData_hdf5(DATASET, usedIndices=learningIndices),
+                                                 DATASET.get("labels")[learningIndices], metric=[metrics[classifierIndex], None]))
     classificationKWARGS["classifiersConfigs"] = bestSettings
-    fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS)
+    fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS, learningIndices)
     return bestSettings, fusionMethodConfig
 
 
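Note: makeMonoviewData_hdf5 builds the early-fusion representation: weights are normalized to sum to one and each view's feature block is scaled before the blocks are concatenated along the feature axis, so a monoview classifier sees a single design matrix. A toy check of that behaviour (stand-in arrays, not HDF5 datasets):

    import numpy as np

    view0 = np.ones((3, 2))              # plays the role of DATASET.get("View0")
    view1 = 2 * np.ones((3, 4))          # plays the role of DATASET.get("View1")
    weights = np.array([3.0, 1.0])
    weights = weights / sum(weights)     # normalized to [0.75, 0.25]
    monoviewData = np.concatenate([weights[0] * view0, weights[1] * view1], axis=1)
    assert monoviewData.shape == (3, 6)  # features are concatenated, not summed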
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py
index 5c0875c31f3ee99529aada82e2339ae323d18d90..627e1bb4a7083b61674554bf9def249cb4dd86ef 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py
@@ -27,50 +27,50 @@ class EarlyFusionClassifier(object):
 
 
 
 
-class WeightedLinear(EarlyFusionClassifier):
-    def __init__(self, NB_CORES=1, **kwargs):
-        EarlyFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'],
-                                       NB_CORES=NB_CORES)
-        self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
-
-    def fit_hdf5(self, DATASET, trainIndices=None):
-        if not trainIndices:
-            trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
-        self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices)
-        monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName)
-        desc, self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("labels")[trainIndices],
-                                                                     NB_CORES=self.nbCores,
-                                                                     **dict((str(configIndex),config) for configIndex,config in
-                                                                            enumerate(self.monoviewClassifiersConfig)))
-
-    def predict_hdf5(self, DATASET, usedIndices=None):
-        if usedIndices == None:
-            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
-        if usedIndices:
-            self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices)
-            predictedLabels = self.monoviewClassifier.predict(self.monoviewData)
-        else:
-            predictedLabels=[]
-        return predictedLabels
-
-    def predict_proba_hdf5(self, DATASET, usedIndices=None):
-        if usedIndices == None:
-            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
-        if usedIndices:
-            self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices)
-            predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData)
-        else:
-            predictedLabels=[]
-        return predictedLabels
-
-    def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs):
-        configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+\
-                       " with monoview classifier : "
-        monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames[0])
-        configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs[0])
-        return configString
-
-    def gridSearch(self, classificationKWARGS):
-
-        return
+# class WeightedLinear(EarlyFusionClassifier):
+#     def __init__(self, NB_CORES=1, **kwargs):
+#         EarlyFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'],
+#                                        NB_CORES=NB_CORES)
+#         self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
+#
+#     def fit_hdf5(self, DATASET, trainIndices=None):
+#         if not trainIndices:
+#             trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
+#         self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices)
+#         monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName)
+#         desc, self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("labels")[trainIndices],
+#                                                                      NB_CORES=self.nbCores,
+#                                                                      **dict((str(configIndex),config) for configIndex,config in
+#                                                                             enumerate(self.monoviewClassifiersConfig)))
+#
+#     def predict_hdf5(self, DATASET, usedIndices=None):
+#         if usedIndices == None:
+#             usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
+#         if usedIndices:
+#             self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices)
+#             predictedLabels = self.monoviewClassifier.predict(self.monoviewData)
+#         else:
+#             predictedLabels=[]
+#         return predictedLabels
+#
+#     def predict_proba_hdf5(self, DATASET, usedIndices=None):
+#         if usedIndices == None:
+#             usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
+#         if usedIndices:
+#             self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices)
+#             predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData)
+#         else:
+#             predictedLabels=[]
+#         return predictedLabels
+#
+#     def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs):
+#         configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+\
+#                        " with monoview classifier : "
+#         monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames[0])
+#         configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs[0])
+#         return configString
+#
+#     def gridSearch(self, classificationKWARGS):
+#
+#         return
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py
index 5aa54619ae299b60ad7dfd32821a2efe8e0cb6ed..97d57a575609de3fe8f54bd2232ebeba4f7fccdd 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py
@@ -31,7 +31,7 @@ def getConfig(classifierConfig):
     return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' '
 
 
-def gridSearch(data, labels):
+def gridSearch(data, labels, metrics="accuracy_score"):
     minSubSampling = 1.0/(len(labels)/2)
     bestSettings = []
     bestResults = []
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
index f0e3dc15543f841e3f2cf4f77aecbb2779223cf3..ae1123e8ecc8b2dd9b8bf3af36dccc95a430ac59 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
@@ -6,9 +6,4 @@ from ModifiedMulticlass import OneVsRestClassifier
 
 # Add weights
 
-
-def Kover(data, labels, arg, weights,):
-    isBad = False
-    subSamplingRatio = arg[0]
-
-    return classifier, prediction, isBad
\ No newline at end of file
+pass
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
index c4a02a2849006cd79bbf4d80a4578ccf5f73d8b2..19dfc8844427f8e93b1c8ab39830a5f5ea2e395d 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
@@ -21,6 +21,7 @@ def computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatr
                          for exampleIndice in range(DATASET_LENGTH)])
     return weights
 
+
 def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS,
                         DATASET_LENGTH, viewIndice, classifier_config, iterIndex, costMatrices):
     weights = computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices)
@@ -30,6 +31,7 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS,
     logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy))
     return classifier, classes, isBad, averageAccuracy
 
+
 def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATASET_LENGTH, viewIndice,
                              classifier_config, viewName, iterIndex, costMatrices):
     weights = computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices)
@@ -39,22 +41,20 @@ def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATA
     logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy))
     return classifier, classes, isBad, averageAccuracy
 
-def gridSearch_hdf5(DATASET, classificationKWARGS):
+
+def gridSearch_hdf5(DATASET, classificationKWARGS, learningIndices, metrics=None):
     classifiersNames = classificationKWARGS["classifiersNames"]
     bestSettings = []
     for classifierIndex, classifierName in enumerate(classifiersNames):
         logging.debug("\tStart:\t Gridsearch for "+classifierName+" on "+DATASET.get("View"+str(classifierIndex)).attrs["name"])
         classifierModule = globals()[classifierName]  # Permet d'appeler une fonction avec une string
         classifierMethod = getattr(classifierModule, "gridSearch")
-        bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[...],
-                                             DATASET.get("labels")[...]))
+        bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[learningIndices],
+                                             DATASET.get("labels")[learningIndices], metrics=metrics[classifierIndex]))
         logging.debug("\tDone:\t Gridsearch for "+classifierName)
     return bestSettings, None
 
-
-
-
 class Mumbo:
 
     def __init__(self, NB_VIEW, DATASET_LENGTH, CLASS_LABELS, NB_CORES=1,**kwargs):
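Note: as in Fusion.gridSearch_hdf5, Mumbo's gridSearch_hdf5 expects one metric name per view, indexed by classifierIndex, alongside the train indices; passing metrics=None therefore only works if the caller (ExecMultiview) has already substituted the per-view default. A hypothetical call, assuming DATASET, classificationKWARGS and learningIndices are already built:

    NB_VIEW = 4
    metrics = ["accuracy_score" for view in range(NB_VIEW)]  # the ExecMultiview default
    # bestSettings, fusionMethodConfig = gridSearch_hdf5(DATASET, classificationKWARGS,
    #                                                    learningIndices, metrics=metrics)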