From 10483f58f7d28e8bcb17752c4ed33e6a01254246 Mon Sep 17 00:00:00 2001 From: bbauvin <baptiste.bauvin@centrale-marseille.fr> Date: Fri, 26 Aug 2016 17:41:49 -0400 Subject: [PATCH] Added random gridsearch and some predict proba and metric package need to test all that --- Code/MonoMutliViewClassifiers/ExecClassif.py | 9 +- .../Metrics/__init__.py | 0 .../Metrics/accuracy_score.py | 10 + .../Metrics/f1_score.py | 22 ++ .../Metrics/fbeta_score.py | 26 ++ .../Metrics/hamming_loss.py | 10 + .../Metrics/jaccard_similarity_score.py | 10 + .../Metrics/log_loss.py | 14 + .../Metrics/matthews_corrcoef.py | 6 + .../Metrics/precision_score.py | 22 ++ .../Metrics/recall_score.py | 22 ++ .../Metrics/roc_auc_score.py | 14 + .../Metrics/zero_one_loss.py | 10 + .../MonoviewClassifiers/Adaboost.py | 24 +- .../MonoviewClassifiers/DecisionTree.py | 15 +- .../MonoviewClassifiers/KNN.py | 13 +- .../MonoviewClassifiers/RandomForest.py | 13 +- .../MonoviewClassifiers/SGD.py | 17 +- .../MonoviewClassifiers/SVMLinear.py | 13 +- .../MonoviewClassifiers/SVMPoly.py | 18 +- .../MonoviewClassifiers/SVMRBF.py | 13 +- .../Multiview/ExecMultiview.py | 6 +- .../Multiview/Fusion/Fusion.py | 14 +- .../Multiview/Fusion/Methods/EarlyFusion.py | 6 +- .../EarlyFusionPackage/WeightedLinear.py | 70 ++++ .../Methods/EarlyFusionPackage/__init__.py | 0 .../Multiview/Fusion/Methods/LateFusion.py | 374 +++++++++--------- .../LateFusionPackage/BayesianInference.py | 54 +++ .../LateFusionPackage/MajorityVoting.py | 66 ++++ .../Methods/LateFusionPackage/SVMForLinear.py | 63 +++ .../LateFusionPackage/WeightedLinear.py | 59 +++ .../Methods/LateFusionPackage/__init__.py | 0 .../Multiview/Fusion/Methods/__init__.py | 3 +- .../Mumbo/Classifiers/DecisionTree.py | 2 + .../Multiview/Mumbo/Mumbo.py | 5 +- 35 files changed, 803 insertions(+), 220 deletions(-) create mode 100644 Code/MonoMutliViewClassifiers/Metrics/__init__.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/f1_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/log_loss.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/precision_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/recall_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py create mode 100644 Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py create mode 100644 Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index e3ed083d..4f52c31c 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -41,7 +41,7 @@ groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float, default=0.7) groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation', - type=int, default=5) + type=int, default=5 ) groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int, default=4) groupClass.add_argument('--CL_classes', metavar='STRING', action='store', @@ -51,12 +51,11 @@ groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Determine whether to use Multiview, Monoview, or Benchmark, separate with : if multiple', default='Benchmark') groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store', - help='Determine which classifier to use, if CL_type = Benchmark, fill monoview and multiview ' - 'options', default='') + help='Determine which classifier to use, if empty, considering all', default='') groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', - help='Determine which monoview classifier to use, separate with : if multiple', default='') + help='Determine which monoview classifier to use, separate with : if multiple, if empty, considering all', default='') groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', - help='Determine which multiview classifier to use, separate with : if multiple', default='') + help='Determine which multiview classifier to use, separate with : if multiple, if empty, considering all', default='') groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=1) diff --git a/Code/MonoMutliViewClassifiers/Metrics/__init__.py b/Code/MonoMutliViewClassifiers/Metrics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py new file mode 100644 index 00000000..08d5c30d --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py @@ -0,0 +1,10 @@ +from sklearn.metrics import accuracy_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + score = metric(y_true, y_pred, sample_weight=sample_weight) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/f1_score.py b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py new file mode 100644 index 00000000..3dc8c3ae --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py @@ -0,0 +1,22 @@ +from sklearn.metrics import f1_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + try: + labels = kwargs["1"] + except: + labels=None + try: + pos_label = kwargs["2"] + except: + pos_label = 1 + try: + average = kwargs["3"] + except: + average = "binary" + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py new file mode 100644 index 00000000..74f1e241 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py @@ -0,0 +1,26 @@ +from sklearn.metrics import fbeta_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + try: + beta = kwargs["1"] + except: + beta=1.0 + try: + labels = kwargs["2"] + except: + labels=None + try: + pos_label = kwargs["3"] + except: + pos_label = 1 + try: + average = kwargs["4"] + except: + average = "binary" + score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py new file mode 100644 index 00000000..fe35066d --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py @@ -0,0 +1,10 @@ +from sklearn.metrics import hamming_loss as metric + + +def score(y_true, y_pred, **kwargs): + try: + classes = kwargs["0"] + except: + classes=None + score = metric(y_true, y_pred, classes=classes) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py new file mode 100644 index 00000000..c6d134ad --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py @@ -0,0 +1,10 @@ +from sklearn.metrics import jaccard_similarity_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight = None + score = metric(y_true, y_pred, sample_weight=sample_weight) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/log_loss.py b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py new file mode 100644 index 00000000..a6dbf8a3 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py @@ -0,0 +1,14 @@ +from sklearn.metrics import log_loss as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight = None + try: + eps = kwargs["1"] + except: + eps = 1e-15 + score = metric(y_true, y_pred, sample_weight=sample_weight, eps=eps) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py new file mode 100644 index 00000000..b1ec857a --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py @@ -0,0 +1,6 @@ +from sklearn.metrics import matthews_corrcoef as metric + + +def score(y_true, y_pred, **kwargs): + score = metric(y_true, y_pred) + return score \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Metrics/precision_score.py b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py new file mode 100644 index 00000000..2a6cccad --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py @@ -0,0 +1,22 @@ +from sklearn.metrics import precision_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + try: + labels = kwargs["1"] + except: + labels=None + try: + pos_label = kwargs["2"] + except: + pos_label = 1 + try: + average = kwargs["3"] + except: + average = "binary" + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/recall_score.py b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py new file mode 100644 index 00000000..a4d10284 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py @@ -0,0 +1,22 @@ +from sklearn.metrics import recall_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + try: + labels = kwargs["1"] + except: + labels=None + try: + pos_label = kwargs["2"] + except: + pos_label = 1 + try: + average = kwargs["3"] + except: + average = "binary" + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py new file mode 100644 index 00000000..5a9f7cfb --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py @@ -0,0 +1,14 @@ +from sklearn.metrics import roc_auc_score as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + try: + average = kwargs["1"] + except: + average = "binary" + score = metric(y_true, y_pred, sample_weight=sample_weight, average=average) + return score diff --git a/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py new file mode 100644 index 00000000..a741825b --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py @@ -0,0 +1,10 @@ +from sklearn.metrics import zero_one_loss as metric + + +def score(y_true, y_pred, **kwargs): + try: + sample_weight = kwargs["0"] + except: + sample_weight=None + score = metric(y_true, y_pred, sample_weight=sample_weight) + return score diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index d1c45281..6eba0119 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,7 +1,10 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.pipeline import Pipeline -from sklearn.grid_search import GridSearchCV +from sklearn.grid_search import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier +from sklearn.utils.testing import all_estimators +import inspect +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): @@ -9,20 +12,33 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): base_estimators = int(kwargs['1']) classifier = AdaBoostClassifier(n_estimators=num_estimators, base_estimator=base_estimators) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): - pipeline = Pipeline([('classifier', AdaBoostClassifier())]) param= {"classifier__n_estimators": map(int, kwargs['0']), "classifier__base_estimator": [DecisionTreeClassifier() for arg in kwargs["1"]]} - grid = GridSearchCV(pipeline,param_grid=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds) + grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds) detector = grid.fit(X_train, y_train) desc_estimators = [detector.best_params_["classifier__n_estimators"]] description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators)) return description, detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1): + pipeline = Pipeline([('classifier', AdaBoostClassifier())]) + classifiers = [clf for name, clf in all_estimators(type_filter='classifier') + if 'sample_weight' in inspect.getargspec(clf().fit)[0] + and (name != "AdaBoostClassifier" and name !="GradientBoostingClassifier")] + param= {"classifier__n_estimators": np.random.randint(1, 30, 10), + "classifier__base_estimator": classifiers} + grid = RandomizedSearchCV(pipeline,param_distributions=param,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds) + detector = grid.fit(X_train, y_train) + desc_estimators = [detector.best_params_["classifier__n_estimators"], + detector.best_params_["classifier__base_estimator"]] + return desc_estimators + + def getConfig(config): return "\n\t\t- Adaboost with num_esimators : "+config[0]+", base_estimators : "+config[1] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 8fe4de8d..c9c86c35 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -1,13 +1,13 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV - +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs): maxDepth = int(kwargs['0']) classifier = DecisionTreeClassifier(max_depth=maxDepth) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -22,5 +22,16 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, DT_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())]) + param_DT = {"classifier__max_depth":np.random.randint(1, 30, 10)} + + grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy', + cv=nbFolds) + DT_detector = grid_DT.fit(X_train, y_train) + desc_params = [DT_detector.best_params_["classifier__max_depth"]] + return desc_params + + def getConfig(config): return "\n\t\t- Decision Tree with max_depth : "+config[0] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index ae03c355..60cc21c1 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -1,13 +1,13 @@ from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV - +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): nNeighbors = int(kwargs['0']) classifier = KNeighborsClassifier(n_neighbors=nNeighbors) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -21,6 +21,15 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, KNN_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())]) + param_KNN = {"classifier__n_neighbors": np.random.randint(1, 30, 10)} + grid_KNN = GridSearchCV(pipeline_KNN, param_grid=param_KNN, refit=True, n_jobs=nbCores, scoring='accuracy', + cv=nbFolds) + KNN_detector = grid_KNN.fit(X_train, y_train) + desc_params = [KNN_detector.best_params_["classifier__n_neighbors"]] + return desc_params + def getConfig(config): return "\n\t\t- K nearest Neighbors with n_neighbors: "+config[0] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 968d83d2..1b3a1f41 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -8,7 +8,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): maxDepth = int(kwargs['1']) classifier = RandomForestClassifier(n_estimators=num_estimators, max_depth=maxDepth, n_jobs=NB_CORES) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -42,5 +42,16 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, rf_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) + param_rf = {"classifier__n_estimators": np.random.randint(1, 30, 10)} + + grid_rf = GridSearchCV(pipeline_rf,param_grid=param_rf,refit=True,n_jobs=nbCores,scoring='accuracy',cv=nbFolds) + rf_detector = grid_rf.fit(X_train, y_train) + + desc_estimators = [rf_detector.best_params_["classifier__n_estimators"]] + return desc_estimators + + def getConfig(config): return "\n\t\t- Random Forest with num_esimators : "+config[0]+", max_depth : "+config[1] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index 3a2bc27f..9ad1f292 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -1,6 +1,7 @@ from sklearn.linear_model import SGDClassifier from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): @@ -12,7 +13,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): alpha = 0.15 classifier = SGDClassifier(loss=loss, penalty=penalty, alpha=alpha) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -28,5 +29,19 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, SGD_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_SGD = Pipeline([('classifier', SGDClassifier())]) + losses = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'] + penalties = ["l1", "l2", "elasticnet"] + alphas = list(np.random.randint(1,10,10))+list(np.random.random_sample(10)) + param_SGD = {"classifier__loss": losses, "classifier__penalty": penalties, + "classifier__alpha": alphas} + grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy', + cv=nbFolds) + SGD_detector = grid_SGD.fit(X_train, y_train) + desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"], + SGD_detector.best_params_["classifier__alpha"]] + return desc_params + def getConfig(config): return "\n\t\t- SGDClassifier with loss : "+config[0]+", penalty : "+config[1] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index 568badb4..6f59cb29 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -1,13 +1,14 @@ from sklearn.svm import SVC from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) classifier = SVC(C=C, kernel='linear', probability=True) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -21,5 +22,15 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, SVMLinear_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))]) + param_SVMLinear = {"classifier__C":np.random.randint(1,2000,30)} + grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy', + cv=nbFolds) + SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train) + desc_params = [SVMLinear_detector.best_params_["classifier__C"]] + return desc_params + + def getConfig(config): return "\n\t\t- SVM with C : "+config[0]+", kernel : "+config[1] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index 9f43f0b9..1c7ee599 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -1,6 +1,7 @@ from sklearn.svm import SVC from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): @@ -8,18 +9,19 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): degree = int(kwargs['1']) classifier = SVC(C=C, kernel='poly', degree=degree, probability=True) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): - pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear"))]) - param_SVMLinear = {"classifier__C": map(int, kwargs['0']), "classifier__degree": map(int, kwargs["1"])} - grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy', + pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly"))]) + param_SVMPoly= {"classifier__C": np.random.randint(1,2000,30), "classifier__degree": np.random.randint(1,10,5)} + grid_SVMPoly = GridSearchCV(pipeline_SVMPoly, param_grid=param_SVMPoly, refit=True, n_jobs=nbCores, scoring='accuracy', cv=nbFolds) - SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train) - desc_params = [SVMLinear_detector.best_params_["classifier__C"], SVMLinear_detector.best_params_["classifier__degree"]] - description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) - return description, SVMLinear_detector + SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train) + desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]] + return desc_params + + def getConfig(config): diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index 202cc076..ae527e05 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -1,13 +1,14 @@ from sklearn.svm import SVC from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.grid_search import GridSearchCV +import numpy as np def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) classifier = SVC(C=C, kernel='rbf', probability=True) classifier.fit(DATASET, CLASS_LABELS) - return classifier + return "No desc", classifier def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): @@ -21,5 +22,15 @@ def fit_gridsearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): return description, SVMRBF_detector +def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): + pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf"))]) + param_SVMRBF = {"classifier__C": np.random.randint(1,2000,30)} + grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring='accuracy', + cv=nbFolds) + SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train) + desc_params = [SVMRBF_detector.best_params_["classifier__C"]] + return desc_params + + def getConfig(config): return "\n\t\t- SVM with C : "+config[0]+", kernel : "+config[1] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 09268a89..75dceac7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -78,8 +78,12 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p if gridSearch: logging.info("Start:\t Gridsearching best settings for monoview classifiers") - bestSettings = classifierGridSearch(DATASET, classificationKWARGS["classifiersNames"]) + bestSettings, fusionConfig = classifierGridSearch(DATASET, classificationKWARGS, learningIndices) classificationKWARGS["classifiersConfigs"] = bestSettings + try: + classificationKWARGS["fusionMethodConfig"] = fusionConfig + except: + pass logging.info("Done:\t Gridsearching best settings for monoview classifiers") # Begin Classification diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 3fe09168..899350e0 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -1,14 +1,22 @@ from Methods import * +import MonoviewClassifiers -def gridSearch_hdf5(DATASET, classifiersNames): +def gridSearch_hdf5(DATASET, classificationKWARGS): + fusionTypeName = classificationKWARGS["fusionType"] + fusionTypePackage = globals()[fusionTypeName+"Package"] + fusionMethodModuleName = classificationKWARGS["fusionMethod"] + fusionMethodModule = getattr(fusionTypePackage, fusionMethodModuleName) + classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): - classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string + classifierModule = getattr(MonoviewClassifiers, classifierName) classifierMethod = getattr(classifierModule, "gridSearch") bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[...], DATASET.get("labels")[...])) - return bestSettings + classificationKWARGS["classifiersConfigs"] = bestSettings + fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS) + return bestSettings, fusionMethodConfig class Fusion: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index 35877675..5c0875c3 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -38,7 +38,7 @@ class WeightedLinear(EarlyFusionClassifier): trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices) monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName) - self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("labels")[trainIndices], + desc, self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("labels")[trainIndices], NB_CORES=self.nbCores, **dict((str(configIndex),config) for configIndex,config in enumerate(self.monoviewClassifiersConfig))) @@ -70,3 +70,7 @@ class WeightedLinear(EarlyFusionClassifier): configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs[0]) return configString + def gridSearch(self, classificationKWARGS): + + return + diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py new file mode 100644 index 00000000..4965f831 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -0,0 +1,70 @@ +from EarlyFusion import EarlyFusionClassifier +import MonoviewClassifiers +import numpy as np +from sklearn.metrics import accuracy_score + + +def gridSearch(DATASET, classificationKWARGS, trainIndices): + bestScore = 0.0 + bestConfig = None + if classificationKWARGS["fusionMethodConfig"][0] is not None: + for i in range(0): + randomWeightsArray = np.random.random_sample(len(DATASET.get("Metadata").attrs["nbView"])) + normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + classificationKWARGS["fusionMethodConfig"][0] = normalizedArray + classifier = WeightedLinear(1, **classificationKWARGS) + classifier.fit_hdf5(DATASET, trainIndices) + predictedLabels = classifier.predict_hdf5(DATASET, trainIndices) + accuracy = accuracy_score(DATASET.get("labels")[trainIndices], predictedLabels) + if accuracy > bestScore: + bestScore = accuracy + bestConfig = normalizedArray + return bestConfig + + +class WeightedLinear(EarlyFusionClassifier): + def __init__(self, NB_CORES=1, **kwargs): + EarlyFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], + NB_CORES=NB_CORES) + self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + + def fit_hdf5(self, DATASET, trainIndices=None): + if not trainIndices: + trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices) + monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName) + desc, self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("labels")[trainIndices], + NB_CORES=self.nbCores, + **dict((str(configIndex),config) for configIndex,config in + enumerate(self.monoviewClassifiersConfig))) + + def predict_hdf5(self, DATASET, usedIndices=None): + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if usedIndices: + self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices) + predictedLabels = self.monoviewClassifier.predict(self.monoviewData) + else: + predictedLabels=[] + return predictedLabels + + def predict_proba_hdf5(self, DATASET, usedIndices=None): + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if usedIndices: + self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices) + predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData) + else: + predictedLabels=[] + return predictedLabels + + def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+ \ + " with monoview classifier : " + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames[0]) + configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs[0]) + return configString + + def gridSearch(self, classificationKWARGS): + + return \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 9222d733..844f9969 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -24,6 +24,7 @@ def fifMonoviewClassifier(classifierName, data, labels, classifierConfig): ))) return classifier + class LateFusionClassifier(object): def __init__(self, monoviewClassifiersNames, monoviewClassifiersConfigs, NB_CORES=1): self.monoviewClassifiersNames = monoviewClassifiersNames @@ -35,200 +36,201 @@ class LateFusionClassifier(object): if trainIndices == None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) nbView = DATASET.get("Metadata").attrs["nbView"] - self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( + monoviewResults = Parallel(n_jobs=self.nbCores)( delayed(fifMonoviewClassifier)(self.monoviewClassifiersNames[viewIndex], DATASET.get("View"+str(viewIndex))[trainIndices, :], DATASET.get("labels")[trainIndices], self.monoviewClassifiersConfigs[viewIndex]) for viewIndex in range(nbView)) + self.monoviewClassifiers = [monoviewClassifier for desc, monoviewClassifier in monoviewResults] -class WeightedLinear(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], - NB_CORES=NB_CORES) - self.weights = map(float, kwargs['fusionMethodConfig'][0]) - - def predict_hdf5(self, DATASET, usedIndices=None): - # Normalize weights ? - # weights = weights/float(max(weights)) - if usedIndices == None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - predictedLabels = [] - viewScores = np.zeros((DATASET.get("Metadata").attrs["nbView"], len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) - for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): - viewScores[viewIndex] = self.monoviewClassifiers[viewIndex].predict_proba( - DATASET.get("View" + str(viewIndex))[usedIndices]) - for currentIndex, usedIndex in enumerate(usedIndices): - predictedLabel = np.argmax(np.array( - [max(viewScore) * weight for viewScore, weight in zip(viewScores[:, currentIndex], self.weights)], - dtype=float)) - predictedLabels.append(predictedLabel) - # fusedExamples = np.array([sum(np.array([featureScores * weight for weight, featureScores in zip(weights, exampleDecisions)])) for exampleDecisions in monoViewDecisions]) - else: - predictedLabels = [] - - return predictedLabels - - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Weighted linear using a weight for each view : "+", ".join(self.weights) + \ - "\n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): - monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) - configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString - - - -# The SVMClassifier is here used to find the right weights for linear fusion -# Here we have a function to train it, one to fuse. -# And one to do both. -class SVMForLinear(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], - NB_CORES=NB_CORES) - self.SVMClassifier = None - - def fit_hdf5(self, DATASET, trainIndices=None): - if trainIndices == None: - trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - nbViews = DATASET.get("Metadata").attrs["nbView"] - for viewIndex in range(nbViews): - monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) - self.monoviewClassifiers.append( - monoviewClassifier.fit(DATASET.get("View" + str(viewIndex))[trainIndices], - DATASET.get("labels")[trainIndices], - NB_CORES=self.nbCores, - **dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfigs[viewIndex] - )))) - self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices) - - def predict_hdf5(self, DATASET, usedIndices=None): - # Normalize weights ? - # weights = weights/float(max(weights)) - if usedIndices == None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - monoviewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) - for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): - monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) - monoviewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( - DATASET.get("View" + str(viewIndex))[usedIndices]) - predictedLabels = self.SVMClassifier.predict(monoviewDecisions) - else: - predictedLabels = [] - return predictedLabels - - def SVMForLinearFusionFit(self, DATASET, usedIndices=None): - self.SVMClassifier = OneVsOneClassifier(SVC()) - monoViewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) - for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): - monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( - DATASET.get("View" + str(viewIndex))[usedIndices]) - - self.SVMClassifier.fit(monoViewDecisions, DATASET.get("labels")[usedIndices]) - - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with SVM for linear \n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): - monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) - configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString - - -# For majority voting, we have a problem : we have 5 fetures and 101 classes -# on Calthech, so if each feature votes for one class, we can't find a good -# result -class MajorityVoting(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], - NB_CORES=NB_CORES) - - def predict_hdf5(self, DATASET, usedIndices=None): - if usedIndices == None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - datasetLength = len(usedIndices) - votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) - monoViewDecisions = np.zeros((len(usedIndices),DATASET.get("Metadata").attrs["nbView"]), dtype=int) - for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): - monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( - DATASET.get("View" + str(viewIndex))[usedIndices]) - for exampleIndex in range(datasetLength): - for featureClassification in monoViewDecisions[exampleIndex, :]: - votes[exampleIndex, featureClassification] += 1 - nbMaximum = len(np.where(votes[exampleIndex] == max(votes[exampleIndex]))[0]) - try: - assert nbMaximum != DATASET.get("Metadata").attrs["nbView"] - except: - print "Majority voting can't decide, each classifier has voted for a different class" - raise - predictedLabels = np.argmax(votes, axis=1) - # Can be upgraded by restarting a new classification process if - # there are multiple maximums ?: - # while nbMaximum>1: - # relearn with only the classes that have a maximum number of vote - # votes = revote - # nbMaximum = len(np.where(votes==max(votes))[0]) - else: - predictedLabels = [] - return predictedLabels - - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Majority Voting \n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): - monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) - configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString - - -# For probabilistic classifiers, we need to add more late fusion methods -# For example, in the bayesian inference -# probabilisticClassifiers is a nbExample array of sklearn probabilistic classifiers -# (such as Naive Bayesian Gaussian -# http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#sklearn.naive_bayes.GaussianNB) -class BayesianInference(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], - NB_CORES=NB_CORES) - self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) - - def predict_hdf5(self, DATASET, usedIndices=None): - nbView = DATASET.get("nbView").value - if usedIndices == None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if sum(self.weights)!=1.0: - self.weights = self.weights/sum(self.weights) - if usedIndices: - - viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) - for viewIndex in range(nbView): - viewScores[viewIndex] = np.power(self.monoviewClassifiers[viewIndex].predict_proba(DATASET.get("View" + str(viewIndex)) - [usedIndices]), - self.weights[viewIndex]) - predictedLabels = np.argmax(np.prod(viewScores, axis=1), axis=1) - else: - predictedLabels = [] - return predictedLabels - - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Bayesian Inference using a weight for each view : "+", ".join(self.weights) + \ - "\n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): - monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) - configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString - - - - +# class WeightedLinear(LateFusionClassifier): +# def __init__(self, NB_CORES=1, **kwargs): +# LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], +# NB_CORES=NB_CORES) +# self.weights = map(float, kwargs['fusionMethodConfig'][0]) +# +# def predict_hdf5(self, DATASET, usedIndices=None): +# # Normalize weights ? +# # weights = weights/float(max(weights)) +# if usedIndices == None: +# usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) +# if usedIndices: +# predictedLabels = [] +# viewScores = np.zeros((DATASET.get("Metadata").attrs["nbView"], len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) +# for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): +# viewScores[viewIndex] = self.monoviewClassifiers[viewIndex].predict_proba( +# DATASET.get("View" + str(viewIndex))[usedIndices]) +# for currentIndex, usedIndex in enumerate(usedIndices): +# predictedLabel = np.argmax(np.array( +# [max(viewScore) * weight for viewScore, weight in zip(viewScores[:, currentIndex], self.weights)], +# dtype=float)) +# predictedLabels.append(predictedLabel) +# # fusedExamples = np.array([sum(np.array([featureScores * weight for weight, featureScores in zip(weights, exampleDecisions)])) for exampleDecisions in monoViewDecisions]) +# else: +# predictedLabels = [] +# +# return predictedLabels +# +# def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): +# configString = "with Weighted linear using a weight for each view : "+", ".join(self.weights) + \ +# "\n\t-With monoview classifiers : " +# for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): +# monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) +# configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) +# return configString +# +# +# +# # The SVMClassifier is here used to find the right weights for linear fusion +# # Here we have a function to train it, one to fuse. +# # And one to do both. +# class SVMForLinear(LateFusionClassifier): +# def __init__(self, NB_CORES=1, **kwargs): +# LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], +# NB_CORES=NB_CORES) +# self.SVMClassifier = None +# +# def fit_hdf5(self, DATASET, trainIndices=None): +# if trainIndices == None: +# trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) +# nbViews = DATASET.get("Metadata").attrs["nbView"] +# for viewIndex in range(nbViews): +# monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) +# self.monoviewClassifiers.append( +# monoviewClassifier.fit(DATASET.get("View" + str(viewIndex))[trainIndices], +# DATASET.get("labels")[trainIndices], +# NB_CORES=self.nbCores, +# **dict((str(configIndex), config) for configIndex, config in +# enumerate(self.monoviewClassifiersConfigs[viewIndex] +# )))[1]) +# self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices) +# +# def predict_hdf5(self, DATASET, usedIndices=None): +# # Normalize weights ? +# # weights = weights/float(max(weights)) +# if usedIndices == None: +# usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) +# if usedIndices: +# monoviewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) +# for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): +# monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) +# monoviewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( +# DATASET.get("View" + str(viewIndex))[usedIndices]) +# predictedLabels = self.SVMClassifier.predict(monoviewDecisions) +# else: +# predictedLabels = [] +# return predictedLabels +# +# def SVMForLinearFusionFit(self, DATASET, usedIndices=None): +# self.SVMClassifier = OneVsOneClassifier(SVC()) +# monoViewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) +# for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): +# monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( +# DATASET.get("View" + str(viewIndex))[usedIndices]) +# +# self.SVMClassifier.fit(monoViewDecisions, DATASET.get("labels")[usedIndices]) +# +# def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): +# configString = "with SVM for linear \n\t-With monoview classifiers : " +# for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): +# monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) +# configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) +# return configString +# +# +# # For majority voting, we have a problem : we have 5 fetures and 101 classes +# # on Calthech, so if each feature votes for one class, we can't find a good +# # result +# class MajorityVoting(LateFusionClassifier): +# def __init__(self, NB_CORES=1, **kwargs): +# LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], +# NB_CORES=NB_CORES) +# +# def predict_hdf5(self, DATASET, usedIndices=None): +# if usedIndices == None: +# usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) +# if usedIndices: +# datasetLength = len(usedIndices) +# votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) +# monoViewDecisions = np.zeros((len(usedIndices),DATASET.get("Metadata").attrs["nbView"]), dtype=int) +# for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): +# monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( +# DATASET.get("View" + str(viewIndex))[usedIndices]) +# for exampleIndex in range(datasetLength): +# for featureClassification in monoViewDecisions[exampleIndex, :]: +# votes[exampleIndex, featureClassification] += 1 +# nbMaximum = len(np.where(votes[exampleIndex] == max(votes[exampleIndex]))[0]) +# try: +# assert nbMaximum != DATASET.get("Metadata").attrs["nbView"] +# except: +# print "Majority voting can't decide, each classifier has voted for a different class" +# raise +# predictedLabels = np.argmax(votes, axis=1) +# # Can be upgraded by restarting a new classification process if +# # there are multiple maximums ?: +# # while nbMaximum>1: +# # relearn with only the classes that have a maximum number of vote +# # votes = revote +# # nbMaximum = len(np.where(votes==max(votes))[0]) +# else: +# predictedLabels = [] +# return predictedLabels +# +# def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): +# configString = "with Majority Voting \n\t-With monoview classifiers : " +# for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): +# monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) +# configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) +# return configString +# +# +# # For probabilistic classifiers, we need to add more late fusion methods +# # For example, in the bayesian inference +# # probabilisticClassifiers is a nbExample array of sklearn probabilistic classifiers +# # (such as Naive Bayesian Gaussian +# # http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#sklearn.naive_bayes.GaussianNB) +# class BayesianInference(LateFusionClassifier): +# def __init__(self, NB_CORES=1, **kwargs): +# LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], +# NB_CORES=NB_CORES) +# self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) +# +# def predict_hdf5(self, DATASET, usedIndices=None): +# nbView = DATASET.get("nbView").value +# if usedIndices == None: +# usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) +# if sum(self.weights)!=1.0: +# self.weights = self.weights/sum(self.weights) +# if usedIndices: +# +# viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) +# for viewIndex in range(nbView): +# viewScores[viewIndex] = np.power(self.monoviewClassifiers[viewIndex].predict_proba(DATASET.get("View" + str(viewIndex)) +# [usedIndices]), +# self.weights[viewIndex]) +# predictedLabels = np.argmax(np.prod(viewScores, axis=1), axis=1) +# else: +# predictedLabels = [] +# return predictedLabels +# +# def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): +# configString = "with Bayesian Inference using a weight for each view : "+", ".join(self.weights) + \ +# "\n\t-With monoview classifiers : " +# for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): +# monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) +# configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) +# return configString +# +# +# # -# def weightedProduct(featureProbas, weights): -# try: -# assert np.sum(weights) == 1.0 -# except: +# # +# # def weightedProduct(featureProbas, weights): +# # try: +# # assert np.sum(weights) == 1.0 +# # except: # print "Need to give a weight array that sums to one" # raise # weightedProbas = np.power(featureProbas, weights) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py new file mode 100644 index 00000000..af908e11 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -0,0 +1,54 @@ +from LateFusion import LateFusionClassifier +import MonoviewClassifiers +import numpy as np +from sklearn.metrics import accuracy_score + +def gridSearch(DATASET, classificationKWARGS, trainIndices): + bestScore = 0.0 + bestConfig = None + if classificationKWARGS["fusionMethodConfig"][0] is not None: + for i in range(0): + randomWeightsArray = np.random.random_sample(len(DATASET.get("Metadata").attrs["nbView"])) + normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + classificationKWARGS["fusionMethodConfig"][0] = normalizedArray + classifier = BayesianInference(1, **classificationKWARGS) + classifier.fit_hdf5(DATASET, trainIndices) + predictedLabels = classifier.predict_hdf5(DATASET, trainIndices) + accuracy = accuracy_score(DATASET.get("labels")[trainIndices], predictedLabels) + if accuracy > bestScore: + bestScore = accuracy + bestConfig = normalizedArray + return bestConfig + + +class BayesianInference(LateFusionClassifier): + def __init__(self, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], + NB_CORES=NB_CORES) + self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + + def predict_hdf5(self, DATASET, usedIndices=None): + nbView = DATASET.get("nbView").value + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if sum(self.weights)!=1.0: + self.weights = self.weights/sum(self.weights) + if usedIndices: + + viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) + for viewIndex in range(nbView): + viewScores[viewIndex] = np.power(self.monoviewClassifiers[viewIndex].predict_proba(DATASET.get("View" + str(viewIndex)) + [usedIndices]), + self.weights[viewIndex]) + predictedLabels = np.argmax(np.prod(viewScores, axis=1), axis=1) + else: + predictedLabels = [] + return predictedLabels + + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): + configString = "with Bayesian Inference using a weight for each view : "+", ".join(self.weights) + \ + "\n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) + configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) + return configString \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py new file mode 100644 index 00000000..ce837a4c --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -0,0 +1,66 @@ +from LateFusion import LateFusionClassifier +import MonoviewClassifiers +import numpy as np +from sklearn.metrics import accuracy_score + + +def gridSearch(DATASET, classificationKWARGS, trainIndices): + bestScore = 0.0 + bestConfig = None + if classificationKWARGS["fusionMethodConfig"][0] is not None: + for i in range(0): + randomWeightsArray = np.random.random_sample(len(DATASET.get("Metadata").attrs["nbView"])) + normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + classificationKWARGS["fusionMethodConfig"][0] = normalizedArray + classifier = MajorityVoting(1, **classificationKWARGS) + classifier.fit_hdf5(DATASET, trainIndices) + predictedLabels = classifier.predict_hdf5(DATASET, trainIndices) + accuracy = accuracy_score(DATASET.get("labels")[trainIndices], predictedLabels) + if accuracy > bestScore: + bestScore = accuracy + bestConfig = normalizedArray + return bestConfig + + +class MajorityVoting(LateFusionClassifier): + def __init__(self, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], + NB_CORES=NB_CORES) + self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + + def predict_hdf5(self, DATASET, usedIndices=None): + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if usedIndices: + datasetLength = len(usedIndices) + votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) + monoViewDecisions = np.zeros((len(usedIndices),DATASET.get("Metadata").attrs["nbView"]), dtype=int) + for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): + monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( + DATASET.get("View" + str(viewIndex))[usedIndices]) + for exampleIndex in range(datasetLength): + for viewIndex, featureClassification in enumerate(monoViewDecisions[exampleIndex, :]): + votes[exampleIndex, featureClassification] += self.weights[viewIndex] + nbMaximum = len(np.where(votes[exampleIndex] == max(votes[exampleIndex]))[0]) + try: + assert nbMaximum != DATASET.get("Metadata").attrs["nbView"] + except: + print "Majority voting can't decide, each classifier has voted for a different class" + raise + predictedLabels = np.argmax(votes, axis=1) + # Can be upgraded by restarting a new classification process if + # there are multiple maximums ?: + # while nbMaximum>1: + # relearn with only the classes that have a maximum number of vote + # votes = revote + # nbMaximum = len(np.where(votes==max(votes))[0]) + else: + predictedLabels = [] + return predictedLabels + + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): + configString = "with Majority Voting \n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) + configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) + return configString \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py new file mode 100644 index 00000000..a6464406 --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -0,0 +1,63 @@ +from LateFusion import LateFusionClassifier +import MonoviewClassifiers +import numpy as np +from sklearn.multiclass import OneVsOneClassifier +from sklearn.svm import SVC + + +def gridSearch(DATASET, classificationKWARGS, trainIndices): + return None + + +class SVMForLinear(LateFusionClassifier): + def __init__(self, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], + NB_CORES=NB_CORES) + self.SVMClassifier = None + + def fit_hdf5(self, DATASET, trainIndices=None): + if trainIndices == None: + trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + nbViews = DATASET.get("Metadata").attrs["nbView"] + for viewIndex in range(nbViews): + monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) + self.monoviewClassifiers.append( + monoviewClassifier.fit(DATASET.get("View" + str(viewIndex))[trainIndices], + DATASET.get("labels")[trainIndices], + NB_CORES=self.nbCores, + **dict((str(configIndex), config) for configIndex, config in + enumerate(self.monoviewClassifiersConfigs[viewIndex] + )))[1]) + self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices) + + def predict_hdf5(self, DATASET, usedIndices=None): + # Normalize weights ? + # weights = weights/float(max(weights)) + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if usedIndices: + monoviewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) + for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): + monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[viewIndex]) + monoviewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( + DATASET.get("View" + str(viewIndex))[usedIndices]) + predictedLabels = self.SVMClassifier.predict(monoviewDecisions) + else: + predictedLabels = [] + return predictedLabels + + def SVMForLinearFusionFit(self, DATASET, usedIndices=None): + self.SVMClassifier = OneVsOneClassifier(SVC()) + monoViewDecisions = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbView"]), dtype=int) + for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): + monoViewDecisions[:, viewIndex] = self.monoviewClassifiers[viewIndex].predict( + DATASET.get("View" + str(viewIndex))[usedIndices]) + + self.SVMClassifier.fit(monoViewDecisions, DATASET.get("labels")[usedIndices]) + + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): + configString = "with SVM for linear \n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) + configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) + return configString \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py new file mode 100644 index 00000000..3ba4b76b --- /dev/null +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -0,0 +1,59 @@ +from LateFusion import LateFusionClassifier +import MonoviewClassifiers +import numpy as np +from sklearn.metrics import accuracy_score + + +def gridSearch(DATASET, classificationKWARGS, trainIndices): + bestScore = 0.0 + bestConfig = None + if classificationKWARGS["fusionMethodConfig"][0] is not None: + for i in range(0): + randomWeightsArray = np.random.random_sample(len(DATASET.get("Metadata").attrs["nbView"])) + normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + classificationKWARGS["fusionMethodConfig"][0] = normalizedArray + classifier = WeightedLinear(1, **classificationKWARGS) + classifier.fit_hdf5(DATASET, trainIndices) + predictedLabels = classifier.predict_hdf5(DATASET, trainIndices) + accuracy = accuracy_score(DATASET.get("labels")[trainIndices], predictedLabels) + if accuracy > bestScore: + bestScore = accuracy + bestConfig = normalizedArray + return bestConfig + + +class WeightedLinear(LateFusionClassifier): + def __init__(self, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['monoviewClassifiersConfigs'], + NB_CORES=NB_CORES) + self.weights = map(float, kwargs['fusionMethodConfig'][0]) + + def predict_hdf5(self, DATASET, usedIndices=None): + # Normalize weights ? + # weights = weights/float(max(weights)) + if usedIndices == None: + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if usedIndices: + predictedLabels = [] + viewScores = np.zeros((DATASET.get("Metadata").attrs["nbView"], len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) + for viewIndex in range(DATASET.get("Metadata").attrs["nbView"]): + viewScores[viewIndex] = self.monoviewClassifiers[viewIndex].predict_proba( + DATASET.get("View" + str(viewIndex))[usedIndices]) + for currentIndex, usedIndex in enumerate(usedIndices): + predictedLabel = np.argmax(np.array( + [max(viewScore) * weight for viewScore, weight in zip(viewScores[:, currentIndex], self.weights)], + dtype=float)) + predictedLabels.append(predictedLabel) + # fusedExamples = np.array([sum(np.array([featureScores * weight for weight, featureScores in zip(weights, exampleDecisions)])) for exampleDecisions in monoViewDecisions]) + else: + predictedLabels = [] + + return predictedLabels + + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): + configString = "with Weighted linear using a weight for each view : "+", ".join(self.weights) + \ + "\n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) + configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) + return configString diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py index 5604e81d..b99d85d7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py @@ -1,2 +1 @@ - -from . import EarlyFusion, LateFusion \ No newline at end of file +from . import EarlyFusionPackage, LateFusionPackage \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py index 2981081d..5aa54619 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py @@ -24,11 +24,13 @@ def DecisionTree(data, labels, arg, weights): return classifier, prediction, isBad, accuracy + def getConfig(classifierConfig): depth = classifierConfig[0] subSampling = classifierConfig[1] return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' ' + def gridSearch(data, labels): minSubSampling = 1.0/(len(labels)/2) bestSettings = [] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index 084192fb..c4a02a28 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -39,7 +39,8 @@ def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATA logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) return classifier, classes, isBad, averageAccuracy -def gridSearch_hdf5(DATASET, classifiersNames): +def gridSearch_hdf5(DATASET, classificationKWARGS): + classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): logging.debug("\tStart:\t Gridsearch for "+classifierName+" on "+DATASET.get("View"+str(classifierIndex)).attrs["name"]) @@ -48,7 +49,7 @@ def gridSearch_hdf5(DATASET, classifiersNames): bestSettings.append(classifierMethod(DATASET.get("View"+str(classifierIndex))[...], DATASET.get("labels")[...])) logging.debug("\tDone:\t Gridsearch for "+classifierName) - return bestSettings + return bestSettings, None -- GitLab