diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 4f6d844a5d69d32d53b81d6fc4576df49fec9809..cd9e4dc44990641865b5e3113e6fb9b6836f1005 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -65,7 +65,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Start:\t Generate classifier args") classifierModule = getattr(MonoviewClassifiers, CL_type) - clKWARGS = getHPs(classifierModule, hyperParamSearch, + clKWARGS, testFoldsPreds = getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState, outputFileName, KFolds, nbCores, metrics, kwargs) @@ -108,7 +108,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.info("Done:\t Saving Results") viewIndex = args["viewIndex"] - return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred] + return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred, testFoldsPreds] def initConstants(args, X, classificationIndices, labelsNames, name, directory): @@ -156,14 +156,14 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, if hyperParamSearch != "None": logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type) classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch) - clKWARGS = classifierHPSearch(X_train, y_train, randomState, + clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, randomState, outputFileName, classifierModule, KFolds=KFolds, nbCores=nbCores, metric=metrics[0], nIter=nIter) logging.debug("Done:\t " + hyperParamSearch + "RandomSearch best settings") else: clKWARGS = kwargs[CL_type + "KWARGS"] - return clKWARGS + return clKWARGS, testFoldsPreds def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 1e0f6ff119f4bf6e34e29c94e11c86c87935c6c7..f2de53cc41af467d90a8a67929d615d3a70774b6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -1,4 +1,5 @@ from sklearn.model_selection import RandomizedSearchCV +import numpy as np from .. import Metrics from ..utils import HyperParameterSearch @@ -21,7 +22,8 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) - randomSearch = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=params_dict, refit=True, n_jobs = nbCores, scoring = scorer, cv = KFolds, random_state = randomState) + randomSearch = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=params_dict, refit=True, + n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) detector = randomSearch.fit(X_train, y_train) bestParams = classifierModule.genBestParams(detector) # desc_params = {"C": SVMPoly_detector.best_params_["classifier__C"], "degree": SVMPoly_detector.best_params_["classifier__degree"]} @@ -31,8 +33,21 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo # params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), ("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))] HyperParameterSearch.genHeatMaps(params, scoresArray, outputFileName) - - return bestParams + testFoldsPreds = genTestFoldsPreds(X_train, y_train, KFolds, detector.best_estimator_) + return bestParams, testFoldsPreds + + +def genTestFoldsPreds(X_train, y_train, KFolds, estimator): + testFoldsPreds = [] + folds = KFolds.split(X_train, y_train) + foldLengths = np.zeros(KFolds.n_splits,dtype=int) + for foldIndex, (trainIndices, testIndices) in enumerate(folds): + foldLengths[foldIndex] = len(testIndices) + estimator.fit(X_train[trainIndices], y_train[trainIndices]) + testFoldsPreds.append(estimator.predict(X_train[trainIndices])) + minFoldLength = foldLengths.min() + testFoldsPreds = np.array([testFoldPreds[:minFoldLength] for testFoldPreds in testFoldsPreds]) + return testFoldsPreds # def isUseful(labelSupports, index, CLASS_LABELS, labelDict): # if labelSupports[labelDict[CLASS_LABELS[index]]] != 0: diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py index 0fa1a25d79236b540e6f483db14ca51d61924821..a222a5996ddea3d127f31a18160c8c926c2b5cdb 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py @@ -21,7 +21,7 @@ __status__ = "Prototype" # Production, Development, Prototype def genName(config): if config["fusionType"] == "LateFusion": classifierRedNames = [classifierName[:4] for classifierName in config["classifiersNames"]] - return "Late-" + str(config["fusionMethod"][:4])+"-"+"-".join(classifierRedNames) + return "Late-" + str(config["fusionMethod"][:4])#+"-"+"-".join(classifierRedNames) elif config["fusionType"] == "EarlyFusion": return "Early-" + config["fusionMethod"][:4] + "-" + config["classifiersNames"][:4] diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py index 1ba4023fef00538dbf9d04ff832643cc45e65c3f..bd795d0eb16afb83dd015f943459647dcf7baad9 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py @@ -5,6 +5,7 @@ import numpy as np import itertools from joblib import Parallel, delayed import sys +import math from .... import MonoviewClassifiers from .... import Metrics @@ -19,13 +20,13 @@ def canProbasClassifier(classifierConfig): return False -def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas, randomState): +def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas, randomState, nbCores=1): if type(classifierConfig) == dict: monoviewClassifier = getattr(MonoviewClassifiers, classifierName) if needProbas and not monoviewClassifier.canProbas(): monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") - DTConfig = {"0": 300, "1": "entropy", "2": "random"} - classifier = monoviewClassifier.fit(data, labels, randomState, DTConfig) + DTConfig = {"max_depth": 300, "criterion": "entropy", "splitter": "random"} + classifier = monoviewClassifier.fit(data, labels, randomState, nbCores, **DTConfig) return classifier else: if type(classifierConfig) is dict: @@ -34,8 +35,7 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr classifierConfig = dict((str(configIndex), config) for configIndex, config in enumerate(classifierConfig)) - classifier = monoviewClassifier.fit(data, labels, randomState, - **classifierConfig) + classifier = monoviewClassifier.fit(data, labels, randomState, nbCores, **classifierConfig) return classifier @@ -72,8 +72,65 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas bestCombination = combination return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] -def allMonoviewClassifiers(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): - return allClassifersNames + +def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): + nbViews = len(viewsIndices) + nbClassifiers = len(allClassifersNames) + nbFolds = len(resultsMonoview[0][1][6]) + foldsLen = len(resultsMonoview[0][1][6][0]) + classifiersNames = [[] for _ in viewsIndices] + classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen)) + + for resultMonoview in resultsMonoview: + if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]: + pass + else: + classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0]) + classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0]) + classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6] + return classifiersDecisions, classifiersNames + + +def disagreement(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): + + classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview) + + foldsLen = len(resultsMonoview[0][1][6][0]) + nbViews = len(viewsIndices) + nbClassifiers = len(allClassifersNames) + combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) + nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1) + disagreements = np.zeros(nbCombinations) + combis = np.zeros((nbCombinations, nbViews), dtype=int) + + for combinationsIndex, combination in enumerate(combinations): + combis[combinationsIndex] = combination + combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)] + binomes = itertools.combinations(combiWithView, 2) + nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2) + disagreement = np.zeros(nbBinomes) + for binomeIndex, binome in enumerate(binomes): + (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome + nbDisagree = np.sum(np.logical_xor(classifiersDecisions[viewIndex1, classifierIndex1], + classifiersDecisions[viewIndex2, classifierIndex2]) + , axis=1)/foldsLen + disagreement[binomeIndex] = np.mean(nbDisagree) + disagreements[combinationsIndex] = np.mean(disagreement) + print(disagreements) + bestCombiIndex = np.argmax(disagreements) + bestCombination = combis[bestCombiIndex] + + return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] + + + + + + + + +# def allMonoviewClassifiers(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): +# return allClassifersNames def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index 2b0742e3f6828719ea1b7bf7d5ba3e9b22c86c8a..21eb045a3923034977fa6f46356d10a7536d9cc3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -190,7 +190,7 @@ def parseTheArgs(arguments): help='Names of the classifier used for late fusion', default=['']) groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store', help='Determine which method to use to select the monoview classifiers', - default="intersect") + default="disagreement") groupFatLateFusion = parser.add_argument_group('Fat Late Fusion arguments') groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', action='store', nargs="+", diff --git a/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py b/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py new file mode 100644 index 0000000000000000000000000000000000000000..026541f68e05c13c7d99aecf7ad382db01b8999f --- /dev/null +++ b/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py @@ -0,0 +1,28 @@ +import unittest +import numpy as np +from sklearn.model_selection import StratifiedKFold +from sklearn.tree.tree import DecisionTreeClassifier + +from ...MonoMultiViewClassifiers.Monoview import MonoviewUtils + +class Test_genTestFoldsPreds(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.random_state = np.random.RandomState(42) + cls.X_train = cls.random_state.random_sample((31,10)) + cls.y_train = np.ones(31, dtype=int) + cls.KFolds = StratifiedKFold(n_splits=3, random_state=cls.random_state) + + cls.estimator = DecisionTreeClassifier(max_depth=1) + + cls.y_train[15:] = -1 + # print(cls.X_train) + # print(cls.y_train) + + def test_simple(cls): + testFoldsPreds = MonoviewUtils.genTestFoldsPreds(cls.X_train, cls.y_train, cls.KFolds, cls.estimator) + cls.assertEqual(testFoldsPreds.shape, (3,10)) + np.testing.assert_array_equal(testFoldsPreds[0], np.array([ 1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) + + diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/__init__.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py new file mode 100644 index 0000000000000000000000000000000000000000..105115ccd87dcbb6bd0be5d763e345e0ad425225 --- /dev/null +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py @@ -0,0 +1,118 @@ +import unittest +import numpy as np + +from .....MonoMultiViewClassifiers.MultiviewClassifiers.Fusion.Methods import LateFusion + +class Test_disagreement(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.randomState = np.random.RandomState(42) + cls.allClassifiersNames = ["SCM", "SVM", "DT"] + cls.directory = "" + cls.viewsIndices = [0,1] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1,6)]) + ]], + [1, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [1, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [1, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0,1,6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]] + ] + cls.classificationIndices = [] + + def test_simple(cls): + bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, + cls.resultsMonoview, cls.classificationIndices) + cls.assertEqual(bestCombi, ["SCM", "DT"]) + + def test_viewsIndices(cls): + cls.viewsIndices = [0,6] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]] + ] + bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, + cls.resultsMonoview, cls.classificationIndices) + cls.assertEqual(bestCombi, ["DT", "DT"]) + + def test_multipleViews(cls): + cls.viewsIndices = [0, 6, 18] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [0, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [6, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [18, ["SCM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [18, ["SVM", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]], + [18, ["DT", "", "", "", "", "", np.array([cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6), + cls.randomState.random_integers(0, 1, 6)]) + ]] + ] + bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, + cls.resultsMonoview, cls.classificationIndices) + cls.assertEqual(bestCombi, ['SCM', 'SVM', 'SVM']) \ No newline at end of file