# ---------------------------------------------------------------------------
# Provenance (kept from the patch header this module was delivered in):
#   From 540f2e0c2c023739300364ca756b4e262dc3eb74 Mon Sep 17 00:00:00 2001
#   From: Baptiste Bauvin <baptiste.bauvin.1@ulaval.ca>
#   Date: Fri, 9 Feb 2018 10:43:26 -0500
#   Subject: [PATCH] Added fusion with disagreement
#   ---
#    .../DisagreeFusion/DisagreeFusionModule.py | 142 ++++++++++++++++++
#    .../DisagreeFusion/__init__.py | 1 +
#    .../DisagreeFusion/analyzeResults.py | 21 +++
#    .../Fusion/Methods/LateFusion.py | 96 ++++++------
#    .../utils/execution.py | 12 +-
#    .../Test_DisagreeFusion/__init__.py | 0
#    .../test_DisagreeFusionModule.py} | 21 +--
#    .../Test_Fusion/test_FusionModule.py | 2 +-
#    8 files changed, 230 insertions(+), 65 deletions(-)
#
# File: multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/
#       DisagreeFusion/DisagreeFusionModule.py (new file, index 00000000..4c9af9ac)
# ---------------------------------------------------------------------------
"""Late fusion of monoview classifiers selected by maximal disagreement.

For every view one monoview classifier is picked so that the mean pairwise
disagreement between the picked classifiers is maximal; their stored
decisions are then combined with a weighted vote.
"""
import numpy as np
import math
import itertools


def genName(config):
    """Return the display name of this multiview classifier.

    `config` is accepted for interface compatibility and is not used.
    """
    return "DisagreeFusion"


def getBenchmark(benchmark, args=None):
    """Register DisagreeFusion in `benchmark["Multiview"]` and return it.

    The `benchmark` dictionary is modified in place; `args` is not used.
    """
    benchmark["Multiview"]["DisagreeFusion"] = ["take_everything"]
    return benchmark


def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview):
    """Gather the per-fold decisions of every monoview classifier, per view.

    Each item of `resultsMonoview` is read as
    `[viewIndex, [classifierName, ..., decisions]]` where `decisions` sits at
    position 6 of the inner sequence and is indexed as [fold][example].
    All items are expected to share the fold layout of the first one.

    Returns
    -------
    classifiersDecisions : numpy array of shape
        (nbViews, nbClassifiers, nbFolds, foldsLen)
    classifiersNames : list of lists
        For each view (in `viewsIndices` order) the classifier names in order
        of first appearance; the position of a name is its index on axis 1 of
        `classifiersDecisions` for that view.
    """
    nbViews = len(viewsIndices)
    nbClassifiers = len(allClassifersNames)
    referenceDecisions = resultsMonoview[0][1][6]
    nbFolds = len(referenceDecisions)
    foldsLen = len(referenceDecisions[0])
    classifiersNames = [[] for _ in viewsIndices]
    classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen))

    for resultMonoview in resultsMonoview:
        viewPosition = viewsIndices.index(resultMonoview[0])
        classifierName = resultMonoview[1][0]
        if classifierName not in classifiersNames[viewPosition]:
            classifiersNames[viewPosition].append(classifierName)
        classifierIndex = classifiersNames[viewPosition].index(classifierName)
        classifiersDecisions[viewPosition, classifierIndex] = resultMonoview[1][6]
    return classifiersDecisions, classifiersNames


def disagree(allClassifersNames, viewsIndices, resultsMonoview):
    """Select one classifier per view so that their disagreement is maximal.

    The disagreement of a candidate selection is the mean, over every pair of
    views, of the fraction of examples on which the two selected classifiers
    predict different labels (averaged over folds).

    Returns
    -------
    (names, disagreement) : the selected classifier name for each view (in
    `viewsIndices` order) and the disagreement score of that selection.
    With fewer than two views there is no pair to compare and the score is 0.
    """
    classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices,
                                                                     resultsMonoview)
    nbViews = len(viewsIndices)
    nbClassifiers = len(allClassifersNames)

    # NOTE(review): combinations_with_replacement only yields non-decreasing
    # index tuples, so an assignment such as (1, 0) is never evaluated;
    # itertools.product would explore every per-view choice. Kept as is
    # because existing expected scores rely on this search space - confirm.
    # Materialising the candidates gives their count directly; the previous
    # factorial formula used "/" and produced a float under Python 3, which
    # np.zeros rejects as an array size.
    combinations = list(itertools.combinations_with_replacement(range(nbClassifiers), nbViews))
    disagreements = np.zeros(len(combinations))

    for combinationIndex, combination in enumerate(combinations):
        viewClassifierPairs = list(enumerate(combination))
        pairDisagreements = []
        for (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) in \
                itertools.combinations(viewClassifierPairs, 2):
            # "!=" equals the former logical_xor on 0/1 labels and stays
            # correct for any other label encoding.
            differs = (classifiersDecisions[viewIndex1, classifierIndex1] !=
                       classifiersDecisions[viewIndex2, classifierIndex2])
            pairDisagreements.append(np.mean(np.mean(differs, axis=1)))
        if pairDisagreements:
            disagreements[combinationIndex] = np.mean(pairDisagreements)
        # else: a single view has no pair, the score stays at 0.

    bestCombiIndex = int(np.argmax(disagreements))
    bestCombination = combinations[bestCombiIndex]

    return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], \
        disagreements[bestCombiIndex]


def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):
    """Build the list of argument dictionaries used to run DisagreeFusion.

    The monoview classifiers are chosen with `disagree`, then the stored
    decisions of the chosen classifier of each view are collected (position 3
    of the monoview result in the biclass case, `genMulticlassMonoviewDecision`
    otherwise). `randomState` and `directory` are not used here.

    Returns a one-element list holding the argument dictionary.
    """
    # Imported here, where it is needed, so that the selection and voting
    # code of this module stays importable without the rest of the platform.
    from ...utils.Multiclass import isBiclass, genMulticlassMonoviewDecision

    monoviewClassifierModulesNames = benchmark["Monoview"]
    classifiersNames, disagreement = disagree(monoviewClassifierModulesNames,
                                              viewsIndices, resultsMonoview)
    multiclass_preds = [monoviewResult[1][5] for monoviewResult in resultsMonoview]
    # Only the results produced by the classifier selected for their view.
    selectedResults = [monoviewResult for monoviewResult in resultsMonoview
                       if classifiersNames[viewsIndices.index(monoviewResult[0])] == monoviewResult[1][0]]
    if isBiclass(multiclass_preds):
        monoviewDecisions = np.array([monoviewResult[1][3] for monoviewResult in selectedResults])
    else:
        monoviewDecisions = np.array(
            [genMulticlassMonoviewDecision(monoviewResult, classificationIndices)
             for monoviewResult in selectedResults])
    arguments = {"CL_type": "DisagreeFusion",
                 "views": views,
                 "NB_VIEW": len(views),
                 "viewsIndices": viewsIndices,
                 "NB_CLASS": len(args.CL_classes),
                 "LABELS_NAMES": args.CL_classes,
                 "DisagreeFusionKWARGS": {
                     "weights": args.DGF_weights,
                     "classifiersNames": classifiersNames,
                     "monoviewDecisions": monoviewDecisions,
                     "nbCLass": len(args.CL_classes),
                     "disagreement": disagreement
                 }
                 }
    return [arguments]


def genParamsSets(classificationKWARGS, randomState, nIter=1):
    """Used to generate parameters sets for the random hyper parameters optimization function

    Draws `nIter` random weight vectors (one weight per selected classifier),
    normalises each to sum to 1 and wraps each in a one-element list, the
    layout `DisagreeFusionClass.setParams` reads.
    """
    nbWeights = len(classificationKWARGS["classifiersNames"])
    paramsSets = []
    for _ in range(nIter):
        weightVector = randomState.random_sample(nbWeights)
        paramsSets.append([weightVector / np.sum(weightVector)])
    return paramsSets


class DisagreeFusionClass:
    """Weighted majority vote over pre-computed monoview decisions."""

    def __init__(self, randomState, NB_CORES=1, **kwargs):
        """Store the fusion configuration.

        Expected keyword arguments: "weights" (empty for uniform weights),
        "monoviewDecisions" (one sequence of predicted labels per selected
        classifier, indexed by example), "classifiersNames", "nbCLass" and
        "disagreement". `randomState` and `NB_CORES` are not used.
        """
        weights = kwargs["weights"]
        if weights is None or len(weights) == 0:
            # One equal weight per selected classifier. The previous code
            # measured the literal list ["classifiersNames"], i.e. always 1.
            nbClassifiers = len(kwargs["classifiersNames"])
            self.weights = np.ones(nbClassifiers, dtype=float) / nbClassifiers
        else:
            weights = np.array(weights, dtype=float)
            self.weights = weights / np.sum(weights)
        self.monoviewDecisions = kwargs["monoviewDecisions"]
        self.classifiersNames = kwargs["classifiersNames"]
        self.nbClass = kwargs["nbCLass"]
        self.disagreement = kwargs["disagreement"]

    def setParams(self, paramsSet):
        """Set the vote weights from a parameter set built by `genParamsSets`."""
        self.weights = paramsSet[0]

    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
        """Nothing to fit: the monoview decisions are already computed."""
        pass

    def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
        """Return the weighted-vote label of each example in `usedIndices`.

        When `usedIndices` is None every example of `DATASET` is used, the
        count being read from `DATASET.get("Metadata").attrs["datasetLength"]`.
        `viewsIndices` is not used.
        """
        if usedIndices is None:
            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
        exampleIndices = np.asarray(list(usedIndices), dtype=int)
        rows = np.arange(len(exampleIndices))
        votes = np.zeros((len(exampleIndices), self.nbClass), dtype=float)
        for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions):
            # Labels are used as column indices, so they must be integers
            # even when the decisions were stored in a float array.
            votedLabels = np.asarray(monoviewDecision)[exampleIndices].astype(int)
            votes[rows, votedLabels] += self.weights[monoviewDecisionIndex]
        predictedLabels = np.argmax(votes, axis=1)
        return predictedLabels

    def predict_probas_hdf5(self, DATASET, usedIndices=None):
        """Not implemented for this classifier (returns None)."""
        pass

    def getConfigString(self, classificationKWARGS):
        """Return a one-line description of the current vote weights."""
        return "weights : "+", ".join(map(str, list(self.weights)))

    def getSpecificAnalysis(self, classificationKWARGS):
        """Return a sentence naming the selected classifiers and their disagreement."""
        stringAnalysis = "Classifiers used for each view : " + ', '.join(self.classifiersNames) + \
                         ', with a disagreement of ' + str(self.disagreement)
        return stringAnalysis

# (patch continues) diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/__init__.py
b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/__init__.py new file mode 100644 index 00000000..8119d49d --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/__init__.py @@ -0,0 +1 @@ +from . import DisagreeFusionModule,analyzeResults \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/analyzeResults.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/analyzeResults.py new file mode 100644 index 00000000..af44a6fb --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/DisagreeFusion/analyzeResults.py @@ -0,0 +1,21 @@ +from ...Multiview import analyzeResults + +# Author-Info +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + + +def execute(classifier, trainLabels, + testLabels, DATASET, + classificationKWARGS, classificationIndices, + LABELS_DICTIONARY, views, nbCores, times, + name, KFolds, + hyperParamSearch, nIter, metrics, + viewsIndices, randomState, labels, classifierModule): + return analyzeResults.execute(classifier, trainLabels, + testLabels, DATASET, + classificationKWARGS, classificationIndices, + LABELS_DICTIONARY, views, nbCores, times, + name, KFolds, + hyperParamSearch, nIter, metrics, + viewsIndices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py index bd795d0e..9f3d15f3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusion.py @@ -73,54 +73,54 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas 
return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] -def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): - nbViews = len(viewsIndices) - nbClassifiers = len(allClassifersNames) - nbFolds = len(resultsMonoview[0][1][6]) - foldsLen = len(resultsMonoview[0][1][6][0]) - classifiersNames = [[] for _ in viewsIndices] - classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen)) - - for resultMonoview in resultsMonoview: - if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]: - pass - else: - classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0]) - classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0]) - classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6] - return classifiersDecisions, classifiersNames - - -def disagreement(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): - - classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview) - - foldsLen = len(resultsMonoview[0][1][6][0]) - nbViews = len(viewsIndices) - nbClassifiers = len(allClassifersNames) - combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) - nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1) - disagreements = np.zeros(nbCombinations) - combis = np.zeros((nbCombinations, nbViews), dtype=int) - - for combinationsIndex, combination in enumerate(combinations): - combis[combinationsIndex] = combination - combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)] - binomes = itertools.combinations(combiWithView, 2) - nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2) - disagreement = np.zeros(nbBinomes) - for binomeIndex, binome in 
enumerate(binomes): - (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome - nbDisagree = np.sum(np.logical_xor(classifiersDecisions[viewIndex1, classifierIndex1], - classifiersDecisions[viewIndex2, classifierIndex2]) - , axis=1)/foldsLen - disagreement[binomeIndex] = np.mean(nbDisagree) - disagreements[combinationsIndex] = np.mean(disagreement) - print(disagreements) - bestCombiIndex = np.argmax(disagreements) - bestCombination = combis[bestCombiIndex] - - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] +# def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): +# nbViews = len(viewsIndices) +# nbClassifiers = len(allClassifersNames) +# nbFolds = len(resultsMonoview[0][1][6]) +# foldsLen = len(resultsMonoview[0][1][6][0]) +# classifiersNames = [[] for _ in viewsIndices] +# classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen)) +# +# for resultMonoview in resultsMonoview: +# if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]: +# pass +# else: +# classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0]) +# classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0]) +# classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6] +# return classifiersDecisions, classifiersNames +# +# +# def disagreement(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): +# +# classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview) +# +# foldsLen = len(resultsMonoview[0][1][6][0]) +# nbViews = len(viewsIndices) +# nbClassifiers = len(allClassifersNames) +# combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) +# nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / 
math.factorial(nbClassifiers-1) +# disagreements = np.zeros(nbCombinations) +# combis = np.zeros((nbCombinations, nbViews), dtype=int) +# +# for combinationsIndex, combination in enumerate(combinations): +# combis[combinationsIndex] = combination +# combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)] +# binomes = itertools.combinations(combiWithView, 2) +# nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2) +# disagreement = np.zeros(nbBinomes) +# for binomeIndex, binome in enumerate(binomes): +# (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome +# nbDisagree = np.sum(np.logical_xor(classifiersDecisions[viewIndex1, classifierIndex1], +# classifiersDecisions[viewIndex2, classifierIndex2]) +# , axis=1)/foldsLen +# disagreement[binomeIndex] = np.mean(nbDisagree) +# disagreements[combinationsIndex] = np.mean(disagreement) +# print(disagreements) +# bestCombiIndex = np.argmax(disagreements) +# bestCombination = combis[bestCombiIndex] +# +# return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index 21eb045a..3498b807 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -190,18 +190,13 @@ def parseTheArgs(arguments): help='Names of the classifier used for late fusion', default=['']) groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store', help='Determine which method to use to select the monoview classifiers', - default="disagreement") + default="intersect") groupFatLateFusion = parser.add_argument_group('Fat Late Fusion arguments') groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', action='store', nargs="+", help='Determine the weights of each monoview decision for FLF', 
type=float, default=[]) - groupMumboNew = parser.add_argument_group('New Mumbo implementation arguments') - groupMumboNew.add_argument('--MUN_n_estimators', metavar='INT', action='store', - help='Determine the number of esitmators for mumbo', type=int, - default=10) - groupFatSCMLateFusion = parser.add_argument_group('Fat SCM Late Fusion arguments') groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', action='store', help='Determine the p argument of the SCM', type=float, @@ -213,6 +208,11 @@ def parseTheArgs(arguments): help='Determine the model type of the SCM', default="conjunction") + groupDisagreeFusion = parser.add_argument_group('Disagreement based fusion arguments') + groupDisagreeFusion.add_argument('--DGF_weights', metavar='FLOAT', action='store', nargs="+", + help='Determine the weights of each monoview decision for DFG', type=float, + default=[]) + args = parser.parse_args(arguments) diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/__init__.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py similarity index 91% rename from multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py rename to multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py index 105115cc..d7c77ff7 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/Test_Methods/test_LateFusion.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py @@ -1,7 +1,8 @@ import unittest import numpy as np -from .....MonoMultiViewClassifiers.MultiviewClassifiers.Fusion.Methods import LateFusion +from 
....MonoMultiViewClassifiers.MultiviewClassifiers.DisagreeFusion import DisagreeFusionModule + class Test_disagreement(unittest.TestCase): @@ -39,9 +40,9 @@ class Test_disagreement(unittest.TestCase): cls.classificationIndices = [] def test_simple(cls): - bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, - cls.resultsMonoview, cls.classificationIndices) - cls.assertEqual(bestCombi, ["SCM", "DT"]) + bestCombi, disagreement = DisagreeFusionModule.disagree(cls.allClassifiersNames, cls.viewsIndices, cls.resultsMonoview) + cls.assertAlmostEqual(disagreement, 0.666666666667) + cls.assertEqual(len(bestCombi), 2) def test_viewsIndices(cls): cls.viewsIndices = [0,6] @@ -70,9 +71,9 @@ class Test_disagreement(unittest.TestCase): cls.randomState.random_integers(0, 1, 6)]) ]] ] - bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, - cls.resultsMonoview, cls.classificationIndices) - cls.assertEqual(bestCombi, ["DT", "DT"]) + bestCombi, disagreement = DisagreeFusionModule.disagree(cls.allClassifiersNames, cls.viewsIndices, cls.resultsMonoview) + cls.assertAlmostEqual(disagreement, 0.611111111111) + cls.assertEqual(len(bestCombi), 2) def test_multipleViews(cls): cls.viewsIndices = [0, 6, 18] @@ -113,6 +114,6 @@ class Test_disagreement(unittest.TestCase): cls.randomState.random_integers(0, 1, 6)]) ]] ] - bestCombi = LateFusion.disagreement(cls.allClassifiersNames, cls.directory, cls.viewsIndices, - cls.resultsMonoview, cls.classificationIndices) - cls.assertEqual(bestCombi, ['SCM', 'SVM', 'SVM']) \ No newline at end of file + bestCombi, disagreement = DisagreeFusionModule.disagree(cls.allClassifiersNames, cls.viewsIndices, cls.resultsMonoview,) + cls.assertAlmostEqual(disagreement, 0.592592592593) + cls.assertEqual(len(bestCombi), 3) \ No newline at end of file diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py 
b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py index a8e9bb41..8aa7084d 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py @@ -10,5 +10,5 @@ class Test_genName(unittest.TestCase): "fusionMethod": "chicken_is_heaven", "classifiersNames": ["cheese", "is", "no", "disease"]} res = FusionModule.genName(self.config) - self.assertEqual(res, "Late-chic-chee-is-no-dise") + self.assertEqual(res, "Late-chic") -- GitLab