Commit fc061d91 authored by Baptiste Bauvin

Corrected disagreement fusion

parent 540f2e0c
@@ -139,15 +139,7 @@ def arangeMetrics(metrics, metricPrinc):
raise AttributeError(metricPrinc + " not in metric pool")
return metrics
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds):
logging.debug("Start:\t Benchmark initialization")
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
@@ -158,10 +150,34 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
trainIndices = classificationIndices[0]
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview = []
folds = kFolds.split(np.arange(len(trainLabels)), trainLabels)
for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds):
fileName = directory+"/folds/test_labels_fold_"+str(foldIndex)+".csv"
if not os.path.exists(os.path.dirname(fileName)):
try:
os.makedirs(os.path.dirname(fileName))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
np.savetxt(fileName, trainLabels[testCVIndices], delimiter=",")
labelsNames = list(LABELS_DICTIONARY.values())
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
logging.debug("Done:\t Benchmark initialization")
return resultsMonoview, labelsNames
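For orientation, here is a self-contained Python 3 sketch of the per-fold dump that the new benchmarkInit performs, with sklearn's StratifiedKFold standing in for the benchmark's kFolds object and a toy directory and label vector (all names below are illustrative, not taken from the patch):

import os
import numpy as np
from sklearn.model_selection import StratifiedKFold

# Toy stand-ins for the benchmark's directory / trainLabels / kFolds arguments.
directory = "/tmp/demo_benchmark/"
trainLabels = np.random.RandomState(0).randint(0, 2, 50)
kFolds = StratifiedKFold(n_splits=5)

os.makedirs(directory + "folds/", exist_ok=True)
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
# One ground-truth file per cross-validation test fold, as benchmarkInit now writes them.
for foldIndex, (_, testCVIndices) in enumerate(kFolds.split(np.arange(len(trainLabels)), trainLabels)):
    np.savetxt(directory + "folds/test_labels_fold_" + str(foldIndex) + ".csv",
               trainLabels[testCVIndices], delimiter=",")

These files are what getFoldsGroundTruth, added further down in diversity_utils, reads back.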
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
@@ -197,20 +213,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
"""Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
logging.debug("Start:\t Benchmark initialization")
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices = classificationIndices[0]
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
logging.debug("Done:\t Benchmark initialization")
resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
nbExperiments = len(argumentDictionaries["Monoview"])
@@ -252,20 +255,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
logging.debug("Start:\t Benchmark initialization")
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices = classificationIndices[0]
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
logging.debug("Done:\t Benchmark initialization")
resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
for arguments in argumentDictionaries["Monoview"]:
......
import numpy as np
import math
import itertools
from ...utils.Multiclass import isBiclass, genMulticlassMonoviewDecision
from .. import diversity_utils
def genName(config):
@@ -14,129 +12,30 @@ def getBenchmark(benchmark, args=None):
return benchmark
def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview):
nbViews = len(viewsIndices)
nbClassifiers = len(allClassifersNames)
nbFolds = len(resultsMonoview[0][1][6])
foldsLen = len(resultsMonoview[0][1][6][0])
classifiersNames = [[] for _ in viewsIndices]
classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen))
for resultMonoview in resultsMonoview:
if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]:
pass
else:
classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0])
classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0])
classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6]
return classifiersDecisions, classifiersNames
def disagree(allClassifersNames, viewsIndices, resultsMonoview):
classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview)
foldsLen = len(resultsMonoview[0][1][6][0])
nbViews = len(viewsIndices)
nbClassifiers = len(allClassifersNames)
combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews)
nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1)
disagreements = np.zeros(nbCombinations)
combis = np.zeros((nbCombinations, nbViews), dtype=int)
for combinationsIndex, combination in enumerate(combinations):
combis[combinationsIndex] = combination
combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)]
binomes = itertools.combinations(combiWithView, 2)
nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2)
disagreement = np.zeros(nbBinomes)
for binomeIndex, binome in enumerate(binomes):
(viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome
nbDisagree = np.sum(np.logical_xor(classifiersDecisions[viewIndex1, classifierIndex1],
classifiersDecisions[viewIndex2, classifierIndex2])
, axis=1)/float(foldsLen)
disagreement[binomeIndex] = np.mean(nbDisagree)
disagreements[combinationsIndex] = np.mean(disagreement)
bestCombiIndex = np.argmax(disagreements)
bestCombination = combis[bestCombiIndex]
return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], disagreements[bestCombiIndex]
def disagree(classifierDecision1, classifierDecision2, ground_truth):
return np.logical_xor(classifierDecision1, classifierDecision2)
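The refactor reduces disagree to a pairwise measure (the ground_truth argument is ignored here and only kept so every diversity measure shares the same signature); the search over classifier combinations now lives in diversity_utils. A toy check with made-up decision vectors, assuming the two-argument disagree above is in scope:

import numpy as np

d1 = np.array([0, 1, 1, 0, 1])      # decisions of one classifier on a fold
d2 = np.array([0, 0, 1, 1, 1])      # decisions of another classifier on the same fold
truth = np.array([0, 1, 1, 0, 1])   # unused by disagree, present only for signature uniformity

print(disagree(d1, d2, truth))           # [False  True False  True False]
print(disagree(d1, d2, truth).mean())    # 0.4 -> the pair disagrees on 40% of the examples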
def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):
monoviewClassifierModulesNames = benchmark["Monoview"]
classifiersNames, disagreement = disagree(monoviewClassifierModulesNames,
viewsIndices, resultsMonoview)
multiclass_preds = [monoviewResult[1][5] for monoviewResult in resultsMonoview]
if isBiclass(multiclass_preds):
monoviewDecisions = np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview
if classifiersNames[viewsIndices.index(monoviewResult[0])] ==
monoviewResult[1][0]])
else:
monoviewDecisions = np.array(
[genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in
resultsMonoview if classifiersNames[viewsIndices.index(monoviewResult[0])] == monoviewResult[1][0]])
argumentsList = []
arguments = {"CL_type": "DisagreeFusion",
"views": views,
"NB_VIEW": len(views),
"viewsIndices": viewsIndices,
"NB_CLASS": len(args.CL_classes),
"LABELS_NAMES": args.CL_classes,
"DisagreeFusionKWARGS": {
"weights": args.DGF_weights,
"classifiersNames": classifiersNames,
"monoviewDecisions": monoviewDecisions,
"nbCLass":len(args.CL_classes),
"disagreement":disagreement
}
}
argumentsList.append(arguments)
return argumentsList
return diversity_utils.getArgs(args, benchmark, views, viewsIndices,
randomState, directory, resultsMonoview,
classificationIndices, disagree, "DisagreeFusion")
def genParamsSets(classificationKWARGS, randomState, nIter=1):
"""Used to generate parameters sets for the random hyper parameters optimization function"""
weights = [randomState.random_sample(len(classificationKWARGS["classifiersNames"])) for _ in range(nIter)]
normalizedWeights = [[weightVector/np.sum(weightVector)] for weightVector in weights]
return normalizedWeights
return diversity_utils.genParamsSets(classificationKWARGS, randomState, nIter=nIter)
# """Used to generate parameters sets for the random hyper parameters optimization function"""
# weights = [randomState.random_sample(len(classificationKWARGS["classifiersNames"])) for _ in range(nIter)]
# normalizedWeights = [[weightVector/np.sum(weightVector)] for weightVector in weights]
# return normalizedWeights
class DisagreeFusionClass:
class DisagreeFusionClass(diversity_utils.DiversityFusionClass):
def __init__(self, randomState, NB_CORES=1, **kwargs):
if kwargs["weights"] == []:
self.weights = [1.0/len(["classifiersNames"]) for _ in range(len(["classifiersNames"]))]
else:
self.weights = np.array(kwargs["weights"])/np.sum(np.array(kwargs["weights"]))
self.monoviewDecisions = kwargs["monoviewDecisions"]
self.classifiersNames = kwargs["classifiersNames"]
self.nbClass = kwargs["nbCLass"]
self.disagreement = kwargs["disagreement"]
def setParams(self, paramsSet):
self.weights = paramsSet[0]
def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
pass
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if usedIndices is None:
usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
votes = np.zeros((len(usedIndices), self.nbClass), dtype=float)
for usedIndex, exampleIndex in enumerate(usedIndices):
for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions):
votes[usedIndex, monoviewDecision[exampleIndex]] += self.weights[monoviewDecisionIndex]
predictedLabels = np.argmax(votes, axis=1)
return predictedLabels
def predict_probas_hdf5(self, DATASET, usedIndices=None):
pass
def getConfigString(self, classificationKWARGS):
return "weights : "+", ".join(map(str, list(self.weights)))
diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs)
def getSpecificAnalysis(self, classificationKWARGS):
stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\
', with a disagreement of '+str(self.disagreement)
', with a disagreement of '+str(self.div_measure)
return stringAnalysis
@@ -73,66 +73,6 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas
return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)]
# def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview):
# nbViews = len(viewsIndices)
# nbClassifiers = len(allClassifersNames)
# nbFolds = len(resultsMonoview[0][1][6])
# foldsLen = len(resultsMonoview[0][1][6][0])
# classifiersNames = [[] for _ in viewsIndices]
# classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen))
#
# for resultMonoview in resultsMonoview:
# if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]:
# pass
# else:
# classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0])
# classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0])
# classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6]
# return classifiersDecisions, classifiersNames
#
#
# def disagreement(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices):
#
# classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview)
#
# foldsLen = len(resultsMonoview[0][1][6][0])
# nbViews = len(viewsIndices)
# nbClassifiers = len(allClassifersNames)
# combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews)
# nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1)
# disagreements = np.zeros(nbCombinations)
# combis = np.zeros((nbCombinations, nbViews), dtype=int)
#
# for combinationsIndex, combination in enumerate(combinations):
# combis[combinationsIndex] = combination
# combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)]
# binomes = itertools.combinations(combiWithView, 2)
# nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2)
# disagreement = np.zeros(nbBinomes)
# for binomeIndex, binome in enumerate(binomes):
# (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome
# nbDisagree = np.sum(np.logical_xor(classifiersDecisions[viewIndex1, classifierIndex1],
# classifiersDecisions[viewIndex2, classifierIndex2])
# , axis=1)/foldsLen
# disagreement[binomeIndex] = np.mean(nbDisagree)
# disagreements[combinationsIndex] = np.mean(disagreement)
# print(disagreements)
# bestCombiIndex = np.argmax(disagreements)
# bestCombination = combis[bestCombiIndex]
#
# return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)]
# def allMonoviewClassifiers(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices):
# return allClassifersNames
def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices):
nbViews = len(viewsIndices)
nbClassifiers = len(allClassifersNames)
......
import os
import diversity_utils
for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
if module == '__init__.py' or module[-3:] == '.py' or module[-4:] == '.pyc' or module == '__pycache__' :
continue
......
import numpy as np
import math
import itertools
import os
from ..utils.Multiclass import isBiclass, genMulticlassMonoviewDecision
def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview):
nbViews = len(viewsIndices)
nbClassifiers = len(allClassifersNames)
nbFolds = len(resultsMonoview[0][1][6])
foldsLen = len(resultsMonoview[0][1][6][0])
classifiersNames = [[] for _ in viewsIndices]
classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen))
for resultMonoview in resultsMonoview:
if resultMonoview[1][0] in classifiersNames[viewsIndices.index(resultMonoview[0])]:
pass
else:
classifiersNames[viewsIndices.index(resultMonoview[0])].append(resultMonoview[1][0])
classifierIndex = classifiersNames[viewsIndices.index(resultMonoview[0])].index(resultMonoview[1][0])
classifiersDecisions[viewsIndices.index(resultMonoview[0]), classifierIndex] = resultMonoview[1][6]
return classifiersDecisions, classifiersNames
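A hedged illustration of what getClassifiersDecisions returns: the fake result tuples below only mimic the nested indexing the function relies on ([0] = view index, [1][0] = classifier name, [1][6] = per-fold decision vectors) and are not the real resultsMonoview structure, which this patch does not document.

import numpy as np

fakeResults = [
    (0, ["DecisionTree", None, None, None, None, None, np.array([[0, 1, 1], [1, 0, 1]])]),
    (1, ["DecisionTree", None, None, None, None, None, np.array([[0, 0, 1], [1, 1, 1]])]),
]
decisions, names = getClassifiersDecisions(["DecisionTree"], [0, 1], fakeResults)
print(decisions.shape)  # (2, 1, 2, 3): indexed as (view, classifier, fold, example)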
def couple_div_measure(allClassifersNames, viewsIndices, resultsMonoview, measurement):
classifiersDecisions, classifiersNames = getClassifiersDecisions(allClassifersNames,
viewsIndices,
resultsMonoview)
foldsLen = len(resultsMonoview[0][1][6][0])
nbViews = len(viewsIndices)
nbClassifiers = len(allClassifersNames)
combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews)
nbCombinations = math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1)
div_measure = np.zeros(nbCombinations)
combis = np.zeros((nbCombinations, nbViews), dtype=int)
for combinationsIndex, combination in enumerate(combinations):
combis[combinationsIndex] = combination
combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)]
binomes = itertools.combinations(combiWithView, 2)
nbBinomes = math.factorial(nbViews) / 2 / math.factorial(nbViews-2)
disagreement = np.zeros(nbBinomes)
for binomeIndex, binome in enumerate(binomes):
(viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome
nbDisagree = np.sum(measurement(classifiersDecisions[viewIndex1, classifierIndex1],
classifiersDecisions[viewIndex2, classifierIndex2])
, axis=1)/float(foldsLen)
disagreement[binomeIndex] = np.mean(nbDisagree)
div_measure[combinationsIndex] = np.mean(disagreement)
bestCombiIndex = np.argmax(div_measure)
bestCombination = combis[bestCombiIndex]
return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], div_measure[bestCombiIndex]
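The two factorial expressions above are plain binomial counts: combinations_with_replacement of nbClassifiers classifiers over nbViews views yields C(nbClassifiers+nbViews-1, nbViews) combinations, and each combination gives C(nbViews, 2) view pairs. Note that on Python 3 the plain / divisions above produce floats, so np.zeros(nbCombinations) would need an explicit int; the quick check below uses // to stay integral (toy sizes, purely illustrative):

import math
import itertools

nbClassifiers, nbViews = 4, 3
combos = list(itertools.combinations_with_replacement(range(nbClassifiers), nbViews))
assert len(combos) == math.factorial(nbClassifiers + nbViews - 1) // math.factorial(nbViews) // math.factorial(nbClassifiers - 1)  # 20
assert len(list(itertools.combinations(range(nbViews), 2))) == math.factorial(nbViews) // 2 // math.factorial(nbViews - 2)  # 3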
def getFoldsGroundTruth(directory):
    """Reads back the per-fold test labels dumped by benchmarkInit (folds/test_labels_fold_<i>.csv)
    and returns them indexed by fold. Assumes at most ten folds (single-digit index in the file name)."""
    foldsFilesNames = os.listdir(directory+"folds/")
    foldsGroundTruth = [None] * len(foldsFilesNames)
    for fileName in foldsFilesNames:
        foldIndex = int(fileName[-5])
        foldsGroundTruth[foldIndex] = np.genfromtxt(directory+"folds/"+fileName, delimiter=",")
    return foldsGroundTruth
def getArgs(args, benchmark, views, viewsIndices, randomState,
directory, resultsMonoview, classificationIndices, measurement, name):
foldsGroundTruth = getFoldsGroundTruth(directory)
monoviewClassifierModulesNames = benchmark["Monoview"]
classifiersNames, div_measure = couple_div_measure(monoviewClassifierModulesNames,
viewsIndices, resultsMonoview, measurement)
multiclass_preds = [monoviewResult[1][5] for monoviewResult in resultsMonoview]
if isBiclass(multiclass_preds):
monoviewDecisions = np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview
if classifiersNames[viewsIndices.index(monoviewResult[0])] ==
monoviewResult[1][0]])
else:
monoviewDecisions = np.array(
[genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in
resultsMonoview if classifiersNames[viewsIndices.index(monoviewResult[0])] == monoviewResult[1][0]])
argumentsList = []
arguments = {"CL_type": name,
"views": views,
"NB_VIEW": len(views),
"viewsIndices": viewsIndices,
"NB_CLASS": len(args.CL_classes),
"LABELS_NAMES": args.CL_classes,
name+"KWARGS": {
"weights": args.DGF_weights,
"classifiersNames": classifiersNames,
"monoviewDecisions": monoviewDecisions,
"nbCLass":len(args.CL_classes),
"div_measure":div_measure
}
}
argumentsList.append(arguments)
return argumentsList
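Because the KWARGS key is built from the name argument, this generic getArgs produces the same module-specific dictionary that DisagreeFusion previously built by hand; a trivial illustration of the computed dictionary key:

name = "DisagreeFusion"
arguments = {name + "KWARGS": {"weights": []}}
print(list(arguments.keys()))  # ['DisagreeFusionKWARGS']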
def genParamsSets(classificationKWARGS, randomState, nIter=1):
"""Used to generate parameters sets for the random hyper parameters optimization function"""
weights = [randomState.random_sample(len(classificationKWARGS["classifiersNames"])) for _ in range(nIter)]
normalizedWeights = [[weightVector/np.sum(weightVector)] for weightVector in weights]
return normalizedWeights
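genParamsSets draws one random weight vector per iteration and normalises it to sum to one; a small illustration with a seeded RandomState (assuming the genParamsSets just above is in scope; the printed values are indicative only):

import numpy as np

randomState = np.random.RandomState(42)
paramsSets = genParamsSets({"classifiersNames": ["DecisionTree", "SVMLinear", "Adaboost"]}, randomState, nIter=2)
print(len(paramsSets), float(np.sum(paramsSets[0][0])))  # 2 and (approximately) 1.0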
class DiversityFusionClass:
def __init__(self, randomState, NB_CORES=1, **kwargs):
if kwargs["weights"] == []:
self.weights = [1.0/len(kwargs["classifiersNames"]) for _ in range(len(kwargs["classifiersNames"]))]
else:
self.weights = np.array(kwargs["weights"])/np.sum(np.array(kwargs["weights"]))
self.monoviewDecisions = kwargs["monoviewDecisions"]
self.classifiersNames = kwargs["classifiersNames"]
self.nbClass = kwargs["nbCLass"]
self.div_measure = kwargs["div_measure"]
def setParams(self, paramsSet):
self.weights = paramsSet[0]
def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
pass
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if usedIndices is None:
usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
votes = np.zeros((len(usedIndices), self.nbClass), dtype=float)
for usedIndex, exampleIndex in enumerate(usedIndices):
for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions):
votes[usedIndex, monoviewDecision[exampleIndex]] += self.weights[monoviewDecisionIndex]
predictedLabels = np.argmax(votes, axis=1)
return predictedLabels
def predict_probas_hdf5(self, DATASET, usedIndices=None):
pass
def getConfigString(self, classificationKWARGS):
return "weights : "+", ".join(map(str, list(self.weights)))
def getSpecificAnalysis(self, classificationKWARGS):
stringAnalysis = "Classifiers used for each view : " + ', '.join(self.classifiersNames)
return stringAnalysis
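predict_hdf5 implements a weighted plurality vote over the stored monoview decisions; here is the same logic in isolation, on made-up decisions and weights (no HDF5 dataset involved):

import numpy as np

monoviewDecisions = [np.array([0, 1, 1]), np.array([0, 0, 1]), np.array([1, 1, 1])]  # one label vector per monoview classifier
weights = np.array([0.5, 0.3, 0.2])
nbClass = 2

votes = np.zeros((len(monoviewDecisions[0]), nbClass))
for decisionIndex, decision in enumerate(monoviewDecisions):
    for exampleIndex, label in enumerate(decision):
        votes[exampleIndex, label] += weights[decisionIndex]
print(np.argmax(votes, axis=1))  # [0 1 1]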