diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 5e3ff07f7369adfa88ce9007d1fce5a8621cbebe..7553e6b81dd5c645c11f7dec47790c4026694f33 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -216,13 +216,13 @@ def lateFusionSetArgs(views, viewsIndices, classes, method, def initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, classifiersNames, - NB_VIEW, metrics, argumentDictionaries): + NB_VIEW, metrics, argumentDictionaries, randomState): multiviewArguments = [] if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: multiviewPackage = getattr(Multiview, multiviewAlgoName) mutliviewModule = getattr(multiviewPackage, multiviewAlgoName) - multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices) + multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState) argumentDictionaries["Multiview"] = multiviewArguments return argumentDictionaries @@ -494,7 +494,7 @@ else: monoviewTime = time.time() - dataBaseTime - start argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, - classifiersNames, NB_VIEW, metrics[0], argumentDictionaries) + classifiersNames, NB_VIEW, metrics[0], argumentDictionaries, randomState) if nbCores > 1: resultsMultiview = [] @@ -503,14 +503,14 @@ if nbCores > 1: resultsMultiview += Parallel(n_jobs=nbCores)( delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, args.CL_split, args.CL_nbFolds, args.type, args.pathF, - LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch, + LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) else: resultsMultiview = [ ExecMultiview(directory, DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, - LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch, + LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in argumentDictionaries["Multiview"]] multiviewTime = time.time() - monoviewTime - dataBaseTime - start @@ -523,7 +523,6 @@ labels = np.array( resultsMultiview]).transpose() trueLabels = DATASET.get("Labels").value times = [dataBaseTime, monoviewTime, multiviewTime] -# times=[] results = (resultsMonoview, resultsMultiview) analyzeLabels(labels, trueLabels, results, directory) logging.debug("Start:\t Analyze Global Results") diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 77e14c2006c0b1b999359387b8a6f14ecfc8de2f..153b0b6a0a58897a97855c4ba0cab2a0a83caa0e 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -7,27 +7,26 @@ sys.path.append( from Multiview import * import GetMultiviewDb as DB -import argparse import os import logging import time import h5py -from utils.Dataset import getShape -from utils.HyperParameterSearch import searchBestSettings +from ..utils.Dataset import getShape +from ..utils.HyperParameterSearch import searchBestSettings # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, statsIter, +def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r") - return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter, + return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, +def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): datasetLength = DATASET.get("Metadata").attrs["datasetLength"] @@ -36,7 +35,7 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data viewsIndices = kwargs["viewsIndices"] NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] if not metrics: - metrics = [["accuracy_score", None]] + metrics = [["f1_score", None]] metric = metrics[0] CL_type = kwargs["CL_type"] LABELS_NAMES = kwargs["LABELS_NAMES"] @@ -63,14 +62,14 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data analysisModule = getattr(classifierPackage, "analyzeResults") logging.info("Start:\t Determine validation split for ratio " + str(learningRate)) - iValidationIndices = [DB.splitDataset(DATASET, learningRate, datasetLength) for iterIndex in range(statsIter)] + iValidationIndices = [DB.splitDataset(DATASET, learningRate, datasetLength, randomState) for _ in range(statsIter)] iLearningIndices = [[index for index in range(datasetLength) if index not in validationIndices] for validationIndices in iValidationIndices] iClassificationSetLength = [len(learningIndices) for learningIndices in iLearningIndices] logging.info("Done:\t Determine validation split") logging.info("Start:\t Determine "+str(nbFolds)+" folds") if nbFolds != 1: - iKFolds = [DB.getKFoldIndices(nbFolds, DATASET.get("Labels")[...], NB_CLASS, learningIndices) for learningIndices in iLearningIndices] + iKFolds = [DB.getKFoldIndices(nbFolds, DATASET.get("Labels")[...], NB_CLASS, learningIndices, randomState) for learningIndices in iLearningIndices] else: iKFolds = [[[], range(classificationSetLength)] for classificationSetLength in iClassificationSetLength] @@ -84,10 +83,9 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data # logging.info("Start:\t Classification") # Begin Classification if hyperParamSearch != "None": - classifier = searchBestSettings(DATASET, CL_type, metrics, iLearningIndices, iKFolds, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) + classifier = searchBestSettings(DATASET, CL_type, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) else: classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) - # classifier.setParams(classificationKWARGS) for _ in range(statsIter): classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) @@ -112,7 +110,7 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data LABELS_DICTIONARY, views, nbCores, times, name, nbFolds, ivalidationIndices, hyperParamSearch, nIter, metrics, statsIter, - viewsIndices) + viewsIndices, randomState) labelsSet = set(LABELS_DICTIONARY.values()) logging.info(stringAnalysis) featureString = "-".join(views) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 1f30cb63d676e45e526e9a90c5f6fd3a3a2bfd57..11a58af63d5656621c71fe866bed8981df3bc693 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -95,12 +95,12 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= return monoviewData -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): fusionTypeName = classificationKWARGS["fusionType"] fusionTypePackage = globals()[fusionTypeName+"Package"] fusionMethodModuleName = classificationKWARGS["fusionMethod"] fusionMethodModule = getattr(fusionTypePackage, fusionMethodModuleName) - fusionMethodConfig = fusionMethodModule.genParamsSets(classificationKWARGS, nIter=nIter) + fusionMethodConfig = fusionMethodModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) return fusionMethodConfig @@ -134,7 +134,7 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices class Fusion: - def __init__(self, NB_CORES=1,**kwargs): + def __init__(self, randomState, NB_CORES=1, **kwargs): fusionType = kwargs['fusionType'] fusionMethod = kwargs['fusionMethod'] fusionTypePackage = globals()[fusionType+"Package"] @@ -142,7 +142,7 @@ class Fusion: fusionMethodClass = getattr(fusionMethodModule, fusionMethod) nbCores = NB_CORES classifierKWARGS = dict((key, value) for key, value in kwargs.iteritems() if key not in ['fusionType', 'fusionMethod']) - self.classifier = fusionMethodClass(NB_CORES=nbCores, **classifierKWARGS) + self.classifier = fusionMethodClass(randomState, NB_CORES=nbCores, **classifierKWARGS) def setParams(self, paramsSet): self.classifier.setParams(paramsSet) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index cb17c4c70d63e82de95222bc82f1af5892874af0..1ba7f1b8eb187534234aaead7a375764508ff1b5 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -6,7 +6,7 @@ from utils.Dataset import getV class EarlyFusionClassifier(object): - def __init__(self, monoviewClassifierName, monoviewClassifierConfig, NB_CORES=1): + def __init__(self, randomState, monoviewClassifierName, monoviewClassifierConfig, NB_CORES=1): self.monoviewClassifierName = monoviewClassifierName if type(monoviewClassifierConfig)==dict: pass @@ -18,6 +18,7 @@ class EarlyFusionClassifier(object): self.monoviewClassifier = None self.nbCores = NB_CORES self.monoviewData = None + self.randomState = randomState def makeMonoviewData_hdf5(self, DATASET, weights=None, usedIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index 624ed6f762a099dfc809cf413072820b56b89786..ff72555fc9010c827fc5561b1f06a4c35183461b 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -4,11 +4,11 @@ import numpy as np from sklearn.metrics import accuracy_score -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): - randomWeightsArray = np.random.random_sample(nbView) + randomWeightsArray = randomState.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets @@ -57,8 +57,8 @@ def getArgs(args, views, viewsIndices): class WeightedLinear(EarlyFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - EarlyFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + def __init__(self, randomState, NB_CORES=1, **kwargs): + EarlyFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0]==None: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 9b616adf253e4d70b4a1bfbc5e7b9c709add7b4f..dfebc0c2a8fa806b49574d39547418bc11da2537 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -23,14 +23,13 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr def getAccuracies(LateFusionClassifiers): return "" + def Intersect(resMono): pass - - class LateFusionClassifier(object): - def __init__(self, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1): + def __init__(self, randomState, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1): self.monoviewClassifiersNames = monoviewClassifiersNames self.monoviewClassifiersConfigs = monoviewClassifiersConfigs self.monoviewClassifiers = [] @@ -38,6 +37,7 @@ class LateFusionClassifier(object): self.accuracies = np.zeros(len(monoviewClassifiersNames)) self.needProbas = False self.monoviewSelection = monoviewSelection + self.randomState = randomState def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 9b308c51c74a945880ceffac3f9e1b37a23744fb..6ddcfb54bae48b3def6ba59dd4028ea00770a2c6 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -7,11 +7,11 @@ import MonoviewClassifiers from ...Methods.LateFusion import LateFusionClassifier -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): - randomWeightsArray = np.random.random_sample(nbView) + randomWeightsArray = randomState.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets @@ -78,8 +78,8 @@ def getArgs(args, views, viewsIndices): class BayesianInference(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + def __init__(self, randomState, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) # self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 086c64050b04f154c7c43ab2ab54316017cba3f1..7f453270271a6dcd45bc85be6dbbb650e8a5d290 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -5,11 +5,11 @@ from sklearn.metrics import accuracy_score from utils.Dataset import getV -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): - randomWeightsArray = np.random.random_sample(nbView) + randomWeightsArray = randomState.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets @@ -58,8 +58,8 @@ def getArgs(args, views, viewsIndices): class MajorityVoting(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + def __init__(self, randomState, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index e91a87af8bc6f64c4e8725c8bd8d3e4633d96cdb..f5218f495d1b84b28574131a7af7ee8e95e65b53 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -14,14 +14,14 @@ from sklearn.metrics import accuracy_score import itertools -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): - max_attributes = random.randint(1, 20) - p = random.random() - model = random.choice(["conjunction", "disjunction"]) - order = random.randint(1,nbView) + max_attributes = randomState.randint(1, 20) + p = randomState.random() + model = randomState.choice(["conjunction", "disjunction"]) + order = randomState.randint(1,nbView) paramsSets.append([p, max_attributes, model, order]) return paramsSets @@ -72,8 +72,8 @@ def getArgs(args, views, viewsIndices): class SCMForLinear(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + def __init__(self, randomState, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SCMClassifier = None # self.config = kwargs['fusionMethodConfig'][0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index 1f2f1ad5bb64a2098067a1b81b62554f99346652..fa9ee8807eafb7fb8ad85520b8a8aee933a58023 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -6,7 +6,7 @@ from sklearn.svm import SVC from utils.Dataset import getV -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): @@ -38,8 +38,8 @@ def getArgs(args, views, viewsIndices): return [arguments] class SVMForLinear(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + def __init__(self, randomState, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SVMClassifier = None diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index e81ebb84768eb98afe86a01b821c1a8a1aa499af..e880a773ef1dcc66fcea117c2b84d08880fb0619 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -5,11 +5,11 @@ from sklearn.metrics import accuracy_score from utils.Dataset import getV -def genParamsSets(classificationKWARGS, nIter=1): +def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): - randomWeightsArray = np.random.random_sample(nbView) + randomWeightsArray = randomState.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets @@ -36,30 +36,10 @@ def getArgs(args, views, viewsIndices): "nbView": (len(viewsIndices))}} return [arguments] -# def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): -# if type(viewsIndices)==type(None): -# viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) -# nbView = len(viewsIndices) -# bestScore = 0.0 -# bestConfig = None -# if classificationKWARGS["fusionMethodConfig"][0] is not None: -# for i in range(nIter): -# randomWeightsArray = np.random.random_sample(nbView) -# normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) -# classificationKWARGS["fusionMethodConfig"][0] = normalizedArray -# classifier = WeightedLinear(1, **classificationKWARGS) -# classifier.fit_hdf5(DATASET, trainIndices, viewsIndices=viewsIndices) -# predictedLabels = classifier.predict_hdf5(DATASET, trainIndices, viewsIndices=viewsIndices) -# accuracy = accuracy_score(DATASET.get("Labels")[trainIndices], predictedLabels) -# if accuracy > bestScore: -# bestScore = accuracy -# bestConfig = normalizedArray -# return [bestConfig] - class WeightedLinear(LateFusionClassifier): - def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + def __init__(self, randomState, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py index 874452b2a8d54aca9d0a0cf468c58b33ac097dec..6906723fb15730b650330196f8649ece801f6a82 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py @@ -68,7 +68,7 @@ def getMetricsScores(metrics, trainLabelsIterations, testLabelsIterations, def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, DATASET, classificationKWARGS, learningRate, LABELS_DICTIONARY,views, nbCores, times, name, nbFolds, ivalidationIndices, - gridSearch, nIter, metrics, statsIter, viewsIndices): + gridSearch, nIter, metrics, statsIter, viewsIndices, randomState): CLASS_LABELS = DATASET.get("Labels").value @@ -112,7 +112,7 @@ def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, D scoreOnTrain = np.mean(np.array([metricModule.score(CLASS_LABELS[learningIndices[statsIterIndex]], trainLabelsIterations[statsIterIndex], **metricKWARGS) for statsIterIndex in range(statsIter)])) scoreOnTest = np.mean(np.array([metricModule.score(CLASS_LABELS[ivalidationIndices[statsIterIndex]], testLabelsIterations[statsIterIndex], **metricKWARGS) for statsIterIndex in range(statsIter)])) fusionConfiguration = classifiersIterations[0].classifier.getConfig(fusionMethodConfig,monoviewClassifiersNames, monoviewClassifiersConfigs) - stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + \ + stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + "and random state : "+str(randomState)+\ "\n\nAverage "+metrics[0][0]+" :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str(scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(nbFolds) + \ diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py index 0cc9ceaa5543e9f6ca7f4ea07b8668ec4c031791..a2aacd89feb124fa6062db888f55ed0ab436000c 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py @@ -33,7 +33,7 @@ def getConfig(classifierConfig): return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' ' -def gridSearch(data, labels, metric="accuracy_score"): +def gridSearch(data, labels, randomState, metric="accuracy_score"): minSubSampling = 1.0/(len(labels)/2) bestSettings = [] bestResults = [] @@ -51,7 +51,7 @@ def gridSearch(data, labels, metric="accuracy_score"): # baseScore = 1000.0 # isBetter = "lower" for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05, randomState) classifier.fit(subSampledData, subSampledLabels) prediction = classifier.predict(data) preliminary_accuracies[i] = accuracy_score(labels, prediction) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py index 4dcd4ffeb747744687b85f728b1c77c668606ea9..b3ffc382de1435ab667b2f0d78b345401c09daf1 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py @@ -1,5 +1,4 @@ import numpy as np -import random def getLabelSupports(CLASS_LABELS): labels = set(CLASS_LABELS) @@ -15,7 +14,7 @@ def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict): return False, nbTrainingExamples -def subSample(data, labels, subSampling, weights=None): +def subSample(data, labels, subSampling, randomState, weights=None): if weights is None: weights = np.ones(len(labels))/len(labels) nbExamples = len(labels) @@ -26,7 +25,7 @@ def subSample(data, labels, subSampling, weights=None): trainingExamplesIndices = [] usedIndices = [] while nbTrainingExamples != [0 for i in range(len(labelSupports))]: - index = int(random.randint(0, nbExamples - 1)) + index = int(randomState.randint(0, nbExamples - 1)) isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, labels, labelDict) if isUseFull and index not in usedIndices: trainingExamplesIndices.append(index) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index 7763eba33ae37541689c4b69821ef651ad984632..6e62ad341cb43b533dd8491d920bee8fe8ed336e 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -81,7 +81,7 @@ def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATA return classifier, classes, isBad, averageAccuracy -def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, metric=None, nIter=None): +def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, randomState, metric=None, nIter=None): classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): @@ -96,7 +96,7 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, class Mumbo: - def __init__(self, NB_CORES=1, **kwargs): + def __init__(self, randomState, NB_CORES=1, **kwargs): self.maxIter = kwargs["maxIter"] self.minIter = kwargs["minIter"] self.threshold = kwargs["threshold"] @@ -112,6 +112,7 @@ class Mumbo: self.bestViews = np.zeros(self.maxIter, dtype=int)-1 self.averageAccuracies = np.zeros((self.maxIter, nbView)) self.iterAccuracies = np.zeros(self.maxIter) + self.randomState = randomState def initDataDependant(self, datasetLength, nbView, nbClass, labels): self.edges = np.zeros((self.maxIter, nbView)) @@ -243,7 +244,6 @@ class Mumbo: predictedProbas = [] return predictedProbas - def trainWeakClassifiers(self, DATASET, CLASS_LABELS, NB_CLASS, DATASET_LENGTH, NB_VIEW): trainedClassifiers = [] labelsMatrix = [] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py index 0e3433924cfb473b4b91cde49233dfb3066f6907..fbbfba1ad3d982890c25ac58ce520a6be825d8ee 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py @@ -194,7 +194,6 @@ def getReport(classifiersIterations, CLASS_LABELS, iterationValidationIndices, D scoresOnTestByIter) - def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationIndices, DATASET, kFoldPredictedTrainLabels, kFoldPredictedTestLabels, kFoldPredictedValidationLabels,statsIter, viewIndices): nbView = len(viewIndices) @@ -344,6 +343,7 @@ def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationInd kFoldBestViewsStatsM, kFoldAccuracyOnTrainByIterM, kFoldAccuracyOnTestByIterM, kFoldAccuracyOnValidationByIterM, kFoldBestViewsM) + def iterRelevant(iterIndex, kFoldClassifierStats): relevants = np.zeros(len(kFoldClassifierStats[0]), dtype=bool) for statsIterIndex, kFoldClassifier in enumerate(kFoldClassifierStats): @@ -420,7 +420,7 @@ def getMeanIterations(kFoldClassifierStats, foldIndex): def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, DATASET,initKWARGS, LEARNING_RATE,LABELS_DICTIONARY,views, NB_CORES, times,databaseName, nbFolds, validationIndices, gridSearch, - nIter, metrics, statsIter,viewIndices): + nIter, metrics, statsIter,viewIndices, randomState): CLASS_LABELS = DATASET.get("Labels")[...] @@ -432,7 +432,7 @@ def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, D scoresOnTestByIter) = getReport(classifiersIterations, CLASS_LABELS, validationIndices, DATASET, trainLabelsIterations, testLabelsIterations, statsIter, viewIndices, metrics[0]) - stringAnalysis = "\t\tResult for Multiview classification with Mumbo" \ + stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : "+str(randomState) + \ "\n\nAverage "+metrics[0][0]+" :\n\t-On Train : " + str(totalScoreOnTrain) + "\n\t-On Test : " + \ str(totalScoreOnTest) stringAnalysis += dbConfigurationString diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py index 6b63b874d6bd381b8d6ff393188efe4ab8a103ef..23dca4b27cf75e634eed2026b7605fa0c2e3d7ec 100644 --- a/Code/MonoMutliViewClassifiers/utils/Dataset.py +++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py @@ -33,6 +33,7 @@ def getValue(DATASET): shape=DATASET.attrs["shape"]) return sparse_mat + def extractSubset(matrix, usedIndices): if sparse.issparse(matrix): newIndptr = np.zeros(len(usedIndices)+1, dtype=int) diff --git a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py index dec085810224d795f65906dc5205568e9319839d..932aa04e7297a6fdc66a809fc4f9f20554d512ff 100644 --- a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py +++ b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py @@ -4,12 +4,12 @@ import sys import Multiview import Metrics -def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, viewsIndices=None, searchingTool="hyperParamSearch", nIter=1, **kwargs): +def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, searchingTool="hyperParamSearch", nIter=1, **kwargs): if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) thismodule = sys.modules[__name__] searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, viewsIndices=viewsIndices, nIter=nIter, **kwargs) + bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=viewsIndices, nIter=nIter, **kwargs) return bestSettings # or well set clasifier ? @@ -18,7 +18,7 @@ def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, pass -def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): +def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) metric = metrics[0] @@ -33,40 +33,41 @@ def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds statsIter = len(iLearningIndices) if classifierName != "Mumbo": datasetLength = dataset.get("Metadata").attrs["datasetLength"] - paramsSets = classifierModule.genParamsSets(classificationKWARGS, nIter=nIter) + paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) + if metricModule.getConfig()[-14]=="h": + baseScore = -1000.0 + isBetter = "higher" + else: + baseScore = 1000.0 + isBetter = "lower" + bestSettings = None for paramsSet in paramsSets: - if metricModule.getConfig()[-14]=="h": - baseScore = -1000.0 - isBetter = "higher" - else: - baseScore = 1000.0 - isBetter = "lower" scores = [] for statsIterIndex in range(statsIter): for fold in iKFolds[statsIterIndex]: fold.sort() trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index in iLearningIndices[statsIterIndex])] - classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) classifier.setParams(paramsSet) classifier.fit_hdf5(dataset, trainIndices=trainIndices, viewsIndices=viewsIndices) - trainLabels = classifier.predict_hdf5(dataset, usedIndices=trainIndices, viewsIndices=viewsIndices) + # trainLabels = classifier.predict_hdf5(dataset, usedIndices=trainIndices, viewsIndices=viewsIndices) testLabels = classifier.predict_hdf5(dataset, usedIndices=fold, viewsIndices=viewsIndices) - trainScore = metricModule.score(dataset.get("Labels").value[trainIndices], trainLabels) + # trainScore = metricModule.score(dataset.get("Labels").value[trainIndices], trainLabels) testScore = metricModule.score(dataset.get("Labels").value[fold], testLabels) scores.append(testScore) crossValScore = np.mean(np.array(scores)) - if isBetter=="higher" and crossValScore>baseScore: + if isBetter=="higher" and crossValScore > baseScore: baseScore = crossValScore bestSettings = paramsSet - elif isBetter=="lower" and crossValScore<baseScore: + elif isBetter=="lower" and crossValScore < baseScore: baseScore = crossValScore bestSettings = paramsSet classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) classifier.setParams(bestSettings) else: - bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, iLearningIndices[0], metric=metric, nIter=nIter) + bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, iLearningIndices[0], randomState, metric=metric, nIter=nIter) classificationKWARGS["classifiersConfigs"] = bestConfigs classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) diff --git a/Code/MonoMutliViewClassifiers/utils/Transformations.py b/Code/MonoMutliViewClassifiers/utils/Transformations.py index a28cccd4edf709b62e030381a781fc9854d77f81..026ff636e71fa5173e39184c29d2b0f855f68fe1 100644 --- a/Code/MonoMutliViewClassifiers/utils/Transformations.py +++ b/Code/MonoMutliViewClassifiers/utils/Transformations.py @@ -1,5 +1,6 @@ import numpy as np + def signLabels(labels): if set(labels) == (0,1): return np.array([label if label != 0 else -1 for label in labels])