diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py
index ddf891d5d0d48cfc139be7ab7c5115aa29a4e338..bdf29ce33bf88d6168d8ec411a3b7ac1659a0ea7 100644
--- a/Code/MonoMutliViewClassifiers/ExecClassif.py
+++ b/Code/MonoMutliViewClassifiers/ExecClassif.py
@@ -404,6 +404,7 @@ groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action=
                              help='Determine which method to use to select the monoview classifiers',
                              default="intersect")
 
+start = time.time()
 args = parser.parse_args()
 os.nice(args.nice)
 
@@ -412,7 +413,6 @@
 statsIter = args.CL_statsiter
 randomState = np.random.RandomState(args.randomState)
 hyperParamSearch = args.CL_HPS_type
-start = time.time()
 
 if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]:
     getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
@@ -475,7 +475,7 @@
 if nbCores > 1:
     for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
         resultsMonoview += (Parallel(n_jobs=nbCores)(
             delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                            coreIndex, args.type, args.pathF, statsIter, randomState,
+                                            coreIndex, args.type, args.pathF, randomState,
                                             hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                             **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -490,7 +490,7 @@ if nbCores > 1:
 else:
     resultsMonoview += ([ExecMonoview(directory, DATASET.get("View" + str(arguments["viewIndex"])),
                                       DATASET.get("Labels").value, args.name, labelsNames,
-                                      classificationIndices, kFolds, 1, args.type, args.pathF, statsIter, randomState,
+                                      classificationIndices, kFolds, 1, args.type, args.pathF, randomState,
                                       hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                       **arguments)
                         for arguments in argumentDictionaries["Monoview"]])
@@ -511,14 +511,14 @@ if nbCores > 1:
     resultsMultiview += Parallel(n_jobs=nbCores)(
         delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
                                          args.type, args.pathF,
-                                         LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=hyperParamSearch,
+                                         LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
                                          metrics=metrics, nIter=args.CL_GS_iter,
                                          **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
         for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
 else:
     resultsMultiview = [
         ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
-                      LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=hyperParamSearch,
+                      LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
                       metrics=metrics, nIter=args.CL_GS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]
 multiviewTime = time.time() - monoviewTime - dataBaseTime - start
@@ -536,3 +536,10 @@ analyzeLabels(labels, trueLabels, results, directory)
 logging.debug("Start:\t Analyze Global Results")
 resultAnalysis(benchmark, results, args.name, times, metrics, directory)
 logging.debug("Done:\t Analyze Global Results")
+globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime
+totalTime = time.time() - start
+logging.info("Extraction time : "+str(dataBaseTime)+
+             "s, Monoview time : "+str(monoviewTime)+
+             "s, Multiview Time : "+str(multiviewTime)+
+             "s, Global Analysis Time : "+str(globalAnalysisTime)+
+             "s, Total Duration : "+str(totalTime)+"s")
\ No newline at end of file
diff --git
a/Code/MonoMutliViewClassifiers/FeatExtraction/FeatExtraction.py b/Code/MonoMutliViewClassifiers/FeatExtraction/FeatExtraction.py index 34154cc9d6512e4e1a22494db0e591f69000d246..3859414838a8d483f498259bba520ec22d8f23ea 100644 --- a/Code/MonoMutliViewClassifiers/FeatExtraction/FeatExtraction.py +++ b/Code/MonoMutliViewClassifiers/FeatExtraction/FeatExtraction.py @@ -218,7 +218,7 @@ def calcSURFSIFTDescriptors(dfImages, boolSIFT): elif(float(i)/float(len(npImages))>0.5 and bool_Progress==False): logging.debug(feat + "50% of images processed (Keypoints)") bool_Progress = None - elif(float(i)/float(len(npImages))>0.75 and bool_Progress==None): + elif float(i)/float(len(npImages))>0.75 and bool_Progress==None: logging.debug(feat + "75% of images processed (Keypoints)") bool_Progress = NotImplemented diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index a22e478f22d491f18ab5e96b7839a9daf63852df..5ee0407db3defc376103ef8f0d93bb03ae5d07cb 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -30,7 +30,7 @@ __status__ = "Prototype" # Production, Development, Prototype __date__ = 2016-03-25 -def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, path, statsIter, randomState, hyperParamSearch="randomizedSearch", +def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): DATASET = h5py.File(path+name+str(datasetFileIndex)+".hdf5", "r") kwargs = args["args"] @@ -38,11 +38,11 @@ def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, neededViewIndex = views.index(kwargs["feat"]) X = DATASET.get("View"+str(neededViewIndex)) Y = DATASET.get("Labels").value - return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path, statsIter, randomState, hyperParamSearch=hyperParamSearch, + return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, statsIter, randomState, hyperParamSearch="randomizedSearch", +def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): logging.debug("Start:\t Loading data") try: @@ -115,7 +115,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol stringAnalysis, imagesAnalysis, metricsScores = execute(name, classificationIndices, KFolds, nbCores, hyperParamSearch, metrics, nIter, feat, CL_type, clKWARGS, labelsNames, X.shape, - y_train, y_train_pred, y_test, y_test_pred, t_end, statsIter, randomState) + y_train, y_train_pred, y_test, y_test_pred, t_end, randomState) cl_desc = [value for key, value in sorted(clKWARGS.iteritems())] logging.debug("Done:\t Getting Results") logging.info(stringAnalysis) @@ -151,7 +151,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.info("Done:\t Result Analysis") viewIndex = args["viewIndex"] - 
return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels, cl_res] + return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels, clKWARGS] # # Classification Report with Precision, Recall, F1 , Support # logging.debug("Info:\t Classification report:") diff --git a/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py b/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py index d495a776ec2006f55cd492f605fe1dc7d9a6e87f..ea81871c29810df885a38df9512c08acfa7240b3 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py +++ b/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py @@ -43,7 +43,7 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type, clKWARGS, classLabelsNames, - shape, y_train, y_train_pred, y_test, y_test_pred, time, statsIter, randomState): + shape, y_train, y_train_pred, y_test, y_test_pred, time, randomState): metricsScores = {} metricModule = getattr(Metrics, metrics[0][0]) trainScore = metricModule.score(y_train, y_train_pred) @@ -52,7 +52,7 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea # val = np.mean(testScores) stdTrain = "nan" #np.std(trainScores) stdTest = "nan" #np.std(testScores) - stringAnalysis = "Classification on "+name+" database for "+feat+" with "+CL_type+", random state is "+str(randomState)+", and "+str(statsIter)+" statistical iterations\n\n" + stringAnalysis = "Classification on "+name+" database for "+feat+" with "+CL_type+", random state is "+str(randomState)+".\n\n" stringAnalysis += metrics[0][0]+" on train : "+str(trainScore)+", with STD : "+str(stdTrain)+"\n"+metrics[0][0]+" on test : "+str(testScore)+", with STD : "+str(stdTest)+"\n\n" stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) stringAnalysis += getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS) diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 7e5a37dc3a290cacf7587c2fa9621b0d4bb71aa3..41e02a2baba2da22309cf25b897c3092fb6a6b76 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -21,28 +21,28 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, +def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, randomState, hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r") - return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, + return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, +def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, randomState, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): - 
datasetLength = DATASET.get("Metadata").attrs["datasetLength"] - NB_VIEW = kwargs["NB_VIEW"] + # datasetLength = DATASET.get("Metadata").attrs["datasetLength"] + # NB_VIEW = kwargs["NB_VIEW"] views = kwargs["views"] viewsIndices = kwargs["viewsIndices"] - NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] + # NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] if not metrics: metrics = [["f1_score", None]] - metric = metrics[0] + # metric = metrics[0] CL_type = kwargs["CL_type"] - LABELS_NAMES = kwargs["LABELS_NAMES"] + # LABELS_NAMES = kwargs["LABELS_NAMES"] classificationKWARGS = kwargs[CL_type+"KWARGS"] - learningRate = len(classificationIndices[0])/(len(classificationIndices[0])+len(classificationIndices[1])) + learningRate = len(classificationIndices[0])/float((len(classificationIndices[0])+len(classificationIndices[1]))) t_start = time.time() logging.info("### Main Programm for Multiview Classification") logging.info("### Classification - Database : " + str(name) + " ; Views : " + ", ".join(views) + @@ -65,13 +65,13 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor classifierClass = getattr(classifierModule, CL_type) analysisModule = getattr(classifierPackage, "analyzeResults") - logging.info("Train ratio : " + str(learningRate)) + # logging.info("Train ratio : " + str(learningRate)) # iValidationIndices = [DB.splitDataset(DATASET, classificationIndices, datasetLength, randomState) for _ in range(statsIter)] # iLearningIndices = [[index for index in range(datasetLength) if index not in validationIndices] for validationIndices in iValidationIndices] # iClassificationSetLength = [len(learningIndices) for learningIndices in iLearningIndices] # logging.info("Done:\t Determine validation split") - logging.info("CV On " + str(KFolds.n_splits) + " folds") + # logging.info("CV On " + str(KFolds.n_splits) + " folds") # if KFolds != 1: # iKFolds = [DB.getKFoldIndices(KFolds, DATASET.get("Labels")[...], NB_CLASS, learningIndices, randomState) for learningIndices in iLearningIndices] # else: @@ -89,10 +89,15 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor if hyperParamSearch != "None": classifier = searchBestSettings(DATASET, CL_type, metrics, learningIndices, KFolds, randomState, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) else: - classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) + # try: + # if "MajorityVoting" == classificationKWARGS["fusionMethod"]: + # import pdb; pdb.set_trace() + # except: + # pass testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices) # trainLabelsIterations.append(trainLabels) @@ -113,7 +118,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor classificationKWARGS, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, statsIter, + hyperParamSearch, nIter, metrics, viewsIndices, randomState) labelsSet = set(LABELS_DICTIONARY.values()) logging.info(stringAnalysis) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py 
b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 6bfb7a3f209e4a9063b93a674a10a5a654d27f29..20ebc5e5a5e0dd2c32bbdcc700dfcb6af75c60aa 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -75,7 +75,7 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result fusionTypePackage = getattr(Methods, fusionType+"Package") for fusionMethod in benchmark["Multiview"]["Fusion"]["Methods"][fusionType]: fusionMethodModule = getattr(fusionTypePackage, fusionMethod) - arguments = fusionMethodModule.getArgs(args, views, viewsIndices, directory, resultsMonoview) + arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview) argumentsList+= arguments return argumentsList @@ -86,7 +86,7 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= if not usedIndices: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) NB_VIEW = len(viewsIndices) - if weights==None: + if weights is None: weights = np.array([1/NB_VIEW for i in range(NB_VIEW)]) if sum(weights)!=1: weights = weights/sum(weights) @@ -168,7 +168,7 @@ class Fusion: # return fusionType, fusionMethod, classifier def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) @@ -176,7 +176,7 @@ class Fusion: return predictedLabels def predict_probas_hdf5(self, DATASET, usedIndices=None): - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if usedIndices: predictedLabels = self.classifier.predict_probas_hdf5(DATASET, usedIndices=usedIndices) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index e72b0e3e9dbaa2aef6e550009f102357d14810c1..f2527a9e50a55b510f2695109b494111be581eea 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -18,13 +18,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): argumentsList = [] if args.FU_E_cl_names != ['']: pass else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames = benchmark["Monoview"] args.FU_E_cl_names = monoviewClassifierModulesNames args.FU_E_cl_config = [None for _ in monoviewClassifierModulesNames] for classifierName, classifierConfig in zip(args.FU_E_cl_names, args.FU_E_cl_config): @@ -85,7 +84,7 @@ class WeightedLinear(EarlyFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): EarlyFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig']==None: + if kwargs['fusionMethodConfig'] is None: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) elif kwargs['fusionMethodConfig']==['']: self.weights 
= np.ones(len(kwargs["classifiersNames"]), dtype=float) @@ -112,7 +111,7 @@ class WeightedLinear(EarlyFusionClassifier): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) self.weights = self.weights/float(np.sum(self.weights)) - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices, viewsIndices=viewsIndices) predictedLabels = self.monoviewClassifier.predict(self.monoviewData) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index fe204796f9afe84a8b02cbde144a74efd94523f9..690f94f55066f6ff6552984b638d7d8d7ecc7673 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -23,7 +23,7 @@ def canProbasClassifier(classifierConfig): def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas, randomState): - if type(classifierConfig[0])==dict: + if type(classifierConfig) == dict: monoviewClassifier = getattr(MonoviewClassifiers, classifierName) if needProbas and not monoviewClassifier.canProbas(): monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") @@ -35,14 +35,14 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr enumerate(classifierConfig ))) return classifier - else: - if needProbas and not canProbasClassifier(classifierConfig): - monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") - DTConfig = {"0":300, "1":"entropy", "2":"random"} - classifier = monoviewClassifier.fit(data,labels, randomState,DTConfig) - return classifier - else: - return classifierConfig + # else: + # if needProbas and not canProbasClassifier(classifierConfig): + # monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") + # DTConfig = {"0":300, "1":"entropy", "2":"random"} + # classifier = monoviewClassifier.fit(data,labels, randomState,DTConfig) + # return classifier + # else: + # return classifierConfig @@ -122,12 +122,12 @@ def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsInd def getConfig(classifiersNames, resultsMonoview): - classifiers = [0 for _ in range(len(classifiersNames))] + classifiersConfigs = [0 for _ in range(len(classifiersNames))] for viewIndex, classifierName in enumerate(classifiersNames): for resultMonoview in resultsMonoview: if resultMonoview[0]==viewIndex and resultMonoview[1][0]==classifierName: - classifiers[viewIndex]=resultMonoview[1][4] - return classifiers + classifiersConfigs[viewIndex]=resultMonoview[1][4] + return classifiersConfigs def jambon(fromage): pass @@ -149,7 +149,7 @@ class LateFusionClassifier(object): def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if trainIndices == None: + if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) # monoviewSelectionMethod = locals()[self.monoviewSelection] # self.monoviewClassifiers = monoviewSelectionMethod() diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 
f0df43a70b700a09bc414cdde8ab355dbc4e2a46..5cab66d33e33949d314769b67cb9a6f80d886e61 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -25,24 +25,23 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # fusionMethodConfig = args.FU_method_config # return classifiersNames, classifiersConfig, fusionMethodConfig -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: - args.FU_L_select_monoview = "user_defined" + args.FU_L_select_monoview = "user_defined" else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames = benchmark["Monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) - for classifierName in args.FU_L_cl_names] + for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_names==[""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) - if args.FU_L_cl_names==[""] and args.CL_type == ["Multiview"]: - raise AttributeError("You must perform Monoview classification or specify " - "which monoview classifier to use Late Fusion") arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -85,8 +84,8 @@ class BayesianInference(LateFusionClassifier): NB_CORES=NB_CORES) # self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: - self.weights = [1.0 for classifier in kwargs['classifiersNames']] + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: + self.weights = np.array([1.0 for classifier in kwargs['classifiersNames']]) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.needProbas = True @@ -95,13 +94,14 @@ class BayesianInference(LateFusionClassifier): self.weights = paramsSet[0] def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - self.weights = self.weights/float(max(self.weights)) + # self.weights /= float(max(self.weights)) nbView = len(viewsIndices) - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if sum(self.weights)!=1.0: + print self.weights self.weights = self.weights/sum(self.weights) viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py 
b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 9a7fa6d465573bb2ea99089979154c86d5b17115..7171e8d042a6dd71214843fb31f5dad3eda0a5da 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -16,12 +16,11 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: pass else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames = benchmark["Monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] @@ -71,24 +70,24 @@ class MajorityVoting(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) def setParams(self, paramsSet): - self.weights = paramsSet[0] + self.weights = np.array(paramsSet[0]) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - self.weights = self.weights/float(max(self.weights)) - if usedIndices == None: + self.weights /= float(sum(self.weights)) + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) datasetLength = len(usedIndices) - votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) + votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=float) monoViewDecisions = np.zeros((len(usedIndices),nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( @@ -112,7 +111,7 @@ class MajorityVoting(LateFusionClassifier): return predictedLabels def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Majority Voting \n\t-With monoview classifiers : " + configString = "with Majority Voting \n\t-With weights : "+str(self.weights)+"\n\t-With monoview classifiers : " for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index 576a1550409efb5216726fd8de68637415f5ec51..a6144e3dd9e444c9051f01a0a20826e08a4da87c 100644 --- 
a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -27,12 +27,11 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: pass else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames =benchmark["Monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] @@ -87,7 +86,7 @@ class SCMForLinear(LateFusionClassifier): NB_CORES=NB_CORES) self.SCMClassifier = None # self.config = kwargs['fusionMethodConfig'][0] - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: self.p = 1 self.maxAttributes = 5 self.order = 1 @@ -105,28 +104,27 @@ class SCMForLinear(LateFusionClassifier): self.modelType = paramsSet[2] def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if trainIndices == None: + if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(self.monoviewClassifiersConfigs[0])==dict: - for index, viewIndex in enumerate(viewsIndices): - monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) - self.monoviewClassifiers.append( - monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels")[trainIndices], - NB_CORES=self.nbCores, - **dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfigs[index])))) - else: - self.monoviewClassifiers = self.monoviewClassifiersConfigs + # if type(self.monoviewClassifiersConfigs[0])==dict: + for index, viewIndex in enumerate(viewsIndices): + monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) + self.monoviewClassifiers.append( + monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels").value[trainIndices], self.randomState, + NB_CORES=self.nbCores, + **self.monoviewClassifiersConfigs[index])) + # else: + # self.monoviewClassifiers = self.monoviewClassifiersConfigs self.SCMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) accus=[] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index 5acaab03ac42543d66116992519d93b94919295f..86f5f75de9dcd9057db084ef202d444c3a43059e 100644 --- 
a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -17,12 +17,11 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # return None -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: pass else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames = benchmark["Monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] @@ -54,16 +53,16 @@ class SVMForLinear(LateFusionClassifier): self.SVMClassifier = None def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if trainIndices == None: + if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(self.monoviewClassifiersConfigs[0])==dict: for index, viewIndex in enumerate(viewsIndices): monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) self.monoviewClassifiers.append( monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels")[trainIndices], + DATASET.get("Labels").value[trainIndices], self.randomState, NB_CORES=self.nbCores, **dict((str(configIndex), config) for configIndex, config in enumerate(self.monoviewClassifiersConfigs[index])))) @@ -75,10 +74,10 @@ class SVMForLinear(LateFusionClassifier): pass def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index c63313be2b90aec114cac09a99bbf2066eaaab63..eb9678e77c2bf34b4847e9194ca2d6aa5adbd32d 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -16,12 +16,11 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: pass else: - monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage)] + monoviewClassifierModulesNames = benchmark["Monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) 
monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] @@ -51,7 +50,7 @@ class WeightedLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -61,11 +60,11 @@ class WeightedLinear(LateFusionClassifier): self.weights = paramsSet[0] def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) self.weights = self.weights/float(sum(self.weights)) - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py index d378aa0e4421f26de12ef770852772431ad0d2ef..936a1b459644f9d9154cc6f82b8aeea7ca02f067 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py @@ -37,27 +37,25 @@ def printMetricScore(metricScores, metrics): return metricScoreString -def getTotalMetricScores(metric, trainLabels, testLabels, DATASET, validationIndices): +def getTotalMetricScores(metric, trainLabels, testLabels, DATASET, validationIndices, learningIndices): labels = DATASET.get("Labels").value - DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] metricModule = getattr(Metrics, metric[0]) if metric[1]!=None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - learningIndices = [index for index in range(DATASET_LENGTH) if index not in validationIndices] trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS) testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) return [trainScore, testScore] def getMetricsScores(metrics, trainLabels, testLabels, - DATASET, validationIndices): + DATASET, validationIndices, learningIndices): metricsScores = {} for metric in metrics: metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, testLabels, - DATASET, validationIndices) + DATASET, validationIndices, learningIndices) return metricsScores @@ -66,7 +64,7 @@ def execute(classifier, trainLabels, classificationKWARGS, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, statsIter, + hyperParamSearch, nIter, metrics, viewsIndices, randomState): CLASS_LABELS = DATASET.get("Labels").value @@ -77,8 +75,8 @@ def execute(classifier, trainLabels, monoviewClassifiersConfigs = classificationKWARGS["classifiersConfigs"] fusionMethodConfig = classificationKWARGS["fusionMethodConfig"] - DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] - NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] 
+ # DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] + # NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] # kFoldAccuracyOnTrain = np.zeros((nbFolds, statsIter)) # kFoldAccuracyOnTest = np.zeros((nbFolds, statsIter)) # kFoldAccuracyOnValidation = np.zeros((nbFolds, statsIter)) @@ -108,10 +106,10 @@ def execute(classifier, trainLabels, metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1])) else: metricKWARGS = {} - scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], trainLabels, **metricKWARGS) + scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], CLASS_LABELS[learningIndices], **metricKWARGS) scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS) fusionConfiguration = classifier.classifier.getConfig(fusionMethodConfig,monoviewClassifiersNames, monoviewClassifiersConfigs) - stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + " and random state : "+str(randomState)+\ + stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + " and random state : "+str(randomState)+ \ "\n\n"+metrics[0][0]+" :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str(scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(KFolds.n_splits) + \ @@ -120,9 +118,12 @@ def execute(classifier, trainLabels, if fusionType=="LateFusion": stringAnalysis+=Methods.LateFusion.getScores(classifier) metricsScores = getMetricsScores(metrics, trainLabels, testLabels, - DATASET, validationIndices) - stringAnalysis+=printMetricScore(metricsScores, metrics) + DATASET, validationIndices, learningIndices) + # if fusionMethod=="MajorityVoting": + # print CLASS_LABELS[learningIndices]==CLASS_LABELS[learningIndices] + # import pdb;pdb.set_trace() # stringAnalysis += "\n\nComputation time on " + str(nbCores) + " cores : \n\tDatabase extraction time : " + str( + stringAnalysis+=printMetricScore(metricsScores, metrics) # hms(seconds=int(extractionTime))) + "\n\t" # row_format = "{:>15}" * 3 # stringAnalysis += row_format.format("", *['Learn', 'Prediction']) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index 5cd16f82c3aa65fb88ed2baa87ee903a42872ba7..90301d59e8e93882f74a7b73542906af66bb23c0 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -204,9 +204,9 @@ class Mumbo: def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(viewsIndices)==type(None): + if viewsIndices is None: viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) viewDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewsIndices)) @@ -229,19 +229,16 @@ class Mumbo: def predict_proba_hdf5(self, DATASET, usedIndices=None): NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices is not None: - DATASET_LENGTH = len(usedIndices) - predictedProbas = np.zeros((DATASET_LENGTH, NB_CLASS)) + DATASET_LENGTH = len(usedIndices) + predictedProbas = np.zeros((DATASET_LENGTH, NB_CLASS)) - 
for labelIndex, exampleIndex in enumerate(usedIndices): - for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): - data = getV(DATASET, int(view), exampleIndex) - predictedProbas[labelIndex, int(classifier.predict(np.array([data])))] += alpha[view] - predictedProbas[labelIndex,:] = predictedProbas[labelIndex,:]/np.sum(predictedProbas[labelIndex,:]) - else: - predictedProbas = [] + for labelIndex, exampleIndex in enumerate(usedIndices): + for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): + data = getV(DATASET, int(view), exampleIndex) + predictedProbas[labelIndex, int(classifier.predict(np.array([data])))] += alpha[view] + predictedProbas[labelIndex,:] = predictedProbas[labelIndex,:]/np.sum(predictedProbas[labelIndex,:]) return predictedProbas def trainWeakClassifiers(self, DATASET, CLASS_LABELS, NB_CLASS, DATASET_LENGTH, NB_VIEW): @@ -482,24 +479,24 @@ class Mumbo: return np.transpose(predictedLabels) def classifyMumbobyIter_hdf5(self, DATASET, fakeViewsIndicesDict, usedIndices=None, NB_CLASS=2): - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - DATASET_LENGTH = len(usedIndices) - predictedLabels = np.zeros((DATASET_LENGTH, self.maxIter)) - votes = np.zeros((DATASET_LENGTH, NB_CLASS)) + # if usedIndices is not None: + DATASET_LENGTH = len(usedIndices) + predictedLabels = np.zeros((DATASET_LENGTH, self.maxIter)) + votes = np.zeros((DATASET_LENGTH, NB_CLASS)) - for iterIndex, (classifier, alpha, view) in enumerate(zip(self.bestClassifiers, self.alphas, self.bestViews)): - votesByIter = np.zeros((DATASET_LENGTH, NB_CLASS)) + for iterIndex, (classifier, alpha, view) in enumerate(zip(self.bestClassifiers, self.alphas, self.bestViews)): + votesByIter = np.zeros((DATASET_LENGTH, NB_CLASS)) - for usedExampleIndex, exampleIndex in enumerate(usedIndices): - data = np.array([np.array(getV(DATASET,int(view), exampleIndex))]) - votesByIter[usedExampleIndex, int(classifier.predict(data))] += alpha[fakeViewsIndicesDict[view]] - votes[usedExampleIndex] = votes[usedExampleIndex] + np.array(votesByIter[usedExampleIndex]) - predictedLabels[usedExampleIndex, iterIndex] = np.argmax(votes[usedExampleIndex]) - else: - predictedLabels = [] - for i in range(self.maxIter): - predictedLabels.append([]) + for usedExampleIndex, exampleIndex in enumerate(usedIndices): + data = np.array([np.array(getV(DATASET,int(view), int(exampleIndex)))]) + votesByIter[usedExampleIndex, int(classifier.predict(data))] += alpha[fakeViewsIndicesDict[view]] + votes[usedExampleIndex] = votes[usedExampleIndex] + np.array(votesByIter[usedExampleIndex]) + predictedLabels[usedExampleIndex, iterIndex] = np.argmax(votes[usedExampleIndex]) + # else: + # predictedLabels = [] + # for i in range(self.maxIter): + # predictedLabels.append([]) return np.transpose(predictedLabels) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py index fbbfba1ad3d982890c25ac58ce520a6be825d8ee..d4604b20c6b1c03d50355a58e56e8d98017ea161 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py @@ -84,17 +84,17 @@ def getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices DBString += "\n\t-Views : " + ', '.join([viewName+" of shape "+str(viewShape) for viewName, viewShape in zip(viewNames, viewShapes)]) DBString += 
"\n\t-" + str(nbFolds) + " folds" - DBString += "\n\t- Validation set length : "+str(len(validationIndices[0]))+" for learning rate : "+str(LEARNING_RATE)+" on a total number of examples of "+str(DATASET.get("Metadata").attrs["datasetLength"]) + DBString += "\n\t- Validation set length : "+str(len(validationIndices))+" for learning rate : "+str(LEARNING_RATE)+" on a total number of examples of "+str(DATASET.get("Metadata").attrs["datasetLength"]) DBString += "\n\n" return DBString, viewNames -def getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times): - classifierNames = initKWARGS["classifiersNames"] - maxIter = initKWARGS["maxIter"] - minIter = initKWARGS["minIter"] - threshold = initKWARGS["threshold"] - classifiersConfig = initKWARGS["classifiersConfigs"] +def getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, hyperParamSearch, nIter, times): + # classifierNames = classificationKWARGS["classifiersNames"] + maxIter = classificationKWARGS["maxIter"] + minIter = classificationKWARGS["minIter"] + threshold = classificationKWARGS["threshold"] + # classifiersConfig = classificationKWARGS["classifiersConfigs"] extractionTime, classificationTime = times # kFoldLearningTime = [np.mean(np.array([kFoldLearningTime[statsIterIndex][foldIdx] # for statsIterIndex in range(len(kFoldLearningTime))])) @@ -105,19 +105,19 @@ def getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSe weakClassifierConfigs = [getattr(globals()[classifierName], 'getConfig')(classifiersConfig) for classifiersConfig, classifierName - in zip(classifiersIterations[0].classifiersConfigs, classifiersIterations[0].classifiersNames)] + in zip(classifier.classifiersConfigs, classifier.classifiersNames)] classifierAnalysis = [classifierName + " " + weakClassifierConfig + "on " + feature for classifierName, weakClassifierConfig, feature - in zip(classifiersIterations[0].classifiersNames, weakClassifierConfigs, viewNames)] + in zip(classifier.classifiersNames, weakClassifierConfigs, viewNames)] gridSearchString = "" - if gridSearch: + if hyperParamSearch: gridSearchString += "Configurations found by randomized search with "+str(nIter)+" iterations" - algoString = "\n\nMumbo configuration : \n\t-Used "+str(NB_CORES)+" core(s)" + algoString = "\n\nMumbo configuration : \n\t-Used "+str(nbCores)+" core(s)" algoString += "\n\t-Iterations : min " + str(minIter)+ ", max "+str(maxIter)+", threshold "+str(threshold) algoString += "\n\t-Weak Classifiers : " + "\n\t\t-".join(classifierAnalysis) algoString += "\n\n" - algoString += "\n\nComputation time on " + str(NB_CORES) + " cores : \n\tDatabase extraction time : " + str( + algoString += "\n\nComputation time on " + str(nbCores) + " cores : \n\tDatabase extraction time : " + str( hms(seconds=int(extractionTime))) + "\n\t" row_format = "{:>15}" * 3 algoString += row_format.format("", *['Learn', 'Prediction']) @@ -133,215 +133,191 @@ def getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSe return algoString, classifierAnalysis -def getReport(classifiersIterations, CLASS_LABELS, iterationValidationIndices, DATASET, trainLabelsIterations, - testLabelsIterations, statsIter, viewIndices, metric): +def getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, trainLabels, + testLabels, viewIndices, metric): + learningIndices, validationIndices = classificationIndices nbView = len(viewIndices) - viewsDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewIndices)) - 
DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] + # viewsDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewIndices)) + # DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] metricModule = getattr(Metrics, metric[0]) fakeViewsIndicesDict = dict((viewIndex, fakeViewIndex) for viewIndex, fakeViewIndex in zip(viewIndices, range(nbView))) - if metric[1]!=None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) - else: - metricKWARGS = {} - trainScores = [] - testScores = [] - meanAverageAccuraciesIterations =[] - viewsStatsIteration = np.zeros((statsIter, nbView)) - scoresByIterIteration = [] - maxIter = 0 - for statIterIndex in range(statsIter): - predictedTrainLabels = trainLabelsIterations[statIterIndex] - predictedTestLabels = testLabelsIterations[statIterIndex] - validationIndices = iterationValidationIndices[statIterIndex] - learningIndices = [index for index in range(DATASET_LENGTH) if index not in validationIndices] - trainScore = metricModule.score(CLASS_LABELS[learningIndices], predictedTrainLabels) - testScore = metricModule.score(CLASS_LABELS[validationIndices], predictedTestLabels) - trainScores.append(trainScore) - testScores.append(testScore) - mumboClassifier = classifiersIterations[statIterIndex] - if mumboClassifier.iterIndex+1 > maxIter: - maxIter = mumboClassifier.iterIndex - meanAverageAccuraciesIterations.append(np.mean(mumboClassifier.averageAccuracies, axis=0)) - viewsStatsIteration[statIterIndex, :] = np.array([float(list(mumboClassifier.bestViews).count(viewIndex))/ - len(mumboClassifier.bestViews)for viewIndex in range(nbView)]) - PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=learningIndices, - NB_CLASS=NB_CLASS) - PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=validationIndices, - NB_CLASS=NB_CLASS) - scoresByIter = np.zeros((len(PredictedTestLabelsByIter),2)) - for iterIndex,(iterPredictedTrainLabels, iterPredictedTestLabels) in enumerate(zip(PredictedTrainLabelsByIter, PredictedTestLabelsByIter)): - scoresByIter[iterIndex, 0] = metricModule.score(CLASS_LABELS[learningIndices],iterPredictedTrainLabels) - scoresByIter[iterIndex, 1] = metricModule.score(CLASS_LABELS[validationIndices],iterPredictedTestLabels) - scoresByIterIteration.append(scoresByIter) - - scoresOnTainByIter = [np.mean(np.array([scoresByIterIteration[statsIterIndex][iterIndex, 0] - for statsIterIndex in range(statsIter) - if scoresByIterIteration[statsIterIndex].shape[0]>iterIndex])) - for iterIndex in range(maxIter)] - - scoresOnTestByIter = [np.mean(np.array([scoresByIterIteration[statsIterIndex][iterIndex, 1] - for statsIterIndex in range(statsIter) - if scoresByIterIteration[statsIterIndex].shape[0]>iterIndex])) - for iterIndex in range(maxIter)] - - viewsStats = np.mean(viewsStatsIteration, axis=0) - meanAverageAccuracies = np.mean(np.array(meanAverageAccuraciesIterations), axis=0) - totalScoreOnTrain = np.mean(np.array(trainScores)) - totalScoreOnTest = np.mean(np.array(trainScores)) - return (totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter, + # if metric[1]!=None: + # metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + # else: + # metricKWARGS = {} + trainScore = metricModule.score(CLASS_LABELS[learningIndices], trainLabels) + testScore = 
metricModule.score(CLASS_LABELS[validationIndices], testLabels)
+    mumboClassifier = classifier
+    maxIter = mumboClassifier.iterIndex
+    meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0)
+    viewsStats = np.array([float(list(mumboClassifier.bestViews).count(viewIndex))/
+                           len(mumboClassifier.bestViews)for viewIndex in range(nbView)])
+    PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=learningIndices,
+                                                                          NB_CLASS=NB_CLASS)
+    PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=validationIndices,
+                                                                         NB_CLASS=NB_CLASS)
+    scoresByIter = np.zeros((len(PredictedTestLabelsByIter),2))
+    for iterIndex,(iterPredictedTrainLabels, iterPredictedTestLabels) in enumerate(zip(PredictedTrainLabelsByIter, PredictedTestLabelsByIter)):
+        scoresByIter[iterIndex, 0] = metricModule.score(CLASS_LABELS[learningIndices],iterPredictedTrainLabels)
+        scoresByIter[iterIndex, 1] = metricModule.score(CLASS_LABELS[validationIndices],iterPredictedTestLabels)
+
+    scoresOnTainByIter = [scoresByIter[iterIndex, 0] for iterIndex in range(maxIter)]
+
+    scoresOnTestByIter = [scoresByIter[iterIndex, 1] for iterIndex in range(maxIter)]
+
+    return (trainScore, testScore, meanAverageAccuracies, viewsStats, scoresOnTainByIter, scoresOnTestByIter)
 
 
-def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationIndices, DATASET,
-                            kFoldPredictedTrainLabels, kFoldPredictedTestLabels, kFoldPredictedValidationLabels,statsIter, viewIndices):
-    nbView = len(viewIndices)
-    viewsDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewIndices))
-    DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"]
-    NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
-    iterKFoldBestViews = []
-    iterKFoldMeanAverageAccuracies = []
-    iterKFoldAccuracyOnTrainByIter = []
-    iterKFoldAccuracyOnTestByIter = []
-    iterKFoldAccuracyOnValidationByIter = []
-    iterKFoldBestViewsStats = []
-    totalAccuracyOnTrainIter = []
-    totalAccuracyOnTestIter = []
-    totalAccuracyOnValidationIter = []
-
-    for statIterIndex in range(statsIter):
-        kFoldPredictedTrainLabelsByIter = []
-        kFoldPredictedTestLabelsByIter = []
-        kFoldPredictedValidationLabelsByIter = []
-        kFoldBestViews = []
-        kFoldAccuracyOnTrain = []
-        kFoldAccuracyOnTest = []
-        kFoldAccuracyOnValidation = []
-        kFoldAccuracyOnTrainByIter = []
-        kFoldAccuracyOnTestByIter = []
-        kFoldAccuracyOnValidationByIter = []
-        kFoldMeanAverageAccuracies = []
-        kFoldBestViewsStats = []
-        for foldIdx, fold in enumerate(kFolds[statIterIndex]):
-            if fold != range(DATASET_LENGTH):
-
-                trainIndices = [index for index in range(DATASET_LENGTH) if (index not in fold) and (index not in validationIndices[statIterIndex])]
-                testLabels = CLASS_LABELS[fold]
-                trainLabels = CLASS_LABELS[trainIndices]
-                validationLabels = CLASS_LABELS[validationIndices[statIterIndex]]
-
-                mumboClassifier = kFoldClassifier[statIterIndex][foldIdx]
-                kFoldBestViews.append(mumboClassifier.bestViews)
-                meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0)
-                kFoldMeanAverageAccuracies.append(meanAverageAccuracies)
-                kFoldBestViewsStats.append([float(list(mumboClassifier.bestViews).count(viewIndex))/
-                                            len(mumboClassifier.bestViews)
-                                            for viewIndex in range(nbView)])
-
-                kFoldAccuracyOnTrain.append(100 * accuracy_score(trainLabels, kFoldPredictedTrainLabels[statIterIndex][foldIdx]))
-                kFoldAccuracyOnTest.append(100 * accuracy_score(testLabels,
-                                                                kFoldPredictedTestLabels[statIterIndex][foldIdx]))
-                kFoldAccuracyOnValidation.append(100 * accuracy_score(validationLabels,
-                                                                      kFoldPredictedValidationLabels[statIterIndex][foldIdx]))
-
-                PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, usedIndices=trainIndices,
-                                                                                      NB_CLASS=NB_CLASS)
-                kFoldPredictedTrainLabelsByIter.append(PredictedTrainLabelsByIter)
-                PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, usedIndices=fold,
-                                                                                     NB_CLASS=NB_CLASS)
-                kFoldPredictedTestLabelsByIter.append(PredictedTestLabelsByIter)
-                PredictedValidationLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET,
-                                                                                           usedIndices=validationIndices[statIterIndex],
-                                                                                           NB_CLASS=NB_CLASS)
-                kFoldPredictedValidationLabelsByIter.append(PredictedValidationLabelsByIter)
-
-                kFoldAccuracyOnTrainByIter.append([])
-                kFoldAccuracyOnTestByIter.append([])
-                kFoldAccuracyOnValidationByIter.append([])
-                for iterIndex in range(mumboClassifier.iterIndex+1):
-                    if len(PredictedTestLabelsByIter)==mumboClassifier.iterIndex+1:
-                        kFoldAccuracyOnTestByIter[foldIdx].append(100 * accuracy_score(testLabels,
-                                                                                       PredictedTestLabelsByIter[iterIndex]))
-                    else:
-                        kFoldAccuracyOnTestByIter[foldIdx].append(0.0)
-                    kFoldAccuracyOnTrainByIter[foldIdx].append(100 * accuracy_score(trainLabels,
-                                                                                    PredictedTrainLabelsByIter[iterIndex]))
-                    kFoldAccuracyOnValidationByIter[foldIdx].append(100 * accuracy_score(validationLabels,
-                                                                                         PredictedValidationLabelsByIter[iterIndex]))
-
-
-        iterKFoldBestViews.append(kFoldBestViews)
-        iterKFoldMeanAverageAccuracies.append(kFoldMeanAverageAccuracies)
-        iterKFoldAccuracyOnTrainByIter.append(kFoldAccuracyOnTrainByIter)
-        iterKFoldAccuracyOnTestByIter.append(kFoldAccuracyOnTestByIter)
-        iterKFoldAccuracyOnValidationByIter.append(kFoldAccuracyOnValidationByIter)
-        iterKFoldBestViewsStats.append(kFoldBestViewsStats)
-        totalAccuracyOnTrainIter.append(np.mean(kFoldAccuracyOnTrain))
-        totalAccuracyOnTestIter.append(np.mean(kFoldAccuracyOnTest))
-        totalAccuracyOnValidationIter.append(np.mean(kFoldAccuracyOnValidation))
-    kFoldMeanAverageAccuraciesM = []
-    kFoldBestViewsStatsM = []
-    kFoldAccuracyOnTrainByIterM = []
-    kFoldAccuracyOnTestByIterM = []
-    kFoldAccuracyOnValidationByIterM = []
-    kFoldBestViewsM = []
-    for foldIdx in range(len(kFolds[0])):
-        kFoldBestViewsStatsM.append(np.mean(np.array([iterKFoldBestViewsStats[statIterIndex][foldIdx] for statIterIndex in range(statsIter)]), axis=0))
-        bestViewVotes = []
-        MeanAverageAccuraciesM = np.zeros((statsIter, nbView))
-        AccuracyOnValidationByIterM = []
-        AccuracyOnTrainByIterM = []
-        AccuracyOnTestByIterM = []
-        nbTrainIterations = []
-        nbTestIterations = []
-        nbValidationIterations = np.zeros(statsIter)
-        for statIterIndex in range(statsIter):
-            for iterationIndex, viewForIteration in enumerate(iterKFoldBestViews[statIterIndex][foldIdx]):
-                if statIterIndex==0:
-                    bestViewVotes.append(np.zeros(nbView))
-                    bestViewVotes[iterationIndex][viewsDict[viewForIteration]]+=1
-                else:
-                    bestViewVotes[iterationIndex][viewsDict[viewForIteration]]+=1
-
-            MeanAverageAccuraciesM[statIterIndex] = np.array(iterKFoldMeanAverageAccuracies[statIterIndex][foldIdx])
-
-            for valdiationAccuracyIndex, valdiationAccuracy in enumerate(iterKFoldAccuracyOnValidationByIter[statIterIndex][foldIdx]):
-                if statIterIndex==0:
-                    AccuracyOnValidationByIterM.append([])
-                    AccuracyOnValidationByIterM[valdiationAccuracyIndex].append(valdiationAccuracy)
-                else:
-                    AccuracyOnValidationByIterM[valdiationAccuracyIndex].append(valdiationAccuracy)
-            for trainAccuracyIndex, trainAccuracy in enumerate(iterKFoldAccuracyOnTrainByIter[statIterIndex][foldIdx]):
-                if statIterIndex==0:
-                    AccuracyOnTrainByIterM.append([])
-                    AccuracyOnTrainByIterM[trainAccuracyIndex].append(trainAccuracy)
-                else:
-                    AccuracyOnTestByIterM[trainAccuracyIndex].append(trainAccuracy)
-            for testAccuracyIndex, testAccuracy in enumerate(iterKFoldAccuracyOnTestByIter[statIterIndex][foldIdx]):
-                if statIterIndex==0:
-                    AccuracyOnTestByIterM.append([])
-                    AccuracyOnTestByIterM[testAccuracyIndex].append(testAccuracy)
-                else:
-                    AccuracyOnTestByIterM[testAccuracyIndex].append(testAccuracy)
-
-            #AccuracyOnValidationByIterM.append(iterKFoldAccuracyOnValidationByIter[statIterIndex][foldIdx])
-            #AccuracyOnTrainByIterM.append(iterKFoldAccuracyOnTrainByIter[statIterIndex][foldIdx])
-            #AccuracyOnTestByIterM.append(iterKFoldAccuracyOnTestByIter[statIterIndex][foldIdx])
-
-        kFoldAccuracyOnTrainByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnTrainByIterM])
-        kFoldAccuracyOnTestByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnTestByIterM])
-        kFoldAccuracyOnValidationByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnValidationByIterM])
-
-        kFoldMeanAverageAccuraciesM.append(np.mean(MeanAverageAccuraciesM, axis=0))
-        kFoldBestViewsM.append(np.array([np.argmax(bestViewVote) for bestViewVote in bestViewVotes]))
-
-
-    totalAccuracyOnTrain = np.mean(np.array(totalAccuracyOnTrainIter))
-    totalAccuracyOnTest = np.mean(np.array(totalAccuracyOnTestIter))
-    totalAccuracyOnValidation = np.mean(np.array(totalAccuracyOnValidationIter))
-    return (totalAccuracyOnTrain, totalAccuracyOnTest, totalAccuracyOnValidation, kFoldMeanAverageAccuraciesM,
-            kFoldBestViewsStatsM, kFoldAccuracyOnTrainByIterM, kFoldAccuracyOnTestByIterM, kFoldAccuracyOnValidationByIterM,
-            kFoldBestViewsM)
+# def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationIndices, DATASET,
+#                             kFoldPredictedTrainLabels, kFoldPredictedTestLabels, kFoldPredictedValidationLabels,statsIter, viewIndices):
+#     nbView = len(viewIndices)
+#     viewsDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewIndices))
+#     DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"]
+#     NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
+#     iterKFoldBestViews = []
+#     iterKFoldMeanAverageAccuracies = []
+#     iterKFoldAccuracyOnTrainByIter = []
+#     iterKFoldAccuracyOnTestByIter = []
+#     iterKFoldAccuracyOnValidationByIter = []
+#     iterKFoldBestViewsStats = []
+#     totalAccuracyOnTrainIter = []
+#     totalAccuracyOnTestIter = []
+#     totalAccuracyOnValidationIter = []
+#
+#     for statIterIndex in range(statsIter):
+#         kFoldPredictedTrainLabelsByIter = []
+#         kFoldPredictedTestLabelsByIter = []
+#         kFoldPredictedValidationLabelsByIter = []
+#         kFoldBestViews = []
+#         kFoldAccuracyOnTrain = []
+#         kFoldAccuracyOnTest = []
+#         kFoldAccuracyOnValidation = []
+#         kFoldAccuracyOnTrainByIter = []
+#         kFoldAccuracyOnTestByIter = []
+#         kFoldAccuracyOnValidationByIter = []
+#         kFoldMeanAverageAccuracies = []
+#         kFoldBestViewsStats = []
+#         for foldIdx, fold in enumerate(kFolds[statIterIndex]):
+#             if fold != range(DATASET_LENGTH):
+#
+#                 trainIndices = [index for index in range(DATASET_LENGTH) if (index not in fold) and (index not in validationIndices[statIterIndex])]
+#                 testLabels = CLASS_LABELS[fold]
+#                 trainLabels = CLASS_LABELS[trainIndices]
+#                 validationLabels = CLASS_LABELS[validationIndices[statIterIndex]]
+#
+#                 mumboClassifier = kFoldClassifier[statIterIndex][foldIdx]
+#                 kFoldBestViews.append(mumboClassifier.bestViews)
+#                 meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0)
+#                 kFoldMeanAverageAccuracies.append(meanAverageAccuracies)
+#                 kFoldBestViewsStats.append([float(list(mumboClassifier.bestViews).count(viewIndex))/
+#                                             len(mumboClassifier.bestViews)
+#                                             for viewIndex in range(nbView)])
+#
+#                 kFoldAccuracyOnTrain.append(100 * accuracy_score(trainLabels, kFoldPredictedTrainLabels[statIterIndex][foldIdx]))
+#                 kFoldAccuracyOnTest.append(100 * accuracy_score(testLabels, kFoldPredictedTestLabels[statIterIndex][foldIdx]))
+#                 kFoldAccuracyOnValidation.append(100 * accuracy_score(validationLabels,
+#                                                                       kFoldPredictedValidationLabels[statIterIndex][foldIdx]))
+#
+#                 PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, usedIndices=trainIndices,
+#                                                                                       NB_CLASS=NB_CLASS)
+#                 kFoldPredictedTrainLabelsByIter.append(PredictedTrainLabelsByIter)
+#                 PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, usedIndices=fold,
+#                                                                                      NB_CLASS=NB_CLASS)
+#                 kFoldPredictedTestLabelsByIter.append(PredictedTestLabelsByIter)
+#                 PredictedValidationLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET,
+#                                                                                            usedIndices=validationIndices[statIterIndex],
+#                                                                                            NB_CLASS=NB_CLASS)
+#                 kFoldPredictedValidationLabelsByIter.append(PredictedValidationLabelsByIter)
+#
+#                 kFoldAccuracyOnTrainByIter.append([])
+#                 kFoldAccuracyOnTestByIter.append([])
+#                 kFoldAccuracyOnValidationByIter.append([])
+#                 for iterIndex in range(mumboClassifier.iterIndex+1):
+#                     if len(PredictedTestLabelsByIter)==mumboClassifier.iterIndex+1:
+#                         kFoldAccuracyOnTestByIter[foldIdx].append(100 * accuracy_score(testLabels,
+#                                                                                        PredictedTestLabelsByIter[iterIndex]))
+#                     else:
+#                         kFoldAccuracyOnTestByIter[foldIdx].append(0.0)
+#                     kFoldAccuracyOnTrainByIter[foldIdx].append(100 * accuracy_score(trainLabels,
+#                                                                                     PredictedTrainLabelsByIter[iterIndex]))
+#                     kFoldAccuracyOnValidationByIter[foldIdx].append(100 * accuracy_score(validationLabels,
+#                                                                                          PredictedValidationLabelsByIter[iterIndex]))
+#
+#
+#         iterKFoldBestViews.append(kFoldBestViews)
+#         iterKFoldMeanAverageAccuracies.append(kFoldMeanAverageAccuracies)
+#         iterKFoldAccuracyOnTrainByIter.append(kFoldAccuracyOnTrainByIter)
+#         iterKFoldAccuracyOnTestByIter.append(kFoldAccuracyOnTestByIter)
+#         iterKFoldAccuracyOnValidationByIter.append(kFoldAccuracyOnValidationByIter)
+#         iterKFoldBestViewsStats.append(kFoldBestViewsStats)
+#         totalAccuracyOnTrainIter.append(np.mean(kFoldAccuracyOnTrain))
+#         totalAccuracyOnTestIter.append(np.mean(kFoldAccuracyOnTest))
+#         totalAccuracyOnValidationIter.append(np.mean(kFoldAccuracyOnValidation))
+#     kFoldMeanAverageAccuraciesM = []
+#     kFoldBestViewsStatsM = []
+#     kFoldAccuracyOnTrainByIterM = []
+#     kFoldAccuracyOnTestByIterM = []
+#     kFoldAccuracyOnValidationByIterM = []
+#     kFoldBestViewsM = []
+#     for foldIdx in range(len(kFolds[0])):
+#         kFoldBestViewsStatsM.append(np.mean(np.array([iterKFoldBestViewsStats[statIterIndex][foldIdx] for statIterIndex in range(statsIter)]), axis=0))
+#         bestViewVotes = []
+#         MeanAverageAccuraciesM = np.zeros((statsIter, nbView))
+#         AccuracyOnValidationByIterM = []
+#         AccuracyOnTrainByIterM = []
+#         AccuracyOnTestByIterM = []
+#         nbTrainIterations = []
+#         nbTestIterations = []
+#         nbValidationIterations = np.zeros(statsIter)
+#         for statIterIndex in range(statsIter):
+#             for iterationIndex, viewForIteration in enumerate(iterKFoldBestViews[statIterIndex][foldIdx]):
+#                 if statIterIndex==0:
+#                     bestViewVotes.append(np.zeros(nbView))
+#                     bestViewVotes[iterationIndex][viewsDict[viewForIteration]]+=1
+#                 else:
+#                     bestViewVotes[iterationIndex][viewsDict[viewForIteration]]+=1
+#
+#             MeanAverageAccuraciesM[statIterIndex] = np.array(iterKFoldMeanAverageAccuracies[statIterIndex][foldIdx])
+#
+#             for valdiationAccuracyIndex, valdiationAccuracy in enumerate(iterKFoldAccuracyOnValidationByIter[statIterIndex][foldIdx]):
+#                 if statIterIndex==0:
+#                     AccuracyOnValidationByIterM.append([])
+#                     AccuracyOnValidationByIterM[valdiationAccuracyIndex].append(valdiationAccuracy)
+#                 else:
+#                     AccuracyOnValidationByIterM[valdiationAccuracyIndex].append(valdiationAccuracy)
+#             for trainAccuracyIndex, trainAccuracy in enumerate(iterKFoldAccuracyOnTrainByIter[statIterIndex][foldIdx]):
+#                 if statIterIndex==0:
+#                     AccuracyOnTrainByIterM.append([])
+#                     AccuracyOnTrainByIterM[trainAccuracyIndex].append(trainAccuracy)
+#                 else:
+#                     AccuracyOnTestByIterM[trainAccuracyIndex].append(trainAccuracy)
+#             for testAccuracyIndex, testAccuracy in enumerate(iterKFoldAccuracyOnTestByIter[statIterIndex][foldIdx]):
+#                 if statIterIndex==0:
+#                     AccuracyOnTestByIterM.append([])
+#                     AccuracyOnTestByIterM[testAccuracyIndex].append(testAccuracy)
+#                 else:
+#                     AccuracyOnTestByIterM[testAccuracyIndex].append(testAccuracy)
+#
+#             #AccuracyOnValidationByIterM.append(iterKFoldAccuracyOnValidationByIter[statIterIndex][foldIdx])
+#             #AccuracyOnTrainByIterM.append(iterKFoldAccuracyOnTrainByIter[statIterIndex][foldIdx])
+#             #AccuracyOnTestByIterM.append(iterKFoldAccuracyOnTestByIter[statIterIndex][foldIdx])
+#
+#         kFoldAccuracyOnTrainByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnTrainByIterM])
+#         kFoldAccuracyOnTestByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnTestByIterM])
+#         kFoldAccuracyOnValidationByIterM.append([np.mean(np.array(accuracies)) for accuracies in AccuracyOnValidationByIterM])
+#
+#         kFoldMeanAverageAccuraciesM.append(np.mean(MeanAverageAccuraciesM, axis=0))
+#         kFoldBestViewsM.append(np.array([np.argmax(bestViewVote) for bestViewVote in bestViewVotes]))
+#
+#
+#     totalAccuracyOnTrain = np.mean(np.array(totalAccuracyOnTrainIter))
+#     totalAccuracyOnTest = np.mean(np.array(totalAccuracyOnTestIter))
+#     totalAccuracyOnValidation = np.mean(np.array(totalAccuracyOnValidationIter))
+#     return (totalAccuracyOnTrain, totalAccuracyOnTest, totalAccuracyOnValidation, kFoldMeanAverageAccuraciesM,
+#             kFoldBestViewsStatsM, kFoldAccuracyOnTrainByIterM, kFoldAccuracyOnTestByIterM, kFoldAccuracyOnValidationByIterM,
+#             kFoldBestViewsM)
 
 
 def iterRelevant(iterIndex, kFoldClassifierStats):
@@ -379,38 +355,32 @@ def printMetricScore(metricScores, metrics):
         else:
             metricKWARGS = {}
         metricScoreString += "\tFor "+metricModule.getConfig(**metricKWARGS)+" : "
-        metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0]) +" with STD : "+str(metricScores[metric[0]][2])
-        metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1]) +" with STD : "+str(metricScores[metric[0]][3])
+        metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0])
+        metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1])
         metricScoreString += "\n\n"
     return metricScoreString
 
 
-def getTotalMetricScores(metric, trainLabelsIterations, testLabelsIterations, DATASET, iterationValidationIndices, statsIter):
+def getTotalMetricScores(metric, trainLabels, testLabels,
+                         DATASET, validationIndices, learningIndices):
     labels = DATASET.get("Labels").value
-    DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"]
     metricModule = getattr(Metrics, metric[0])
     if metric[1]!=None:
         metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
     else:
         metricKWARGS = {}
-    trainScores = []
-    testScores = []
-    validationScores = []
-    for statsIterIndex in range(statsIter):
-        validationIndices = iterationValidationIndices[statsIterIndex]
-        learningIndices = [index for index in range(DATASET_LENGTH) if index not in validationIndices]
-        trainScores.append(metricModule.score(labels[learningIndices], trainLabelsIterations[statsIterIndex], **metricKWARGS))
-        testScores.append(metricModule.score(labels[validationIndices], testLabelsIterations[statsIterIndex], **metricKWARGS))
-    return [np.mean(np.array(trainScores)), np.mean(np.array(testScores)), np.std(np.array(trainScores)),
-            np.std(np.array(testScores))]
-
-
-def getMetricsScores(metrics, trainLabelsIterations, testLabelsIterations,
-                     DATASET, validationIndices, statsIter):
+    validationIndices = validationIndices
+    trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS)
+    testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS)
+    return [trainScore, testScore]
+
+
+def getMetricsScores(metrics, trainLabels, testLabels,
+                     DATASET, validationIndices, learningIndices):
     metricsScores = {}
     for metric in metrics:
-        metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabelsIterations, testLabelsIterations,
-                                                        DATASET, validationIndices, statsIter)
+        metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, testLabels,
+                                                        DATASET, validationIndices, learningIndices)
     return metricsScores
@@ -418,27 +388,34 @@ def getMeanIterations(kFoldClassifierStats, foldIndex):
     iterations = np.array([kFoldClassifier[foldIndex].iterIndex+1 for kFoldClassifier in kFoldClassifierStats])
     return np.mean(iterations)
 
-def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, DATASET,initKWARGS,
-            LEARNING_RATE,LABELS_DICTIONARY,views, NB_CORES, times,databaseName, nbFolds, validationIndices, gridSearch,
-            nIter, metrics, statsIter,viewIndices, randomState):
+
+def execute(classifier, trainLabels,
+            testLabels, DATASET,
+            classificationKWARGS, classificationIndices,
+            LABELS_DICTIONARY, views, nbCores, times,
+            databaseName, KFolds,
+            hyperParamSearch, nIter, metrics,
+            viewsIndices, randomState):
+    learningIndices, validationIndices = classificationIndices
+    LEARNING_RATE = len(learningIndices)/(len(learningIndices)+len(validationIndices))
+    nbFolds = KFolds.n_splits
     CLASS_LABELS = DATASET.get("Labels")[...]
     dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY)
-    algoConfigurationString, classifierAnalysis = getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times)
-
+    algoConfigurationString, classifierAnalysis = getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, hyperParamSearch, nIter, times)
     (totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter,
-     scoresOnTestByIter) = getReport(classifiersIterations, CLASS_LABELS, validationIndices, DATASET,
-                                     trainLabelsIterations, testLabelsIterations, statsIter, viewIndices, metrics[0])
+     scoresOnTestByIter) = getReport(classifier, CLASS_LABELS, classificationIndices, DATASET,
+                                     trainLabels, testLabels, viewsIndices, metrics[0])
     stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : "+str(randomState) + \
                      "\n\nAverage "+metrics[0][0]+" :\n\t-On Train : " + str(totalScoreOnTrain) + "\n\t-On Test : " + \
                      str(totalScoreOnTest)
     stringAnalysis += dbConfigurationString
     stringAnalysis += algoConfigurationString
-    metricsScores = getMetricsScores(metrics, trainLabelsIterations, testLabelsIterations,
-                                     DATASET, validationIndices, statsIter)
+    metricsScores = getMetricsScores(metrics, trainLabels, testLabels,
+                                     DATASET, validationIndices, learningIndices)
     stringAnalysis += printMetricScore(metricsScores, metrics)
     stringAnalysis += "Mean average scores and stats :"
     for viewIndex, (meanAverageAccuracy, bestViewStat) in enumerate(zip(meanAverageAccuracies,viewsStats)):
diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
index af37d3ec6f015e8be7e82bba57012af051a123f0..befa591dbd4e9deae74d20d6d88f2c2a5d6f3599 100644
--- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py
+++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
@@ -32,7 +32,7 @@ def genFusionName(type_, a, b, c):
     if type_ == "Fusion" and a["fusionType"] != "EarlyFusion":
         return "Late-"+str(a["fusionMethod"])
     elif type_ == "Fusion" and a["fusionType"] != "LateFusion":
-        return "Early-"+a["fusionMethod"]+"-"+a["classifiersNames"][0]
+        return "Early-"+a["fusionMethod"]+"-"+a["classifiersNames"]
 
 
 def genNamesFromRes(mono, multi):
@@ -48,17 +48,17 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory):
         nbResults = len(mono)+len(multi)
         validationScores = [float(res[1][2][metric[0]][1]) for res in mono]
         validationScores += [float(scores[metric[0]][1]) for a, b, scores, c in multi]
-        validationSTD = [float(res[1][2][metric[0]][3]) for res in mono]
-        validationSTD += [float(scores[metric[0]][3]) for a, b, scores, c in multi]
+        # validationSTD = [float(res[1][2][metric[0]][3]) for res in mono]
+        # validationSTD += [float(scores[metric[0]][3]) for a, b, scores, c in multi]
         trainScores = [float(res[1][2][metric[0]][0]) for res in mono]
         trainScores += [float(scores[metric[0]][0]) for a, b, scores, c in multi]
-        trainSTD = [float(res[1][2][metric[0]][2]) for res in mono]
-        trainSTD += [float(scores[metric[0]][2]) for a, b, scores, c in multi]
+        # trainSTD = [float(res[1][2][metric[0]][2]) for res in mono]
+        # trainSTD += [float(scores[metric[0]][2]) for a, b, scores, c in multi]
         validationScores = np.array(validationScores)
-        validationSTD = np.array(validationSTD)
+        # validationSTD = np.array(validationSTD)
         trainScores = np.array(trainScores)
-        trainSTD = np.array(trainSTD)
+        # trainSTD = np.array(trainSTD)
         names = np.array(names)
 
         f = pylab.figure(figsize=(40, 30))
@@ -72,14 +72,14 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory):
             metricKWARGS = {}
         sorted_indices = np.argsort(validationScores)
         validationScores = validationScores[sorted_indices]
-        validationSTD = validationSTD[sorted_indices]
+        # validationSTD = validationSTD[sorted_indices]
         trainScores = trainScores[sorted_indices]
-        trainSTD = trainSTD[sorted_indices]
+        # trainSTD = trainSTD[sorted_indices]
         names = names[sorted_indices]
 
         ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS)+" on validation set for each classifier")
-        rects = ax.bar(range(nbResults), validationScores, width, color="r", yerr=validationSTD)
-        rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.7", yerr=trainSTD)
+        rects = ax.bar(range(nbResults), validationScores, width, color="r", )#yerr=validationSTD)
+        rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.7",)# yerr=trainSTD)
         autolabel(rects, ax)
         autolabel(rect2, ax)
         ax.legend((rects[0], rect2[0]), ('Test', 'Train'))
@@ -87,7 +87,7 @@
         ax.set_xticklabels(names, rotation="vertical")
 
         f.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-"+metric[0]+".png")
-    logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s")
+
 
 
 def analyzeLabels(labelsArrays, realLabels, results, directory):
diff --git a/Code/MonoMutliViewClassifiers/Versions.py b/Code/MonoMutliViewClassifiers/Versions.py
index 2c782bb14fa956ee254ea168724533e292820923..7ff7615cfab5eacf1fee82e3ad98c0d7f1e40603 100644
--- a/Code/MonoMutliViewClassifiers/Versions.py
+++ b/Code/MonoMutliViewClassifiers/Versions.py
@@ -27,11 +27,11 @@ def testVersions():
         print "Please install pyscm"
         raise
 
-    try:
-        import cv2
-        # print("OpenCV2-V.: " + cv2.__version__)
-    except:
-        print "Please install cv2 module"
+    # try:
+    #     import cv2
+    #     # print("OpenCV2-V.: " + cv2.__version__)
+    # except:
+    #     print "Please install cv2 module"
 
     try:
         import pandas
diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py
index 35df10ecbb04a662eb5409447b5144302d86efea..dd90f9e829b267b75a1cc88ae5c50307d2ef68fb 100644
--- a/Code/MonoMutliViewClassifiers/utils/Dataset.py
+++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py
@@ -3,12 +3,12 @@
 import numpy as np
 
 
 def getV(DATASET, viewIndex, usedIndices=None):
-    if usedIndices==None:
+    if usedIndices is None:
         usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
     if type(usedIndices) is int:
         return DATASET.get("View"+str(viewIndex))[usedIndices, :]
     else:
-        usedIndices=np.array(usedIndices)
+        usedIndices = np.array(usedIndices)
         sortedIndices = np.argsort(usedIndices)
         usedIndices = usedIndices[sortedIndices]
@@ -35,9 +35,9 @@ def getValue(DATASET):
         return DATASET.value
     else:
         sparse_mat = sparse.csr_matrix((DATASET.get("data").value,
-                                       DATASET.get("indices").value,
-                                       DATASET.get("indptr").value),
-                                       shape=DATASET.attrs["shape"])
+                                        DATASET.get("indices").value,
+                                        DATASET.get("indptr").value),
+                                        shape=DATASET.attrs["shape"])
         return sparse_mat
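
Note on the metric-scoring hunks above: getTotalMetricScores now scores a single train/validation split and returns a plain [trainScore, testScore] pair instead of a mean and standard deviation over statsIter repetitions, which is why the "with STD" terms drop out of printMetricScore and the yerr error bars are disabled in ResultAnalysis.py. A minimal standalone sketch of that calling convention (the function and variable names below are illustrative only, not taken from the repository):

import numpy as np
from sklearn.metrics import accuracy_score  # stand-in for one entry of the Metrics package


def get_total_metric_scores(score_fn, labels, train_preds, test_preds,
                            learning_indices, validation_indices):
    # One train/validation split -> one score per set, no mean/std over repetitions.
    train_score = score_fn(labels[learning_indices], train_preds)
    test_score = score_fn(labels[validation_indices], test_preds)
    return [train_score, test_score]


# Toy usage with made-up labels and predictions:
labels = np.array([0, 1, 0, 1, 1, 0])
learning_indices = [0, 1, 2, 3]
validation_indices = [4, 5]
train_preds = np.array([0, 1, 0, 0])
test_preds = np.array([0, 0])
print get_total_metric_scores(accuracy_score, labels, train_preds, test_preds,
                              learning_indices, validation_indices)
# -> [0.75, 0.5]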
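
On the utils/Dataset.py hunk: replacing "usedIndices==None" with "usedIndices is None" swaps an equality comparison for an identity test; with NumPy arrays, "== None" is (or warns that it will become) an elementwise comparison, so the identity test is the safe way to detect the missing-argument case whatever type usedIndices has. A tiny standalone illustration (the helper below is hypothetical, not repository code):

import numpy as np


def pick_rows(view, used_indices=None):
    # Identity test: behaves the same whether used_indices is None, a list or an array.
    if used_indices is None:
        used_indices = range(view.shape[0])
    return view[np.array(used_indices), :]


view = np.arange(12).reshape(4, 3)
print pick_rows(view)          # every row
print pick_rows(view, [2, 0])  # rows 2 and 0, in that order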