diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py
index 463439d6409f88babd4f4e961519b4eee368eff1..a4726d985bb200d94d7e007ecbf5e29c721f0e97 100644
--- a/Code/MonoMutliViewClassifiers/ExecClassif.py
+++ b/Code/MonoMutliViewClassifiers/ExecClassif.py
@@ -7,11 +7,12 @@ import operator
 import itertools
 import sys
 import select
+import logging
+import errno
 
 # Import 3rd party modules
 from joblib import Parallel, delayed
 import numpy as np
-import logging
 import matplotlib
 matplotlib.use('Agg')
 import math
@@ -41,9 +42,17 @@ __status__ = "Prototype"  # Production, Development, P
 testVersions()
 
 def initLogFile(args):
-    directory = os.path.dirname(os.path.abspath(__file__)) + "/Results/"
+    # os.path.dirname(os.path.abspath(__file__)) +
+    directory = "../../Results/"+args.name+"/started_"+time.strftime("%Y_%m_%d-%H_%M")+"/"
     logFileName = time.strftime("%Y%m%d-%H%M%S") + "-CMultiV-" + args.CL_type + "-" + "_".join(args.views.split(":")) + "-" + args.name + \
                   "-LOG"
+    print logFileName
+    if not os.path.exists(os.path.dirname(directory+logFileName)):
+        try:
+            os.makedirs(os.path.dirname(directory+logFileName))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
     logFile = directory + logFileName
     if os.path.isfile(logFile + ".log"):
         for i in range(1, 20):
@@ -58,6 +67,8 @@ def initLogFile(args):
     if args.log:
         logging.getLogger().addHandler(logging.StreamHandler())
 
+    return directory
+
 
 def input(timeout=15):
     print "You have " + str(timeout) + " seconds to stop the script by typing n"
@@ -471,7 +482,7 @@ try:
 except:
     gridSearch = False
 
-initLogFile(args)
+directory = initLogFile(args)
 
 DATASET, LABELS_DICTIONARY = getDatabase(args.views.split(":"), args.pathF, args.name, args.CL_nb_class,
                                          args.CL_classes)
@@ -514,7 +525,7 @@ if nbCores>1:
     nbExperiments = len(argumentDictionaries["Monoview"])
     for stepIndex in range(int(math.ceil(float(nbExperiments)/nbCores))):
         resultsMonoview+=(Parallel(n_jobs=nbCores)(
-            delayed(ExecMonoview_multicore)(args.name, labelsNames, args.CL_split, args.CL_nbFolds, coreIndex, args.type, args.pathF, statsIter, gridSearch=gridSearch,
+            delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, args.CL_split, args.CL_nbFolds, coreIndex, args.type, args.pathF, statsIter, gridSearch=gridSearch,
                                             metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
     accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
@@ -522,7 +533,7 @@ if nbCores>1:
     classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
 
 else:
-    resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
+    resultsMonoview+=([ExecMonoview(directory, DATASET.get("View"+str(arguments["viewIndex"])),
                                     DATASET.get("Labels").value, args.name, labelsNames,
                                     args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, statsIter,
                                     gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter,
@@ -541,12 +552,12 @@ if nbCores>1:
         nbExperiments = len(argumentDictionaries["Multiview"])
         for stepIndex in range(int(math.ceil(float(nbExperiments)/nbCores))):
             resultsMultiview += Parallel(n_jobs=nbCores)(
-                delayed(ExecMultiview_multicore)(coreIndex, args.name, args.CL_split, args.CL_nbFolds, args.type, args.pathF,
+                delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, args.CL_split, args.CL_nbFolds, args.type, args.pathF,
                                                  LABELS_DICTIONARY, statsIter, gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                                  **argumentDictionaries["Multiview"][stepIndex*nbCores+coreIndex])
                 for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
 else:
-    resultsMultiview = [ExecMultiview(DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
+    resultsMultiview = [ExecMultiview(directory, DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
                                       LABELS_DICTIONARY, statsIter, gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                       **arguments) for arguments in argumentDictionaries["Multiview"]]
 multiviewTime = time.time()-monoviewTime-dataBaseTime-start
@@ -559,8 +570,8 @@ trueLabels = DATASET.get("Labels").value
 times = [dataBaseTime, monoviewTime, multiviewTime]
 # times=[]
 results = (resultsMonoview, resultsMultiview)
-analyzeLabels(labels, trueLabels, results)
+analyzeLabels(labels, trueLabels, results, directory)
 logging.debug("Start:\t Analyze Global Results")
-resultAnalysis(benchmark, results, args.name, times, metrics)
+resultAnalysis(benchmark, results, args.name, times, metrics, directory)
 logging.debug("Done:\t Analyze Global Results")
 
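The `initLogFile` hunk above creates the per-run results directory with the try/`errno.EEXIST` idiom rather than trusting the `os.path.exists` check alone, since that check can race against another process creating the same path between the check and the `makedirs` call. A minimal standalone sketch of the pattern (the helper name and the path are illustrative only, not part of the patch):

```python
import errno
import os

def ensure_directory(path):
    # Attempt the creation and tolerate "already exists": an
    # os.path.exists() check is not atomic, so two concurrent runs
    # could both pass the check and one makedirs would then fail.
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

ensure_directory("../../Results/example_run/")
```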
diff --git a/Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py
index 798fe9ccefa0d0c79377af68230424e1223441d7..487708e992560c66626ad2523a0de6e2afb8c3a3 100644
--- a/Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py
+++ b/Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py
@@ -6,9 +6,9 @@ import pandas as pd  # For DataFrames
 
 # Import sci-kit learn party modules
-from sklearn.cross_validation import train_test_split  # For calculating the train/test split
+#from sklearn.model_selection import train_test_split  # For calculating the train/test split
 from sklearn.pipeline import Pipeline  # Pipelining in classification
-from sklearn.grid_search import GridSearchCV  # GridSearch for parameters of classification
+from sklearn.model_selection import GridSearchCV  # GridSearch for parameters of classification
 from sklearn.ensemble import RandomForestClassifier  # RandomForest-Classifier
 import sklearn
 import numpy as np
 
@@ -59,43 +59,43 @@ def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLA
 
 ##### Generating Test and Train Data
-def calcTrainTestOwn(X,y,split):
-
-    classLabels = pd.Series(y)
-
-
-    data_train = []
-    data_test = []
-    label_train = []
-    label_test = []
-
-    # Reminder to store position in array
-    reminder = 0
-
-    for i in classLabels.unique():
-        # Calculate the number of samples per class
-        count = (len(classLabels[classLabels==i]))
-
-        # Min/Max: To determine the range to read from array
-        min_train = reminder
-        max_train = int(round(count * split)) +1 +reminder
-        min_test = max_train
-        max_test = count + reminder
-
-        #Extend the respective list with ClassLabels(y)/Features(X)
-        label_train.extend(classLabels[min_train:max_train])
-        label_test.extend(classLabels[min_test:max_test])
-        data_train.extend(X[min_train:max_train])
-        data_test.extend(X[min_test:max_test])
-
-        reminder = reminder + count
-
-    return np.array(data_train), np.array(data_test), np.array(label_train).astype(int), np.array(label_test).astype(int)
-
-def calcTrainTest(X,y,split):
-    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=split)
+# def calcTrainTestOwn(X,y,split):
+#
+#     classLabels = pd.Series(y)
+#
+#
+#     data_train = []
+#     data_test = []
+#     label_train = []
+#     label_test = []
+#
+#     # Reminder to store position in array
+#     reminder = 0
+#
+#     for i in classLabels.unique():
+#         # Calculate the number of samples per class
+#         count = (len(classLabels[classLabels==i]))
+#
+#         # Min/Max: To determine the range to read from array
+#         min_train = reminder
+#         max_train = int(round(count * split)) +1 +reminder
+#         min_test = max_train
+#         max_test = count + reminder
+#
+#         #Extend the respective list with ClassLabels(y)/Features(X)
+#         label_train.extend(classLabels[min_train:max_train])
+#         label_test.extend(classLabels[min_test:max_test])
+#         data_train.extend(X[min_train:max_train])
+#         data_test.extend(X[min_test:max_test])
+#
+#         reminder = reminder + count
+#
+#     return np.array(data_train), np.array(data_test), np.array(label_train).astype(int), np.array(label_test).astype(int)
 
-    return (X_train, X_test, y_train, y_test)
+# def calcTrainTest(X,y,split):
+#     X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=split)
+#
+#     return (X_train, X_test, y_train, y_test)
 
 
 # Classifiers
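The import changes above track the scikit-learn reorganization: `train_test_split` (formerly in `sklearn.cross_validation`) and `GridSearchCV` (formerly in `sklearn.grid_search`) both live in `sklearn.model_selection` in current releases, and the old modules were removed entirely in scikit-learn 0.20. A minimal sketch of the equivalent usage on made-up data:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

X = np.random.rand(100, 5)            # made-up features
y = np.random.randint(0, 2, 100)      # made-up binary labels

# train_test_split covers what the hand-rolled calcTrainTest helper did
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75)

pipeline = Pipeline([("classifier", RandomForestClassifier())])
grid = GridSearchCV(pipeline, {"classifier__n_estimators": [10, 50]}, cv=3)
grid.fit(X_train, y_train)
print(grid.best_params_)
```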
diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
index 07b3b1a1226356e2f75a8c013bd583f52c524a21..949d48f14c0cbcf0e065bac03bc10487a80a0c8f 100644
--- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -29,7 +29,7 @@ __status__ = "Prototype"  # Production, Development, Prototype
 __date__ = 2016-03-25
 
 
-def ExecMonoview_multicore(name, labelsNames, learningRate, nbFolds, datasetFileIndex, databaseType, path, statsIter, gridSearch=True,
+def ExecMonoview_multicore(directory, name, labelsNames, learningRate, nbFolds, datasetFileIndex, databaseType, path, statsIter, gridSearch=True,
                            metrics=[["accuracy_score", None]], nIter=30, **args):
     DATASET = h5py.File(path+name+str(datasetFileIndex)+".hdf5", "r")
     kwargs = args["args"]
@@ -37,11 +37,11 @@ def ExecMonoview_multicore(name, labelsNames, learningRate, nbFolds, datasetFile
     neededViewIndex = views.index(kwargs["feat"])
     X = DATASET.get("View"+str(neededViewIndex))
     Y = DATASET.get("Labels").value
-    return ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch,
+    return ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch,
                         metrics=metrics, nIter=nIter, **args)
 
 
-def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databaseType, path, statsIter, gridSearch=True,
+def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databaseType, path, statsIter, gridSearch=True,
                  metrics=[["accuracy_score", None]], nIter=30, **args):
     logging.debug("Start:\t Loading data")
     try:
@@ -63,7 +63,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
     y_tests = []
     y_train_preds = []
     y_test_preds = []
-    for iterationStat in range(1):
+    for iterationStat in range(statsIter):
         # Calculate Train/Test data
         logging.debug("Start:\t Determine Train/Test split"+" for iteration "+str(iterationStat+1))
         testIndices = ClassifMonoView.splitDataset(Y, nbClass, learningRate, datasetLength)
@@ -116,7 +116,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
     labelsString = "-".join(labelsNames)
     timestr = time.strftime("%Y%m%d-%H%M%S")
     CL_type_string = CL_type
-    outputFileName = "Results/" + timestr + "Results-" + CL_type_string + "-" + labelsString + \
+    outputFileName = directory + timestr + "Results-" + CL_type_string + "-" + labelsString + \
                      '-learnRate' + str(learningRate) + '-' + name + "-" + feat
 
     outputTextFile = open(outputFileName + '.txt', 'w')
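Replacing `range(1)` with `range(statsIter)` makes the monoview run repeat its random train/test split `statsIter` times, so the reported scores are statistics over several draws rather than a single one. A self-contained sketch of that kind of loop (the helper name is illustrative, not this module's API):

```python
import numpy as np

def repeated_holdout(classifier, X, y, stats_iter=5, train_ratio=0.7):
    # Redraw the train/test split stats_iter times and aggregate the
    # scores, mirroring the iterationStat loop in ExecMonoview.
    scores = []
    for _ in range(stats_iter):
        order = np.random.permutation(len(y))
        cut = int(train_ratio * len(y))
        train, test = order[:cut], order[cut:]
        classifier.fit(X[train], y[train])
        scores.append(classifier.score(X[test], y[test]))
    return np.mean(scores), np.std(scores)
```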
+ timestr + "Results-" + CL_type_string + "-" + labelsString + \ '-learnRate' + str(learningRate) + '-' + name + "-" + feat outputTextFile = open(outputFileName + '.txt', 'w') diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 4e9b1c76f2d680c6310167abc7c514113b8f3e0b..02c0560086b4313ed138783e3174f492e9226c6b 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,6 +1,6 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.pipeline import Pipeline -from sklearn.grid_search import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier import Metrics from scipy.stats import randint diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index f9cb2679a470bd3977d85d277ab1b90d450dbb3e..8c37293465101abfeda7017b1ec10c80f2741a1c 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -1,6 +1,6 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.pipeline import Pipeline # Pipelining in classification -from sklearn.grid_search import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index 9105e37c1d5caa97bdfb5588f9e315f427373367..6ed4dd893760cd89480dc715830ee051562388f2 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -1,6 +1,6 @@ from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import Pipeline # Pipelining in classification -from sklearn.grid_search import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index f0f24ee1b2dcdc87e550e1505d558b3dbb6abbfc..16a4646cdc7f8fd0e0f06edbcf12824b50b81622 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -1,6 +1,6 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.pipeline import Pipeline -from sklearn.grid_search import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index 59026a6628b1fff9551316f217daba5835b7a6b4..8912505909feb9370e363c2bd5c41a3d8be73795 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -1,6 +1,6 @@ from sklearn.linear_model import SGDClassifier from sklearn.pipeline import Pipeline # Pipelining in classification -from sklearn.grid_search import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import uniform diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py 
diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
index 37e02981a723e109dd5bc9b42e919e502247bf20..4e482a3399e57a2613e5120e9b1806026c608e98 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
@@ -23,14 +23,14 @@ __status__ = "Prototype"  # Production, Development, P
 
 
 
-def ExecMultiview_multicore(coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY , statsIter,
+def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY , statsIter,
                             gridSearch=False, nbCores=1, metrics=None, nIter=30, **arguments):
     DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r")
-    return ExecMultiview(DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter,
+    return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter,
                          gridSearch=gridSearch, metrics=metrics, nIter=nIter, **arguments)
 
 
-def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter,
+def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter,
                   gridSearch=False, metrics=None, nIter=30, **kwargs):
 
     datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
@@ -126,7 +126,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
         CL_type_string += "-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+"-".join(classificationKWARGS["classifiersNames"])
     elif CL_type=="Mumbo":
         CL_type_string += "-"+"-".join(classificationKWARGS["classifiersNames"])
-    outputFileName = "Results/" + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \
+    outputFileName = directory + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \
                      '-learnRate' + str(learningRate) + '-' + name
 
     outputTextFile = open(outputFileName + '.txt', 'w')
elif CL_type=="Mumbo": CL_type_string += "-"+"-".join(classificationKWARGS["classifiersNames"]) - outputFileName = "Results/" + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \ + outputFileName = directory + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \ '-learnRate' + str(learningRate) + '-' + name outputTextFile = open(outputFileName + '.txt', 'w') diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index 439e4d53bc4b1aade3471b28b47590f198101a99..1c97fe0a21e69e2742c12fd22ca9a693e446340c 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -96,6 +96,8 @@ class SCMForLinear(LateFusionClassifier): getV(DATASET, viewIndex, usedIndices)) accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) monoviewDecisions[:, index] = monoviewDecision + print monoviewDecisions + exit() features = self.generateInteractions(monoviewDecisions) predictedLabels = self.SCMClassifier.predict(features) else: @@ -158,10 +160,12 @@ class SCMForLinear(LateFusionClassifier): def generateInteractions(self, monoViewDecisions): if type(self.order)==type(None): order = monoViewDecisions.shape[1] - genratedIntercations = [monoViewDecisions[:,i] for i in range(monoViewDecisions.shape[1])] if self.order==1: + print monoViewDecisions return monoViewDecisions + else: + genratedIntercations = [monoViewDecisions[:,i] for i in range(monoViewDecisions.shape[1])] for orderIndex in range(self.order-1): combins = itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2) for combin in combins: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 5dcb33346e6a897c2e3e6b74513bf3739057ba3a..c4a1c60bcfb3a5d512d4e8dce6be6257c303d50b 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -55,20 +55,15 @@ class WeightedLinear(LateFusionClassifier): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - self.weights = self.weights/float(max(self.weights)) + self.weights = self.weights/float(sum(self.weights)) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if usedIndices: - predictedLabels = [] viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): - viewScores[index] = self.monoviewClassifiers[index].predict_proba( - getV(DATASET, viewIndex, usedIndices)) - for currentIndex, usedIndex in enumerate(usedIndices): - predictedLabel = np.argmax(np.array( - [max(viewScore) * weight for viewScore, weight in zip(viewScores[:, currentIndex], self.weights)], - dtype=float)) - predictedLabels.append(predictedLabel) + viewScores[index] = np.array(self.monoviewClassifiers[index].predict_proba( + getV(DATASET, viewIndex, usedIndices)))*self.weights[index] + predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) else: predictedLabels = [] diff --git 
diff --git a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py
index 34b8a3947f208026622f762b526d853dac03007d..49150a1de5c3a35faf487d61aa8287d57c3f78ee 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py
@@ -183,7 +183,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
             nbLabelsAvailable+=1
         LABELS_NAMES = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if lineIdx in np.random.randint(nbLabelsAvailable, size=NB_CLASS)]
     fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=',').astype(int)
-    labelsDictionary = dict((classIndice, labelName) for (classIndice, labelName) in
+    labelsDictionary = dict((classIndex, labelName) for (classIndex, labelName) in
                             [(int(line.strip().split(";")[0]),line.strip().split(";")[1])for lineIndex, line in enumerate(labelsNamesFile) if line.strip().split(";")[0] in LABELS_NAMES])
     if len(set(fullLabels))>NB_CLASS:
         usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
@@ -198,12 +198,12 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
             viewDset.attrs["binary"] = False
     labelsDset = datasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
-    #labelsDset.attrs["labelsDictionary"] = labelsDictionary
+    labelsDset.attrs["labels"] = [labelName for index, labelName in labelsDictionary.iteritems()]
+    labelsDset.attrs["labels_indices"] = [labelIndex for labelIndex, labelName in labelsDictionary.iteritems()]
 
     metaDataGrp = datasetFile.create_group("Metadata")
     metaDataGrp.attrs["nbView"] = len(views)
     metaDataGrp.attrs["nbClass"] = NB_CLASS
-    print NB_CLASS
     metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
     datasetFile.close()
     datasetFile = h5py.File(pathF+nameDB+".hdf5", "r")
@@ -212,34 +212,38 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
 
 
 def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
     datasetFile = h5py.File(pathF+nameDB+".hdf5", "r")
-    fullLabels = datasetFile.get("Labels").value
-    fullLabelsDictionary = datasetFile.get("Labels").attrs["labelsDictionary"]
-    fullNbClass = datasetFile.get("Metadata").attrs["nbClass"]
-    if len(LABELS_NAMES)!=NB_CLASS:
-        LABELS_NAMES = [value for index, value in fullLabelsDictionary.iteritems()
-                        if index in np.random.randint(fullNbClass, size=NB_CLASS)]
-    labelsDictionary = dict((classIndice, labelName) for (classIndice, labelName)
-                            in fullLabelsDictionary.iteritems() if labelName in LABELS_NAMES)
-    if len(set(fullLabels))>NB_CLASS:
-        usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
-    else:
-        usedIndices = range(len(fullLabels))
-    tempDatasetFile = datasetFile = h5py.File(pathF+nameDB+"_temp.hdf5", "w")
-    for viewIndex, view in enumerate(views):
-        viewMatrix = datasetFile.get("View"+str(viewIndex)).value[:, usedIndices]
-        viewDset = tempDatasetFile.create_dataset("View"+str(viewIndex), viewMatrix.shape, data=viewMatrix)
-        viewDset.attrs["name"] = view
-
-    labelsDset = tempDatasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
-    labelsDset.attrs["labelsDictionary"] = labelsDictionary
-
-    metaDataGrp = tempDatasetFile.create_group("Metadata")
-    metaDataGrp.attrs["nbView"] = len(views)
-    metaDataGrp.attrs["nbClass"] = NB_CLASS
-    metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
-    datasetFile.close()
-    tempDatasetFile.close()
-    datasetFile = h5py.File(pathF+nameDB+"_temp.hdf5", "r")
+    fullLabels = datasetFile.get("Labels")
+    labelsDictionary = dict((labelIndex, labelName) for labelIndex, labelName in
+                            zip(fullLabels.attrs["labels_indices"], fullLabels.attrs["labels"]))
+    # #datasetFile.get("Labels").attrs["labelsDictionary"]
+    #
+    # fullNbClass = datasetFile.get("Metadata").attrs["nbClass"]
+    # if len(LABELS_NAMES)!=NB_CLASS:
+    #     LABELS_NAMES = [value for index, value in fullLabelsDictionary.iteritems()
+    #                     if index in np.random.randint(fullNbClass, size=NB_CLASS)]
+    # usableLabels = [labelName for index, labelName in fullLabelsDictionary.iteritems() if labelName in LABELS_NAMES]
+    # labelsDictionary = dict((classIndex, labelName) for classIndex, labelName
+    #                         in enumerate(usableLabels))
+    # if len(set(fullLabels))>NB_CLASS:
+    #     usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
+    # else:
+    #     usedIndices = range(len(fullLabels))
+    # tempDatasetFile = datasetFile = h5py.File(pathF+nameDB+"_temp.hdf5", "w")
+    # for viewIndex, view in enumerate(views):
+    #     viewMatrix = datasetFile.get("View"+str(viewIndex)).value[:, usedIndices]
+    #     viewDset = tempDatasetFile.create_dataset("View"+str(viewIndex), viewMatrix.shape, data=viewMatrix)
+    #     viewDset.attrs["name"] = view
+    #
+    # labelsDset = tempDatasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
+    # labelsDset.attrs["labelsDictionary"] = labelsDictionary
+    #
+    # metaDataGrp = tempDatasetFile.create_group("Metadata")
+    # metaDataGrp.attrs["nbView"] = len(views)
+    # metaDataGrp.attrs["nbClass"] = NB_CLASS
+    # metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
+    # datasetFile.close()
+    # tempDatasetFile.close()
+    # datasetFile = h5py.File(pathF+nameDB+"_temp.hdf5", "r")
     return datasetFile, labelsDictionary
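HDF5 attributes cannot hold a Python dict directly, which is why the patch replaces the commented-out `labelsDictionary` attribute with two parallel attributes, `labels` and `labels_indices`, and rebuilds the dict with `zip` when reading. A minimal round-trip sketch (the file name is illustrative):

```python
import h5py

labels_dictionary = {0: "Positive", 1: "Negative"}

with h5py.File("example.hdf5", "w") as datasetFile:
    labelsDset = datasetFile.create_dataset("Labels", data=[0, 1, 1, 0])
    # Two parallel attributes stand in for the dict h5py cannot store.
    labelsDset.attrs["labels"] = [name.encode() for name in labels_dictionary.values()]
    labelsDset.attrs["labels_indices"] = list(labels_dictionary.keys())

with h5py.File("example.hdf5", "r") as datasetFile:
    attrs = datasetFile["Labels"].attrs
    rebuilt = dict(zip(attrs["labels_indices"], attrs["labels"]))
    print(rebuilt)
```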
diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
index 694a63335a23a46e26c9f96653440e482cec99b6..3f9f90285f8e3e0cdb6194862a852917ddd77ec0 100644
--- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py
+++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
@@ -36,19 +36,26 @@ def genNamesFromRes(mono, multi):
     return names
 
 
-def resultAnalysis(benchmark, results, name, times, metrics):
+def resultAnalysis(benchmark, results, name, times, metrics, directory):
     mono, multi = results
     for metric in metrics:
         names = genNamesFromRes(mono, multi)
         nbResults = len(mono)+len(multi)
-        validationScores = [float(res[1][2][metric[0]][0]) for res in mono]
-        validationScores += [float(scores[metric[0]][0]) for a, b, scores, c in multi]
-        validationSTD = [float(res[1][2][metric[0]][2]) for res in mono]
-        validationSTD += [float(scores[metric[0]][2]) for a, b, scores, c in multi]
-        trainScores = [float(res[1][2][metric[0]][1]) for res in mono]
-        trainScores += [float(scores[metric[0]][1]) for a, b, scores, c in multi]
-        trainSTD = [float(res[1][2][metric[0]][3]) for res in mono]
-        trainSTD += [float(scores[metric[0]][3]) for a, b, scores, c in multi]
+        validationScores = [float(res[1][2][metric[0]][1]) for res in mono]
+        validationScores += [float(scores[metric[0]][1]) for a, b, scores, c in multi]
+        validationSTD = [float(res[1][2][metric[0]][3]) for res in mono]
+        validationSTD += [float(scores[metric[0]][3]) for a, b, scores, c in multi]
+        trainScores = [float(res[1][2][metric[0]][0]) for res in mono]
+        trainScores += [float(scores[metric[0]][0]) for a, b, scores, c in multi]
+        trainSTD = [float(res[1][2][metric[0]][2]) for res in mono]
+        trainSTD += [float(scores[metric[0]][2]) for a, b, scores, c in multi]
+
+        validationScores = np.array(validationScores)
+        validationSTD = np.array(validationSTD)
+        trainScores = np.array(trainScores)
+        trainSTD = np.array(trainSTD)
+        names = np.array(names)
+
         f = pylab.figure(figsize=(40, 30))
         width = 0.35       # the width of the bars
         fig = plt.gcf()
@@ -58,20 +65,27 @@ def resultAnalysis(benchmark, results, name, times, metrics):
             metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
         else:
             metricKWARGS = {}
+        sorted_indices = np.argsort(validationScores)
+        validationScores = validationScores[sorted_indices]
+        validationSTD = validationSTD[sorted_indices]
+        trainScores = trainScores[sorted_indices]
+        trainSTD = trainSTD[sorted_indices]
+        names = names[sorted_indices]
+
         ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS)+" on validation set for each classifier")
         rects = ax.bar(range(nbResults), validationScores, width, color="r", yerr=validationSTD)
         rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.7", yerr=trainSTD)
         autolabel(rects, ax)
         autolabel(rect2, ax)
-        ax.legend((rects[0], rect2[0]), ('Train', 'Test'))
+        ax.legend((rects[0], rect2[0]), ('Test', 'Train'))
         ax.set_xticks(np.arange(nbResults)+width)
         ax.set_xticklabels(names, rotation="vertical")
-        f.savefig("Results/"+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-"+metric[0]+".png")
+        f.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-"+metric[0]+".png")
     logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s")
 
 
-def analyzeLabels(labelsArrays, realLabels, results):
+def analyzeLabels(labelsArrays, realLabels, results, directory):
     mono, multi = results
     classifiersNames = genNamesFromRes(mono, multi)
     nbClassifiers = len(classifiersNames)
@@ -94,4 +108,4 @@ def analyzeLabels(labelsArrays, realLabels, results):
     plt.xticks(ticks, labels, rotation="vertical")
     cbar = fig.colorbar(cax, ticks=[0, 1])
     cbar.ax.set_yticklabels(['Wrong', ' Right'])
-    fig.savefig("Results/"+time.strftime("%Y%m%d-%H%M%S")+"error_analysis.png")
\ No newline at end of file
+    fig.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-error_analysis.png")
\ No newline at end of file
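The plotting changes above convert every per-classifier series to a numpy array so a single `np.argsort` permutation can reorder scores, error bars, and tick labels together; sorting any one list alone would misalign the bars and their names. The hunk also swaps which score column is read as train versus validation and relabels the legend to match. A small sketch of the consistent-sort idea:

```python
import numpy as np

names = np.array(["SVMLinear", "KNN", "Adaboost"])
validationScores = np.array([0.71, 0.64, 0.83])
trainScores = np.array([0.90, 0.70, 0.95])

# One permutation applied to every array keeps bars, error bars and
# x-tick labels aligned after sorting by validation score.
sorted_indices = np.argsort(validationScores)
names = names[sorted_indices]
validationScores = validationScores[sorted_indices]
trainScores = trainScores[sorted_indices]
print(names)   # ['KNN' 'SVMLinear' 'Adaboost']
```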
diff --git a/Readme.md b/Readme.md
index 901f721319933b09f8436f8c674ddd5db55fdd5a..680b2c6446134035fb454446bff2ba5a167af2a0 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,4 +1,4 @@
-# Benchmark de classification mono et multi-vue
+# Mono- and Multi-view classification benchmark
 
 This project aims to be an easy-to-use solution to run a prior benchmark on a dataset and evaluate mono- and multi-view algorithms' capacity to classify it correctly.
 
@@ -42,4 +42,4 @@ Results will be stored in multiview-machine-learning-omis/Code/MonoMultiViewClas
 
 ## Authors
 
-* **Baptiste BAUVIN**
+* **Baptiste BAUVIN**