diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py index b5360978e6060743ff25ca7362db294e919daf9d..2f31424de9b68aed992473be56f46bf45288d451 100644 --- a/Code/MonoMultiViewClassifiers/ExecClassif.py +++ b/Code/MonoMultiViewClassifiers/ExecClassif.py @@ -18,7 +18,7 @@ from . import MultiviewClassifiers from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore from .utils import GetMultiviewDb as DB -from .ResultAnalysis import resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, getResults +from .ResultAnalysis import getResults #resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, from .utils import execution, Dataset, Multiclass from . import Metrics diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 7da23e3e1063b1ccd6e4d85f6138ffaad4c8e31a..2cbce0a85eda5ccf0b27c2b50c57696bf0aaeb0c 100644 --- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -52,7 +52,10 @@ def initTrainTest(X, Y, classificationIndices): trainIndices, testIndices, testIndicesMulticlass = classificationIndices X_train = extractSubset(X, trainIndices) X_test = extractSubset(X, testIndices) - X_test_multiclass = extractSubset(X, testIndicesMulticlass) + if testIndicesMulticlass != []: + X_test_multiclass = extractSubset(X, testIndicesMulticlass) + else: + X_test_multiclass = [] y_train = Y[trainIndices] y_test = Y[testIndices] return X_train, y_train, X_test, y_test, X_test_multiclass diff --git a/Code/MonoMultiViewClassifiers/ResultAnalysis.py b/Code/MonoMultiViewClassifiers/ResultAnalysis.py index c8a451db5edb05770d4053e0710e8d77981d3661..45d935f9cde8948e140cfb7d60d6035c1d9b8182 100644 --- a/Code/MonoMultiViewClassifiers/ResultAnalysis.py +++ b/Code/MonoMultiViewClassifiers/ResultAnalysis.py @@ -459,7 +459,7 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, cl def iterCmap(statsIter): cmapList = ["red", "0.0"] for i in range(statsIter): - cmapList.append(str((i+1)/statsIter)) + cmapList.append(str(float((i+1))/statsIter)) cmap = mpl.colors.ListedColormap(cmapList) bounds = [-100*statsIter-0.5, -0.5] for i in range(statsIter): @@ -658,14 +658,14 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName, nbExamples) else: biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) - analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName) + analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples) else: if nbMulticlass>1: biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics, classificationIndices, directories) else: - analyzeBiclass(results) + biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) diff --git a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py index d51185e882167577d25bbcb1ca0f706232d995be..871f9bb3f56b58e5c50f06df86a8afbb9a2f8b5d 100644 --- a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py +++ b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py @@ -52,7 +52,7 @@ def makeMeNoisy(viewData, randomState, percentage=15): return noisyViewData -def getPlausibleDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, nbView=3, +def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", nbView=3, nbClass=2, datasetLength=347, randomStateInt=42): """Used to generate a plausible dataset to test the algorithms""" randomState = np.random.RandomState(randomStateInt) @@ -64,38 +64,65 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, nbView=3, if exc.errno != errno.EEXIST: raise datasetFile = h5py.File(pathF + "/Plausible.hdf5", "w") - firstBound = int(datasetLength / 3) - rest = datasetLength - 2*int(datasetLength / 3) - scndBound = 2*int(datasetLength / 3) - thrdBound = datasetLength - CLASS_LABELS = np.array([0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + [2 for _ in range(rest)]) - for viewIndex in range(nbView): - viewData = np.array([np.zeros(nbFeatures) for _ in range(firstBound)] + - [np.ones(nbFeatures)for _ in range(firstBound)] + - [np.ones(nbFeatures)+1 for _ in range(rest)]) - fakeOneIndices = randomState.randint(0, firstBound, int(datasetLength / 12)) - fakeTwoIndices = randomState.randint(firstBound, scndBound, int(datasetLength / 12)) - fakeZeroIndices = randomState.randint(scndBound, thrdBound, int(datasetLength / 12)) + if NB_CLASS == 2: + CLASS_LABELS = np.array([0 for _ in range(datasetLength/2)] + [1 for _ in range(datasetLength-datasetLength/2)]) + for viewIndex in range(nbView): + viewData = np.array([np.zeros(nbFeatures) for _ in range(datasetLength/2)] + + [np.ones(nbFeatures)for _ in range(datasetLength-datasetLength/2)]) + fakeOneIndices = randomState.randint(0, datasetLength/2, int(datasetLength / 12)) + fakeZeroIndices = randomState.randint(datasetLength/2, datasetLength-datasetLength/2, int(datasetLength / 12)) - viewData[fakeOneIndices] = np.ones((len(fakeOneIndices), nbFeatures)) - viewData[fakeZeroIndices] = np.zeros((len(fakeZeroIndices), nbFeatures)) - viewData[fakeTwoIndices] = np.ones((len(fakeTwoIndices), nbFeatures))+1 - viewData = makeMeNoisy(viewData, randomState) - viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) - viewDset.attrs["name"] = "View" + str(viewIndex) - viewDset.attrs["sparse"] = False - labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) - labelsDset[...] = CLASS_LABELS - labelsDset.attrs["name"] = "Labels" - labelsDset.attrs["names"] = ["No".encode(), "Yes".encode(), "Maybe".encode()] - metaDataGrp = datasetFile.create_group("Metadata") - metaDataGrp.attrs["nbView"] = nbView - metaDataGrp.attrs["nbClass"] = 3 - metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) - datasetFile.close() - datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") - LABELS_DICTIONARY = {0: "No", 1: "Yes", 2:"Maybe"} - return datasetFile, LABELS_DICTIONARY + viewData[fakeOneIndices] = np.ones((len(fakeOneIndices), nbFeatures)) + viewData[fakeZeroIndices] = np.zeros((len(fakeZeroIndices), nbFeatures)) + viewData = makeMeNoisy(viewData, randomState) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) + viewDset.attrs["name"] = "View" + str(viewIndex) + viewDset.attrs["sparse"] = False + labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) + labelsDset[...] = CLASS_LABELS + labelsDset.attrs["name"] = "Labels" + labelsDset.attrs["names"] = ["No".encode(), "Yes".encode()] + metaDataGrp = datasetFile.create_group("Metadata") + metaDataGrp.attrs["nbView"] = nbView + metaDataGrp.attrs["nbClass"] = 2 + metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) + datasetFile.close() + datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") + LABELS_DICTIONARY = {0: "No", 1: "Yes"} + return datasetFile, LABELS_DICTIONARY + elif NB_CLASS >= 3: + firstBound = int(datasetLength / 3) + rest = datasetLength - 2*int(datasetLength / 3) + scndBound = 2*int(datasetLength / 3) + thrdBound = datasetLength + CLASS_LABELS = np.array([0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + [2 for _ in range(rest)]) + for viewIndex in range(nbView): + viewData = np.array([np.zeros(nbFeatures) for _ in range(firstBound)] + + [np.ones(nbFeatures)for _ in range(firstBound)] + + [np.ones(nbFeatures)+1 for _ in range(rest)]) + fakeOneIndices = randomState.randint(0, firstBound, int(datasetLength / 12)) + fakeTwoIndices = randomState.randint(firstBound, scndBound, int(datasetLength / 12)) + fakeZeroIndices = randomState.randint(scndBound, thrdBound, int(datasetLength / 12)) + + viewData[fakeOneIndices] = np.ones((len(fakeOneIndices), nbFeatures)) + viewData[fakeZeroIndices] = np.zeros((len(fakeZeroIndices), nbFeatures)) + viewData[fakeTwoIndices] = np.ones((len(fakeTwoIndices), nbFeatures))+1 + viewData = makeMeNoisy(viewData, randomState) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) + viewDset.attrs["name"] = "View" + str(viewIndex) + viewDset.attrs["sparse"] = False + labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) + labelsDset[...] = CLASS_LABELS + labelsDset.attrs["name"] = "Labels" + labelsDset.attrs["names"] = ["No".encode(), "Yes".encode(), "Maybe".encode()] + metaDataGrp = datasetFile.create_group("Metadata") + metaDataGrp.attrs["nbView"] = nbView + metaDataGrp.attrs["nbClass"] = 3 + metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) + datasetFile.close() + datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") + LABELS_DICTIONARY = {0: "No", 1: "Yes", 2:"Maybe"} + return datasetFile, LABELS_DICTIONARY # def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, randomState): diff --git a/Code/MonoMultiViewClassifiers/utils/Multiclass.py b/Code/MonoMultiViewClassifiers/utils/Multiclass.py index 89fd7bd1f038442067fed97f0b405f3abe43a0ee..1132dd2141363e8dabe35048c9d24d88d7dae986 100644 --- a/Code/MonoMultiViewClassifiers/utils/Multiclass.py +++ b/Code/MonoMultiViewClassifiers/utils/Multiclass.py @@ -6,7 +6,9 @@ def genMulticlassLabels(labels, multiclassMethod, classificationIndices): if multiclassMethod == "oneVersusOne": nbLabels = len(set(list(labels))) if nbLabels == 2: - classificationIndices = [[trainIndices, testIndices, []] for trainIndices, testIndices in classificationIndices] + classificationIndices = [[trainIndices for trainIndices, _ in classificationIndices], + [testIndices for _, testIndices in classificationIndices], + [[] for _ in classificationIndices]] return [labels], [(0,1)], [classificationIndices] else: combinations = itertools.combinations(np.arange(nbLabels), 2) diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py index 10906637ba20983c1548c6a4bec1aa9c172c3d10..206f2eea3c1cc366d3e27885d9231e74998ec031 100644 --- a/Code/MonoMultiViewClassifiers/utils/execution.py +++ b/Code/MonoMultiViewClassifiers/utils/execution.py @@ -264,7 +264,7 @@ def genKFolds(statsIter, nbFolds, statsIterRandomStates): foldsList.append(sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, random_state=randomState)) return foldsList else: - return sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, random_state=statsIterRandomStates) + return [sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, random_state=statsIterRandomStates)] def initViews(DATASET, args):