diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 67b86d4033c9016fef4d18ee1a7c308a0b2da503..92653a2ff7b4c04e67019cbf070b0fc813d76996 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -70,7 +70,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa y_test_preds = [] for iterationStat in range(statsIter): # Calculate Train/Test data - logging.debug("Start:\t Determine Train/Test split") + logging.debug("Start:\t Determine Train/Test split"+" for iteration "+str(iterationStat+1)) testIndices = ClassifMonoView.splitDataset(Y, nbClass, learningRate, datasetLength) trainIndices = [i for i in range(datasetLength) if i not in testIndices] diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 8f2aa96e03e87957244ed5616964bff6d791e400..77769d796b0e1e1a5946630793d416368d9b2625 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -68,9 +68,10 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p classifierClass = getattr(classifierModule, CL_type) classifierGridSearch = getattr(classifierModule, "gridSearch_hdf5") analysisModule = getattr(classifierPackage, "analyzeResults") + for iterIndex in range(statsIter): - logging.info("Start:\t Determine validation split for ratio " + str(learningRate)) + logging.info("Start:\t Determine validation split for ratio " + str(learningRate)+" for iteration "+str(iterIndex+1)) validationIndices = DB.splitDataset(DATASET, learningRate, datasetLength) learningIndices = [index for index in range(datasetLength) if index not in validationIndices] classificationSetLength = len(learningIndices) @@ -88,23 +89,6 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p logging.info("Start:\t Learning with " + CL_type + " and " + str(len(kFolds)) + " folds") - - - - - - - if gridSearch: - logging.info("Start:\t Randomsearching best settings for monoview classifiers") - bestSettings, fusionConfig = classifierGridSearch(DATASET, viewsIndices, classificationKWARGS, learningIndices - , metric=metrics[0], nIter=nIter) - classificationKWARGS["classifiersConfigs"] = bestSettings - try: - classificationKWARGS["fusionMethodConfig"] = fusionConfig - except: - pass - logging.info("Done:\t Randomsearching best settings for monoview classifiers") - logging.info("Start:\t Classification") # Begin Classification @@ -119,6 +103,16 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p fold.sort() logging.info("\tStart:\t Fold number " + str(foldIdx + 1)) trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index not in validationIndices)] + if gridSearch: + logging.info("Start:\t Randomsearching best settings for monoview classifiers") + bestSettings, fusionConfig = classifierGridSearch(DATASET, viewsIndices, classificationKWARGS, trainIndices + , metric=metrics[0], nIter=nIter) + classificationKWARGS["classifiersConfigs"] = bestSettings + try: + classificationKWARGS["fusionMethodConfig"] = fusionConfig + except: + pass + logging.info("Done:\t Randomsearching best settings for monoview classifiers") DATASET_LENGTH = len(trainIndices) classifier = classifierClass(NB_VIEW, DATASET_LENGTH, DATASET.get("Labels").value[trainIndices], NB_CORES=nbCores, **classificationKWARGS) @@ -127,7 +121,6 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p learningTime = time.time() - extractionTime - t_start kFoldLearningTimeIter.append(learningTime) - logging.info("\tStart: \t Classification") kFoldPredictedTrainLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices)) kFoldPredictedTestLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=fold, viewsIndices=viewsIndices)) kFoldPredictedValidationLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices)) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index 57888231b715addfed55322a7c20e3811cc8c810..6d9bc56403a6c4978bf731b0ac29aac4a7c05b0a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -68,11 +68,15 @@ class SCMForLinear(LateFusionClassifier): usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if usedIndices: monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) + accus=[] for index, viewIndex in enumerate(viewsIndices): - monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( + monoviewDecision = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) - features = self.generateInteractions(monoviewDecisions, order=self.order) + accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) + monoviewDecisions[:, index] = monoviewDecision + features = self.generateInteractions(monoviewDecisions) predictedLabels = self.SCMClassifier.predict(features) + print str(np.array([accuracy_score(DATASET.get("Labels").value[usedIndices], predictedLabels)>acc for acc in accus]).all()), len(self.SCMClassifier.model) else: predictedLabels = [] return predictedLabels @@ -91,8 +95,10 @@ class SCMForLinear(LateFusionClassifier): for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) - features = self.generateInteractions(monoViewDecisions, order=self.order) - featureSequence = [str(featureIndex) for featureIndex in range(features.shape[1])] + features = self.generateInteractions(monoViewDecisions) + featureSequence=[str(index) for index in range(nbView)] + for orderIndex in range(self.order-1): + featureSequence += [str(featureIndex) for featureIndex in itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2)] featureIndexByRule = np.arange(features.shape[1], dtype=np.uint32) binaryAttributes = LazyBaptisteRuleList(featureSequence, featureIndexByRule) packedData = _pack_binary_bytes_to_ints(features, 64) @@ -124,14 +130,14 @@ class SCMForLinear(LateFusionClassifier): except: pass - def generateInteractions(self, monoViewDecisions, order=None): - if type(order)==type(None): + def generateInteractions(self, monoViewDecisions): + if type(self.order)==type(None): order = monoViewDecisions.shape[1] genratedIntercations = [monoViewDecisions[:,i] for i in range(monoViewDecisions.shape[1])] - if order==1: + if self.order==1: return monoViewDecisions else: - for orderIndex in range(order-1): + for orderIndex in range(self.order-1): combins = itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2) for combin in combins: generatedDecision = monoViewDecisions[:,combin[0]] diff --git a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py index 9d1ec462239af1467ac4907a10fff5b2e29c764a..34257b61b5ac1fd3f7755091e6d6ab6d50777d54 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py +++ b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py @@ -13,18 +13,33 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=4, nbClass=2, datasetLength=500): +def makeMeNoisy(viewData, percentage=5): + viewData = viewData.astype(bool) + nbNoisyCoord = int(percentage/100.0*viewData.shape[0]*viewData.shape[1]) + rows = range(viewData.shape[0]) + cols = range(viewData.shape[1]) + for _ in range(nbNoisyCoord): + rowIdx = random.choice(rows) + colIdx = random.choice(cols) + viewData[rowIdx, colIdx] = not viewData[rowIdx, colIdx] + noisyViewData = viewData.astype(np.uint8) + return noisyViewData + + +def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=10, nbClass=2, datasetLength=500): nbFeatures = 150 datasetFile = h5py.File(pathF+"Plausible.hdf5", "w") CLASS_LABELS = np.array([0 for i in range(datasetLength/2)]+[1 for i in range(datasetLength/2)]) for viewIndex in range(nbView): # if viewIndex== 0 : - viewData = np.array([np.zeros(nbFeatures) for i in range(datasetLength/2)]+[np.ones(nbFeatures) for i in range(datasetLength/2)]).astype(np.uint8) - fakeTrueIndices = np.random.randint(0, datasetLength/2-1, datasetLength/5) - fakeFalseIndices = np.random.randint(datasetLength/2, datasetLength, datasetLength/5) + viewData = np.array([np.zeros(nbFeatures) for i in range(datasetLength/2)]+[np.ones(nbFeatures) for i in range(datasetLength/2)]) + fakeTrueIndices = np.random.randint(0, datasetLength/2-1, datasetLength/10) + fakeFalseIndices = np.random.randint(datasetLength/2, datasetLength, datasetLength/10) + viewData[fakeTrueIndices] = np.ones((len(fakeTrueIndices), nbFeatures)) viewData[fakeFalseIndices] = np.zeros((len(fakeFalseIndices), nbFeatures)) - viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape, data=viewData) + viewData = makeMeNoisy(viewData) + viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) viewDset.attrs["name"] = "View"+str(viewIndex) viewDset.attrs["sparse"] = False viewDset.attrs["binary"] = True @@ -49,7 +64,7 @@ def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=4, def getFakeDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME): NB_VIEW = 4 - DATASET_LENGTH = 300 + DATASET_LENGTH = 30 NB_CLASS = 2 VIEW_DIMENSIONS = np.random.random_integers(5, 20, NB_VIEW) diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py index 951c9f968b7b191dd8883bc4861c2abde8f6b615..1f80cf1ef4022c4828f123a845fb03d0e48aedfa 100644 --- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py +++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py @@ -48,7 +48,7 @@ def resultAnalysis(benchmark, results, name, times, metrics): metricKWARGS = {} ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS)+" on validation set for each classifier") rects = ax.bar(range(nbResults), validationScores, width, color="r") - rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.3") + rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.7") autolabel(rects, ax) autolabel(rect2, ax) ax.legend((rects[0], rect2[0]), ('Train', 'Test')) diff --git a/Code/MonoMutliViewClassifiers/Versions.py b/Code/MonoMutliViewClassifiers/Versions.py index ccff8b4a2c4bad8d7dc47c9326e43081cf757b4c..3f0038a3b22b70ba489a9405f188b1fd143095d8 100644 --- a/Code/MonoMutliViewClassifiers/Versions.py +++ b/Code/MonoMutliViewClassifiers/Versions.py @@ -21,6 +21,11 @@ def testVersions(): except: print "Please install Python 2.7" raise + try: + import pyscm + except: + print "Please install pyscm" + raise try: import cv2