From ff0b76cdf57c217d92952c4503eb4429c67995e2 Mon Sep 17 00:00:00 2001 From: bbauvin <baptiste.bauvin@centrale-marseille.fr> Date: Thu, 12 Oct 2017 14:44:39 -0400 Subject: [PATCH] Refactored project --- Code/MonoMutliViewClassifiers/ExecClassif.py | 101 +++--- .../Metrics/accuracy_score.py | 14 +- .../Metrics/f1_score.py | 23 +- .../Metrics/fbeta_score.py | 32 +- .../Metrics/hamming_loss.py | 14 +- .../Metrics/jaccard_similarity_score.py | 8 +- .../Metrics/log_loss.py | 11 +- .../Metrics/matthews_corrcoef.py | 7 +- .../Metrics/precision_score.py | 23 +- .../Metrics/recall_score.py | 23 +- .../Metrics/roc_auc_score.py | 15 +- .../Metrics/zero_one_loss.py | 15 +- .../Monoview/ExecClassifMonoView.py | 129 ++++---- .../Monoview/ExecPlot.py | 62 ++-- .../Monoview/ExportResults.py | 16 +- .../Monoview/MonoviewUtils.py | 105 ++++--- .../Monoview/__init__.py | 2 +- .../Monoview/analyzeResult.py | 36 ++- Code/MonoMutliViewClassifiers/Monoview/run.py | 6 +- .../MonoviewClassifiers/Adaboost.py | 52 +--- .../MonoviewClassifiers/DecisionTree.py | 26 +- .../MonoviewClassifiers/KNN.py | 31 +- .../MonoviewClassifiers/RandomForest.py | 20 +- .../MonoviewClassifiers/SCM.py | 80 ++--- .../MonoviewClassifiers/SGD.py | 24 +- .../MonoviewClassifiers/SVMLinear.py | 22 +- .../MonoviewClassifiers/SVMPoly.py | 21 +- .../MonoviewClassifiers/SVMRBF.py | 22 +- .../Multiview/ExecMultiview.py | 35 ++- .../Multiview/Fusion/Fusion.py | 69 +++-- .../Multiview/Fusion/Methods/EarlyFusion.py | 19 +- .../EarlyFusionPackage/WeightedLinear.py | 31 +- .../Methods/EarlyFusionPackage/__init__.py | 3 +- .../Multiview/Fusion/Methods/LateFusion.py | 93 ++---- .../LateFusionPackage/BayesianInference.py | 41 +-- .../LateFusionPackage/MajorityVoting.py | 33 +- .../Methods/LateFusionPackage/SCMForLinear.py | 110 ++++--- .../Methods/LateFusionPackage/SVMForLinear.py | 26 +- .../LateFusionPackage/WeightedLinear.py | 30 +- .../Methods/LateFusionPackage/__init__.py | 3 +- .../Multiview/Fusion/Methods/__init__.py | 3 +- .../Multiview/Fusion/__init__.py | 3 +- .../Multiview/Fusion/analyzeResults.py | 41 +-- .../Mumbo/Classifiers/DecisionTree.py | 35 ++- .../Multiview/Mumbo/Classifiers/Kover.py | 2 +- .../Mumbo/Classifiers/ModifiedMulticlass.py | 24 +- .../Mumbo/Classifiers/SubSampling.py | 5 +- .../Multiview/Mumbo/Mumbo.py | 71 +++-- .../Multiview/Mumbo/__init__.py | 3 +- .../Multiview/Mumbo/analyzeResults.py | 99 +++--- .../MonoMutliViewClassifiers/Multiview/run.py | 12 +- .../ResultAnalysis.py | 60 ++-- Code/MonoMutliViewClassifiers/Versions.py | 11 +- .../MonoMutliViewClassifiers/utils/Dataset.py | 38 ++- .../utils/HyperParameterSearch.py | 290 +++++++++--------- .../utils/Transformations.py | 2 +- .../utils/__init__.py | 2 +- 57 files changed, 1120 insertions(+), 1014 deletions(-) diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 6b5d3781..3d9c6efd 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -217,13 +217,15 @@ def lateFusionSetArgs(views, viewsIndices, classes, method, return arguments -def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices): +def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, + resultsMonoview, classificationIndices): multiviewArguments = [] if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: multiviewPackage = 
getattr(Multiview, multiviewAlgoName) mutliviewModule = getattr(multiviewPackage, multiviewAlgoName) - multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices) + multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, + resultsMonoview, classificationIndices) argumentDictionaries["Multiview"] = multiviewArguments return argumentDictionaries @@ -243,7 +245,8 @@ def genSplits(statsIter, indices, DATASET, splitRatio, statsIterRandomStates): if statsIter > 1: splits = [] for randomState in statsIterRandomStates: - trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices, DATASET.get("Labels").value, + trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices, + DATASET.get("Labels").value, test_size=splitRatio, random_state=randomState) splits.append([trainIndices, testIndices]) @@ -266,35 +269,38 @@ def genKFolds(statsIter, nbFolds, statsIterRandomStates): def genDirecortiesNames(directory, statsIter): - if statsIter>1: + if statsIter > 1: directories = [] for i in range(statsIter): - directories.append(directory+"iter_"+str(i+1)+"/") + directories.append(directory + "iter_" + str(i + 1) + "/") return directories else: return directory -def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, kFolds, - randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, benchmark, +def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, + kFolds, + randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, + benchmark, views): resultsMonoview = [] labelsNames = LABELS_DICTIONARY.values() resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, randomState, - hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, - **arguments) - for arguments in argumentDictionaries["Monoview"]] + coreIndex, args.type, args.pathF, randomState, + hyperParamSearch=hyperParamSearch, + metrics=metrics, nIter=args.CL_GS_iter, + **arguments) + for arguments in argumentDictionaries["Monoview"]] monoviewTime = time.time() - dataBaseTime - start - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, + randomState, directory, resultsMonoview, classificationIndices) resultsMultiview = [] resultsMultiview += [ ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter,**arguments) + metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in argumentDictionaries["Multiview"]] multiviewTime = time.time() - monoviewTime - dataBaseTime - start @@ -310,11 +316,11 @@ def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, logging.debug("Done:\t Analyze Global Results for iteration") globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime totalTime = time.time() - start - logging.info("Extraction time : 
"+str(dataBaseTime)+ - "s, Monoview time : "+str(monoviewTime)+ - "s, Multiview Time : "+str(multiviewTime)+ - "s, Global Analysis Time : "+str(globalAnalysisTime)+ - "s, Total Duration : "+str(totalTime)+"s") + logging.info("Extraction time : " + str(dataBaseTime) + + "s, Monoview time : " + str(monoviewTime) + + "s, Multiview Time : " + str(multiviewTime) + + "s, Global Analysis Time : " + str(globalAnalysisTime) + + "s, Total Duration : " + str(totalTime) + "s") return results @@ -343,14 +349,16 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, for arguments in argumentDictionaries["Monoview"]]) monoviewTime = time.time() - dataBaseTime - start - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, + randomState, directory, resultsMonoview, classificationIndices) resultsMultiview = [] if nbCores > 1: nbExperiments = len(argumentDictionaries["Multiview"]) for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): resultsMultiview += Parallel(n_jobs=nbCores)( - delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, + delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, + args.type, args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, @@ -379,11 +387,11 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, logging.debug("Done:\t Analyze Global Results") globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime totalTime = time.time() - start - logging.info("Extraction time : "+str(dataBaseTime)+ - "s, Monoview time : "+str(monoviewTime)+ - "s, Multiview Time : "+str(multiviewTime)+ - "s, Global Analysis Time : "+str(globalAnalysisTime)+ - "s, Total Duration : "+str(totalTime)+"s") + logging.info("Extraction time : " + str(dataBaseTime) + + "s, Monoview time : " + str(monoviewTime) + + "s, Multiview Time : " + str(multiviewTime) + + "s, Global Analysis Time : " + str(globalAnalysisTime) + + "s, Total Duration : " + str(totalTime) + "s") return results @@ -398,7 +406,7 @@ def initRandomState(randomStateArg, directory): fileName = randomStateArg with open(fileName, 'rb') as handle: randomState = cPickle.load(handle) - with open(directory+"randomState.pickle", "wb") as handle: + with open(directory + "randomState.pickle", "wb") as handle: cPickle.dump(randomState, handle) return randomState @@ -448,7 +456,8 @@ groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store' groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=2) groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', - help="Number of iteration for each algorithm to mean results if using multiple cores, it's highly recommended to use statsiter mod(nbCores) = 0", type=int, + help="Number of iteration for each algorithm to mean results if using multiple cores, it's highly recommended to use statsiter mod(nbCores) = 0", + type=int, default=2) groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+", help='Determine which metrics to use, separate metric and configuration with ":".' 
@@ -589,7 +598,6 @@ if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plaus else: getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:]) - DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nb_class, args.CL_classes) @@ -622,7 +630,6 @@ logging.info("Start:\t Finding all available mono- & multiview algorithms") benchmark = initBenchmark(args) - initKWARGS = initKWARGS(args, benchmark) dataBaseTime = time.time() - start @@ -632,24 +639,29 @@ argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, vi initKWARGS) directories = genDirecortiesNames(directory, statsIter) -if statsIter>1: +if statsIter > 1: for statIterIndex in range(statsIter): - if not os.path.exists(os.path.dirname(directories[statIterIndex]+"train_labels.csv")): + if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")): try: - os.makedirs(os.path.dirname(directories[statIterIndex]+"train_labels.csv")) + os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv")) except OSError as exc: if exc.errno != errno.EEXIST: raise trainIndices, testIndices = classificationIndices[statIterIndex] trainLabels = DATASET.get("Labels").value[trainIndices] - np.savetxt(directories[statIterIndex]+"train_labels.csv", trainLabels, delimiter=",") + np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",") if nbCores > 1: iterResults = [] nbExperiments = statsIter for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): iterResults += (Parallel(n_jobs=nbCores)( - delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, directories[coreIndex + stepIndex * nbCores], args, classificationIndices[coreIndex + stepIndex * nbCores], kFolds[coreIndex + stepIndex * nbCores], - statsIterRandomStates[coreIndex + stepIndex * nbCores], hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, benchmark, + delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, + directories[coreIndex + stepIndex * nbCores], args, + classificationIndices[coreIndex + stepIndex * nbCores], + kFolds[coreIndex + stepIndex * nbCores], + statsIterRandomStates[coreIndex + stepIndex * nbCores], + hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, + start, benchmark, views) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") @@ -658,15 +670,18 @@ if statsIter>1: else: iterResults = [] for iterIndex in range(statsIter): - iterResults.append(classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, - classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], - hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, views)) + iterResults.append( + classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, + classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], + hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, + views)) analyzeIterResults(iterResults, args.name, metrics, directory) else: - res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, kFolds, - statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, - benchmark, views) + 
res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, + kFolds, + statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, + benchmark, views) if statsIter > 1: - pass \ No newline at end of file + pass diff --git a/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py index fdc97a35..a51d11b7 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/accuracy_score.py @@ -7,8 +7,8 @@ from sklearn.metrics import accuracy_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): @@ -24,7 +24,7 @@ def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None score = metric(y_true, y_pred, sample_weight=sample_weight) return score @@ -38,7 +38,7 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight) @@ -46,6 +46,6 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None - configString = "Accuracy score using "+str(sample_weight)+" as sample_weights (higher is better)" - return configString \ No newline at end of file + sample_weight = None + configString = "Accuracy score using " + str(sample_weight) + " as sample_weights (higher is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/f1_score.py b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py index 842a0316..ab33833e 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/f1_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/f1_score.py @@ -7,19 +7,19 @@ from sklearn.metrics import f1_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -36,11 +36,11 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -57,11 +57,11 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -70,6 +70,7 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "F1 score using "+str(sample_weight)+" as sample_weights, "+str(labels)+" as labels, "+str(pos_label)\ - +" as pos_label, "+average+" as average (higher is better)" - return configString \ No newline at end of file + configString = "F1 score using " + str(sample_weight) + " as sample_weights, " + str(labels) + " as labels, " + str( + pos_label) \ + + " as pos_label, " + average + " as average (higher is better)" + return 
configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py index 0ef83fb7..e83beabd 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/fbeta_score.py @@ -2,23 +2,23 @@ from sklearn.metrics import fbeta_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: beta = kwargs["1"] except: - beta=1.0 + beta = 1.0 try: labels = kwargs["2"] except: - labels=None + labels = None try: pos_label = kwargs["3"] except: @@ -27,7 +27,8 @@ def score(y_true, y_pred, **kwargs): average = kwargs["4"] except: average = "binary" - score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, + average=average) return score @@ -35,15 +36,15 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: beta = kwargs["1"] except: - beta=1.0 + beta = 1.0 try: labels = kwargs["2"] except: - labels=None + labels = None try: pos_label = kwargs["3"] except: @@ -60,15 +61,15 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: beta = kwargs["1"] except: - beta=1.0 + beta = 1.0 try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -77,6 +78,7 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "F-beta score using "+str(sample_weight)+" as sample_weights, "+str(labels)+" as labels, "+str(pos_label) \ - +" as pos_label, "+average+" as average, "+str(beta)+" as beta (higher is better)" - return configString \ No newline at end of file + configString = "F-beta score using " + str(sample_weight) + " as sample_weights, " + str( + labels) + " as labels, " + str(pos_label) \ + + " as pos_label, " + average + " as average, " + str(beta) + " as beta (higher is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py index 2ffe3cf1..2ad6e26c 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py +++ b/Code/MonoMutliViewClassifiers/Metrics/hamming_loss.py @@ -2,15 +2,15 @@ from sklearn.metrics import hamming_loss as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: classes = kwargs["0"] except: - classes=None + classes = None score = metric(y_true, y_pred, classes=classes) return score @@ -19,7 +19,7 @@ def get_scorer(**kwargs): try: classes = kwargs["0"] except: - classes=None + classes = None return make_scorer(metric, greater_is_better=False, classes=classes) @@ -27,6 +27,6 @@ def getConfig(**kwargs): try: classes = kwargs["0"] except: - classes=None - configString = "Hamming loss using "+str(classes)+" as classes (lower is better)" - return configString \ No 
newline at end of file + classes = None + configString = "Hamming loss using " + str(classes) + " as classes (lower is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py index ccd6cbcb..fff71707 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/jaccard_similarity_score.py @@ -2,8 +2,8 @@ from sklearn.metrics import jaccard_similarity_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): @@ -27,6 +27,6 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None - configString = "Jaccard similarity score using "+str(sample_weight)+" as sample_weights (higher is better)" + sample_weight = None + configString = "Jaccard similarity score using " + str(sample_weight) + " as sample_weights (higher is better)" return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/log_loss.py b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py index ef4da573..7a748037 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/log_loss.py +++ b/Code/MonoMutliViewClassifiers/Metrics/log_loss.py @@ -2,8 +2,8 @@ from sklearn.metrics import log_loss as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): @@ -35,10 +35,11 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: eps = kwargs["1"] except: eps = 1e-15 - configString = "Log loss using "+str(sample_weight)+" as sample_weights, "+str(eps)+" as eps (lower is better)" - return configString \ No newline at end of file + configString = "Log loss using " + str(sample_weight) + " as sample_weights, " + str( + eps) + " as eps (lower is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py index 7c473403..3f077474 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py +++ b/Code/MonoMutliViewClassifiers/Metrics/matthews_corrcoef.py @@ -2,8 +2,8 @@ from sklearn.metrics import matthews_corrcoef as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): @@ -14,6 +14,7 @@ def score(y_true, y_pred, **kwargs): def get_scorer(**kwargs): return make_scorer(metric, greater_is_better=True) + def getConfig(**kwargs): configString = "Matthews correlation coefficient (higher is better)" - return configString \ No newline at end of file + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/precision_score.py b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py index 0dffcf35..49620fb5 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/precision_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/precision_score.py @@ -2,19 +2,19 @@ from 
sklearn.metrics import precision_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -31,11 +31,11 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -52,11 +52,11 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -65,6 +65,7 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "Precision score using "+str(sample_weight)+" as sample_weights, "+str(labels)+" as labels, "+str(pos_label) \ - +" as pos_label, "+average+" as average (higher is better)" - return configString \ No newline at end of file + configString = "Precision score using " + str(sample_weight) + " as sample_weights, " + str( + labels) + " as labels, " + str(pos_label) \ + + " as pos_label, " + average + " as average (higher is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/recall_score.py b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py index f2941bf4..ad657812 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/recall_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/recall_score.py @@ -2,19 +2,19 @@ from sklearn.metrics import recall_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -31,11 +31,11 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -52,11 +52,11 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: labels = kwargs["1"] except: - labels=None + labels = None try: pos_label = kwargs["2"] except: @@ -65,6 +65,7 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "Recall score using "+str(sample_weight)+" as sample_weights, "+str(labels)+" as labels, "+str(pos_label) \ - +" as pos_label, "+average+" as average (higher is better)" - return configString \ No newline at end of file + configString = "Recall score using " + str(sample_weight) + " as sample_weights, " + str( + labels) + " as labels, " + str(pos_label) \ + + " as pos_label, " + average + " as average (higher is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py index 7beda9f4..2847252d 100644 --- 
a/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py +++ b/Code/MonoMutliViewClassifiers/Metrics/roc_auc_score.py @@ -2,15 +2,15 @@ from sklearn.metrics import roc_auc_score as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: average = kwargs["1"] except: @@ -23,7 +23,7 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: average = kwargs["1"] except: @@ -35,10 +35,11 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None try: average = kwargs["3"] except: average = "micro" - configString = "ROC AUC score using "+str(sample_weight)+" as sample_weights, "+average+" as average (higher is better)" - return configString \ No newline at end of file + configString = "ROC AUC score using " + str( + sample_weight) + " as sample_weights, " + average + " as average (higher is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py index 79388de3..ea0b6478 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py +++ b/Code/MonoMutliViewClassifiers/Metrics/zero_one_loss.py @@ -2,14 +2,15 @@ from sklearn.metrics import zero_one_loss as metric from sklearn.metrics import make_scorer # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + def score(y_true, y_pred, **kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None score = metric(y_true, y_pred, sample_weight=sample_weight) return score @@ -18,7 +19,7 @@ def get_scorer(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None + sample_weight = None return make_scorer(metric, greater_is_better=False, sample_weight=sample_weight) @@ -26,6 +27,6 @@ def getConfig(**kwargs): try: sample_weight = kwargs["0"] except: - sample_weight=None - configString = "Zero one loss using "+str(sample_weight)+" as sample_weights (lower is better)" - return configString \ No newline at end of file + sample_weight = None + configString = "Zero one loss using " + str(sample_weight) + " as sample_weights (lower is better)" + return configString diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 42820970..1580d808 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -3,46 +3,50 @@ """ Execution: Script to perform a MonoView classification """ # Import built-in modules -import argparse # for command line arguments -import datetime # for TimeStamp in CSVFile -import os # to geth path of the running script -import time # for time calculations +import argparse # for command line arguments +import datetime # for TimeStamp in CSVFile +import os # to geth path of the running script +import time # for time calculations import operator import errno # Import 3rd party modules -import numpy as np # for reading CSV-files and Series -import 
logging # To create Log-Files -from sklearn import metrics # For stastics on classification +import numpy as np # for reading CSV-files and Series +import logging # To create Log-Files +from sklearn import metrics # For stastics on classification import h5py # Import own modules -import MonoviewUtils # Functions for classification -import ExportResults # Functions to render results +import MonoviewUtils # Functions for classification +import ExportResults # Functions to render results import MonoviewClassifiers import Metrics from analyzeResult import execute from utils.Dataset import getV, getValue, extractSubset # Author-Info -__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" -__status__ = "Prototype" # Production, Development, Prototype -__date__ = 2016-03-25 +__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" +__status__ = "Prototype" # Production, Development, Prototype +__date__ = 2016 - 03 - 25 -def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, path, randomState, hyperParamSearch="randomizedSearch", +def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, + path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): - DATASET = h5py.File(path+name+str(datasetFileIndex)+".hdf5", "r") + DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r") kwargs = args["args"] - views = [DATASET.get("View"+str(viewIndex)).attrs["name"] for viewIndex in range(DATASET.get("Metadata").attrs["nbView"])] + views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in + range(DATASET.get("Metadata").attrs["nbView"])] neededViewIndex = views.index(kwargs["feat"]) - X = DATASET.get("View"+str(neededViewIndex)) + X = DATASET.get("View" + str(neededViewIndex)) Y = DATASET.get("Labels").value - return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path, randomState, hyperParamSearch=hyperParamSearch, + return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path, + randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyperParamSearch="randomizedSearch", +def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, + randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): logging.debug("Start:\t Loading data") try: @@ -53,11 +57,11 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol feat = X.attrs["name"] CL_type = kwargs["CL_type"] X = getValue(X) - learningRate = len(classificationIndices[0])/(len(classificationIndices[0])+len(classificationIndices[1])) + learningRate = len(classificationIndices[0]) / (len(classificationIndices[0]) + len(classificationIndices[1])) labelsString = "-".join(labelsNames) timestr = time.strftime("%Y%m%d-%H%M%S") CL_type_string = CL_type - outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \ + outputFileName = directory + "/" + CL_type_string + "/" + "/" + feat + "/" + timestr + "Results-" + CL_type_string + "-" + labelsString + \ '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-" if not 
os.path.exists(os.path.dirname(outputFileName)): try: @@ -87,13 +91,13 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol if hyperParamSearch != "None": classifierHPSearch = getattr(classifierModule, hyperParamSearch) - logging.debug("Start:\t RandomSearch best settings with "+str(nIter)+" iterations for "+CL_type) + logging.debug("Start:\t RandomSearch best settings with " + str(nIter) + " iterations for " + CL_type) cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores, metric=metrics[0], nIter=nIter) clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc)) logging.debug("Done:\t RandomSearch best settings") else: - clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"] + clKWARGS = kwargs[kwargs["CL_type"] + "KWARGS"] logging.debug("Start:\t Training") cl_res = classifierModule.fit(X_train, y_train, randomState, NB_CORES=nbCores, **clKWARGS) logging.debug("Done:\t Training") @@ -110,9 +114,11 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Start:\t Getting Results") - stringAnalysis, imagesAnalysis, metricsScores = execute(name, classificationIndices, KFolds, nbCores, hyperParamSearch, metrics, nIter, feat, CL_type, + stringAnalysis, imagesAnalysis, metricsScores = execute(name, classificationIndices, KFolds, nbCores, + hyperParamSearch, metrics, nIter, feat, CL_type, clKWARGS, labelsNames, X.shape, - y_train, y_train_pred, y_test, y_test_pred, t_end, randomState) + y_train, y_train_pred, y_test, y_test_pred, t_end, + randomState) cl_desc = [value for key, value in sorted(clKWARGS.iteritems())] logging.debug("Done:\t Getting Results") logging.info(stringAnalysis) @@ -126,16 +132,16 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol outputTextFile = open(outputFileName + '.txt', 'w') outputTextFile.write(stringAnalysis) outputTextFile.close() - np.savetxt(outputFileName+"full_pred.csv", full_labels.astype(np.int16), delimiter=",") - np.savetxt(outputFileName+"train_pred.csv", y_train_pred.astype(np.int16), delimiter=",") - np.savetxt(outputFileName+"train_labels.csv", y_train.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "full_pred.csv", full_labels.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",") if imagesAnalysis is not None: for imageName in imagesAnalysis: if os.path.isfile(outputFileName + imageName + ".png"): - for i in range(1,20): + for i in range(1, 20): testFileName = outputFileName + imageName + "-" + str(i) + ".png" - if os.path.isfile(testFileName ) != True: + if not os.path.isfile(testFileName): imagesAnalysis[imageName].savefig(testFileName) break @@ -143,9 +149,10 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.info("Done:\t Result Analysis") viewIndex = args["viewIndex"] - return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels, clKWARGS] + return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels, clKWARGS] -if __name__=='__main__': + +if __name__ == '__main__': parser = argparse.ArgumentParser( description='This methods permits to execute a multiclass classification with one single view. At this point the used classifier is a RandomForest. The GridSearch permits to vary the number of trees and CrossValidation with k-folds. 
The result will be a plot of the score per class and a CSV with the best classifier found by the GridSearch.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -153,25 +160,36 @@ if __name__=='__main__': groupStandard = parser.add_argument_group('Standard arguments') groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5") - groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', default='DB') - groupStandard.add_argument('--feat', metavar='STRING', action='store', help='Name of Feature for Classification (default: %(default)s)', default='RGB') - groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') - groupStandard.add_argument('--fileCL', metavar='STRING', action='store', help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') - groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', help='Name of classLabels-Description CSV-file (default: %(default)s)', default='classLabels-Description.csv') - groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', help='Name of feature CSV-file (default: %(default)s)', default='feature.csv') - + groupStandard.add_argument('--name', metavar='STRING', action='store', + help='Name of Database (default: %(default)s)', default='DB') + groupStandard.add_argument('--feat', metavar='STRING', action='store', + help='Name of Feature for Classification (default: %(default)s)', default='RGB') + groupStandard.add_argument('--pathF', metavar='STRING', action='store', + help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') + groupStandard.add_argument('--fileCL', metavar='STRING', action='store', + help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') + groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', + help='Name of classLabels-Description CSV-file (default: %(default)s)', + default='classLabels-Description.csv') + groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', + help='Name of feature CSV-file (default: %(default)s)', default='feature.csv') groupClass = parser.add_argument_group('Classification arguments') - groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use', default="RandomForest") - groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, default=10) - groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=1) - groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test', type=float, default=0.9) + groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use', + default="RandomForest") + groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, + default=10) + groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, + default=1) + groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test', + type=float, default=0.9) groupClass.add_argument('--CL_metrics', 
metavar='STRING', action='store', - help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all', default='') - + help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all', + default='') groupClassifier = parser.add_argument_group('Classifier Config') - groupClassifier.add_argument('--CL_config', metavar='STRING', nargs="+", action='store', help='GridSearch: Determine the trees', default=['25:75:125:175']) + groupClassifier.add_argument('--CL_config', metavar='STRING', nargs="+", action='store', + help='GridSearch: Determine the trees', default=['25:75:125:175']) args = parser.parse_args() @@ -184,20 +202,20 @@ if __name__=='__main__': logfilename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-LOG" logfile = directory + logfilename if os.path.isfile(logfile + ".log"): - for i in range(1,20): - testFileName = logfilename + "-" + str(i) + ".log" - if os.path.isfile(directory + testFileName)!=True: + for i in range(1, 20): + testFileName = logfilename + "-" + str(i) + ".log" + if not os.path.isfile(directory + testFileName): logfile = directory + testFileName break else: - logfile = logfile + ".log" + logfile += ".log" - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, filemode='w') + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, + filemode='w') - if(args.log): + if args.log: logging.getLogger().addHandler(logging.StreamHandler()) - # Read the features logging.debug("Start:\t Read " + args.type + " Files") @@ -206,14 +224,15 @@ if __name__=='__main__': Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';') elif args.type == ".hdf5": dataset = h5py.File(args.pathF + args.name + ".hdf5", "r") - viewsDict = dict((dataset.get("View"+str(viewIndex)).attrs["name"], viewIndex) for viewIndex in range(dataset.get("Metadata").attrs["nbView"])) - X = dataset["View"+str(viewsDict[args.feat])][...] + viewsDict = dict((dataset.get("View" + str(viewIndex)).attrs["name"], viewIndex) for viewIndex in + range(dataset.get("Metadata").attrs["nbView"])) + X = dataset["View" + str(viewsDict[args.feat])][...] Y = dataset["Labels"][...] 
logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape)) logging.debug("Done:\t Read CSV Files") - arguments = {args.CL_type+"KWARGS": classifierKWARGS, "feat":args.feat,"fileFeat": args.fileFeat, + arguments = {args.CL_type + "KWARGS": classifierKWARGS, "feat": args.feat, "fileFeat": args.fileFeat, "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type} ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, metrics=args.CL_metrics, **arguments) diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecPlot.py b/Code/MonoMutliViewClassifiers/Monoview/ExecPlot.py index 979636c2..97262a72 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecPlot.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecPlot.py @@ -3,27 +3,27 @@ """ Script whichs helps to replot results from Feature Parameter Optimisation """ # Import built-in modules -import argparse # for acommand line arguments -import datetime # for TimeStamp in CSVFile -import os # to geth path of the running script +import argparse # for acommand line arguments +import datetime # for TimeStamp in CSVFile +import os # to geth path of the running script import matplotlib + matplotlib.use('Agg') # Import 3rd party modules -import pandas as pd # for Series -import numpy as np # for DataFrames +import pandas as pd # for Series +import numpy as np # for DataFrames # Import own modules -import ExportResults # Functions to render results +import ExportResults # Functions to render results # Author-Info -__author__ = "Nikolas Huelsmann" -__status__ = "Prototype" # Production, Development, Prototype -__date__ = 2016-03-25 - +__author__ = "Nikolas Huelsmann" +__status__ = "Prototype" # Production, Development, Prototype +__date__ = 2016 - 03 - 25 parser = argparse.ArgumentParser( -description='This method can be used to replot results from Feature Parameter Optimisation', -formatter_class=argparse.ArgumentDefaultsHelpFormatter) + description='This method can be used to replot results from Feature Parameter Optimisation', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() args.name = "Caltech" args.valueStart = 2 @@ -32,11 +32,12 @@ args.nCalcs = 5 args.feature = "HSV" args.param = "HSV_V_Bins" args.show = False -df_feat_res = pd.DataFrame.from_csv(path="D:\\BitBucket\\multiview-machine-learning-omis\\Results\\Hydra\\2016_03_23-FPO-Caltech-HSV-HSV_V_Bins.csv", sep=';') - +df_feat_res = pd.DataFrame.from_csv( + path="D:\\BitBucket\\multiview-machine-learning-omis\\Results\\Hydra\\2016_03_23-FPO-Caltech-HSV-HSV_V_Bins.csv", + sep=';') # Get data from result to show results in plot -#logging.debug("Start:\t Plot Result") +# logging.debug("Start:\t Plot Result") # Total time for feature extraction and classification tot_time = df_feat_res.b_feat_extr_time.values + df_feat_res.e_cl_time.values tot_time = np.asarray(tot_time) @@ -51,13 +52,12 @@ cl_time = np.asarray(cl_time) score = df_feat_res.f_cl_score.values score = np.asarray(score) - # Range on X-Axis -if(args.nCalcs>1): - step = float(args.valueEnd-args.valueStart)/float(args.nCalcs-1) - rangeX = np.around(np.array(range(0,args.nCalcs))*step) + args.valueStart +if args.nCalcs > 1: + step = float(args.valueEnd - args.valueStart) / float(args.nCalcs - 1) + rangeX = np.around(np.array(range(0, args.nCalcs)) * step) + args.valueStart else: - rangeX = [args.valueStart] + rangeX = [args.valueStart] rangeX = np.asarray(rangeX) # Description of Classification @@ -67,17 
+67,23 @@ cl_desc = df_feat_res.c_cl_desc.values feat_desc = df_feat_res.a_feat_desc.values dir = os.path.dirname(os.path.abspath(__file__)) + "/Results-FeatParaOpt/" -#filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-FPO-" + args.name + "-" + args.feature + "-" + args.param -#ExportResults.exportPandasToCSV(df_feat_res, directory, filename) +# filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-FPO-" + args.name + "-" + args.feature + "-" + args.param +# ExportResults.exportPandasToCSV(df_feat_res, directory, filename) # Store or Show plot -if(args.show): - store = False +if args.show: + store = False else: - store = True + store = True fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FPO-" + args.name + "-" + args.feature + "-" + args.param # Show Results for Calculation -ExportResults.showScoreTime(dir, fileName + "-TotalTime", store, score, tot_time, rangeX, args.param, feat_desc, cl_desc, 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, 'Precision', 'Total Time (Feature Extraction+Classification)\n [s]') -ExportResults.showScoreTime(dir, fileName + "-FeatExtTime", store, score, feat_time, rangeX, args.param, feat_desc, cl_desc, 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, 'Precision', 'Feature Extraction Time\n [s]') -ExportResults.showScoreTime(dir, fileName + "-ClassTime", store, score, cl_time, rangeX, args.param, feat_desc, cl_desc, 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, 'Precision', 'Classification Time\n [s]') +ExportResults.showScoreTime(dir, fileName + "-TotalTime", store, score, tot_time, rangeX, args.param, feat_desc, + cl_desc, 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, + 'Precision', 'Total Time (Feature Extraction+Classification)\n [s]') +ExportResults.showScoreTime(dir, fileName + "-FeatExtTime", store, score, feat_time, rangeX, args.param, feat_desc, + cl_desc, 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, + 'Precision', 'Feature Extraction Time\n [s]') +ExportResults.showScoreTime(dir, fileName + "-ClassTime", store, score, cl_time, rangeX, args.param, feat_desc, cl_desc, + 'Results for Parameter Optimisation - DB:' + args.name + ' Feat:' + args.feature, + 'Precision', 'Classification Time\n [s]') diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExportResults.py b/Code/MonoMutliViewClassifiers/Monoview/ExportResults.py index 68368d38..a3cb7117 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExportResults.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExportResults.py @@ -12,6 +12,7 @@ import numpy as np # for Numpy Arrays import matplotlib.pyplot as plt # for Plots from scipy.interpolate import interp1d # to Interpolate Data import matplotlib + matplotlib.use('Agg') from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, HPacker # to generate the Annotations in plot from pylab import rcParams # to change size of plot @@ -33,7 +34,7 @@ def exportPandasToCSV(pandasSorDF, directory, filename): if os.path.isfile(file + ".csv"): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".csv" - if os.path.isfile(directory + testFileName) != True: + if not os.path.isfile(directory + testFileName): pandasSorDF.to_csv(directory + testFileName, sep=';') break @@ -48,7 +49,7 @@ def exportNumpyToCSV(numpyArray, directory, filename, format): if os.path.isfile(file + ".csv"): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".csv" - 
if os.path.isfile(directory + testFileName) != True: + if not os.path.isfile(directory + testFileName): np.savetxt(directory + testFileName, numpyArray, delimiter=";", fmt=format) break @@ -59,7 +60,8 @@ def exportNumpyToCSV(numpyArray, directory, filename, format): #### Rendering of results ### Rendering of Score and Time -def showScoreTime(directory, filename, store, resScore, resTime, rangeX, parameter, feat_desc, cl_desc, fig_desc, y_desc1, +def showScoreTime(directory, filename, store, resScore, resTime, rangeX, parameter, feat_desc, cl_desc, fig_desc, + y_desc1, y_desc2): # Determine interpolated functions f_score_interp = interp1d(rangeX, resScore, kind='quadratic') @@ -123,14 +125,14 @@ def showScoreTime(directory, filename, store, resScore, resTime, rangeX, paramet plt.title(fig_desc, fontsize=18) - if (store): + if store: # Makes sure that the file does not yet exist file = directory + filename if os.path.isfile(file + ".png"): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" - if os.path.isfile(directory + testFileName) != True: + if not os.path.isfile(directory + testFileName): plt.savefig(directory + testFileName) break @@ -173,7 +175,7 @@ def showResults(directory, filename, db, feat, score): if os.path.isfile(file + ".png"): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" - if os.path.isfile(directory + testFileName) != True: + if not os.path.isfile(directory + testFileName): plt.savefig(directory + testFileName) break @@ -253,7 +255,7 @@ def plot_confusion_matrix(directory, filename, df_confusion, title='Confusion ma if os.path.isfile(file + ".png"): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" - if os.path.isfile(directory + testFileName) != True: + if not os.path.isfile(directory + testFileName): plt.savefig(directory + testFileName) break diff --git a/Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py b/Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py index 626890df..7eed810f 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py +++ b/Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py @@ -5,22 +5,22 @@ # Import built-in modules # Import sci-kit learn party modules -#from sklearn.tests import train_test_split # For calculating the train/test split -from sklearn.pipeline import Pipeline # Pipelining in classification -from sklearn.model_selection import GridSearchCV # GridSearch for parameters of classification -from sklearn.ensemble import RandomForestClassifier # RandomForest-Classifier +# from sklearn.tests import train_test_split # For calculating the train/test split +from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.model_selection import GridSearchCV # GridSearch for parameters of classification +from sklearn.ensemble import RandomForestClassifier # RandomForest-Classifier import sklearn import numpy as np # Import own modules # Author-Info -__author__ = "Nikolas Huelsmann, Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype -__date__ = 2016-03-25 +__author__ = "Nikolas Huelsmann, Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype +__date__ = 2016 - 03 - 25 -def isUseful (labelSupports, index, CLASS_LABELS, labelDict): +def isUseful(labelSupports, index, CLASS_LABELS, labelDict): if labelSupports[labelDict[CLASS_LABELS[index]]] != 0: labelSupports[labelDict[CLASS_LABELS[index]]] -= 1 return True, labelSupports @@ -35,7 +35,7 @@ def getLabelSupports(CLASS_LABELS): def 
splitDataset(LABELS, NB_CLASS, LEARNING_RATE, DATASET_LENGTH, randomState): - validationIndices = extractRandomTrainingSet(LABELS, 1-LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState) + validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState) validationIndices.sort() return validationIndices @@ -47,7 +47,7 @@ def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLA usedIndices = [] while nbTrainingExamples != [0 for i in range(NB_CLASS)]: isUseFull = False - index = int(randomState.randint(0, DATASET_LENGTH-1)) + index = int(randomState.randint(0, DATASET_LENGTH - 1)) if index not in usedIndices: isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict) if isUseFull: @@ -170,7 +170,7 @@ def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLA # y_test: Test Labels # num_estimators: number of trees def MonoviewClassifRandomForest(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): - num_estimators = kwargs["classifier__n_estimators"] + num_estimators = kwargs["classifier__n_estimators"] # PipeLine with RandomForest classifier pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) @@ -185,18 +185,18 @@ def MonoviewClassifRandomForest(X_train, y_train, nbFolds=4, nbCores=1, **kwargs # scoring: scoring... # cv: Nombre de K-Folds pour CV grid_rf = GridSearchCV( - pipeline_rf, - param_grid=param_rf, - refit=True, - n_jobs=nbCores, - scoring='accuracy', - cv=nbFolds, + pipeline_rf, + param_grid=param_rf, + refit=True, + n_jobs=nbCores, + scoring='accuracy', + cv=nbFolds, ) rf_detector = grid_rf.fit(X_train, y_train) desc_estimators = [rf_detector.best_params_["classifier__n_estimators"]] - description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str,desc_estimators)) + description = "Classif_" + "RF" + "-" + "CV_" + str(nbFolds) + "-" + "Trees_" + str(map(str, desc_estimators)) return description, rf_detector @@ -205,22 +205,24 @@ def MonoviewClassifSVMLinear(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): pipeline_SVMLinear = Pipeline([('classifier', sklearn.svm.SVC())]) param_SVMLinear = kwargs - grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, scoring='accuracy', - cv=nbFolds) + grid_SVMLinear = GridSearchCV(pipeline_SVMLinear, param_grid=param_SVMLinear, refit=True, n_jobs=nbCores, + scoring='accuracy', + cv=nbFolds) SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train) desc_params = [SVMLinear_detector.best_params_["classifier__C"]] - description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) + description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str, desc_params)) return description, SVMLinear_detector + def MonoviewClassifSVMRBF(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): pipeline_SVMRBF = Pipeline([('classifier', sklearn.svm.SVC())]) param_SVMRBF = kwargs grid_SVMRBF = GridSearchCV(pipeline_SVMRBF, param_grid=param_SVMRBF, refit=True, n_jobs=nbCores, scoring='accuracy', - cv=nbFolds) + cv=nbFolds) SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train) desc_params = [SVMRBF_detector.best_params_["classifier__C"]] - description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) + description = "Classif_" + "SVC" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str, desc_params)) return description, SVMRBF_detector @@ -229,10 +231,10 @@ 
def MonoviewClassifDecisionTree(X_train, y_train, nbFolds=4, nbCores=1, **kwargs param_DT = kwargs grid_DT = GridSearchCV(pipeline_DT, param_grid=param_DT, refit=True, n_jobs=nbCores, scoring='accuracy', - cv=nbFolds) + cv=nbFolds) DT_detector = grid_DT.fit(X_train, y_train) desc_params = [DT_detector.best_params_["classifier__max_depth"]] - description = "Classif_" + "DT" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) + description = "Classif_" + "DT" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str, desc_params)) return description, DT_detector @@ -240,13 +242,14 @@ def MonoviewClassifSGD(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): pipeline_SGD = Pipeline([('classifier', sklearn.linear_model.SGDClassifier())]) param_SGD = kwargs grid_SGD = GridSearchCV(pipeline_SGD, param_grid=param_SGD, refit=True, n_jobs=nbCores, scoring='accuracy', - cv=nbFolds) + cv=nbFolds) SGD_detector = grid_SGD.fit(X_train, y_train) desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"], SGD_detector.best_params_["classifier__alpha"]] - description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) + description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str, desc_params)) return description, SGD_detector + def MonoviewClassifKNN(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): pipeline_KNN = Pipeline([('classifier', sklearn.neighbors.KNeighborsClassifier())]) param_KNN = kwargs @@ -254,31 +257,31 @@ def MonoviewClassifKNN(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): cv=nbFolds) KNN_detector = grid_KNN.fit(X_train, y_train) desc_params = [KNN_detector.best_params_["classifier__n_neighbors"]] - description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str,desc_params)) + description = "Classif_" + "Lasso" + "-" + "CV_" + str(nbFolds) + "-" + "-".join(map(str, desc_params)) return description, KNN_detector -#def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): -# from sklearn.grid_search import ParameterGrid -# param_rf = { 'classifier__n_estimators': num_estimators} -# forest = RandomForestClassifier() -# -# bestgrid=0; -# for g in ParameterGrid(grid): -# forest.set_params(**g) -# forest.fit(X_train,y_train) -# score = forest.score(X_test, y_test) -# -# if score > best_score: -# best_score = score -# best_grid = g -# -# rf_detector = RandomForestClassifier() -# rf_detector.set_params(**best_grid) -# rf_detector.fit(X_train,y_train) - -# #desc_estimators = best_grid -# description = "Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) - -# return (description, rf_detector) \ No newline at end of file + # def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): + # from sklearn.grid_search import ParameterGrid + # param_rf = { 'classifier__n_estimators': num_estimators} + # forest = RandomForestClassifier() + # + # bestgrid=0; + # for g in ParameterGrid(grid): + # forest.set_params(**g) + # forest.fit(X_train,y_train) + # score = forest.score(X_test, y_test) + # + # if score > best_score: + # best_score = score + # best_grid = g + # + # rf_detector = RandomForestClassifier() + # rf_detector.set_params(**best_grid) + # rf_detector.fit(X_train,y_train) + + # #desc_estimators = best_grid + # description = "Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) + + # return (description, rf_detector) diff --git 
a/Code/MonoMutliViewClassifiers/Monoview/__init__.py b/Code/MonoMutliViewClassifiers/Monoview/__init__.py index 8e96d5a2..f0fc97ad 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/__init__.py +++ b/Code/MonoMutliViewClassifiers/Monoview/__init__.py @@ -1 +1 @@ -from . import ExecClassifMonoView, MonoviewUtils \ No newline at end of file +from . import ExecClassifMonoView, MonoviewUtils diff --git a/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py b/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py index c4b2133c..ac8439cd 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py +++ b/Code/MonoMutliViewClassifiers/Monoview/analyzeResult.py @@ -6,23 +6,23 @@ import Metrics def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames, KFolds): - learningRate = float(len(classificationIndices[0]))/len(classificationIndices[0])+len(classificationIndices[1]) + learningRate = float(len(classificationIndices[0])) / len(classificationIndices[0]) + len(classificationIndices[1]) dbConfigString = "Database configuration : \n" - dbConfigString += "\t- Database name : "+name+"\n" - dbConfigString += "\t- View name : "+feat+"\t View shape : "+str(shape)+"\n" - dbConfigString += "\t- Learning Rate : "+str(learningRate) + "\n" - dbConfigString += "\t- Labels used : "+", ".join(classLabelsNames)+"\n" - dbConfigString += "\t- Number of cross validation folds : "+str(KFolds.n_splits) + "\n\n" + dbConfigString += "\t- Database name : " + name + "\n" + dbConfigString += "\t- View name : " + feat + "\t View shape : " + str(shape) + "\n" + dbConfigString += "\t- Learning Rate : " + str(learningRate) + "\n" + dbConfigString += "\t- Labels used : " + ", ".join(classLabelsNames) + "\n" + dbConfigString += "\t- Number of cross validation folds : " + str(KFolds.n_splits) + "\n\n" return dbConfigString def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS): classifierModule = getattr(MonoviewClassifiers, CL_type) classifierConfigString = "Classifier configuration : \n" - classifierConfigString += "\t- "+classifierModule.getConfig(clKWARGS)[5:]+"\n" - classifierConfigString += "\t- Executed on "+str(nbCores)+" core(s) \n" + classifierConfigString += "\t- " + classifierModule.getConfig(clKWARGS)[5:] + "\n" + classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" if gridSearch: - classifierConfigString += "\t- Got configuration using randomized search with "+str(nIter)+" iterations \n" + classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n" classifierConfigString += "\n\n" return classifierConfigString @@ -35,9 +35,9 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): metricKWARGS = {} metricScoreTrain = metricModule.score(y_train, y_train_pred) metricScoreTest = metricModule.score(y_test, y_test_pred) - metricScoreString = "\tFor "+metricModule.getConfig(**metricKWARGS)+" : " - metricScoreString += "\n\t\t- Score on train : "+str(metricScoreTrain) - metricScoreString += "\n\t\t- Score on test : "+str(metricScoreTest) + metricScoreString = "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " + metricScoreString += "\n\t\t- Score on train : " + str(metricScoreTrain) + metricScoreString += "\n\t\t- Score on test : " + str(metricScoreTest) metricScoreString += "\n" return metricScoreString @@ -48,13 +48,15 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea metricModule = getattr(Metrics, metrics[0][0]) trainScore = 
metricModule.score(y_train, y_train_pred) testScore = metricModule.score(y_test, y_test_pred) - stringAnalysis = "Classification on "+name+" database for "+feat+" with "+CL_type+", random state is "+str(randomState)+".\n\n" - stringAnalysis += metrics[0][0]+" on train : "+str(trainScore)+"\n"+metrics[0][0]+" on test : "+str(testScore)+"\n\n" + stringAnalysis = "Classification on " + name + " database for " + feat + " with " + CL_type + ", random state is " + str( + randomState) + ".\n\n" + stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + metrics[0][0] + " on test : " + str( + testScore) + "\n\n" stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) stringAnalysis += getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS) for metric in metrics: - stringAnalysis+=getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) - if metric[1]!=None: + stringAnalysis += getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -63,4 +65,4 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea stringAnalysis += "\n\n Classification took " + str(hms(seconds=int(time))) imageAnalysis = {} - return stringAnalysis, imageAnalysis, metricsScores \ No newline at end of file + return stringAnalysis, imageAnalysis, metricsScores diff --git a/Code/MonoMutliViewClassifiers/Monoview/run.py b/Code/MonoMutliViewClassifiers/Monoview/run.py index fd455567..ece77c70 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/run.py +++ b/Code/MonoMutliViewClassifiers/Monoview/run.py @@ -1,6 +1,8 @@ # coding=utf-8 import os -os.system('python ExecClassifMonoView.py -log --name MultiOmicDataset --type hdf5 --feat RNASeq --pathF /home/doob/Téléchargements/Data_multi_omics/ --CL_type DecisionTree --CL_CV 5 --CL_Cores 4 --CL_split 0.5') + +os.system( + 'python ExecClassifMonoView.py -log --name MultiOmicDataset --type hdf5 --feat RNASeq --pathF /home/doob/Téléchargements/Data_multi_omics/ --CL_type DecisionTree --CL_CV 5 --CL_Cores 4 --CL_split 0.5') # /donnees/pj_bdd_bbauvin/Data_multi_omics/ # MiRNA_ RNASeq Clinic -# \ No newline at end of file +# diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 876238ca..f98ce9bd 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -9,18 +9,19 @@ import matplotlib.pyplot as plt from utils.HyperParameterSearch import genHeatMaps # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): num_estimators = int(kwargs['0']) - base_estimators = DecisionTreeClassifier()#kwargs['1'] - classifier = AdaBoostClassifier(n_estimators=num_estimators, base_estimator=base_estimators, random_state=randomState) + base_estimators = DecisionTreeClassifier() # kwargs['1'] + classifier = AdaBoostClassifier(n_estimators=num_estimators, base_estimator=base_estimators, + random_state=randomState) classifier.fit(DATASET, CLASS_LABELS) return classifier @@ 
-42,13 +43,14 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, nbCores=1): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, + nbCores=1): pipeline = Pipeline([('classifier', AdaBoostClassifier())]) - param= {"classifier__n_estimators": randint(1, 150), - "classifier__base_estimator": [DecisionTreeClassifier()]} + param = {"classifier__n_estimators": randint(1, 150), + "classifier__base_estimator": [DecisionTreeClassifier()]} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -64,38 +66,16 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, me ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] genHeatMaps(params, scoresArray, outputFileName) - - # baseEstimatorsSet = np.array(set(baseEstimators)) - # nEstimatorsSet = np.sort(np.array(list(set(nEstimators)))) - # - # scoresArray = detector.cv_results_['mean_test_score'] - # scoresMatrix = np.zeros((len(nEstimatorsSet), 1)) - # for baseEstimator, nEstimator, score in zip(baseEstimators, nEstimators, scoresArray): - # baseEstimatorIndex = 0 - # i, = np.where(nEstimatorsSet == nEstimator) - # print i - # nEstimatorIndex, = np.where(nEstimatorsSet == nEstimator) - # scoresMatrix[int(nEstimatorIndex), baseEstimatorIndex] = score - # - # plt.figure(figsize=(8, 6)) - # plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) - # plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot, - # ) - # plt.xlabel('n_estimators') - # plt.ylabel('base_estimator') - # plt.colorbar() - # plt.xticks(np.arange(1), ["DecisionTree"]) - # plt.yticks(np.arange(len(nEstimatorsSet)), nEstimatorsSet, rotation=45) - # plt.title('Validation accuracy') - # plt.savefig(outputFileName+"heat_map.png") return desc_estimators def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- Adaboost with num_esimators : "+str(config.n_estimators)+", base_estimators : "+str(config.base_estimator) + return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str( + config.base_estimator) else: try: - return "\n\t\t- Adaboost with num_esimators : "+str(config[0])+", base_estimators : "+str(config[1]) + return "\n\t\t- Adaboost with num_esimators : " + str(config[0]) + ", base_estimators : " + str(config[1]) except: - return "\n\t\t- Adaboost with num_esimators : "+str(config["0"])+", base_estimators : "+str(config["1"]) \ No newline at end of file + return "\n\t\t- Adaboost with num_esimators : " + str(config["0"]) + ", base_estimators : " + str( + config["1"]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 168a0864..c680e81a 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -1,5 +1,5 @@ from sklearn.tree import DecisionTreeClassifier -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint @@ -7,8 +7,8 @@ import 
numpy as np from utils.HyperParameterSearch import genHeatMaps # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): @@ -28,7 +28,8 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append([randomState.randint(1, 300), randomState.choice(["gini", "entropy"]), randomState.choice(["best", "random"])]) + paramsSet.append([randomState.randint(1, 300), randomState.choice(["gini", "entropy"]), + randomState.choice(["best", "random"])]) return paramsSet @@ -44,18 +45,20 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())]) param_DT = {"classifier__max_depth": randint(1, 300), "classifier__criterion": ["gini", "entropy"], "classifier__splitter": ["best", "random"]} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) - grid_DT = RandomizedSearchCV(pipeline_DT, n_iter=nIter, param_distributions=param_DT, refit=True, n_jobs=nbCores, scoring=scorer, + grid_DT = RandomizedSearchCV(pipeline_DT, n_iter=nIter, param_distributions=param_DT, refit=True, n_jobs=nbCores, + scoring=scorer, cv=KFolds, random_state=randomState) DT_detector = grid_DT.fit(X_train, y_train) desc_params = [DT_detector.best_params_["classifier__max_depth"], DT_detector.best_params_["classifier__criterion"], @@ -72,9 +75,12 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- Decision Tree with max_depth : "+str(config.max_depth) + ", criterion : "+config.criterion+", splitter : "+config.splitter + return "\n\t\t- Decision Tree with max_depth : " + str( + config.max_depth) + ", criterion : " + config.criterion + ", splitter : " + config.splitter else: try: - return "\n\t\t- Decision Tree with max_depth : "+str(config[0]) + ", criterion : "+config[1]+", splitter : "+config[2] + return "\n\t\t- Decision Tree with max_depth : " + str(config[0]) + ", criterion : " + config[ + 1] + ", splitter : " + config[2] except: - return "\n\t\t- Decision Tree with max_depth : "+str(config["0"]) + ", criterion : "+config["1"]+", splitter : "+config["2"] \ No newline at end of file + return "\n\t\t- Decision Tree with max_depth : " + str(config["0"]) + ", criterion : " + config[ + "1"] + ", splitter : " + config["2"] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index c6df7e41..98c04d14 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -1,22 +1,21 @@ from sklearn.neighbors import KNeighborsClassifier -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import 
RandomizedSearchCV import Metrics from scipy.stats import randint import numpy as np from utils.HyperParameterSearch import genHeatMaps - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): nNeighbors = int(kwargs['0']) weights = kwargs["1"] algorithm = kwargs["2"] @@ -31,7 +30,7 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append([randomState.randint(1, 50), randomState.choice(["uniform", "distance"]), - randomState.choice(["auto", "ball_tree", "kd_tree", "brute"]), randomState.choice([1,2])]) + randomState.choice(["auto", "ball_tree", "kd_tree", "brute"]), randomState.choice([1, 2])]) return paramsSet @@ -49,20 +48,22 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())]) param_KNN = {"classifier__n_neighbors": randint(1, 50), "classifier__weights": ["uniform", "distance"], "classifier__algorithm": ["auto", "ball_tree", "kd_tree", "brute"], - "classifier__p": [1,2], + "classifier__p": [1, 2], } metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) - grid_KNN = RandomizedSearchCV(pipeline_KNN, n_iter=nIter, param_distributions=param_KNN, refit=True, n_jobs=nbCores, scoring=scorer, + grid_KNN = RandomizedSearchCV(pipeline_KNN, n_iter=nIter, param_distributions=param_KNN, refit=True, n_jobs=nbCores, + scoring=scorer, cv=KFolds, random_state=randomState) KNN_detector = grid_KNN.fit(X_train, y_train) desc_params = [KNN_detector.best_params_["classifier__n_neighbors"], @@ -84,9 +85,13 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config.n_neighbors)+", weights : "+config.weights+", algorithm : "+config.algorithm+", p : "+str(config.p) + return "\n\t\t- K nearest Neighbors with n_neighbors : " + str( + config.n_neighbors) + ", weights : " + config.weights + ", algorithm : " + config.algorithm + ", p : " + str( + config.p) else: try: - return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config[0])+", weights : "+config[1]+", algorithm : "+config[2]+", p : "+str(config[3]) + return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config[0]) + ", weights : " + config[ + 1] + ", algorithm : " + config[2] + ", p : " + str(config[3]) except: - return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config["0"])+", weights : "+config["1"]+", algorithm : "+config["2"]+", p : "+str(config["3"]) \ No newline at end of file + return "\n\t\t- K nearest Neighbors with n_neighbors : " + str(config["0"]) + ", weights : " + config[ + "1"] + ", algorithm : " + config["2"] + ", p : " + str(config["3"]) diff --git 
a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 4370d04f..16246724 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -7,15 +7,15 @@ import numpy as np from utils.HyperParameterSearch import genHeatMaps # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): num_estimators = int(kwargs['0']) maxDepth = int(kwargs['1']) criterion = kwargs["2"] @@ -45,13 +45,14 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) param_rf = {"classifier__n_estimators": randint(1, 300), "classifier__max_depth": randint(1, 300), "classifier__criterion": ["gini", "entropy"]} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -75,9 +76,12 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- Random Forest with num_esimators : "+str(config.n_estimators)+", max_depth : "+str(config.max_depth)+ ", criterion : "+config.criterion + return "\n\t\t- Random Forest with num_esimators : " + str(config.n_estimators) + ", max_depth : " + str( + config.max_depth) + ", criterion : " + config.criterion else: try: - return "\n\t\t- Random Forest with num_esimators : "+str(config[0])+", max_depth : "+str(config[1])+ ", criterion : "+config[2] + return "\n\t\t- Random Forest with num_esimators : " + str(config[0]) + ", max_depth : " + str( + config[1]) + ", criterion : " + config[2] except: - return "\n\t\t- Random Forest with num_esimators : "+str(config["0"])+", max_depth : "+str(config["1"])+ ", criterion : "+config["2"] + return "\n\t\t- Random Forest with num_esimators : " + str(config["0"]) + ", max_depth : " + str( + config["1"]) + ", criterion : " + config["2"] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 20d49be8..0216cf64 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -8,26 +8,25 @@ from pyscm.binary_attributes.base import BaseBinaryAttributeList import os from utils.HyperParameterSearch import genHeatMaps - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return False -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): max_attrtibutes = kwargs['0'] try: p = kwargs['1'] except: - p=1.0 + p = 1.0 try: 
model_type = kwargs['2'] except: - model_type="conjunction" + model_type = "conjunction" try: attributeClassification = kwargs["attributeClassification"] binaryAttributes = kwargs["binaryAttributes"] @@ -64,14 +63,14 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, metric=["accuracy_score", None], nIter=30, nbCores=1): - +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, metric=["accuracy_score", None], + nIter=30, nbCores=1): metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - if metricModule.getConfig()[-14]=="h": + if metricModule.getConfig()[-14] == "h": baseScore = -1000.0 isBetter = "higher" else: @@ -104,19 +103,19 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, pass dsetFile.close() os.remove(name) - if scores==[]: + if scores == []: score = baseScore else: score = np.mean(np.array(scores)) - if isBetter=="higher" and score > baseScore: + if isBetter == "higher" and score > baseScore: baseScore = score config = [max_attributes, p, model] - if isBetter=="lower" and score < baseScore: + if isBetter == "lower" and score < baseScore: baseScore = score config = [max_attributes, p, model] - assert config!=[], "No good configuration found for SCM" + assert config != [], "No good configuration found for SCM" scoresArray = scores params = [("maxAttributes", np.array(maxAttributesArray)), ("p", np.array(pArray)), @@ -128,12 +127,15 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- SCM with max_attributes : "+str(config.max_attributes)+", model type : "+config.model_type+", p : "+str(config.p) + return "\n\t\t- SCM with max_attributes : " + str( + config.max_attributes) + ", model type : " + config.model_type + ", p : " + str(config.p) else: - try : - return "\n\t\t- SCM with max_attributes : "+str(config[0])+", p : "+str(config[1])+", model type : "+str(config[2]) + try: + return "\n\t\t- SCM with max_attributes : " + str(config[0]) + ", p : " + str( + config[1]) + ", model type : " + str(config[2]) except: - return "\n\t\t- SCM with max_attributes : "+str(config["0"])+", p : "+str(config["1"])+", model type : "+str(config["2"]) + return "\n\t\t- SCM with max_attributes : " + str(config["0"]) + ", p : " + str( + config["1"]) + ", model type : " + str(config["2"]) def transformData(dataArray): @@ -148,18 +150,18 @@ def transformData(dataArray): nameb = "temp_scm" if not os.path.isfile(nameb): dsetFile = h5py.File(nameb, "w") - name=nameb + name = nameb else: - fail=True - i=0 - name=nameb + fail = True + i = 0 + name = nameb while fail: if not os.path.isfile(name): dsetFile = h5py.File(name, "w") - fail=False + fail = False else: - i+=1 - name = nameb+str(i) + i += 1 + name = nameb + str(i) packedDataset = dsetFile.create_dataset("temp_scm", data=packedData) dsetFile.close() @@ -170,15 +172,16 @@ def transformData(dataArray): def isBinary(dataset): - if type(dataset[0,0]) is np.uint8: + if type(dataset[0, 0]) is np.uint8: return True for line in dataset: for data in line: - if data!=0 or data!=1: + if data != 0 or data != 1: return False return True -#!/usr/bin/env python + +# !/usr/bin/env python """ Kover: Learn interpretable computational phenotyping models from k-merized genomic 
data Copyright (C) 2015 Alexandre Drouin @@ -220,7 +223,6 @@ def _minimum_uint_size(max_value): class BaptisteRule(object): - def __init__(self, feature_index, kmer_sequence, type): """ A k-mer rule @@ -244,16 +246,19 @@ class BaptisteRule(object): return (X[:, self.feature_index] == 1).astype(np.uint8) def inverse(self): - return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, type="absence" if self.type == "presence" else "presence") + return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, + type="absence" if self.type == "presence" else "presence") def __str__(self): return ("Absence(" if self.type == "absence" else "Presence(") + self.kmer_sequence + ")" + class LazyBaptisteRuleList(object): """ By convention, the first half of the list contains presence rules and the second half contains the absence rules in the same order. """ + def __init__(self, kmer_sequences, feature_index_by_rule): self.n_rules = feature_index_by_rule.shape[0] * 2 self.kmer_sequences = kmer_sequences @@ -274,6 +279,7 @@ class LazyBaptisteRuleList(object): def __len__(self): return self.n_rules + class BaseRuleClassifications(object): def __init__(self): pass @@ -296,6 +302,7 @@ class BaptisteRuleClassifications(BaseRuleClassifications): """ Methods involving columns account for presence and absence rules """ + # TODO: Clean up. Get rid of the code to handle deleted rows. We don't need this. def __init__(self, dataset, n_rows, block_size=None): self.dataset = dataset @@ -332,7 +339,7 @@ class BaptisteRuleClassifications(BaseRuleClassifications): """ Columns can be an integer (or any object that implements __index__) or a sorted list/ndarray. """ - #TODO: Support slicing, make this more efficient than getting the columns individually. + # TODO: Support slicing, make this more efficient than getting the columns individually. 
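# A minimal illustrative sketch of the presence/absence rule convention used by
# LazyBaptisteRuleList and BaptisteRuleClassifications above: for a dataset with d features,
# rule j < d is the presence rule (X[:, j] == 1) and rule j >= d is the absence rule for
# feature j - d, i.e. the inverted column. It assumes a plain uint8 matrix rather than the
# packed HDF5 blocks the class actually handles.
import numpy as np

def rule_column(X, j):
    d = X.shape[1]
    if j < d:
        # presence rule: 1 where the feature/k-mer is present
        return (X[:, j] == 1).astype(np.uint8)
    # absence rule: inverse of the corresponding presence column
    return (X[:, j - d] == 0).astype(np.uint8)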
columns_is_int = False if hasattr(columns, "__index__"): # All int types implement the __index__ method (PEP 357) columns = [columns.__index__()] @@ -344,8 +351,8 @@ class BaptisteRuleClassifications(BaseRuleClassifications): else: columns = list(columns) # Detect where an inversion is needed (columns corresponding to absence rules) - columns, invert_result = zip(* (((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), - (True if column > self.dataset.shape[1] else False)) for column in columns)) + columns, invert_result = zip(*(((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), + (True if column > self.dataset.shape[1] else False)) for column in columns)) columns = list(columns) invert_result = np.array(invert_result) @@ -431,9 +438,10 @@ class BaptisteRuleClassifications(BaseRuleClassifications): self.inplace_popcount(block, block_row_mask) # Increment the sum - result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], self.dataset.shape[1])] += np.sum(block, axis=0) + result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], + self.dataset.shape[1])] += np.sum(block, axis=0) # Compute the sum for absence rules - result[self.dataset.shape[1] : ] = len(rows) - result[: self.dataset.shape[1]] + result[self.dataset.shape[1]:] = len(rows) - result[: self.dataset.shape[1]] - return result \ No newline at end of file + return result diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index 4ce68949..5ae8538a 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -1,23 +1,21 @@ from sklearn.linear_model import SGDClassifier -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import uniform import numpy as np from utils.HyperParameterSearch import genHeatMaps - - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): loss = kwargs['0'] penalty = kwargs['1'] try: @@ -49,7 +47,8 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_SGD = Pipeline([('classifier', SGDClassifier())]) losses = ['log', 'modified_huber'] penalties = ["l1", "l2", "elasticnet"] @@ -57,7 +56,7 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb param_SGD = {"classifier__loss": losses, "classifier__penalty": penalties, "classifier__alpha": alphas} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -80,9 +79,12 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not 
in [list, dict]: - return "\n\t\t- SGDClassifier with loss : "+config.loss+", penalty : "+config.penalty+", alpha : "+str(config.alpha) + return "\n\t\t- SGDClassifier with loss : " + config.loss + ", penalty : " + config.penalty + ", alpha : " + str( + config.alpha) else: try: - return "\n\t\t- SGDClassifier with loss : "+config[0]+", penalty : "+config[1]+", alpha : "+str(config[2]) + return "\n\t\t- SGDClassifier with loss : " + config[0] + ", penalty : " + config[1] + ", alpha : " + str( + config[2]) except: - return "\n\t\t- SGDClassifier with loss : "+config["0"]+", penalty : "+config["1"]+", alpha : "+str(config["2"]) \ No newline at end of file + return "\n\t\t- SGDClassifier with loss : " + config["0"] + ", penalty : " + config[ + "1"] + ", alpha : " + str(config["2"]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index 0f48fce5..c2c20ed1 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -1,22 +1,21 @@ from sklearn.svm import SVC -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint import numpy as np from utils.HyperParameterSearch import genHeatMaps - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): C = int(kwargs['0']) classifier = SVC(C=C, kernel='linear', probability=True, max_iter=1000, random_state=randomState) classifier.fit(DATASET, CLASS_LABELS) @@ -26,7 +25,7 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append([randomState.randint(1, 10000),]) + paramsSet.append([randomState.randint(1, 10000), ]) return paramsSet @@ -38,11 +37,12 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear", max_iter=1000))]) param_SVMLinear = {"classifier__C": randint(1, 10000)} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -65,9 +65,9 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- SVM Linear with C : "+str(config.C) + return "\n\t\t- SVM Linear with C : " + str(config.C) else: try: - return "\n\t\t- SVM Linear with C : "+str(config[0]) + return "\n\t\t- SVM Linear with C : " + str(config[0]) except: - return "\n\t\t- SVM Linear with C : "+str(config["0"]) + return "\n\t\t- SVM Linear with C : " + str(config["0"]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py 
b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index 316b7af6..e092bde2 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -1,23 +1,21 @@ from sklearn.svm import SVC -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint import numpy as np from utils.HyperParameterSearch import genHeatMaps - - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): C = int(kwargs['0']) degree = int(kwargs['1']) classifier = SVC(C=C, kernel='poly', degree=degree, probability=True, max_iter=1000, random_state=randomState) @@ -42,12 +40,13 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly", max_iter=1000))]) param_SVMPoly = {"classifier__C": randint(1, 10000), "classifier__degree": randint(1, 30)} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -68,9 +67,9 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- SVM Poly with C : "+str(config.C)+", degree : "+str(config.degree) + return "\n\t\t- SVM Poly with C : " + str(config.C) + ", degree : " + str(config.degree) else: try: - return "\n\t\t- SVM Poly with C : "+str(config[0])+", degree : "+str(config[1]) + return "\n\t\t- SVM Poly with C : " + str(config[0]) + ", degree : " + str(config[1]) except: - return "\n\t\t- SVM Poly with C : "+str(config["0"])+", degree : "+str(config["1"]) \ No newline at end of file + return "\n\t\t- SVM Poly with C : " + str(config["0"]) + ", degree : " + str(config["1"]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index 4b4ec762..4846c9c4 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -1,22 +1,21 @@ from sklearn.svm import SVC -from sklearn.pipeline import Pipeline # Pipelining in classification +from sklearn.pipeline import Pipeline # Pipelining in classification from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint import numpy as np from utils.HyperParameterSearch import genHeatMaps - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def canProbas(): return True -def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): +def fit(DATASET, CLASS_LABELS, 
randomState, NB_CORES=1, **kwargs): C = int(kwargs['0']) classifier = SVC(C=C, kernel='rbf', probability=True, max_iter=1000, random_state=randomState) classifier.fit(DATASET, CLASS_LABELS) @@ -26,7 +25,7 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append([randomState.randint(1, 10000),]) + paramsSet.append([randomState.randint(1, 10000), ]) return paramsSet @@ -38,11 +37,12 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30): pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf", max_iter=1000))]) param_SVMRBF = {"classifier__C": randint(1, 10000)} metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -63,9 +63,9 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nb def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- SVM RBF with C : "+str(config.C) + return "\n\t\t- SVM RBF with C : " + str(config.C) else: try: - return "\n\t\t- SVM RBF with C : "+str(config[0]) + return "\n\t\t- SVM RBF with C : " + str(config[0]) except: - return "\n\t\t- SVM RBF with C : "+str(config["0"]) \ No newline at end of file + return "\n\t\t- SVM RBF with C : " + str(config["0"]) diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 557d33dd..bacbdee6 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -3,7 +3,7 @@ import os.path import errno sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) # from Multiview import * import Multiview @@ -18,35 +18,38 @@ from utils.HyperParameterSearch import searchBestSettings # Author-Info __author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__status__ = "Prototype" # Production, Development, Prototype -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, randomState, +def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, + randomState, hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): - DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r") - return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, randomState, + DATASET = h5py.File(path + name + str(coreIndex) + ".hdf5", "r") + return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, + randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, randomState, +def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, + LABELS_DICTIONARY, randomState, 
hyperParamSearch=False, metrics=None, nIter=30, **kwargs): - views = kwargs["views"] viewsIndices = kwargs["viewsIndices"] if not metrics: metrics = [["f1_score", None]] CL_type = kwargs["CL_type"] - classificationKWARGS = kwargs[CL_type+"KWARGS"] - learningRate = len(classificationIndices[0])/float((len(classificationIndices[0])+len(classificationIndices[1]))) + classificationKWARGS = kwargs[CL_type + "KWARGS"] + learningRate = len(classificationIndices[0]) / float( + (len(classificationIndices[0]) + len(classificationIndices[1]))) t_start = time.time() logging.info("### Main Programm for Multiview Classification") logging.info("### Classification - Database : " + str(name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + CL_type + " ; Cores : " + str(nbCores)+", Train ratio : " + str(learningRate)+ + " ; Algorithm : " + CL_type + " ; Cores : " + str(nbCores) + ", Train ratio : " + str(learningRate) + ", CV on " + str(KFolds.n_splits) + " folds") for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( - getShape(DATASET, viewIndex))) + getShape(DATASET, viewIndex))) logging.info("Done:\t Read Database Files") extractionTime = time.time() - t_start @@ -57,7 +60,9 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor analysisModule = getattr(classifierPackage, "analyzeResults") if hyperParamSearch != "None": - classifier = searchBestSettings(DATASET, CL_type, metrics, learningIndices, KFolds, randomState, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) + classifier = searchBestSettings(DATASET, CL_type, metrics, learningIndices, KFolds, randomState, + viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, + **classificationKWARGS) else: classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) @@ -102,13 +107,13 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor if imagesAnalysis is not None: for imageName in imagesAnalysis: if os.path.isfile(outputFileName + imageName + ".png"): - for i in range(1,20): + for i in range(1, 20): testFileName = outputFileName + imageName + "-" + str(i) + ".png" - if os.path.isfile(testFileName )!=True: + if not os.path.isfile(testFileName): imagesAnalysis[imageName].savefig(testFileName) break imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png') logging.info("Done:\t Result Analysis") - return CL_type, classificationKWARGS, metricsScores, fullLabels \ No newline at end of file + return CL_type, classificationKWARGS, metricsScores, fullLabels diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 8e3012a2..b523e2bf 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -8,10 +8,9 @@ import MonoviewClassifiers from utils.Dataset import getV - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def getBenchmark(benchmark, args=None): @@ -32,16 +31,17 @@ def getBenchmark(benchmark, args=None): if (not isPackage)] fusionMonoviewClassifiers = allMonoviewAlgos allFusionAlgos = {"Methods": fusionMethods, "Classifiers": fusionMonoviewClassifiers} - benchmark["Multiview"]["Fusion"]=allFusionAlgos + 
benchmark["Multiview"]["Fusion"] = allFusionAlgos else: benchmark["Multiview"]["Fusion"] = {} if args.FU_types != [""]: benchmark["Multiview"]["Fusion"]["Methods"] = dict( (fusionType, []) for fusionType in args.FU_types) else: - benchmark["Multiview"]["Fusion"]["Methods"] = dict((fusionModulesName, "_") for fusionModulesName in fusionModulesNames) + benchmark["Multiview"]["Fusion"]["Methods"] = dict( + (fusionModulesName, "_") for fusionModulesName in fusionModulesNames) if "LateFusion" in benchmark["Multiview"]["Fusion"]["Methods"]: - if args.FU_late_methods== [""]: + if args.FU_late_methods == [""]: benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] = [name for _, name, isPackage in pkgutil.iter_modules([ "Multiview/Fusion/Methods/LateFusionPackage"]) @@ -72,32 +72,33 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result args.FU_L_select_monoview = "randomClf" argumentsList = [] for fusionType in benchmark["Multiview"]["Fusion"]["Methods"]: - fusionTypePackage = getattr(Methods, fusionType+"Package") + fusionTypePackage = getattr(Methods, fusionType + "Package") for fusionMethod in benchmark["Multiview"]["Fusion"]["Methods"][fusionType]: fusionMethodModule = getattr(fusionTypePackage, fusionMethod) - arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices) - argumentsList+= arguments + arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, + classificationIndices) + argumentsList += arguments return argumentsList def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if not usedIndices: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) NB_VIEW = len(viewsIndices) if weights is None: - weights = np.array([1/NB_VIEW for i in range(NB_VIEW)]) - if sum(weights)!=1: - weights = weights/sum(weights) - monoviewData = np.concatenate([weights[index]*getV(DATASET, viewIndex, usedIndices) - for index, viewIndex in enumerate(viewsIndices)], axis=1) + weights = np.array([1 / NB_VIEW for i in range(NB_VIEW)]) + if sum(weights) != 1: + weights = weights / sum(weights) + monoviewData = np.concatenate([weights[index] * getV(DATASET, viewIndex, usedIndices) + for index, viewIndex in enumerate(viewsIndices)], axis=1) return monoviewData def genParamsSets(classificationKWARGS, randomState, nIter=1): fusionTypeName = classificationKWARGS["fusionType"] - fusionTypePackage = getattr(Methods, fusionTypeName+"Package") + fusionTypePackage = getattr(Methods, fusionTypeName + "Package") fusionMethodModuleName = classificationKWARGS["fusionMethod"] fusionMethodModule = getattr(fusionTypePackage, fusionMethodModuleName) fusionMethodConfig = fusionMethodModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) @@ -105,16 +106,16 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices, metric=None, nIter=30): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) fusionTypeName = classificationKWARGS["fusionType"] - fusionTypePackage = globals()[fusionTypeName+"Package"] + fusionTypePackage = globals()[fusionTypeName + "Package"] fusionMethodModuleName = 
classificationKWARGS["fusionMethod"] fusionMethodModule = getattr(fusionTypePackage, fusionMethodModuleName) classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): - logging.debug("\tStart:\t Random search for "+classifierName+ " with "+str(nIter)+" iterations") + logging.debug("\tStart:\t Random search for " + classifierName + " with " + str(nIter) + " iterations") classifierModule = getattr(MonoviewClassifiers, classifierName) classifierMethod = getattr(classifierModule, "hyperParamSearch") if fusionTypeName == "LateFusion": @@ -122,34 +123,38 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices DATASET.get("Labels")[learningIndices], metric=metric, nIter=nIter)) else: - bestSettings.append(classifierMethod(makeMonoviewData_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices), - DATASET.get("Labels")[learningIndices], metric=metric, - nIter=nIter)) - logging.debug("\tDone:\t Random search for "+classifierName) + bestSettings.append( + classifierMethod(makeMonoviewData_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices), + DATASET.get("Labels")[learningIndices], metric=metric, + nIter=nIter)) + logging.debug("\tDone:\t Random search for " + classifierName) classificationKWARGS["classifiersConfigs"] = bestSettings - logging.debug("\tStart:\t Random search for "+fusionMethodModuleName) - fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS, learningIndices, nIter=nIter, viewsIndices=viewsIndices) - logging.debug("\tDone:\t Random search for "+fusionMethodModuleName) + logging.debug("\tStart:\t Random search for " + fusionMethodModuleName) + fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS, learningIndices, nIter=nIter, + viewsIndices=viewsIndices) + logging.debug("\tDone:\t Random search for " + fusionMethodModuleName) return bestSettings, fusionMethodConfig def getCLString(classificationKWARGS): if classificationKWARGS["fusionType"] == "LateFusion": - return "Fusion-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+\ + return "Fusion-" + classificationKWARGS["fusionType"] + "-" + classificationKWARGS["fusionMethod"] + "-" + \ "-".join(classificationKWARGS["classifiersNames"]) elif classificationKWARGS["fusionType"] == "EarlyFusion": - return "Fusion-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+ \ + return "Fusion-" + classificationKWARGS["fusionType"] + "-" + classificationKWARGS["fusionMethod"] + "-" + \ classificationKWARGS["classifiersNames"] + class Fusion: def __init__(self, randomState, NB_CORES=1, **kwargs): fusionType = kwargs['fusionType'] fusionMethod = kwargs['fusionMethod'] - fusionTypePackage = getattr(Methods, fusionType+"Package") + fusionTypePackage = getattr(Methods, fusionType + "Package") fusionMethodModule = getattr(fusionTypePackage, fusionMethod) fusionMethodClass = getattr(fusionMethodModule, fusionMethod) nbCores = NB_CORES - classifierKWARGS = dict((key, value) for key, value in kwargs.iteritems() if key not in ['fusionType', 'fusionMethod']) + classifierKWARGS = dict( + (key, value) for key, value in kwargs.iteritems() if key not in ['fusionType', 'fusionMethod']) self.classifier = fusionMethodClass(randomState, NB_CORES=nbCores, **classifierKWARGS) def setParams(self, paramsSet): @@ -161,7 +166,7 @@ class Fusion: def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): 
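# A minimal sketch of the weighted concatenation performed by makeMonoviewData_hdf5 above,
# assuming the views are already plain numpy arrays (the real code pulls them from the HDF5
# dataset with getV and defaults the weights to 1/NB_VIEW before normalizing them):
import numpy as np

def concatenate_views(views, weights=None):
    if weights is None:
        weights = np.ones(len(views)) / len(views)
    weights = np.asarray(weights, dtype=float)
    if weights.sum() != 1:
        weights = weights / weights.sum()
    # scale each view by its weight, then stack the views feature-wise
    return np.concatenate([w * v for w, v in zip(weights, views)], axis=1)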
if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) predictedLabels = self.classifier.predict_hdf5(DATASET, usedIndices=usedIndices, viewsIndices=viewsIndices) return predictedLabels @@ -174,5 +179,3 @@ class Fusion: else: predictedLabels = [] return predictedLabels - - diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index 23bf6487..ebfa665d 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -8,14 +8,14 @@ from utils.Dataset import getV class EarlyFusionClassifier(object): def __init__(self, randomState, monoviewClassifierName, monoviewClassifierConfig, NB_CORES=1): self.monoviewClassifierName = monoviewClassifierName - if type(monoviewClassifierConfig)==dict: + if type(monoviewClassifierConfig) == dict: pass elif monoviewClassifierConfig is None: pass else: monoviewClassifierConfig = dict((str(configIndex), config[0]) for configIndex, config in - enumerate(monoviewClassifierConfig - )) + enumerate(monoviewClassifierConfig + )) self.monoviewClassifiersConfig = monoviewClassifierConfig self.monoviewClassifier = None self.nbCores = NB_CORES @@ -23,15 +23,14 @@ class EarlyFusionClassifier(object): self.randomState = randomState def makeMonoviewData_hdf5(self, DATASET, weights=None, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(weights)== type(None): - weights = np.array([1/nbView for i in range(nbView)]) - if sum(weights)!=1: - weights = weights/sum(weights) + if type(weights) == type(None): + weights = np.array([1 / nbView for i in range(nbView)]) + if sum(weights) != 1: + weights = weights / sum(weights) self.monoviewData = np.concatenate([getV(DATASET, viewIndex, usedIndices) - for index, viewIndex in enumerate(viewsIndices)], axis=1) - + for index, viewIndex in enumerate(viewsIndices)], axis=1) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index cd9dbbee..5206ea41 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -13,7 +13,7 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): paramsSets = [] for iterIndex in range(nIter): randomWeightsArray = randomState.random_sample(nbView) - normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + normalizedArray = randomWeightsArray / np.sum(randomWeightsArray) paramsSets.append([normalizedArray, paramsMonoview[iterIndex]]) return paramsSets @@ -40,7 +40,8 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "classifiersNames": classifierName, "classifiersConfigs": monoviewClassifierModule.getKWARGS([arg.split(":") for arg in - classifierConfig.split(",")]), + classifierConfig.split( + ",")]), 'fusionMethodConfig': args.FU_E_method_configs, 
"nbView": (len(viewsIndices))}} else: @@ -66,31 +67,33 @@ class WeightedLinear(EarlyFusionClassifier): NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'] is None: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) - elif kwargs['fusionMethodConfig']==['']: + elif kwargs['fusionMethodConfig'] == ['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - self.weights = self.weights/float(max(self.weights)) + self.weights /= float(max(self.weights)) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices, viewsIndices=viewsIndices) monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName) - self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("Labels").value[trainIndices], self.randomState, - NB_CORES=self.nbCores, - **self.monoviewClassifiersConfig) + self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, + DATASET.get("Labels").value[trainIndices], + self.randomState, + NB_CORES=self.nbCores, + **self.monoviewClassifiersConfig) def setParams(self, paramsSet): self.weights = paramsSet[0] self.monoviewClassifiersConfig = dict((str(index), param) for index, param in enumerate(paramsSet[1])) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - self.weights = self.weights/float(np.sum(self.weights)) + self.weights /= float(np.sum(self.weights)) if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices, viewsIndices=viewsIndices) @@ -99,14 +102,14 @@ class WeightedLinear(EarlyFusionClassifier): return predictedLabels def predict_proba_hdf5(self, DATASET, usedIndices=None): - if usedIndices == None: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices) predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData) return predictedLabels - def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs): - configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+ \ + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with weighted concatenation, using weights : " + ", ".join(map(str, self.weights)) + \ " with monoview classifier : " monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames) configString += monoviewClassifierModule.getConfig(self.monoviewClassifiersConfig) @@ -114,4 +117,4 @@ class WeightedLinear(EarlyFusionClassifier): def gridSearch(self, classificationKWARGS): - return \ No newline at end of file + return diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py index 9bbd76fb..406f941a 
100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/__init__.py @@ -1,7 +1,8 @@ import os + for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): if module == '__init__.py' or module[-3:] != '.py': continue __import__(module[:-3], locals(), globals()) del module -del os \ No newline at end of file +del os diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 09b549cb..c2044a81 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -27,13 +27,14 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr monoviewClassifier = getattr(MonoviewClassifiers, classifierName) if needProbas and not monoviewClassifier.canProbas(): monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") - DTConfig = {"0":300, "1":"entropy", "2":"random"} - classifier = monoviewClassifier.fit(data,labels, randomState,DTConfig) + DTConfig = {"0": 300, "1": "entropy", "2": "random"} + classifier = monoviewClassifier.fit(data, labels, randomState, DTConfig) return classifier else: - classifier = monoviewClassifier.fit(data,labels, randomState,**dict((str(configIndex), config) for configIndex, config in - enumerate(classifierConfig - ))) + classifier = monoviewClassifier.fit(data, labels, randomState, + **dict((str(configIndex), config) for configIndex, config in + enumerate(classifierConfig + ))) return classifier @@ -46,34 +47,17 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas # wrongSets = [0 for _ in allClassifersNames] classifiersNames = [[] for _ in viewsIndices] nbViews = len(viewsIndices) - trainLabels = np.genfromtxt(directory+"train_labels.csv", delimiter=",").astype(np.int16) + trainLabels = np.genfromtxt(directory + "train_labels.csv", delimiter=",").astype(np.int16) length = len(trainLabels) for resultMonoview in resultsMonoview: if resultMonoview[1][0] in classifiersNames[resultMonoview[0]]: classifierIndex = classifiersNames.index(resultMonoview[1][0]) - wrongSets[resultMonoview[0]][classifierIndex] = np.where(trainLabels+resultMonoview[1][3][classificationIndices[0]] == 1) + wrongSets[resultMonoview[0]][classifierIndex] = np.where( + trainLabels + resultMonoview[1][3][classificationIndices[0]] == 1) else: classifiersNames[resultMonoview[0]].append(resultMonoview[1][0]) - wrongSets[resultMonoview[0]].append(np.where(trainLabels+resultMonoview[1][3][classificationIndices[0]] == 1)) - # for classifierIndex, classifierName in enumerate(allClassifersNames): - # try: - # classifierDirectory = directory+classifierName+"/" - # viewDirectoryNames = os.listdir(classifierDirectory) - # wrongSets[classifierIndex]=[0 for _ in viewDirectoryNames] - # for viewIndex, viewDirectoryName in enumerate(viewDirectoryNames): - # for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectoryName+"/"): - # if resultFileName.endswith("train_labels.csv"): - # yTrainFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName - # elif resultFileName.endswith("train_pred.csv"): - # yTrainPredFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName - # train = np.genfromtxt(yTrainFileName, delimiter=",").astype(np.int16) - # pred = np.genfromtxt(yTrainPredFileName, 
delimiter=",").astype(np.int16) - # length = len(train) - # wrongLabelsIndices = np.where(train+pred == 1) - # wrongSets[classifierIndex][viewIndex]=wrongLabelsIndices - # except OSError: - # for viewIndex in range(nbViews): - # wrongSets[classifierIndex][viewIndex]= np.arange(length) + wrongSets[resultMonoview[0]].append( + np.where(trainLabels + resultMonoview[1][3][classificationIndices[0]] == 1)) combinations = itertools.combinations_with_replacement(range(len(classifiersNames[0])), nbViews) bestLen = length @@ -88,22 +72,6 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] -# def getFormFile(directory, viewDirectory, resultFileName): -# file = open(directory+"/"+viewDirectory+"/"+resultFileName) -# for line in file: -# if "Score on train" in line: -# score = float(line.strip().split(":")[1]) -# break -# elif "train" in line: -# metricName = line.strip().split(" ")[0] -# metricModule = getattr(Metrics, metricName) -# if metricModule.getConfig()[-14]=="h": -# betterHigh = True -# else: -# betterHigh = False -# return score, betterHigh - - def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): nbViews = len(viewsIndices) nbClassifiers = len(allClassifersNames) @@ -111,7 +79,7 @@ def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, clas classifiersNames = [[] for _ in viewsIndices] metricName = resultsMonoview[0][1][2].keys()[0] metricModule = getattr(Metrics, metricName) - if metricModule.getConfig()[-14]=="h": + if metricModule.getConfig()[-14] == "h": betterHigh = True else: betterHigh = False @@ -119,14 +87,8 @@ def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, clas if resultMonoview[1][0] not in classifiersNames[resultMonoview[0]]: classifiersNames[resultMonoview[0]].append(resultMonoview[1][0]) classifierIndex = classifiersNames[resultMonoview[0]].index(resultMonoview[1][0]) - scores[resultMonoview[0],classifierIndex] = resultMonoview[1][2].values()[0][0] - # - # for classifierIndex, classifierName in enumerate(allClassifersNames): - # classifierDirectory = directory+"/"+classifierName+"/" - # for viewIndex, viewDirectory in enumerate(os.listdir(classifierDirectory)): - # for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectory+"/"): - # if resultFileName.endswith(".txt"): - # scores[viewIndex, classifierIndex], betterHigh = getFormFile(directory, viewDirectory, resultFileName) + scores[resultMonoview[0], classifierIndex] = resultMonoview[1][2].values()[0][0] + if betterHigh: classifierIndices = np.argmax(scores, axis=1) else: @@ -134,10 +96,12 @@ def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, clas return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(classifierIndices)] -def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices): +def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices, resultsMonoview, + classificationIndices): thismodule = sys.modules[__name__] selectionMethod = getattr(thismodule, selectionMethodName) - classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices) + classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices, resultsMonoview, + classificationIndices) return classifiersNames @@ -145,15 
+109,16 @@ def getConfig(classifiersNames, resultsMonoview): classifiersConfigs = [0 for _ in range(len(classifiersNames))] for viewIndex, classifierName in enumerate(classifiersNames): for resultMonoview in resultsMonoview: - if resultMonoview[0]==viewIndex and resultMonoview[1][0]==classifierName: - classifiersConfigs[viewIndex]=resultMonoview[1][4] + if resultMonoview[0] == viewIndex and resultMonoview[1][0] == classifierName: + classifiersConfigs[viewIndex] = resultMonoview[1][4] return classifiersConfigs class LateFusionClassifier(object): - def __init__(self, randomState, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1): + def __init__(self, randomState, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, + NB_CORES=1): self.monoviewClassifiersNames = monoviewClassifiersNames - if type(monoviewClassifiersConfigs[0])==dict: + if type(monoviewClassifiersConfigs[0]) == dict: self.monoviewClassifiersConfigs = monoviewClassifiersConfigs self.monoviewClassifiers = [] else: @@ -165,14 +130,14 @@ class LateFusionClassifier(object): self.randomState = randomState def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( - delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], - getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels").value[trainIndices], - self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState) - for index, viewIndex in enumerate(viewsIndices)) \ No newline at end of file + delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], + getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels").value[trainIndices], + self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState) + for index, viewIndex in enumerate(viewsIndices)) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 32d623ed..7940938d 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -12,26 +12,28 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): paramsSets = [] for _ in range(nIter): randomWeightsArray = randomState.random_sample(nbView) - normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + normalizedArray = randomWeightsArray / np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): - if args.FU_L_cl_names!=['']: + if args.FU_L_cl_names != ['']: args.FU_L_select_monoview = "user_defined" else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, + viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, 
classifierName) for classifierName in args.FU_L_cl_names] - if args.FU_L_cl_names==[""] and args.CL_type == ["Multiview"]: + if args.FU_L_cl_names == [""] and args.CL_type == ["Multiview"]: raise AttributeError("You must perform Monoview classification or specify " "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: - classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + classifiersConfigs = [ + monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule, classifierConfig + in zip(monoviewClassifierModules, args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", @@ -52,10 +54,11 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class BayesianInference(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: self.weights = np.array([1.0 for classifier in kwargs['classifiersNames']]) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -70,22 +73,24 @@ class BayesianInference(LateFusionClassifier): nbView = len(viewsIndices) if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if sum(self.weights)!=1.0: + if sum(self.weights) != 1.0: print self.weights - self.weights = self.weights/sum(self.weights) + self.weights = self.weights / sum(self.weights) viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): - viewScores[index] = np.power(self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), - self.weights[index]) + viewScores[index] = np.power( + self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), + self.weights[index]) predictedLabels = np.argmax(np.prod(viewScores, axis=0), axis=1) return predictedLabels - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Bayesian Inference using a weight for each view : "+", ".join(map(str, self.weights)) + \ + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with Bayesian Inference using a weight for each view : " + ", ".join(map(str, self.weights)) + \ "\n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, + monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - configString+="\n\t -Method used to select monoview classifiers : "+self.monoviewSelection - return 
configString \ No newline at end of file + configString += "\n\t -Method used to select monoview classifiers : " + self.monoviewSelection + return configString diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 31d44c79..4f70dcde 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -11,23 +11,25 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): paramsSets = [] for _ in range(nIter): randomWeightsArray = randomState.random_sample(nbView) - normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + normalizedArray = randomWeightsArray / np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): - if args.FU_L_cl_names!=['']: + if args.FU_L_cl_names != ['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, + viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: - classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + classifiersConfigs = [ + monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule, classifierConfig + in zip(monoviewClassifierModules, args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", @@ -48,9 +50,10 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class MajorityVoting(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -59,7 +62,7 @@ class MajorityVoting(LateFusionClassifier): self.weights = np.array(paramsSet[0]) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) self.weights /= float(sum(self.weights)) @@ -68,7 +71,7 @@ class MajorityVoting(LateFusionClassifier): datasetLength = len(usedIndices) votes = np.zeros((datasetLength, 
DATASET.get("Metadata").attrs["nbClass"]), dtype=float) - monoViewDecisions = np.zeros((len(usedIndices),nbView), dtype=int) + monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) @@ -90,9 +93,11 @@ class MajorityVoting(LateFusionClassifier): # nbMaximum = len(np.where(votes==max(votes))[0]) return predictedLabels - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Majority Voting \n\t-With weights : "+str(self.weights)+"\n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with Majority Voting \n\t-With weights : " + str( + self.weights) + "\n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, + monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString \ No newline at end of file + return configString diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index 5ba55d97..510e26a8 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -22,23 +22,25 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): max_attributes = randomState.randint(1, 20) p = randomState.random_sample() model = randomState.choice(["conjunction", "disjunction"]) - order = randomState.randint(1,nbView) + order = randomState.randint(1, nbView) paramsSets.append([p, max_attributes, model, order]) return paramsSets def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): - if args.FU_L_cl_names!=['']: + if args.FU_L_cl_names != ['']: pass else: - monoviewClassifierModulesNames =benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) + monoviewClassifierModulesNames = benchmark["Monoview"] + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, + viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: - classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + classifiersConfigs = [ + monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule, classifierConfig + in zip(monoviewClassifierModules, args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", @@ -59,10 +61,11 @@ def getArgs(benchmark, args, views, 
viewsIndices, directory, resultsMonoview, cl class SCMForLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SCMClassifier = None - if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: self.p = 1 self.maxAttributes = 5 self.order = 1 @@ -100,7 +103,7 @@ class SCMForLinear(LateFusionClassifier): if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) - accus=[] + accus = [] for index, viewIndex in enumerate(viewsIndices): monoviewDecision = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) @@ -111,44 +114,47 @@ class SCMForLinear(LateFusionClassifier): return predictedLabels def SCMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=self.p, max_attributes=self.maxAttributes, model_type=self.modelType, verbose=False) + self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=self.p, max_attributes=self.maxAttributes, + model_type=self.modelType, verbose=False) monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) features = self.generateInteractions(monoViewDecisions) - featureSequence=[str(index) for index in range(nbView)] - for orderIndex in range(self.order-1): - featureSequence += [str(featureIndex) for featureIndex in itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2)] + featureSequence = [str(index) for index in range(nbView)] + for orderIndex in range(self.order - 1): + featureSequence += [str(featureIndex) for featureIndex in + itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex + 2)] featureIndexByRule = np.arange(features.shape[1], dtype=np.uint32) binaryAttributes = LazyBaptisteRuleList(featureSequence, featureIndexByRule) packedData = _pack_binary_bytes_to_ints(features, 64) nameb = "temp_scm_fusion" if not os.path.isfile(nameb): dsetFile = h5py.File(nameb, "w") - name=nameb + name = nameb else: - fail=True - i=0 - name=nameb + fail = True + i = 0 + name = nameb while fail: if not os.path.isfile(name): dsetFile = h5py.File(name, "w") - fail=False + fail = False else: - i+=1 - name = nameb+str(i) + i += 1 + name = nameb + str(i) packedDataset = dsetFile.create_dataset("temp_scm", data=packedData) dsetFile.close() dsetFile = h5py.File(name, "r") packedDataset = dsetFile.get("temp_scm") attributeClassification = BaptisteRuleClassifications(packedDataset, features.shape[0]) - self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels").value[usedIndices], attribute_classifications=attributeClassification) + self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels").value[usedIndices], + attribute_classifications=attributeClassification) try: dsetFile.close() 
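The SCMForLinear fusion in the hunks around this file expands the per-view binary predictions with logical interactions up to `order` before fitting the Set Covering Machine (see generateInteractions below). A minimal self-contained sketch of that expansion, NumPy only; the function name, the order-2 default and the toy matrix are illustrative and not part of the patch:

import itertools
import numpy as np

def expand_with_interactions(mono_view_decisions, order=2, model_type="conjunction"):
    """Append logical interactions of per-view 0/1 decisions, in the spirit of generateInteractions."""
    columns = [mono_view_decisions[:, i] for i in range(mono_view_decisions.shape[1])]
    for size in range(2, order + 1):
        for combo in itertools.combinations(range(mono_view_decisions.shape[1]), size):
            interaction = mono_view_decisions[:, combo[0]]
            for view_column in combo[1:]:
                if model_type == "disjunction":
                    # mirrors the patch: AND is used for the "disjunction" model, OR otherwise
                    interaction = np.logical_and(interaction, mono_view_decisions[:, view_column])
                else:
                    interaction = np.logical_or(interaction, mono_view_decisions[:, view_column])
            columns.append(interaction)
    return np.array(columns).astype(np.uint8).T

decisions = np.array([[1, 0, 1],
                      [0, 0, 1],
                      [1, 1, 1],
                      [0, 1, 0]])
print(expand_with_interactions(decisions, order=2).shape)  # (4, 6): 3 view columns + 3 pairwise interactions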
os.remove(name) @@ -156,37 +162,38 @@ class SCMForLinear(LateFusionClassifier): pass def generateInteractions(self, monoViewDecisions): - if type(self.order)==type(None): + if type(self.order) == type(None): order = monoViewDecisions.shape[1] - if self.order==1: + if self.order == 1: return monoViewDecisions else: - genratedIntercations = [monoViewDecisions[:,i] for i in range(monoViewDecisions.shape[1])] - for orderIndex in range(self.order-1): - combins = itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2) + genratedIntercations = [monoViewDecisions[:, i] for i in range(monoViewDecisions.shape[1])] + for orderIndex in range(self.order - 1): + combins = itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex + 2) for combin in combins: - generatedDecision = monoViewDecisions[:,combin[0]] - for index in range(len(combin)-1): - if self.modelType=="disjunction": - generatedDecision = np.logical_and(generatedDecision, monoViewDecisions[:,combin[index+1]]) + generatedDecision = monoViewDecisions[:, combin[0]] + for index in range(len(combin) - 1): + if self.modelType == "disjunction": + generatedDecision = np.logical_and(generatedDecision, + monoViewDecisions[:, combin[index + 1]]) else: - generatedDecision = np.logical_or(generatedDecision, monoViewDecisions[:,combin[index+1]]) + generatedDecision = np.logical_or(generatedDecision, + monoViewDecisions[:, combin[index + 1]]) genratedIntercations.append(generatedDecision) return np.transpose(np.array(genratedIntercations).astype(np.uint8)) - - - - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with SCM for linear with max_attributes : "+str(self.maxAttributes)+", p : "+str(self.p)+\ - " model_type : "+str(self.modelType)+" has chosen "+\ - str(len(self.SCMClassifier.attribute_importances))+" rule(s) \n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with SCM for linear with max_attributes : " + str(self.maxAttributes) + ", p : " + str(self.p) + \ + " model_type : " + str(self.modelType) + " has chosen " + \ + str(len(self.SCMClassifier.attribute_importances)) + " rule(s) \n\t-With monoview classifiers : " + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, + monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) return configString + def _minimum_uint_size(max_value): """ Find the minimum size unsigned integer type that can store values of at most max_value @@ -205,7 +212,6 @@ def _minimum_uint_size(max_value): class BaptisteRule(object): - def __init__(self, feature_index, kmer_sequence, type): """ A k-mer rule @@ -229,7 +235,8 @@ class BaptisteRule(object): return (X[:, self.feature_index] == 1).astype(np.uint8) def inverse(self): - return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, type="absence" if self.type == "presence" else "presence") + return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, + type="absence" if self.type == "presence" else "presence") def __str__(self): return ("Absence(" if self.type == "absence" else "Presence(") + self.kmer_sequence + ")" @@ -240,6 +247,7 @@ class 
LazyBaptisteRuleList(object): By convention, the first half of the list contains presence rules and the second half contains the absence rules in the same order. """ + def __init__(self, kmer_sequences, feature_index_by_rule): self.n_rules = feature_index_by_rule.shape[0] * 2 self.kmer_sequences = kmer_sequences @@ -260,6 +268,7 @@ class LazyBaptisteRuleList(object): def __len__(self): return self.n_rules + class BaseRuleClassifications(object): def __init__(self): pass @@ -277,10 +286,12 @@ class BaseRuleClassifications(object): def sum_rows(self, rows): raise NotImplementedError() + class BaptisteRuleClassifications(BaseRuleClassifications): """ Methods involving columns account for presence and absence rules """ + # TODO: Clean up. Get rid of the code to handle deleted rows. We don't need this. def __init__(self, dataset, n_rows, block_size=None): self.dataset = dataset @@ -317,7 +328,7 @@ class BaptisteRuleClassifications(BaseRuleClassifications): """ Columns can be an integer (or any object that implements __index__) or a sorted list/ndarray. """ - #TODO: Support slicing, make this more efficient than getting the columns individually. + # TODO: Support slicing, make this more efficient than getting the columns individually. columns_is_int = False if hasattr(columns, "__index__"): # All int types implement the __index__ method (PEP 357) columns = [columns.__index__()] @@ -329,8 +340,8 @@ class BaptisteRuleClassifications(BaseRuleClassifications): else: columns = list(columns) # Detect where an inversion is needed (columns corresponding to absence rules) - columns, invert_result = zip(* (((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), - (True if column > self.dataset.shape[1] else False)) for column in columns)) + columns, invert_result = zip(*(((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), + (True if column > self.dataset.shape[1] else False)) for column in columns)) columns = list(columns) invert_result = np.array(invert_result) @@ -416,9 +427,10 @@ class BaptisteRuleClassifications(BaseRuleClassifications): self.inplace_popcount(block, block_row_mask) # Increment the sum - result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], self.dataset.shape[1])] += np.sum(block, axis=0) + result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], + self.dataset.shape[1])] += np.sum(block, axis=0) # Compute the sum for absence rules - result[self.dataset.shape[1] : ] = len(rows) - result[: self.dataset.shape[1]] + result[self.dataset.shape[1]:] = len(rows) - result[: self.dataset.shape[1]] - return result \ No newline at end of file + return result diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index d488d7c1..5a0b23f9 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -15,17 +15,19 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): - if args.FU_L_cl_names!=['']: + if args.FU_L_cl_names != ['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, 
resultsMonoview, classificationIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, + viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: - classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + classifiersConfigs = [ + monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule, classifierConfig + in zip(monoviewClassifierModules, args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", @@ -46,7 +48,8 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class SVMForLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SVMClassifier = None @@ -55,7 +58,7 @@ class SVMForLinear(LateFusionClassifier): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(self.monoviewClassifiersConfigs[0])==dict: + if type(self.monoviewClassifiersConfigs[0]) == dict: for index, viewIndex in enumerate(viewsIndices): monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) self.monoviewClassifiers.append( @@ -85,7 +88,7 @@ class SVMForLinear(LateFusionClassifier): return predictedLabels def SVMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) self.SVMClassifier = OneVsOneClassifier(SVC()) @@ -96,9 +99,10 @@ class SVMForLinear(LateFusionClassifier): self.SVMClassifier.fit(monoViewDecisions, DATASET.get("Labels").value[usedIndices]) - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): configString = "with SVM for linear \n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, + monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString \ No newline at end of file + return configString diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 20c16340..7e03342f 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ 
b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -11,23 +11,25 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): paramsSets = [] for _ in range(nIter): randomWeightsArray = randomState.random_sample(nbView) - normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) + normalizedArray = randomWeightsArray / np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): - if args.FU_L_cl_names!=['']: + if args.FU_L_cl_names != ['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, + viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: - classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)] + classifiersConfigs = [ + monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule, classifierConfig + in zip(monoviewClassifierModules, args.FU_L_cl_config)] else: classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", @@ -48,9 +50,10 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class WeightedLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig']==['']: + if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -63,21 +66,22 @@ class WeightedLinear(LateFusionClassifier): if viewsIndices is None: viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - self.weights = self.weights/float(sum(self.weights)) + self.weights /= float(sum(self.weights)) if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): viewScores[index] = np.array(self.monoviewClassifiers[index].predict_proba( - getV(DATASET, viewIndex, usedIndices)))*self.weights[index] + getV(DATASET, viewIndex, usedIndices))) * self.weights[index] predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) return predictedLabels - def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): - configString = "with Weighted linear using a weight for each view : "+", ".join(map(str,self.weights)) + \ + def 
getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): + configString = "with Weighted linear using a weight for each view : " + ", ".join(map(str, self.weights)) + \ "\n\t-With monoview classifiers : " - for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): + for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, + monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) return configString diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py index 9bbd76fb..406f941a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/__init__.py @@ -1,7 +1,8 @@ import os + for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): if module == '__init__.py' or module[-3:] != '.py': continue __import__(module[:-3], locals(), globals()) del module -del os \ No newline at end of file +del os diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py index 3ce1d337..c7ee5473 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/__init__.py @@ -1,2 +1,3 @@ from . import EarlyFusion, LateFusion, LateFusionPackage, EarlyFusionPackage -__all__ = ["EarlyFusionPackage", "LateFusionPackage"] \ No newline at end of file + +__all__ = ["EarlyFusionPackage", "LateFusionPackage"] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/__init__.py index 9b0e79fa..dd2810a4 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/__init__.py @@ -1,2 +1,3 @@ from . 
import Fusion, analyzeResults, Methods -__all__ = ["Fusion", "Methods"] \ No newline at end of file + +__all__ = ["Fusion", "Methods"] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py index dc601b36..9142a30f 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py @@ -1,6 +1,7 @@ from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report import numpy as np import matplotlib + matplotlib.use('Agg') import matplotlib.pyplot as plt import operator @@ -9,11 +10,9 @@ from Methods import * import Methods.LateFusion import Metrics - - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def error(testLabels, computedLabels): @@ -21,18 +20,17 @@ def error(testLabels, computedLabels): return float(error) * 100 / len(computedLabels) - def printMetricScore(metricScores, metrics): metricScoreString = "\n\n" for metric in metrics: metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - metricScoreString += "\tFor "+metricModule.getConfig(**metricKWARGS)+" : " - metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1]) + metricScoreString += "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " + metricScoreString += "\n\t\t- Score on train : " + str(metricScores[metric[0]][0]) + metricScoreString += "\n\t\t- Score on test : " + str(metricScores[metric[0]][1]) metricScoreString += "\n\n" return metricScoreString @@ -40,7 +38,7 @@ def printMetricScore(metricScores, metrics): def getTotalMetricScores(metric, trainLabels, testLabels, DATASET, validationIndices, learningIndices): labels = DATASET.get("Labels").value metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -66,7 +64,6 @@ def execute(classifier, trainLabels, name, KFolds, hyperParamSearch, nIter, metrics, viewsIndices, randomState): - CLASS_LABELS = DATASET.get("Labels").value fusionType = classificationKWARGS["fusionType"] @@ -76,24 +73,28 @@ def execute(classifier, trainLabels, learningIndices, validationIndices = classificationIndices metricModule = getattr(Metrics, metrics[0][0]) - if metrics[0][1]!=None: + if metrics[0][1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1])) else: metricKWARGS = {} scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], CLASS_LABELS[learningIndices], **metricKWARGS) scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS) - fusionConfiguration = classifier.classifier.getConfig(fusionMethodConfig,monoviewClassifiersNames, monoviewClassifiersConfigs) - stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + " and random state : "+str(randomState)+ \ - "\n\n"+metrics[0][0]+" :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str(scoreOnTest) + \ + fusionConfiguration = 
classifier.classifier.getConfig(fusionMethodConfig, monoviewClassifiersNames, + monoviewClassifiersConfigs) + stringAnalysis = "\t\tResult for Multiview classification with " + fusionType + " and random state : " + str( + randomState) + \ + "\n\n" + metrics[0][0] + " :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str( + scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ - ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(KFolds.n_splits) + \ - " folds\n\nClassification configuration : \n\t-Algorithm used : "+fusionType+" "+fusionConfiguration + ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str( + KFolds.n_splits) + \ + " folds\n\nClassification configuration : \n\t-Algorithm used : " + fusionType + " " + fusionConfiguration - if fusionType=="LateFusion": - stringAnalysis+=Methods.LateFusion.getScores(classifier) + if fusionType == "LateFusion": + stringAnalysis += Methods.LateFusion.getScores(classifier) metricsScores = getMetricsScores(metrics, trainLabels, testLabels, DATASET, validationIndices, learningIndices) - stringAnalysis+=printMetricScore(metricsScores, metrics) + stringAnalysis += printMetricScore(metricsScores, metrics) imagesAnalysis = {} return stringAnalysis, imagesAnalysis, metricsScores diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py index e90bd99a..f2806f74 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py @@ -8,16 +8,18 @@ import logging import Metrics + def DecisionTree(data, labels, arg, weights, randomState): depth = int(arg[0]) subSampling = float(arg[1]) if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState, weights=weights) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState, + weights=weights) else: subSampledData, subSampledLabels, subSampledWeights = data, labels, weights isBad = False classifier = tree.DecisionTreeClassifier(max_depth=depth) - #classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth)) + # classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth)) classifier.fit(subSampledData, subSampledLabels, subSampledWeights) prediction = classifier.predict(data) accuracy = accuracy_score(labels, prediction) @@ -34,7 +36,7 @@ def getConfig(classifierConfig): def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): - minSubSampling = 1.0/(len(labels)/2) + minSubSampling = 1.0 / (len(labels) / 2) bestSettings = [] bestResults = [] classifier = tree.DecisionTreeClassifier(max_depth=1) @@ -46,13 +48,14 @@ def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): preliminary_accuracies[i] = accuracy_score(labels, prediction) preliminary_accuracy = np.mean(preliminary_accuracies) if preliminary_accuracy < 0.50: - for max_depth in np.arange(10)+1: - for subSampling in sorted((np.arange(20, dtype=float)+1)/20, reverse=True): + for max_depth in np.arange(10) + 1: + for subSampling in sorted((np.arange(20, dtype=float) + 1) / 20, reverse=True): if subSampling > minSubSampling: accuracies = np.zeros(50) for i in range(50): if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = 
subSample(data, labels, subSampling, randomState) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + randomState) else: subSampledData, subSampledLabels, = data, labels classifier = tree.DecisionTreeClassifier(max_depth=max_depth) @@ -73,11 +76,12 @@ def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): preliminary_accuracies[i] = accuracy_score(labels, prediction) preliminary_accuracy = np.mean(preliminary_accuracies) if preliminary_accuracy < 0.50: - for subSampling in sorted((np.arange(19, dtype=float)+1)/200, reverse=True): + for subSampling in sorted((np.arange(19, dtype=float) + 1) / 200, reverse=True): if minSubSampling < subSampling: accuracies = np.zeros(50) for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState ) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + randomState) classifier = tree.DecisionTreeClassifier(max_depth=1) classifier.fit(subSampledData, subSampledLabels) prediction = classifier.predict(data) @@ -87,10 +91,11 @@ def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): bestSettings.append([1, subSampling]) bestResults.append(accuracy) else: - for subSampling in sorted((np.arange(19, dtype=float)+1)/2000, reverse=True): + for subSampling in sorted((np.arange(19, dtype=float) + 1) / 2000, reverse=True): accuracies = np.zeros(50) for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + randomState) if minSubSampling < subSampling: classifier1 = tree.DecisionTreeClassifier(max_depth=1) classifier1.fit(subSampledData, subSampledLabels) @@ -101,7 +106,7 @@ def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): bestSettings.append([1, subSampling]) bestResults.append(accuracy) - assert bestResults!=[], "No good settings found for Decision Tree!" + assert bestResults != [], "No good settings found for Decision Tree!" 
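The hyperParamSearch above deliberately looks for a weak decision stump: it sweeps max_depth and the sub-sampling rate and, in getBestSetting just below, keeps the candidate whose training accuracy is closest to 0.55 (the variable is still named diffTo52), so each view's learner stays only slightly better than chance and boosting has room to improve. A compressed, runnable illustration of that selection rule on synthetic data; the grid, the data and the names are made up for the example:

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Hypothetical data standing in for one Mumbo view.
rng = np.random.RandomState(42)
data = rng.random_sample((200, 10))
labels = rng.randint(0, 2, 200)

target = 0.55  # the patch keeps weak learners close to chance so boosting can keep improving
candidates = []
for max_depth in range(1, 4):
    for sub_sampling in (1.0, 0.5, 0.25):
        n_keep = max(int(sub_sampling * len(labels)), 2)
        idx = rng.choice(len(labels), size=n_keep, replace=False)
        clf = DecisionTreeClassifier(max_depth=max_depth).fit(data[idx], labels[idx])
        acc = accuracy_score(labels, clf.predict(data))
        candidates.append(((max_depth, sub_sampling), acc))

# getBestSetting keeps the setting whose training accuracy is closest to the target.
best_setting, best_acc = min(candidates, key=lambda c: abs(target - c[1]))
print(best_setting, round(best_acc, 4))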
return getBestSetting(bestSettings, bestResults) @@ -110,10 +115,10 @@ def getBestSetting(bestSettings, bestResults): diffTo52 = 100.0 bestSettingsIndex = 0 for resultIndex, result in enumerate(bestResults): - if abs(0.55-result) < diffTo52: - diffTo52 = abs(0.55-result) + if abs(0.55 - result) < diffTo52: + diffTo52 = abs(0.55 - result) bestResult = result bestSettingsIndex = resultIndex - logging.debug("\t\tInfo:\t Best Result : "+str(result)) + logging.debug("\t\tInfo:\t Best Result : " + str(result)) - return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex]) \ No newline at end of file + return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex]) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py index eede20a3..596368ac 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py @@ -121,4 +121,4 @@ # logging.debug("\t\tInfo:\t Best Reslut : "+str(result)) # # return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex]) -# # return map(round(,4), bestSettings[bestSettingsIndex]) \ No newline at end of file +# # return map(round(,4), bestSettings[bestSettingsIndex]) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/ModifiedMulticlass.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/ModifiedMulticlass.py index 040342fd..ae3c70b2 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/ModifiedMulticlass.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/ModifiedMulticlass.py @@ -166,7 +166,6 @@ def predict_proba_ovr(estimators, X, is_multilabel): class _ConstantPredictor(BaseEstimator): - def fit(self, X, y): self.y_ = y return self @@ -256,9 +255,9 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): # n_jobs > 1 in can results in slower performance due to the overhead # of spawning threads. See joblib issue #112. 
self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)( - self.estimator, X, column, classes=[ - "not %s" % self.label_binarizer_.classes_[i], - self.label_binarizer_.classes_[i]], sample_weight=sample_weight) + self.estimator, X, column, classes=[ + "not %s" % self.label_binarizer_.classes_[i], + self.label_binarizer_.classes_[i]], sample_weight=sample_weight) for i, column in enumerate(columns)) return self @@ -349,7 +348,7 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): check_is_fitted(self, 'estimators_') if not hasattr(self.estimators_[0], "decision_function"): raise AttributeError( - "Base estimator doesn't have a decision_function attribute.") + "Base estimator doesn't have a decision_function attribute.") return np.array([est.decision_function(X).ravel() for est in self.estimators_]).T @@ -367,7 +366,7 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): check_is_fitted(self, 'estimators_') if not hasattr(self.estimators_[0], "coef_"): raise AttributeError( - "Base estimator doesn't have a coef_ attribute.") + "Base estimator doesn't have a coef_ attribute.") coefs = [e.coef_ for e in self.estimators_] if sp.issparse(coefs[0]): return sp.vstack(coefs) @@ -378,7 +377,7 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): check_is_fitted(self, 'estimators_') if not hasattr(self.estimators_[0], "intercept_"): raise AttributeError( - "Base estimator doesn't have an intercept_ attribute.") + "Base estimator doesn't have an intercept_ attribute.") return np.array([e.intercept_.ravel() for e in self.estimators_]) @@ -469,9 +468,9 @@ class OneVsOneClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): self.classes_ = np.unique(y) n_classes = self.classes_.shape[0] self.estimators_ = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_ovo_binary)( - self.estimator, X, y, self.classes_[i], self.classes_[j]) - for i in range(n_classes) for j in range(i + 1, n_classes)) + delayed(_fit_ovo_binary)( + self.estimator, X, y, self.classes_[i], self.classes_[j]) + for i in range(n_classes) for j in range(i + 1, n_classes)) return self @@ -695,8 +694,8 @@ class OutputCodeClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): for i in range(X.shape[0])], dtype=np.int) self.estimators_ = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_binary)(self.estimator, X, Y[:, i]) - for i in range(Y.shape[1])) + delayed(_fit_binary)(self.estimator, X, Y[:, i]) + for i in range(Y.shape[1])) return self @@ -715,4 +714,3 @@ class OutputCodeClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T pred = euclidean_distances(Y, self.code_book_).argmin(axis=1) return self.classes_[pred] - diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py index b3ffc382..d8f2bd5c 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/SubSampling.py @@ -1,5 +1,6 @@ import numpy as np + def getLabelSupports(CLASS_LABELS): labels = set(CLASS_LABELS) supports = [CLASS_LABELS.tolist().count(label) for label in labels] @@ -16,7 +17,7 @@ def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict): def subSample(data, labels, subSampling, randomState, weights=None): if weights is None: - weights = np.ones(len(labels))/len(labels) + weights = 
np.ones(len(labels)) / len(labels) nbExamples = len(labels) labelSupports, labelDict = getLabelSupports(labels) @@ -37,4 +38,4 @@ def subSample(data, labels, subSampling, randomState, weights=None): subSampledData.append(data[index]) subSampledLabels.append(labels[index]) subSampledWeights.append(weights[index]) - return np.array(subSampledData), np.array(subSampledLabels), np.array(subSampledWeights) \ No newline at end of file + return np.array(subSampledData), np.array(subSampledLabels), np.array(subSampledWeights) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index e5ae0318..d36f646f 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -9,10 +9,9 @@ import logging from sklearn.metrics import accuracy_score from utils.Dataset import getV - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype # Data shape : ((Views, Examples, Corrdinates)) @@ -63,17 +62,20 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, weights = computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices) classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string classifierMethod = getattr(classifierModule, classifierName) - classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights) + classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, + weights) logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) return classifier, classes, isBad, averageAccuracy def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATASET_LENGTH, - viewIndice, classifier_config, viewName, iterIndex, costMatrices, classifierIndex, randomState): + viewIndice, classifier_config, viewName, iterIndex, costMatrices, classifierIndex, + randomState): weights = computeWeights(DATASET_LENGTH, iterIndex, classifierIndex, CLASS_LABELS, costMatrices) classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string classifierMethod = getattr(classifierModule, classifierName) - classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights, randomState) + classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, + weights, randomState) logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) return classifier, classes, isBad, averageAccuracy @@ -82,20 +84,22 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): - logging.debug("\tStart:\t Random search for "+classifierName+" on "+DATASET.get("View"+str(viewIndices[classifierIndex])).attrs["name"]) + logging.debug("\tStart:\t Random search for " + classifierName + " on " + + DATASET.get("View" + str(viewIndices[classifierIndex])).attrs["name"]) classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string classifierGridSearch = getattr(classifierModule, "hyperParamSearch") 
bestSettings.append(classifierGridSearch(getV(DATASET, viewIndices[classifierIndex], learningIndices), - DATASET.get("Labels").value[learningIndices], randomState, metric=metric)) - logging.debug("\tDone:\t Gridsearch for "+classifierName) + DATASET.get("Labels").value[learningIndices], randomState, + metric=metric)) + logging.debug("\tDone:\t Gridsearch for " + classifierName) return bestSettings, None def getCLString(classificationKWARGS): - return "Mumbo-"+"-".join(classificationKWARGS["classifiersNames"]) + return "Mumbo-" + "-".join(classificationKWARGS["classifiersNames"]) -class Mumbo: +class Mumbo: def __init__(self, randomState, NB_CORES=1, **kwargs): self.maxIter = kwargs["maxIter"] self.minIter = kwargs["minIter"] @@ -109,7 +113,7 @@ class Mumbo: self.alphas = np.zeros((self.maxIter, nbView)) self.generalAlphas = np.zeros(self.maxIter) self.bestClassifiers = [] - self.bestViews = np.zeros(self.maxIter, dtype=int)-1 + self.bestViews = np.zeros(self.maxIter, dtype=int) - 1 self.averageAccuracies = np.zeros((self.maxIter, nbView)) self.iterAccuracies = np.zeros(self.maxIter) self.randomState = randomState @@ -119,7 +123,7 @@ class Mumbo: self.alphas = np.zeros((self.maxIter, nbView)) self.generalAlphas = np.zeros(self.maxIter) self.bestClassifiers = [] - self.bestViews = np.zeros(self.maxIter, dtype=int)-1 + self.bestViews = np.zeros(self.maxIter, dtype=int) - 1 self.averageAccuracies = np.zeros((self.maxIter, nbView)) self.costMatrices = np.array([ np.array([ @@ -127,7 +131,8 @@ class Mumbo: np.array([1 if labels[exampleIndice] != classe else -(nbClass - 1) for classe in range(nbClass) - ]) for exampleIndice in range(datasetLength) + ]) for exampleIndice in + range(datasetLength) ]) for viewIndice in range(nbView)]) if iteration == 0 else np.zeros((nbView, datasetLength, nbClass)) @@ -151,7 +156,7 @@ class Mumbo: # Initialization if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(viewsIndices)==type(None): + if type(viewsIndices) == type(None): viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] NB_VIEW = len(viewsIndices) @@ -159,12 +164,13 @@ class Mumbo: LABELS = DATASET.get("Labels").value[trainIndices] self.initDataDependant(DATASET_LENGTH, NB_VIEW, NB_CLASS, LABELS) # Learning - isStabilized=False + isStabilized = False self.iterIndex = 0 - while not isStabilized and not self.iterIndex >= self.maxIter-1: + while not isStabilized and not self.iterIndex >= self.maxIter - 1: if self.iterIndex > self.minIter: - coeffs = np.polyfit(np.log(np.arange(self.iterIndex)+0.00001), self.iterAccuracies[:self.iterIndex], 1) - if coeffs[0]/self.iterIndex < self.threshold: + coeffs = np.polyfit(np.log(np.arange(self.iterIndex) + 0.00001), self.iterAccuracies[:self.iterIndex], + 1) + if coeffs[0] / self.iterIndex < self.threshold: isStabilized = True logging.debug('\t\tStart:\t Iteration ' + str(self.iterIndex + 1)) @@ -180,7 +186,8 @@ class Mumbo: if areBad[viewFakeIndex]: self.alphas[self.iterIndex, viewFakeIndex] = 0. 
else: - self.alphas[self.iterIndex, viewFakeIndex] = self.computeAlpha(self.edges[self.iterIndex, viewFakeIndex]) + self.alphas[self.iterIndex, viewFakeIndex] = self.computeAlpha( + self.edges[self.iterIndex, viewFakeIndex]) self.updateDs(LABELS, NB_VIEW, DATASET_LENGTH) self.updateFs(NB_VIEW, DATASET_LENGTH, NB_CLASS) @@ -188,14 +195,14 @@ class Mumbo: self.updateCostmatrices(NB_VIEW, DATASET_LENGTH, NB_CLASS, LABELS) bestView, edge, bestFakeView = self.chooseView(viewsIndices, LABELS, DATASET_LENGTH) self.bestViews[self.iterIndex] = bestView - logging.debug("\t\t\t Best view : \t\t"+DATASET.get("View"+str(bestView)).attrs["name"]) + logging.debug("\t\t\t Best view : \t\t" + DATASET.get("View" + str(bestView)).attrs["name"]) if areBad.all(): self.generalAlphas[self.iterIndex] = 0. else: self.generalAlphas[self.iterIndex] = self.computeAlpha(edge) self.bestClassifiers.append(classifiers[bestFakeView]) self.updateGeneralFs(DATASET_LENGTH, NB_CLASS, bestFakeView) - self.updateGeneralCostMatrix(DATASET_LENGTH, NB_CLASS,LABELS) + self.updateGeneralCostMatrix(DATASET_LENGTH, NB_CLASS, LABELS) predictedLabels = self.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) accuracy = accuracy_score(DATASET.get("Labels").value[trainIndices], predictedLabels) self.iterAccuracies[self.iterIndex] = accuracy @@ -238,7 +245,7 @@ class Mumbo: for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): data = getV(DATASET, int(view), exampleIndex) predictedProbas[labelIndex, int(classifier.predict(np.array([data])))] += alpha[view] - predictedProbas[labelIndex,:] = predictedProbas[labelIndex,:]/np.sum(predictedProbas[labelIndex,:]) + predictedProbas[labelIndex, :] = predictedProbas[labelIndex, :] / np.sum(predictedProbas[labelIndex, :]) return predictedProbas def trainWeakClassifiers(self, DATASET, CLASS_LABELS, NB_CLASS, DATASET_LENGTH, NB_VIEW): @@ -282,11 +289,12 @@ class Mumbo: iterIndex = self.iterIndex trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)( delayed(trainWeakClassifier_hdf5)(classifiersNames[classifierIndex], - getV(DATASET,viewIndex,trainIndices), + getV(DATASET, viewIndex, trainIndices), DATASET.get("Labels").value[trainIndices], DATASET_LENGTH, viewIndex, classifiersConfigs[classifierIndex], - DATASET.get("View"+str(viewIndex)).attrs["name"], iterIndex, costMatrices, classifierIndex, self.randomState) + DATASET.get("View" + str(viewIndex)).attrs["name"], iterIndex, + costMatrices, classifierIndex, self.randomState) for classifierIndex, viewIndex in enumerate(viewIndices)) for viewFakeIndex, (classifier, labelsArray, isBad, averageAccuracy) in enumerate(trainedClassifiersAndLabels): @@ -310,7 +318,6 @@ class Mumbo: else: self.edges[self.iterIndex, viewFakeIndex] = -cCost / tCost - def computeAlpha(self, edge): if 1 > edge > -1: return 0.5 * math.log((1 + edge) / (1 - edge)) @@ -333,8 +340,8 @@ class Mumbo: == \ CLASS_LABELS[exampleIndice] \ or self.allViewsClassifyBadly(self.predictions, pastIterIndice, - NB_VIEW, CLASS_LABELS[exampleIndice], - exampleIndice): + NB_VIEW, CLASS_LABELS[exampleIndice], + exampleIndice): self.ds[pastIterIndice, viewIndice, exampleIndice] = 1 else: @@ -363,7 +370,8 @@ class Mumbo: if classe != CLASS_LABELS[exampleIndice]: self.costMatrices[self.iterIndex + 1, viewIndice, exampleIndice, classe] \ = 1.0 * math.exp(self.fs[self.iterIndex, viewIndice, exampleIndice, classe] - - self.fs[self.iterIndex, viewIndice, exampleIndice, int(CLASS_LABELS[exampleIndice])]) + self.fs[self.iterIndex, viewIndice, 
exampleIndice, int( + CLASS_LABELS[exampleIndice])]) else: self.costMatrices[self.iterIndex + 1, viewIndice, exampleIndice, classe] \ = -1. * np.sum(np.exp(self.fs[self.iterIndex, viewIndice, exampleIndice] - @@ -425,7 +433,8 @@ class Mumbo: predictedLabels = np.zeros((DATASET_LENGTH, NB_ITER)) votes = np.zeros((DATASET_LENGTH, NB_CLASS)) - for classifier, alpha, view, iterIndice in zip(self.bestClassifiers, self.alphas, self.bestViews, range(NB_ITER)): + for classifier, alpha, view, iterIndice in zip(self.bestClassifiers, self.alphas, self.bestViews, + range(NB_ITER)): votesByIter = np.zeros((DATASET_LENGTH, NB_CLASS)) for exampleIndice in range(DATASET_LENGTH): @@ -447,7 +456,7 @@ class Mumbo: votesByIter = np.zeros((DATASET_LENGTH, NB_CLASS)) for usedExampleIndex, exampleIndex in enumerate(usedIndices): - data = np.array([np.array(getV(DATASET,int(view), int(exampleIndex)))]) + data = np.array([np.array(getV(DATASET, int(view), int(exampleIndex)))]) votesByIter[usedExampleIndex, int(classifier.predict(data))] += alpha[fakeViewsIndicesDict[view]] votes[usedExampleIndex] = votes[usedExampleIndex] + np.array(votesByIter[usedExampleIndex]) predictedLabels[usedExampleIndex, iterIndex] = np.argmax(votes[usedExampleIndex]) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/__init__.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/__init__.py index be034386..5f2e3464 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/__init__.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/__init__.py @@ -1,2 +1,3 @@ from . import Mumbo, analyzeResults -__all__ = ["Mumbo", "Classifiers"] \ No newline at end of file + +__all__ = ["Mumbo", "Classifiers"] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py index 4247036d..ca309191 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py @@ -1,6 +1,7 @@ from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report import numpy as np import matplotlib + matplotlib.use('Agg') import matplotlib.pyplot as plt import operator @@ -11,11 +12,9 @@ import logging import Metrics from utils.Dataset import getV, getShape - # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype def findMainView(bestViews): @@ -30,7 +29,7 @@ def plotAccuracyByIter(scoresOnTainByIter, scoresOnTestByIter, features, classif figure = plt.figure() ax1 = figure.add_subplot(111) axes = figure.gca() - axes.set_ylim([0.40,1.00]) + axes.set_ylim([0.40, 1.00]) titleString = "" for view, classifierConfig in zip(features, classifierAnalysis): titleString += "\n" + view + " : " + classifierConfig @@ -39,15 +38,15 @@ def plotAccuracyByIter(scoresOnTainByIter, scoresOnTestByIter, features, classif plt.text(0.5, 1.08, titleString, horizontalalignment='center', fontsize=8, - transform = ax1.transAxes) + transform=ax1.transAxes) figure.subplots_adjust(top=0.8) ax1.set_xlabel("Iteration Index") ax1.set_ylabel("Accuracy") ax1.plot(x, scoresOnTainByIter, c='red', label='Train') ax1.plot(x, scoresOnTestByIter, c='black', label='Test') - ax1.legend(loc='lower center', - ncol=3, fancybox=True, shadow=True) + ax1.legend(loc='lower center', + ncol=3, fancybox=True, shadow=True) return '-accuracyByIteration', figure @@ -62,7 +61,7 @@ 
def classifyMumbobyIter_hdf5(usedIndices, DATASET, classifiers, alphas, views, N votesByIter = np.zeros((DATASET_LENGTH, NB_CLASS)) for usedExampleIndex, exampleIndex in enumerate(usedIndices): - data = np.array([np.array(getV(DATASET,int(view), exampleIndex))]) + data = np.array([np.array(getV(DATASET, int(view), exampleIndex))]) votesByIter[usedExampleIndex, int(classifier.predict(data))] += alpha votes[usedExampleIndex] = votes[usedExampleIndex] + np.array(votesByIter[usedExampleIndex]) predictedLabels[usedExampleIndex, iterIndex] = np.argmax(votes[usedExampleIndex]) @@ -77,14 +76,15 @@ def error(testLabels, computedLabels): def getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY): nbView = DATASET.get("Metadata").attrs["nbView"] - viewNames = [DATASET.get("View"+str(viewIndex)).attrs["name"] for viewIndex in range(nbView)] - viewShapes = [getShape(DATASET,viewIndex) for viewIndex in range(nbView)] + viewNames = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in range(nbView)] + viewShapes = [getShape(DATASET, viewIndex) for viewIndex in range(nbView)] DBString = "Dataset info :\n\t-Dataset name : " + databaseName DBString += "\n\t-Labels : " + ', '.join(LABELS_DICTIONARY.values()) - DBString += "\n\t-Views : " + ', '.join([viewName+" of shape "+str(viewShape) + DBString += "\n\t-Views : " + ', '.join([viewName + " of shape " + str(viewShape) for viewName, viewShape in zip(viewNames, viewShapes)]) DBString += "\n\t-" + str(nbFolds) + " folds" - DBString += "\n\t- Validation set length : "+str(len(validationIndices))+" for learning rate : "+str(LEARNING_RATE)+" on a total number of examples of "+str(DATASET.get("Metadata").attrs["datasetLength"]) + DBString += "\n\t- Validation set length : " + str(len(validationIndices)) + " for learning rate : " + str( + LEARNING_RATE) + " on a total number of examples of " + str(DATASET.get("Metadata").attrs["datasetLength"]) DBString += "\n\n" return DBString, viewNames @@ -95,7 +95,7 @@ def getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, hyperPar threshold = classificationKWARGS["threshold"] extractionTime, classificationTime = times weakClassifierConfigs = [getattr(globals()[classifierName], 'getConfig')(classifiersConfig) for classifiersConfig, - classifierName + classifierName in zip(classifier.classifiersConfigs, classifier.classifiersNames)] classifierAnalysis = [classifierName + " " + weakClassifierConfig + "on " + feature for classifierName, weakClassifierConfig, @@ -103,9 +103,9 @@ def getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, hyperPar in zip(classifier.classifiersNames, weakClassifierConfigs, viewNames)] gridSearchString = "" if hyperParamSearch: - gridSearchString += "Configurations found by randomized search with "+str(nIter)+" iterations" - algoString = "\n\nMumbo configuration : \n\t-Used "+str(nbCores)+" core(s)" - algoString += "\n\t-Iterations : min " + str(minIter)+ ", max "+str(maxIter)+", threshold "+str(threshold) + gridSearchString += "Configurations found by randomized search with " + str(nIter) + " iterations" + algoString = "\n\nMumbo configuration : \n\t-Used " + str(nbCores) + " core(s)" + algoString += "\n\t-Iterations : min " + str(minIter) + ", max " + str(maxIter) + ", threshold " + str(threshold) algoString += "\n\t-Weak Classifiers : " + "\n\t\t-".join(classifierAnalysis) algoString += "\n\n" algoString += "\n\nComputation time on " + str(nbCores) + " cores : \n\tDatabase extraction time : " + str( @@ -124,22 
+124,26 @@ def getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, trainLab nbView = len(viewIndices) NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] metricModule = getattr(Metrics, metric[0]) - fakeViewsIndicesDict = dict((viewIndex, fakeViewIndex) for viewIndex, fakeViewIndex in zip(viewIndices, range(nbView))) + fakeViewsIndicesDict = dict( + (viewIndex, fakeViewIndex) for viewIndex, fakeViewIndex in zip(viewIndices, range(nbView))) trainScore = metricModule.score(CLASS_LABELS[learningIndices], trainLabels) testScore = metricModule.score(CLASS_LABELS[validationIndices], testLabels) mumboClassifier = classifier maxIter = mumboClassifier.iterIndex meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0) - viewsStats = np.array([float(list(mumboClassifier.bestViews).count(viewIndex))/ - len(mumboClassifier.bestViews)for viewIndex in range(nbView)]) - PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=learningIndices, - NB_CLASS=NB_CLASS) - PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, usedIndices=validationIndices, + viewsStats = np.array([float(list(mumboClassifier.bestViews).count(viewIndex)) / + len(mumboClassifier.bestViews) for viewIndex in range(nbView)]) + PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, + usedIndices=learningIndices, NB_CLASS=NB_CLASS) - scoresByIter = np.zeros((len(PredictedTestLabelsByIter),2)) - for iterIndex,(iterPredictedTrainLabels, iterPredictedTestLabels) in enumerate(zip(PredictedTrainLabelsByIter, PredictedTestLabelsByIter)): - scoresByIter[iterIndex, 0] = metricModule.score(CLASS_LABELS[learningIndices],iterPredictedTrainLabels) - scoresByIter[iterIndex, 1] = metricModule.score(CLASS_LABELS[validationIndices],iterPredictedTestLabels) + PredictedTestLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, + usedIndices=validationIndices, + NB_CLASS=NB_CLASS) + scoresByIter = np.zeros((len(PredictedTestLabelsByIter), 2)) + for iterIndex, (iterPredictedTrainLabels, iterPredictedTestLabels) in enumerate( + zip(PredictedTrainLabelsByIter, PredictedTestLabelsByIter)): + scoresByIter[iterIndex, 0] = metricModule.score(CLASS_LABELS[learningIndices], iterPredictedTrainLabels) + scoresByIter[iterIndex, 1] = metricModule.score(CLASS_LABELS[validationIndices], iterPredictedTestLabels) scoresOnTainByIter = [scoresByIter[iterIndex, 0] for iterIndex in range(maxIter)] @@ -161,7 +165,7 @@ def iterRelevant(iterIndex, kFoldClassifierStats): def modifiedMean(surplusAccuracies): maxLen = 0 for foldAccuracies in surplusAccuracies.values(): - if len(foldAccuracies)>maxLen: + if len(foldAccuracies) > maxLen: maxLen = len(foldAccuracies) meanAccuracies = [] for accuracyIndex in range(maxLen): @@ -179,13 +183,13 @@ def printMetricScore(metricScores, metrics): metricScoreString = "\n\n" for metric in metrics: metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - metricScoreString += "\tFor "+metricModule.getConfig(**metricKWARGS)+" : " - metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1]) + metricScoreString += "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " + 
metricScoreString += "\n\t\t- Score on train : " + str(metricScores[metric[0]][0]) + metricScoreString += "\n\t\t- Score on test : " + str(metricScores[metric[0]][1]) metricScoreString += "\n\n" return metricScoreString @@ -194,7 +198,7 @@ def getTotalMetricScores(metric, trainLabels, testLabels, DATASET, validationIndices, learningIndices): labels = DATASET.get("Labels").value metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -214,7 +218,7 @@ def getMetricsScores(metrics, trainLabels, testLabels, def getMeanIterations(kFoldClassifierStats, foldIndex): - iterations = np.array([kFoldClassifier[foldIndex].iterIndex+1 for kFoldClassifier in kFoldClassifierStats]) + iterations = np.array([kFoldClassifier[foldIndex].iterIndex + 1 for kFoldClassifier in kFoldClassifierStats]) return np.mean(iterations) @@ -226,20 +230,23 @@ def execute(classifier, trainLabels, hyperParamSearch, nIter, metrics, viewsIndices, randomState): learningIndices, validationIndices = classificationIndices - LEARNING_RATE = len(learningIndices)/(len(learningIndices)+len(validationIndices)) + LEARNING_RATE = len(learningIndices) / (len(learningIndices) + len(validationIndices)) nbFolds = KFolds.n_splits CLASS_LABELS = DATASET.get("Labels")[...] - dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY) - algoConfigurationString, classifierAnalysis = getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, hyperParamSearch, nIter, times) + dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, + LABELS_DICTIONARY) + algoConfigurationString, classifierAnalysis = getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, + hyperParamSearch, nIter, times) (totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter, scoresOnTestByIter) = getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, - trainLabels, testLabels, viewsIndices, metrics[0]) + trainLabels, testLabels, viewsIndices, metrics[0]) - stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : "+str(randomState) + \ - "\n\nAverage "+metrics[0][0]+" :\n\t-On Train : " + str(totalScoreOnTrain) + "\n\t-On Test : " + \ + stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : " + str(randomState) + \ + "\n\nAverage " + metrics[0][0] + " :\n\t-On Train : " + str( + totalScoreOnTrain) + "\n\t-On Test : " + \ str(totalScoreOnTest) stringAnalysis += dbConfigurationString stringAnalysis += algoConfigurationString @@ -247,16 +254,16 @@ def execute(classifier, trainLabels, DATASET, validationIndices, learningIndices) stringAnalysis += printMetricScore(metricsScores, metrics) stringAnalysis += "Mean average scores and stats :" - for viewIndex, (meanAverageAccuracy, bestViewStat) in enumerate(zip(meanAverageAccuracies,viewsStats)): - stringAnalysis+="\n\t- On "+viewNames[viewIndex]+ \ - " : \n\t\t- Mean average Accuracy : "+str(meanAverageAccuracy)+ \ - "\n\t\t- Percentage of time chosen : "+str(bestViewStat) + for viewIndex, (meanAverageAccuracy, bestViewStat) in enumerate(zip(meanAverageAccuracies, viewsStats)): + stringAnalysis += "\n\t- On " + viewNames[viewIndex] + \ + " : \n\t\t- Mean average Accuracy : " + str(meanAverageAccuracy) + \ + "\n\t\t- Percentage of 
time chosen : " + str(bestViewStat) stringAnalysis += "\n\n For each iteration : " for iterIndex in range(len(scoresOnTainByIter)): stringAnalysis += "\n\t- Iteration " + str(iterIndex + 1) stringAnalysis += "\n\t\tScore on train : " + \ - str(scoresOnTainByIter[iterIndex]) + '\n\t\tScore on test : ' + \ - str(scoresOnTestByIter[iterIndex]) + str(scoresOnTainByIter[iterIndex]) + '\n\t\tScore on test : ' + \ + str(scoresOnTestByIter[iterIndex]) name, image = plotAccuracyByIter(scoresOnTainByIter, scoresOnTestByIter, views, classifierAnalysis) imagesAnalysis = {name: image} diff --git a/Code/MonoMutliViewClassifiers/Multiview/run.py b/Code/MonoMutliViewClassifiers/Multiview/run.py index 61d2d52b..41b4a663 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/run.py +++ b/Code/MonoMutliViewClassifiers/Multiview/run.py @@ -1,15 +1,15 @@ # coding=utf-8 # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype import os -os.system('python ExecMultiview.py -log --name ModifiedMultiOmic --type .csv --views Methyl:MiRNA:RNASEQ:Clinical --pathF /home/bbauvin/Documents/Data/Data_multi_omics/ --CL_split 0.7 --CL_nbFolds 5 --CL_nb_class 2 --CL_classes Positive:Negative --CL_type Fusion --CL_cores 4 --FU_type EarlyFusion --FU_method WeightedLinear') + +os.system( + 'python ExecMultiview.py -log --name ModifiedMultiOmic --type .csv --views Methyl:MiRNA:RNASEQ:Clinical --pathF /home/bbauvin/Documents/Data/Data_multi_omics/ --CL_split 0.7 --CL_nbFolds 5 --CL_nb_class 2 --CL_classes Positive:Negative --CL_type Fusion --CL_cores 4 --FU_type EarlyFusion --FU_method WeightedLinear') # /donnees/pj_bdd_bbauvin/Data_multi_omics/ # # /home/bbauvin/Documents/Data/Data_multi_omics/ # Fusion --CL_cores 4 --FU_type EarlyFusion --FU_method WeightedLinear -#Mumbo --MU_type DecisionTree:DecisionTree:DecisionTree:DecisionTree:DecisionTree --MU_config 1:0.02 1:0.02 1:0.1 2:0.1 1:0.1 --MU_iter 1000 - +# Mumbo --MU_type DecisionTree:DecisionTree:DecisionTree:DecisionTree:DecisionTree --MU_config 1:0.02 1:0.02 1:0.1 2:0.1 1:0.1 --MU_iter 1000 diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py index 80a577b6..883e7e0e 100644 --- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py +++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py @@ -5,37 +5,39 @@ import logging # Import third party modules import matplotlib + matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np from matplotlib import cm import matplotlib as mpl -#Import own Modules +# Import own Modules import Metrics from utils.Transformations import signLabels # Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + def autolabel(rects, ax): for rect in rects: height = rect.get_height() - ax.text(rect.get_x() + rect.get_width()/2., 1.01*height, + ax.text(rect.get_x() + rect.get_width() / 2., 1.01 * height, "%.2f" % height, ha='center', va='bottom') def genFusionName(type_, a, b, c): if type_ == "Fusion" and a["fusionType"] != "EarlyFusion": - return "Late-"+str(a["fusionMethod"]) + return "Late-" + str(a["fusionMethod"]) elif type_ == "Fusion" and a["fusionType"] != "LateFusion": - return "Early-"+a["fusionMethod"]+"-"+a["classifiersNames"] + return "Early-" + a["fusionMethod"] + "-" + 
a["classifiersNames"] def genNamesFromRes(mono, multi): - names = [res[1][0]+"-"+res[1][1][-1] for res in mono] + names = [res[1][0] + "-" + res[1][1][-1] for res in mono] names += [type_ if type_ != "Fusion" else genFusionName(type_, a, b, c) for type_, a, b, c in multi] return names @@ -44,7 +46,7 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory): mono, multi = results for metric in metrics: names = genNamesFromRes(mono, multi) - nbResults = len(mono)+len(multi) + nbResults = len(mono) + len(multi) validationScores = [float(res[1][2][metric[0]][1]) for res in mono] validationScores += [float(scores[metric[0]][1]) for a, b, scores, c in multi] trainScores = [float(res[1][2][metric[0]][0]) for res in mono] @@ -55,11 +57,11 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory): names = np.array(names) f = pylab.figure(figsize=(40, 30)) - width = 0.35 # the width of the bars + width = 0.35 # the width of the bars fig = plt.gcf() fig.subplots_adjust(bottom=105.0, top=105.01) ax = f.add_axes([0.1, 0.1, 0.8, 0.8]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -68,16 +70,16 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory): trainScores = trainScores[sorted_indices] names = names[sorted_indices] - ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS)+" on validation set for each classifier") + ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS) + " on validation set for each classifier") rects = ax.bar(range(nbResults), validationScores, width, color="r", ) - rect2 = ax.bar(np.arange(nbResults)+width, trainScores, width, color="0.7",) + rect2 = ax.bar(np.arange(nbResults) + width, trainScores, width, color="0.7", ) autolabel(rects, ax) autolabel(rect2, ax) ax.legend((rects[0], rect2[0]), ('Test', 'Train')) - ax.set_xticks(np.arange(nbResults)+width) + ax.set_xticks(np.arange(nbResults) + width) ax.set_xticklabels(names, rotation="vertical") - f.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-"+metric[0]+".png") + f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + name + "-" + metric[0] + ".png") plt.close() @@ -87,24 +89,24 @@ def analyzeLabels(labelsArrays, realLabels, results, directory): nbClassifiers = len(classifiersNames) nbExamples = realLabels.shape[0] nbIter = 2 - data = np.zeros((nbExamples, nbClassifiers*nbIter)) + data = np.zeros((nbExamples, nbClassifiers * nbIter)) tempData = np.array([labelsArray == realLabels for labelsArray in np.transpose(labelsArrays)]).astype(int) for classifierIndex in range(nbClassifiers): for iterIndex in range(nbIter): - data[:,classifierIndex*nbIter+iterIndex] = tempData[classifierIndex,:] - fig = pylab.figure(figsize=(10,20)) - cmap = mpl.colors.ListedColormap(['red','green']) - bounds=[-0.5,0.5,1.5] + data[:, classifierIndex * nbIter + iterIndex] = tempData[classifierIndex, :] + fig = pylab.figure(figsize=(10, 20)) + cmap = mpl.colors.ListedColormap(['red', 'green']) + bounds = [-0.5, 0.5, 1.5] norm = mpl.colors.BoundaryNorm(bounds, cmap.N) cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto') plt.title('Error on examples depending on the classifier') - ticks = np.arange(0, nbClassifiers*nbIter, nbIter) + ticks = np.arange(0, nbClassifiers * nbIter, nbIter) labels = classifiersNames plt.xticks(ticks, labels, rotation="vertical") cbar = fig.colorbar(cax, ticks=[0, 1]) 
cbar.ax.set_yticklabels(['Wrong', ' Right']) - fig.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-error_analysis.png") + fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-error_analysis.png") plt.close() @@ -126,11 +128,11 @@ def genScoresNames(iterResults, metric, nbResults, names, nbMono): trainMeans = np.mean(trainScores, axis=0) f = pylab.figure(figsize=(40, 30)) - width = 0.35 # the width of the bars + width = 0.35 # the width of the bars fig = plt.gcf() fig.subplots_adjust(bottom=105.0, top=105.01) ax = f.add_axes([0.1, 0.1, 0.8, 0.8]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} @@ -141,24 +143,24 @@ def genScoresNames(iterResults, metric, nbResults, names, nbMono): trainMeans = trainMeans[sorted_indices] names = np.array(names)[sorted_indices] - ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS)+" for each classifier") + ax.set_title(getattr(Metrics, metric[0]).getConfig(**metricKWARGS) + " for each classifier") rects = ax.bar(range(nbResults), validationMeans, width, color="r", yerr=validationSTDs) - rect2 = ax.bar(np.arange(nbResults)+width, trainMeans, width, color="0.7", yerr=trainSTDs) + rect2 = ax.bar(np.arange(nbResults) + width, trainMeans, width, color="0.7", yerr=trainSTDs) autolabel(rects, ax) autolabel(rect2, ax) ax.legend((rects[0], rect2[0]), ('Test', 'Train')) - ax.set_xticks(np.arange(nbResults)+width) + ax.set_xticks(np.arange(nbResults) + width) ax.set_xticklabels(names, rotation="vertical") return f def analyzeIterResults(iterResults, name, metrics, directory): - nbResults = len(iterResults[0][0])+len(iterResults[0][1]) + nbResults = len(iterResults[0][0]) + len(iterResults[0][1]) nbMono = len(iterResults[0][0]) nbIter = len(iterResults) names = genNamesFromRes(iterResults[0][0], iterResults[0][1]) for metric in metrics: figure = genScoresNames(iterResults, metric, nbResults, names, nbMono) - figure.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-Mean_on_" - +str(nbIter)+"_iter-"+metric[0]+".png") + figure.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + name + "-Mean_on_" + + str(nbIter) + "_iter-" + metric[0] + ".png") diff --git a/Code/MonoMutliViewClassifiers/Versions.py b/Code/MonoMutliViewClassifiers/Versions.py index 7ff7615c..4dd66d14 100644 --- a/Code/MonoMutliViewClassifiers/Versions.py +++ b/Code/MonoMutliViewClassifiers/Versions.py @@ -10,9 +10,10 @@ # Author-Info -__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" -__status__ = "Prototype" # Production, Development, Prototype -__date__ = 2016-03-25 +__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" +__status__ = "Prototype" # Production, Development, Prototype +__date__ = "2016-03-25" + def testVersions(): try: @@ -68,7 +69,7 @@ def testVersions(): raise try: - import logging # To create Log-Files + import logging # To create Log-Files # print("Logging: " + logging.__version__) except: print "Please install logging module" @@ -89,7 +90,7 @@ def testVersions(): raise try: - import h5py# + import h5py # # print("h5py: " + h5py.__version__) except: print "Pease install h5py module" diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py index dd90f9e8..86c0ba33 100644 --- a/Code/MonoMutliViewClassifiers/utils/Dataset.py +++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py @@ -6,28 +6,29 @@ def getV(DATASET, viewIndex, usedIndices=None): if usedIndices is None: usedIndices =
range(DATASET.get("Metadata").attrs["datasetLength"]) if type(usedIndices) is int: - return DATASET.get("View"+str(viewIndex))[usedIndices, :] + return DATASET.get("View" + str(viewIndex))[usedIndices, :] else: usedIndices = np.array(usedIndices) sortedIndices = np.argsort(usedIndices) usedIndices = usedIndices[sortedIndices] - if not DATASET.get("View"+str(viewIndex)).attrs["sparse"]: - return DATASET.get("View"+str(viewIndex))[usedIndices, :][np.argsort(sortedIndices),:] + if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: + return DATASET.get("View" + str(viewIndex))[usedIndices, :][np.argsort(sortedIndices), :] else: - sparse_mat = sparse.csr_matrix((DATASET.get("View"+str(viewIndex)).get("data").value, - DATASET.get("View"+str(viewIndex)).get("indices").value, - DATASET.get("View"+str(viewIndex)).get("indptr").value), - shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:][np.argsort(sortedIndices),:] + sparse_mat = sparse.csr_matrix((DATASET.get("View" + str(viewIndex)).get("data").value, + DATASET.get("View" + str(viewIndex)).get("indices").value, + DATASET.get("View" + str(viewIndex)).get("indptr").value), + shape=DATASET.get("View" + str(viewIndex)).attrs["shape"])[usedIndices, :][ + np.argsort(sortedIndices), :] return sparse_mat def getShape(DATASET, viewIndex): - if not DATASET.get("View"+str(viewIndex)).attrs["sparse"]: - return DATASET.get("View"+str(viewIndex)).shape + if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: + return DATASET.get("View" + str(viewIndex)).shape else: - return DATASET.get("View"+str(viewIndex)).attrs["shape"] + return DATASET.get("View" + str(viewIndex)).attrs["shape"] def getValue(DATASET): @@ -35,23 +36,26 @@ def getValue(DATASET): return DATASET.value else: sparse_mat = sparse.csr_matrix((DATASET.get("data").value, - DATASET.get("indices").value, - DATASET.get("indptr").value), + DATASET.get("indices").value, + DATASET.get("indptr").value), shape=DATASET.attrs["shape"]) return sparse_mat def extractSubset(matrix, usedIndices): if sparse.issparse(matrix): - newIndptr = np.zeros(len(usedIndices)+1, dtype=int) + newIndptr = np.zeros(len(usedIndices) + 1, dtype=int) oldindptr = matrix.indptr for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndptr[exampleIndexIndex+1] = newIndptr[exampleIndexIndex]+(oldindptr[exampleIndex+1]-oldindptr[exampleIndex]) + newIndptr[exampleIndexIndex + 1] = newIndptr[exampleIndexIndex] + ( + oldindptr[exampleIndex + 1] - oldindptr[exampleIndex]) newData = np.ones(newIndptr[-1], dtype=bool) - newIndices = np.zeros(newIndptr[-1], dtype=int) + newIndices = np.zeros(newIndptr[-1], dtype=int) oldIndices = matrix.indices for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndices[newIndptr[exampleIndexIndex]:newIndptr[exampleIndexIndex+1]] = oldIndices[oldindptr[exampleIndex]: oldindptr[exampleIndex+1]] + newIndices[newIndptr[exampleIndexIndex]:newIndptr[exampleIndexIndex + 1]] = oldIndices[ + oldindptr[exampleIndex]: + oldindptr[exampleIndex + 1]] return sparse.csr_matrix((newData, newIndices, newIndptr), shape=(len(usedIndices), matrix.shape[1])) else: - return matrix[usedIndices] \ No newline at end of file + return matrix[usedIndices] diff --git a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py index 0f98ef52..915c2685 100644 --- a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py +++ b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py @@ -6,35 +6,39 @@ import 
Metrics import matplotlib.pyplot as plt import itertools -def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, searchingTool="hyperParamSearch", nIter=1, **kwargs): + +def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, + searchingTool="hyperParamSearch", nIter=1, **kwargs): if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) thismodule = sys.modules[__name__] searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=viewsIndices, nIter=nIter, **kwargs) - return bestSettings # or well set clasifier ? + bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, + viewsIndices=viewsIndices, nIter=nIter, **kwargs) + return bestSettings # or a well-set classifier? def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs): - #si grid search est selectionne, on veut tester certaines valeurs + # if grid search is selected, we want to test specific values pass -def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): +def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1, + nbCores=1, **classificationKWARGS): if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) metric = metrics[0] metricModule = getattr(Metrics, metric[0]) - if metric[1]!=None: + if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - classifierPackage =getattr(Multiview,classifierName) # Permet d'appeler un module avec une string + classifierPackage = getattr(Multiview, classifierName) # Allows calling a module with a string classifierModule = getattr(classifierPackage, classifierName) classifierClass = getattr(classifierModule, classifierName) if classifierName != "Mumbo": paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) - if metricModule.getConfig()[-14]=="h": + if metricModule.getConfig()[-14] == "h": baseScore = -1000.0 isBetter = "higher" else: @@ -48,22 +52,24 @@ def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) classifier.setParams(paramsSet) classifier.fit_hdf5(dataset, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices], viewsIndices=viewsIndices) + testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices], + viewsIndices=viewsIndices) testScore = metricModule.score(dataset.get("Labels").value[learningIndices[testIndices]], testLabels) scores.append(testScore) crossValScore = np.mean(np.array(scores)) - if isBetter=="higher" and crossValScore > baseScore: + if isBetter == "higher" and crossValScore > baseScore: baseScore = crossValScore bestSettings = paramsSet - elif isBetter=="lower" and crossValScore < baseScore: + elif isBetter == "lower" and crossValScore < baseScore: baseScore = crossValScore bestSettings = paramsSet classifier = classifierClass(randomState, NB_CORES=nbCores,
**classificationKWARGS) classifier.setParams(bestSettings) else: - bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, learningIndices, randomState, metric=metric, nIter=nIter) + bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, learningIndices, + randomState, metric=metric, nIter=nIter) classificationKWARGS["classifiersConfigs"] = bestConfigs classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) @@ -79,7 +85,7 @@ def genHeatMaps(params, scoresArray, outputFileName): if nbParams > 2: combinations = itertools.combinations(range(nbParams), 2) else: - combinations = [(0,1)] + combinations = [(0, 1)] for combination in combinations: paramName1, paramArray1 = params[combination[0]] paramName2, paramArray2 = params[combination[1]] @@ -87,10 +93,10 @@ def genHeatMaps(params, scoresArray, outputFileName): paramArray1Set = np.sort(np.array(list(set(paramArray1)))) paramArray2Set = np.sort(np.array(list(set(paramArray2)))) - scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set)))-0.1 + scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set))) - 0.1 for param1, param2, score in zip(paramArray1, paramArray2, scoresArray): - param1Index, = np.where(paramArray1Set == param1) - param2Index, = np.where(paramArray2Set == param2) + param1Index, = np.where(paramArray1Set == param1) + param2Index, = np.where(paramArray2Set == param2) scoresMatrix[int(param2Index), int(param1Index)] = score plt.figure(figsize=(8, 6)) @@ -103,131 +109,131 @@ def genHeatMaps(params, scoresArray, outputFileName): plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set) plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) plt.title('Validation metric') - plt.savefig(outputFileName+"heat_map-"+paramName1+"-"+paramName2+".png") + plt.savefig(outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png") plt.close() -# nohup python ~/dev/git/spearmint/spearmint/main.py . 
& - -# import json -# import numpy as np -# import math -# -# from os import system -# from os.path import join -# -# -# def run_kover(dataset, split, model_type, p, max_rules, output_dir): -# outdir = join(output_dir, "%s_%f" % (model_type, p)) -# kover_command = "kover learn " \ -# "--dataset '%s' " \ -# "--split %s " \ -# "--model-type %s " \ -# "--p %f " \ -# "--max-rules %d " \ -# "--max-equiv-rules 10000 " \ -# "--hp-choice cv " \ -# "--random-seed 0 " \ -# "--output-dir '%s' " \ -# "--n-cpu 1 " \ -# "-v" % (dataset, -# split, -# model_type, -# p, -# max_rules, -# outdir) -# -# system(kover_command) -# -# return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"] -# -# -# def main(job_id, params): -# print params -# -# max_rules = params["MAX_RULES"][0] -# -# species = params["SPECIES"][0] -# antibiotic = params["ANTIBIOTIC"][0] -# split = params["SPLIT"][0] -# -# model_type = params["model_type"][0] -# -# # LS31 -# if species == "saureus": -# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic -# else: -# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic) -# -# output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic) -# -# # MacBook -# #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic) -# #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic -# -# return run_kover(dataset=dataset_path, -# split=split, -# model_type=model_type, -# p=params["p"][0], -# max_rules=max_rules, -# output_dir=output_path) -# killall mongod && sleep 1 && rm -r database/* && rm mongo.log* -# mongod --fork --logpath mongo.log --dbpath database -# -# { -# "language" : "PYTHON", -# "experiment-name" : "vanilla_scm_cdiff_azithromycin", -# "polling-time" : 1, -# "resources" : { -# "my-machine" : { -# "scheduler" : "local", -# "max-concurrent" : 5, -# "max-finished-jobs" : 100 -# } -# }, -# "tasks": { -# "resistance" : { -# "type" : "OBJECTIVE", -# "likelihood" : "NOISELESS", -# "main-file" : "spearmint_wrapper", -# "resources" : ["my-machine"] -# } -# }, -# "variables": { -# -# "MAX_RULES" : { -# "type" : "ENUM", -# "size" : 1, -# "options": [10] -# }, -# -# -# "SPECIES" : { -# "type" : "ENUM", -# "size" : 1, -# "options": ["cdiff"] -# }, -# "ANTIBIOTIC" : { -# "type" : "ENUM", -# "size" : 1, -# "options": ["azithromycin"] -# }, -# "SPLIT" : { -# "type" : "ENUM", -# "size" : 1, -# "options": ["split_seed_2"] -# }, -# -# -# "model_type" : { -# "type" : "ENUM", -# "size" : 1, -# "options": ["conjunction", "disjunction"] -# }, -# "p" : { -# "type" : "FLOAT", -# "size" : 1, -# "min" : 0.01, -# "max" : 100 -# } -# } -# } \ No newline at end of file + # nohup python ~/dev/git/spearmint/spearmint/main.py . 
& + + # import json + # import numpy as np + # import math + # + # from os import system + # from os.path import join + # + # + # def run_kover(dataset, split, model_type, p, max_rules, output_dir): + # outdir = join(output_dir, "%s_%f" % (model_type, p)) + # kover_command = "kover learn " \ + # "--dataset '%s' " \ + # "--split %s " \ + # "--model-type %s " \ + # "--p %f " \ + # "--max-rules %d " \ + # "--max-equiv-rules 10000 " \ + # "--hp-choice cv " \ + # "--random-seed 0 " \ + # "--output-dir '%s' " \ + # "--n-cpu 1 " \ + # "-v" % (dataset, + # split, + # model_type, + # p, + # max_rules, + # outdir) + # + # system(kover_command) + # + # return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"] + # + # + # def main(job_id, params): + # print params + # + # max_rules = params["MAX_RULES"][0] + # + # species = params["SPECIES"][0] + # antibiotic = params["ANTIBIOTIC"][0] + # split = params["SPLIT"][0] + # + # model_type = params["model_type"][0] + # + # # LS31 + # if species == "saureus": + # dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic + # else: + # dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic) + # + # output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic) + # + # # MacBook + # #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic) + # #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic + # + # return run_kover(dataset=dataset_path, + # split=split, + # model_type=model_type, + # p=params["p"][0], + # max_rules=max_rules, + # output_dir=output_path) + # killall mongod && sleep 1 && rm -r database/* && rm mongo.log* + # mongod --fork --logpath mongo.log --dbpath database + # + # { + # "language" : "PYTHON", + # "experiment-name" : "vanilla_scm_cdiff_azithromycin", + # "polling-time" : 1, + # "resources" : { + # "my-machine" : { + # "scheduler" : "local", + # "max-concurrent" : 5, + # "max-finished-jobs" : 100 + # } + # }, + # "tasks": { + # "resistance" : { + # "type" : "OBJECTIVE", + # "likelihood" : "NOISELESS", + # "main-file" : "spearmint_wrapper", + # "resources" : ["my-machine"] + # } + # }, + # "variables": { + # + # "MAX_RULES" : { + # "type" : "ENUM", + # "size" : 1, + # "options": [10] + # }, + # + # + # "SPECIES" : { + # "type" : "ENUM", + # "size" : 1, + # "options": ["cdiff"] + # }, + # "ANTIBIOTIC" : { + # "type" : "ENUM", + # "size" : 1, + # "options": ["azithromycin"] + # }, + # "SPLIT" : { + # "type" : "ENUM", + # "size" : 1, + # "options": ["split_seed_2"] + # }, + # + # + # "model_type" : { + # "type" : "ENUM", + # "size" : 1, + # "options": ["conjunction", "disjunction"] + # }, + # "p" : { + # "type" : "FLOAT", + # "size" : 1, + # "min" : 0.01, + # "max" : 100 + # } + # } + # } diff --git a/Code/MonoMutliViewClassifiers/utils/Transformations.py b/Code/MonoMutliViewClassifiers/utils/Transformations.py index 026ff636..5d569add 100644 --- a/Code/MonoMutliViewClassifiers/utils/Transformations.py +++ b/Code/MonoMutliViewClassifiers/utils/Transformations.py @@ -2,7 +2,7 @@ import numpy as np def signLabels(labels): - if set(labels) == (0,1): + if set(labels) == (0, 1): return np.array([label if label != 0 else -1 for label in labels]) else: return labels diff --git a/Code/MonoMutliViewClassifiers/utils/__init__.py 
b/Code/MonoMutliViewClassifiers/utils/__init__.py index dfd67e21..a67257f2 100644 --- a/Code/MonoMutliViewClassifiers/utils/__init__.py +++ b/Code/MonoMutliViewClassifiers/utils/__init__.py @@ -1 +1 @@ -from . import Dataset \ No newline at end of file +from . import Dataset -- GitLab
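For readers of the utils/Dataset.py hunk above, here is a minimal standalone sketch of the CSR row-subsetting scheme that extractSubset keeps after this refactor, assuming a boolean CSR matrix; the function name extract_rows_csr and the toy matrix are illustrative only and are not part of the patch.

import numpy as np
from scipy import sparse


def extract_rows_csr(matrix, used_indices):
    # Rebuild indptr so each selected row keeps its original number of stored entries.
    new_indptr = np.zeros(len(used_indices) + 1, dtype=int)
    old_indptr = matrix.indptr
    for new_row, old_row in enumerate(used_indices):
        new_indptr[new_row + 1] = new_indptr[new_row] + (old_indptr[old_row + 1] - old_indptr[old_row])
    # Boolean data: every stored entry is True, as in the binary views handled by extractSubset.
    new_data = np.ones(new_indptr[-1], dtype=bool)
    new_indices = np.zeros(new_indptr[-1], dtype=int)
    for new_row, old_row in enumerate(used_indices):
        new_indices[new_indptr[new_row]:new_indptr[new_row + 1]] = matrix.indices[old_indptr[old_row]:old_indptr[old_row + 1]]
    return sparse.csr_matrix((new_data, new_indices, new_indptr), shape=(len(used_indices), matrix.shape[1]))


# Example: equivalent to matrix[used_indices, :] for a boolean CSR matrix.
example = sparse.csr_matrix(np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]], dtype=bool))
print(extract_rows_csr(example, [2, 0]).toarray())

For dense inputs the patched extractSubset simply returns matrix[usedIndices]; the sparse branch reproduces that row selection without densifying the view.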