diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 4014ce75be3d9117275bc3e07a7ba3dd0322a7c0..6b5d3781df153e3d8586453b5554d2ccb0ee8f12 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -217,13 +217,13 @@ def lateFusionSetArgs(views, viewsIndices, classes, method, return arguments -def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview): +def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices): multiviewArguments = [] if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: multiviewPackage = getattr(Multiview, multiviewAlgoName) mutliviewModule = getattr(multiviewPackage, multiviewAlgoName) - multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview) + multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices) argumentDictionaries["Multiview"] = multiviewArguments return argumentDictionaries @@ -288,7 +288,7 @@ def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, for arguments in argumentDictionaries["Monoview"]] monoviewTime = time.time() - dataBaseTime - start - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) resultsMultiview = [] resultsMultiview += [ @@ -343,10 +343,10 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, for arguments in argumentDictionaries["Monoview"]]) monoviewTime = time.time() - dataBaseTime - start - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) + resultsMultiview = [] if nbCores > 1: - resultsMultiview = [] nbExperiments = len(argumentDictionaries["Multiview"]) for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): resultsMultiview += Parallel(n_jobs=nbCores)( @@ -384,6 +384,7 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, "s, Multiview Time : "+str(multiviewTime)+ "s, Global Analysis Time : "+str(globalAnalysisTime)+ "s, Total Duration : "+str(totalTime)+"s") + return results def initRandomState(randomStateArg, directory): @@ -595,6 +596,7 @@ DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args datasetLength = DATASET.get("Metadata").attrs["datasetLength"] indices = np.arange(datasetLength) classificationIndices = genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates) + kFolds = genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates) datasetFiles = initMultipleDatasets(args, nbCores) @@ -631,22 +633,38 @@ argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, vi directories = genDirecortiesNames(directory, statsIter) if statsIter>1: - iterResults = [] - nbExperiments = statsIter - for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): - iterResults += (Parallel(n_jobs=nbCores)( - delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, directories[coreIndex + stepIndex * nbCores], args, classificationIndices[coreIndex + stepIndex * nbCores], kFolds[coreIndex + stepIndex * nbCores], - statsIterRandomStates[coreIndex + stepIndex * nbCores], hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, benchmark, - views) - for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) + for statIterIndex in range(statsIter): + if not os.path.exists(os.path.dirname(directories[statIterIndex]+"train_labels.csv")): + try: + os.makedirs(os.path.dirname(directories[statIterIndex]+"train_labels.csv")) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + trainIndices, testIndices = classificationIndices[statIterIndex] + trainLabels = DATASET.get("Labels").value[trainIndices] + np.savetxt(directories[statIterIndex]+"train_labels.csv", trainLabels, delimiter=",") if nbCores > 1: + iterResults = [] + nbExperiments = statsIter + for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): + iterResults += (Parallel(n_jobs=nbCores)( + delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, directories[coreIndex + stepIndex * nbCores], args, classificationIndices[coreIndex + stepIndex * nbCores], kFolds[coreIndex + stepIndex * nbCores], + statsIterRandomStates[coreIndex + stepIndex * nbCores], hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, benchmark, + views) + for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) logging.debug("Start:\t Deleting datasets for multiprocessing") + else: + iterResults = [] + for iterIndex in range(statsIter): + iterResults.append(classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, + classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], + hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, views)) analyzeIterResults(iterResults, args.name, metrics, directory) else: - classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, kFolds, + res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, kFolds, statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, views) diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index ae0cb5d0015ca6a6eb21172a93276df73bb82b1c..428209704217954af9dd34478a24cc89de380228 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -54,7 +54,17 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol CL_type = kwargs["CL_type"] X = getValue(X) learningRate = len(classificationIndices[0])/(len(classificationIndices[0])+len(classificationIndices[1])) - + labelsString = "-".join(labelsNames) + timestr = time.strftime("%Y%m%d-%H%M%S") + CL_type_string = CL_type + outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \ + '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-" + if not os.path.exists(os.path.dirname(outputFileName)): + try: + os.makedirs(os.path.dirname(outputFileName)) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise logging.debug("Done:\t Loading data") # Determine the Database to extract features logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:" @@ -78,7 +88,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol if hyperParamSearch != "None": classifierHPSearch = getattr(classifierModule, hyperParamSearch) logging.debug("Start:\t RandomSearch best settings with "+str(nIter)+" iterations for "+CL_type) - cl_desc = classifierHPSearch(X_train, y_train, randomState, KFolds=KFolds, nbCores=nbCores, + cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores, metric=metrics[0], nIter=nIter) clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc)) logging.debug("Done:\t RandomSearch best settings") @@ -106,17 +116,12 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol cl_desc = [value for key, value in sorted(clKWARGS.iteritems())] logging.debug("Done:\t Getting Results") logging.info(stringAnalysis) - labelsString = "-".join(labelsNames) - timestr = time.strftime("%Y%m%d-%H%M%S") - CL_type_string = CL_type - outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \ - '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-" - if not os.path.exists(os.path.dirname(outputFileName)): - try: - os.makedirs(os.path.dirname(outputFileName)) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise + # labelsString = "-".join(labelsNames) + # timestr = time.strftime("%Y%m%d-%H%M%S") + # CL_type_string = CL_type + # outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \ + # '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-" + outputTextFile = open(outputFileName + '.txt', 'w') outputTextFile.write(stringAnalysis) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 67dab09e3aee185d145a277cb69982187be510c5..876238ca7e33435d1cec5e9c1574a80c915c4bb6 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -4,6 +4,9 @@ from sklearn.model_selection import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier import Metrics from scipy.stats import randint +import numpy as np +import matplotlib.pyplot as plt +from utils.HyperParameterSearch import genHeatMaps # Author-Info __author__ = "Baptiste Bauvin" @@ -39,10 +42,10 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, metric=["accuracy_score", None], nIter=30, nbCores=1): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, nbCores=1): pipeline = Pipeline([('classifier', AdaBoostClassifier())]) - param= {"classifier__n_estimators": randint(1, 15), + param= {"classifier__n_estimators": randint(1, 150), "classifier__base_estimator": [DecisionTreeClassifier()]} metricModule = getattr(Metrics, metric[0]) if metric[1]!=None: @@ -55,6 +58,36 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, metric=["accuracy_ detector = grid.fit(X_train, y_train) desc_estimators = [detector.best_params_["classifier__n_estimators"], detector.best_params_["classifier__base_estimator"]] + + scoresArray = detector.cv_results_['mean_test_score'] + params = [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), + ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] + + genHeatMaps(params, scoresArray, outputFileName) + + # baseEstimatorsSet = np.array(set(baseEstimators)) + # nEstimatorsSet = np.sort(np.array(list(set(nEstimators)))) + # + # scoresArray = detector.cv_results_['mean_test_score'] + # scoresMatrix = np.zeros((len(nEstimatorsSet), 1)) + # for baseEstimator, nEstimator, score in zip(baseEstimators, nEstimators, scoresArray): + # baseEstimatorIndex = 0 + # i, = np.where(nEstimatorsSet == nEstimator) + # print i + # nEstimatorIndex, = np.where(nEstimatorsSet == nEstimator) + # scoresMatrix[int(nEstimatorIndex), baseEstimatorIndex] = score + # + # plt.figure(figsize=(8, 6)) + # plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) + # plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot, + # ) + # plt.xlabel('n_estimators') + # plt.ylabel('base_estimator') + # plt.colorbar() + # plt.xticks(np.arange(1), ["DecisionTree"]) + # plt.yticks(np.arange(len(nEstimatorsSet)), nEstimatorsSet, rotation=45) + # plt.title('Validation accuracy') + # plt.savefig(outputFileName+"heat_map.png") return desc_estimators diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 67acba97c36d25f6e59f1a3148d4680752d27596..168a0864ef8d89903c9590494a773d9a59ec2784 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps # Author-Info __author__ = "Baptiste Bauvin" @@ -42,7 +44,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())]) param_DT = {"classifier__max_depth": randint(1, 300), "classifier__criterion": ["gini", "entropy"], @@ -58,6 +60,13 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= DT_detector = grid_DT.fit(X_train, y_train) desc_params = [DT_detector.best_params_["classifier__max_depth"], DT_detector.best_params_["classifier__criterion"], DT_detector.best_params_["classifier__splitter"]] + + scoresArray = DT_detector.cv_results_['mean_test_score'] + params = [("maxDepth", np.array(DT_detector.cv_results_['param_classifier__max_depth'])), + ("criterion", np.array(DT_detector.cv_results_['param_classifier__criterion'])), + ("splitter", np.array(DT_detector.cv_results_['param_classifier__splitter']))] + + genHeatMaps(params, scoresArray, outputFileName) return desc_params diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index c8947a0a2cbb70a9feafdf108984385dc69f7c42..c6df7e415ac26b5a5d1a5a74b53a849d86c21b88 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps # Author-Info @@ -47,7 +49,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())]) param_KNN = {"classifier__n_neighbors": randint(1, 50), "classifier__weights": ["uniform", "distance"], @@ -68,6 +70,15 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= KNN_detector.best_params_["classifier__algorithm"], KNN_detector.best_params_["classifier__p"], ] + + scoresArray = KNN_detector.cv_results_['mean_test_score'] + params = [("nNeighbors", np.array(KNN_detector.cv_results_['param_classifier__n_neighbors'])), + ("weights", np.array(KNN_detector.cv_results_['param_classifier__weights'])), + ("algorithm", np.array(KNN_detector.cv_results_['param_classifier__algorithm'])), + ("p", np.array(KNN_detector.cv_results_['param_classifier__p']))] + + genHeatMaps(params, scoresArray, outputFileName) + return desc_params diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 9452465eebb9f6dc0668bf1a2c6334cd9f2355c2..4370d04f594effbe6f2f9350c3095b626216845b 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps # Author-Info __author__ = "Baptiste Bauvin" @@ -43,7 +45,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) param_rf = {"classifier__n_estimators": randint(1, 300), "classifier__max_depth": randint(1, 300), @@ -61,6 +63,13 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= desc_estimators = [rf_detector.best_params_["classifier__n_estimators"], rf_detector.best_params_["classifier__max_depth"], rf_detector.best_params_["classifier__criterion"]] + + scoresArray = rf_detector.cv_results_['mean_test_score'] + params = [("nEstimators", np.array(rf_detector.cv_results_['param_classifier__n_estimators'])), + ("maxDepth", np.array(rf_detector.cv_results_['param_classifier__max_depth'])), + ("criterion", np.array(rf_detector.cv_results_['param_classifier__criterion']))] + + genHeatMaps(params, scoresArray, outputFileName) return desc_estimators diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index e5871b664eebd9a348fe6ff1d27e27f1af6d9485..20d49be8581ae722fbd05025b7aa15c6e22242aa 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -6,6 +6,7 @@ import h5py from Multiview import GetMultiviewDb as DB from pyscm.binary_attributes.base import BaseBinaryAttributeList import os +from utils.HyperParameterSearch import genHeatMaps # Author-Info @@ -63,7 +64,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=None, metric=["accuracy_score", None], nIter=30, nbCores=1): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, metric=["accuracy_score", None], nIter=30, nbCores=1): metricModule = getattr(Metrics, metric[0]) if metric[1]!=None: @@ -77,10 +78,16 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=None, metric=["accura baseScore = 1000.0 isBetter = "lower" config = [] + maxAttributesArray = [] + pArray = [] + modelsArray = [] for iterIndex in range(nIter): max_attributes = randomState.randint(1, 20) + maxAttributesArray.append(max_attributes) p = randomState.random_sample() + pArray.append(p) model = randomState.choice(["conjunction", "disjunction"]) + modelsArray.append(model) classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attributes, model_type=model, verbose=False) scores = [] kFolds = KFolds.split(X_train, y_train) @@ -110,6 +117,12 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=None, metric=["accura config = [max_attributes, p, model] assert config!=[], "No good configuration found for SCM" + scoresArray = scores + params = [("maxAttributes", np.array(maxAttributesArray)), + ("p", np.array(pArray)), + ("model", np.array(modelsArray))] + + genHeatMaps(params, scoresArray, outputFileName) return config diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index d531e8e7a74ea43ea9e28b7424f1f35e5a68c201..4ce689495c833a9b772869d8075c7ac5c8b54c1e 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import uniform +import numpy as np +from utils.HyperParameterSearch import genHeatMaps @@ -47,7 +49,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SGD = Pipeline([('classifier', SGDClassifier())]) losses = ['log', 'modified_huber'] penalties = ["l1", "l2", "elasticnet"] @@ -65,6 +67,14 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= SGD_detector = grid_SGD.fit(X_train, y_train) desc_params = [SGD_detector.best_params_["classifier__loss"], SGD_detector.best_params_["classifier__penalty"], SGD_detector.best_params_["classifier__alpha"]] + + scoresArray = SGD_detector.cv_results_['mean_test_score'] + params = [("loss", np.array(SGD_detector.cv_results_['param_classifier__loss'])), + ("penalty", np.array(SGD_detector.cv_results_['param_classifier__penalty'])), + ("aplha", np.array(SGD_detector.cv_results_['param_classifier__alpha']))] + + genHeatMaps(params, scoresArray, outputFileName) + return desc_params diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index f9dfb3319a3e400eb297b14d52e76860b6ee8d84..0f48fce57d8232a3e2867b86fe30fa13e40705f2 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps # Author-Info @@ -36,7 +38,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear", max_iter=1000))]) param_SVMLinear = {"classifier__C": randint(1, 10000)} metricModule = getattr(Metrics, metric[0]) @@ -51,6 +53,13 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= SVMLinear_detector = grid_SVMLinear.fit(X_train, y_train) desc_params = [SVMLinear_detector.best_params_["classifier__C"]] + + scoresArray = SVMLinear_detector.cv_results_['mean_test_score'] + params = [("c", np.array(SVMLinear_detector.cv_results_['param_classifier__C'])), + ("control", np.array(["control" for _ in range(nIter)]))] + + genHeatMaps(params, scoresArray, outputFileName) + return desc_params diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index 961e579104b7e365c35b4517cf39fb20dd00d23d..316b7af6babc96787edd157e06678f56fdfbb4e4 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps @@ -40,7 +42,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly", max_iter=1000))]) param_SVMPoly = {"classifier__C": randint(1, 10000), "classifier__degree": randint(1, 30)} @@ -52,8 +54,15 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= scorer = metricModule.get_scorer(**metricKWARGS) grid_SVMPoly = RandomizedSearchCV(pipeline_SVMPoly, n_iter=nIter, param_distributions=param_SVMPoly, refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) - SVMRBF_detector = grid_SVMPoly.fit(X_train, y_train) - desc_params = [SVMRBF_detector.best_params_["classifier__C"], SVMRBF_detector.best_params_["classifier__degree"]] + SVMPoly_detector = grid_SVMPoly.fit(X_train, y_train) + desc_params = [SVMPoly_detector.best_params_["classifier__C"], SVMPoly_detector.best_params_["classifier__degree"]] + + scoresArray = SVMPoly_detector.cv_results_['mean_test_score'] + params = [("c", np.array(SVMPoly_detector.cv_results_['param_classifier__C'])), + ("degree", np.array(SVMPoly_detector.cv_results_['param_classifier__degree']))] + + genHeatMaps(params, scoresArray, outputFileName) + return desc_params diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index 1ce9881c7444f4c74082e388b6f6b7c2cc903780..4b4ec762793839100cc579a21505320b971b8464 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -3,6 +3,8 @@ from sklearn.pipeline import Pipeline # Pipelining in classifi from sklearn.model_selection import RandomizedSearchCV import Metrics from scipy.stats import randint +import numpy as np +from utils.HyperParameterSearch import genHeatMaps # Author-Info @@ -36,7 +38,7 @@ def getKWARGS(kwargsList): return kwargsDict -def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf", max_iter=1000))]) param_SVMRBF = {"classifier__C": randint(1, 10000)} metricModule = getattr(Metrics, metric[0]) @@ -49,6 +51,13 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) SVMRBF_detector = grid_SVMRBF.fit(X_train, y_train) desc_params = [SVMRBF_detector.best_params_["classifier__C"]] + + scoresArray = SVMRBF_detector.cv_results_['mean_test_score'] + params = [("c", np.array(SVMRBF_detector.cv_results_['param_classifier__C'])), + ("control", np.array(["control" for _ in range(nIter)]))] + + genHeatMaps(params, scoresArray, outputFileName) + return desc_params diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 3370bf1e13d49c1ca1a3ab7eac3bb2dab5e2aa6e..8e3012a277f69783e17b93067c8d544dee27060e 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -67,7 +67,7 @@ def getBenchmark(benchmark, args=None): return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview): +def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): if not "Monoview" in benchmark and not args.FU_L_select_monoview in ["randomClf", "Determined"]: args.FU_L_select_monoview = "randomClf" argumentsList = [] @@ -75,7 +75,7 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result fusionTypePackage = getattr(Methods, fusionType+"Package") for fusionMethod in benchmark["Multiview"]["Fusion"]["Methods"][fusionType]: fusionMethodModule = getattr(fusionTypePackage, fusionMethod) - arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview) + arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices) argumentsList+= arguments return argumentsList diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index 49e60c712eaf180809de66f9c26ac6087bd7cbf0..cd9dbbee8d9f04b14b531a547da420eb4ccb8016 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -18,7 +18,7 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): argumentsList = [] if args.FU_E_cl_names != ['']: pass diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 1e78855a57a388737f27fcddc2f560abb2d11812..09b549cbd2ae295fa31555b7e45e53b6ffe71d66 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -41,78 +41,103 @@ def getScores(LateFusionClassifiers): return "" -def intersect(allClassifersNames, directory, viewsIndices): - wrongSets = [0 for _ in allClassifersNames] +def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): + wrongSets = [[] for _ in viewsIndices] + # wrongSets = [0 for _ in allClassifersNames] + classifiersNames = [[] for _ in viewsIndices] nbViews = len(viewsIndices) - for classifierIndex, classifierName in enumerate(allClassifersNames): - try : - classifierDirectory = directory+classifierName+"/" - viewDirectoryNames = os.listdir(classifierDirectory) - wrongSets[classifierIndex]=[0 for _ in viewDirectoryNames] - for viewIndex, viewDirectoryName in enumerate(viewDirectoryNames): - for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectoryName+"/"): - if resultFileName.endswith("train_labels.csv"): - yTrainFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName - elif resultFileName.endswith("train_pred.csv"): - yTrainPredFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName - train = np.genfromtxt(yTrainFileName, delimiter=",").astype(np.int16) - pred = np.genfromtxt(yTrainPredFileName, delimiter=",").astype(np.int16) - length = len(train) - wrongLabelsIndices = np.where(train+pred == 1) - wrongSets[classifierIndex][viewIndex]=wrongLabelsIndices - except OSError: - for viewIndex in range(nbViews): - wrongSets[classifierIndex][viewIndex]= np.arange(length) - combinations = itertools.combinations_with_replacement(range(len(allClassifersNames)), nbViews) + trainLabels = np.genfromtxt(directory+"train_labels.csv", delimiter=",").astype(np.int16) + length = len(trainLabels) + for resultMonoview in resultsMonoview: + if resultMonoview[1][0] in classifiersNames[resultMonoview[0]]: + classifierIndex = classifiersNames.index(resultMonoview[1][0]) + wrongSets[resultMonoview[0]][classifierIndex] = np.where(trainLabels+resultMonoview[1][3][classificationIndices[0]] == 1) + else: + classifiersNames[resultMonoview[0]].append(resultMonoview[1][0]) + wrongSets[resultMonoview[0]].append(np.where(trainLabels+resultMonoview[1][3][classificationIndices[0]] == 1)) + # for classifierIndex, classifierName in enumerate(allClassifersNames): + # try: + # classifierDirectory = directory+classifierName+"/" + # viewDirectoryNames = os.listdir(classifierDirectory) + # wrongSets[classifierIndex]=[0 for _ in viewDirectoryNames] + # for viewIndex, viewDirectoryName in enumerate(viewDirectoryNames): + # for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectoryName+"/"): + # if resultFileName.endswith("train_labels.csv"): + # yTrainFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName + # elif resultFileName.endswith("train_pred.csv"): + # yTrainPredFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName + # train = np.genfromtxt(yTrainFileName, delimiter=",").astype(np.int16) + # pred = np.genfromtxt(yTrainPredFileName, delimiter=",").astype(np.int16) + # length = len(train) + # wrongLabelsIndices = np.where(train+pred == 1) + # wrongSets[classifierIndex][viewIndex]=wrongLabelsIndices + # except OSError: + # for viewIndex in range(nbViews): + # wrongSets[classifierIndex][viewIndex]= np.arange(length) + + combinations = itertools.combinations_with_replacement(range(len(classifiersNames[0])), nbViews) bestLen = length bestCombination = None for combination in combinations: intersect = np.arange(length, dtype=np.int16) for viewIndex, classifierIndex in enumerate(combination): - intersect = np.intersect1d(intersect, wrongSets[classifierIndex][viewIndex]) + intersect = np.intersect1d(intersect, wrongSets[viewIndex][classifierIndex]) if len(intersect) < bestLen: bestLen = len(intersect) bestCombination = combination - return [allClassifersNames[index] for index in bestCombination] - - -def getFormFile(directory, viewDirectory, resultFileName): - file = open(directory+"/"+viewDirectory+"/"+resultFileName) - for line in file: - if "Score on train" in line: - score = float(line.strip().split(":")[1]) - break - elif "train" in line: - metricName = line.strip().split(" ")[0] + return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] + + +# def getFormFile(directory, viewDirectory, resultFileName): +# file = open(directory+"/"+viewDirectory+"/"+resultFileName) +# for line in file: +# if "Score on train" in line: +# score = float(line.strip().split(":")[1]) +# break +# elif "train" in line: +# metricName = line.strip().split(" ")[0] +# metricModule = getattr(Metrics, metricName) +# if metricModule.getConfig()[-14]=="h": +# betterHigh = True +# else: +# betterHigh = False +# return score, betterHigh + + +def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): + nbViews = len(viewsIndices) + nbClassifiers = len(allClassifersNames) + scores = np.zeros((nbViews, nbClassifiers)) + classifiersNames = [[] for _ in viewsIndices] + metricName = resultsMonoview[0][1][2].keys()[0] metricModule = getattr(Metrics, metricName) if metricModule.getConfig()[-14]=="h": betterHigh = True else: betterHigh = False - return score, betterHigh - - -def bestScore(allClassifersNames, directory, viewsIndices): - nbViews = len(viewsIndices) - nbClassifiers = len(allClassifersNames) - scores = np.zeros((nbViews, nbClassifiers)) - for classifierIndex, classifierName in enumerate(allClassifersNames): - classifierDirectory = directory+"/"+classifierName+"/" - for viewIndex, viewDirectory in enumerate(os.listdir(classifierDirectory)): - for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectory+"/"): - if resultFileName.endswith(".txt"): - scores[viewIndex, classifierIndex], betterHigh = getFormFile(directory, viewDirectory, resultFileName) + for resultMonoview in resultsMonoview: + if resultMonoview[1][0] not in classifiersNames[resultMonoview[0]]: + classifiersNames[resultMonoview[0]].append(resultMonoview[1][0]) + classifierIndex = classifiersNames[resultMonoview[0]].index(resultMonoview[1][0]) + scores[resultMonoview[0],classifierIndex] = resultMonoview[1][2].values()[0][0] + # + # for classifierIndex, classifierName in enumerate(allClassifersNames): + # classifierDirectory = directory+"/"+classifierName+"/" + # for viewIndex, viewDirectory in enumerate(os.listdir(classifierDirectory)): + # for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectory+"/"): + # if resultFileName.endswith(".txt"): + # scores[viewIndex, classifierIndex], betterHigh = getFormFile(directory, viewDirectory, resultFileName) if betterHigh: classifierIndices = np.argmax(scores, axis=1) else: classifierIndices = np.argmin(scores, axis=1) - return [allClassifersNames[index] for index in classifierIndices] + return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(classifierIndices)] -def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices): +def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices): thismodule = sys.modules[__name__] selectionMethod = getattr(thismodule, selectionMethodName) - classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices) + classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices) return classifiersNames diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index fd1bc24fda55598044cdc672fb3f3e56b6e4df6a..32d623edfedffe985f8167c82d5d6aff5ea7e56d 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -17,12 +17,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): if args.FU_L_cl_names!=['']: args.FU_L_select_monoview = "user_defined" else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_names==[""] and args.CL_type == ["Multiview"]: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 19e2f400116de2843f1678f6b9e636384f44ce1e..31d44c79fd059ad8c62b5c853fa7105d848bcc01 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -16,12 +16,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): if args.FU_L_cl_names!=['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index e675b5021816c935aa816bec7eb1c380625b4a56..5ba55d97206696edf8344688122daccd2fca50e1 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -27,12 +27,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): if args.FU_L_cl_names!=['']: pass else: monoviewClassifierModulesNames =benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index ebad066fcd1eae4b918dc557f82f1313583f577d..d488d7c1eef59b07252939f85fc0e45294402438 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -14,12 +14,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): if args.FU_L_cl_names!=['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index eb9678e77c2bf34b4847e9194ca2d6aa5adbd32d..20c163406fe64ad064034a180a5bc4417d19d51a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -16,12 +16,12 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview): +def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, classificationIndices): if args.FU_L_cl_names!=['']: pass else: monoviewClassifierModulesNames = benchmark["Monoview"] - args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index 3b63701d126b5a80dd0d0403344f801196618513..e5ae03187c381bd64e7455a617028420d29a20c7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -28,7 +28,7 @@ def getBenchmark(benchmark, args=None): return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview): +def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): argumentsList = [] arguments = {"CL_type": "Mumbo", diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py index 0cce2e35e4eaeee9e76141c531ee21e7d544d3cb..80a577b66259f0416653947a9e9e180c8af5f3f9 100644 --- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py +++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py @@ -78,6 +78,7 @@ def resultAnalysis(benchmark, results, name, times, metrics, directory): ax.set_xticklabels(names, rotation="vertical") f.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-"+metric[0]+".png") + plt.close() def analyzeLabels(labelsArrays, realLabels, results, directory): @@ -104,6 +105,7 @@ def analyzeLabels(labelsArrays, realLabels, results, directory): cbar = fig.colorbar(cax, ticks=[0, 1]) cbar.ax.set_yticklabels(['Wrong', ' Right']) fig.savefig(directory+time.strftime("%Y%m%d-%H%M%S")+"-error_analysis.png") + plt.close() def genScoresNames(iterResults, metric, nbResults, names, nbMono): @@ -154,7 +156,6 @@ def genScoresNames(iterResults, metric, nbResults, names, nbMono): def analyzeIterResults(iterResults, name, metrics, directory): nbResults = len(iterResults[0][0])+len(iterResults[0][1]) nbMono = len(iterResults[0][0]) - nbMulti = len(iterResults[0][1]) nbIter = len(iterResults) names = genNamesFromRes(iterResults[0][0], iterResults[0][1]) for metric in metrics: diff --git a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py index 573c7388b5b63bc3fa2908d0a88930cd9a16fa35..0f98ef522d47768b0e6ab6b40d83e8402e498624 100644 --- a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py +++ b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py @@ -3,6 +3,8 @@ import numpy as np import sys import Multiview import Metrics +import matplotlib.pyplot as plt +import itertools def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, searchingTool="hyperParamSearch", nIter=1, **kwargs): if viewsIndices is None: @@ -71,6 +73,39 @@ def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs): pass + +def genHeatMaps(params, scoresArray, outputFileName): + nbParams = len(params) + if nbParams > 2: + combinations = itertools.combinations(range(nbParams), 2) + else: + combinations = [(0,1)] + for combination in combinations: + paramName1, paramArray1 = params[combination[0]] + paramName2, paramArray2 = params[combination[1]] + + paramArray1Set = np.sort(np.array(list(set(paramArray1)))) + paramArray2Set = np.sort(np.array(list(set(paramArray2)))) + + scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set)))-0.1 + for param1, param2, score in zip(paramArray1, paramArray2, scoresArray): + param1Index, = np.where(paramArray1Set == param1) + param2Index, = np.where(paramArray2Set == param2) + scoresMatrix[int(param2Index), int(param1Index)] = score + + plt.figure(figsize=(8, 6)) + plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) + plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot, + ) + plt.xlabel(paramName1) + plt.ylabel(paramName2) + plt.colorbar() + plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set) + plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) + plt.title('Validation metric') + plt.savefig(outputFileName+"heat_map-"+paramName1+"-"+paramName2+".png") + plt.close() + # nohup python ~/dev/git/spearmint/spearmint/main.py . & # import json