From 0d05a153711fd407895994d16f269097d27c86f9 Mon Sep 17 00:00:00 2001 From: bbauvin <baptiste.bauvin@centrale-marseille.fr> Date: Thu, 9 Nov 2017 14:50:58 -0500 Subject: [PATCH] Biclass analysis is working need to debug mullticlass --- Code/MonoMultiViewClassifiers/ExecClassif.py | 856 +++--------------- .../Metrics/f1_score.py | 7 +- .../Metrics/fbeta_score.py | 5 +- .../Metrics/precision_score.py | 5 +- .../Metrics/recall_score.py | 5 +- .../Metrics/roc_auc_score.py | 11 +- .../Monoview/ExecClassifMonoView.py | 2 +- .../Multiview/ExecMultiview.py | 2 +- .../Fusion/FusionModule.py | 2 +- .../Mumbo/analyzeResults.py | 4 +- .../ResultAnalysis.py | 97 +- .../utils/HyperParameterSearch.py | 1 + .../utils/Multiclass.py | 1 + 13 files changed, 244 insertions(+), 754 deletions(-) diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py index df869b68..ae75199f 100644 --- a/Code/MonoMultiViewClassifiers/ExecClassif.py +++ b/Code/MonoMultiViewClassifiers/ExecClassif.py @@ -90,11 +90,13 @@ def initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS def initMonoviewKWARGS(args, classifiersNames): """Used to init kwargs thanks to a function in each monoview classifier package""" + logging.debug("Start:\t Initializing Monoview classifiers arguments") monoviewKWARGS = {} for classifiersName in classifiersNames: classifierModule = getattr(MonoviewClassifiers, classifiersName) monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS( [(key, value) for key, value in vars(args).items() if key.startswith("CL_" + classifiersName)]) + logging.debug("Done:\t Initializing Monoview classifiers arguments") return monoviewKWARGS @@ -106,6 +108,7 @@ def initKWARGSFunc(args, benchmark): def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices): """Used to add each monoview exeperience args to the list of monoview experiences args""" + logging.debug("Start:\t Initializing Multiview classifiers arguments") multiviewArguments = [] if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: @@ -114,6 +117,7 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDiction multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices) argumentDictionaries["Multiview"] = multiviewArguments + logging.debug("Start:\t Initializing Multiview classifiers arguments") return argumentDictionaries @@ -130,708 +134,6 @@ def arangeMetrics(metrics, metricPrinc): return metrics -def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, - kFolds, - randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, - benchmark, - views): - """Used to execute mono and multiview classification and result analysis for one random state - using multicore classification""" - resultsMonoview = [] - labelsNames = LABELS_DICTIONARY.values() - np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") - - resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, randomState, - hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, - **arguments) - for arguments in argumentDictionaries["Monoview"]] - monoviewTime = time.time() - 
dataBaseTime - start - - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices) - - resultsMultiview = [] - resultsMultiview += [ - ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, - args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, **arguments) - for arguments in argumentDictionaries["Multiview"]] - multiviewTime = time.time() - monoviewTime - dataBaseTime - start - - labels = np.array( - [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in - resultsMultiview]).transpose() - DATASET = h5py.File(args.pathF + args.name + str(0) + ".hdf5", "r") - trueLabels = DATASET.get("Labels").value - times = [dataBaseTime, monoviewTime, multiviewTime] - results = (resultsMonoview, resultsMultiview) - labelAnalysis = analyzeLabels(labels, trueLabels, results, directory) - logging.debug("Start:\t Analyze Iteration Results") - resultAnalysis(benchmark, results, args.name, times, metrics, directory) - logging.debug("Done:\t Analyze Iteration Results") - globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime - totalTime = time.time() - start - logging.info("Extraction time : " + str(int(dataBaseTime)) + - "s, Monoview time : " + str(int(monoviewTime)) + - "s, Multiview Time : " + str(int(multiviewTime)) + - "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) + - "s, Iteration Duration : " + str(int(totalTime)) + "s") - return results, labelAnalysis - - -def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, kFolds, - randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, - benchmark, views): - """Used to execute mono and multiview classification and result analysis for one random state - classification""" - #TODO : Clarify this one - - - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices) - - resultsMultiview = [] - if nbCores > 1: - nbExperiments = len(argumentDictionaries["Multiview"]) - for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): - resultsMultiview += Parallel(n_jobs=nbCores)( - delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, - args.type, - args.pathF, - LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, - **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) - for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) - else: - resultsMultiview = [ - ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF, - LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in - argumentDictionaries["Multiview"]] - multiviewTime = time.time() - monoviewTime - dataBaseTime - start - if nbCores > 1: - logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") - datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) - logging.debug("Start:\t Deleting datasets for multiprocessing") - labels = np.array( - [resultMonoview[1][3] for resultMonoview in resultsMonoview] 
+ [resultMultiview[3] for resultMultiview in - resultsMultiview]).transpose() - trueLabels = DATASET.get("Labels").value - times = [dataBaseTime, monoviewTime, multiviewTime] - results = (resultsMonoview, resultsMultiview) - labelAnalysis = analyzeLabels(labels, trueLabels, results, directory) - logging.debug("Start:\t Analyze Iteration Results") - resultAnalysis(benchmark, results, args.name, times, metrics, directory) - logging.debug("Done:\t Analyze Iteration Results") - globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime - totalTime = time.time() - start - logging.info("Extraction time : " + str(int(dataBaseTime)) + - "s, Monoview time : " + str(int(monoviewTime)) + - "s, Multiview Time : " + str(int(multiviewTime)) + - "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) + - "s, Iteration Duration : " + str(int(totalTime)) + "s") - return results, labelAnalysis - -# -# def getMetricsScoresBiclass(metrics, monoviewResults, multiviewResults): -# metricsScores = {} -# for metric in metrics: -# classifiersNames = [] -# trainScores = [] -# testScores = [] -# for classifierResult in monoviewResults: -# trainScores.append(classifierResult[1][2][metric[0]][0]) -# testScores.append(classifierResult[1][2][metric[0]][1]) -# classifiersNames.append(classifierResult[1][0]) -# for classifierResult in multiviewResults: -# trainScores.append(classifierResult[2][metric[0]][0]) -# testScores.append(classifierResult[2][metric[0]][1]) -# multiviewClassifierPackage = getattr(MultiviewClassifiers, classifierResult[0]) -# multiviewClassifierModule = getattr(multiviewClassifierPackage, classifierResult[0]+"Module") -# classifiersNames.append(multiviewClassifierModule.genName(classifierResult[1])) -# metricsScores[metric[0]] = {"classifiersNames": classifiersNames, -# "trainScores": trainScores, -# "testScores": testScores} -# return metricsScores -# -# -# def getExampleErrorsBiclass(usedBenchmarkArgumentDictionary, monoviewResults, multiviewResults): -# exampleErrors = {} -# trueLabels = usedBenchmarkArgumentDictionary["labels"] -# for classifierResult in monoviewResults: -# classifierName = classifierResult[1][0] -# predictedLabels = classifierResult[1][3] -# errorOnExamples = predictedLabels==trueLabels -# errorOnExamples = errorOnExamples.astype(int) -# unseenExamples = np.where(trueLabels==-100)[0] -# errorOnExamples[unseenExamples]=-100 -# exampleErrors[classifierName] = errorOnExamples -# for classifierResult in multiviewResults: -# multiviewClassifierPackage = getattr(MultiviewClassifiers, classifierResult[0]) -# multiviewClassifierModule = getattr(multiviewClassifierPackage, classifierResult[0]+"Module") -# classifierName = multiviewClassifierModule.genName(classifierResult[1]) -# predictedLabels = classifierResult[3] -# errorOnExamples = predictedLabels==trueLabels -# errorOnExamples = errorOnExamples.astype(int) -# unseenExamples = np.where(trueLabels==-100)[0] -# errorOnExamples[unseenExamples]=-100 -# exampleErrors[classifierName] = errorOnExamples -# return exampleErrors -# -# -# def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames,minSize=10): -# for metricName, metricScores in metricsScores.items(): -# logging.debug("Start:\t Multiclass score graph generation for "+metricName) -# trainScores = metricScores["trainScores"] -# testScores = metricScores["testScore"] -# names = metricScores["classifiersNames"] -# nbResults = len(testScores) -# -# testScores = np.array(testScores) -# trainScores = np.array(trainScores) -# names = 
np.array(names) -# size = nbResults -# if nbResults < minSize: -# size = minSize -# figKW = {"figsize" : (size, 3.0/4*size+2.0)} -# f, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# barWidth= 0.35 -# sorted_indices = np.argsort(testScores) -# testScores = testScores[sorted_indices] -# trainScores = trainScores[sorted_indices] -# names = names[sorted_indices] -# -# ax.set_title(metricName + "\n scores for each classifier") -# rects = ax.bar(range(nbResults), testScores, barWidth, color="r", ) -# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, color="0.7", ) -# autolabel(rects, ax) -# autolabel(rect2, ax) -# ax.legend((rects[0], rect2[0]), ('Test', 'Train')) -# ax.set_ylim(-0.1, 1.1) -# ax.set_xticks(np.arange(nbResults) + barWidth) -# ax.set_xticklabels(names, rotation="vertical") -# plt.tight_layout() -# f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-" + metricName + ".png") -# plt.close() -# logging.debug("Done:\t Multiclass score graph generation for " + metricName) -# -# -# def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames,minSize=10): -# logging.debug("Start:\t Label analysis figure generation") -# nbClassifiers = len(exampleErrors) -# nbExamples = len(exampleErrors.values()[0]) -# nbIter = 2 -# data = np.zeros((nbExamples, nbClassifiers * nbIter)) -# temp_data = np.zeros((nbExamples, nbClassifiers)) -# classifiersNames = exampleErrors.keys() -# for classifierIndex, (classifierName, errorOnExamples) in enumerate(exampleErrors.items()): -# for iterIndex in range(nbIter): -# data[:, classifierIndex * nbIter + iterIndex] = errorOnExamples -# temp_data[:,classifierIndex] = errorOnExamples -# figWidth = max(nbClassifiers/2, minSize) -# figHeight = max(nbExamples/20, minSize) -# figKW = {"figsize":(figWidth, figHeight)} -# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# cmap = mpl.colors.ListedColormap(['black', 'red', 'green']) -# bounds = [-100.5,-0.5, 0.5, 1.5] -# norm = mpl.colors.BoundaryNorm(bounds, cmap.N) -# -# cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto') -# plt.title('Errors depending on the classifier') -# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter) -# labels = classifiersNames -# plt.xticks(ticks, labels, rotation="vertical") -# cbar = fig.colorbar(cax, ticks=[0, 1]) -# cbar.ax.set_yticklabels(['Unseen', 'Wrong', ' Right']) -# fig.tight_layout() -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-error_analysis.png") -# plt.close() -# logging.debug("Done:\t Label analysis figure generation") -# -# logging.debug("Start:\t Error by example figure generation") -# errorOnExamples = -1*np.sum(data, axis=1)/nbIter+nbClassifiers -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors_doubled.csv", data, delimiter=",") -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", temp_data, delimiter=",") -# fig, ax = plt.subplots() -# x = np.arange(nbExamples) -# plt.bar(x, errorOnExamples) -# plt.ylim([0,nbClassifiers]) -# plt.title("Number of classifiers that failed to classify each example") -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-example_errors.png") -# plt.close() -# logging.debug("Done:\t Error by example figure generation") -# -# -# def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): -# biclassResults = [{} for _ in 
range(statsIter)] -# for result in results: -# flag = result[0] -# iteridex = flag[0] -# classifierPositive = flag[1][0] -# classifierNegative = flag[1][1] -# biclassResults[iteridex][[classifierPositive, classifierNegative]] = {} -# for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: -# if benchmarkArgumentDictionary["flag"]==flag: -# usedBenchmarkArgumentDictionary = benchmarkArgumentDictionary -# monoviewResults = result[1] -# multiviewResults = result[2] -# metricsScores = getMetricsScoresBiclass(metrics, monoviewResults, multiviewResults) -# exampleErrors = getExampleErrorsBiclass(usedBenchmarkArgumentDictionary, monoviewResults, multiviewResults) -# directory = usedBenchmarkArgumentDictionary["directory"] -# databaseName = usedBenchmarkArgumentDictionary["args"].name -# labelsNames = [usedBenchmarkArgumentDictionary["LABELS_DICTIONARY"][0], -# usedBenchmarkArgumentDictionary["LABELS_DICTIONARY"][1]] -# publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames) -# publishExampleErrors(exampleErrors, directory, databaseName, labelsNames) -# biclassResults[iteridex][[classifierPositive, classifierNegative]]["metricsScores"] = metricsScores -# biclassResults[iteridex][[classifierPositive, classifierNegative]]["exampleErrors"] = exampleErrors -# return biclassResults -# -# -# def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionaries, classificationIndices): -# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" -# -# logging.debug("Start:\t Getting multiclass scores for each metric") -# # TODO : Metric score for train and test -# for metric in metrics: -# metricModule = getattr(Metrics, metric[0]) -# for iterIndex, iterResults in enumerate(results): -# trainIndices, testIndices = classificationIndices[iterIndex] -# for classifierName, resultDictionary in iterResults.items(): -# if not "metricsScores" in resultDictionary: -# results[iterIndex][classifierName]["metricsScores"]={} -# trainScore = metricModule.score(trueLabels[trainIndices], -# resultDictionary["labels"][trainIndices]) -# testScore = metricModule.score(trueLabels[testIndices], -# resultDictionary["labels"][testIndices]) -# results[iterIndex][classifierName]["metricsScores"][metric[0]] = [trainScore, testScore] -# -# -# logging.debug("Done:\t Getting multiclass scores for each metric") -# -# return results -# -# -# def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels): -# """Used to add all the arrays showing on which example there is an error for each clf and each iteration""" -# -# logging.debug("Start:\t Getting errors on each example for each classifier") -# -# for iterIndex, iterResults in enumerate(multiclassResults): -# for classifierName, classifierResults in iterResults.items(): -# errorOnExamples = classifierResults["labels"] == multiclassLabels -# multiclassResults[iterIndex][classifierName]["errorOnExample"] = errorOnExamples.astype(int) -# -# logging.debug("Done:\t Getting errors on each example for each classifier") -# -# return multiclassResults -# -# -# def autolabel(rects, ax): -# """Used to print scores on top of the bars""" -# for rect in rects: -# height = rect.get_height() -# ax.text(rect.get_x() + rect.get_width() / 2., 1.01 * height, -# "%.2f" % height, -# ha='center', va='bottom') -# -# -# def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, databaseName, minSize=10): -# for iterIndex in range(statsIter): -# directory = direcories[iterIndex] -# for metric 
in metrics: -# logging.debug("Start:\t Multiclass score graph generation for "+metric[0]) -# classifiersNames = [] -# validationScores = [] -# trainScores = [] -# for classifierName in multiclassResults[iterIndex].keys(): -# classifiersNames.append(classifierName) -# validationScores.append(multiclassResults[iterIndex][classifierName]["metricsScores"][metric[0]][1]) -# trainScores.append(multiclassResults[iterIndex][classifierName]["metricsScores"][metric[0]][0]) -# nbResults = len(validationScores) -# -# validationScores = np.array(validationScores) -# trainScores = np.array(trainScores) -# names = np.array(classifiersNames) -# size = nbResults -# if nbResults < minSize: -# size = minSize -# figKW = {"figsize" : (size, 3.0/4*size+2.0)} -# f, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# barWidth= 0.35 -# sorted_indices = np.argsort(validationScores) -# validationScores = validationScores[sorted_indices] -# trainScores = trainScores[sorted_indices] -# names = names[sorted_indices] -# -# ax.set_title(metric[0] + "\n on validation set for each classifier") -# rects = ax.bar(range(nbResults), validationScores, barWidth, color="r", ) -# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, color="0.7", ) -# autolabel(rects, ax) -# autolabel(rect2, ax) -# ax.legend((rects[0], rect2[0]), ('Test', 'Train')) -# ax.set_ylim(-0.1, 1.1) -# ax.set_xticks(np.arange(nbResults) + barWidth) -# ax.set_xticklabels(names, rotation="vertical") -# plt.tight_layout() -# f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName + "-" + metric[0] + ".png") -# plt.close() -# logging.debug("Done:\t Multiclass score graph generation for " + metric[0]) -# -# -# def publishMulticlassExmapleErrors(multiclassResults, directories, databaseName, labelsNames, minSize=10): -# for iterIndex, multiclassResult in multiclassResults: -# directory = directories[iterIndex] -# logging.debug("Start:\t Label analysis figure generation") -# nbClassifiers = len(multiclassResult) -# nbExamples = len(multiclassResult.values()[0]["errorOnExample"]) -# nbIter = 2 -# data = np.zeros((nbExamples, nbClassifiers * nbIter)) -# temp_data = np.zeros((nbExamples, nbClassifiers)) -# classifiersNames = multiclassResult.keys() -# for classifierIndex, (classifierName, errorOnExamplesDict) in enumerate(multiclassResult.items()): -# for iterIndex in range(nbIter): -# data[:, classifierIndex * nbIter + iterIndex] = errorOnExamplesDict["errorOnExample"] -# temp_data[:,classifierIndex] = errorOnExamplesDict["errorOnExample"] -# figWidth = max(nbClassifiers/2, minSize) -# figHeight = max(nbExamples/20, minSize) -# figKW = {"figsize":(figWidth, figHeight)} -# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# cmap = mpl.colors.ListedColormap(['black', 'red', 'green']) -# bounds = [-100.5,-0.5, 0.5, 1.5] -# norm = mpl.colors.BoundaryNorm(bounds, cmap.N) -# -# cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto') -# plt.title('Errors depending on the classifier') -# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter) -# labels = classifiersNames -# plt.xticks(ticks, labels, rotation="vertical") -# cbar = fig.colorbar(cax, ticks=[0, 1]) -# cbar.ax.set_yticklabels(['Unseen', 'Wrong', ' Right']) -# fig.tight_layout() -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-error_analysis.png") -# plt.close() -# logging.debug("Done:\t Label analysis figure generation") -# -# logging.debug("Start:\t Error by example figure generation") -# errorOnExamples = 
-1*np.sum(data, axis=1)/nbIter+nbClassifiers -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors_doubled.csv", data, delimiter=",") -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", temp_data, delimiter=",") -# fig, ax = plt.subplots() -# x = np.arange(nbExamples) -# plt.bar(x, errorOnExamples) -# plt.ylim([0,nbClassifiers]) -# plt.title("Number of classifiers that failed to classify each example") -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-example_errors.png") -# plt.close() -# logging.debug("Done:\t Error by example figure generation") -# -# -# def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, -# metrics, classificationIndices, directories): -# """Used to tranform one versus one results in multiclass results and to publish it""" -# multiclassResults = [{} for _ in range(statsIter)] -# for flag, resMono, resMulti in results: -# iterIndex = flag[0] -# classifierPositive = flag[1][0] -# classifierNegative = flag[1][1] -# for classifierResult in resMono: -# if classifierResult[1][0] not in multiclassResults[iterIndex]: -# multiclassResults[iterIndex][classifierResult[1][0]] = np.zeros((nbExamples,nbLabels),dtype=int) -# for exampleIndex, label in enumerate(classifierResult[1][3]): -# if label == 1: -# multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierPositive] += 1 -# else: -# multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierNegative] += 1 -# for classifierResult in resMulti: -# multiviewClassifierPackage = getattr(MultiviewClassifiers, classifierResult[0]) -# multiviewClassifierModule = getattr(multiviewClassifierPackage, classifierResult[0]+"Module") -# classifierName = multiviewClassifierModule.genName(classifierResult[1]) -# if classifierName not in multiclassResults[iterIndex]: -# multiclassResults[iterIndex][classifierName] = np.zeros((nbExamples,nbLabels),dtype=int) -# for exampleIndex, label in enumerate(classifierResult[3]): -# if label == 1: -# multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 -# else: -# multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 -# -# -# for iterIndex, multiclassiterResult in enumerate(multiclassResults): -# for key, value in multiclassiterResult.items(): -# multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)} -# -# multiclassResults = genMetricsScoresMulticlass(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries, classificationIndices) -# multiclassResults = getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels) -# -# publishMulticlassScores(multiclassResults, metrics, statsIter, directories, benchmarkArgumentDictionaries[0]["args"].name) -# publishMulticlassExmapleErrors(multiclassResults, metrics, statsIter, directories, benchmarkArgumentDictionaries[0]["args"].name) -# return multiclassResults -# -# -# def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter, minSize=10): -# for labelsCombination, iterResult in iterResults.items(): -# currentDirectory = directory+ labelsDictionary[labelsCombination[0]]+"vs"+labelsDictionary[labelsCombination[1]]+"/" -# for metricName, scores in iterResults["metricsScores"].items(): -# trainScores = scores["trainScores"] -# testScores = scores["testScores"] -# trainMeans = np.mean(trainScores, axis=1) -# 
testMeans = np.mean(testScores, axis=1) -# trainSTDs = np.std(trainScores, axis=1) -# testSTDs = np.std(testScores, axis=1) -# nbResults = len(trainMeans) -# names = classifiersDict.values() -# size=nbResults -# if nbResults<minSize: -# size=minSize -# figKW = {"figsize" : (size, 3.0/4*size+2.0)} -# f, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# barWidth = 0.35 # the width of the bars -# sorted_indices = np.argsort(testMeans) -# testMeans = testMeans[sorted_indices] -# testSTDs = testSTDs[sorted_indices] -# trainSTDs = trainSTDs[sorted_indices] -# trainMeans = trainMeans[sorted_indices] -# names = np.array(names)[sorted_indices] -# -# ax.set_title(metricName + " for each classifier") -# rects = ax.bar(range(nbResults), testMeans, barWidth, color="r", yerr=testSTDs) -# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainMeans, barWidth, color="0.7", yerr=trainSTDs) -# autolabel(rects, ax) -# autolabel(rect2, ax) -# ax.set_ylim(-0.1, 1.1) -# ax.legend((rects[0], rect2[0]), ('Test', 'Train')) -# ax.set_xticks(np.arange(nbResults) + barWidth) -# ax.set_xticklabels(names, rotation="vertical") -# f.tight_layout() -# f.savefig(currentDirectory + time.strftime("%Y%m%d-%H%M%S") + "-" + dataBaseName + "-Mean_on_" -# + str(statsIter) + "_iter-" + metricName + ".png") -# -# -# -# def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary, classifiersDict, statsIter, minSize=10): -# for labelsCombination, combiResults in iterResults.items(): -# currentDirectory = directory+ labelsDictionary[labelsCombination[0]]+"vs"+labelsDictionary[labelsCombination[1]]+"/" -# classifiersNames = classifiersDict.values() -# logging.debug("Start:\t Global label analysis figure generation") -# nbExamples = combiResults["errorOnExamples"].shape[1] -# nbClassifiers = combiResults["errorOnExamples"].shape[0] -# nbIter = 2 -# -# figWidth = max(nbClassifiers / 2, minSize) -# figHeight = max(nbExamples / 20, minSize) -# figKW = {"figsize": (figWidth, figHeight)} -# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# data = combiResults["errorOnExamples"] -# cax = plt.imshow(-data, interpolation='none', cmap="Greys", aspect='auto') -# plt.title('Errors depending on the classifier') -# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter) -# plt.xticks(ticks, classifiersNames, rotation="vertical") -# cbar = fig.colorbar(cax, ticks=[0, -statsIter]) -# cbar.ax.set_yticklabels(['Always Wrong', 'Always Right']) -# fig.tight_layout() -# fig.savefig(currentDirectory + time.strftime("%Y%m%d-%H%M%S") + "-error_analysis.png") -# plt.close() -# logging.debug("Done:\t Global label analysis figure generation") -# logging.debug("Start:\t Global error by example figure generation") -# errorOnExamples = -1 * np.sum(data, axis=1) / nbIter + (nbClassifiers*statsIter) -# np.savetxt(currentDirectory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors.csv", data, delimiter=",") -# np.savetxt(currentDirectory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", errorOnExamples, delimiter=",") -# fig, ax = plt.subplots() -# x = np.arange(nbExamples) -# plt.bar(x, errorOnExamples) -# plt.ylim([0,nbClassifiers*statsIter]) -# plt.title("Number of classifiers that failed to classify each example") -# fig.savefig(currentDirectory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.png") -# plt.close() -# logging.debug("Done:\t Global error by example figure generation") -# -# -# def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, dataBaseName, directory, statsIter, minSize=10): -# for 
metricName, scores in iterMulticlassResults["metricsScores"].items(): -# trainScores = scores["trainScores"] -# testScores = scores["testScores"] -# trainMeans = np.mean(trainScores, axis=1) -# testMeans = np.mean(testScores, axis=1) -# trainSTDs = np.std(trainScores, axis=1) -# testSTDs = np.std(testScores, axis=1) -# nbResults = len(trainMeans) -# names = classifiersNames -# size=nbResults -# if nbResults<minSize: -# size=minSize -# figKW = {"figsize" : (size, 3.0/4*size+2.0)} -# f, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# barWidth = 0.35 # the width of the bars -# sorted_indices = np.argsort(testMeans) -# testMeans = testMeans[sorted_indices] -# testSTDs = testSTDs[sorted_indices] -# trainSTDs = trainSTDs[sorted_indices] -# trainMeans = trainMeans[sorted_indices] -# names = np.array(names)[sorted_indices] -# -# ax.set_title(metricName + " for each classifier") -# rects = ax.bar(range(nbResults), testMeans, barWidth, color="r", yerr=testSTDs) -# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainMeans, barWidth, color="0.7", yerr=trainSTDs) -# autolabel(rects, ax) -# autolabel(rect2, ax) -# ax.set_ylim(-0.1, 1.1) -# ax.legend((rects[0], rect2[0]), ('Test', 'Train')) -# ax.set_xticks(np.arange(nbResults) + barWidth) -# ax.set_xticklabels(names, rotation="vertical") -# f.tight_layout() -# f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + dataBaseName + "-Mean_on_" -# + str(statsIter) + "_iter-" + metricName + ".png") -# -# -# def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter, minSize=10): -# logging.debug("Start:\t Global label analysis figure generation") -# nbExamples = iterMulticlassResults["errorOnExamples"].shape[1] -# nbClassifiers = iterMulticlassResults["errorOnExamples"].shape[0] -# nbIter = 2 -# -# figWidth = max(nbClassifiers / 2, minSize) -# figHeight = max(nbExamples / 20, minSize) -# figKW = {"figsize": (figWidth, figHeight)} -# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) -# data = iterMulticlassResults["errorOnExamples"] -# cax = plt.imshow(-data, interpolation='none', cmap="Greys", aspect='auto') -# plt.title('Errors depending on the classifier') -# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter) -# plt.xticks(ticks, classifiersNames, rotation="vertical") -# cbar = fig.colorbar(cax, ticks=[0, -statsIter]) -# cbar.ax.set_yticklabels(['Always Wrong', 'Always Right']) -# fig.tight_layout() -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-error_analysis.png") -# plt.close() -# logging.debug("Done:\t Global label analysis figure generation") -# logging.debug("Start:\t Global error by example figure generation") -# errorOnExamples = -1 * np.sum(data, axis=1) / nbIter + (nbClassifiers*statsIter) -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors.csv", data, delimiter=",") -# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", errorOnExamples, delimiter=",") -# fig, ax = plt.subplots() -# x = np.arange(nbExamples) -# plt.bar(x, errorOnExamples) -# plt.ylim([0,nbClassifiers*statsIter]) -# plt.title("Number of classifiers that failed to classify each example") -# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.png") -# plt.close() -# logging.debug("Done:\t Global error by example figure generation") -# -# -# def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName): -# iterBiclassResults = {} -# classifiersDict = {} -# for iterIndex, biclassResult in 
biclassResults: -# for labelsComination, results in biclassResult.items(): -# for metric in metrics: -# nbClassifiers = len(results["metricsScores"][metric[0]]["classifiersNames"]) -# if not classifiersDict: -# classifiersDict = dict((classifierName, classifierIndex) -# for classifierIndex, classifierName -# in enumerate(results["metricsScores"][metric[0]]["classifiersNames"])) -# if labelsComination not in iterBiclassResults: -# iterBiclassResults[labelsComination] = {} -# iterBiclassResults[labelsComination]["metricsScores"] = {} -# -# iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros((nbClassifiers, -# len(results["exampleErrors"])), -# dtype=int) -# if metric[0] not in iterBiclassResults[labelsComination]["metricsScores"]: -# iterBiclassResults[labelsComination]["metricsScores"][metric[0]]= {"trainScores": -# np.zeros(nbClassifiers, statsIter), -# "testScores": -# np.zeros(nbClassifiers, statsIter)} -# for classifierName, trainScore, testScore in zip(results["metricsScores"][metric[0]]["classifiersNames"], -# results["metricsScores"][metric[0]]["trainScores"], -# results["metricsScores"][metric[0]]["testScores"], -# ): -# iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["trainScores"][classifiersDict[classifierName], iterIndex] = trainScore -# iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["testScores"][classifiersDict[classifierName], iterIndex] = testScore -# for classifierName, errorOnExample in results["errorOnExamples"]: -# iterBiclassResults[labelsComination]["errorOnExamples"][classifiersDict[classifierName], :] += errorOnExample -# publishIterBiclassMetricsScores(iterBiclassResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter) -# publishIterBiclassExampleErrors(iterBiclassResults, directory, labelsDictionary, classifiersDict, statsIter) -# -# -# def analyzeIterMulticlass(biclassResults, multiclassResults, directory, statsIter, labelsDictionary, metrics, dataBaseName): -# analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName) -# # iterBiclassResults = {} -# # classifiersDict = {} -# # -# # for iterIndex, biclassResult in biclassResults: -# # for labelsComination, results in biclassResult.items(): -# # for metric in metrics: -# # nbClassifiers = len(results["metricsScores"][metric[0]]["classifiersNames"]) -# # if not classifiersDict: -# # classifiersDict = dict((classifierName, classifierIndex) -# # for classifierIndex, classifierName -# # in enumerate(results["metricsScores"][metric[0]]["classifiersNames"])) -# # if labelsComination not in iterBiclassResults: -# # iterBiclassResults[labelsComination] = {} -# # iterBiclassResults[labelsComination]["metricsScores"] = {} -# # -# # iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros((nbClassifiers, -# # len(results["exampleErrors"])), -# # dtype=int) -# # if metric[0] not in iterBiclassResults[labelsComination]["metricsScores"]: -# # iterBiclassResults[labelsComination]["metricsScores"][metric[0]]= {"trainScores": -# # np.zeros(nbClassifiers, statsIter), -# # "testScores": -# # np.zeros(nbClassifiers, statsIter)} -# # for classifierName, trainScore, testScore in zip(results["metricsScores"][metric[0]]["classifiersNames"], -# # results["metricsScores"][metric[0]]["trainScores"], -# # results["metricsScores"][metric[0]]["testScores"], -# # ): -# # iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["trainScores"][classifiersDict[classifierName], iterIndex] = trainScore -# # 
iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["testScores"][classifiersDict[classifierName], iterIndex] = testScore -# # for classifierName, errorOnExample in results["errorOnExamples"]: -# # iterBiclassResults[labelsComination]["errorOnExamples"][classifiersDict[classifierName], :] += errorOnExample -# # publishIterBiclassMetricsScores(iterBiclassResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter) -# # publishIterBiclassExampleErrors(iterBiclassResults, directory, labelsDictionary, classifiersDict, statsIter) -# -# iterMulticlassResults = {} -# nbClassifiers = len(multiclassResults[0]) -# iterMulticlassResults["errorOnExamples"] = np.zeros((nbClassifiers,len(multiclassResults[0].values()[0]["exampleErrors"])), -# dtype=int) -# iterMulticlassResults["metricsScores"] = {} -# classifiersNames = [] -# for iterIndex, multiclassResult in multiclassResults: -# for classifierName, classifierResults in multiclassResult.items(): -# classifiersNames.append(classifierName) -# classifierIndex = len(classifiersNames)-1 -# for metric in metrics: -# if metric[0] not in iterMulticlassResults["metricsScores"]: -# iterMulticlassResults["metricsScores"][metric[0]] = {"trainScores": -# np.zeros(nbClassifiers, statsIter), -# "testScores": -# np.zeros(nbClassifiers, statsIter)} -# iterMulticlassResults["metricsScores"][metric[0]]["trainScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][0] -# iterMulticlassResults["metricsScores"][metric[0]]["testScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][1] -# iterMulticlassResults["errorOnExamples"][classifierIndex, :] = classifierResults["exampleErrors"] -# publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, dataBaseName, directory, statsIter) -# publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter) -# -# -# -# def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics, classificationIndices, directories, directory): -# if statsIter > 1: -# if nbMulticlass > 1: -# biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) -# multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics, classificationIndices, directories) -# datatBaseName = benchmarkArgumentDictionaries[0]["args"].name -# analyzeIterMulticlass(biclassResults, multiclassResults, directory, statsIter, dataBaseName) -# else: -# biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) -# analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName) -# -# if nbMulticlass>1: -# analyzeMulticlass(results) -# else: -# analyzeBiclass(results) - - def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None, kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None, benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, @@ -839,6 +141,8 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class initMultiviewArguments=initMultiviewArguments): """Used to run a benchmark using one core. 
ExecMonoview_multicore, initMultiviewArguments and ExecMultiview_multicore args are only used for tests""" + + logging.debug("Start:\t Benchmark initialization") if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): try: os.makedirs(os.path.dirname(directory + "train_labels.csv")) @@ -851,21 +155,30 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class resultsMonoview = [] labelsNames = list(LABELS_DICTIONARY.values()) np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") + logging.debug("Done:\t Benchmark initialization") + + logging.debug("Start:\t Monoview benchmark") resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, coreIndex, args.type, args.pathF, randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **argument) for argument in argumentDictionaries["Monoview"]] + logging.debug("Done:\t Monoview benchmark") + logging.debug("Start:\t Multiview arguments initialization") argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) + logging.debug("Done:\t Multiview arguments initialization") + logging.debug("Start:\t Multiview benchmark") resultsMultiview = [] resultsMultiview += [ ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in argumentDictionaries["Multiview"]] + logging.debug("Done:\t Multiview benchmark") + return [flag, resultsMonoview, resultsMultiview] @@ -877,6 +190,8 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non initMultiviewArguments=initMultiviewArguments): """Used to run a benchmark using multiple cores. 
ExecMonoview_multicore, initMultiviewArguments and ExecMultiview_multicore args are only used for tests""" + + logging.debug("Start:\t Benchmark initialization") if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): try: os.makedirs(os.path.dirname(directory + "train_labels.csv")) @@ -889,7 +204,9 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") resultsMonoview = [] labelsNames = list(LABELS_DICTIONARY.values()) + logging.debug("Done:\t Benchmark initialization") + logging.debug("Start:\t Monoview benchmark") nbExperiments = len(argumentDictionaries["Monoview"]) nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) for stepIndex in range(nbMulticoreToDo): @@ -900,10 +217,14 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) + logging.debug("Done:\t Monoview benchmark") + logging.debug("Start:\t Multiview arguments initialization") argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices) + logging.debug("Done:\t Multiview arguments initialization") + logging.debug("Start:\t Multiview benchmark") resultsMultiview = [] nbExperiments = len(argumentDictionaries["Multiview"]) nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) @@ -914,6 +235,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) + logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview, resultsMultiview] @@ -923,6 +245,8 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments): + + logging.debug("Start:\t Benchmark initialization") if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): try: os.makedirs(os.path.dirname(directory + "train_labels.csv")) @@ -935,6 +259,9 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non resultsMonoview = [] labelsNames = list(LABELS_DICTIONARY.values()) np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") + logging.debug("Done:\t Benchmark initialization") + + logging.debug("Start:\t Monoview benchmark") for arguments in argumentDictionaries["Monoview"]: kwargs = arguments["args"] views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in @@ -946,17 +273,22 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non 1, args.type, args.pathF, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments)] + logging.debug("Done:\t Monoview benchmark") - + logging.debug("Start:\t Multiview arguments initialization") argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, 
resultsMonoview, classificationIndices) + logging.debug("Done:\t Multiview arguments initialization") + logging.debug("Start:\t Multiview benchmark") resultsMultiview = [] for arguments in argumentDictionaries["Multiview"]: resultsMultiview += [ ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments)] + logging.debug("Done:\t Multiview benchmark") + return [flag, resultsMonoview, resultsMultiview] @@ -985,12 +317,17 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionari for arguments in benchmarkArgumentsDictionaries: results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") - + if nbCores > 1: + logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") + args = benchmarkArgumentsDictionaries[0]["args"] + datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) + logging.debug("Start:\t Deleting datasets for multiprocessing") # Do everything with flagging nbExamples = len(classificationIndices[0][0])+len(classificationIndices[0][1]) - logging.debug("Start:\t Analyzing preds") - getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiClassLabels, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels) - logging.debug("Done:\t Analyzing preds") + multiclassGroundTruth = DATASET.get("Labels").value + logging.debug("Start:\t Analyzing predictions") + getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassGroundTruth, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels) + logging.debug("Done:\t Analyzing predictions") return results @@ -1047,7 +384,7 @@ def execClassif(arguments): if len(metric) == 1: metrics[metricIndex] = [metric[0], None] - logging.debug("Start:\t Finding all available mono- & multiview algorithms") + # logging.debug("Start:\t Finding all available mono- & multiview algorithms") benchmark = initBenchmark(args) @@ -1081,6 +418,109 @@ def execClassif(arguments): # +# def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, +# kFolds, +# randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start, +# benchmark, +# views): +# """Used to execute mono and multiview classification and result analysis for one random state +# using multicore classification""" +# resultsMonoview = [] +# labelsNames = LABELS_DICTIONARY.values() +# np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") +# +# resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, +# coreIndex, args.type, args.pathF, randomState, +# hyperParamSearch=hyperParamSearch, +# metrics=metrics, nIter=args.CL_GS_iter, +# **arguments) +# for arguments in argumentDictionaries["Monoview"]] +# monoviewTime = time.time() - dataBaseTime - start +# +# argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, +# randomState, directory, resultsMonoview, classificationIndices) +# +# resultsMultiview = [] +# resultsMultiview += [ +# ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, +# args.pathF, LABELS_DICTIONARY, 
randomState, hyperParamSearch=hyperParamSearch, +# metrics=metrics, nIter=args.CL_GS_iter, **arguments) +# for arguments in argumentDictionaries["Multiview"]] +# multiviewTime = time.time() - monoviewTime - dataBaseTime - start +# +# labels = np.array( +# [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in +# resultsMultiview]).transpose() +# DATASET = h5py.File(args.pathF + args.name + str(0) + ".hdf5", "r") +# trueLabels = DATASET.get("Labels").value +# times = [dataBaseTime, monoviewTime, multiviewTime] +# results = (resultsMonoview, resultsMultiview) +# labelAnalysis = analyzeLabels(labels, trueLabels, results, directory) +# logging.debug("Start:\t Analyze Iteration Results") +# resultAnalysis(benchmark, results, args.name, times, metrics, directory) +# logging.debug("Done:\t Analyze Iteration Results") +# globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime +# totalTime = time.time() - start +# logging.info("Extraction time : " + str(int(dataBaseTime)) + +# "s, Monoview time : " + str(int(monoviewTime)) + +# "s, Multiview Time : " + str(int(multiviewTime)) + +# "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) + +# "s, Iteration Duration : " + str(int(totalTime)) + "s") +# return results, labelAnalysis +# +# +# def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, kFolds, +# randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, +# benchmark, views): +# """Used to execute mono and multiview classification and result analysis for one random state +# classification""" +# #TODO : Clarify this one +# +# +# argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, +# randomState, directory, resultsMonoview, classificationIndices) +# +# resultsMultiview = [] +# if nbCores > 1: +# nbExperiments = len(argumentDictionaries["Multiview"]) +# for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): +# resultsMultiview += Parallel(n_jobs=nbCores)( +# delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, +# args.type, +# args.pathF, +# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, +# metrics=metrics, nIter=args.CL_GS_iter, +# **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) +# for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) +# else: +# resultsMultiview = [ +# ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF, +# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, +# metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in +# argumentDictionaries["Multiview"]] +# multiviewTime = time.time() - monoviewTime - dataBaseTime - start +# if nbCores > 1: +# logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") +# datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) +# logging.debug("Start:\t Deleting datasets for multiprocessing") +# labels = np.array( +# [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in +# resultsMultiview]).transpose() +# trueLabels = DATASET.get("Labels").value +# times = [dataBaseTime, monoviewTime, multiviewTime] +# results = (resultsMonoview, resultsMultiview) +# labelAnalysis = analyzeLabels(labels, trueLabels, results, directory) +# 
logging.debug("Start:\t Analyze Iteration Results") +# resultAnalysis(benchmark, results, args.name, times, metrics, directory) +# logging.debug("Done:\t Analyze Iteration Results") +# globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime +# totalTime = time.time() - start +# logging.info("Extraction time : " + str(int(dataBaseTime)) + +# "s, Monoview time : " + str(int(monoviewTime)) + +# "s, Multiview Time : " + str(int(multiviewTime)) + +# "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) + +# "s, Iteration Duration : " + str(int(totalTime)) + "s") +# return results, labelAnalysis # # # diff --git a/Code/MonoMultiViewClassifiers/Metrics/f1_score.py b/Code/MonoMultiViewClassifiers/Metrics/f1_score.py index 467420b4..13550b2f 100644 --- a/Code/MonoMultiViewClassifiers/Metrics/f1_score.py +++ b/Code/MonoMultiViewClassifiers/Metrics/f1_score.py @@ -27,8 +27,13 @@ def score(y_true, y_pred, **kwargs): try: average = kwargs["3"] except: - average = "micro" + if len(set(y_true)) > 2: + average = "micro" + else: + average = "binary" + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + return score diff --git a/Code/MonoMultiViewClassifiers/Metrics/fbeta_score.py b/Code/MonoMultiViewClassifiers/Metrics/fbeta_score.py index e55bf450..84e3f85c 100644 --- a/Code/MonoMultiViewClassifiers/Metrics/fbeta_score.py +++ b/Code/MonoMultiViewClassifiers/Metrics/fbeta_score.py @@ -26,7 +26,10 @@ def score(y_true, y_pred, **kwargs): try: average = kwargs["4"] except: - average = "micro" + if len(set(y_true)) > 2: + average = "micro" + else: + average = "binary" score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) return score diff --git a/Code/MonoMultiViewClassifiers/Metrics/precision_score.py b/Code/MonoMultiViewClassifiers/Metrics/precision_score.py index 49620fb5..a5bbd477 100644 --- a/Code/MonoMultiViewClassifiers/Metrics/precision_score.py +++ b/Code/MonoMultiViewClassifiers/Metrics/precision_score.py @@ -22,7 +22,10 @@ def score(y_true, y_pred, **kwargs): try: average = kwargs["3"] except: - average = "binary" + if len(set(y_true)) > 2: + average = "micro" + else: + average = "binary" score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) return score diff --git a/Code/MonoMultiViewClassifiers/Metrics/recall_score.py b/Code/MonoMultiViewClassifiers/Metrics/recall_score.py index ad657812..b1e3baa4 100644 --- a/Code/MonoMultiViewClassifiers/Metrics/recall_score.py +++ b/Code/MonoMultiViewClassifiers/Metrics/recall_score.py @@ -22,7 +22,10 @@ def score(y_true, y_pred, **kwargs): try: average = kwargs["3"] except: - average = "binary" + if len(set(y_true)) > 2: + average = "micro" + else: + average = "binary" score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) return score diff --git a/Code/MonoMultiViewClassifiers/Metrics/roc_auc_score.py b/Code/MonoMultiViewClassifiers/Metrics/roc_auc_score.py index 2847252d..2f31f24f 100644 --- a/Code/MonoMultiViewClassifiers/Metrics/roc_auc_score.py +++ b/Code/MonoMultiViewClassifiers/Metrics/roc_auc_score.py @@ -1,5 +1,6 @@ from sklearn.metrics import roc_auc_score as metric from sklearn.metrics import make_scorer +from sklearn.preprocessing import MultiLabelBinarizer # Author-Info __author__ = "Baptiste Bauvin" @@ -14,7 +15,15 @@ def score(y_true, y_pred, **kwargs): try: average 
= kwargs["1"] except: - average = "micro" + if len(set(y_true)) > 2: + average = "micro" + else: + average = None + if len(set(y_true)) > 2: + mlb = MultiLabelBinarizer() + y_true = mlb.fit_transform([(label) for label in y_true]) + y_pred = mlb.fit_transform([(label) for label in y_pred]) + score = metric(y_true, y_pred, sample_weight=sample_weight, average=average) return score diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 376cb113..7da23e3e 100644 --- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -153,7 +153,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol full_labels_pred[index] = y_train_pred[trainIndex] for testIndex, index in enumerate(classificationIndices[1]): full_labels_pred[index] = y_test_pred[testIndex] - if X_test_multiclass: + if X_test_multiclass != []: y_test_multiclass_pred = cl_res.predict(X_test_multiclass) else: y_test_multiclass_pred = [] diff --git a/Code/MonoMultiViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMultiViewClassifiers/Multiview/ExecMultiview.py index 018b38fa..a0b21968 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMultiViewClassifiers/Multiview/ExecMultiview.py @@ -129,7 +129,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor fullLabels[index] = trainLabels[trainIndex] for testIndex, index in enumerate(validationIndices): fullLabels[index] = testLabels[testIndex] - if testIndicesMulticlass: + if testIndicesMulticlass != []: testLabelsMulticlass = classifier.predict_hdf5(DATASET, usedIndices=testIndicesMulticlass, viewsIndices=viewsIndices) else: testLabelsMulticlass = [] diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py index e10855ea..ab5ceedd 100644 --- a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py +++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/FusionModule.py @@ -20,7 +20,7 @@ __status__ = "Prototype" # Production, Development, Prototype def genName(config): if config["fusionType"] == "LateFusion": - classifierRedNames = [classifierName[:4] for classifierName in config["classifierNames"]] + classifierRedNames = [classifierName[:4] for classifierName in config["classifiersNames"]] return "Late-" + str(config["fusionMethod"][:4])+"-"+"-".join(classifierRedNames) elif config["fusionType"] == "EarlyFusion": return "Early-" + config["fusionMethod"] + "-" + config["classifiersNames"] diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py index 85ac7a12..0f00d2ad 100644 --- a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py +++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py @@ -118,7 +118,7 @@ def getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, trainLab testLabels, viewIndices, metric): learningIndices, validationIndices, multiviewTestIndices = classificationIndices nbView = len(viewIndices) - NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] + NB_CLASS = len(set(CLASS_LABELS)) # DATASET.get("Metadata").attrs["nbClass"] metricModule = getattr(Metrics, metric[0]) fakeViewsIndicesDict = dict( (viewIndex, 
fakeViewIndex) for viewIndex, fakeViewIndex in zip(viewIndices, range(nbView))) @@ -233,7 +233,7 @@ def execute(classifier, trainLabels, LEARNING_RATE = len(learningIndices) / (len(learningIndices) + len(validationIndices)) nbFolds = KFolds.n_splits - CLASS_LABELS = DATASET.get("Labels")[...] + CLASS_LABELS = labels dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY) diff --git a/Code/MonoMultiViewClassifiers/ResultAnalysis.py b/Code/MonoMultiViewClassifiers/ResultAnalysis.py index b2fc826e..5017f244 100644 --- a/Code/MonoMultiViewClassifiers/ResultAnalysis.py +++ b/Code/MonoMultiViewClassifiers/ResultAnalysis.py @@ -78,9 +78,9 @@ def getExampleErrorsBiclass(usedBenchmarkArgumentDictionary, monoviewResults, mu def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames,minSize=10): for metricName, metricScores in metricsScores.items(): - logging.debug("Start:\t Multiclass score graph generation for "+metricName) + logging.debug("Start:\t Biclass score graph generation for "+metricName) trainScores = metricScores["trainScores"] - testScores = metricScores["testScore"] + testScores = metricScores["testScores"] names = metricScores["classifiersNames"] nbResults = len(testScores) @@ -110,13 +110,13 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames,min plt.tight_layout() f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-" + metricName + ".png") plt.close() - logging.debug("Done:\t Multiclass score graph generation for " + metricName) + logging.debug("Done:\t Biclass score graph generation for " + metricName) def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames,minSize=10): - logging.debug("Start:\t Label analysis figure generation") + logging.debug("Start:\t Biclass Label analysis figure generation") nbClassifiers = len(exampleErrors) - nbExamples = len(exampleErrors.values()[0]) + nbExamples = len(list(exampleErrors.values())[0]) nbIter = 2 data = np.zeros((nbExamples, nbClassifiers * nbIter)) temp_data = np.zeros((nbExamples, nbClassifiers)) @@ -143,9 +143,9 @@ def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames,min fig.tight_layout() fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-error_analysis.png") plt.close() - logging.debug("Done:\t Label analysis figure generation") + logging.debug("Done:\t Biclass Label analysis figure generation") - logging.debug("Start:\t Error by example figure generation") + logging.debug("Start:\t Biclass Error by example figure generation") errorOnExamples = -1*np.sum(data, axis=1)/nbIter+nbClassifiers np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors_doubled.csv", data, delimiter=",") np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", temp_data, delimiter=",") @@ -156,17 +156,18 @@ def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames,min plt.title("Number of classifiers that failed to classify each example") fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + databaseName +"-"+"vs".join(labelsNames)+ "-example_errors.png") plt.close() - logging.debug("Done:\t Error by example figure generation") + logging.debug("Done:\t Biclass Error by example figure generation") def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): + logging.debug("Start:\t Analyzing all biclass 
results") biclassResults = [{} for _ in range(statsIter)] for result in results: flag = result[0] iteridex = flag[0] classifierPositive = flag[1][0] classifierNegative = flag[1][1] - biclassResults[iteridex][[classifierPositive, classifierNegative]] = {} + biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)] = {} for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: if benchmarkArgumentDictionary["flag"]==flag: usedBenchmarkArgumentDictionary = benchmarkArgumentDictionary @@ -180,12 +181,13 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): usedBenchmarkArgumentDictionary["LABELS_DICTIONARY"][1]] publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames) publishExampleErrors(exampleErrors, directory, databaseName, labelsNames) - biclassResults[iteridex][[classifierPositive, classifierNegative]]["metricsScores"] = metricsScores - biclassResults[iteridex][[classifierPositive, classifierNegative]]["exampleErrors"] = exampleErrors + biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)]["metricsScores"] = metricsScores + biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)]["exampleErrors"] = exampleErrors + logging.debug("Done:\t Analyzing all biclass results") return biclassResults -def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionaries, classificationIndices): +def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionaries): """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" logging.debug("Start:\t Getting multiclass scores for each metric") @@ -193,14 +195,16 @@ def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionari for metric in metrics: metricModule = getattr(Metrics, metric[0]) for iterIndex, iterResults in enumerate(results): - trainIndices, testIndices = classificationIndices[iterIndex] + for argumentsDictionary in argumentsDictionaries: + if argumentsDictionary["flag"][0]==iterIndex: + classificationIndices = argumentsDictionary["classificationIndices"] + trainIndices, testIndices, multiclassTestIndices = classificationIndices for classifierName, resultDictionary in iterResults.items(): if not "metricsScores" in resultDictionary: results[iterIndex][classifierName]["metricsScores"]={} - trainScore = metricModule.score(trueLabels[trainIndices], - resultDictionary["labels"][trainIndices]) - testScore = metricModule.score(trueLabels[testIndices], - resultDictionary["labels"][testIndices]) + trainScore = metricModule.score(trueLabels[trainIndices],resultDictionary["labels"][trainIndices]) + testScore = metricModule.score(trueLabels[multiclassTestIndices], + resultDictionary["labels"][multiclassTestIndices]) results[iterIndex][classifierName]["metricsScores"][metric[0]] = [trainScore, testScore] @@ -224,15 +228,6 @@ def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels): return multiclassResults -def autolabel(rects, ax): - """Used to print scores on top of the bars""" - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x() + rect.get_width() / 2., 1.01 * height, - "%.2f" % height, - ha='center', va='bottom') - - def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, databaseName, minSize=10): for iterIndex in range(statsIter): directory = direcories[iterIndex] @@ -281,7 +276,7 @@ def publishMulticlassExmapleErrors(multiclassResults, directories, databaseName, directory = 
directories[iterIndex] logging.debug("Start:\t Label analysis figure generation") nbClassifiers = len(multiclassResult) - nbExamples = len(multiclassResult.values()[0]["errorOnExample"]) + nbExamples = len(list(multiclassResult.values())[0]["errorOnExample"]) nbIter = 2 data = np.zeros((nbExamples, nbClassifiers * nbIter)) temp_data = np.zeros((nbExamples, nbClassifiers)) @@ -332,32 +327,55 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamp iterIndex = flag[0] classifierPositive = flag[1][0] classifierNegative = flag[1][1] + for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: + if benchmarkArgumentDictionary["flag"] == flag: + trainIndices, testIndices, testMulticlassIndices = benchmarkArgumentDictionary["classificationIndices"] for classifierResult in resMono: if classifierResult[1][0] not in multiclassResults[iterIndex]: - multiclassResults[iterIndex][classifierResult[1][0]] = np.zeros((nbExamples,nbLabels),dtype=int) - for exampleIndex, label in enumerate(classifierResult[1][3]): + multiclassResults[iterIndex][classifierResult[1][0]] = np.zeros((nbExamples, nbLabels),dtype=int) + for exampleIndex in trainIndices: + label = classifierResult[1][3][exampleIndex] + if label == 1: + multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierPositive] += 1 + else: + multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierNegative] += 1 + for multiclassIndex, exampleIndex in enumerate(testMulticlassIndices): + label = classifierResult[1][5][multiclassIndex] if label == 1: multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierPositive] += 1 else: multiclassResults[iterIndex][classifierResult[1][0]][exampleIndex, classifierNegative] += 1 + for classifierResult in resMulti: multiviewClassifierPackage = getattr(MultiviewClassifiers, classifierResult[0]) multiviewClassifierModule = getattr(multiviewClassifierPackage, classifierResult[0]+"Module") classifierName = multiviewClassifierModule.genName(classifierResult[1]) if classifierName not in multiclassResults[iterIndex]: multiclassResults[iterIndex][classifierName] = np.zeros((nbExamples,nbLabels),dtype=int) - for exampleIndex, label in enumerate(classifierResult[3]): + for exampleIndex in trainIndices: + label = classifierResult[3][exampleIndex] + if label == 1: + multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 + else: + multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 + for multiclassIndex, exampleIndex in enumerate(testMulticlassIndices): + label = classifierResult[4][multiclassIndex] if label == 1: multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 else: multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 + # for exampleIndex, label in enumerate(classifierResult[3]): + # if label == 1: + # multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 + # else: + # multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 for iterIndex, multiclassiterResult in enumerate(multiclassResults): for key, value in multiclassiterResult.items(): multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)} - multiclassResults = genMetricsScoresMulticlass(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries, classificationIndices) + multiclassResults = genMetricsScoresMulticlass(multiclassResults, multiclassLabels, metrics, 
benchmarkArgumentDictionaries) multiclassResults = getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels) publishMulticlassScores(multiclassResults, metrics, statsIter, directories, benchmarkArgumentDictionaries[0]["args"].name) @@ -404,7 +422,6 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, cl + str(statsIter) + "_iter-" + metricName + ".png") - def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary, classifiersDict, statsIter, minSize=10): for labelsCombination, combiResults in iterResults.items(): currentDirectory = directory+ labelsDictionary[labelsCombination[0]]+"vs"+labelsDictionary[labelsCombination[1]]+"/" @@ -550,7 +567,10 @@ def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDict publishIterBiclassExampleErrors(iterBiclassResults, directory, labelsDictionary, classifiersDict, statsIter) -def analyzeIterMulticlass(biclassResults, multiclassResults, directory, statsIter, labelsDictionary, metrics, dataBaseName): +def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName): + """Used to average the multiclass results over the iterations executed with different random states""" + + logging.debug("Start:\t Getting mean results for multiclass classification") iterMulticlassResults = {} nbClassifiers = len(multiclassResults[0]) iterMulticlassResults["errorOnExamples"] = np.zeros((nbClassifiers,len(multiclassResults[0].values()[0]["exampleErrors"])), @@ -570,11 +590,14 @@ def analyzeIterMulticlass(biclassResults, multiclassResults, directory, statsIte iterMulticlassResults["metricsScores"][metric[0]]["trainScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][0] iterMulticlassResults["metricsScores"][metric[0]]["testScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][1] iterMulticlassResults["errorOnExamples"][classifierIndex, :] = classifierResults["exampleErrors"] + logging.debug("Done:\t Getting mean results for multiclass classification") + + publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, dataBaseName, directory, statsIter) publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter) def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): + """Used to analyze the results of the previous benchmarks""" dataBaseName = benchmarkArgumentDictionaries[0]["args"].name if statsIter > 1: if nbMulticlass > 1: @@ -582,13 +605,15 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics, classificationIndices, directories) analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName) - analyzeIterMulticlass(biclassResults, multiclassResults, directory, statsIter, labelsDictionary, metrics, dataBaseName) + analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName) else: biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName) - + else: if nbMulticlass>1: - analyzeMulticlass(results) + biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, 
metrics) + multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, + metrics, classificationIndices, directories) else: analyzeBiclass(results) diff --git a/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py index 436e05d9..54cc0a37 100644 --- a/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py +++ b/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py @@ -66,6 +66,7 @@ def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics bestSettings = paramsSet classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) classifier.setParams(bestSettings) + # TODO : This must be corrected else: bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, labels, viewsIndices, classificationKWARGS, learningIndices, diff --git a/Code/MonoMultiViewClassifiers/utils/Multiclass.py b/Code/MonoMultiViewClassifiers/utils/Multiclass.py index 76a7a6bc..89fd7bd1 100644 --- a/Code/MonoMultiViewClassifiers/utils/Multiclass.py +++ b/Code/MonoMultiViewClassifiers/utils/Multiclass.py @@ -33,6 +33,7 @@ def genMulticlassLabels(labels, multiclassMethod, classificationIndices): else: pass multiclassLabels.append(newLabels) + elif multiclassMethod == "oneVersusRest": # TODO : Implement one versus rest if probas are not a problem anymore pass -- GitLab
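
Side note on the one-versus-one aggregation that analyzeMulticlass introduces in this patch: every biclass prediction casts a vote in a (nbExamples, nbLabels) integer matrix (the positive-class column when the predicted label is 1, the negative-class column otherwise), and the multiclass label is the per-example argmax of that matrix. The sketch below is a minimal, self-contained illustration of that voting scheme only; the names merge_ovo_votes, ovo_results, nb_examples and nb_classes are illustrative and do not exist in the repository.

import numpy as np

def merge_ovo_votes(ovo_results, nb_examples, nb_classes):
    """ovo_results: list of (positive_class, negative_class, binary_preds) tuples,
    where binary_preds[i] == 1 means example i was assigned the positive class."""
    votes = np.zeros((nb_examples, nb_classes), dtype=int)
    for positive, negative, binary_preds in ovo_results:
        for example_index, pred in enumerate(binary_preds):
            # Add one vote for the class this biclass task predicted.
            votes[example_index, positive if pred == 1 else negative] += 1
    # Each example gets the class with the most votes; ties fall back to the
    # lowest class index because np.argmax returns the first maximum.
    return np.argmax(votes, axis=1)

# Toy usage: 3 classes, 3 examples, one binary prediction vector per class pair.
ovo = [(0, 1, [1, 0, 0]),
       (0, 2, [1, 0, 0]),
       (1, 2, [1, 1, 0])]
print(merge_ovo_votes(ovo, nb_examples=3, nb_classes=3))  # -> [0 1 2]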