Commit 0738a1da authored by Baptiste Bauvin

Simplified iter biclass result analysis

parent 7616f9f8
@@ -11,7 +11,6 @@ import matplotlib as mpl
# Import own Modules
from . import Metrics
from . import MultiviewClassifiers
# Author-Info
__author__ = "Baptiste Bauvin"
@@ -388,7 +387,7 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2):
- -100 if it did not classify the example (multiclass one versus one).
base_file_name : list of str
The name of the file in which the figure will be saved ("2D_plot_data.csv" and "bar_plot_data.csv" will
be added at the end)
be added at the end).
nbCopies : int, optional, default: 2
The number of times the data is copied (classifier wise) in order for the figure to be more readable.
@@ -396,9 +395,9 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2):
Returns
-------
nbClassifiers : int
Number of different classifiers
Number of different classifiers.
nbExamples : int
Number of examples
Number of examples.
nbCopies : int
The number of times the data is copied (classifier wise) in order for the figure to be more readable.
classifiersNames : list of strs
@@ -445,28 +444,76 @@ def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames):
logging.debug("Done:\t Biclass Label analysis figures generation")
def get_arguments(benchmarkArgumentDictionaries, flag):
r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of a
biclass experimentation.
Parameters
----------
flag : list
The flag of the needed experiment.
benchmarkArgumentDictionaries : list of dicts
The list of all the arguments passed to the benchmark executing functions.
Returns
-------
benchmarkArgumentDictionary : dict
All the arguments passed to the benchmark-executing function for the needed experiment.
"""
for benchmarkArgumentDictionary in benchmarkArgumentDictionaries:
if benchmarkArgumentDictionary["flag"] == flag:
return benchmarkArgumentDictionary
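A minimal usage sketch (the flag value and the surrounding variables are purely illustrative): each benchmark argument dictionary stores the flag it was launched with, so a plain equality test retrieves the matching run; note that the function implicitly returns None when no dictionary matches.

flag = [0, [1, 2]]  # hypothetical flag: statistical iteration 0, label 1 (positive) vs label 2 (negative)
arguments = get_arguments(benchmarkArgumentDictionaries, flag)
if arguments is not None:
    print(arguments["directory"], arguments["args"].name)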
def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics):
r"""Used to extract and format the results of the different biclass experimentations performed.
Parameters
----------
results : list
The result list returned by the benchmark execution function. For each executed benchmark, it contains
a flag and a result element.
The flag identifies to which benchmark the results belong, formatted this way:
`flag = iterIndex, [classifierPositive, classifierNegative]` with
- `iterIndex` the index of the statistical iteration
- `[classifierPositive, classifierNegative]` the indices of the labels considered positive and negative
by the classifier (mainly useful for one versus one multiclass classification).
benchmarkArgumentDictionaries : list of dicts
The list of all the arguments passed to the benchmark executing functions.
statsIter : int
The number of statistical iterations.
metrics : list of lists
The list containing the metrics and their configuration.
Returns
-------
biclassResults : list of dicts of dicts
The list contains a dictionary for each statistical iteration. This dictionary contains a dictionary for each
label combination, gathering the scores for each metric and the information needed to plot the errors on examples.
"""
logging.debug("Srart:\t Analzing all biclass resuls")
biclassResults = [{} for _ in range(statsIter)]
for flag, result in results:
iteridex = flag[0]
classifierPositive = flag[1][0]
classifierNegative = flag[1][1]
biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)] = {}
iteridex, [classifierPositive, classifierNegative] = flag
arguments = get_arguments(benchmarkArgumentDictionaries, flag)
for benchmarkArgumentDictionary in benchmarkArgumentDictionaries:
if benchmarkArgumentDictionary["flag"]==flag:
usedBenchmarkArgumentDictionary = benchmarkArgumentDictionary
metricsScores = getMetricsScoresBiclass(metrics, result)
exampleErrors = getExampleErrorsBiclass(usedBenchmarkArgumentDictionary["labels"], result)
directory = usedBenchmarkArgumentDictionary["directory"]
databaseName = usedBenchmarkArgumentDictionary["args"].name
labelsNames = [usedBenchmarkArgumentDictionary["LABELS_DICTIONARY"][0],
usedBenchmarkArgumentDictionary["LABELS_DICTIONARY"][1]]
exampleErrors = getExampleErrorsBiclass(arguments["labels"], result)
directory = arguments["directory"]
databaseName = arguments["args"].name
labelsNames = [arguments["LABELS_DICTIONARY"][0],
arguments["LABELS_DICTIONARY"][1]]
publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames)
publishExampleErrors(exampleErrors, directory, databaseName, labelsNames)
biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)]["metricsScores"] = metricsScores
biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)]["exampleErrors"] = exampleErrors
biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)] = {"metricsScores": metricsScores,
"exampleErrors": exampleErrors}
logging.debug("Done:\t Analzing all biclass resuls")
return biclassResults
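A hedged sketch of the structure returned above, assuming two statistical iterations and a single label combination (0, 1); the metric name is illustrative:

biclassResults = [
    {"01": {"metricsScores": {"accuracy_score": {...}},  # one entry per metric in `metrics`
            "exampleErrors": {...}}},                     # per-classifier error data for the error figures
    {"01": {"metricsScores": {"accuracy_score": {...}},
            "exampleErrors": {...}}},
]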
@@ -475,18 +522,22 @@ def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionari
"""Used to add all the metrics scores to the multiclass result structure for each clf and each iteration"""
logging.debug("Start:\t Getting multiclass scores for each metric")
# TODO : Metric score for train and test
for metric in metrics:
metricModule = getattr(Metrics, metric[0])
for iterIndex, iterResults in enumerate(results):
for argumentsDictionary in argumentsDictionaries:
if argumentsDictionary["flag"][0]==iterIndex:
classificationIndices = argumentsDictionary["classificationIndices"]
trainIndices, testIndices, multiclassTestIndices = classificationIndices
for classifierName, resultDictionary in iterResults.items():
if not "metricsScores" in resultDictionary:
results[iterIndex][classifierName]["metricsScores"]={}
trainScore = metricModule.score(trueLabels[trainIndices],resultDictionary["labels"][trainIndices], multiclass=True)
trainScore = metricModule.score(trueLabels[trainIndices],
resultDictionary["labels"][trainIndices],
multiclass=True)
testScore = metricModule.score(trueLabels[multiclassTestIndices],
resultDictionary["labels"][multiclassTestIndices],
multiclass=True)
@@ -510,7 +561,7 @@ def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels):
return multiclassResults
def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, databaseName, minSize=10):
def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, databaseName):
for iterIndex in range(statsIter):
directory = direcories[iterIndex]
for metric in metrics:
@@ -530,7 +581,7 @@ def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, d
logging.debug("Done:\t Multiclass score graph generation for " + metric[0])
def publishMulticlassExmapleErrors(multiclassResults, directories, databaseName, minSize=10):
def publishMulticlassExmapleErrors(multiclassResults, directories, databaseName):
for iterIndex, multiclassResult in enumerate(multiclassResults):
directory = directories[iterIndex]
logging.debug("Start:\t Multiclass Label analysis figure generation")
@@ -617,7 +668,6 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, cl
train_STDs=trainSTDs, test_STDs=testSTDs)
def gen_error_dat_glob(combiResults, statsIter, base_file_name):
nbExamples = combiResults["errorOnExamples"].shape[1]
nbClassifiers = combiResults["errorOnExamples"].shape[0]
@@ -673,17 +723,14 @@ def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classif
logging.debug("Done:\t Global multiclass label analysis figures generation")
def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples):
iterBiclassResults = {}
classifiersDict = {}
for iterIndex, biclassResult in enumerate(biclassResults):
for labelsComination, results in biclassResult.items():
for metric in metrics:
nbClassifiers = len(results["metricsScores"][metric[0]]["classifiersNames"])
if not classifiersDict:
def gen_classifiers_dict(results, metrics):
classifiersDict = dict((classifierName, classifierIndex)
for classifierIndex, classifierName
in enumerate(results["metricsScores"][metric[0]]["classifiersNames"]))
in enumerate(results[0][list(results[0].keys())[0]]["metricsScores"][metrics[0][0]]["classifiersNames"]))
return classifiersDict, len(classifiersDict)
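For illustration, a hedged sketch of the mapping this helper builds (the classifier names are made up): each classifier gets a stable row index that is reused across iterations and label combinations.

classifiersDict, nbClassifiers = gen_classifiers_dict(biclassResults, metrics)
# e.g. classifiersDict == {"DecisionTree-View0": 0, "Adaboost-View1": 1} and nbClassifiers == 2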
def add_new_labels_combination(iterBiclassResults, labelsComination, nbClassifiers, nbExamples):
if labelsComination not in iterBiclassResults:
iterBiclassResults[labelsComination] = {}
iterBiclassResults[labelsComination]["metricsScores"] = {}
@@ -691,19 +738,41 @@ def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDict
iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros((nbClassifiers,
nbExamples),
dtype=int)
return iterBiclassResults
def add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, statsIter):
if metric[0] not in iterBiclassResults[labelsComination]["metricsScores"]:
iterBiclassResults[labelsComination]["metricsScores"][metric[0]] = {"trainScores":
np.zeros((nbClassifiers, statsIter)),
"testScores":
np.zeros((nbClassifiers, statsIter))}
for classifierName, trainScore, testScore in zip(results["metricsScores"][metric[0]]["classifiersNames"],
results["metricsScores"][metric[0]]["trainScores"],
results["metricsScores"][metric[0]]["testScores"],
):
return iterBiclassResults
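A hedged sketch of the arrays the two helpers above allocate for a given label combination (shapes only, names taken from the surrounding code):

combi = iterBiclassResults[labelsComination]
# combi["errorOnExamples"].shape == (nbClassifiers, nbExamples)
# combi["metricsScores"][metric[0]]["trainScores"].shape == (nbClassifiers, statsIter)
# combi["metricsScores"][metric[0]]["testScores"].shape == (nbClassifiers, statsIter)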
def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples):
"""Used to format the results in order to plot the mean results on the iterations"""
iterBiclassResults = {}
classifiersDict, nbClassifiers = gen_classifiers_dict(biclassResults, metrics)
for iterIndex, biclassResult in enumerate(biclassResults):
for labelsComination, results in biclassResult.items():
for metric in metrics:
iterBiclassResults = add_new_labels_combination(iterBiclassResults, labelsComination, nbClassifiers, nbExamples)
iterBiclassResults = add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, statsIter)
metric_results = results["metricsScores"][metric[0]]
for classifierName, trainScore, testScore in zip(metric_results["classifiersNames"],
metric_results["trainScores"],
metric_results["testScores"],):
iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["trainScores"][classifiersDict[classifierName], iterIndex] = trainScore
iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["testScores"][classifiersDict[classifierName], iterIndex] = testScore
for classifierName, errorOnExample in results["exampleErrors"].items():
iterBiclassResults[labelsComination]["errorOnExamples"][classifiersDict[classifierName], :] += errorOnExample["errorOnExamples"]
publishIterBiclassMetricsScores(iterBiclassResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter)
publishIterBiclassExampleErrors(iterBiclassResults, directory, labelsDictionary, classifiersDict, statsIter)
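As a hedged illustration of what the accumulated arrays enable (assuming numpy is available as np, as elsewhere in this module), averaging over the statsIter axis yields the per-classifier aggregates that the publish functions are given to plot:

scores = iterBiclassResults[labelsComination]["metricsScores"][metric[0]]
train_means = np.mean(scores["trainScores"], axis=1)  # shape (nbClassifiers,)
test_stds = np.std(scores["testScores"], axis=1)      # shape (nbClassifiers,)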
@@ -738,224 +807,16 @@ def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, data
publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter)
def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels):
def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics,
classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels):
"""Used to analyze the results of the previous benchmarks"""
dataBaseName = benchmarkArgumentDictionaries[0]["args"].name
if statsIter > 1:
if nbMulticlass > 1:
biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics)
multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels,
metrics, classificationIndices, directories)
analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples)
analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName, nbExamples)
else:
biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics)
if nbMulticlass > 1:
multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels,
multiclassLabels, metrics, classificationIndices, directories)
if statsIter > 1:
analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples)
else:
if nbMulticlass > 1:
biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics)
multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels,
metrics, classificationIndices, directories)
else:
biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics)
analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName, nbExamples)
# def genFusionName(type_, a, b, c):
# """Used to generate fusion classifiers names"""
# if type_ == "Fusion" and a["fusionType"] != "EarlyFusion":
# return "Late-" + str(a["fusionMethod"])
# elif type_ == "Fusion" and a["fusionType"] != "LateFusion":
# return "Early-" + a["fusionMethod"] + "-" + a["classifiersNames"]
#
#
# def genNamesFromRes(mono, multi):
# """Used to generate classifiers names list (inthe right order) from mono- and multi-view preds"""
# names = [res[1][0] + "-" + res[1][1][-1] for res in mono]
# names += [type_ if type_ != "Fusion" else genFusionName(type_, a, b, c) for type_, a, b, c in multi]
# return names
#
#
# def resultAnalysis(benchmark, results, name, times, metrics, directory, minSize=10):
# """Used to generate bar graphs of all the classifiers scores for each metric """
# mono, multi = results
# for metric in metrics:
# logging.debug("Start:\t Score graph generation for "+metric[0])
# names = genNamesFromRes(mono, multi)
# nbResults = len(mono) + len(multi)
# validationScores = [float(res[1][2][metric[0]][1]) for res in mono]
# validationScores += [float(scores[metric[0]][1]) for a, b, scores, c in multi]
# trainScores = [float(res[1][2][metric[0]][0]) for res in mono]
# trainScores += [float(scores[metric[0]][0]) for a, b, scores, c in multi]
#
# validationScores = np.array(validationScores)
# trainScores = np.array(trainScores)
# names = np.array(names)
# sorted_indices = np.argsort(validationScores)
# validationScores = validationScores[sorted_indices]
# trainScores = trainScores[sorted_indices]
# names = names[sorted_indices]
#
# size = nbResults
# if nbResults < minSize:
# size = minSize
# figKW = {"figsize" : (size, 3.0/4*size+2.0)}
# f, ax = plt.subplots(nrows=1, ncols=1, **figKW)
# barWidth= 0.35
# ax.set_title(metric[0] + "\n on validation set for each classifier")
# rects = ax.bar(range(nbResults), validationScores, barWidth, color="r", )
# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, color="0.7", )
# autolabel(rects, ax)
# autolabel(rect2, ax)
# ax.legend((rects[0], rect2[0]), ('Test', 'Train'))
# ax.set_ylim(-0.1, 1.1)
# ax.set_xticks(np.arange(nbResults) + barWidth)
# ax.set_xticklabels(names, rotation="vertical")
# plt.tight_layout()
# f.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + name + "-" + metric[0] + ".png")
# plt.close()
# logging.debug("Done:\t Score graph generation for " + metric[0])
#
#
# def analyzeLabels(labelsArrays, realLabels, results, directory, minSize = 10):
# """Used to generate a graph showing errors on each example depending on classifier"""
# logging.debug("Start:\t Label analysis figure generation")
# mono, multi = results
# classifiersNames = genNamesFromRes(mono, multi)
# nbClassifiers = len(classifiersNames)
# nbExamples = realLabels.shape[0]
# nbIter = 2
# data = np.zeros((nbExamples, nbClassifiers * nbIter))
# tempData = np.array([labelsArray == realLabels for labelsArray in np.transpose(labelsArrays)]).astype(int)
# for classifierIndex in range(nbClassifiers):
# for iterIndex in range(nbIter):
# data[:, classifierIndex * nbIter + iterIndex] = tempData[classifierIndex, :]
# figWidth = max(nbClassifiers/2, minSize)
# figHeight = max(nbExamples/20, minSize)
# figKW = {"figsize":(figWidth, figHeight)}
# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW)
# cmap = mpl.colors.ListedColormap(['red', 'green'])
# bounds = [-0.5, 0.5, 1.5]
# norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
#
# cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto')
# plt.title('Errors depending on the classifier')
# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter)
# labels = classifiersNames
# plt.xticks(ticks, labels, rotation="vertical")
# cbar = fig.colorbar(cax, ticks=[0, 1])
# cbar.ax.set_yticklabels(['Wrong', ' Right'])
# fig.tight_layout()
# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-error_analysis.png")
# plt.close()
# logging.debug("Done:\t Label analysis figure generation")
#
# logging.debug("Start:\t Error by example figure generation")
# errorOnExamples = -1*np.sum(data, axis=1)/nbIter+nbClassifiers
# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors.csv", data, delimiter=",")
# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", errorOnExamples, delimiter=",")
# fig, ax = plt.subplots()
# x = np.arange(nbExamples)
# plt.bar(x, errorOnExamples)
# plt.ylim([0,nbClassifiers])
# plt.title("Number of classifiers that failed to classify each example")
# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.png")
# plt.close()
# logging.debug("Done:\t Error by example figure generation")
# return data
#
#
# def analyzeIterLabels(labelsAnalysisList, directory, classifiersNames, minSize=10):
# """Used to generate a graph showing errors on each example depending on classifierusing a score
# if multiple iterations"""
# logging.debug("Start:\t Global label analysis figure generation")
# nbExamples = labelsAnalysisList[0].shape[0]
# nbClassifiers = len(classifiersNames)
# nbIter = 2
#
# figWidth = max(nbClassifiers / 2, minSize)
# figHeight = max(nbExamples / 20, minSize)
# figKW = {"figsize": (figWidth, figHeight)}
# fig, ax = plt.subplots(nrows=1, ncols=1, **figKW)
# data = sum(labelsAnalysisList)
# cax = plt.imshow(-data, interpolation='none', cmap="Greys", aspect='auto')
# plt.title('Errors depending on the classifier')
# ticks = np.arange(nbIter/2-0.5, nbClassifiers * nbIter, nbIter)
# plt.xticks(ticks, classifiersNames, rotation="vertical")
# cbar = fig.colorbar(cax, ticks=[0, -len(labelsAnalysisList)])
# cbar.ax.set_yticklabels(['Always Wrong', 'Always Right'])
# fig.tight_layout()
# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-error_analysis.png")
# plt.close()
# logging.debug("Done:\t Global label analysis figure generation")
# logging.debug("Start:\t Global error by example figure generation")
# errorOnExamples = -1 * np.sum(data, axis=1) / nbIter + (nbClassifiers*len(labelsAnalysisList))
# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-clf_errors.csv", data, delimiter=",")
# np.savetxt(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.csv", errorOnExamples, delimiter=",")
# fig, ax = plt.subplots()
# x = np.arange(nbExamples)
# plt.bar(x, errorOnExamples)
# plt.ylim([0,nbClassifiers*len(labelsAnalysisList)])
# plt.title("Number of classifiers that failed to classify each example")
# fig.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-example_errors.png")
# plt.close()
# logging.debug("Done:\t Global error by example figure generation")
#
#
# def genFig(iterResults, metric, nbResults, names, nbMono, minSize=10):
# """Used to generate the bar graph representing the mean scores of each classifiers if multiple iteration
# with different random states"""
# nbIter = len(iterResults)
# validationScores = np.zeros((nbIter, nbResults))
# trainScores = np.zeros((nbIter, nbResults))
# for iterIndex, iterResult in enumerate(iterResults):
# mono, multi = iterResult
# validationScores[iterIndex, :nbMono] = np.array([float(res[1][2][metric[0]][1]) for res in mono])
# validationScores[iterIndex, nbMono:] = np.array([float(scores[metric[0]][1]) for a, b, scores, c in multi])
# trainScores[iterIndex, :nbMono] = np.array([float(res[1][2][metric[0]][0]) for res in mono])
# trainScores[iterIndex, nbMono:] = np.array([float(scores[metric[0]][0]) for a, b, scores, c in multi])
#
# validationSTDs = np.std(validationScores, axis=0)
# trainSTDs = np.std(trainScores, axis=0)
# validationMeans = np.mean(validationScores, axis=0)
# trainMeans = np.mean(trainScores, axis=0)
# size=nbResults
# if nbResults<minSize:
# size=minSize
# figKW = {"figsize" : (size, 3.0/4*size+2.0)}
# f, ax = plt.subplots(nrows=1, ncols=1, **figKW)
# barWidth = 0.35 # the width of the bars
# sorted_indices = np.argsort(validationMeans)
# validationMeans = validationMeans[sorted_indices]
# validationSTDs = validationSTDs[sorted_indices]
# trainSTDs = trainSTDs[sorted_indices]
# trainMeans = trainMeans[sorted_indices]
# names = np.array(names)[sorted_indices]
#
# ax.set_title(metric[0] + " for each classifier")
# rects = ax.bar(range(nbResults), validationMeans, barWidth, color="r", yerr=validationSTDs)
# rect2 = ax.bar(np.arange(nbResults) + barWidth, trainMeans, barWidth, color="0.7", yerr=trainSTDs)
# autolabel(rects, ax)
# autolabel(rect2, ax)
# ax.set_ylim(-0.1, 1.1)
# ax.legend((rects[0], rect2[0]), ('Test', 'Train'))
# ax.set_xticks(np.arange(nbResults) + barWidth)
# ax.set_xticklabels(names, rotation="vertical")
# f.tight_layout()
#
# return f
#
#
# def analyzeIterResults(iterResults, name, metrics, directory):
# nbResults = len(iterResults[0][0]) + len(iterResults[0][1])
# nbMono = len(iterResults[0][0])
# nbIter = len(iterResults)
# names = genNamesFromRes(iterResults[0][0], iterResults[0][1])
# for metric in metrics:
# logging.debug("Start:\t Global score graph generation for " + metric[0])
# figure = genFig(iterResults, metric, nbResults, names, nbMono)
# figure.savefig(directory + time.strftime("%Y%m%d-%H%M%S") + "-" + name + "-Mean_on_"
# + str(nbIter) + "_iter-" + metric[0] + ".png")
# logging.debug("Done:\t Global score graph generation for " + metric[0])