Commit c38366de authored by Baptiste Bauvin

Added noise analysis and support for multiple datasets

parent c16dee60
@@ -21,7 +21,7 @@ from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
from .utils import GetMultiviewDb as DB
from .ResultAnalysis import \
getResults # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
getResults, plot_results_noise # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
from .utils import execution, Dataset, Multiclass
# Author-Info
@@ -569,12 +569,15 @@ def execBenchmark(nbCores, statsIter, nbMulticlass,
classificationIndices[0][1])
multiclassGroundTruth = DATASET.get("Labels").value
logging.debug("Start:\t Analyzing predictions")
getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
results_mean_stds =getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
multiclassGroundTruth, metrics, classificationIndices,
directories, directory, labelsDictionary, nbExamples, nbLabels)
logging.debug("Done:\t Analyzing predictions")
filename = DATASET.filename
DATASET.close()
return results
if "_temp_" in filename:
os.remove(filename)
return results_mean_stds
def execClassif(arguments):
@@ -598,6 +601,7 @@ def execClassif(arguments):
args.noise_std=[0.0]
for name in dataset_list:
noise_results = []
for noise_std in args.noise_std:
directory = execution.initLogFile(name, args.views, args.CL_type,
@@ -667,9 +671,9 @@ def execClassif(arguments):
views, viewsIndices)
nbMulticlass = len(labelsCombinations)
results = execBenchmark(nbCores, statsIter, nbMulticlass,
results_mean_stds = execBenchmark(nbCores, statsIter, nbMulticlass,
benchmarkArgumentDictionaries, splits, directories,
directory, multiclassLabels, metrics, LABELS_DICTIONARY,
NB_CLASS, DATASET)
print(results)
quit()
noise_results.append([noise_std, results_mean_stds])
plot_results_noise(directory, noise_results, metrics[0][0], name)
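
Read together, the two execClassif hunks above introduce a noise-sweep driver: for each dataset name, the whole benchmark is re-run once per value in args.noise_std, each run's aggregated rows are stored as a [noise_std, results_mean_stds] pair, and one summary figure is drawn per dataset. A minimal, self-contained sketch of that pattern, where run_benchmark and its rows are illustrative stand-ins for the real execBenchmark pipeline rather than the project's API:

import numpy as np

def run_benchmark(dataset_name, noise_std, rng):
    # Illustrative stand-in for execBenchmark: it should return rows shaped
    # like [classifier_name, metric_name, mean_score, score_std].
    score = max(0.5, 0.95 - noise_std + rng.normal(0.0, 0.01))
    return [["DecisionTree-View0", "accuracy_score", score, 0.02]]

dataset_list = ["plausible"]      # stand-in for the new multi-dataset list
noise_levels = [0.0, 0.25, 0.5]   # stand-in for args.noise_std

rng = np.random.RandomState(42)
for name in dataset_list:
    noise_results = []
    for noise_std in noise_levels:
        results_mean_stds = run_benchmark(name, noise_std, rng)
        noise_results.append([noise_std, results_mean_stds])
    # plot_results_noise(directory, noise_results, "accuracy_score", name) then
    # draws one bar chart of mean score versus noise level for the dataset.
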
@@ -135,6 +135,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# print(self.classification_matrix)
# print(self.weights_, self.break_cause)
self.weights_ = np.array(self.weights_)
if np.sum(self.weights_) != 1:
self.weights_ /= np.sum(self.weights_)
formatted_y[formatted_y == -1] = 0
@@ -5,9 +5,11 @@ import os
import time
import matplotlib as mpl
from matplotlib.patches import Patch
# Import third party modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Import own Modules
from . import Metrics
@@ -17,6 +19,49 @@ __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def plot_results_noise(directory, noise_results, metric_to_plot, name, width=0.1):
avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
"tab:olive", "tab:red", ]
colors = {}
legend_patches = []
noise_levels = np.array([noise_level for noise_level, _ in noise_results])
df = pd.DataFrame(columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], )
if len(noise_results)>1:
width = np.min(np.diff(noise_levels))
for noise_level, noise_result in noise_results:
classifiers_names, meaned_metrics, metric_stds = [], [], []
for classifier_result in noise_result:
classifier_name = classifier_result[0].split("-")[0]
if classifier_result[1] == metric_to_plot:
classifiers_names.append(classifier_name)
meaned_metrics.append(classifier_result[2])
metric_stds.append(classifier_result[3])
if classifier_name not in colors:
try:
colors[classifier_name] = avail_colors.pop(0)
except IndexError:
colors[classifier_name] = "k"
classifiers_names, meaned_metrics, metric_stds = np.array(classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
sorted_indices = np.argsort(-meaned_metrics)
for index in sorted_indices:
row = pd.DataFrame(
{'noise_level':noise_level, 'classifier_name':classifiers_names[index], 'mean_score':meaned_metrics[index],
'score_std':metric_stds[index]}, index=[0])
df = pd.concat([df, row])
plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index], width=0.5*width, label=classifiers_names[index], color=colors[classifiers_names[index]])
for classifier_name, color in colors.items():
legend_patches.append(Patch(facecolor=color, label=classifier_name))
plt.legend(handles=legend_patches, loc='lower center', bbox_to_anchor=(0.5, 1.05), ncol=2)
plt.ylabel(metric_to_plot)
plt.title(name)
plt.xticks(noise_levels)
plt.xlabel("Noise level")
plt.savefig(directory+name+"_noise_analysis.png")
plt.close()
df.to_csv(directory+name+"_noise_analysis.csv")
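
A usage sketch for plot_results_noise as defined above, assuming the function is in scope (imported from this module, or the snippet pasted below it). The noise_results layout mirrors what execClassif now appends ([noise_std, results_mean_stds]) and what getResults aggregates (rows of [classifier_name, metric_name, mean_score, score_std]); the classifier names and scores are illustrative:

# from <package>.ResultAnalysis import plot_results_noise  # package prefix depends on the install

noise_results = [
    [0.0,  [["DecisionTree-View0", "accuracy_score", 0.92, 0.01],
            ["Adaboost-View1", "accuracy_score", 0.88, 0.02]]],
    [0.25, [["DecisionTree-View0", "accuracy_score", 0.84, 0.03],
            ["Adaboost-View1", "accuracy_score", 0.80, 0.02]]],
]
plot_results_noise(directory="./", noise_results=noise_results,
                   metric_to_plot="accuracy_score", name="plausible")
# Writes ./plausible_noise_analysis.png and ./plausible_noise_analysis.csv.
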
def autolabel(rects, ax, set=1, std=None):
r"""Used to print the score below the bars.
@@ -285,6 +330,7 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
Returns
-------
"""
results=[]
for metricName, metricScores in metricsScores.items():
logging.debug(
"Start:\t Biclass score graph generation for " + metricName)
@@ -303,7 +349,8 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
logging.debug(
"Done:\t Biclass score graph generation for " + metricName)
results+=[[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(np.array(metricScores["classifiersNames"]), np.array(metricScores["testScores"]), np.zeros(len(np.array(metricScores["testScores"]))))]
return results
def iterCmap(statsIter):
r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better.
@@ -547,7 +594,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics):
labelsNames = [arguments["LABELS_DICTIONARY"][0],
arguments["LABELS_DICTIONARY"][1]]
publishMetricsGraphs(metricsScores, directory, databaseName,
results = publishMetricsGraphs(metricsScores, directory, databaseName,
labelsNames)
publishExampleErrors(exampleErrors, directory, databaseName,
labelsNames)
@@ -558,7 +605,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics):
"exampleErrors": exampleErrors}
logging.debug("Done:\t Analzing all biclass resuls")
return biclassResults
return results, biclassResults
def genMetricsScoresMulticlass(results, trueLabels, metrics,
@@ -612,6 +659,7 @@ def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels):
def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories,
databaseName):
results=[]
for iterIndex in range(statsIter):
directory = direcories[iterIndex]
for metric in metrics:
@@ -639,6 +687,8 @@ def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories,
logging.debug(
"Done:\t Multiclass score graph generation for " + metric[0])
results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiersNames, validationScores, np.zeros(len(validationScores)))]
return results
def publishMulticlassExmapleErrors(multiclassResults, directories,
@@ -713,12 +763,12 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries,
multiclassResults = getErrorOnLabelsMulticlass(multiclassResults,
multiclassLabels)
publishMulticlassScores(multiclassResults, metrics, statsIter, directories,
results = publishMulticlassScores(multiclassResults, metrics, statsIter, directories,
benchmarkArgumentDictionaries[0]["args"].name)
publishMulticlassExmapleErrors(multiclassResults, directories,
benchmarkArgumentDictionaries[0][
"args"].name)
return multiclassResults
return results, multiclassResults
def numpy_mean_and_std(scores_array):
@@ -728,6 +778,7 @@ def numpy_mean_and_std(scores_array):
def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary,
classifiersDict, dataBaseName, statsIter,
minSize=10):
results=[]
for labelsCombination, iterResult in iterResults.items():
currentDirectory = directory + labelsDictionary[
int(labelsCombination[0])] + "-vs-" + labelsDictionary[
@@ -754,6 +805,8 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary,
metricName=metricName, fileName=fileName,
tag=" averaged",
train_STDs=trainSTDs, test_STDs=testSTDs)
results+=[[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(names, testMeans, testSTDs)]
return results
def gen_error_dat_glob(combiResults, statsIter, base_file_name):
@@ -796,6 +849,7 @@ def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary,
def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
dataBaseName, directory, statsIter,
minSize=10):
results = []
for metricName, scores in iterMulticlassResults["metricsScores"].items():
trainMeans, trainSTDs = numpy_mean_and_std(scores["trainScores"])
testMeans, testSTDs = numpy_mean_and_std(scores["testScores"])
@@ -812,6 +866,9 @@ def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
tag=" averaged multiclass",
train_STDs=trainSTDs, test_STDs=testSTDs)
results+=[[classifiersName, metricName,testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiersNames, testMeans, testSTDs)]
return results
def publishIterMulticlassExampleErrors(iterMulticlassResults, directory,
classifiersNames, statsIter, minSize=10):
@@ -900,13 +957,13 @@ def analyzebiclassIter(biclassResults, metrics, statsIter, directory,
classifiersDict[classifierName], :] += errorOnExample[
"errorOnExamples"]
publishIterBiclassMetricsScores(iterBiclassResults, directory,
results = publishIterBiclassMetricsScores(iterBiclassResults, directory,
labelsDictionary, classifiersDict,
dataBaseName, statsIter)
publishIterBiclassExampleErrors(iterBiclassResults, directory,
labelsDictionary, classifiersDict,
statsIter)
return results
def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics,
dataBaseName, nbExamples):
@@ -942,10 +999,11 @@ def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics,
logging.debug("Start:\t Getting mean results for multiclass classification")
classifiersNames = np.array(classifiersNames)
publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
results = publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
dataBaseName, directory, statsIter)
publishIterMulticlassExampleErrors(iterMulticlassResults, directory,
classifiersNames, statsIter)
return results
def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries,
@@ -954,18 +1012,20 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries,
nbExamples, nbLabels):
"""Used to analyze the results of the previous benchmarks"""
dataBaseName = benchmarkArgumentDictionaries[0]["args"].name
biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries,
results_means_std, biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries,
statsIter, metrics)
if nbMulticlass > 1:
multiclassResults = analyzeMulticlass(results, statsIter,
results_means_std, multiclassResults = analyzeMulticlass(results, statsIter,
benchmarkArgumentDictionaries,
nbExamples, nbLabels,
multiclassLabels, metrics,
classificationIndices,
directories)
if statsIter > 1:
analyzebiclassIter(biclassResults, metrics, statsIter, directory,
results_means_std = analyzebiclassIter(biclassResults, metrics, statsIter, directory,
labelsDictionary, dataBaseName, nbExamples)
if nbMulticlass > 1:
analyzeIterMulticlass(multiclassResults, directory, statsIter,
results_means_std = analyzeIterMulticlass(multiclassResults, directory, statsIter,
metrics, dataBaseName, nbExamples)
return results_means_std
@@ -433,11 +433,9 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name,
dataset_file.copy("Labels", noisy_dataset)
for view_index in range(dataset_file.get("Metadata").attrs["nbView"]):
dataset_file.copy("View" + str(view_index), noisy_dataset)
# dataset_file.close()
for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]):
view_name = "View" + str(view_index)
view_dset = noisy_dataset.get(view_name)
# orig_shape = view_dset.value.shape
view_limits = dataset_file[
"Metadata/View" + str(view_index) + "_limits"].value
view_ranges = view_limits[:, 1] - view_limits[:, 0]
@@ -448,16 +446,11 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name,
view_limits[:, 0], noised_data)
noised_data = np.where(noised_data > view_limits[:, 1],
view_limits[:, 1], noised_data)
# import matplotlib.pyplot as plt
# plt.imshow(noised_data[1,:].reshape((28,28)))
# plt.savefig("plif.png")
# lower_contrast = view_dset.value[1,:].reshape((28,28))/10
# print(np.max(lower_contrast))
# plt.imshow(lower_contrast.astype(int))
# plt.savefig("plif2.png")
# quit()
noisy_dataset[view_name][...] = noised_data
# final_shape = noised_data.shape
original_dataset_filename = dataset_file.filename
dataset_file.close()
if "_temp_" in original_dataset_filename:
os.remove(original_dataset_filename)
return noisy_dataset, dataset_name + "_noised"
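
The add_gaussian_noise hunk above keeps the range clipping and drops the commented-out debugging code; the statement that actually draws the noise falls outside the shown context, so the scaling used below (Gaussian noise proportional to each feature's range) is an assumption, while the np.where clipping mirrors the diff. A self-contained sketch of the per-view step:

import numpy as np

def add_clipped_gaussian_noise(view_data, view_limits, noise_std, random_state):
    # Scale the noise by each feature's range (assumed), then clip every value
    # back into that feature's [min, max] interval (as in the diff).
    view_ranges = view_limits[:, 1] - view_limits[:, 0]
    noised_data = view_data + random_state.normal(0, noise_std, view_data.shape) * view_ranges
    noised_data = np.where(noised_data < view_limits[:, 0], view_limits[:, 0], noised_data)
    noised_data = np.where(noised_data > view_limits[:, 1], view_limits[:, 1], noised_data)
    return noised_data

rng = np.random.RandomState(42)
view = rng.uniform(0, 1, size=(5, 3))          # 5 examples, 3 features
limits = np.array([[0.0, 1.0]] * 3)            # per-feature [min, max]
noisy_view = add_clipped_gaussian_noise(view, limits, noise_std=0.15, random_state=rng)
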