diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
index 33e94429dc3ef0eb13f7b01289bc950adf5c1e71..8c73ab9aadb2c8da300010435788e8d82bcc7ef7 100644
--- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
+++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
@@ -21,7 +21,7 @@ from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
 from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
 from .utils import GetMultiviewDb as DB
 from .ResultAnalysis import \
-    getResults  # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
+    getResults, plot_results_noise  # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
 from .utils import execution, Dataset, Multiclass

 # Author-Info
@@ -569,12 +569,15 @@ def execBenchmark(nbCores, statsIter, nbMulticlass,
                                                classificationIndices[0][1])
         multiclassGroundTruth = DATASET.get("Labels").value
         logging.debug("Start:\t Analyzing predictions")
-        getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
+        results_mean_stds = getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
                    multiclassGroundTruth, metrics, classificationIndices, directories,
                    directory, labelsDictionary, nbExamples, nbLabels)
         logging.debug("Done:\t Analyzing predictions")
+    filename = DATASET.filename
     DATASET.close()
-    return results
+    if "_temp_" in filename:
+        os.remove(filename)
+    return results_mean_stds


 def execClassif(arguments):
@@ -598,6 +601,7 @@ def execClassif(arguments):
         args.noise_std=[0.0]

     for name in dataset_list:
+        noise_results = []
         for noise_std in args.noise_std:

             directory = execution.initLogFile(name, args.views, args.CL_type,
@@ -667,9 +671,9 @@ def execClassif(arguments):
                                                     views, viewsIndices)
                 nbMulticlass = len(labelsCombinations)

-                results = execBenchmark(nbCores, statsIter, nbMulticlass,
-                                        benchmarkArgumentDictionaries, splits, directories,
-                                        directory, multiclassLabels, metrics, LABELS_DICTIONARY,
-                                        NB_CLASS, DATASET)
-                print(results)
-                quit()
+                results_mean_stds = execBenchmark(nbCores, statsIter, nbMulticlass,
+                                        benchmarkArgumentDictionaries, splits, directories,
+                                        directory, multiclassLabels, metrics, LABELS_DICTIONARY,
+                                        NB_CLASS, DATASET)
+                noise_results.append([noise_std, results_mean_stds])
+        plot_results_noise(directory, noise_results, metrics[0][0], name)
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py
index 0fbc8b08c60585d9d2ef026e2086dd6b6023d83b..3916a4b2b8c17483acbd08f747d9d45be6fbb0e4 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py
@@ -135,7 +135,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         # print(self.classification_matrix)
         # print(self.weights_, self.break_cause)
         self.weights_ = np.array(self.weights_)
-        self.weights_ /= np.sum(self.weights_)
+        if np.sum(self.weights_) != 1:
+            self.weights_ /= np.sum(self.weights_)
         formatted_y[formatted_y == -1] = 0
         formatted_y = formatted_y.reshape((m,))

diff --git a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py
index 22ba5ec02bf9e92958ca259c6de7d7f070840bf4..df79a60e94154361c95bfb8ab09841ca02fe98bc 100644
--- a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py
+++ b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py
@@ -5,9 +5,11 @@ import os
 import time

 import matplotlib as mpl
+from matplotlib.patches import Patch
 # Import third party modules
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd

 # Import own Modules
 from . import Metrics
@@ -17,6 +19,49 @@ __author__ = "Baptiste Bauvin"
 __status__ = "Prototype"  # Production, Development, Prototype


+def plot_results_noise(directory, noise_results, metric_to_plot, name, width=0.1):
+    avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
+                    "tab:olive", "tab:red"]
+    colors = {}
+    legend_patches = []
+    noise_levels = np.array([noise_level for noise_level, _ in noise_results])
+    df = pd.DataFrame(columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'])
+    if len(noise_results) > 1:
+        width = np.min(np.diff(noise_levels))
+    for noise_level, noise_result in noise_results:
+        classifiers_names, meaned_metrics, metric_stds = [], [], []
+        for classifier_result in noise_result:
+            classifier_name = classifier_result[0].split("-")[0]
+            if classifier_result[1] == metric_to_plot:
+                classifiers_names.append(classifier_name)
+                meaned_metrics.append(classifier_result[2])
+                metric_stds.append(classifier_result[3])
+                if classifier_name not in colors:
+                    try:
+                        colors[classifier_name] = avail_colors.pop(0)
+                    except IndexError:
+                        colors[classifier_name] = "k"
+        classifiers_names, meaned_metrics, metric_stds = np.array(classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
+        sorted_indices = np.argsort(-meaned_metrics)
+        for index in sorted_indices:
+            row = pd.DataFrame(
+                {'noise_level': noise_level, 'classifier_name': classifiers_names[index], 'mean_score': meaned_metrics[index],
+                 'score_std': metric_stds[index]}, index=[0])
+            df = pd.concat([df, row])
+            plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index], width=0.5*width, label=classifiers_names[index], color=colors[classifiers_names[index]])
+    for classifier_name, color in colors.items():
+        legend_patches.append(Patch(facecolor=color, label=classifier_name))
+    plt.legend(handles=legend_patches, loc='lower center', bbox_to_anchor=(0.5, 1.05), ncol=2)
+    plt.ylabel(metric_to_plot)
+    plt.title(name)
+    plt.xticks(noise_levels)
+    plt.xlabel("Noise level")
+    plt.savefig(directory + name + "_noise_analysis.png")
+    plt.close()
+    df.to_csv(directory + name + "_noise_analysis.csv")
+
+
+
 def autolabel(rects, ax, set=1, std=None):
     r"""Used to print the score below the bars.

@@ -194,7 +239,7 @@ def sort_by_test_score(train_scores, test_scores, names, train_STDs=None,
     return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs


-def plotMetricScores(trainScores, testScores, names, nbResults, metricName,
+def plotMetricScores(trainScores, testScores, names, nbResults, metricName,
                      fileName, tag="", train_STDs=None, test_STDs=None):
     r"""Used to plot and save the score barplot for a specific metric.
@@ -285,6 +330,7 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
     Returns
     -------
     """
+    results = []
     for metricName, metricScores in metricsScores.items():
         logging.debug(
             "Start:\t Biclass score graph generation for " + metricName)
@@ -303,7 +349,8 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
         logging.debug(
             "Done:\t Biclass score graph generation for " + metricName)
-
+        results += [[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(np.array(metricScores["classifiersNames"]), np.array(metricScores["testScores"]), np.zeros(len(np.array(metricScores["testScores"]))))]
+    return results


 def iterCmap(statsIter):
     r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better.
@@ -547,7 +594,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics):
         labelsNames = [arguments["LABELS_DICTIONARY"][0],
                        arguments["LABELS_DICTIONARY"][1]]

-        publishMetricsGraphs(metricsScores, directory, databaseName,
+        results = publishMetricsGraphs(metricsScores, directory, databaseName,
                              labelsNames)
         publishExampleErrors(exampleErrors, directory, databaseName,
                              labelsNames)
@@ -558,7 +605,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics):
                                           "exampleErrors": exampleErrors}

    logging.debug("Done:\t Analzing all biclass resuls")
-    return biclassResults
+    return results, biclassResults


 def genMetricsScoresMulticlass(results, trueLabels, metrics,
@@ -612,6 +659,7 @@ def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels):

 def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories,
                             databaseName):
+    results = []
     for iterIndex in range(statsIter):
         directory = direcories[iterIndex]
         for metric in metrics:
@@ -639,6 +687,8 @@ def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories,

             logging.debug(
                 "Done:\t Multiclass score graph generation for " + metric[0])
+            results += [[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiersNames, validationScores, np.zeros(len(validationScores)))]
+    return results


 def publishMulticlassExmapleErrors(multiclassResults, directories,
@@ -713,12 +763,12 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries,
    multiclassResults = getErrorOnLabelsMulticlass(multiclassResults,
                                                   multiclassLabels)

-    publishMulticlassScores(multiclassResults, metrics, statsIter, directories,
+    results = publishMulticlassScores(multiclassResults, metrics, statsIter, directories,
                            benchmarkArgumentDictionaries[0]["args"].name)
    publishMulticlassExmapleErrors(multiclassResults, directories,
                                   benchmarkArgumentDictionaries[0][
                                       "args"].name)
-    return multiclassResults
+    return results, multiclassResults


 def numpy_mean_and_std(scores_array):
@@ -728,6 +778,7 @@ def numpy_mean_and_std(scores_array):

 def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary,
                                     classifiersDict, dataBaseName, statsIter,
                                     minSize=10):
+    results = []
     for labelsCombination, iterResult in iterResults.items():
         currentDirectory = directory + labelsDictionary[
             int(labelsCombination[0])] + "-vs-" + labelsDictionary[
@@ -754,6 +805,8 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary,
                             metricName=metricName, fileName=fileName,
                             tag=" averaged",
                             train_STDs=trainSTDs, test_STDs=testSTDs)
+            results += [[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(names, testMeans, testSTDs)]
+    return results


 def gen_error_dat_glob(combiResults, statsIter, base_file_name):
@@ -796,6 +849,7 @@ def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary,

 def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
                                        dataBaseName, directory, statsIter,
                                        minSize=10):
+    results = []
     for metricName, scores in iterMulticlassResults["metricsScores"].items():
         trainMeans, trainSTDs = numpy_mean_and_std(scores["trainScores"])
         testMeans, testSTDs = numpy_mean_and_std(scores["testScores"])
@@ -812,6 +866,9 @@ def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
                         tag=" averaged multiclass",
                         train_STDs=trainSTDs, test_STDs=testSTDs)
+        results += [[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiersNames, testMeans, testSTDs)]
+    return results
+


 def publishIterMulticlassExampleErrors(iterMulticlassResults, directory,
                                        classifiersNames, statsIter, minSize=10):
@@ -900,13 +957,13 @@ def analyzebiclassIter(biclassResults, metrics, statsIter, directory,
                    classifiersDict[classifierName], :] += errorOnExample[
                        "errorOnExamples"]

-    publishIterBiclassMetricsScores(iterBiclassResults, directory,
+    results = publishIterBiclassMetricsScores(iterBiclassResults, directory,
                                    labelsDictionary, classifiersDict,
                                    dataBaseName, statsIter)
    publishIterBiclassExampleErrors(iterBiclassResults, directory,
                                    labelsDictionary, classifiersDict,
                                    statsIter)
-
+    return results

 def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics,
                           dataBaseName, nbExamples):
@@ -942,10 +999,11 @@ def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics,
    logging.debug("Start:\t Getting mean results for multiclass classification")

    classifiersNames = np.array(classifiersNames)
-    publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
+    results = publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames,
                                       dataBaseName, directory, statsIter)
    publishIterMulticlassExampleErrors(iterMulticlassResults, directory,
                                       classifiersNames, statsIter)
+    return results


 def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries,
@@ -954,18 +1012,20 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries,
               nbExamples, nbLabels):
    """Used to analyze the results of the previous benchmarks"""
    dataBaseName = benchmarkArgumentDictionaries[0]["args"].name
-    biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries,
+    results_means_std, biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries,
                                    statsIter, metrics)
+
    if nbMulticlass > 1:
-        multiclassResults = analyzeMulticlass(results, statsIter,
+        results_means_std, multiclassResults = analyzeMulticlass(results, statsIter,
                                              benchmarkArgumentDictionaries,
                                              nbExamples, nbLabels,
                                              multiclassLabels, metrics,
                                              classificationIndices, directories)
    if statsIter > 1:
-        analyzebiclassIter(biclassResults, metrics, statsIter, directory,
+        results_means_std = analyzebiclassIter(biclassResults, metrics, statsIter, directory,
                           labelsDictionary, dataBaseName, nbExamples)
        if nbMulticlass > 1:
-            analyzeIterMulticlass(multiclassResults, directory, statsIter,
+            results_means_std = analyzeIterMulticlass(multiclassResults, directory, statsIter,
                                  metrics, dataBaseName, nbExamples)
+    return results_means_std
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index c60796db0d810a0197196a7ab69a201f693a298b..52a9c21c4adc9a9094d5362fdb60edbbe2c308b1 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -433,11 +433,9 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name,
     dataset_file.copy("Labels", noisy_dataset)
     for view_index in range(dataset_file.get("Metadata").attrs["nbView"]):
         dataset_file.copy("View" + str(view_index), noisy_dataset)
-    # dataset_file.close()
     for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]):
         view_name = "View" + str(view_index)
         view_dset = noisy_dataset.get(view_name)
-        # orig_shape = view_dset.value.shape
         view_limits = dataset_file[
             "Metadata/View" + str(view_index) + "_limits"].value
         view_ranges = view_limits[:, 1] - view_limits[:, 0]
@@ -448,16 +446,11 @@
                               view_limits[:, 0], noised_data)
         noised_data = np.where(noised_data > view_limits[:, 1],
                               view_limits[:, 1], noised_data)

-        # import matplotlib.pyplot as plt
-        # plt.imshow(noised_data[1,:].reshape((28,28)))
-        # plt.savefig("plif.png")
-        # lower_contrast = view_dset.value[1,:].reshape((28,28))/10
-        # print(np.max(lower_contrast))
-        # plt.imshow(lower_contrast.astype(int))
-        # plt.savefig("plif2.png")
-        # quit()
         noisy_dataset[view_name][...] = noised_data
-        # final_shape = noised_data.shape
+    original_dataset_filename = dataset_file.filename
+    dataset_file.close()
+    if "_temp_" in original_dataset_filename:
+        os.remove(original_dataset_filename)

     return noisy_dataset, dataset_name + "_noised"
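A minimal usage sketch of the new plot_results_noise entry point, as it is fed by execClassif, may help review. The classifier names, scores, noise levels, dataset name and output directory below are invented for illustration; the row layout [classifier_name, metric_name, mean_score, score_std] follows what publishMetricsGraphs and the iter-analysis helpers now return through getResults.

# Illustrative sketch only, not part of the patch. It assumes noise_results is
# built as in execClassif: one [noise_std, results_mean_stds] pair per noise
# level, each row being [classifier_name, metric_name, mean_score, score_std].
from multiview_platform.MonoMultiViewClassifiers.ResultAnalysis import plot_results_noise

noise_results = [
    [0.0, [["DecisionTree-ViewNumber0", "accuracy_score", 0.92, 0.01],
           ["Adaboost-ViewNumber0", "accuracy_score", 0.95, 0.02]]],
    [0.5, [["DecisionTree-ViewNumber0", "accuracy_score", 0.71, 0.04],
           ["Adaboost-ViewNumber0", "accuracy_score", 0.78, 0.03]]],
]

# Writes <directory><name>_noise_analysis.png and <directory><name>_noise_analysis.csv
# (the output directory must already exist).
plot_results_noise("results/example/", noise_results, "accuracy_score", "example_dataset")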