diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index bf66ff69b2485ebead77dd58e40cc09f91871b3f..fdbe28178b653a5929bddd13d3aca9ec8b90ea66 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -210,7 +210,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, ------- """ fig, ax = plt.subplots(nrows=1, ncols=1, ) - cmap, norm = iterCmap(stats_iter) + cmap, norm = iter_cmap(stats_iter) cax = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto') plt.title('Errors depending on the classifier') @@ -284,7 +284,7 @@ def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName): plt.close() -def iterCmap(statsIter): +def iter_cmap(statsIter): r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better. Parameters @@ -375,7 +375,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): return fig_kwargs, bar_width -def get_metrics_scores_biclass(metrics, results): +def get_metrics_scores(metrics, results): r"""Used to extract metrics scores in case of biclass classification Parameters @@ -418,7 +418,7 @@ def get_metrics_scores_biclass(metrics, results): return metrics_scores -def get_example_errors_biclass(groud_truth, results): +def get_example_errors(groud_truth, results): r"""Used to get for each classifier and each example whether the classifier has misclassified the example or not. Parameters @@ -621,44 +621,35 @@ def publish_example_errors(example_errors, directory, databaseName, def plot_durations(durations, directory, database_name, durations_stds=None): file_name = os.path.join(directory, database_name + "-durations") - + durations.to_csv(file_name+"_dataframe.csv") fig = plotly.graph_objs.Figure() if durations_stds is None: - durations_stds = {} - for dur_key, dur_val in durations.items(): - durations_stds[dur_key] = dict((key, 0) - for key, val in durations[dur_key].items()) + durations_stds = pd.DataFrame(0, durations.index, durations.columns) + else: + durations_stds.to_csv(file_name+"_stds_dataframe.csv") fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization', - x=list(durations['hps'].keys()), - y=list(durations['hps'].values()), + x=durations.index, + y=durations['hps'], error_y=dict(type='data', - array=list(durations_stds[ - "hps"].values())), + array=durations_stds["hps"]), marker_color="grey")) fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)', - x=list(durations['fit'].keys()), - y=list(durations['fit'].values()), + x=durations.index, + y=durations['fit'], error_y=dict(type='data', - array=list(durations_stds[ - "fit"].values())), + array=durations_stds["fit"]), marker_color="black")) fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)', - x=list(durations['pred'].keys()), - y=list(durations['pred'].values()), + x=durations.index, + y=durations['pred'], error_y=dict(type='data', - array=list(durations_stds[ - "pred"].values())), + array=durations_stds["pred"]), marker_color="lightgrey")) - fig.update_layout(title="Durations for each classfier") + fig.update_layout(title="Durations for each classfier", + yaxis_title="Duration (s)") fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) - index = durations["hps"].keys() - df = pd.DataFrame(index=index, - 
columns=["hps", "fit", "pred"],) - for key, value in durations.items(): - df[key] = [value[ind] for ind in index] - df.to_csv(file_name+"_dataframe.csv") def publish_feature_importances(feature_importances, directory, database_name, @@ -755,16 +746,15 @@ def get_feature_importances(result, feature_names=None): def get_duration(results): - durations = {"hps":{}, "fit":{}, "pred":{}} + df = pd.DataFrame(columns=["hps", "fit", "pred"], ) for classifier_result in results: - durations["hps"][ - classifier_result.get_classifier_name()] = classifier_result.hps_duration - durations["fit"][ - classifier_result.get_classifier_name()] = classifier_result.fit_duration - durations["pred"][ - classifier_result.get_classifier_name()] = classifier_result.pred_duration - return durations - + df.at[classifier_result.get_classifier_name(), + "hps"] = classifier_result.hps_duration + df.at[classifier_result.get_classifier_name(), + "fit"] = classifier_result.fit_duration + df.at[classifier_result.get_classifier_name(), + "pred"] = classifier_result.pred_duration + return df def publish_tracebacks(directory, database_name, labels_names, tracebacks, @@ -818,8 +808,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, for iter_index, result, tracebacks in results: arguments = get_arguments(benchmark_argument_dictionaries, iter_index) - metrics_scores = get_metrics_scores_biclass(metrics, result) - example_errors = get_example_errors_biclass(labels, result) + metrics_scores = get_metrics_scores(metrics, result) + example_errors = get_example_errors(labels, result) feature_importances = get_feature_importances(result) durations = get_duration(result) directory = arguments["directory"] @@ -850,168 +840,6 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, return res, iter_results, flagged_tracebacks_list -# def gen_metrics_scores_multiclass(results, true_labels, metrics_list, -# arguments_dictionaries): -# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" -# -# logging.debug("Start:\t Getting multiclass scores for each metric") -# -# for metric in metrics_list: -# metric_module = getattr(metrics, metric[0]) -# for iter_index, iter_results in enumerate(results): -# -# for argumentsDictionary in arguments_dictionaries: -# if argumentsDictionary["flag"][0] == iter_index: -# classification_indices = argumentsDictionary[ -# "classification_indices"] -# train_indices, test_indices, multiclass_test_indices = classification_indices -# -# for classifier_name, resultDictionary in iter_results.items(): -# if not "metrics_scores" in resultDictionary: -# results[iter_index][classifier_name]["metrics_scores"] = {} -# train_score = metric_module.score(true_labels[train_indices], -# resultDictionary["labels"][ -# train_indices], -# multiclass=True) -# test_score = metric_module.score( -# true_labels[multiclass_test_indices], -# resultDictionary["labels"][multiclass_test_indices], -# multiclass=True) -# results[iter_index][classifier_name]["metrics_scores"][ -# metric[0]] = [train_score, test_score] -# logging.debug("Done:\t Getting multiclass scores for each metric") -# return results - - -# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels): -# """Used to add all the arrays showing on which example there is an error for each clf and each iteration""" -# -# logging.debug("Start:\t Getting errors on each example for each classifier") -# -# for iter_index, iter_results in 
enumerate(multiclass_results): -# for classifier_name, classifier_results in iter_results.items(): -# error_on_examples = classifier_results["labels"] == multiclass_labels -# multiclass_results[iter_index][classifier_name][ -# "error_on_examples"] = error_on_examples.astype(int) -# -# logging.debug("Done:\t Getting errors on each example for each classifier") -# -# return multiclass_results - - -# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories, -# databaseName): -# results=[] -# for iter_index in range(stats_iter): -# directory = direcories[iter_index] -# for metric in metrics: -# logging.debug( -# "Start:\t Multiclass score graph generation for " + metric[0]) -# classifiers_names = np.array([classifier_name for classifier_name in -# multiclass_results[iter_index].keys()]) -# train_scores = np.array([multiclass_results[iter_index][ -# classifier_name]["metrics_scores"][ -# metric[0]][0] -# for classifier_name in classifiers_names]) -# validationScores = np.array([multiclass_results[iter_index][ -# classifier_name]["metrics_scores"][ -# metric[0]][1] -# for classifier_name in -# classifiers_names]) -# -# nbResults = classifiers_names.shape[0] -# fileName = os.path.join(directory , time.strftime( -# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[ -# 0]) -# -# plot_metric_scores(train_scores, validationScores, classifiers_names, -# nbResults, metric[0], fileName, tag=" multiclass") -# -# logging.debug( -# "Done:\t Multiclass score graph generation for " + metric[0]) -# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))] -# return results - - -# def publishMulticlassExmapleErrors(multiclass_results, directories, -# databaseName, example_ids, multiclass_labels): -# for iter_index, multiclass_result in enumerate(multiclass_results): -# directory = directories[iter_index] -# logging.debug("Start:\t Multiclass Label analysis figure generation") -# -# base_file_name = os.path.join(directory, time.strftime( -# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-") -# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data( -# dict((key, multiclass_result[key]['error_on_examples']) -# for key in multiclass_result.keys()),) -# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, -# base_file_name, example_ids=example_ids, labels=multiclass_labels) -# -# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, -# base_file_name) -# -# logging.debug("Done:\t Multiclass Label analysis figure generation") - -# -# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries, -# nb_examples, nb_labels, multiclass_labels, -# metrics, classification_indices, directories, example_ids): -# """Used to transform one versus one results in multiclass results and to publish it""" -# multiclass_results = [{} for _ in range(stats_iter)] -# -# for flag, result, tracebacks in results: -# iter_index = flag[0] -# classifierPositive = flag[1][0] -# classifierNegative = flag[1][1] -# -# for benchmarkArgumentDictionary in benchmark_argument_dictionaries: -# if benchmarkArgumentDictionary["flag"] == flag: -# trainIndices, testIndices, testMulticlassIndices = \ -# benchmarkArgumentDictionary["classification_indices"] -# -# for classifierResult in result: -# classifier_name = classifierResult.get_classifier_name() -# if classifier_name not in multiclass_results[iter_index]: -# 
multiclass_results[iter_index][classifier_name] = np.zeros( -# (nb_examples, nb_labels), dtype=int) -# for exampleIndex in trainIndices: -# label = classifierResult.full_labels_pred[exampleIndex] -# if label == 1: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierPositive] += 1 -# else: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierNegative] += 1 -# for multiclassIndex, exampleIndex in enumerate( -# testMulticlassIndices): -# label = classifierResult.y_test_multiclass_pred[multiclassIndex] -# if label == 1: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierPositive] += 1 -# else: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierNegative] += 1 -# -# for iter_index, multiclassiterResult in enumerate(multiclass_results): -# for key, value in multiclassiterResult.items(): -# multiclass_results[iter_index][key] = { -# "labels": np.argmax(value, axis=1)} -# -# multiclass_results = gen_metrics_scores_multiclass(multiclass_results, -# multiclass_labels, metrics, -# benchmark_argument_dictionaries) -# multiclass_results = get_error_on_labels_multiclass(multiclass_results, -# multiclass_labels) -# -# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories, -# benchmark_argument_dictionaries[0]["args"]["name"]) -# publishMulticlassExmapleErrors(multiclass_results, directories, -# benchmark_argument_dictionaries[0][ -# "args"]["name"], example_ids, multiclass_labels) -# -# return results, multiclass_results - - def numpy_mean_and_std(scores_array): return np.mean(scores_array, axis=1), np.std(scores_array, axis=1) @@ -1080,47 +908,6 @@ def publish_all_example_errors(iter_results, directory, "Done:\t Global biclass label analysis figures generation") -# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names, -# data_base_name, directory, stats_iter, -# min_size=10): -# results = [] -# for metric_name, scores in iter_multiclass_results["metrics_scores"].items(): -# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) -# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) -# -# nb_results = classifiers_names.shape[0] -# -# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str( -# stats_iter) + "_iter-" + metric_name + ".png") -# -# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results, -# metric_name, file_name, tag=" averaged multiclass", -# train_STDs=trainSTDs, test_STDs=testSTDs) -# -# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)] -# return results - - -# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory, -# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10): -# logging.debug( -# "Start:\t Global multiclass label analysis figures generation") -# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob( -# dict((clf_name, combi_res) -# for clf_name, combi_res -# in zip(classifiers_names, -# iter_multiclass_results["error_on_examples"])), -# stats_iter) -# -# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, -# directory, stats_iter=stats_iter, -# example_ids=example_ids, labels=multiclass_labels) -# -# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples, -# directory) -# -# logging.debug("Done:\t Global multiclass label analysis figures 
generation") - def gen_classifiers_dict(results, metrics): classifiers_dict = dict((classifier_name, classifierIndex) @@ -1158,14 +945,14 @@ def add_new_metric(iter_biclass_results, metric, labels_combination, return iter_biclass_results -def format_previous_results(biclass_results): +def format_previous_results(iter_results_lists): """ Formats each statistical iteration's result into a mean/std analysis for the metrics and adds the errors of each statistical iteration. Parameters ---------- - biclass_results : The raw results, for each statistical iteration i contains + iter_results_lists : The raw results, for each statistical iteration i contains - biclass_results[i]["metrics_scores"] is a dictionary with a pd.dataframe for each metrics - biclass_results[i]["example_errors"], a dicaitonary with a np.array @@ -1187,7 +974,7 @@ def format_previous_results(biclass_results): metric_concat_dict = {} for iter_index, metrics_score in enumerate( - biclass_results["metrics_scores"]): + iter_results_lists["metrics_scores"]): for metric_name, dataframe in metrics_score.items(): if metric_name not in metric_concat_dict: metric_concat_dict[metric_name] = dataframe @@ -1202,9 +989,18 @@ def format_previous_results(biclass_results): metrics_analysis[metric_name][ "std"] = dataframe.groupby(dataframe.index).std(ddof=0) + durations_df_concat = pd.DataFrame(dtype=float) + for iter_index, durations_df in enumerate(iter_results_lists["durations"]): + durations_df_concat = pd.concat((durations_df_concat, durations_df), + axis=1) + durations_df_concat = durations_df_concat.astype(float) + grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1) + duration_means = grouped_df.mean() + duration_stds = grouped_df.std() + importance_concat_dict = {} for iter_index, view_feature_importances in enumerate( - biclass_results["feature_importances"]): + iter_results_lists["feature_importances"]): for view_name, feature_importances in view_feature_importances.items(): if view_name not in importance_concat_dict: importance_concat_dict[view_name] = feature_importances @@ -1220,7 +1016,7 @@ def format_previous_results(biclass_results): dataframe.index).std(ddof=0) added_example_errors = {} - for example_errors in biclass_results["example_errors"]: + for example_errors in iter_results_lists["example_errors"]: for classifier_name, errors in example_errors.items(): if classifier_name not in added_example_errors: added_example_errors[classifier_name] = errors @@ -1228,7 +1024,7 @@ def format_previous_results(biclass_results): added_example_errors[classifier_name] += errors error_analysis = added_example_errors return metrics_analysis, error_analysis, feature_importances_analysis, feature_importances_stds, \ - biclass_results["labels"] + iter_results_lists["labels"], duration_means, duration_stds def analyze_all(biclass_results, stats_iter, directory, data_base_name, @@ -1236,7 +1032,8 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name, """Used to format the results in order to plot the mean results on the iterations""" metrics_analysis, error_analysis, \ feature_importances, feature_importances_stds, \ - labels = format_previous_results(biclass_results) + labels, duration_means, \ + duration_stds = format_previous_results(biclass_results) results = publish_all_metrics_scores(metrics_analysis, directory, @@ -1245,9 +1042,242 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name, example_ids, labels) publish_feature_importances(feature_importances, directory, 
data_base_name, feature_importances_stds) + plot_durations(duration_means, directory, data_base_name, duration_stds) return results +def save_failed(failed_list, directory): + with open(os.path.join(directory, "failed_algorithms.txt"), + "w") as failed_file: + failed_file.write( + "The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n") + failed_file.write(", \n".join(failed_list) + ".") + + +def get_results(results, stats_iter, benchmark_argument_dictionaries, + metrics, directory, example_ids, labels): + """Used to analyze the results of the previous benchmarks""" + data_base_name = benchmark_argument_dictionaries[0]["args"]["name"] + + results_means_std, biclass_results, flagged_failed = analyze_iterations( + results, benchmark_argument_dictionaries, + stats_iter, metrics, example_ids, labels) + if flagged_failed: + save_failed(flagged_failed, directory) + + if stats_iter > 1: + results_means_std = analyze_all( + biclass_results, stats_iter, directory, + data_base_name, example_ids) + return results_means_std + + + + +# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names, +# data_base_name, directory, stats_iter, +# min_size=10): +# results = [] +# for metric_name, scores in iter_multiclass_results["metrics_scores"].items(): +# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) +# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) +# +# nb_results = classifiers_names.shape[0] +# +# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str( +# stats_iter) + "_iter-" + metric_name + ".png") +# +# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results, +# metric_name, file_name, tag=" averaged multiclass", +# train_STDs=trainSTDs, test_STDs=testSTDs) +# +# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)] +# return results + + +# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory, +# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10): +# logging.debug( +# "Start:\t Global multiclass label analysis figures generation") +# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob( +# dict((clf_name, combi_res) +# for clf_name, combi_res +# in zip(classifiers_names, +# iter_multiclass_results["error_on_examples"])), +# stats_iter) +# +# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, +# directory, stats_iter=stats_iter, +# example_ids=example_ids, labels=multiclass_labels) +# +# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples, +# directory) +# +# logging.debug("Done:\t Global multiclass label analysis figures generation") + + +# def gen_metrics_scores_multiclass(results, true_labels, metrics_list, +# arguments_dictionaries): +# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" +# +# logging.debug("Start:\t Getting multiclass scores for each metric") +# +# for metric in metrics_list: +# metric_module = getattr(metrics, metric[0]) +# for iter_index, iter_results in enumerate(results): +# +# for argumentsDictionary in arguments_dictionaries: +# if argumentsDictionary["flag"][0] == iter_index: +# classification_indices = argumentsDictionary[ +# "classification_indices"] +# train_indices, test_indices, multiclass_test_indices = classification_indices +# +# for classifier_name, 
resultDictionary in iter_results.items(): +# if not "metrics_scores" in resultDictionary: +# results[iter_index][classifier_name]["metrics_scores"] = {} +# train_score = metric_module.score(true_labels[train_indices], +# resultDictionary["labels"][ +# train_indices], +# multiclass=True) +# test_score = metric_module.score( +# true_labels[multiclass_test_indices], +# resultDictionary["labels"][multiclass_test_indices], +# multiclass=True) +# results[iter_index][classifier_name]["metrics_scores"][ +# metric[0]] = [train_score, test_score] +# logging.debug("Done:\t Getting multiclass scores for each metric") +# return results + + +# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels): +# """Used to add all the arrays showing on which example there is an error for each clf and each iteration""" +# +# logging.debug("Start:\t Getting errors on each example for each classifier") +# +# for iter_index, iter_results in enumerate(multiclass_results): +# for classifier_name, classifier_results in iter_results.items(): +# error_on_examples = classifier_results["labels"] == multiclass_labels +# multiclass_results[iter_index][classifier_name][ +# "error_on_examples"] = error_on_examples.astype(int) +# +# logging.debug("Done:\t Getting errors on each example for each classifier") +# +# return multiclass_results + + +# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories, +# databaseName): +# results=[] +# for iter_index in range(stats_iter): +# directory = direcories[iter_index] +# for metric in metrics: +# logging.debug( +# "Start:\t Multiclass score graph generation for " + metric[0]) +# classifiers_names = np.array([classifier_name for classifier_name in +# multiclass_results[iter_index].keys()]) +# train_scores = np.array([multiclass_results[iter_index][ +# classifier_name]["metrics_scores"][ +# metric[0]][0] +# for classifier_name in classifiers_names]) +# validationScores = np.array([multiclass_results[iter_index][ +# classifier_name]["metrics_scores"][ +# metric[0]][1] +# for classifier_name in +# classifiers_names]) +# +# nbResults = classifiers_names.shape[0] +# fileName = os.path.join(directory , time.strftime( +# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[ +# 0]) +# +# plot_metric_scores(train_scores, validationScores, classifiers_names, +# nbResults, metric[0], fileName, tag=" multiclass") +# +# logging.debug( +# "Done:\t Multiclass score graph generation for " + metric[0]) +# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))] +# return results + + +# def publishMulticlassExmapleErrors(multiclass_results, directories, +# databaseName, example_ids, multiclass_labels): +# for iter_index, multiclass_result in enumerate(multiclass_results): +# directory = directories[iter_index] +# logging.debug("Start:\t Multiclass Label analysis figure generation") +# +# base_file_name = os.path.join(directory, time.strftime( +# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-") +# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data( +# dict((key, multiclass_result[key]['error_on_examples']) +# for key in multiclass_result.keys()),) +# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, +# base_file_name, example_ids=example_ids, labels=multiclass_labels) +# +# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, +# base_file_name) +# +# logging.debug("Done:\t Multiclass Label 
analysis figure generation") + +# +# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries, +# nb_examples, nb_labels, multiclass_labels, +# metrics, classification_indices, directories, example_ids): +# """Used to transform one versus one results in multiclass results and to publish it""" +# multiclass_results = [{} for _ in range(stats_iter)] +# +# for flag, result, tracebacks in results: +# iter_index = flag[0] +# classifierPositive = flag[1][0] +# classifierNegative = flag[1][1] +# +# for benchmarkArgumentDictionary in benchmark_argument_dictionaries: +# if benchmarkArgumentDictionary["flag"] == flag: +# trainIndices, testIndices, testMulticlassIndices = \ +# benchmarkArgumentDictionary["classification_indices"] +# +# for classifierResult in result: +# classifier_name = classifierResult.get_classifier_name() +# if classifier_name not in multiclass_results[iter_index]: +# multiclass_results[iter_index][classifier_name] = np.zeros( +# (nb_examples, nb_labels), dtype=int) +# for exampleIndex in trainIndices: +# label = classifierResult.full_labels_pred[exampleIndex] +# if label == 1: +# multiclass_results[iter_index][classifier_name][ +# exampleIndex, classifierPositive] += 1 +# else: +# multiclass_results[iter_index][classifier_name][ +# exampleIndex, classifierNegative] += 1 +# for multiclassIndex, exampleIndex in enumerate( +# testMulticlassIndices): +# label = classifierResult.y_test_multiclass_pred[multiclassIndex] +# if label == 1: +# multiclass_results[iter_index][classifier_name][ +# exampleIndex, classifierPositive] += 1 +# else: +# multiclass_results[iter_index][classifier_name][ +# exampleIndex, classifierNegative] += 1 +# +# for iter_index, multiclassiterResult in enumerate(multiclass_results): +# for key, value in multiclassiterResult.items(): +# multiclass_results[iter_index][key] = { +# "labels": np.argmax(value, axis=1)} +# +# multiclass_results = gen_metrics_scores_multiclass(multiclass_results, +# multiclass_labels, metrics, +# benchmark_argument_dictionaries) +# multiclass_results = get_error_on_labels_multiclass(multiclass_results, +# multiclass_labels) +# +# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories, +# benchmark_argument_dictionaries[0]["args"]["name"]) +# publishMulticlassExmapleErrors(multiclass_results, directories, +# benchmark_argument_dictionaries[0][ +# "args"]["name"], example_ids, multiclass_labels) +# +# return results, multiclass_results + + # def analyze_iter_multiclass(multiclass_results, directory, stats_iter, metrics, # data_base_name, nb_examples, example_ids, multiclass_labels): # """Used to mean the multiclass results on the iterations executed with different random states""" @@ -1287,30 +1317,4 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name, # data_base_name, directory, stats_iter) # publish_iter_multiclass_example_errors(iter_multiclass_results, directory, # classifiers_names, stats_iter, example_ids, multiclass_labels) -# return results - - -def save_failed(failed_list, directory): - with open(os.path.join(directory, "failed_algorithms.txt"), - "w") as failed_file: - failed_file.write( - "The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n") - failed_file.write(", \n".join(failed_list) + ".") - - -def get_results(results, stats_iter, benchmark_argument_dictionaries, - metrics, directory, example_ids, labels): - """Used to analyze the results of the previous benchmarks""" - data_base_name = 
benchmark_argument_dictionaries[0]["args"]["name"] - - results_means_std, biclass_results, flagged_failed = analyze_iterations( - results, benchmark_argument_dictionaries, - stats_iter, metrics, example_ids, labels) - if flagged_failed: - save_failed(flagged_failed, directory) - - if stats_iter > 1: - results_means_std = analyze_all( - biclass_results, stats_iter, directory, - data_base_name, example_ids) - return results_means_std +# return results \ No newline at end of file diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_ResultAnalysis.py index 18ad42de3f9a4ae8b2e1f8b4c27239564edc9a18..413c8d52260bc22d89119b4750bdb50c3ca25413 100644 --- a/multiview_platform/tests/test_ResultAnalysis.py +++ b/multiview_platform/tests/test_ResultAnalysis.py @@ -30,8 +30,8 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): {"accuracy_score":[0.9, 0.95], "f1_score":[0.91, 0.96]} , "", "", "", "", "",)] - metrics_scores = result_analysis.get_metrics_scores_biclass(metrics, - results) + metrics_scores = result_analysis.get_metrics_scores(metrics, + results) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) np.testing.assert_array_equal(np.array(metrics_scores["accuracy_score"].loc["train"]), np.array([0.9])) @@ -70,8 +70,8 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): classifier="", n_features="") ] - metrics_scores = result_analysis.get_metrics_scores_biclass(metrics, - results) + metrics_scores = result_analysis.get_metrics_scores(metrics, + results) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) np.testing.assert_array_equal( @@ -105,8 +105,8 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): classifier="", n_features="") ] - metrics_scores = result_analysis.get_metrics_scores_biclass(metrics, - results) + metrics_scores = result_analysis.get_metrics_scores(metrics, + results) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) np.testing.assert_array_equal( @@ -141,8 +141,8 @@ class Test_get_example_errors_biclass(unittest.TestCase): , np.array([0,0,1,1,0,0,1,1,0]), "", "", "", "",) ] - example_errors = result_analysis.get_example_errors_biclass(ground_truth, - results) + example_errors = result_analysis.get_example_errors(ground_truth, + results) self.assertIsInstance(example_errors, dict) np.testing.assert_array_equal(example_errors["mv"], np.array([1,0,1,0,0,1,0,1,-100]))
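
Note on the duration refactor above: get_duration now returns one pandas DataFrame per statistical iteration (classifier names as the index, "hps"/"fit"/"pred" as columns), and format_previous_results concatenates those frames column-wise, then groups the columns by name to obtain the means and standard deviations that plot_durations writes to CSV and plots. The snippet below is a minimal, self-contained sketch of that aggregation pattern on toy data; the classifier names and duration values are invented for illustration and are not part of the patch.

import pandas as pd

# Toy per-iteration duration frames, shaped like the output of get_duration:
# one row per classifier, one column per phase. Names and values are made up.
iter_durations = [
    pd.DataFrame({"hps": [1.0, 2.0], "fit": [0.5, 0.7], "pred": [0.1, 0.2]},
                 index=["clf_a", "clf_b"]),
    pd.DataFrame({"hps": [1.2, 1.8], "fit": [0.6, 0.8], "pred": [0.1, 0.3]},
                 index=["clf_a", "clf_b"]),
]

# Same aggregation as in format_previous_results: concatenating along axis=1
# makes the "hps"/"fit"/"pred" columns repeat once per iteration, and grouping
# the columns by label averages them across iterations (pandas 1.x-style
# axis=1 groupby, mirroring the patch).
concat = pd.concat(iter_durations, axis=1).astype(float)
grouped = concat.groupby(concat.columns, axis=1)
duration_means = grouped.mean()   # what plot_durations receives as `durations`
duration_stds = grouped.std()     # what it receives as `durations_stds` (error bars)

print(duration_means)
print(duration_stds)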