Commit f98fd248 authored by Baptiste Bauvin

Added duration analysis

parent cda7ba8b
@@ -210,7 +210,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
-------
"""
fig, ax = plt.subplots(nrows=1, ncols=1, )
cmap, norm = iterCmap(stats_iter)
cmap, norm = iter_cmap(stats_iter)
cax = plt.imshow(data, cmap=cmap, norm=norm,
aspect='auto')
plt.title('Errors depending on the classifier')
@@ -284,7 +284,7 @@ def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName):
plt.close()
def iterCmap(statsIter):
def iter_cmap(statsIter):
r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better.
Parameters
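The docstring above describes a colormap with one shade per statistical iteration, "the whiter the better". A minimal standalone sketch of that idea (an assumed illustration, not the project's actual implementation):

    # Hypothetical sketch: one grey level per value in [0, stats_iter],
    # lighter shades for better (lower) values, assuming that convention.
    import numpy as np
    from matplotlib.colors import ListedColormap, BoundaryNorm

    def iter_cmap_sketch(stats_iter):
        shades = [(1 - i / stats_iter,) * 3 for i in range(stats_iter + 1)]
        cmap = ListedColormap(shades)
        norm = BoundaryNorm(np.arange(-0.5, stats_iter + 1, 1), cmap.N)
        return cmap, norm

It would be used as in the plot_2d hunk above: cmap, norm = iter_cmap(stats_iter), then passed to plt.imshow(data, cmap=cmap, norm=norm, aspect='auto').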
@@ -375,7 +375,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35):
return fig_kwargs, bar_width
def get_metrics_scores_biclass(metrics, results):
def get_metrics_scores(metrics, results):
r"""Used to extract metrics scores in case of biclass classification
Parameters
@@ -418,7 +418,7 @@ def get_metrics_scores_biclass(metrics, results):
return metrics_scores
def get_example_errors_biclass(groud_truth, results):
def get_example_errors(groud_truth, results):
r"""Used to get for each classifier and each example whether the classifier has misclassified the example or not.
Parameters
@@ -621,44 +621,35 @@ def publish_example_errors(example_errors, directory, databaseName,
def plot_durations(durations, directory, database_name, durations_stds=None):
file_name = os.path.join(directory, database_name + "-durations")
durations.to_csv(file_name+"_dataframe.csv")
fig = plotly.graph_objs.Figure()
if durations_stds is None:
durations_stds = {}
for dur_key, dur_val in durations.items():
durations_stds[dur_key] = dict((key, 0)
for key, val in durations[dur_key].items())
durations_stds = pd.DataFrame(0, durations.index, durations.columns)
else:
durations_stds.to_csv(file_name+"_stds_dataframe.csv")
fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization',
x=list(durations['hps'].keys()),
y=list(durations['hps'].values()),
x=durations.index,
y=durations['hps'],
error_y=dict(type='data',
array=list(durations_stds[
"hps"].values())),
array=durations_stds["hps"]),
marker_color="grey"))
fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)',
x=list(durations['fit'].keys()),
y=list(durations['fit'].values()),
x=durations.index,
y=durations['fit'],
error_y=dict(type='data',
array=list(durations_stds[
"fit"].values())),
array=durations_stds["fit"]),
marker_color="black"))
fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)',
x=list(durations['pred'].keys()),
y=list(durations['pred'].values()),
x=durations.index,
y=durations['pred'],
error_y=dict(type='data',
array=list(durations_stds[
"pred"].values())),
array=durations_stds["pred"]),
marker_color="lightgrey"))
fig.update_layout(title="Durations for each classfier")
fig.update_layout(title="Durations for each classfier",
yaxis_title="Duration (s)")
fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)')
plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
index = durations["hps"].keys()
df = pd.DataFrame(index=index,
columns=["hps", "fit", "pred"],)
for key, value in durations.items():
df[key] = [value[ind] for ind in index]
df.to_csv(file_name+"_dataframe.csv")
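A hypothetical call of the new plot_durations above (the DataFrame is made up; its shape, classifier names as index and phases as columns, matches what the reworked get_duration below returns):

    import pandas as pd

    durations = pd.DataFrame({"hps": [12.3, 45.1], "fit": [1.4, 8.9], "pred": [0.2, 0.4]},
                             index=["svm", "adaboost"])
    stds = pd.DataFrame(0.0, index=durations.index, columns=durations.columns)

    # Writes my_dataset-durations_dataframe.csv, my_dataset-durations_stds_dataframe.csv
    # and my_dataset-durations.html (a grouped Plotly bar chart with error bars)
    # into the current directory.
    plot_durations(durations, ".", "my_dataset", durations_stds=stds)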
def publish_feature_importances(feature_importances, directory, database_name,
@@ -755,16 +746,15 @@ def get_feature_importances(result, feature_names=None):
def get_duration(results):
durations = {"hps":{}, "fit":{}, "pred":{}}
df = pd.DataFrame(columns=["hps", "fit", "pred"], )
for classifier_result in results:
durations["hps"][
classifier_result.get_classifier_name()] = classifier_result.hps_duration
durations["fit"][
classifier_result.get_classifier_name()] = classifier_result.fit_duration
durations["pred"][
classifier_result.get_classifier_name()] = classifier_result.pred_duration
return durations
df.at[classifier_result.get_classifier_name(),
"hps"] = classifier_result.hps_duration
df.at[classifier_result.get_classifier_name(),
"fit"] = classifier_result.fit_duration
df.at[classifier_result.get_classifier_name(),
"pred"] = classifier_result.pred_duration
return df
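get_duration now returns a pandas DataFrame (one row per classifier, one column per phase) instead of nested dictionaries. A small usage sketch with a hypothetical stand-in for the classifier result objects (not a class from the project):

    from types import SimpleNamespace

    class FakeClassifierResult(SimpleNamespace):
        def get_classifier_name(self):
            return self.name

    results = [FakeClassifierResult(name="svm", hps_duration=1.2,
                                    fit_duration=0.4, pred_duration=0.05),
               FakeClassifierResult(name="adaboost", hps_duration=3.1,
                                    fit_duration=1.0, pred_duration=0.08)]
    durations = get_duration(results)
    # durations is roughly:
    #            hps  fit  pred
    # svm        1.2  0.4  0.05
    # adaboost   3.1  1.0  0.08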
def publish_tracebacks(directory, database_name, labels_names, tracebacks,
@@ -818,8 +808,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
for iter_index, result, tracebacks in results:
arguments = get_arguments(benchmark_argument_dictionaries, iter_index)
metrics_scores = get_metrics_scores_biclass(metrics, result)
example_errors = get_example_errors_biclass(labels, result)
metrics_scores = get_metrics_scores(metrics, result)
example_errors = get_example_errors(labels, result)
feature_importances = get_feature_importances(result)
durations = get_duration(result)
directory = arguments["directory"]
@@ -850,168 +840,6 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
return res, iter_results, flagged_tracebacks_list
# def gen_metrics_scores_multiclass(results, true_labels, metrics_list,
# arguments_dictionaries):
# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration"""
#
# logging.debug("Start:\t Getting multiclass scores for each metric")
#
# for metric in metrics_list:
# metric_module = getattr(metrics, metric[0])
# for iter_index, iter_results in enumerate(results):
#
# for argumentsDictionary in arguments_dictionaries:
# if argumentsDictionary["flag"][0] == iter_index:
# classification_indices = argumentsDictionary[
# "classification_indices"]
# train_indices, test_indices, multiclass_test_indices = classification_indices
#
# for classifier_name, resultDictionary in iter_results.items():
# if not "metrics_scores" in resultDictionary:
# results[iter_index][classifier_name]["metrics_scores"] = {}
# train_score = metric_module.score(true_labels[train_indices],
# resultDictionary["labels"][
# train_indices],
# multiclass=True)
# test_score = metric_module.score(
# true_labels[multiclass_test_indices],
# resultDictionary["labels"][multiclass_test_indices],
# multiclass=True)
# results[iter_index][classifier_name]["metrics_scores"][
# metric[0]] = [train_score, test_score]
# logging.debug("Done:\t Getting multiclass scores for each metric")
# return results
# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels):
# """Used to add all the arrays showing on which example there is an error for each clf and each iteration"""
#
# logging.debug("Start:\t Getting errors on each example for each classifier")
#
# for iter_index, iter_results in enumerate(multiclass_results):
# for classifier_name, classifier_results in iter_results.items():
# error_on_examples = classifier_results["labels"] == multiclass_labels
# multiclass_results[iter_index][classifier_name][
# "error_on_examples"] = error_on_examples.astype(int)
#
# logging.debug("Done:\t Getting errors on each example for each classifier")
#
# return multiclass_results
# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories,
# databaseName):
# results=[]
# for iter_index in range(stats_iter):
# directory = direcories[iter_index]
# for metric in metrics:
# logging.debug(
# "Start:\t Multiclass score graph generation for " + metric[0])
# classifiers_names = np.array([classifier_name for classifier_name in
# multiclass_results[iter_index].keys()])
# train_scores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][0]
# for classifier_name in classifiers_names])
# validationScores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][1]
# for classifier_name in
# classifiers_names])
#
# nbResults = classifiers_names.shape[0]
# fileName = os.path.join(directory , time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[
# 0])
#
# plot_metric_scores(train_scores, validationScores, classifiers_names,
# nbResults, metric[0], fileName, tag=" multiclass")
#
# logging.debug(
# "Done:\t Multiclass score graph generation for " + metric[0])
# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))]
# return results
# def publishMulticlassExmapleErrors(multiclass_results, directories,
# databaseName, example_ids, multiclass_labels):
# for iter_index, multiclass_result in enumerate(multiclass_results):
# directory = directories[iter_index]
# logging.debug("Start:\t Multiclass Label analysis figure generation")
#
# base_file_name = os.path.join(directory, time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-")
# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data(
# dict((key, multiclass_result[key]['error_on_examples'])
# for key in multiclass_result.keys()),)
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# base_file_name, example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples,
# base_file_name)
#
# logging.debug("Done:\t Multiclass Label analysis figure generation")
#
# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries,
# nb_examples, nb_labels, multiclass_labels,
# metrics, classification_indices, directories, example_ids):
# """Used to transform one versus one results in multiclass results and to publish it"""
# multiclass_results = [{} for _ in range(stats_iter)]
#
# for flag, result, tracebacks in results:
# iter_index = flag[0]
# classifierPositive = flag[1][0]
# classifierNegative = flag[1][1]
#
# for benchmarkArgumentDictionary in benchmark_argument_dictionaries:
# if benchmarkArgumentDictionary["flag"] == flag:
# trainIndices, testIndices, testMulticlassIndices = \
# benchmarkArgumentDictionary["classification_indices"]
#
# for classifierResult in result:
# classifier_name = classifierResult.get_classifier_name()
# if classifier_name not in multiclass_results[iter_index]:
# multiclass_results[iter_index][classifier_name] = np.zeros(
# (nb_examples, nb_labels), dtype=int)
# for exampleIndex in trainIndices:
# label = classifierResult.full_labels_pred[exampleIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
# for multiclassIndex, exampleIndex in enumerate(
# testMulticlassIndices):
# label = classifierResult.y_test_multiclass_pred[multiclassIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
#
# for iter_index, multiclassiterResult in enumerate(multiclass_results):
# for key, value in multiclassiterResult.items():
# multiclass_results[iter_index][key] = {
# "labels": np.argmax(value, axis=1)}
#
# multiclass_results = gen_metrics_scores_multiclass(multiclass_results,
# multiclass_labels, metrics,
# benchmark_argument_dictionaries)
# multiclass_results = get_error_on_labels_multiclass(multiclass_results,
# multiclass_labels)
#
# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories,
# benchmark_argument_dictionaries[0]["args"]["name"])
# publishMulticlassExmapleErrors(multiclass_results, directories,
# benchmark_argument_dictionaries[0][
# "args"]["name"], example_ids, multiclass_labels)
#
# return results, multiclass_results
def numpy_mean_and_std(scores_array):
return np.mean(scores_array, axis=1), np.std(scores_array, axis=1)
@@ -1080,47 +908,6 @@ def publish_all_example_errors(iter_results, directory,
"Done:\t Global biclass label analysis figures generation")
# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names,
# data_base_name, directory, stats_iter,
# min_size=10):
# results = []
# for metric_name, scores in iter_multiclass_results["metrics_scores"].items():
# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"])
# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"])
#
# nb_results = classifiers_names.shape[0]
#
# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str(
# stats_iter) + "_iter-" + metric_name + ".png")
#
# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results,
# metric_name, file_name, tag=" averaged multiclass",
# train_STDs=trainSTDs, test_STDs=testSTDs)
#
# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)]
# return results
# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10):
# logging.debug(
# "Start:\t Global multiclass label analysis figures generation")
# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob(
# dict((clf_name, combi_res)
# for clf_name, combi_res
# in zip(classifiers_names,
# iter_multiclass_results["error_on_examples"])),
# stats_iter)
#
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# directory, stats_iter=stats_iter,
# example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples,
# directory)
#
# logging.debug("Done:\t Global multiclass label analysis figures generation")
def gen_classifiers_dict(results, metrics):
classifiers_dict = dict((classifier_name, classifierIndex)
@@ -1158,14 +945,14 @@ def add_new_metric(iter_biclass_results, metric, labels_combination,
return iter_biclass_results
def format_previous_results(biclass_results):
def format_previous_results(iter_results_lists):
"""
Formats each statistical iteration's result into a mean/std analysis for
the metrics and adds the errors of each statistical iteration.
Parameters
----------
biclass_results : The raw results, for each statistical iteration i contains
iter_results_lists : The raw results, for each statistical iteration i contains
- biclass_results[i]["metrics_scores"] is a dictionary with a pd.dataframe
for each metrics
- biclass_results[i]["example_errors"], a dicaitonary with a np.array
@@ -1187,7 +974,7 @@ def format_previous_results(biclass_results):
metric_concat_dict = {}
for iter_index, metrics_score in enumerate(
biclass_results["metrics_scores"]):
iter_results_lists["metrics_scores"]):
for metric_name, dataframe in metrics_score.items():
if metric_name not in metric_concat_dict:
metric_concat_dict[metric_name] = dataframe
@@ -1202,9 +989,18 @@ def format_previous_results(biclass_results):
metrics_analysis[metric_name][
"std"] = dataframe.groupby(dataframe.index).std(ddof=0)
durations_df_concat = pd.DataFrame(dtype=float)
for iter_index, durations_df in enumerate(iter_results_lists["durations"]):
durations_df_concat = pd.concat((durations_df_concat, durations_df),
axis=1)
durations_df_concat = durations_df_concat.astype(float)
grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1)
duration_means = grouped_df.mean()
duration_stds = grouped_df.std()
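The column-wise groupby above averages the per-iteration duration DataFrames produced by get_duration. A standalone illustration with made-up numbers (groupby(..., axis=1) is deprecated in recent pandas; .T.groupby(level=0) is the modern equivalent):

    import pandas as pd

    iter_1 = pd.DataFrame({"hps": [1.0, 2.0], "fit": [0.5, 0.7], "pred": [0.1, 0.2]},
                          index=["svm", "adaboost"])
    iter_2 = pd.DataFrame({"hps": [1.2, 1.8], "fit": [0.6, 0.6], "pred": [0.1, 0.3]},
                          index=["svm", "adaboost"])

    concat = pd.concat((iter_1, iter_2), axis=1).astype(float)
    grouped = concat.groupby(concat.columns, axis=1)
    duration_means = grouped.mean()  # mean duration per classifier and phase
    duration_stds = grouped.std()    # spread across the statistical iterations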
importance_concat_dict = {}
for iter_index, view_feature_importances in enumerate(
biclass_results["feature_importances"]):
iter_results_lists["feature_importances"]):
for view_name, feature_importances in view_feature_importances.items():
if view_name not in importance_concat_dict:
importance_concat_dict[view_name] = feature_importances
@@ -1220,7 +1016,7 @@ def format_previous_results(biclass_results):
dataframe.index).std(ddof=0)
added_example_errors = {}
for example_errors in biclass_results["example_errors"]:
for example_errors in iter_results_lists["example_errors"]:
for classifier_name, errors in example_errors.items():
if classifier_name not in added_example_errors:
added_example_errors[classifier_name] = errors
@@ -1228,7 +1024,7 @@ def format_previous_results(biclass_results):
added_example_errors[classifier_name] += errors
error_analysis = added_example_errors
return metrics_analysis, error_analysis, feature_importances_analysis, feature_importances_stds, \
biclass_results["labels"]
iter_results_lists["labels"], duration_means, duration_stds
def analyze_all(biclass_results, stats_iter, directory, data_base_name,
@@ -1236,7 +1032,8 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
"""Used to format the results in order to plot the mean results on the iterations"""
metrics_analysis, error_analysis, \
feature_importances, feature_importances_stds, \
labels = format_previous_results(biclass_results)
labels, duration_means, \
duration_stds = format_previous_results(biclass_results)
results = publish_all_metrics_scores(metrics_analysis,
directory,
@@ -1245,9 +1042,242 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
example_ids, labels)
publish_feature_importances(feature_importances, directory,
data_base_name, feature_importances_stds)
plot_durations(duration_means, directory, data_base_name, duration_stds)
return results
def save_failed(failed_list, directory):
with open(os.path.join(directory, "failed_algorithms.txt"),
"w") as failed_file:
failed_file.write(
"The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n")
failed_file.write(", \n".join(failed_list) + ".")
def get_results(results, stats_iter, benchmark_argument_dictionaries,
metrics, directory, example_ids, labels):
"""Used to analyze the results of the previous benchmarks"""
data_base_name = benchmark_argument_dictionaries[0]["args"]["name"]
results_means_std, biclass_results, flagged_failed = analyze_iterations(
results, benchmark_argument_dictionaries,
stats_iter, metrics, example_ids, labels)
if flagged_failed:
save_failed(flagged_failed, directory)
if stats_iter > 1:
results_means_std = analyze_all(
biclass_results, stats_iter, directory,
data_base_name, example_ids)
return results_means_std
# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names,
# data_base_name, directory, stats_iter,
# min_size=10):
# results = []
# for metric_name, scores in iter_multiclass_results["metrics_scores"].items():
# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"])
# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"])
#
# nb_results = classifiers_names.shape[0]
#
# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str(
# stats_iter) + "_iter-" + metric_name + ".png")
#
# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results,
# metric_name, file_name, tag=" averaged multiclass",
# train_STDs=trainSTDs, test_STDs=testSTDs)
#
# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)]
# return results
# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10):
# logging.debug(
# "Start:\t Global multiclass label analysis figures generation")
# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob(
# dict((clf_name, combi_res)
# for clf_name, combi_res
# in zip(classifiers_names,
# iter_multiclass_results["error_on_examples"])),
# stats_iter)
#
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# directory, stats_iter=stats_iter,
# example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples,
# directory)
#
# logging.debug("Done:\t Global multiclass label analysis figures generation")
# def gen_metrics_scores_multiclass(results, true_labels, metrics_list,
# arguments_dictionaries):
# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration"""
#
# logging.debug("Start:\t Getting multiclass scores for each metric")
#
# for metric in metrics_list:
# metric_module = getattr(metrics, metric[0])
# for iter_index, iter_results in enumerate(results):
#
# for argumentsDictionary in arguments_dictionaries:
# if argumentsDictionary["flag"][0] == iter_index:
# classification_indices = argumentsDictionary[
# "classification_indices"]
# train_indices, test_indices, multiclass_test_indices = classification_indices
#
# for classifier_name, resultDictionary in iter_results.items():
# if not "metrics_scores" in resultDictionary:
# results[iter_index][classifier_name]["metrics_scores"] = {}
# train_score = metric_module.score(true_labels[train_indices],
# resultDictionary["labels"][
# train_indices],
# multiclass=True)
# test_score = metric_module.score(
# true_labels[multiclass_test_indices],
# resultDictionary["labels"][multiclass_test_indices],
# multiclass=True)
# results[iter_index][classifier_name]["metrics_scores"][
# metric[0]] = [train_score, test_score]
# logging.debug("Done:\t Getting multiclass scores for each metric")
# return results
# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels):
# """Used to add all the arrays showing on which example there is an error for each clf and each iteration"""
#
# logging.debug("Start:\t Getting errors on each example for each classifier")
#
# for iter_index, iter_results in enumerate(multiclass_results):
# for classifier_name, classifier_results in iter_results.items():
# error_on_examples = classifier_results["labels"] == multiclass_labels
# multiclass_results[iter_index][classifier_name][
# "error_on_examples"] = error_on_examples.astype(int)
#
# logging.debug("Done:\t Getting errors on each example for each classifier")
#
# return multiclass_results
# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories,
# databaseName):
# results=[]
# for iter_index in range(stats_iter):
# directory = direcories[iter_index]
# for metric in metrics:
# logging.debug(
# "Start:\t Multiclass score graph generation for " + metric[0])
# classifiers_names = np.array([classifier_name for classifier_name in
# multiclass_results[iter_index].keys()])
# train_scores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][0]
# for classifier_name in classifiers_names])
# validationScores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][1]
# for classifier_name in
# classifiers_names])
#
# nbResults = classifiers_names.shape[0]
# fileName = os.path.join(directory , time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[
# 0])
#
# plot_metric_scores(train_scores, validationScores, classifiers_names,
# nbResults, metric[0], fileName, tag=" multiclass")
#
# logging.debug(
# "Done:\t Multiclass score graph generation for " + metric[0])
# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))]
# return results
# def publishMulticlassExmapleErrors(multiclass_results, directories,
# databaseName, example_ids, multiclass_labels):
# for iter_index, multiclass_result in enumerate(multiclass_results):
# directory = directories[iter_index]
# logging.debug("Start:\t Multiclass Label analysis figure generation")
#
# base_file_name = os.path.join(directory, time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-")
# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data(
# dict((key, multiclass_result[key]['error_on_examples'])
# for key in multiclass_result.keys()),)
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# base_file_name, example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples,
# base_file_name)
#
# logging.debug("Done:\t Multiclass Label analysis figure generation")
#
# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries,
# nb_examples, nb_labels, multiclass_labels,
# metrics, classification_indices, directories, example_ids):
# """Used to transform one versus one results in multiclass results and to publish it"""
# multiclass_results = [{} for _ in range(stats_iter)]
#
# for flag, result, tracebacks in results:
# iter_index = flag[0]
# classifierPositive = flag[1][0]
# classifierNegative = flag[1][1]
#
# for benchmarkArgumentDictionary in benchmark_argument_dictionaries:
# if benchmarkArgumentDictionary["flag"] == flag:
# trainIndices, testIndices, testMulticlassIndices = \
# benchmarkArgumentDictionary["classification_indices"]
#
# for classifierResult in result:
# classifier_name = classifierResult.get_classifier_name()
# if classifier_name not in multiclass_results[iter_index]:
# multiclass_results[iter_index][classifier_name] = np.zeros(
# (nb_examples, nb_labels), dtype=int)
# for exampleIndex in trainIndices:
# label = classifierResult.full_labels_pred[exampleIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
# for multiclassIndex, exampleIndex in enumerate(
# testMulticlassIndices):
# label = classifierResult.y_test_multiclass_pred[multiclassIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
#
# for iter_index, multiclassiterResult in enumerate(multiclass_results):
# for key, value in multiclassiterResult.items():
# multiclass_results[iter_index][key] = {
# "labels": np.argmax(value, axis=1)}
#
# multiclass_results = gen_metrics_scores_multiclass(multiclass_results,
# multiclass_labels, metrics,
# benchmark_argument_dictionaries)
# multiclass_results = get_error_on_labels_multiclass(multiclass_results,
# multiclass_labels)
#
# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories,
# benchmark_argument_dictionaries[0]["args"]["name"])
# publishMulticlassExmapleErrors(multiclass_results, directories,
# benchmark_argument_dictionaries[0][
# "args"]["name"], example_ids, multiclass_labels)
#
# return results, multiclass_results
# def analyze_iter_multiclass(multiclass_results, directory, stats_iter, metrics,
# data_base_name, nb_examples, example_ids, multiclass_labels):
# """Used to mean the multiclass results on the iterations executed with different random states"""
@@ -1288,29 +1318,3 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
# publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels)
# return results
\ No newline at end of file
def save_failed(failed_list, directory):
with open(os.path.join(directory, "failed_algorithms.txt"),
"w") as failed_file:
failed_file.write(
"The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n")
failed_file.write(", \n".join(failed_list) + ".")
def get_results(results, stats_iter, benchmark_argument_dictionaries,
metrics, directory, example_ids, labels):
"""Used to analyze the results of the previous benchmarks"""
data_base_name = benchmark_argument_dictionaries[0]["args"]["name"]
results_means_std, biclass_results, flagged_failed = analyze_iterations(
results, benchmark_argument_dictionaries,
stats_iter, metrics, example_ids, labels)
if flagged_failed:
save_failed(flagged_failed, directory)
if stats_iter > 1:
results_means_std = analyze_all(
biclass_results, stats_iter, directory,
data_base_name, example_ids)
return results_means_std
@@ -30,7 +30,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
{"accuracy_score":[0.9, 0.95],
"f1_score":[0.91, 0.96]}
, "", "", "", "", "",)]
metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
metrics_scores = result_analysis.get_metrics_scores(metrics,
results)
self.assertIsInstance(metrics_scores, dict)
self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -70,7 +70,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
classifier="",
n_features="")
]
metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
metrics_scores = result_analysis.get_metrics_scores(metrics,
results)
self.assertIsInstance(metrics_scores, dict)
self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -105,7 +105,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
classifier="",
n_features="")
]
metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
metrics_scores = result_analysis.get_metrics_scores(metrics,
results)
self.assertIsInstance(metrics_scores, dict)
self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -141,7 +141,7 @@ class Test_get_example_errors_biclass(unittest.TestCase):
, np.array([0,0,1,1,0,0,1,1,0]), "", "",
"", "",)
]
example_errors = result_analysis.get_example_errors_biclass(ground_truth,
example_errors = result_analysis.get_example_errors(ground_truth,
results)
self.assertIsInstance(example_errors, dict)
np.testing.assert_array_equal(example_errors["mv"],