Supervised MultiModal Integration Tool / Commits / f98fd248

Commit f98fd248, authored 5 years ago by Baptiste Bauvin
Message: Added duration analysis
Parent: cda7ba8b
No related branches, tags, or merge requests found.

Changes: 2 changed files, with 288 additions and 284 deletions
- multiview_platform/mono_multi_view_classifiers/result_analysis.py: +280 −276
- multiview_platform/tests/test_ResultAnalysis.py: +8 −8
multiview_platform/mono_multi_view_classifiers/result_analysis.py (+280 −276)
@@ -210,7 +210,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
     -------
     """
     fig, ax = plt.subplots(nrows=1, ncols=1, )
-    cmap, norm = iterCmap(stats_iter)
+    cmap, norm = iter_cmap(stats_iter)
     cax = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto')
     plt.title('Errors depending on the classifier')
@@ -284,7 +284,7 @@ def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName):
     plt.close()


-def iterCmap(statsIter):
+def iter_cmap(statsIter):
     r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better.

     Parameters
@@ -375,7 +375,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35):
     return fig_kwargs, bar_width


-def get_metrics_scores_biclass(metrics, results):
+def get_metrics_scores(metrics, results):
     r"""Used to extract metrics scores in case of biclass classification

     Parameters
@@ -418,7 +418,7 @@ def get_metrics_scores_biclass(metrics, results):
     return metrics_scores


-def get_example_errors_biclass(groud_truth, results):
+def get_example_errors(groud_truth, results):
     r"""Used to get for each classifier and each example whether the classifier has misclassified the example or not.

     Parameters
@@ -621,44 +621,35 @@ def publish_example_errors(example_errors, directory, databaseName,


 def plot_durations(durations, directory, database_name, durations_stds=None):

     file_name = os.path.join(directory, database_name + "-durations")
+    durations.to_csv(file_name + "_dataframe.csv")
     fig = plotly.graph_objs.Figure()
     if durations_stds is None:
-        durations_stds = {}
-        for dur_key, dur_val in durations.items():
-            durations_stds[dur_key] = dict((key, 0)
-                                           for key, val in durations[dur_key].items())
+        durations_stds = pd.DataFrame(0, durations.index, durations.columns)
+    else:
+        durations_stds.to_csv(file_name + "_stds_dataframe.csv")
     fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization',
-                                        x=list(durations['hps'].keys()),
-                                        y=list(durations['hps'].values()),
+                                        x=durations.index,
+                                        y=durations['hps'],
                                         error_y=dict(type='data',
-                                                     array=list(durations_stds["hps"].values())),
+                                                     array=durations_stds["hps"]),
                                         marker_color="grey"))
     fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)',
-                                        x=list(durations['fit'].keys()),
-                                        y=list(durations['fit'].values()),
+                                        x=durations.index,
+                                        y=durations['fit'],
                                         error_y=dict(type='data',
-                                                     array=list(durations_stds["fit"].values())),
+                                                     array=durations_stds["fit"]),
                                         marker_color="black"))
     fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)',
-                                        x=list(durations['pred'].keys()),
-                                        y=list(durations['pred'].values()),
+                                        x=durations.index,
+                                        y=durations['pred'],
                                         error_y=dict(type='data',
-                                                     array=list(durations_stds["pred"].values())),
+                                                     array=durations_stds["pred"]),
                                         marker_color="lightgrey"))
-    fig.update_layout(title="Durations for each classfier")
+    fig.update_layout(title="Durations for each classfier",
+                      yaxis_title="Duration (s)")
     fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
                       plot_bgcolor='rgba(0,0,0,0)')
     plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
-    index = durations["hps"].keys()
-    df = pd.DataFrame(index=index, columns=["hps", "fit", "pred"], )
-    for key, value in durations.items():
-        df[key] = [value[ind] for ind in index]
-    df.to_csv(file_name + "_dataframe.csv")


 def publish_feature_importances(feature_importances, directory, database_name,
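The refactored plot_durations above now expects durations (and the optional durations_stds) as pandas DataFrames indexed by classifier name, with one column per timed phase ("hps", "fit", "pred"), instead of nested dictionaries. A minimal sketch of inputs matching that new signature; the classifier names, timing values, and output directory below are made up for illustration:

    import pandas as pd

    # Hypothetical per-classifier timings (seconds), one column per phase.
    durations = pd.DataFrame(
        {"hps": [12.3, 4.1], "fit": [1.8, 0.6], "pred": [0.2, 0.1]},
        index=["decision_tree", "weighted_linear_early_fusion"])
    # Optional standard deviations over statistical iterations, same shape.
    durations_stds = pd.DataFrame(
        {"hps": [0.5, 0.2], "fit": [0.1, 0.05], "pred": [0.01, 0.01]},
        index=durations.index)

    # With these inputs, the call would be:
    # plot_durations(durations, "results/", "some_dataset", durations_stds)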
@@ -755,16 +746,15 @@ def get_feature_importances(result, feature_names=None):


 def get_duration(results):
-    durations = {"hps": {}, "fit": {}, "pred": {}}
+    df = pd.DataFrame(columns=["hps", "fit", "pred"], )
     for classifier_result in results:
-        durations["hps"][classifier_result.get_classifier_name()] = classifier_result.hps_duration
-        durations["fit"][classifier_result.get_classifier_name()] = classifier_result.fit_duration
-        durations["pred"][classifier_result.get_classifier_name()] = classifier_result.pred_duration
-    return durations
+        df.at[classifier_result.get_classifier_name(), "hps"] = classifier_result.hps_duration
+        df.at[classifier_result.get_classifier_name(), "fit"] = classifier_result.fit_duration
+        df.at[classifier_result.get_classifier_name(), "pred"] = classifier_result.pred_duration
+    return df


 def publish_tracebacks(directory, database_name, labels_names, tracebacks,
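The rewritten get_duration above relies on DataFrame.at setting values with enlargement: assigning to a row label that does not exist yet creates that row. A self-contained sketch of the same pattern, using a hypothetical stand-in for the benchmark result objects (FakeResult is not a class from this project):

    import pandas as pd

    class FakeResult:
        # Minimal stand-in exposing the attributes get_duration reads.
        def __init__(self, name, hps, fit, pred):
            self.name = name
            self.hps_duration = hps
            self.fit_duration = fit
            self.pred_duration = pred

        def get_classifier_name(self):
            return self.name

    results = [FakeResult("adaboost", 10.2, 1.1, 0.1),
               FakeResult("svm_rbf", 25.4, 3.2, 0.4)]

    df = pd.DataFrame(columns=["hps", "fit", "pred"])
    for res in results:
        # .at creates the "adaboost" / "svm_rbf" rows on first assignment.
        df.at[res.get_classifier_name(), "hps"] = res.hps_duration
        df.at[res.get_classifier_name(), "fit"] = res.fit_duration
        df.at[res.get_classifier_name(), "pred"] = res.pred_duration
    print(df)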
@@ -818,8 +808,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     for iter_index, result, tracebacks in results:
         arguments = get_arguments(benchmark_argument_dictionaries, iter_index)

-        metrics_scores = get_metrics_scores_biclass(metrics, result)
-        example_errors = get_example_errors_biclass(labels, result)
+        metrics_scores = get_metrics_scores(metrics, result)
+        example_errors = get_example_errors(labels, result)
         feature_importances = get_feature_importances(result)
         durations = get_duration(result)
         directory = arguments["directory"]
@@ -850,168 +840,6 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     return res, iter_results, flagged_tracebacks_list

# def gen_metrics_scores_multiclass(results, true_labels, metrics_list,
# arguments_dictionaries):
# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration"""
#
# logging.debug("Start:\t Getting multiclass scores for each metric")
#
# for metric in metrics_list:
# metric_module = getattr(metrics, metric[0])
# for iter_index, iter_results in enumerate(results):
#
# for argumentsDictionary in arguments_dictionaries:
# if argumentsDictionary["flag"][0] == iter_index:
# classification_indices = argumentsDictionary[
# "classification_indices"]
# train_indices, test_indices, multiclass_test_indices = classification_indices
#
# for classifier_name, resultDictionary in iter_results.items():
# if not "metrics_scores" in resultDictionary:
# results[iter_index][classifier_name]["metrics_scores"] = {}
# train_score = metric_module.score(true_labels[train_indices],
# resultDictionary["labels"][
# train_indices],
# multiclass=True)
# test_score = metric_module.score(
# true_labels[multiclass_test_indices],
# resultDictionary["labels"][multiclass_test_indices],
# multiclass=True)
# results[iter_index][classifier_name]["metrics_scores"][
# metric[0]] = [train_score, test_score]
# logging.debug("Done:\t Getting multiclass scores for each metric")
# return results
# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels):
# """Used to add all the arrays showing on which example there is an error for each clf and each iteration"""
#
# logging.debug("Start:\t Getting errors on each example for each classifier")
#
# for iter_index, iter_results in enumerate(multiclass_results):
# for classifier_name, classifier_results in iter_results.items():
# error_on_examples = classifier_results["labels"] == multiclass_labels
# multiclass_results[iter_index][classifier_name][
# "error_on_examples"] = error_on_examples.astype(int)
#
# logging.debug("Done:\t Getting errors on each example for each classifier")
#
# return multiclass_results
# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories,
# databaseName):
# results=[]
# for iter_index in range(stats_iter):
# directory = direcories[iter_index]
# for metric in metrics:
# logging.debug(
# "Start:\t Multiclass score graph generation for " + metric[0])
# classifiers_names = np.array([classifier_name for classifier_name in
# multiclass_results[iter_index].keys()])
# train_scores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][0]
# for classifier_name in classifiers_names])
# validationScores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][1]
# for classifier_name in
# classifiers_names])
#
# nbResults = classifiers_names.shape[0]
# fileName = os.path.join(directory , time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[
# 0])
#
# plot_metric_scores(train_scores, validationScores, classifiers_names,
# nbResults, metric[0], fileName, tag=" multiclass")
#
# logging.debug(
# "Done:\t Multiclass score graph generation for " + metric[0])
# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))]
# return results
# def publishMulticlassExmapleErrors(multiclass_results, directories,
# databaseName, example_ids, multiclass_labels):
# for iter_index, multiclass_result in enumerate(multiclass_results):
# directory = directories[iter_index]
# logging.debug("Start:\t Multiclass Label analysis figure generation")
#
# base_file_name = os.path.join(directory, time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-")
# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data(
# dict((key, multiclass_result[key]['error_on_examples'])
# for key in multiclass_result.keys()),)
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# base_file_name, example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples,
# base_file_name)
#
# logging.debug("Done:\t Multiclass Label analysis figure generation")
#
# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries,
# nb_examples, nb_labels, multiclass_labels,
# metrics, classification_indices, directories, example_ids):
# """Used to transform one versus one results in multiclass results and to publish it"""
# multiclass_results = [{} for _ in range(stats_iter)]
#
# for flag, result, tracebacks in results:
# iter_index = flag[0]
# classifierPositive = flag[1][0]
# classifierNegative = flag[1][1]
#
# for benchmarkArgumentDictionary in benchmark_argument_dictionaries:
# if benchmarkArgumentDictionary["flag"] == flag:
# trainIndices, testIndices, testMulticlassIndices = \
# benchmarkArgumentDictionary["classification_indices"]
#
# for classifierResult in result:
# classifier_name = classifierResult.get_classifier_name()
# if classifier_name not in multiclass_results[iter_index]:
# multiclass_results[iter_index][classifier_name] = np.zeros(
# (nb_examples, nb_labels), dtype=int)
# for exampleIndex in trainIndices:
# label = classifierResult.full_labels_pred[exampleIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
# for multiclassIndex, exampleIndex in enumerate(
# testMulticlassIndices):
# label = classifierResult.y_test_multiclass_pred[multiclassIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
#
# for iter_index, multiclassiterResult in enumerate(multiclass_results):
# for key, value in multiclassiterResult.items():
# multiclass_results[iter_index][key] = {
# "labels": np.argmax(value, axis=1)}
#
# multiclass_results = gen_metrics_scores_multiclass(multiclass_results,
# multiclass_labels, metrics,
# benchmark_argument_dictionaries)
# multiclass_results = get_error_on_labels_multiclass(multiclass_results,
# multiclass_labels)
#
# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories,
# benchmark_argument_dictionaries[0]["args"]["name"])
# publishMulticlassExmapleErrors(multiclass_results, directories,
# benchmark_argument_dictionaries[0][
# "args"]["name"], example_ids, multiclass_labels)
#
# return results, multiclass_results

def numpy_mean_and_std(scores_array):
    return np.mean(scores_array, axis=1), np.std(scores_array, axis=1)
@@ -1080,47 +908,6 @@ def publish_all_example_errors(iter_results, directory,
         "Done:\t Global biclass label analysis figures generation")

# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names,
# data_base_name, directory, stats_iter,
# min_size=10):
# results = []
# for metric_name, scores in iter_multiclass_results["metrics_scores"].items():
# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"])
# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"])
#
# nb_results = classifiers_names.shape[0]
#
# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str(
# stats_iter) + "_iter-" + metric_name + ".png")
#
# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results,
# metric_name, file_name, tag=" averaged multiclass",
# train_STDs=trainSTDs, test_STDs=testSTDs)
#
# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)]
# return results
# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10):
# logging.debug(
# "Start:\t Global multiclass label analysis figures generation")
# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob(
# dict((clf_name, combi_res)
# for clf_name, combi_res
# in zip(classifiers_names,
# iter_multiclass_results["error_on_examples"])),
# stats_iter)
#
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# directory, stats_iter=stats_iter,
# example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples,
# directory)
#
# logging.debug("Done:\t Global multiclass label analysis figures generation")

def gen_classifiers_dict(results, metrics):
    classifiers_dict = dict((classifier_name, classifierIndex)
@@ -1158,14 +945,14 @@ def add_new_metric(iter_biclass_results, metric, labels_combination,
     return iter_biclass_results


-def format_previous_results(biclass_results):
+def format_previous_results(iter_results_lists):
     """
     Formats each statistical iteration's result into a mean/std analysis for
     the metrics and adds the errors of each statistical iteration.

     Parameters
     ----------
-    biclass_results : The raw results, for each statistical iteration i contains
+    iter_results_lists : The raw results, for each statistical iteration i contains
         - biclass_results[i]["metrics_scores"] is a dictionary with a pd.dataframe
           for each metrics
         - biclass_results[i]["example_errors"], a dicaitonary with a np.array
@@ -1187,7 +974,7 @@ def format_previous_results(biclass_results):
     metric_concat_dict = {}
     for iter_index, metrics_score in enumerate(
-            biclass_results["metrics_scores"]):
+            iter_results_lists["metrics_scores"]):
         for metric_name, dataframe in metrics_score.items():
             if metric_name not in metric_concat_dict:
                 metric_concat_dict[metric_name] = dataframe
@@ -1202,9 +989,18 @@ def format_previous_results(biclass_results):
         metrics_analysis[metric_name]["std"] = dataframe.groupby(
             dataframe.index).std(ddof=0)

+    durations_df_concat = pd.DataFrame(dtype=float)
+    for iter_index, durations_df in enumerate(iter_results_lists["durations"]):
+        durations_df_concat = pd.concat((durations_df_concat, durations_df),
+                                        axis=1)
+    durations_df_concat = durations_df_concat.astype(float)
+    grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1)
+    duration_means = grouped_df.mean()
+    duration_stds = grouped_df.std()
+
     importance_concat_dict = {}
     for iter_index, view_feature_importances in enumerate(
-            biclass_results["feature_importances"]):
+            iter_results_lists["feature_importances"]):
         for view_name, feature_importances in view_feature_importances.items():
             if view_name not in importance_concat_dict:
                 importance_concat_dict[view_name] = feature_importances
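The block added above averages the per-iteration duration DataFrames: they are concatenated side by side (so column names repeat once per iteration) and then grouped by column name to get a mean and a standard deviation per phase. A short sketch of that aggregation on made-up numbers; note that groupby(..., axis=1), as used here, is deprecated in recent pandas releases:

    import pandas as pd

    # Two statistical iterations of the same one-classifier benchmark.
    iter_1 = pd.DataFrame({"hps": [10.0], "fit": [1.0], "pred": [0.1]},
                          index=["adaboost"])
    iter_2 = pd.DataFrame({"hps": [12.0], "fit": [1.4], "pred": [0.3]},
                          index=["adaboost"])

    concat = pd.concat((iter_1, iter_2), axis=1).astype(float)
    grouped = concat.groupby(concat.columns, axis=1)
    print(grouped.mean())  # one averaged "fit", "hps", "pred" column each
    print(grouped.std())   # standard deviation across the two iterations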
@@ -1220,7 +1016,7 @@ def format_previous_results(biclass_results):
             dataframe.index).std(ddof=0)

     added_example_errors = {}
-    for example_errors in biclass_results["example_errors"]:
+    for example_errors in iter_results_lists["example_errors"]:
         for classifier_name, errors in example_errors.items():
             if classifier_name not in added_example_errors:
                 added_example_errors[classifier_name] = errors
@@ -1228,7 +1024,7 @@ def format_previous_results(biclass_results):
                 added_example_errors[classifier_name] += errors
     error_analysis = added_example_errors
     return metrics_analysis, error_analysis, feature_importances_analysis, \
            feature_importances_stds, \
-           biclass_results["labels"]
+           iter_results_lists["labels"], duration_means, duration_stds


 def analyze_all(biclass_results, stats_iter, directory, data_base_name,
@@ -1236,7 +1032,8 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
     """Used to format the results in order to plot the mean results on the iterations"""
     metrics_analysis, error_analysis, \
         feature_importances, feature_importances_stds, \
-        labels = format_previous_results(biclass_results)
+        labels, duration_means, \
+        duration_stds = format_previous_results(biclass_results)

     results = publish_all_metrics_scores(metrics_analysis, directory,
@@ -1245,9 +1042,242 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
                                        example_ids, labels)
     publish_feature_importances(feature_importances, directory,
                                 data_base_name, feature_importances_stds)
+    plot_durations(duration_means, directory, data_base_name, duration_stds)
     return results


 def save_failed(failed_list, directory):
     with open(os.path.join(directory, "failed_algorithms.txt"), "w") as failed_file:
         failed_file.write("The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n")
         failed_file.write(",\n".join(failed_list) + ".")


 def get_results(results, stats_iter, benchmark_argument_dictionaries, metrics,
                 directory, example_ids, labels):
     """Used to analyze the results of the previous benchmarks"""
     data_base_name = benchmark_argument_dictionaries[0]["args"]["name"]
     results_means_std, biclass_results, flagged_failed = analyze_iterations(
         results, benchmark_argument_dictionaries,
         stats_iter, metrics, example_ids, labels)
     if flagged_failed:
         save_failed(flagged_failed, directory)

     if stats_iter > 1:
         results_means_std = analyze_all(
             biclass_results,
             stats_iter, directory,
             data_base_name, example_ids)
     return results_means_std
# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names,
# data_base_name, directory, stats_iter,
# min_size=10):
# results = []
# for metric_name, scores in iter_multiclass_results["metrics_scores"].items():
# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"])
# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"])
#
# nb_results = classifiers_names.shape[0]
#
# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str(
# stats_iter) + "_iter-" + metric_name + ".png")
#
# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results,
# metric_name, file_name, tag=" averaged multiclass",
# train_STDs=trainSTDs, test_STDs=testSTDs)
#
# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)]
# return results
# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10):
# logging.debug(
# "Start:\t Global multiclass label analysis figures generation")
# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob(
# dict((clf_name, combi_res)
# for clf_name, combi_res
# in zip(classifiers_names,
# iter_multiclass_results["error_on_examples"])),
# stats_iter)
#
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# directory, stats_iter=stats_iter,
# example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples,
# directory)
#
# logging.debug("Done:\t Global multiclass label analysis figures generation")
# def gen_metrics_scores_multiclass(results, true_labels, metrics_list,
# arguments_dictionaries):
# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration"""
#
# logging.debug("Start:\t Getting multiclass scores for each metric")
#
# for metric in metrics_list:
# metric_module = getattr(metrics, metric[0])
# for iter_index, iter_results in enumerate(results):
#
# for argumentsDictionary in arguments_dictionaries:
# if argumentsDictionary["flag"][0] == iter_index:
# classification_indices = argumentsDictionary[
# "classification_indices"]
# train_indices, test_indices, multiclass_test_indices = classification_indices
#
# for classifier_name, resultDictionary in iter_results.items():
# if not "metrics_scores" in resultDictionary:
# results[iter_index][classifier_name]["metrics_scores"] = {}
# train_score = metric_module.score(true_labels[train_indices],
# resultDictionary["labels"][
# train_indices],
# multiclass=True)
# test_score = metric_module.score(
# true_labels[multiclass_test_indices],
# resultDictionary["labels"][multiclass_test_indices],
# multiclass=True)
# results[iter_index][classifier_name]["metrics_scores"][
# metric[0]] = [train_score, test_score]
# logging.debug("Done:\t Getting multiclass scores for each metric")
# return results
# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels):
# """Used to add all the arrays showing on which example there is an error for each clf and each iteration"""
#
# logging.debug("Start:\t Getting errors on each example for each classifier")
#
# for iter_index, iter_results in enumerate(multiclass_results):
# for classifier_name, classifier_results in iter_results.items():
# error_on_examples = classifier_results["labels"] == multiclass_labels
# multiclass_results[iter_index][classifier_name][
# "error_on_examples"] = error_on_examples.astype(int)
#
# logging.debug("Done:\t Getting errors on each example for each classifier")
#
# return multiclass_results
# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories,
# databaseName):
# results=[]
# for iter_index in range(stats_iter):
# directory = direcories[iter_index]
# for metric in metrics:
# logging.debug(
# "Start:\t Multiclass score graph generation for " + metric[0])
# classifiers_names = np.array([classifier_name for classifier_name in
# multiclass_results[iter_index].keys()])
# train_scores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][0]
# for classifier_name in classifiers_names])
# validationScores = np.array([multiclass_results[iter_index][
# classifier_name]["metrics_scores"][
# metric[0]][1]
# for classifier_name in
# classifiers_names])
#
# nbResults = classifiers_names.shape[0]
# fileName = os.path.join(directory , time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[
# 0])
#
# plot_metric_scores(train_scores, validationScores, classifiers_names,
# nbResults, metric[0], fileName, tag=" multiclass")
#
# logging.debug(
# "Done:\t Multiclass score graph generation for " + metric[0])
# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))]
# return results
# def publishMulticlassExmapleErrors(multiclass_results, directories,
# databaseName, example_ids, multiclass_labels):
# for iter_index, multiclass_result in enumerate(multiclass_results):
# directory = directories[iter_index]
# logging.debug("Start:\t Multiclass Label analysis figure generation")
#
# base_file_name = os.path.join(directory, time.strftime(
# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-")
# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data(
# dict((key, multiclass_result[key]['error_on_examples'])
# for key in multiclass_result.keys()),)
# plot_2d(data, classifiers_names, nb_classifiers, nb_examples,
# base_file_name, example_ids=example_ids, labels=multiclass_labels)
#
# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples,
# base_file_name)
#
# logging.debug("Done:\t Multiclass Label analysis figure generation")
#
# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries,
# nb_examples, nb_labels, multiclass_labels,
# metrics, classification_indices, directories, example_ids):
# """Used to transform one versus one results in multiclass results and to publish it"""
# multiclass_results = [{} for _ in range(stats_iter)]
#
# for flag, result, tracebacks in results:
# iter_index = flag[0]
# classifierPositive = flag[1][0]
# classifierNegative = flag[1][1]
#
# for benchmarkArgumentDictionary in benchmark_argument_dictionaries:
# if benchmarkArgumentDictionary["flag"] == flag:
# trainIndices, testIndices, testMulticlassIndices = \
# benchmarkArgumentDictionary["classification_indices"]
#
# for classifierResult in result:
# classifier_name = classifierResult.get_classifier_name()
# if classifier_name not in multiclass_results[iter_index]:
# multiclass_results[iter_index][classifier_name] = np.zeros(
# (nb_examples, nb_labels), dtype=int)
# for exampleIndex in trainIndices:
# label = classifierResult.full_labels_pred[exampleIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
# for multiclassIndex, exampleIndex in enumerate(
# testMulticlassIndices):
# label = classifierResult.y_test_multiclass_pred[multiclassIndex]
# if label == 1:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierPositive] += 1
# else:
# multiclass_results[iter_index][classifier_name][
# exampleIndex, classifierNegative] += 1
#
# for iter_index, multiclassiterResult in enumerate(multiclass_results):
# for key, value in multiclassiterResult.items():
# multiclass_results[iter_index][key] = {
# "labels": np.argmax(value, axis=1)}
#
# multiclass_results = gen_metrics_scores_multiclass(multiclass_results,
# multiclass_labels, metrics,
# benchmark_argument_dictionaries)
# multiclass_results = get_error_on_labels_multiclass(multiclass_results,
# multiclass_labels)
#
# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories,
# benchmark_argument_dictionaries[0]["args"]["name"])
# publishMulticlassExmapleErrors(multiclass_results, directories,
# benchmark_argument_dictionaries[0][
# "args"]["name"], example_ids, multiclass_labels)
#
# return results, multiclass_results
# def analyze_iter_multiclass(multiclass_results, directory, stats_iter, metrics,
# data_base_name, nb_examples, example_ids, multiclass_labels):
# """Used to mean the multiclass results on the iterations executed with different random states"""
@@ -1288,29 +1318,3 @@ def analyze_all(biclass_results, stats_iter, directory, data_base_name,
# publish_iter_multiclass_example_errors(iter_multiclass_results, directory,
# classifiers_names, stats_iter, example_ids, multiclass_labels)
# return results
\ No newline at end of file
def save_failed(failed_list, directory):
    with open(os.path.join(directory, "failed_algorithms.txt"), "w") as failed_file:
        failed_file.write("The following algorithms sent an error, the tracebacks are stored in the coressponding directory :\n")
        failed_file.write(",\n".join(failed_list) + ".")


def get_results(results, stats_iter, benchmark_argument_dictionaries, metrics,
                directory, example_ids, labels):
    """Used to analyze the results of the previous benchmarks"""
    data_base_name = benchmark_argument_dictionaries[0]["args"]["name"]
    results_means_std, biclass_results, flagged_failed = analyze_iterations(
        results, benchmark_argument_dictionaries,
        stats_iter, metrics, example_ids, labels)
    if flagged_failed:
        save_failed(flagged_failed, directory)

    if stats_iter > 1:
        results_means_std = analyze_all(
            biclass_results,
            stats_iter, directory,
            data_base_name, example_ids)
    return results_means_std
multiview_platform/tests/test_ResultAnalysis.py (+8 −8)
@@ -30,7 +30,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
                                     {"accuracy_score": [0.9, 0.95],
                                      "f1_score": [0.91, 0.96]},
                                     "", "", "", "", "",)]
-        metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
-                                                                    results)
+        metrics_scores = result_analysis.get_metrics_scores(metrics,
+                                                            results)
         self.assertIsInstance(metrics_scores, dict)
         self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -70,7 +70,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
                                       classifier="",
                                       n_features="")
         ]
-        metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
-                                                                    results)
+        metrics_scores = result_analysis.get_metrics_scores(metrics,
+                                                            results)
         self.assertIsInstance(metrics_scores, dict)
         self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -105,7 +105,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
                                       classifier="",
                                       n_features="")
         ]
-        metrics_scores = result_analysis.get_metrics_scores_biclass(metrics,
-                                                                    results)
+        metrics_scores = result_analysis.get_metrics_scores(metrics,
+                                                            results)
         self.assertIsInstance(metrics_scores, dict)
         self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
@@ -141,7 +141,7 @@ class Test_get_example_errors_biclass(unittest.TestCase):
                               np.array([0, 0, 1, 1, 0, 0, 1, 1, 0]),
                               "", "", "", "",)
         ]
-        example_errors = result_analysis.get_example_errors_biclass(ground_truth,
-                                                                    results)
+        example_errors = result_analysis.get_example_errors(ground_truth,
+                                                            results)
         self.assertIsInstance(example_errors, dict)
         np.testing.assert_array_equal(example_errors["mv"],