From c61922aa2486333c2c963025ae21c6ff4353a18f Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Mon, 11 Nov 2019 10:31:35 -0500
Subject: [PATCH] Biclass result analysis working

---
 .../result_analysis.py                        | 163 +++++++++---------
 1 file changed, 85 insertions(+), 78 deletions(-)

diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
index 41e9abcd..ac4e8a3b 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
@@ -182,11 +182,18 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
     ### The following part is used to generate an interactive graph.
     if use_plotly:
         import plotly
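+        # Each cell of `data` counts how many of the stats_iter iterations
+        # the classifier got the example right (see the colorbar below), so
+        # stats_iter - data[i, j] is that classifier's number of failures.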
+        hover_text = [["Failed " + str(stats_iter - data[i, j]) + " time(s)"
+                       for j in range(data.shape[1])]
+                      for i in range(data.shape[0])]
         fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap(
             x=list(classifiers_names),
             y=example_ids,
             z=data,
+            text=hover_text,
+            hoverinfo="y+x+text",
             colorscale="Greys",
+            colorbar=dict(tickvals=[0, stats_iter],
+                          ticktext=["Always Wrong", "Always Right"]),
             reversescale=True))
         fig.update_layout(
             xaxis={"showgrid": False, "showticklabels": False, "ticks": ''},
@@ -801,67 +808,77 @@ def numpy_mean_and_std(scores_array):
 
 
 def publish_iter_biclass_metrics_scores(iter_results, directory, labels_dictionary,
-                                    classifiers_dict, data_base_name, stats_iter,
+                                    data_base_name, stats_iter,
                                     min_size=10):
     results=[]
-    for labelsCombination, iterResult in iter_results.items():
-        currentDirectory = directory + labels_dictionary[
-            int(labelsCombination[0])] + "-vs-" + labels_dictionary[
-                               int(labelsCombination[1])] + "/"
-        if not os.path.exists(os.path.dirname(currentDirectory + "a")):
+    for labels_combination, iter_result in iter_results.items():
+        current_directory = directory + labels_dictionary[
+            int(labels_combination[0])] + "-vs-" + labels_dictionary[
+                               int(labels_combination[1])] + "/"
+        if not os.path.exists(os.path.dirname(current_directory + "a")):
             try:
-                os.makedirs(os.path.dirname(currentDirectory + "a"))
+                os.makedirs(os.path.dirname(current_directory + "a"))
             except OSError as exc:
                 if exc.errno != errno.EEXIST:
                     raise
 
-        for metricName, scores in iterResult["metrics_scores"].items():
-            trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"])
-            testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"])
-
-            names = np.array([name for name in classifiers_dict.keys()])
-            fileName = currentDirectory + time.strftime(
+        for metric_name, scores in iter_result.items():
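+            # scores holds two dataframes ("mean" and "std"), each indexed by
+            # "train"/"test" with one column per classifier.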
+            train = np.array(scores["mean"].loc["train"])
+            test = np.array(scores["mean"].loc["test"])
+            names = np.array(scores["mean"].columns)
+            train_std = np.array(scores["std"].loc["train"])
+            test_std = np.array(scores["std"].loc["test"])
+
+            fileName = current_directory + time.strftime(
                 "%Y_%m_%d-%H_%M_%S") + "-" + data_base_name + "-Mean_on_" + str(
-                stats_iter) + "_iter-" + metricName + ".png"
+                stats_iter) + "_iter-" + metric_name + ".png"
             nbResults = names.shape[0]
 
-            plot_metric_scores(trainMeans, testMeans, names, nbResults,
-                               metricName, fileName, tag=" averaged",
-                               train_STDs=trainSTDs, test_STDs=testSTDs)
-            results+=[[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(names, testMeans, testSTDs)]
+            plot_metric_scores(train, test, names, nbResults,
+                               metric_name, fileName, tag=" averaged",
+                               train_STDs=train_std, test_STDs=test_std)
+            results += [[classifier_name, metric_name, test_mean, test_std]
+                        for classifier_name, test_mean, test_std
+                        in zip(names, test, test_std)]
     return results
 
 
-def gen_error_dat_glob(combi_results, stats_iter, base_file_name):
-    nbExamples = combi_results["error_on_examples"].shape[1]
-    nbClassifiers = combi_results["error_on_examples"].shape[0]
-    data = np.transpose(combi_results["error_on_examples"])
-    error_on_examples = -1 * np.sum(data, axis=1) + (nbClassifiers * stats_iter)
-    np.savetxt(base_file_name + "clf_errors.csv", data, delimiter=",")
-    np.savetxt(base_file_name + "example_errors.csv", error_on_examples,
-               delimiter=",")
-    return nbExamples, nbClassifiers, data, error_on_examples
+def gen_error_dat_glob(combi_results, stats_iter):
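+    """Stack each classifier's accumulated example scores for one label
+    combination into an (nb_examples, nb_classifiers) array and derive the
+    per-example error count."""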
+    nb_examples = next(iter(combi_results.values())).shape[0]
+    nb_classifiers = len(combi_results)
+    data = np.zeros((nb_examples, nb_classifiers), dtype=int)
+    classifier_names = []
+    for clf_index, (classifier_name, error_data) in enumerate(combi_results.items()):
+        data[:, clf_index] = error_data
+        classifier_names.append(classifier_name)
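+    # Every entry of data lies in [0, stats_iter], so subtracting the row sum
+    # from nb_classifiers * stats_iter gives the total number of failures
+    # recorded for each example.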
+    error_on_examples = -1 * np.sum(data, axis=1) + (nb_classifiers * stats_iter)
+    return nb_examples, nb_classifiers, data, error_on_examples, classifier_names
 
 
-def publish_iter_biclass_example_errors(iter_results, directory, labels_dictionary,
-                                        classifiers_dict, stats_iter, exmaple_ids, min_size=10):
-    for labelsCombination, combiResults in iter_results.items():
+def publish_iter_biclass_example_errors(iter_results, directory,
+                                        labels_dictionary, stats_iter,
+                                        example_ids):
+    for labels_combination, combi_results in iter_results.items():
         base_file_name = directory + labels_dictionary[
-            int(labelsCombination[0])] + "-vs-" + \
+            int(labels_combination[0])] + "-vs-" + \
                          labels_dictionary[
-                             int(labelsCombination[1])] + "/" + time.strftime(
+                             int(labels_combination[1])] + "/" + time.strftime(
             "%Y_%m_%d-%H_%M_%S") + "-"
-        classifiers_names = [classifier_name for classifier_name in
-                            classifiers_dict.keys()]
+
         logging.debug(
             "Start:\t Global biclass label analysis figure generation")
 
-        nbExamples, nbClassifiers, data, error_on_examples = gen_error_dat_glob(
-            combiResults, stats_iter, base_file_name)
+        (nbExamples, nbClassifiers, data, error_on_examples,
+         classifier_names) = gen_error_dat_glob(combi_results, stats_iter)
 
-        plot_2d(data, classifiers_names, nbClassifiers, nbExamples, 1,
-                base_file_name, stats_iter=stats_iter, example_ids=exmaple_ids)
+        np.savetxt(base_file_name + "clf_errors.csv", data, delimiter=",")
+        np.savetxt(base_file_name + "example_errors.csv", error_on_examples,
+                   delimiter=",")
 
+        plot_2d(data, classifier_names, nbClassifiers, nbExamples,
+                base_file_name, stats_iter=stats_iter, example_ids=example_ids)
         plot_errors_bar(error_on_examples, nbClassifiers * stats_iter,
                         nbExamples, base_file_name)
 
@@ -944,49 +961,39 @@ def add_new_metric(iter_biclass_results, metric, labels_combination, nb_classifi
 def analyzebiclass_iter(biclass_results, metrics, stats_iter, directory,
                        labels_dictionary, data_base_name, nb_examples, example_ids):
     """Used to format the results in order to plot the mean results on the iterations"""
-    iter_biclass_results = {}
-    classifiers_dict, nb_classifiers = gen_classifiers_dict(biclass_results,
+    classifiers_dict = gen_classifiers_dict(biclass_results,
                                                           metrics)
-
+    metrics_analysis = {key: {} for key in biclass_results}
+    error_analysis = {key: {} for key in biclass_results}
     for label_combination, biclass_result in biclass_results.items():
-        for iter_index, metric_score in enumerate(biclass_result["metrics_scores"]):
-            print(metric_score)
-
 
-    for iter_index, biclass_result in enumerate(biclass_results):
-        for labelsComination, results in biclass_result.items():
-            for metric in metrics:
-
-                iter_biclass_results = add_new_labels_combination(
-                    iter_biclass_results, labelsComination, nb_classifiers,
-                    nb_examples)
-                iter_biclass_results = add_new_metric(iter_biclass_results, metric,
-                                                    labelsComination,
-                                                    nb_classifiers, stats_iter)
-
-                metric_results = results["metrics_scores"][metric[0]]
-                for classifier_name, trainScore, testScore in zip(
-                        metric_results["classifiers_names"],
-                        metric_results["train_scores"],
-                        metric_results["test_scores"], ):
-                    iter_biclass_results[labelsComination]["metrics_scores"][
-                        metric[0]]["train_scores"][
-                        classifiers_dict[classifier_name], iter_index] = trainScore
-                    iter_biclass_results[labelsComination]["metrics_scores"][
-                        metric[0]]["test_scores"][
-                        classifiers_dict[classifier_name], iter_index] = testScore
-            for classifier_name, error_on_example in results[
-                "example_errors"].items():
-                iter_biclass_results[labelsComination]["error_on_examples"][
-                classifiers_dict[classifier_name], :] += error_on_example[
-                    "error_on_examples"]
-
-    results = publish_iter_biclass_metrics_scores(
-        iter_biclass_results, directory,
-        labels_dictionary, classifiers_dict,
-        data_base_name, stats_iter)
-    publish_iter_biclass_example_errors(iter_biclass_results, directory,
-                                        labels_dictionary, classifiers_dict,
+        concat_dict = {}
+        for metrics_score in biclass_result["metrics_scores"]:
+            for metric_name, dataframe in metrics_score.items():
+                if metric_name not in concat_dict:
+                    concat_dict[metric_name] = dataframe
+                else:
+                    concat_dict[metric_name] = pd.concat(
+                        [concat_dict[metric_name], dataframe])
+
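+        # Each iteration produced one dataframe per metric, indexed by
+        # "train"/"test" with one column per classifier; grouping the
+        # concatenation by index yields the mean and std over the iterations.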
+        for metric_name, dataframe in concat_dict.items():
+            metrics_analysis[label_combination][metric_name] = {
+                "mean": dataframe.groupby(dataframe.index).mean(),
+                "std": dataframe.groupby(dataframe.index).std()}
+
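+        # Accumulate each classifier's per-example results element-wise over
+        # the stats_iter iterations.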
+        added_example_errors = {}
+        for example_errors in biclass_result["example_errors"]:
+            for classifier_name, errors in example_errors.items():
+                if classifier_name not in added_example_errors:
+                    # Copy so the in-place += below cannot mutate the
+                    # per-iteration arrays stored in biclass_result.
+                    added_example_errors[classifier_name] = np.copy(errors)
+                else:
+                    added_example_errors[classifier_name] += errors
+        error_analysis[label_combination] = added_example_errors
+
+    results = publish_iter_biclass_metrics_scores(metrics_analysis,
+                                                  directory, labels_dictionary,
+                                                  data_base_name, stats_iter)
+    publish_iter_biclass_example_errors(error_analysis, directory,
+                                        labels_dictionary,
                                         stats_iter, example_ids)
     return results
 
-- 
GitLab