Commit 98e14bb6 authored by Baptiste Bauvin

Added tests on result analysis, added equiv support and missing whitespace

parent 65b27dd5
from datetime import timedelta as hms

from .. import metrics
from ..utils.base import get_metric


def get_db_config_string(name, feat, classification_indices, shape,
                         class_labels_names, k_folds):
    """
    Build a string describing the database configuration.

    Parameters
    ----------
    name : name of the database
    feat : name of the view
    classification_indices : tuple of (train indices, test indices)
    shape : shape of the view
    class_labels_names : names of the class labels
    k_folds : cross-validation folds object (exposing n_splits)

    Returns
    -------
    db_config_string : string describing the database configuration
    """
    learning_rate = float(len(classification_indices[0])) / (
        len(classification_indices[0]) + len(classification_indices[1]))
    db_config_string = "Database configuration : \n"
    db_config_string += "\t- Database name : " + name + "\n"
    db_config_string += "\t- View name : " + feat + "\t View shape : " + str(
        shape) + "\n"
    db_config_string += "\t- Learning Rate : " + str(learning_rate) + "\n"
    db_config_string += "\t- Labels used : " + ", ".join(
        class_labels_names) + "\n"
    db_config_string += "\t- Number of cross validation folds : " + str(
        k_folds.n_splits) + "\n\n"
    return db_config_string
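# Illustrative usage (a sketch, not part of the original file; the argument
# values are made up, and `k_folds` only needs to expose an `n_splits`
# attribute, e.g. a scikit-learn StratifiedKFold):
#
#     from sklearn.model_selection import StratifiedKFold
#     db_string = get_db_config_string("awa", "view0", (train_idx, test_idx),
#                                      (500, 40), ["cat", "dog"],
#                                      StratifiedKFold(n_splits=5))
#     # db_string then lists the database name, view shape, train ratio,
#     # label names and fold count, one "\t- " entry per line.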


def get_classifier_config_string(grid_search, nb_cores, n_iter, cl_kwargs,
                                 classifier,
                                 output_file_name, y_test):
    classifier_config_string = "Classifier configuration : \n"
classifier_config_string += "\t- " + classifier.get_config()[5:] + "\n" # classifier_config_string += "\t- " + classifier.get_config()[5:] + "\n"
classifier_config_string += "\t- Executed on " + str( # classifier_config_string += "\t- Executed on " + str(
nb_cores) + " core(s) \n" # nb_cores) + " core(s) \n"
if grid_search: # if grid_search:
classifier_config_string += "\t- Got configuration using randomized search with " + str( # classifier_config_string += "\t- Got configuration using randomized search with " + str(
n_iter) + " iterations \n" # n_iter) + " iterations \n"
classifier_config_string += "\n\n" # classifier_config_string += "\n\n"
classifier_interpret_string = classifier.get_interpretation( # classifier_interpret_string = classifier.get_interpretation(
output_file_name, # output_file_name,
y_test) # y_test)
return classifier_config_string, classifier_interpret_string # return classifier_config_string, classifier_interpret_string


def get_metric_score(metric, y_train, y_train_pred, y_test, y_test_pred):
    metric_module = getattr(metrics, metric[0])
    if metric[1] is not None:
        metric_kwargs = dict((index, metricConfig) for index, metricConfig in
                             enumerate(metric[1]))
    else:
        metric_kwargs = {}
    metric_score_train = metric_module.score(y_train, y_train_pred)
    metric_score_test = metric_module.score(y_test, y_test_pred)
    metric_score_string = "\tFor " + metric_module.get_config(
        **metric_kwargs) + " : "
    metric_score_string += "\n\t\t- Score on train : " + str(metric_score_train)
    metric_score_string += "\n\t\t- Score on test : " + str(metric_score_test)
    metric_score_string += "\n"
    return metric_score_string, [metric_score_train, metric_score_test]
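# get_metric_score returns both a human-readable block and the raw
# [train_score, test_score] pair; execute() below appends the block to the
# report and stores the pair in metrics_scores, keyed by the metric name.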


def execute(name, learning_rate, k_folds, nb_cores, grid_search, metrics_list,
            n_iter,
            feat, cl_type, cl_kwargs, class_labels_names,
            shape, y_train, y_train_pred, y_test, y_test_pred, time,
            random_state, classifier, output_file_name):
    metric_module, metric_kwargs = get_metric(metrics_list)
    train_score = metric_module.score(y_train, y_train_pred)
    test_score = metric_module.score(y_test, y_test_pred)
    string_analysis = "Classification on " + name + " database for " + feat + " with " + cl_type + ".\n\n"
    string_analysis += metrics_list[0][0] + " on train : " + str(
        train_score) + "\n" + \
                       metrics_list[0][0] + " on test : " + str(
        test_score) + "\n\n"
    # NOTE: `learning_rate` here actually carries the (train, test) indices
    # that get_db_config_string expects as `classification_indices`.
    string_analysis += get_db_config_string(name, feat, learning_rate, shape,
                                            class_labels_names, k_folds)
    classifier_config_string, classifier_interpret_string = get_classifier_config_string(
        grid_search, nb_cores, n_iter, cl_kwargs, classifier, output_file_name,
        y_test)
    string_analysis += classifier_config_string
    metrics_scores = {}
    for metric in metrics_list:
        metric_string, metric_score = get_metric_score(metric, y_train,
                                                       y_train_pred, y_test,
                                                       y_test_pred)
        string_analysis += metric_string
        metrics_scores[metric[0]] = metric_score
    string_analysis += "\n\n Classification took " + str(hms(seconds=int(time)))
    string_analysis += "\n\n Classifier Interpretation : \n"
    string_analysis += classifier_interpret_string

    image_analysis = {}
    return string_analysis, image_analysis, metrics_scores
from .. import metrics

from ..utils.base import get_metric

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def print_metric_score(metric_scores, metric_list):
    """
    Build a string reporting the train and test scores of each metric.

    Parameters
    ----------
    metric_scores : dict of [train_score, test_score] pairs, indexed by metric name

    metric_list : list of metrics, as (name, configuration) tuples

    Returns
    -------
    metric_score_string : string containing all metric results
    """
    metric_score_string = "\n\n"
    for metric in metric_list:
        metric_module = getattr(metrics, metric[0])
        if metric[1] is not None:
            metric_kwargs = dict(
                (index, metricConfig) for index, metricConfig in
                enumerate(metric[1]))
        else:
            metric_kwargs = {}
        metric_score_string += "\tFor " + metric_module.get_config(
            **metric_kwargs) + " : "
        metric_score_string += "\n\t\t- Score on train : " + str(
            metric_scores[metric[0]][0])
        metric_score_string += "\n\t\t- Score on test : " + str(
            metric_scores[metric[0]][1])
        metric_score_string += "\n\n"
    return metric_score_string
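# Illustrative output (values made up): for a dict such as
# {"accuracy_score": [0.9, 0.8]}, the returned string contains one
# "\tFor <metric config> :" block per metric, followed by indented
# "- Score on train" and "- Score on test" lines.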


def get_total_metric_scores(metric, train_labels, test_labels,
                            validation_indices,
                            learning_indices, labels):
    """
    Parameters
    ----------
    metric : tuple of (metric name, metric configuration)

    train_labels : predicted labels on the train set

    test_labels : predicted labels on the test set

    validation_indices : indices of the test (validation) examples

    learning_indices : indices of the train examples

    labels : ground-truth labels of all examples

    Returns
    -------
    list of [train_score, test_score]
    """
    metric_module = getattr(metrics, metric[0])
    if metric[1] is not None:
        metric_kwargs = dict((index, metricConfig) for index, metricConfig in
                             enumerate(metric[1]))
    else:
        metric_kwargs = {}
    train_score = metric_module.score(labels[learning_indices], train_labels,
                                      **metric_kwargs)
    test_score = metric_module.score(labels[validation_indices], test_labels,
                                     **metric_kwargs)
    return [train_score, test_score]


def get_metrics_scores(metrics, train_labels, test_labels,
                       validation_indices, learning_indices, labels):
    metrics_scores = {}
    for metric in metrics:
        metrics_scores[metric[0]] = get_total_metric_scores(metric,
                                                            train_labels,
                                                            test_labels,
                                                            validation_indices,
                                                            learning_indices,
                                                            labels)
    return metrics_scores
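# The returned dict maps each metric name to its [train_score, test_score]
# pair. Note that the `metrics` parameter (the list of metric tuples) shadows
# the imported `metrics` module inside this function only.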


def execute(classifier, pred_train_labels, pred_test_labels,
            classification_indices, labels_dictionary, views, name, k_folds,
            metrics_list, labels, directory):
    """
    Parameters
    ----------
    classifier : the multiview classifier that was used

    pred_train_labels : predicted labels on the train set

    pred_test_labels : predicted labels on the test set

    classification_indices : tuple of (learning indices, validation indices)

    labels_dictionary : dict whose values are the label names

    views : names of the views used for classification

    name : name of the database

    k_folds : cross-validation folds object (exposing n_splits)

    metrics_list : list of metrics, as (name, configuration) tuples

    labels : ground-truth labels of all examples

    directory : output directory passed to the classifier's interpretation method

    Returns
    -------
    tuple of (string_analysis, images_analysis, metrics_scores)
    """
    classifier_name = classifier.short_name
    learning_indices, validation_indices = classification_indices
    metric_module, metric_kwargs = get_metric(metrics_list)
    score_on_train = metric_module.score(labels[learning_indices],
                                         pred_train_labels,
                                         **metric_kwargs)
    score_on_test = metric_module.score(labels[validation_indices],
                                        pred_test_labels, **metric_kwargs)

    string_analysis = "\t\tResult for multiview classification with " + classifier_name + \
                      "\n\n" + metrics_list[0][0] + " :\n\t-On Train : " + str(
        score_on_train) + "\n\t-On Test : " + str(
        score_on_test) + \
                      "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \
                      ', '.join(
                          labels_dictionary.values()) + "\n\t-Views : " + ', '.join(
        views) + "\n\t-" + str(
        k_folds.n_splits) + \
                      " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.get_config()

    metrics_scores = get_metrics_scores(metrics_list, pred_train_labels,
                                        pred_test_labels,
                                        validation_indices, learning_indices,
                                        labels)
    string_analysis += print_metric_score(metrics_scores, metrics_list)
    string_analysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation(
        directory, labels)
    images_analysis = {}
    return string_analysis, images_analysis, metrics_scores
@@ -175,6 +175,8 @@ class MultiviewResultAnalyzer(ResultAnalyser):
                 hps_method, metrics_list, n_iter, class_label_names,
                 train_pred, test_pred, output_file_name, labels, database_name,
                 nb_cores, duration):
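        # With an "equiv" hyper-parameter search method, the iteration budget
        # is multiplied by the number of views so that each view receives an
        # equivalent number of search iterations.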
        if hps_method.endswith("equiv"):
            n_iter = n_iter*len(view_names)
        ResultAnalyser.__init__(self, classifier, classification_indices, k_folds,
                                hps_method, metrics_list, n_iter, class_label_names,
                                train_pred, test_pred, output_file_name, labels, database_name,
...
@@ -3,15 +3,100 @@ import unittest
import yaml
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from multiview_platform.tests.utils import rm_tmp, tmp_path
from multiview_platform.mono_multi_view_classifiers.utils import base


class FakeClassifier():
    pass


class Test_ResultAnalyzer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.rs = np.random.RandomState(42)
        cls.classifier = FakeClassifier()
        cls.n_examples = 50
        cls.n_classes = 3
        cls.train_length = 24
        cls.train_indices = cls.rs.choice(np.arange(cls.n_examples),
                                          size=cls.train_length,
                                          replace=False)
        cls.test_indices = np.array([i for i in range(cls.n_examples)
                                     if i not in cls.train_indices])
        cls.test_length = cls.test_indices.shape[0]
        cls.classification_indices = [cls.train_indices, cls.test_indices]
        cls.n_splits = 5
        cls.k_folds = StratifiedKFold(n_splits=cls.n_splits)
        cls.hps_method = "randomized_search"
        cls.metrics_list = [("accuracy_score", {}), ("f1_score", {})]
        cls.n_iter = 6
        cls.class_label_names = ["class{}".format(ind+1)
                                 for ind in range(cls.n_classes)]
        cls.train_pred = np.random.randint(0, cls.n_classes,
                                           size=cls.train_length)
        cls.test_pred = np.random.randint(0, cls.n_classes,
                                          size=cls.test_length)
        cls.directory = "fake_directory"
        cls.labels = np.random.randint(0, cls.n_classes,
                                       size=cls.n_examples)
        cls.database_name = "test_database"
        cls.nb_cores = 0.5
        cls.duration = -4
        cls.train_accuracy = accuracy_score(cls.labels[cls.train_indices],
                                            cls.train_pred)
        cls.test_accuracy = accuracy_score(cls.labels[cls.test_indices],
                                           cls.test_pred)
        cls.train_f1 = f1_score(cls.labels[cls.train_indices],
                                cls.train_pred, average='micro')
        cls.test_f1 = f1_score(cls.labels[cls.test_indices],
                               cls.test_pred, average='micro')
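
    # The expected accuracies and F1 scores above are computed directly with
    # scikit-learn, so the values produced by base.ResultAnalyser can be
    # checked against an independent reference in the tests below.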

    def test_simple(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method, self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred, self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)

    def test_get_metric_scores(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method,
                                 self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred,
                                 self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)
        train_score, test_score = RA.get_metric_scores("accuracy_score", {})
        self.assertEqual(train_score, self.train_accuracy)
        self.assertEqual(test_score, self.test_accuracy)

    def test_get_all_metrics_scores(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method,
                                 self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred,
                                 self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)
        RA.get_all_metrics_scores()
        self.assertEqual(RA.metric_scores["accuracy_score"][0],
                         self.train_accuracy)
        self.assertEqual(RA.metric_scores["accuracy_score"][1],
                         self.test_accuracy)
        self.assertEqual(RA.metric_scores["f1_score"][0],
                         self.train_f1)
        self.assertEqual(RA.metric_scores["f1_score"][1],
                         self.test_f1)

class Test_BaseClassifier(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
...