Commit bd0a2059 authored by Baptiste Bauvin

Merge branch 'develop'

parents 243ec127 c604231d
Pipeline #4840 failed
Showing with 285 additions and 67 deletions
@@ -14,7 +14,6 @@ from . import monoview_classifiers
from . import multiview_classifiers
from .monoview.exec_classif_mono_view import exec_monoview
from .multiview.exec_multiview import exec_multiview
from .result_analysis.noise_analysis import plot_results_noise
from .result_analysis.execution import analyze_iterations, analyze
from .utils import execution, dataset, configuration
from .utils.organization import secure_file_path
@@ -12,11 +12,6 @@ __status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
if multiclass:
mlb = MultiLabelBinarizer()
y_true = mlb.fit_transform([(label) for label in y_true])
y_pred = mlb.fit_transform([(label) for label in y_pred])
score = metric(y_true, y_pred, **kwargs)
return score
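For context, `metric` in the wrapper above is presumably the scorer imported at module level in each metrics file of the platform. The snippet below is only a hedged, self-contained sketch of that pattern, using sklearn's accuracy_score as a stand-in for whichever metric the real module wraps.

# Hedged sketch of the metric-wrapper pattern; accuracy_score is a stand-in.
from sklearn.metrics import accuracy_score as metric

def score(y_true, y_pred, multiclass=False, **kwargs):
    # The multiclass binarization branch removed by this commit is omitted:
    # the wrapper simply forwards to the underlying sklearn scorer.
    return metric(y_true, y_pred, **kwargs)

print(score([0, 1, 1], [0, 1, 0]))  # prints 0.666...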
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from matplotlib.patches import Patch
def plot_results_noise(directory, noise_results, metric_to_plot, name,
                       width=0.1):
    """Plot the mean score (with std error bars) of each classifier per noise
    level and save the same data as a CSV file in `directory`."""
    avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
                    "tab:olive", "tab:red", ]
    colors = {}
    legend_patches = []
    noise_levels = np.array([noise_level for noise_level, _ in noise_results])
    df = pd.DataFrame(
        columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], )
    if len(noise_results) > 1:
        width = np.min(np.diff(noise_levels))
    for noise_level, noise_result in noise_results:
        classifiers_names, meaned_metrics, metric_stds = [], [], []
        for classifier_result in noise_result:
            classifier_name = classifier_result[0].split("-")[0]
            # Compare metric names by value, not identity.
            if classifier_result[1] == metric_to_plot:
                classifiers_names.append(classifier_name)
                meaned_metrics.append(classifier_result[2])
                metric_stds.append(classifier_result[3])
                if classifier_name not in colors:
                    try:
                        colors[classifier_name] = avail_colors.pop(0)
                    except IndexError:
                        # No distinct color left: fall back to black.
                        colors[classifier_name] = "k"
        classifiers_names, meaned_metrics, metric_stds = np.array(
            classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
        sorted_indices = np.argsort(-meaned_metrics)
        for index in sorted_indices:
            row = pd.DataFrame(
                {'noise_level': noise_level,
                 'classifier_name': classifiers_names[index],
                 'mean_score': meaned_metrics[index],
                 'score_std': metric_stds[index]}, index=[0])
            df = pd.concat([df, row])
            plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index],
                    width=0.5 * width, label=classifiers_names[index],
                    color=colors[classifiers_names[index]])
    for classifier_name, color in colors.items():
        legend_patches.append(Patch(facecolor=color, label=classifier_name))
    plt.legend(handles=legend_patches, loc='lower center',
               bbox_to_anchor=(0.5, 1.05), ncol=2)
    plt.ylabel(metric_to_plot)
    plt.title(name)
    plt.xticks(noise_levels)
    plt.xlabel("Noise level")
    plt.savefig(os.path.join(directory, name + "_noise_analysis.png"))
    plt.close()
    df.to_csv(os.path.join(directory, name + "_noise_analysis.csv"))
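A minimal usage sketch for plot_results_noise, assuming (from the indexing above) that noise_results is a list of (noise_level, results) pairs and that each result is a (run_name, metric_name, mean_score, score_std) tuple; the classifier names, scores and output directory below are purely illustrative.

# Hypothetical input mirroring how plot_results_noise indexes each result:
# result[0] -> "classifier-view" run name, result[1] -> metric name,
# result[2] -> mean score, result[3] -> score standard deviation.
noise_results = [
    (0.0, [("decision_tree-ViewN0", "accuracy_score", 0.92, 0.01),
           ("adaboost-ViewN0", "accuracy_score", 0.95, 0.02)]),
    (0.5, [("decision_tree-ViewN0", "accuracy_score", 0.81, 0.03),
           ("adaboost-ViewN0", "accuracy_score", 0.84, 0.02)]),
]

# Assumes the "results/" directory already exists; produces
# demo_noise_analysis.png and demo_noise_analysis.csv inside it.
plot_results_noise("results/", noise_results, "accuracy_score", "demo")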
@@ -166,7 +166,7 @@ def init_log_file(name, views, cl_type, log, debug, label,
"%Y_%m_%d-%H_%M") + "_" + label)
log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(
cl_type) + "-" + "_".join(views) + "-" + name + "-LOG.log"
if os.path.exists(result_directory):
if os.path.exists(result_directory): # pragma: no cover
raise NameError("The result dir already exists, wait 1 min and retry")
log_file_path = os.path.join(result_directory, log_file_name)
os.makedirs(os.path.dirname(log_file_path))
@@ -348,8 +348,10 @@ def find_dataset_names(path, type, names):
available_file_names))
return path, [used_name for used_name in available_file_names if
used_name in names]
else:
elif names[0] in available_file_names:
return path, names
else:
raise ValueError("The asked dataset ({}) is not available in {}. \n The available ones are {}".format(names[0], path, available_file_names))
def gen_argument_dictionaries(labels_dictionary, directories,
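A short, hedged usage sketch of the error path added to find_dataset_names above; "made_up_dataset" is a hypothetical name chosen so that the new branch raises, and the examples/data directory is the one already used by the tests further down.

from multiview_platform.mono_multi_view_classifiers.utils import execution

try:
    # "made_up_dataset" is not among the available .hdf5 files, so the call
    # should now fail fast instead of returning an unusable name.
    execution.find_dataset_names("examples/data", ".hdf5", ["made_up_dataset"])
except ValueError as error:
    # The message lists the requested name, the search path and the
    # datasets that are actually available.
    print(error)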
@@ -28,6 +28,21 @@ class Test_execute(unittest.TestCase):
def tearDown(self):
rm_tmp()
class Test_gen_single_monoview_arg_dictionary(unittest.TestCase):
def test_no_config(self):
conf = exec_classif.gen_single_monoview_arg_dictionary("classifier_name1",
{}, "nb_class",
"view_index",
"view_name",
"hps_kwargs")
self.assertEqual(conf, {"classifier_name1": {},
"view_name": "view_name",
"view_index": "view_index",
"classifier_name": "classifier_name1",
"nb_class": "nb_class",
"hps_kwargs":"hps_kwargs" } )
class Test_initBenchmark(unittest.TestCase):
def test_benchmark_wanted(self):
@@ -143,6 +143,21 @@ class Test_getHPs(unittest.TestCase):
self.metrics,
self.kwargs,
**self.hps_kwargs)
def test_simple_config(self):
kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule,
"None",
self.classifier_name,
self.classifier_class_name,
self.X,
self.y,
self.random_state,
self.output_file_name,
self.cv,
self.nb_cores,
self.metrics,
self.kwargs,
**self.hps_kwargs)
class Test_exec_monoview(unittest.TestCase):
@@ -37,3 +37,15 @@ class TestFunctions(unittest.TestCase):
def test_compute_possible_combinations(self):
n_possib = monoview_utils.compute_possible_combinations({"a":[1, 2], "b":{"c":[2,3]}, "d":CustomRandint(0,10)})
np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10]))
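Judging only from this assertion, compute_possible_combinations counts the candidate values per hyper-parameter: a list contributes its length, a nested dict is treated as unbounded (np.inf), and a CustomRandint(0, 10) contributes its 10-value range. Below is a rough, purely illustrative re-implementation of that counting rule, not the platform's actual code; the low/high attribute names on the distribution object are assumptions.

import numpy as np

def count_possible_combinations(param_distributions):
    # Illustrative counting rule consistent with the assertion above.
    counts = []
    for value in param_distributions.values():
        if isinstance(value, list):
            counts.append(len(value))   # finite set of candidate values
        elif isinstance(value, dict):
            counts.append(np.inf)       # nested search space: unbounded
        else:
            # Assumed interface of the distribution object (e.g. CustomRandint).
            counts.append(value.high - value.low)
    return np.array(counts)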
class FakeClf(monoview_utils.BaseMonoviewClassifier):
def __init__(self):
pass
class TestBaseMonoviewClassifier(unittest.TestCase):
def test_simple(self):
name = FakeClf().get_name_for_fusion()
self.assertEqual(name, 'Fake')
@@ -40,3 +40,50 @@ class TestBaseMultiviewClassifier(unittest.TestCase):
self.assertEqual(accepts, True)
accepts = FakeMVClassif(mc=False).accepts_multi_class(rs)
self.assertEqual(accepts, False)
self.assertRaises(ValueError, FakeMVClassif(mc=False).accepts_multi_class, rs,**{"n_samples":2, "n_classes":3})
class TestConfigGenerator(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.rs = np.random.RandomState(42)
def test_simple(self):
cfg_gen = multiview_utils.ConfigGenerator(["decision_tree", "decision_tree"])
sample = cfg_gen.rvs(self.rs)
self.assertEqual(sample, {'decision_tree': {'criterion': 'entropy',
'max_depth': 103,
'splitter': 'best'}})
class TestFunctions(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
cls.rs = np.random.RandomState(42)
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_get_available_monoview_classifiers(self):
avail = multiview_utils.get_available_monoview_classifiers()
self.assertEqual(avail, ['adaboost',
'decision_tree',
'gradient_boosting',
'knn',
'lasso',
'random_forest',
'sgd',
'svm_linear',
'svm_poly',
'svm_rbf'])
avail = multiview_utils.get_available_monoview_classifiers(need_probas=True)
self.assertEqual(avail, ['adaboost',
'decision_tree',
'gradient_boosting',
'knn',
'random_forest',
'svm_linear',
'svm_poly',
'svm_rbf'])
@@ -3,7 +3,7 @@ import unittest
import numpy as np
from multiview_platform.tests.utils import rm_tmp, tmp_path
from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset
from multiview_platform.mono_multi_view_classifiers.utils import execution
@@ -15,7 +15,138 @@ class Test_parseTheArgs(unittest.TestCase):
def test_empty_args(self):
args = execution.parse_the_args([])
# print args
class Test_init_log_file(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_simple(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=True,
debug=False,
label="No",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))),"tmp_tests", "test_dataset", "started" )))
def test_no_log(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=False,
debug=False,
label="No1",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
"tmp_tests", "test_dataset", "started")))
def test_debug(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=True,
debug=True,
label="No",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
"tmp_tests", "test_dataset", "debug_started")))
class Test_gen_k_folds(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.statsIter = 1
@classmethod
def tearDownClass(cls):
pass
def test_simple(self):
folds_list = execution.gen_k_folds(stats_iter=1,
nb_folds=4,
stats_iter_random_states=np.random.RandomState(42))
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 1)
def test_multiple_iters(self):
folds_list = execution.gen_k_folds(stats_iter=2,
nb_folds=4,
stats_iter_random_states=[np.random.RandomState(42), np.random.RandomState(43)])
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 2)
def test_list_rs(self):
folds_list = execution.gen_k_folds(stats_iter=1,
nb_folds=4,
stats_iter_random_states=[np.random.RandomState(42)])
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 1)
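These tests pin down only the interface of gen_k_folds: it returns one cross-validation splitter per statistical iteration, each configured with nb_folds splits, and it accepts either a single random state or a list of them. A minimal sketch consistent with that contract, assuming sklearn's StratifiedKFold as the splitter (the class actually used by the platform may differ):

from sklearn.model_selection import StratifiedKFold

def gen_k_folds_sketch(stats_iter, nb_folds, stats_iter_random_states):
    # One splitter per statistical iteration, each seeded independently.
    if stats_iter > 1:
        return [StratifiedKFold(n_splits=nb_folds, shuffle=True,
                                random_state=random_state)
                for random_state in stats_iter_random_states]
    # A single iteration may receive either a bare random state or a
    # one-element list, as the tests above exercise both forms.
    if isinstance(stats_iter_random_states, list):
        stats_iter_random_states = stats_iter_random_states[0]
    return [StratifiedKFold(n_splits=nb_folds, shuffle=True,
                            random_state=stats_iter_random_states)]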
class Test_init_views(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.statsIter = 1
@classmethod
def tearDownClass(cls):
pass
def test_simple(self):
views, views_indices, all_views = execution.init_views(test_dataset, ["ViewN1", "ViewN2"])
self.assertEqual(views, ["ViewN1", "ViewN2"])
self.assertEqual(views_indices, [1,2])
self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
views, views_indices, all_views = execution.init_views(test_dataset,None)
self.assertEqual(views, ["ViewN0", "ViewN1", "ViewN2"])
self.assertEqual(views_indices, range(3))
self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
class Test_find_dataset_names(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
with open(os.path.join(tmp_path, "test.txt"), "w") as file_stream:
file_stream.write("test")
with open(os.path.join(tmp_path, "test1.txt"), "w") as file_stream:
file_stream.write("test")
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_simple(self):
path, names = execution.find_dataset_names(tmp_path, ".txt", ["test"])
self.assertEqual(path, tmp_path)
self.assertEqual(names, ["test"])
path, names = execution.find_dataset_names(tmp_path, ".txt", ["test", 'test1'])
self.assertEqual(path, tmp_path)
self.assertEqual(names, ["test1", 'test'])
path, names = execution.find_dataset_names("examples/data", ".hdf5", ["all"])
self.assertEqual(names, ["doc_summit", "digits_doc"])
self.assertRaises(ValueError, execution.find_dataset_names, tmp_path+"test", ".txt",
["test"])
self.assertRaises(ValueError, execution.find_dataset_names, tmp_path, ".txt", ["ah"])
class Test_initStatsIterRandomStates(unittest.TestCase):
@@ -17,6 +17,8 @@ omit = */tests/*
*/datasets/*
*declare_classifier.py
*make_file_config.py
*execute*
*versions*
[coverage:report]