diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index b81a93048436ee5f12509c954236c832d5089c0a..aaf8743c0ae2668fca593f94bc35e04b36fca949 100644
--- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py
+++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
@@ -14,7 +14,6 @@ from . import monoview_classifiers
 from . import multiview_classifiers
 from .monoview.exec_classif_mono_view import exec_monoview
 from .multiview.exec_multiview import exec_multiview
-from .result_analysis.noise_analysis import plot_results_noise
 from .result_analysis.execution import analyze_iterations, analyze
 from .utils import execution, dataset, configuration
 from .utils.organization import secure_file_path
diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py b/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py
index 4ab88b73e7e3bbece56f333003f91ef807ac0cb8..ae21428b347caef47dc3bcc596404ea6d85c5dd5 100644
--- a/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py
+++ b/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py
@@ -12,11 +12,6 @@ __status__ = "Prototype"  # Production, Development, Prototype
 
 
 def score(y_true, y_pred, multiclass=False, **kwargs):
-    if multiclass:
-        mlb = MultiLabelBinarizer()
-        y_true = mlb.fit_transform([(label) for label in y_true])
-        y_pred = mlb.fit_transform([(label) for label in y_pred])
-
     score = metric(y_true, y_pred, **kwargs)
     return score
 
diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py
index 96973ba36f3858fc16eaa54179f7b6effcb90db2..b4fc81215d5b50564d98108262a332adf617932c 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py
@@ -1,56 +1,56 @@
-
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import os
-from matplotlib.patches import Patch
-
-
-def plot_results_noise(directory, noise_results, metric_to_plot, name,
-                       width=0.1):
-    avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
-                    "tab:olive", "tab:red", ]
-    colors = {}
-    lengend_patches = []
-    noise_levels = np.array([noise_level for noise_level, _ in noise_results])
-    df = pd.DataFrame(
-        columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], )
-    if len(noise_results) > 1:
-        width = np.min(np.diff(noise_levels))
-    for noise_level, noise_result in noise_results:
-        classifiers_names, meaned_metrics, metric_stds = [], [], []
-        for noise_result in noise_result:
-            classifier_name = noise_result[0].split("-")[0]
-            if noise_result[1] is metric_to_plot:
-                classifiers_names.append(classifier_name)
-                meaned_metrics.append(noise_result[2])
-                metric_stds.append(noise_result[3])
-                if classifier_name not in colors:
-                    try:
-                        colors[classifier_name] = avail_colors.pop(0)
-                    except IndexError:
-                        colors[classifier_name] = "k"
-        classifiers_names, meaned_metrics, metric_stds = np.array(
-            classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
-        sorted_indices = np.argsort(-meaned_metrics)
-        for index in sorted_indices:
-            row = pd.DataFrame(
-                {'noise_level': noise_level,
-                 'classifier_name': classifiers_names[index],
-                 'mean_score': meaned_metrics[index],
-                 'score_std': metric_stds[index]}, index=[0])
-            df = pd.concat([df, row])
-            plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index],
-                    width=0.5 * width, label=classifiers_names[index],
-                    color=colors[classifiers_names[index]])
-    for classifier_name, color in colors.items():
-        lengend_patches.append(Patch(facecolor=color, label=classifier_name))
-    plt.legend(handles=lengend_patches, loc='lower center',
-               bbox_to_anchor=(0.5, 1.05), ncol=2)
-    plt.ylabel(metric_to_plot)
-    plt.title(name)
-    plt.xticks(noise_levels)
-    plt.xlabel("Noise level")
-    plt.savefig(os.path.join(directory, name + "_noise_analysis.png"))
-    plt.close()
-    df.to_csv(os.path.join(directory, name + "_noise_analysis.csv"))
+#
+# import numpy as np
+# import pandas as pd
+# import matplotlib.pyplot as plt
+# import os
+# from matplotlib.patches import Patch
+#
+#
+# def plot_results_noise(directory, noise_results, metric_to_plot, name,
+#                        width=0.1):
+#     avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
+#                     "tab:olive", "tab:red", ]
+#     colors = {}
+#     lengend_patches = []
+#     noise_levels = np.array([noise_level for noise_level, _ in noise_results])
+#     df = pd.DataFrame(
+#         columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], )
+#     if len(noise_results) > 1:
+#         width = np.min(np.diff(noise_levels))
+#     for noise_level, noise_result in noise_results:
+#         classifiers_names, meaned_metrics, metric_stds = [], [], []
+#         for noise_result in noise_result:
+#             classifier_name = noise_result[0].split("-")[0]
+#             if noise_result[1] is metric_to_plot:
+#                 classifiers_names.append(classifier_name)
+#                 meaned_metrics.append(noise_result[2])
+#                 metric_stds.append(noise_result[3])
+#                 if classifier_name not in colors:
+#                     try:
+#                         colors[classifier_name] = avail_colors.pop(0)
+#                     except IndexError:
+#                         colors[classifier_name] = "k"
+#         classifiers_names, meaned_metrics, metric_stds = np.array(
+#             classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
+#         sorted_indices = np.argsort(-meaned_metrics)
+#         for index in sorted_indices:
+#             row = pd.DataFrame(
+#                 {'noise_level': noise_level,
+#                  'classifier_name': classifiers_names[index],
+#                  'mean_score': meaned_metrics[index],
+#                  'score_std': metric_stds[index]}, index=[0])
+#             df = pd.concat([df, row])
+#             plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index],
+#                     width=0.5 * width, label=classifiers_names[index],
+#                     color=colors[classifiers_names[index]])
+#     for classifier_name, color in colors.items():
+#         lengend_patches.append(Patch(facecolor=color, label=classifier_name))
+#     plt.legend(handles=lengend_patches, loc='lower center',
+#                bbox_to_anchor=(0.5, 1.05), ncol=2)
+#     plt.ylabel(metric_to_plot)
+#     plt.title(name)
+#     plt.xticks(noise_levels)
+#     plt.xlabel("Noise level")
+#     plt.savefig(os.path.join(directory, name + "_noise_analysis.png"))
+#     plt.close()
+#     df.to_csv(os.path.join(directory, name + "_noise_analysis.csv"))
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
index fab4079b6c8a17b48dfdcc28dc0795d3d888c7ad..3570bb2b685a9fe0e2cdded10f367177ad046a85 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
@@ -166,7 +166,7 @@ def init_log_file(name, views, cl_type, log, debug, label,
                                             "%Y_%m_%d-%H_%M") + "_" + label)
     log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(
         cl_type) + "-" + "_".join(views) + "-" + name + "-LOG.log"
-    if os.path.exists(result_directory):
+    if os.path.exists(result_directory):  # pragma: no cover
         raise NameError("The result dir already exists, wait 1 min and retry")
     log_file_path = os.path.join(result_directory, log_file_name)
     os.makedirs(os.path.dirname(log_file_path))
@@ -348,8 +348,10 @@ def find_dataset_names(path, type, names):
                                                      available_file_names))
         return path, [used_name for used_name in available_file_names if
                       used_name in names]
-    else:
+    elif names[0] in available_file_names:
         return path, names
+    else:
+        raise ValueError("The asked dataset ({}) is not available in {}. \n The available ones are {}".format(names[0], path, available_file_names))
 
 
 def gen_argument_dictionaries(labels_dictionary, directories,
diff --git a/multiview_platform/tests/test_exec_classif.py b/multiview_platform/tests/test_exec_classif.py
index ef3ed1172e0077e823baa9e87aa846aa34218cbc..71cdc9e8948cbd1a129091c08e8c6fdedde0f486 100644
--- a/multiview_platform/tests/test_exec_classif.py
+++ b/multiview_platform/tests/test_exec_classif.py
@@ -28,6 +28,21 @@ class Test_execute(unittest.TestCase):
     def tearDown(self):
         rm_tmp()
 
+class Test_gen_single_monoview_arg_dictionary(unittest.TestCase):
+
+    def test_no_config(self):
+        conf = exec_classif.gen_single_monoview_arg_dictionary("classifier_name1",
+                                                               {}, "nb_class",
+                                                               "view_index",
+                                                               "view_name",
+                                                               "hps_kwargs")
+        self.assertEqual(conf, {"classifier_name1": {},
+                                "view_name": "view_name",
+                                "view_index": "view_index",
+                                "classifier_name": "classifier_name1",
+                                "nb_class": "nb_class",
+                                "hps_kwargs": "hps_kwargs"})
+
 class Test_initBenchmark(unittest.TestCase):
 
     def test_benchmark_wanted(self):
diff --git a/multiview_platform/tests/test_mono_view/test_exec_classif_mono_view.py b/multiview_platform/tests/test_mono_view/test_exec_classif_mono_view.py
index c4059803861fa02c605072bbb1431e93330cd283..784bac2a394c614d1693a343a9e039ca20ef4e06 100644
--- a/multiview_platform/tests/test_mono_view/test_exec_classif_mono_view.py
+++ b/multiview_platform/tests/test_mono_view/test_exec_classif_mono_view.py
@@ -143,6 +143,21 @@ class Test_getHPs(unittest.TestCase):
                                                          self.metrics,
                                                          self.kwargs,
                                                          **self.hps_kwargs)
+    def test_simple_config(self):
+        kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule,
+                                                         "None",
+                                                         self.classifier_name,
+                                                         self.classifier_class_name,
+                                                         self.X,
+                                                         self.y,
+                                                         self.random_state,
+                                                         self.output_file_name,
+                                                         self.cv,
+                                                         self.nb_cores,
+                                                         self.metrics,
+                                                         self.kwargs,
+                                                         **self.hps_kwargs)
+
 
 
 class Test_exec_monoview(unittest.TestCase):
diff --git a/multiview_platform/tests/test_mono_view/test_monoview_utils.py b/multiview_platform/tests/test_mono_view/test_monoview_utils.py
index 7488e4778f79f9dadb7a0435f996d9daf32d7b72..b0f414ba102a1e55e8882d26052c1af695518695 100644
--- a/multiview_platform/tests/test_mono_view/test_monoview_utils.py
+++ b/multiview_platform/tests/test_mono_view/test_monoview_utils.py
@@ -36,4 +36,16 @@ class TestFunctions(unittest.TestCase):
 
     def test_compute_possible_combinations(self):
         n_possib = monoview_utils.compute_possible_combinations({"a":[1, 2], "b":{"c":[2,3]}, "d":CustomRandint(0,10)})
-        np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10]))
\ No newline at end of file
+        np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10]))
+
+class FakeClf(monoview_utils.BaseMonoviewClassifier):
+
+    def __init__(self):
+        pass
+
+
+class TestBaseMonoviewClassifier(unittest.TestCase):
+
+    def test_simple(self):
+        name = FakeClf().get_name_for_fusion()
+        self.assertEqual(name, 'Fake')
diff --git a/multiview_platform/tests/test_multi_view/test_multiview_utils.py b/multiview_platform/tests/test_multi_view/test_multiview_utils.py
index f68daf67eaec79e8e42f5368f219d1cc5d8d3a29..6cb880637e3d415844199fb103c6122184c3a143 100644
--- a/multiview_platform/tests/test_multi_view/test_multiview_utils.py
+++ b/multiview_platform/tests/test_multi_view/test_multiview_utils.py
@@ -40,3 +40,50 @@ class TestBaseMultiviewClassifier(unittest.TestCase):
         self.assertEqual(accepts, True)
         accepts = FakeMVClassif(mc=False).accepts_multi_class(rs)
         self.assertEqual(accepts, False)
+        self.assertRaises(ValueError, FakeMVClassif(mc=False).accepts_multi_class, rs,**{"n_samples":2, "n_classes":3})
+
+class TestConfigGenerator(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.rs = np.random.RandomState(42)
+
+    def test_simple(self):
+        cfg_gen = multiview_utils.ConfigGenerator(["decision_tree", "decision_tree"])
+        sample = cfg_gen.rvs(self.rs)
+        self.assertEqual(sample, {'decision_tree': {'criterion': 'entropy',
+                                                    'max_depth': 103,
+                                                    'splitter': 'best'}})
+
+class TestFunctions(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        os.mkdir(tmp_path)
+        cls.rs = np.random.RandomState(42)
+
+    @classmethod
+    def tearDownClass(cls):
+        rm_tmp()
+
+    def test_get_available_monoview_classifiers(self):
+        avail = multiview_utils.get_available_monoview_classifiers()
+        self.assertEqual(avail, ['adaboost',
+                                 'decision_tree',
+                                 'gradient_boosting',
+                                 'knn',
+                                 'lasso',
+                                 'random_forest',
+                                 'sgd',
+                                 'svm_linear',
+                                 'svm_poly',
+                                 'svm_rbf'])
+        avail = multiview_utils.get_available_monoview_classifiers(need_probas=True)
+        self.assertEqual(avail, ['adaboost',
+                                 'decision_tree',
+                                 'gradient_boosting',
+                                 'knn',
+                                 'random_forest',
+                                 'svm_linear',
+                                 'svm_poly',
+                                 'svm_rbf'])
diff --git a/multiview_platform/tests/test_utils/test_execution.py b/multiview_platform/tests/test_utils/test_execution.py
index cffb6de632950fa3c794beb40a61bb0cf1fadaa1..2bd7e8608e6a92140a9125ba1a27c6c76002cdc0 100644
--- a/multiview_platform/tests/test_utils/test_execution.py
+++ b/multiview_platform/tests/test_utils/test_execution.py
@@ -3,7 +3,7 @@ import unittest
 
 import numpy as np
 
-from multiview_platform.tests.utils import rm_tmp, tmp_path
+from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset
 from multiview_platform.mono_multi_view_classifiers.utils import execution
 
 
@@ -15,7 +15,138 @@ class Test_parseTheArgs(unittest.TestCase):
 
     def test_empty_args(self):
         args = execution.parse_the_args([])
-        # print args
+
+class Test_init_log_file(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        os.mkdir(tmp_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        rm_tmp()
+
+    def test_simple(self):
+        res_dir = execution.init_log_file(name="test_dataset",
+                                          views=["V1", "V2", "V3"],
+                                          cl_type="",
+                                          log=True,
+                                          debug=False,
+                                          label="No",
+                                          result_directory=tmp_path,
+                                          args={})
+        self.assertTrue(res_dir.startswith(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))),"tmp_tests", "test_dataset", "started" )))
+
+    def test_no_log(self):
+        res_dir = execution.init_log_file(name="test_dataset",
+                                          views=["V1", "V2", "V3"],
+                                          cl_type="",
+                                          log=False,
+                                          debug=False,
+                                          label="No1",
+                                          result_directory=tmp_path,
+                                          args={})
+        self.assertTrue(res_dir.startswith(os.path.join(
+            os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
+            "tmp_tests", "test_dataset", "started")))
+
+    def test_debug(self):
+        res_dir = execution.init_log_file(name="test_dataset",
+                                          views=["V1", "V2", "V3"],
+                                          cl_type="",
+                                          log=True,
+                                          debug=True,
+                                          label="No",
+                                          result_directory=tmp_path,
+                                          args={})
+        self.assertTrue(res_dir.startswith(os.path.join(
+            os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
+            "tmp_tests", "test_dataset", "debug_started")))
+
+class Test_gen_k_folds(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.statsIter = 1
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_simple(self):
+        folds_list = execution.gen_k_folds(stats_iter=1,
+                                           nb_folds=4,
+                                           stats_iter_random_states=np.random.RandomState(42))
+        self.assertEqual(folds_list[0].n_splits, 4)
+        self.assertEqual(len(folds_list), 1)
+
+    def test_multple_iters(self):
+        folds_list = execution.gen_k_folds(stats_iter=2,
+                                           nb_folds=4,
+                                           stats_iter_random_states=[np.random.RandomState(42), np.random.RandomState(43)])
+        self.assertEqual(folds_list[0].n_splits, 4)
+        self.assertEqual(len(folds_list), 2)
+
+    def test_list_rs(self):
+        folds_list = execution.gen_k_folds(stats_iter=1,
+                                           nb_folds=4,
+                                           stats_iter_random_states=[np.random.RandomState(42)])
+        self.assertEqual(folds_list[0].n_splits, 4)
+        self.assertEqual(len(folds_list), 1)
+
+
+class Test_init_views(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.statsIter = 1
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_simple(self):
+        views, views_indices, all_views = execution.init_views(test_dataset, ["ViewN1", "ViewN2"])
+        self.assertEqual(views, ["ViewN1", "ViewN2"])
+        self.assertEqual(views_indices, [1,2])
+        self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
+
+        views, views_indices, all_views = execution.init_views(test_dataset,None)
+        self.assertEqual(views, ["ViewN0", "ViewN1", "ViewN2"])
+        self.assertEqual(views_indices, range(3))
+        self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
+
+
+class Test_find_dataset_names(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        os.mkdir(tmp_path)
+        with open(os.path.join(tmp_path, "test.txt"), "w") as file_stream:
+            file_stream.write("test")
+        with open(os.path.join(tmp_path, "test1.txt"), "w") as file_stream:
+            file_stream.write("test")
+
+
+
+    @classmethod
+    def tearDownClass(cls):
+        rm_tmp()
+
+    def test_simple(self):
+        path, names = execution.find_dataset_names(tmp_path, ".txt", ["test"])
+        self.assertEqual(path, tmp_path)
+        self.assertEqual(names, ["test"])
+        path, names = execution.find_dataset_names(tmp_path, ".txt", ["test", 'test1'])
+        self.assertEqual(path, tmp_path)
+        self.assertEqual(names, ["test1", 'test'])
+        path, names = execution.find_dataset_names("examples/data", ".hdf5", ["all"])
+        self.assertEqual(names, ["doc_summit", "digits_doc"])
+        self.assertRaises(ValueError, execution.find_dataset_names, tmp_path+"test", ".txt",
+                          ["test"])
+        self.assertRaises(ValueError, execution.find_dataset_names, tmp_path, ".txt", ["ah"])
 
 
 class Test_initStatsIterRandomStates(unittest.TestCase):
diff --git a/setup.cfg b/setup.cfg
index ebf39f6bbf5995493862c14b54398767cd66de3a..43c4b4ed043e81147260b3d5e1bdec505d7f19c9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,6 +17,8 @@ omit = */tests/*
    */datasets/*
    *declare_classifier.py
    *make_file_config.py
+    *execute*
+    *versions*
 
 [coverage:report]