Commit bd0a2059 authored by Baptiste Bauvin

Merge branch 'develop'

parents 243ec127 c604231d
Pipeline #4840 failed
Showing with 285 additions and 67 deletions
@@ -14,7 +14,6 @@ from . import monoview_classifiers
from . import multiview_classifiers
from .monoview.exec_classif_mono_view import exec_monoview
from .multiview.exec_multiview import exec_multiview
from .result_analysis.noise_analysis import plot_results_noise
from .result_analysis.execution import analyze_iterations, analyze
from .utils import execution, dataset, configuration
from .utils.organization import secure_file_path
@@ -12,11 +12,6 @@ __status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
if multiclass:
mlb = MultiLabelBinarizer()
y_true = mlb.fit_transform([(label) for label in y_true])
y_pred = mlb.fit_transform([(label) for label in y_pred])
score = metric(y_true, y_pred, **kwargs)
return score
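For context, `metric` in the wrapper above is presumably the scorer imported at module level in each metrics file of the platform. The snippet below is only a hedged, self-contained sketch of that pattern, using sklearn's accuracy_score as a stand-in for whichever metric the real module wraps.

# Hedged sketch of the metric-wrapper pattern; accuracy_score is a stand-in.
from sklearn.metrics import accuracy_score as metric

def score(y_true, y_pred, multiclass=False, **kwargs):
    # The multiclass binarization branch removed by this commit is omitted:
    # the wrapper simply forwards to the underlying sklearn scorer.
    return metric(y_true, y_pred, **kwargs)

print(score([0, 1, 1], [0, 1, 0]))  # prints 0.666...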
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from matplotlib.patches import Patch
def plot_results_noise(directory, noise_results, metric_to_plot, name,
                       width=0.1):
    """Plot the mean score (with std error bars) of each classifier per noise
    level and save the same data as a CSV file in `directory`."""
    avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray",
                    "tab:olive", "tab:red", ]
    colors = {}
    legend_patches = []
    noise_levels = np.array([noise_level for noise_level, _ in noise_results])
    df = pd.DataFrame(
        columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], )
    if len(noise_results) > 1:
        width = np.min(np.diff(noise_levels))
    for noise_level, noise_result in noise_results:
        classifiers_names, meaned_metrics, metric_stds = [], [], []
        for classifier_result in noise_result:
            classifier_name = classifier_result[0].split("-")[0]
            # Compare metric names by value, not identity.
            if classifier_result[1] == metric_to_plot:
                classifiers_names.append(classifier_name)
                meaned_metrics.append(classifier_result[2])
                metric_stds.append(classifier_result[3])
                if classifier_name not in colors:
                    try:
                        colors[classifier_name] = avail_colors.pop(0)
                    except IndexError:
                        # No distinct color left: fall back to black.
                        colors[classifier_name] = "k"
        classifiers_names, meaned_metrics, metric_stds = np.array(
            classifiers_names), np.array(meaned_metrics), np.array(metric_stds)
        sorted_indices = np.argsort(-meaned_metrics)
        for index in sorted_indices:
            row = pd.DataFrame(
                {'noise_level': noise_level,
                 'classifier_name': classifiers_names[index],
                 'mean_score': meaned_metrics[index],
                 'score_std': metric_stds[index]}, index=[0])
            df = pd.concat([df, row])
            plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index],
                    width=0.5 * width, label=classifiers_names[index],
                    color=colors[classifiers_names[index]])
    for classifier_name, color in colors.items():
        legend_patches.append(Patch(facecolor=color, label=classifier_name))
    plt.legend(handles=legend_patches, loc='lower center',
               bbox_to_anchor=(0.5, 1.05), ncol=2)
    plt.ylabel(metric_to_plot)
    plt.title(name)
    plt.xticks(noise_levels)
    plt.xlabel("Noise level")
    plt.savefig(os.path.join(directory, name + "_noise_analysis.png"))
    plt.close()
    df.to_csv(os.path.join(directory, name + "_noise_analysis.csv"))
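A minimal usage sketch for plot_results_noise, assuming (from the indexing above) that noise_results is a list of (noise_level, results) pairs and that each result is a (run_name, metric_name, mean_score, score_std) tuple; the classifier names, scores and output directory below are purely illustrative.

# Hypothetical input mirroring how plot_results_noise indexes each result:
# result[0] -> "classifier-view" run name, result[1] -> metric name,
# result[2] -> mean score, result[3] -> score standard deviation.
noise_results = [
    (0.0, [("decision_tree-ViewN0", "accuracy_score", 0.92, 0.01),
           ("adaboost-ViewN0", "accuracy_score", 0.95, 0.02)]),
    (0.5, [("decision_tree-ViewN0", "accuracy_score", 0.81, 0.03),
           ("adaboost-ViewN0", "accuracy_score", 0.84, 0.02)]),
]

# Assumes the "results/" directory already exists; produces
# demo_noise_analysis.png and demo_noise_analysis.csv inside it.
plot_results_noise("results/", noise_results, "accuracy_score", "demo")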
@@ -166,7 +166,7 @@ def init_log_file(name, views, cl_type, log, debug, label,
"%Y_%m_%d-%H_%M") + "_" + label)
log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(
cl_type) + "-" + "_".join(views) + "-" + name + "-LOG.log"
if os.path.exists(result_directory):
if os.path.exists(result_directory): # pragma: no cover
raise NameError("The result dir already exists, wait 1 min and retry")
log_file_path = os.path.join(result_directory, log_file_name)
os.makedirs(os.path.dirname(log_file_path))
@@ -348,8 +348,10 @@ def find_dataset_names(path, type, names):
available_file_names))
return path, [used_name for used_name in available_file_names if
used_name in names]
else:
elif names[0] in available_file_names:
return path, names
else:
raise ValueError("The asked dataset ({}) is not available in {}. \n The available ones are {}".format(names[0], path, available_file_names))
def gen_argument_dictionaries(labels_dictionary, directories,
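A short, hedged usage sketch of the error path added to find_dataset_names above; "made_up_dataset" is a hypothetical name chosen so that the new branch raises, and the examples/data directory is the one already used by the tests further down.

from multiview_platform.mono_multi_view_classifiers.utils import execution

try:
    # "made_up_dataset" is not among the available .hdf5 files, so the call
    # should now fail fast instead of returning an unusable name.
    execution.find_dataset_names("examples/data", ".hdf5", ["made_up_dataset"])
except ValueError as error:
    # The message lists the requested name, the search path and the
    # datasets that are actually available.
    print(error)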
@@ -28,6 +28,21 @@ class Test_execute(unittest.TestCase):
def tearDown(self):
rm_tmp()
class Test_gen_single_monoview_arg_dictionary(unittest.TestCase):
def test_no_config(self):
conf = exec_classif.gen_single_monoview_arg_dictionary("classifier_name1",
{}, "nb_class",
"view_index",
"view_name",
"hps_kwargs")
self.assertEqual(conf, {"classifier_name1": {},
"view_name": "view_name",
"view_index": "view_index",
"classifier_name": "classifier_name1",
"nb_class": "nb_class",
"hps_kwargs":"hps_kwargs" } )
class Test_initBenchmark(unittest.TestCase):
def test_benchmark_wanted(self):
@@ -143,6 +143,21 @@ class Test_getHPs(unittest.TestCase):
self.metrics,
self.kwargs,
**self.hps_kwargs)
def test_simple_config(self):
kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule,
"None",
self.classifier_name,
self.classifier_class_name,
self.X,
self.y,
self.random_state,
self.output_file_name,
self.cv,
self.nb_cores,
self.metrics,
self.kwargs,
**self.hps_kwargs)
class Test_exec_monoview(unittest.TestCase):
@@ -37,3 +37,15 @@ class TestFunctions(unittest.TestCase):
def test_compute_possible_combinations(self):
n_possib = monoview_utils.compute_possible_combinations({"a":[1, 2], "b":{"c":[2,3]}, "d":CustomRandint(0,10)})
np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10]))
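Judging only from this assertion, compute_possible_combinations counts the candidate values per hyper-parameter: a list contributes its length, a nested dict is treated as unbounded (np.inf), and a CustomRandint(0, 10) contributes its 10-value range. Below is a rough, purely illustrative re-implementation of that counting rule, not the platform's actual code; the low/high attribute names on the distribution object are assumptions.

import numpy as np

def count_possible_combinations(param_distributions):
    # Illustrative counting rule consistent with the assertion above.
    counts = []
    for value in param_distributions.values():
        if isinstance(value, list):
            counts.append(len(value))   # finite set of candidate values
        elif isinstance(value, dict):
            counts.append(np.inf)       # nested search space: unbounded
        else:
            # Assumed interface of the distribution object (e.g. CustomRandint).
            counts.append(value.high - value.low)
    return np.array(counts)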
class FakeClf(monoview_utils.BaseMonoviewClassifier):
def __init__(self):
pass
class TestBaseMonoviewClassifier(unittest.TestCase):
def test_simple(self):
name = FakeClf().get_name_for_fusion()
self.assertEqual(name, 'Fake')
@@ -40,3 +40,50 @@ class TestBaseMultiviewClassifier(unittest.TestCase):
self.assertEqual(accepts, True)
accepts = FakeMVClassif(mc=False).accepts_multi_class(rs)
self.assertEqual(accepts, False)
self.assertRaises(ValueError, FakeMVClassif(mc=False).accepts_multi_class, rs,**{"n_samples":2, "n_classes":3})
class TestConfigGenerator(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.rs = np.random.RandomState(42)
def test_simple(self):
cfg_gen = multiview_utils.ConfigGenerator(["decision_tree", "decision_tree"])
sample = cfg_gen.rvs(self.rs)
self.assertEqual(sample, {'decision_tree': {'criterion': 'entropy',
'max_depth': 103,
'splitter': 'best'}})
class TestFunctions(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
cls.rs = np.random.RandomState(42)
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_get_available_monoview_classifiers(self):
avail = multiview_utils.get_available_monoview_classifiers()
self.assertEqual(avail, ['adaboost',
'decision_tree',
'gradient_boosting',
'knn',
'lasso',
'random_forest',
'sgd',
'svm_linear',
'svm_poly',
'svm_rbf'])
avail = multiview_utils.get_available_monoview_classifiers(need_probas=True)
self.assertEqual(avail, ['adaboost',
'decision_tree',
'gradient_boosting',
'knn',
'random_forest',
'svm_linear',
'svm_poly',
'svm_rbf'])
@@ -3,7 +3,7 @@ import unittest
import numpy as np
from multiview_platform.tests.utils import rm_tmp, tmp_path
from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset
from multiview_platform.mono_multi_view_classifiers.utils import execution
@@ -15,7 +15,138 @@ class Test_parseTheArgs(unittest.TestCase):
def test_empty_args(self):
args = execution.parse_the_args([])
# print args
class Test_init_log_file(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_simple(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=True,
debug=False,
label="No",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))),"tmp_tests", "test_dataset", "started" )))
def test_no_log(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=False,
debug=False,
label="No1",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
"tmp_tests", "test_dataset", "started")))
def test_debug(self):
res_dir = execution.init_log_file(name="test_dataset",
views=["V1", "V2", "V3"],
cl_type="",
log=True,
debug=True,
label="No",
result_directory=tmp_path,
args={})
self.assertTrue(res_dir.startswith(os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
"tmp_tests", "test_dataset", "debug_started")))
class Test_gen_k_folds(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.statsIter = 1
@classmethod
def tearDownClass(cls):
pass
def test_simple(self):
folds_list = execution.gen_k_folds(stats_iter=1,
nb_folds=4,
stats_iter_random_states=np.random.RandomState(42))
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 1)
def test_multiple_iters(self):
folds_list = execution.gen_k_folds(stats_iter=2,
nb_folds=4,
stats_iter_random_states=[np.random.RandomState(42), np.random.RandomState(43)])
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 2)
def test_list_rs(self):
folds_list = execution.gen_k_folds(stats_iter=1,
nb_folds=4,
stats_iter_random_states=[np.random.RandomState(42)])
self.assertEqual(folds_list[0].n_splits, 4)
self.assertEqual(len(folds_list), 1)
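These tests pin down only the interface of gen_k_folds: it returns one cross-validation splitter per statistical iteration, each configured with nb_folds splits, and it accepts either a single random state or a list of them. A minimal sketch consistent with that contract, assuming sklearn's StratifiedKFold as the splitter (the class actually used by the platform may differ):

from sklearn.model_selection import StratifiedKFold

def gen_k_folds_sketch(stats_iter, nb_folds, stats_iter_random_states):
    # One splitter per statistical iteration, each seeded independently.
    if stats_iter > 1:
        return [StratifiedKFold(n_splits=nb_folds, shuffle=True,
                                random_state=random_state)
                for random_state in stats_iter_random_states]
    # A single iteration may receive either a bare random state or a
    # one-element list, as the tests above exercise both forms.
    if isinstance(stats_iter_random_states, list):
        stats_iter_random_states = stats_iter_random_states[0]
    return [StratifiedKFold(n_splits=nb_folds, shuffle=True,
                            random_state=stats_iter_random_states)]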
class Test_init_views(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.statsIter = 1
@classmethod
def tearDownClass(cls):
pass
def test_simple(self):
views, views_indices, all_views = execution.init_views(test_dataset, ["ViewN1", "ViewN2"])
self.assertEqual(views, ["ViewN1", "ViewN2"])
self.assertEqual(views_indices, [1,2])
self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
views, views_indices, all_views = execution.init_views(test_dataset,None)
self.assertEqual(views, ["ViewN0", "ViewN1", "ViewN2"])
self.assertEqual(views_indices, range(3))
self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"])
class Test_find_dataset_names(unittest.TestCase):
@classmethod
def setUpClass(cls):
os.mkdir(tmp_path)
with open(os.path.join(tmp_path, "test.txt"), "w") as file_stream:
file_stream.write("test")
with open(os.path.join(tmp_path, "test1.txt"), "w") as file_stream:
file_stream.write("test")
@classmethod
def tearDownClass(cls):
rm_tmp()
def test_simple(self):
path, names = execution.find_dataset_names(tmp_path, ".txt", ["test"])
self.assertEqual(path, tmp_path)
self.assertEqual(names, ["test"])
path, names = execution.find_dataset_names(tmp_path, ".txt", ["test", 'test1'])
self.assertEqual(path, tmp_path)
self.assertEqual(names, ["test1", 'test'])
path, names = execution.find_dataset_names("examples/data", ".hdf5", ["all"])
self.assertEqual(names, ["doc_summit", "digits_doc"])
self.assertRaises(ValueError, execution.find_dataset_names, tmp_path+"test", ".txt",
["test"])
self.assertRaises(ValueError, execution.find_dataset_names, tmp_path, ".txt", ["ah"])
class Test_initStatsIterRandomStates(unittest.TestCase):
@@ -17,6 +17,8 @@ omit = */tests/*
*/datasets/*
*declare_classifier.py
*make_file_config.py
*execute*
*versions*
[coverage:report]