From ce4209e65d0e9ac70645637c0e10510fe3259b47 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Fri, 24 Apr 2020 11:52:14 -0400 Subject: [PATCH] Tests coverage and automatic test suite" --- .gitignore | 2 + multiview_platform/datasets/base.py | 2 +- multiview_platform/execute.py | 3 +- .../exec_classif.py | 171 +------ .../metrics/framework.py | 78 ---- .../metrics/generic_score.py | 417 ------------------ .../metrics/jaccard_similarity_score.py | 27 -- .../metrics/roc_auc_score.py | 23 +- .../monoview/exec_classif_mono_view.py | 8 +- .../monoview/monoview_utils.py | 73 +-- .../monoview_classifiers/adaboost.py | 2 +- .../additions/SVCClassifier.py | 8 - .../multiview/exec_multiview.py | 13 +- .../multiview/multiview_utils.py | 45 +- .../additions/data_sample.py | 245 ---------- .../weighted_linear_early_fusion.py | 4 +- .../result_analysis/duration_analysis.py | 2 +- .../result_analysis/error_analysis.py | 4 +- .../result_analysis/feature_importances.py | 52 ++- .../result_analysis/metric_analysis.py | 6 +- .../mono_multi_view_classifiers/utils/base.py | 50 +-- .../utils/dataset.py | 135 +++--- .../utils/execution.py | 2 +- .../utils/hyper_parameter_search.py | 146 +++--- .../utils/multiclass.py | 102 +---- .../utils/multiview_result_analysis.py | 108 ++--- .../utils/organization.py | 2 +- multiview_platform/tests.py | 6 - multiview_platform/tests/test_exec_classif.py | 298 ++++--------- .../tests/test_metrics/test_accuracy_score.py | 14 - .../test_ExecClassifMonoView.py | 208 --------- .../test_mono_view/test_MonoviewUtils.py | 33 -- .../test_multi_view/test_multiview_utils.py | 42 ++ .../tests/test_utils/test_GetMultiviewDB.py | 4 +- .../tests/test_utils/test_base.py | 149 ++++++- .../tests/test_utils/test_configuration.py | 51 +-- .../tests/test_utils/test_dataset.py | 189 +++++++- multiview_platform/tests/utils.py | 11 +- 38 files changed, 816 insertions(+), 1919 deletions(-) delete mode 100644 multiview_platform/mono_multi_view_classifiers/metrics/framework.py delete mode 100644 multiview_platform/mono_multi_view_classifiers/metrics/generic_score.py delete mode 100644 multiview_platform/mono_multi_view_classifiers/metrics/jaccard_similarity_score.py delete mode 100644 multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/data_sample.py delete mode 100644 multiview_platform/tests.py delete mode 100644 multiview_platform/tests/test_metrics/test_accuracy_score.py delete mode 100644 multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py delete mode 100644 multiview_platform/tests/test_mono_view/test_MonoviewUtils.py diff --git a/.gitignore b/.gitignore index 226e3c14..372dd296 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ multiview_platform/examples/results/example_2_3/* multiview_platform/examples/results/example_3/* multiview_platform/examples/results/example_4/* multiview_platform/examples/results/example_5/* +multiview_platform/html_cov/ +multiview_platform/.coverage* \ No newline at end of file diff --git a/multiview_platform/datasets/base.py b/multiview_platform/datasets/base.py index 338f3e04..e559b866 100644 --- a/multiview_platform/datasets/base.py +++ b/multiview_platform/datasets/base.py @@ -1,9 +1,9 @@ +from __future__ import print_function import pickle import numpy as np import numpy.ma as ma from multiview_platform.datasets.data_sample import DataSample from six.moves import cPickle as pickle #for performance -from __future__ import print_function import numpy as np diff --git 
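The two new .gitignore entries ignore the artifacts that coverage.py writes when the test suite is measured: the .coverage* data files and an HTML report directory. (The base.py hunk in the same chunk also moves "from __future__ import print_function" up, since __future__ imports must come before any other statement in a module.) Exactly how those artifacts are produced is not part of the patch; a minimal sketch using the coverage API, with the report directory name taken from the ignored path, could look like this:

import coverage

def run_suite():
    # Stand-in for the real test run; any Python executed between
    # start() and stop() is recorded in the .coverage data file.
    return sum(range(10))

cov = coverage.Coverage()               # writes .coverage in the working directory
cov.start()
run_suite()
cov.stop()
cov.save()
cov.html_report(directory="html_cov")   # the HTML report the new entry ignores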
a/multiview_platform/execute.py b/multiview_platform/execute.py index 555b90d4..de9d5768 100644 --- a/multiview_platform/execute.py +++ b/multiview_platform/execute.py @@ -2,7 +2,8 @@ import os -def execute(config_path=None): + +def execute(config_path=None): # pragma: no cover from multiview_platform import versions as vs vs.test_versions() import sys diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index edffa453..b81a9304 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -28,7 +28,7 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def init_benchmark(cl_type, monoview_algos, multiview_algos, args): +def init_benchmark(cl_type, monoview_algos, multiview_algos): r"""Used to create a list of all the algorithm packages names used for the benchmark. First this function will check if the benchmark need mono- or/and multiview @@ -56,7 +56,7 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): benchmark = {"monoview": {}, "multiview": {}} if "monoview" in cl_type: - if monoview_algos == ['all']: + if monoview_algos == ['all']: # pragma: no cover benchmark["monoview"] = [name for _, name, isPackage in pkgutil.iter_modules( monoview_classifiers.__path__) @@ -66,7 +66,7 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): benchmark["monoview"] = monoview_algos if "multiview" in cl_type: - if multiview_algos == ["all"]: + if multiview_algos == ["all"]: # pragma: no cover benchmark["multiview"] = [name for _, name, isPackage in pkgutil.iter_modules( multiview_classifiers.__path__) @@ -77,7 +77,8 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): def init_argument_dictionaries(benchmark, views_dictionary, - nb_class, init_kwargs, hps_method, hps_kwargs): + nb_class, init_kwargs, hps_method, + hps_kwargs): # pragma: no cover argument_dictionaries = {"monoview": [], "multiview": []} if benchmark["monoview"]: argument_dictionaries["monoview"] = init_monoview_exps( @@ -95,7 +96,7 @@ def init_argument_dictionaries(benchmark, views_dictionary, def init_multiview_exps(classifier_names, views_dictionary, nb_class, - kwargs_init, hps_method, hps_kwargs): + kwargs_init, hps_method, hps_kwargs): # pragma: no cover multiview_arguments = [] for classifier_name in classifier_names: arguments = get_path_dict(kwargs_init[classifier_name]) @@ -134,7 +135,7 @@ def init_multiview_exps(classifier_names, views_dictionary, nb_class, def init_monoview_exps(classifier_names, views_dictionary, nb_class, kwargs_init, hps_method, - hps_kwargs): + hps_kwargs): # pragma: no cover r"""Used to add each monoview exeperience args to the list of monoview experiences args. First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -245,16 +246,6 @@ def set_element(dictionary, path, value): return dictionary -# def multiple_args(classifier_configuration): -# """Checks if multiple values were provided for at least one arg""" -# listed_args = [type(value) == list and len(value) > 1 for key, value in -# classifier_configuration.items()] -# if True in listed_args: -# return True -# else: -# return False - - def get_path_dict(multiview_classifier_args): """This function is used to generate a dictionary with each key being the path to the value. 
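The "# pragma: no cover" comments added throughout this patch are coverage.py exclusion markers: a function or branch tagged this way is left out of the coverage report entirely, which keeps CLI entry points, multiprocessing wrappers and plotting helpers from counting as untested lines. A small illustration of the marker (the pragma text itself can be customised in a .coveragerc, which this patch does not touch):

def add(a, b):
    # Ordinary code: measured by coverage.py and expected to be exercised
    # by the automatic test suite.
    return a + b


def plot_scores(scores):  # pragma: no cover
    # Excluded: when coverage.py builds the report, this whole function is
    # skipped, so an untested plotting helper does not lower the score.
    import matplotlib.pyplot as plt
    plt.plot(scores)
    plt.show()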
@@ -290,108 +281,6 @@ def is_dict_in(dictionary): paths.append(key) return paths - -# def gen_multiple_kwargs_combinations(cl_kwrags): -# """ -# Generates all the possible combination of the asked args -# -# Parameters -# ---------- -# cl_kwrags : dict -# The arguments, with one at least having multiple values -# -# Returns -# ------- -# kwargs_combination : list -# The list of all the combinations of arguments -# -# reduced_kwargs_combination : list -# The reduced names and values of the arguments will be used in the naming -# process of the different classifiers -# -# """ -# values = list(cl_kwrags.values()) -# listed_values = [[_] if type(_) is not list else _ for _ in values] -# values_cartesian_prod = [_ for _ in itertools.product(*listed_values)] -# keys = cl_kwrags.keys() -# kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) -# for values in values_cartesian_prod] -# -# reduce_dict = {DecisionTreeClassifier: "DT", } -# reduced_listed_values = [ -# [_ if type(_) not in reduce_dict else reduce_dict[type(_)] for _ in -# list_] for list_ in listed_values] -# reduced_values_cartesian_prod = [_ for _ in -# itertools.product(*reduced_listed_values)] -# reduced_kwargs_combination = [ -# dict((key, value) for key, value in zip(keys, values)) -# for values in reduced_values_cartesian_prod] -# return kwargs_combination, reduced_kwargs_combination - - -# def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, -# view_name=None, view_index=None, -# views_dictionary=None, -# framework="monoview"): -# """ -# Used in the case of mutliple arguments asked in the config file. -# Will combine the arguments to explore all the possibilities. -# -# Parameters -# ---------- -# nb_class : int, -# The number of classes in the dataset -# -# kwargs_init : dict -# The arguments given in the config file -# -# classifier : str -# The name of the classifier for which multiple arguments have been asked -# -# view_name : str -# The name of the view in consideration. -# -# view_index : int -# The index of the view in consideration -# -# views_dictionary : dict -# The dictionary of all the views indices and their names -# -# framework : str -# Either monoview or multiview -# -# Returns -# ------- -# args_dictionaries : list -# The list of all the possible combination of asked arguments -# -# """ -# if framework == "multiview": -# classifier_config = get_path_dict(kwargs_init[classifier]) -# else: -# classifier_config = kwargs_init[classifier] -# multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations( -# classifier_config) -# multiple_kwargs_dict = dict( -# (classifier + "_" + "_".join( -# map(str, list(reduced_dictionary.values()))), dictionary) -# for reduced_dictionary, dictionary in -# zip(reduced_multiple_kwargs_list, multiple_kwargs_list)) -# args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, -# arguments, -# nb_class, -# view_index=view_index, -# view_name=view_name) -# if framework == "monoview" else -# gen_single_multiview_arg_dictionary(classifier_name, -# arguments, -# nb_class, -# views_dictionary=views_dictionary) -# for classifier_name, arguments -# in multiple_kwargs_dict.items()] -# return args_dictionnaries - - def init_kwargs(args, classifiers_names, framework="monoview"): r"""Used to init kwargs thanks to a function in each monoview classifier package. 
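get_path_dict and is_dict_in, kept just above, flatten the nested multiview-classifier configuration into a dictionary keyed by the path leading to each leaf value. The platform's own implementation is not reproduced in this hunk; a minimal sketch of the same idea, with hypothetical names and a dotted-path convention chosen only for the example, is:

def flatten_config(config, prefix=""):
    # Walk a nested dict and key every leaf by the path that reaches it,
    # e.g. {"fusion": {"method": "early"}} -> {"fusion.method": "early"}.
    flat = {}
    for key, value in config.items():
        path = key if not prefix else prefix + "." + key
        if isinstance(value, dict):
            flat.update(flatten_config(value, prefix=path))
        else:
            flat[path] = value
    return flat


print(flatten_config({"fusion": {"method": "early", "weights": [0.5, 0.5]}}))
# -> {'fusion.method': 'early', 'fusion.weights': [0.5, 0.5]}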
@@ -457,44 +346,6 @@ def init_kwargs_func(args, benchmark): return kwargs -# def init_multiview_kwargs(args, classifiers_names): -# logging.debug("Start:\t Initializing multiview classifiers arguments") -# multiview_kwargs = {} -# for classifiers_name in classifiers_names: -# try: -# getattr(multiview_classifiers, classifiers_name) -# except AttributeError: -# raise AttributeError( -# classifiers_name + " is not implemented in mutliview_classifiers, " -# "please specify the name of the coressponding .py " -# "file in mutliview_classifiers") -# multiview_kwargs[classifiers_name] = args[classifiers_name] -# logging.debug("Done:\t Initializing multiview classifiers arguments") -# return multiview_kwargs - - -# def init_multiview_arguments(args, benchmark, views, views_indices, -# argument_dictionaries, random_state, directory, -# results_monoview, classification_indices): -# """Used to add each monoview exeperience args to the list of monoview experiences args""" -# logging.debug("Start:\t Initializing multiview classifiers arguments") -# multiview_arguments = [] -# if "multiview" in benchmark: -# for multiview_algo_name in benchmark["multiview"]: -# mutliview_module = getattr(multiview_classifiers, -# multiview_algo_name) -# -# multiview_arguments += mutliview_module.getArgs(args, benchmark, -# views, views_indices, -# random_state, -# directory, -# results_monoview, -# classification_indices) -# argument_dictionaries["multiview"] = multiview_arguments -# logging.debug("Start:\t Initializing multiview classifiers arguments") -# return argument_dictionaries - - def arange_metrics(metrics, metric_princ): """Used to get the metrics list in the right order so that the first one is the principal metric specified in args @@ -515,7 +366,7 @@ def arange_metrics(metrics, metric_princ): if metric_princ in metrics: metrics = dict((key, value) if not key == metric_princ else (key+"*", value) for key, value in metrics.items()) else: - raise AttributeError("{} not in metric pool ({})".format(metric_princ, + raise ValueError("{} not in metric pool ({})".format(metric_princ, metrics)) return metrics @@ -688,7 +539,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, argument_dictionaries=None, benchmark=None, views=None, views_indices=None, flag=None, labels=None, - track_tracebacks=False): + track_tracebacks=False): # pragma: no cover results_monoview, labels_names = benchmark_init(directory, classification_indices, labels, @@ -757,7 +608,7 @@ def exec_benchmark(nb_cores, stats_iter, directory, metrics, dataset_var, track_tracebacks, exec_one_benchmark_mono_core=exec_one_benchmark_mono_core, analyze=analyze, delete=delete_HDF5, - analyze_iterations=analyze_iterations): + analyze_iterations=analyze_iterations): # pragma: no cover r"""Used to execute the needed benchmark(s) on multicore or mono-core functions. Parameters @@ -841,7 +692,7 @@ def exec_benchmark(nb_cores, stats_iter, return results_mean_stds -def exec_classif(arguments): +def exec_classif(arguments): # pragma: no cover """ Runs the benchmark with the given arguments diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/framework.py b/multiview_platform/mono_multi_view_classifiers/metrics/framework.py deleted file mode 100644 index 6351bac8..00000000 --- a/multiview_platform/mono_multi_view_classifiers/metrics/framework.py +++ /dev/null @@ -1,78 +0,0 @@ -"""In ths file, we explain how to add a metric to the platform. 
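arange_metrics, modified above, now marks the principal metric by appending "*" to its key and raises a ValueError instead of an AttributeError when that metric is missing from the configured pool; the same convention explains defaults such as {"accuracy_score*": {}} elsewhere in the patch. A self-contained sketch of that behaviour (the function name here is a stand-in, not the platform's):

def mark_principal_metric(metrics, metric_princ):
    # Rename the principal metric "<name>*" so later analysis can spot it,
    # and fail early if it was never configured.
    if metric_princ not in metrics:
        raise ValueError("{} not in metric pool ({})".format(metric_princ, metrics))
    return dict((key + "*", value) if key == metric_princ else (key, value)
                for key, value in metrics.items())


print(mark_principal_metric({"accuracy_score": {}, "f1_score": {}}, "accuracy_score"))
# -> {'accuracy_score*': {}, 'f1_score': {}}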
- -In order to do that, on needs to add a file with the following functions -which are mandatory for the metric to work with the platform. -""" -import warnings - -warnings.warn("the framework module is deprecated", DeprecationWarning, - stacklevel=2) -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def score(y_true, y_pred, multiclass=False, **kwargs): - """Get the metric's score from the ground truth (``y_true``) and predictions (``y_pred``). - - Parameters - ---------- - y_true : array-like, shape = (n_samples,) - Target values (class labels). - - y_pred : array-like, shape = (n_samples,) - Predicted target values (class labels). - - multiclass : boolean (default=False) - Parameter specifying whether the target values are multiclass or not. - - kwargs : dict - The arguments stored in this dictionary must be keyed by string of - integers as "0", .., etc and decrypted in the function - - Returns - ------- - score : float - Returns the score of the prediction. - """ - score = 0.0 - return score - - -def get_scorer(**kwargs): - """Get the metric's scorer as in the sklearn.metrics package. - - Parameters - ---------- - kwargs : dict - The arguments stored in this dictionary must be keyed by string of - integers as "0", .., etc and decrypted in the function. These arguments - are a configuration of the metric. - - Returns - ------- - scorer : object - Callable object that returns a scalar score; greater is better. (cf sklearn.metrics.make_scorer) - """ - scorer = None - return scorer - - -def get_config(**kwargs): - """Get the metric's configuration as a string. - - Parameters - ---------- - kwargs : dict - The arguments stored in this dictionary must be keyed by string of - integers as "0", .., etc and decrypted in the function. These arguments - are a configuration of the metric. - - Returns - ------- - configString : string - The string describing the metric's configuration. 
- """ - - config_tring = "This is a framework" - return config_tring diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/generic_score.py b/multiview_platform/mono_multi_view_classifiers/metrics/generic_score.py deleted file mode 100644 index 81d896d8..00000000 --- a/multiview_platform/mono_multi_view_classifiers/metrics/generic_score.py +++ /dev/null @@ -1,417 +0,0 @@ -from sklearn.metrics import make_scorer - - -def score(y_true, y_pred, multiclass=False, type='f1_score', **kwargs): - """Arguments: - y_true: real labels - y_pred: predicted labels - - Keyword Arguments: - "0": weights to compute accuracy - - Returns: - Weighted accuracy score for y_true, y_pred""" - _type_names = ['accuracy_score', 'f1_score', 'fbeta_score', 'hamming_loss', - 'jaccard_similarity_score', 'precision_score', - 'recall_score', - 'roc_auc_score', 'zero_one_loss', 'zero_one_loss', - 'framework'] - if type not in _type_names: - raise NameError('type must be in :' + _type_names) - version = -1 - try: - kwargs0 = kwargs["0"] - except Exception: - kwargs0 = None - if type.startswith('matthews_corrcoef'): - from sklearn.metrics import matthews_corrcoef - score = matthews_corrcoef(y_true, y_pred) - elif type.startswith('accuracy_score'): - version = 0 - from sklearn.metrics import accuracy_score - score = accuracy_score(y_true, y_pred, sample_weight=kwargs0) - elif type.startswith('zero_one_loss'): - from sklearn.metrics import zero_one_loss - score = zero_one_loss(y_true, y_pred, sample_weight=kwargs0) - elif type.startswith('hamming_loss'): - from sklearn.metrics import hamming_loss - classes = kwargs0 - score = hamming_loss(y_true, y_pred) - elif type.startswith('f1_score'): - version = 1 - from sklearn.metrics import f1_score - - try: - labels = kwargs["1"] - except: - labels = None - try: - pos_label = kwargs["2"] - except: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - if multiclass: - average = "micro" - else: - average = "binary" - score = f1_score(y_true, y_pred, sample_weight=kwargs0, labels=labels, - pos_label=pos_label, average=average) - elif type.startswith('fbeta_score'): - from sklearn.metrics import fbeta_score - try: - beta = kwargs["1"] - except Exception: - beta = 1.0 - try: - labels = kwargs["2"] - except Exception: - labels = None - try: - pos_label = kwargs["3"] - except Exception: - pos_label = 1 - try: - average = kwargs["4"] - except Exception: - if multiclass: - average = "micro" - else: - average = "binary" - score = fbeta_score(y_true, y_pred, beta, sample_weight=kwargs0, - labels=labels, pos_label=pos_label, - average=average) - elif type.startswith('jaccard_similarity_score'): - from sklearn.metrics import jaccard_similarity_score - score = jaccard_similarity_score(y_true, y_pred, sample_weight=kwargs0) - elif type.startswith('log_loss'): - from sklearn.metrics import log_loss - try: - eps = kwargs["1"] - except Exception: - eps = 1e-15 - score = log_loss(y_true, y_pred, sample_weight=kwargs0, eps=eps) - elif type.startswith('precision_score'): - from sklearn.metrics import precision_score - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - if multiclass: - average = "micro" - else: - average = "binary" - score = precision_score(y_true, y_pred, - sample_weight=kwargs0, labels=labels, - pos_label=pos_label, average=average) - elif type.startswith('recall_score'): - from sklearn.metrics import recall_score - try: - 
labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - if multiclass: - average = "micro" - else: - average = "binary" - score = recall_score(y_true, y_pred, sample_weight=kwargs0, - labels=labels, - pos_label=pos_label, average=average) - elif type.startswith('roc_auc_score'): - from sklearn.metrics import roc_auc_score - from sklearn.preprocessing import MultiLabelBinarizer - try: - average = kwargs["1"] - except Exception: - if multiclass: - average = "micro" - else: - average = None - if multiclass: - mlb = MultiLabelBinarizer() - y_true = mlb.fit_transform([(label) for label in y_true]) - y_pred = mlb.fit_transform([(label) for label in y_pred]) - score = roc_auc_score(y_true, y_pred, - sample_weight=kwargs0, average=average) - else: - score = 0.0 - return score - - -def get_scorer(type='f1_score', **kwargs): - """Keyword Arguments: - "0": weights to compute accuracy - - Returns: - A weighted sklearn scorer for accuracy""" - _type_names = ['accuracy_score', 'f1_score', 'fbeta_score', 'hamming_loss', - 'jaccard_similarity_score', 'precision_score', - 'recall_score', - 'roc_auc_score', 'zero_one_loss', 'zero_one_loss', - 'framework'] - if type not in _type_names: - raise NameError('type must be in :' + _type_names) - try: - sample_weight = kwargs["0"] - except Exception: - sample_weight = None - if type.startswith('accuracy_score'): - version = 0 - from sklearn.metrics import accuracy_score as metric - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight) - elif type.startswith('f1_score'): - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except: - average = "binary" - from sklearn.metrics import f1_score as metric - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight, labels=labels, - pos_label=pos_label, average=average) - elif type.startswith('fbeta_score'): - try: - beta = kwargs["1"] - except Exception: - beta = 1.0 - try: - labels = kwargs["2"] - except Exception: - labels = None - try: - pos_label = kwargs["3"] - except Exception: - pos_label = 1 - try: - average = kwargs["4"] - except Exception: - average = "binary" - from sklearn.metrics import fbeta_score as metric - return make_scorer(metric, greater_is_better=True, beta=beta, - sample_weight=sample_weight, labels=labels, - pos_label=pos_label, average=average) - elif type.startswith('hamming_loss'): - try: - classes = kwargs["0"] - except Exception: - classes = None - from sklearn.metrics import hamming_loss as metric - return make_scorer(metric, greater_is_better=False, classes=classes) - elif type.startswith('jaccard_similarity_score'): - from sklearn.metrics import jaccard_similarity_score as metric - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight) - - elif type.startswith('log_loss'): - from sklearn.metrics import log_loss as metric - - try: - eps = kwargs["1"] - except Exception: - eps = 1e-15 - return make_scorer(metric, greater_is_better=False, - sample_weight=sample_weight, eps=eps) - elif type.startswith('matthews_corrcoef'): - from sklearn.metrics import matthews_corrcoef as metric - return make_scorer(metric, greater_is_better=True) - - elif type.startswith('precision_score'): - from sklearn.metrics import precision_score as metric - try: - labels = kwargs["1"] - except Exception: - 
labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - average = "binary" - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight, labels=labels, - pos_label=pos_label, - average=average) - elif type.startswith('recall_score'): - try: - sample_weight = kwargs["0"] - except Exception: - sample_weight = None - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - average = "binary" - from sklearn.metrics import recall_score as metric - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight, labels=labels, - pos_label=pos_label, - average=average) - elif type.startswith('roc_auc_score'): - try: - average = kwargs["1"] - except: - average = "micro" - from sklearn.metrics import roc_auc_score as metric - return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight, average=average) - elif type.startswith('zero_one_loss'): - from sklearn.metrics import zero_one_loss as metric - return make_scorer(metric, greater_is_better=False, - sample_weight=sample_weight) - else: - scorer = None - return scorer - - -def get_config(type='f1_score', **kwargs): - _type_names = ['accuracy_score', 'f1_score', 'fbeta_score', 'hamming_loss', - 'jaccard_similarity_score', 'precision_score', - 'recall_score', - 'roc_auc_score', 'zero_one_loss', 'zero_one_loss', - 'framework'] - if type not in _type_names: - raise NameError('type must be in :' + _type_names) - try: - sample_weight = kwargs["0"] - except Exception: - sample_weight = None - if type.startswith('accuracy_score'): - config_string = "Accuracy score using " + str( - sample_weight) + " as sample_weights (higher is better)" - elif type.startswith('f1_score'): - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - average = "binary" - config_string = "F1 score using " + str( - sample_weight) + " as sample_weights, " + str( - labels) + " as labels, " + str( - pos_label) \ - + " as pos_label, " + average + " as average (higher is better)" - - elif type.startswith('fbeta_score'): - try: - beta = kwargs["1"] - except Exception: - beta = 1.0 - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - average = "binary" - config_string = "F-beta score using " + str( - sample_weight) + " as sample_weights, " + str( - labels) + " as labels, " + str(pos_label) \ - + " as pos_label, " + average + " as average, " + str( - beta) + " as beta (higher is better)" - elif type.startswith('hamming_loss'): - try: - classes = kwargs["0"] - except Exception: - classes = None - config_string = "Hamming loss using " + str( - classes) + " as classes (lower is better)" - elif type.startswith('jaccard_similarity_score'): - config_string = "Jaccard_similarity score using " + str( - sample_weight) + " as sample_weights (higher is better)" - elif type.startswith('log_loss'): - try: - eps = kwargs["1"] - except Exception: - eps = 1e-15 - config_string = "Log loss using " + str( - sample_weight) + " as sample_weights, " + str( - eps) + " as eps (lower is better)" - elif type.startswith('matthews_corrcoef'): - config_string = "Matthews correlation 
coefficient (higher is better)" - elif type.startswith('precision_score'): - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except: - average = "binary" - config_string = "Precision score using " + str( - sample_weight) + " as sample_weights, " + str( - labels) + " as labels, " + str(pos_label) \ - + " as pos_label, " + average + " as average (higher is better)" - elif type.startswith('recall_score'): - try: - labels = kwargs["1"] - except Exception: - labels = None - try: - pos_label = kwargs["2"] - except Exception: - pos_label = 1 - try: - average = kwargs["3"] - except Exception: - average = "binary" - configString = "Recall score using " + str( - sample_weight) + " as sample_weights, " + str( - labels) + " as labels, " + str(pos_label) \ - + " as pos_label, " + average + "as average (higher is " \ - "better) " - elif type.startswith('roc_auc_score'): - configString = "ROC_AUC score using " + str( - sample_weight) + " as sample_weights, " + average + " as average (higher is better)" - elif type.startswith('zero_one_loss'): - configString = "Zero_one loss using " + str( - sample_weight) + " as sample_weights (lower is better)" - else: - config_tring = "This is a framework" - return configString diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/jaccard_similarity_score.py b/multiview_platform/mono_multi_view_classifiers/metrics/jaccard_similarity_score.py deleted file mode 100644 index 2d7b639a..00000000 --- a/multiview_platform/mono_multi_view_classifiers/metrics/jaccard_similarity_score.py +++ /dev/null @@ -1,27 +0,0 @@ -import warnings - -from sklearn.metrics import jaccard_similarity_score as metric -from sklearn.metrics import make_scorer - -warnings.warn("the jaccard_similarity_score module is deprecated", - DeprecationWarning, - stacklevel=2) -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def score(y_true, y_pred, multiclass=False, **kwargs): - score = metric(y_true, y_pred, **kwargs) - return score - - -def get_scorer(**kwargs): - return make_scorer(metric, greater_is_better=True, - **kwargs) - - -def get_config(**kwargs): - config_string = "Jaccard_similarity score using {} (higher is better)".format( - kwargs) - return config_string diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py b/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py index 927de412..4ab88b73 100644 --- a/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py +++ b/multiview_platform/mono_multi_view_classifiers/metrics/roc_auc_score.py @@ -17,32 +17,15 @@ def score(y_true, y_pred, multiclass=False, **kwargs): y_true = mlb.fit_transform([(label) for label in y_true]) y_pred = mlb.fit_transform([(label) for label in y_pred]) - score = metric(y_true, y_pred, sample_weight=sample_weight, average=average) + score = metric(y_true, y_pred, **kwargs) return score def get_scorer(**kwargs): - try: - sample_weight = kwargs["0"] - except: - sample_weight = None - try: - average = kwargs["1"] - except: - average = "micro" return make_scorer(metric, greater_is_better=True, - sample_weight=sample_weight, average=average) + **kwargs) def get_config(**kwargs): - try: - sample_weight = kwargs["0"] - except: - sample_weight = None - try: - average = kwargs["3"] - except Exception: - average = "micro" - configString = "ROC_AUC score using " + str( - 
sample_weight) + " as sample_weights, " + average + " as average (higher is better)" + configString = "ROC_AUC score using {}".format(kwargs) return configString diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 41835650..cbfd5d5f 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -33,7 +33,7 @@ def exec_monoview_multicore(directory, name, labels_names, path, random_state, labels, hyper_param_search="randomized_search", metrics=[["accuracy_score", None]], n_iter=30, - **args): + **args): # pragma: no cover dataset_var = HDF5Dataset( hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")) @@ -52,8 +52,8 @@ def exec_monoview_multicore(directory, name, labels_names, def exec_monoview(directory, X, Y, database_name, labels_names, classification_indices, k_folds, nb_cores, databaseType, path, - random_state, hyper_param_search="randomized_search", - metrics={"accuracy_score":{}}, n_iter=30, view_name="", + random_state, hyper_param_search="Random", + metrics={"accuracy_score*":{}}, n_iter=30, view_name="", hps_kwargs={}, **args): logging.debug("Start:\t Loading data") kwargs, \ @@ -223,7 +223,7 @@ def get_hyper_params(classifier_module, search_method, classifier_module_name, def save_results(string_analysis, output_file_name, full_labels_pred, y_train_pred, - y_train, images_analysis, y_test, confusion_matrix): + y_train, images_analysis, y_test, confusion_matrix): # pragma: no cover logging.info(string_analysis) output_text_file = open(output_file_name + 'summary.txt', 'w') output_text_file.write(string_analysis) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 076044b8..dcecfa6f 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -6,6 +6,7 @@ from matplotlib.ticker import FuncFormatter from scipy.stats import uniform, randint from ..utils.base import BaseClassifier, ResultAnalyser +from ..utils.hyper_parameter_search import CustomRandint, CustomUniform # Author-Info __author__ = "Baptiste Bauvin" @@ -75,41 +76,41 @@ def gen_test_folds_preds(X_train, y_train, KFolds, estimator): return test_folds_preds -class CustomRandint: - """Used as a distribution returning a integer between low and high-1. - It can be used with a multiplier agrument to be able to perform more complex generation - for example 10 e -(randint)""" - - def __init__(self, low=0, high=0, multiplier=""): - self.randint = randint(low, high) - self.multiplier = multiplier - - def rvs(self, random_state=None): - randinteger = self.randint.rvs(random_state=random_state) - if self.multiplier == "e-": - return 10 ** -randinteger - else: - return randinteger - - def get_nb_possibilities(self): - return self.randint.b - self.randint.a - - -class CustomUniform: - """Used as a distribution returning a float between loc and loc + scale.. 
- It can be used with a multiplier agrument to be able to perform more complex generation - for example 10 e -(float)""" - - def __init__(self, loc=0, state=1, multiplier=""): - self.uniform = uniform(loc, state) - self.multiplier = multiplier - - def rvs(self, random_state=None): - unif = self.uniform.rvs(random_state=random_state) - if self.multiplier == 'e-': - return 10 ** -unif - else: - return unif +# class CustomRandint: +# """Used as a distribution returning a integer between low and high-1. +# It can be used with a multiplier agrument to be able to perform more complex generation +# for example 10 e -(randint)""" +# +# def __init__(self, low=0, high=0, multiplier=""): +# self.randint = randint(low, high) +# self.multiplier = multiplier +# +# def rvs(self, random_state=None): +# randinteger = self.randint.rvs(random_state=random_state) +# if self.multiplier == "e-": +# return 10 ** -randinteger +# else: +# return randinteger +# +# def get_nb_possibilities(self): +# return self.randint.b - self.randint.a +# +# +# class CustomUniform: +# """Used as a distribution returning a float between loc and loc + scale.. +# It can be used with a multiplier agrument to be able to perform more complex generation +# for example 10 e -(float)""" +# +# def __init__(self, loc=0, state=1, multiplier=""): +# self.uniform = uniform(loc, state) +# self.multiplier = multiplier +# +# def rvs(self, random_state=None): +# unif = self.uniform.rvs(random_state=random_state) +# if self.multiplier == 'e-': +# return 10 ** -unif +# else: +# return unif class BaseMonoviewClassifier(BaseClassifier): @@ -179,7 +180,7 @@ class MonoviewResult(object): def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, - boosting_bound=None, set="train", zero_to_one=True): + boosting_bound=None, set="train", zero_to_one=True): # pragma: no cover if type(name) is not str: name = " ".join(name.getConfig().strip().split(" ")[:2]) f, ax = plt.subplots(nrows=1, ncols=1) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py index e32d0ae9..88a042ec 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py @@ -128,7 +128,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [step_pred for step_pred in self.staged_predict(X)]) return pred - def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): + def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): # pragma: no cover interpretString = "" interpretString += self.get_feature_importance(directory, base_file_name) interpretString += "\n\n Estimator error | Estimator weight\n" diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/additions/SVCClassifier.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/additions/SVCClassifier.py index 76220f21..06d6da20 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/additions/SVCClassifier.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/additions/SVCClassifier.py @@ -15,11 +15,3 @@ class SVCClassifier(SVC): ) self.classed_params = [] self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def 
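The commented-out CustomRandint and CustomUniform above now live in utils.hyper_parameter_search (see the new import at the top of monoview_utils). What the search code relies on is only that such objects expose an rvs(random_state=...) method, which is what scikit-learn's RandomizedSearchCV expects of a parameter distribution. A hedged sketch of that contract, with a toy distribution mimicking the "e-" multiplier described in the docstrings rather than the platform's own class:

from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier


class LogRandint:
    # Draws an integer k in [low, high) and returns 10 ** -k, the log-scale
    # behaviour the CustomRandint docstring calls the "e-" multiplier.
    def __init__(self, low, high):
        self._randint = randint(low, high)

    def rvs(self, random_state=None):
        return 10 ** -self._randint.rvs(random_state=random_state)


X, y = make_classification(n_samples=100, random_state=42)
search = RandomizedSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_distributions={"min_impurity_decrease": LogRandint(1, 5),
                         "max_depth": randint(1, 10)},
    n_iter=5, cv=3, random_state=42)
search.fit(X, y)
print(search.best_params_)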
getInterpret(self, directory, y_test): - interpretString = "" - return interpretString diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 8748b795..61d0bf16 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -46,8 +46,8 @@ def init_constants(kwargs, classification_indices, metrics, """ views = kwargs["view_names"] views_indices = kwargs["view_indices"] - if not metrics: - metrics = [["f1_score", None]] + if metrics is None: + metrics = {"f1_score*":{}} classifier_name = kwargs["classifier_name"] classifier_config = kwargs[classifier_name] learning_rate = len(classification_indices[0]) / float( @@ -68,11 +68,11 @@ def init_constants(kwargs, classification_indices, metrics, output_file_name = os.path.join(directory, base_file_name) return classifier_name, t_start, views_indices, \ classifier_config, views, learning_rate, labels, output_file_name,\ - directory, base_file_name + directory, base_file_name, metrics def save_results(string_analysis, images_analysis, output_file_name, - confusion_matrix): + confusion_matrix): # pragma: no cover """ Save results in derectory @@ -128,7 +128,7 @@ def exec_multiview_multicore(directory, core_index, name, learning_rate, database_type, path, labels_dictionary, random_state, labels, hyper_param_search=False, nb_cores=1, metrics=None, - n_iter=30, **arguments): + n_iter=30, **arguments): # pragma: no cover """ execute multiview process on @@ -246,7 +246,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices, labels, \ output_file_name,\ directory,\ - base_file_name = init_constants(kwargs, classification_indices, metrics, name, + base_file_name, \ + metrics = init_constants(kwargs, classification_indices, metrics, name, nb_cores, k_folds, dataset_var, directory) logging.debug("Done:\t Initialize constants") diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index 4144dd02..9ad93b6c 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -7,10 +7,10 @@ from ..utils.base import BaseClassifier, ResultAnalyser from ..utils.dataset import RAMDataset, get_examples_views_indices -class FakeEstimator(): - - def predict(self, X, example_indices=None, view_indices=None): - return np.zeros(example_indices.shape[0]) +# class FakeEstimator(): +# +# def predict(self, X, example_indices=None, view_indices=None): +# return np.zeros(example_indices.shape[0]) class BaseMultiviewClassifier(BaseClassifier): @@ -32,31 +32,31 @@ class BaseMultiviewClassifier(BaseClassifier): self.used_views = None @abstractmethod - def fit(self, X, y, train_indices=None, view_indices=None): + def fit(self, X, y, train_indices=None, view_indices=None): # pragma: no cover pass @abstractmethod - def predict(self, X, example_indices=None, view_indices=None): + def predict(self, X, example_indices=None, view_indices=None): # pragma: no cover pass - def _check_views(self, view_indices): + def _check_views(self, view_indices): # pragma: no cover if self.used_views is not None and not np.array_equal(np.sort(self.used_views), np.sort(view_indices)): raise ValueError('Used {} views to fit, and trying to predict on 
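_check_views, shown above, refuses to predict with a different set of views than the ones seen at fit time. Its logic can be exercised on its own (rewrapped here as a standalone helper purely for illustration):

import numpy as np


def check_views(used_views, view_indices):
    # Same guard as _check_views: the sorted view indices used for fitting
    # must match the ones requested at prediction time.
    if used_views is not None and not np.array_equal(np.sort(used_views),
                                                     np.sort(view_indices)):
        raise ValueError("Used {} views to fit, and trying to predict on {}"
                         .format(used_views, view_indices))


check_views(np.array([0, 1]), np.array([1, 0]))   # same views, no error
try:
    check_views(np.array([0, 1]), np.array([0, 2]))
except ValueError as exc:
    print(exc)   # Used [0 1] views to fit, and trying to predict on [0 2]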
{}'.format(self.used_views, view_indices)) - def to_str(self, param_name): - if param_name in self.weird_strings: - string = "" - if "class_name" in self.weird_strings[param_name]: - string += self.get_params()[param_name].__class__.__name__ - if "config" in self.weird_strings[param_name]: - string += "( with " + self.get_params()[ - param_name].params_to_string() + ")" - else: - string += self.weird_strings[param_name]( - self.get_params()[param_name]) - return string - else: - return str(self.get_params()[param_name]) + # def to_str(self, param_name): + # if param_name in self.weird_strings: + # string = "" + # if "class_name" in self.weird_strings[param_name]: + # string += self.get_params()[param_name].__class__.__name__ + # if "config" in self.weird_strings[param_name]: + # string += "( with " + self.get_params()[ + # param_name].params_to_string() + ")" + # else: + # string += self.weird_strings[param_name]( + # self.get_params()[param_name]) + # return string + # else: + # return str(self.get_params()[param_name]) def accepts_multi_class(self, random_state, n_samples=10, dim=2, n_classes=3, n_views=2): @@ -66,7 +66,7 @@ class BaseMultiviewClassifier(BaseClassifier): n_samples, n_classes)) fake_mc_X = RAMDataset( - views=[random_state.random_integers(low=0, high=100, + views=[random_state.randint(low=0, high=101, size=(n_samples, dim)) for i in range(n_views)], labels=[class_index @@ -85,7 +85,6 @@ class BaseMultiviewClassifier(BaseClassifier): fake_mc_y = np.asarray(fake_mc_y) try: self.fit(fake_mc_X, fake_mc_y) - self.predict(fake_mc_X) return True except ValueError: return False diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/data_sample.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/data_sample.py deleted file mode 100644 index f584284b..00000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/data_sample.py +++ /dev/null @@ -1,245 +0,0 @@ -# -*- coding: utf-8 -*- - -"""This module contains the DataSample class and Splearn_array class -The DataSample class encapsulates a sample 's components -nbL and nbEx numbers, -Splearn_array class inherit from numpy ndarray and contains a 2d data ndarray -with the shape - -==== ==== ==== ==== ==== -x x x x -1 -x x x x x -x x -1 -1 -1 -x -1 -1 -1 -1 --1 -1 -1 -1 -1 -==== ==== ==== ==== ==== - -where -1 a indicates a empty cell, -the number nbL and nbEx and , the fourth dictionaries for sample, -prefix, suffix and factor where they are computed -""" -import numpy as np -import numpy.ma as ma - - -class Metriclearn_array(ma.MaskedArray, np.ndarray): - """ - - Parameters - ---------- - data: - view_ind: - - Returns - ------- - - """ - """Splearn_array inherit from numpy ndarray - - :Example: - - >>> from metriclearning.datasets.base import load_data - >>> from metriclearning.datasets.get_dataset_path import get_dataset_path - >>> train_file = '' # '4.spice.train' - >>> data = load_data(adr=get_dataset_path(train_file)) - >>> print(data.__class__) - >>> data.data - - """ - - def __new__(cls, data, view_ind=None): - - shapes_int = [] - index = 0 - new_data = np.ndarray([]) - n_views = len(data) - thekeys = None - view_ind_self = None - if isinstance(data, dict): - n_views = len(data) - for key, dat_values in data.items(): - new_data = cls._populate_new_data(index, dat_values, new_data) - shapes_int.append(dat_values.shape[0]) - index += 1 - thekeys = data.keys() - if isinstance(data, np.ndarray) and view_ind is None and data.ndim == 
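The switch from random_state.random_integers(low=0, high=100, ...) to random_state.randint(low=0, high=101, ...) here (and in utils/base.py further down) follows NumPy's deprecation of random_integers: randint excludes its upper bound, so high moves from 100 to 101 to keep drawing from the same 0-100 range. For instance:

import numpy as np

rng = np.random.RandomState(42)
# randint's upper bound is exclusive, so high=101 reproduces the inclusive
# 0..100 range that the deprecated random_integers(low=0, high=100) gave.
draws = rng.randint(low=0, high=101, size=1000)
print(draws.min(), draws.max())   # both stay within 0 and 100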
1: - n_views = data.shape[0] - for dat_values in data: - shapes_int.append(dat_values.shape[0]) - new_data = cls._populate_new_data(index, dat_values, new_data) - index += 1 - elif isinstance(data, np.ndarray) and data.ndim > 1: - if view_ind is not None: - n_views = view_ind.shape[0] - shapes_int = [in2 - in1 for in1, in2 in - zip(view_ind, view_ind[1:])] - elif view_ind is None: - if data.shape[1] > 1: - view_ind = np.array([0, data.shape[1] // 2, data.shape[1]]) - else: - view_ind = np.array([0, data.shape[1]]) - view_ind, n_views = cls._validate_views_ind(view_ind, - data.shape[1]) - new_data = data - view_ind_self = view_ind - - # obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls) - # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask) - if hasattr(new_data, "mask"): - obj = ma.masked_array(new_data.data, new_data.mask).view(cls) - elif hasattr(new_data, "data") and \ - hasattr(new_data, "shape") and len(new_data.shape) > 0: - obj = np.ndarray(new_data.data).view(cls) - else: - obj = np.recarray.__new__(cls, shape=(), dtype=np.float) - obj.views_ind = view_ind_self - obj.shapes_int = shapes_int - obj.n_views = n_views - obj.keys = thekeys - return obj - - @staticmethod - def _populate_new_data(index, dat_values, new_data): - if index == 0: - if isinstance(dat_values, ma.MaskedArray) or isinstance(dat_values, - np.ndarray): - new_data = dat_values - else: - new_data = dat_values.view( - ma.MaskedArray) # ma.masked_array(dat_values, mask=ma.nomask) dat_values.view(ma.MaskedArray) #( - new_data.mask = ma.nomask - else: - if isinstance(dat_values, ma.MaskedArray) or isinstance(dat_values, - np.ndarray): - new_data = ma.hstack((new_data, dat_values)) - else: - new_data = ma.hstack((new_data, dat_values.view( - ma.MaskedArray))) # ma.masked_array(dat_values, mask=ma.nomask - return new_data - - def __array_finalize__(self, obj): - if obj is None: return - super(Metriclearn_array, self).__array_finalize__(obj) - self.shapes_int = getattr(obj, 'shapes_int', None) - self.n_views = getattr(obj, 'n_views', None) - self.keys = getattr(obj, 'keys', None) - self.views_ind_self = getattr(obj, 'views_ind_self', None) - - def get_col(self, view, col): - start = np.sum(np.asarray(self.shapes_int[0: view])) - return self.data[start + col, :] - - def get_view(self, view): - start = int(np.sum(np.asarray(self.shapes_int[0: view]))) - stop = int(start + self.shapes_int[view]) - return self.data[:, start:stop] - - def set_view(self, view, data): - start = int(np.sum(np.asarray(self.shapes_int[0: view]))) - stop = int(start + self.shapes_int[view]) - if stop - start == data.shape[0] and data.shape[1] == self.data.shape[ - 1]: - self.data[:, start:stop] = data - else: - raise ValueError( - "shape of data does not match (%d, %d)" % stop - start % - self.data.shape[1]) - - def get_raw(self, view, raw): - start = np.sum(np.asarray(self.shapes_int[0: view])) - stop = np.sum(np.asarray(self.shapes_int[0: view + 1])) - return self.data[start:stop, raw] - - def add_view(self, v, data): - if len(self.shape) > 0: - if data.shape[0] == self.data.shape[0]: - indice = self.shapes_int[v] - np.insert(self.data, data, indice + 1, axis=0) - self.shapes_int.append(data.shape[1]) - self.n_views += 1 - else: - raise ValueError("New view can't initialazed") - # self.shapes_int= [data.shape[1]] - # self.data.reshape(data.shape[0],) - # np.insert(self.data, data, 0) - # self.n_views = 1 - - def _todict(self): - dico = {} - for view in 
range(self.n_views): - dico[view] = self.X.get_view(view) - return dico - - def _validate_views_ind(self, views_ind, n_features): - """Ensure proper format for views_ind and return number of views.""" - views_ind = np.array(views_ind) - if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1: - if np.any(views_ind[:-1] >= views_ind[1:]): - raise ValueError("Values in views_ind must be sorted.") - if views_ind[0] < 0 or views_ind[-1] > n_features: - raise ValueError("Values in views_ind are not in a correct " - + "range for the provided data.") - self.view_mode_ = "slices" - n_views = views_ind.shape[0] - 1 - else: - raise ValueError("The format of views_ind is not " - + "supported.") - - return (views_ind, n_views) - - -class DataSample(dict): - """ A DataSample instance - - :Example: - - >>> from metriclearning.datasets.base import load_dict - >>> from metriclearning.datasets.tests.get_dataset_path import get_dataset_path - >>> file = 'input_x_dic.pkl' # '4.spice.train' - >>> data = load_dict(adr=get_dataset_path(file)) - >>> print - (data.__class__) - - >>> data.data - - - Input: - - :param string adr: adresse and name of the loaden file - :param string type: (default value = 'SPiCe') indicate - the structure of the file - :param lrows: number or list of rows, - a list of strings if partial=True; - otherwise, based on self.pref if version="classic" or - "prefix", self.fact otherwise - :type lrows: int or list of int - :param lcolumns: number or list of columns - a list of strings if partial=True ; - otherwise, based on self.suff if version="classic" or "suffix", - self.fact otherwise - :type lcolumns: int or list of int - :param string version: (default = "classic") version name - :param boolean partial: (default value = False) build of partial - - """ - - def __init__(self, data=None, **kwargs): - - # The dictionary that contains the sample - super(DataSample, self).__init__(kwargs) - self._data = None # Metriclearn_array(np.zeros((0,0))) - if data is not None: - self._data = Metriclearn_array(data) - - @property - def data(self): - """Metriclearn_array""" - - return self._data - - @data.setter - def data(self, data): - if isinstance(data, ( - Metriclearn_array, np.ndarray, ma.MaskedArray, np.generic)): - self._data = data - else: - raise TypeError("sample should be a Metriclearn_array.") diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index d04b2ef7..6635119f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -50,8 +50,8 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): self.classed_params = [] self.weird_strings = {} - def set_params(self, monoview_classifier_name=None, - monoview_classifier_config=None, **params): + def set_params(self, monoview_classifier_name="decision_tree", + monoview_classifier_config={}, **params): self.monoview_classifier_name = monoview_classifier_name self.monoview_classifier = self.init_monoview_estimator( monoview_classifier_name, diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py index ac315880..fb3a539c 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py @@ -14,7 +14,7 @@ def get_duration(results): "pred"] = classifier_result.pred_duration return df -def plot_durations(durations, directory, database_name, durations_stds=None): +def plot_durations(durations, directory, database_name, durations_stds=None): # pragma: no cover file_name = os.path.join(directory, database_name + "-durations") durations.to_csv(file_name+"_dataframe.csv") fig = plotly.graph_objs.Figure() diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py index e989ee36..f78955dc 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py @@ -157,7 +157,7 @@ def gen_error_data_glob(iter_results, stats_iter): def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, - stats_iter=1, use_plotly=True, example_ids=None): + stats_iter=1, use_plotly=True, example_ids=None): # pragma: no cover r"""Used to generate a 2D plot of the errors. Parameters @@ -232,7 +232,7 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, def plot_errors_bar(error_on_examples, nb_examples, file_name, - use_plotly=True, example_ids=None): + use_plotly=True, example_ids=None): # pragma: no cover r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples Parameters diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py index a86ccf46..c30a1c63 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py @@ -52,29 +52,33 @@ def publish_feature_importances(feature_importances, directory, database_name, feature_std = pd.DataFrame(data=np.zeros(feature_importance.shape), index=feature_importance.index, columns=feature_importance.columns) - feature_importance.to_csv(file_name + "_dataframe.csv") - hover_text = [["-Feature :" + str(feature_name) + - "<br>-Classifier : " + classifier_name + - "<br>-Importance : " + str( - feature_importance.loc[feature_name][classifier_name]) + - "<br>-STD : " + str( - feature_std.loc[feature_name][classifier_name]) - for classifier_name in list(feature_importance.columns)] - for feature_name in list(feature_importance.index)] - fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap( - x=list(feature_importance.columns), - y=list(feature_importance.index), - z=feature_importance.values, - text=hover_text, - hoverinfo=["text"], - colorscale="Greys", - reversescale=False)) - fig.update_layout( - xaxis={"showgrid": False, "showticklabels": False, "ticks": ''}, - yaxis={"showgrid": False, "showticklabels": False, "ticks": ''}) - fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', - plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) + plot_feature_importances(file_name, feature_importance, feature_std) - del fig + +def plot_feature_importances(file_name, feature_importance, feature_std): # pragma: no cover + feature_importance.to_csv(file_name + "_dataframe.csv") + hover_text = 
[["-Feature :" + str(feature_name) + + "<br>-Classifier : " + classifier_name + + "<br>-Importance : " + str( + feature_importance.loc[feature_name][classifier_name]) + + "<br>-STD : " + str( + feature_std.loc[feature_name][classifier_name]) + for classifier_name in list(feature_importance.columns)] + for feature_name in list(feature_importance.index)] + fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap( + x=list(feature_importance.columns), + y=list(feature_importance.index), + z=feature_importance.values, + text=hover_text, + hoverinfo=["text"], + colorscale="Greys", + reversescale=False)) + fig.update_layout( + xaxis={"showgrid": False, "showticklabels": False, "ticks": ''}, + yaxis={"showgrid": False, "showticklabels": False, "ticks": ''}) + fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)') + plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) + + del fig diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py index 32ac4830..94c50be9 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py @@ -170,7 +170,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results, metric_name, file_name, tag="", train_STDs=None, test_STDs=None, - use_plotly=True): + use_plotly=True): # pragma: no cover r"""Used to plot and save the score barplot for a specific metric. Parameters @@ -263,7 +263,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results, def plot_class_metric_scores(class_test_scores, class_file_name, labels_names, classifier_names, metric_name, - stds=None, tag=""): + stds=None, tag=""): # pragma: no cover fig = plotly.graph_objs.Figure() for lab_index, scores in enumerate(class_test_scores): if stds is None: @@ -312,7 +312,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): return fig_kwargs, bar_width -def autolabel(rects, ax, set=1, std=None): +def autolabel(rects, ax, set=1, std=None): # pragma: no cover r"""Used to print the score below the bars. Parameters diff --git a/multiview_platform/mono_multi_view_classifiers/utils/base.py b/multiview_platform/mono_multi_view_classifiers/utils/base.py index d35c40fc..34894b5a 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/base.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/base.py @@ -62,14 +62,14 @@ class BaseClassifier(BaseEstimator, ): if self.param_names: return self.__class__.__name__ + " with " + self.params_to_string() else: - return self.__class__.__name__ + "with no config." + return self.__class__.__name__ + " with no config." 
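The feature-importance hunk above applies the pattern used across this patch: the data preparation stays in publish_feature_importances, while the new plot_feature_importances only renders the plotly heatmap and carries the pragma. A reduced sketch of that split, with hypothetical names and a generic layout rather than the platform's exact figure:

import pandas as pd


def build_importance_frame(importances, feature_names, classifier_names):
    # Pure data preparation: deterministic, easy to unit-test, fully measured.
    return pd.DataFrame(importances, index=feature_names,
                        columns=classifier_names)


def plot_importance_frame(frame, file_name):  # pragma: no cover
    # Rendering only, excluded from the report like the plotting helpers here.
    import plotly.graph_objs
    import plotly.offline
    fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap(
        x=list(frame.columns), y=list(frame.index),
        z=frame.values, colorscale="Greys"))
    plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)


frame = build_importance_frame([[0.8, 0.1], [0.2, 0.9]],
                               ["view0_feat0", "view0_feat1"],
                               ["adaboost", "decision_tree"])
print(frame)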
def get_base_estimator(self, base_estimator, estimator_config): if estimator_config is None: estimator_config = {} if base_estimator is None: return DecisionTreeClassifier(**estimator_config) - if isinstance(base_estimator, str): + if isinstance(base_estimator, str): # pragma: no cover if base_estimator == "DecisionTreeClassifier": return DecisionTreeClassifier(**estimator_config) elif base_estimator == "AdaboostClassifier": @@ -89,11 +89,12 @@ class BaseClassifier(BaseEstimator, ): Formats a parameter into a string """ if param_name in self.weird_strings: - if self.weird_strings[param_name] == "class_name": - return self.get_params()[param_name].__class__.__name__ - else: - return self.weird_strings[param_name]( - self.get_params()[param_name]) + string = "" + if "class_name" in self.weird_strings[param_name]: + string += self.get_params()[param_name].__class__.__name__ + if "config" in self.weird_strings[param_name]: + string += "( with " + self.get_params()[ + param_name].params_to_string() + ")" else: return str(self.get_params()[param_name]) @@ -118,22 +119,21 @@ class BaseClassifier(BaseEstimator, ): "n_samples ({}) / n_class ({}) must be over 1".format( n_samples, n_classes)) - if hasattr(self, "accepts_mutli_class"): - return self.accepts_multi_class - else: - fake_mc_X = random_state.random_integers(low=0, high=100, - size=(n_samples, dim)) - fake_mc_y = [class_index - for _ in range(int(n_samples / n_classes)) - for class_index in range(n_classes)] - fake_mc_y += [0 for _ in range(n_samples % n_classes)] - fake_mc_y = np.asarray(fake_mc_y) - try: - self.fit(fake_mc_X, fake_mc_y) - self.predict(fake_mc_X) - return True - except ValueError: - return False + # if hasattr(self, "accepts_mutli_class"): + # return self.accepts_multi_class + fake_mc_X = random_state.randint(low=0, high=101, + size=(n_samples, dim)) + fake_mc_y = [class_index + for _ in range(int(n_samples / n_classes)) + for class_index in range(n_classes)] + fake_mc_y += [0 for _ in range(n_samples % n_classes)] + fake_mc_y = np.asarray(fake_mc_y) + try: + self.fit(fake_mc_X, fake_mc_y) + # self.predict(fake_mc_X) + return True + except ValueError: + return False def get_names(classed_list): @@ -297,11 +297,11 @@ class ResultAnalyser(): return metric_score_string @abstractmethod - def get_view_specific_info(self): + def get_view_specific_info(self): # pragma: no cover pass @abstractmethod - def get_base_string(self): + def get_base_string(self): # pragma: no cover pass def get_db_config_string(self,): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py index 1551a1da..00ea3aad 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py @@ -13,27 +13,27 @@ from .organization import secure_file_path class Dataset(): @abstractmethod - def get_nb_examples(self): + def get_nb_examples(self): # pragma: no cover pass @abstractmethod - def get_v(self, view_index, example_indices=None): + def get_v(self, view_index, example_indices=None): # pragma: no cover pass @abstractmethod - def get_label_names(self, example_indices=None): + def get_label_names(self, example_indices=None): # pragma: no cover pass @abstractmethod - def get_labels(self, example_indices=None): + def get_labels(self, example_indices=None): # pragma: no cover pass @abstractmethod def filter(self, labels, label_names, example_indices, view_names, - path=None): + path=None): # pragma: no 
cover pass - def init_example_indces(self, example_indices=None): + def init_example_indices(self, example_indices=None): """If no example indices are provided, selects all the examples.""" if example_indices is None: return range(self.get_nb_examples()) @@ -106,7 +106,7 @@ class Dataset(): def select_views_and_labels(self, nb_labels=None, selected_label_names=None, random_state=None, view_names=None, path_for_new="../data/"): - if view_names is None and selected_label_names is None and nb_labels is None: + if view_names is None and selected_label_names is None and nb_labels is None: # pragma: no cover pass else: selected_label_names = self.check_selected_label_names(nb_labels, @@ -159,7 +159,7 @@ class RAMDataset(Dataset): self.saved_on_disk = False self.views = views self.labels = np.asarray(labels) - if isinstance(are_sparse, bool): + if isinstance(are_sparse, bool): # pragma: no cover self.are_sparse = [are_sparse for _ in range(len(views))] else: self.are_sparse = are_sparse @@ -205,11 +205,11 @@ class RAMDataset(Dataset): if label in selected_labels] def get_labels(self, example_indices=None): - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) return self.labels[example_indices] def get_v(self, view_index, example_indices=None): - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) if type(example_indices) is int: return self.views[view_index][example_indices, :] else: @@ -219,13 +219,13 @@ class RAMDataset(Dataset): if not self.are_sparse[view_index]: return self.views[view_index][ example_indices, :] - else: + else: # pragma: no cover # TODO Sparse support pass def get_nb_class(self, example_indices=None): """Gets the number of class of the dataset""" - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) return len(np.unique(self.labels[example_indices])) def filter(self, labels, label_names, example_indices, view_names, @@ -233,7 +233,9 @@ class RAMDataset(Dataset): if self.example_ids is not None: self.example_ids = self.example_ids[example_indices] self.labels = self.labels[example_indices] - self.labels_names = self.labels_names[np.unique(self.labels)] + self.labels_names = [name for lab_index, name + in enumerate(self.labels_names) + if lab_index in np.unique(self.labels)] self.labels = np.array( [np.where(label == np.unique(self.labels))[0] for label in self.labels]) @@ -322,7 +324,7 @@ class HDF5Dataset(Dataset): if view_names is None: view_names = ["View" + str(index) for index in range(len(views))] - if isinstance(are_sparse, bool): + if isinstance(are_sparse, bool): # pragma: no cover are_sparse = [are_sparse for _ in views] for view_index, (view_name, view, is_sparse) in enumerate( zip(view_names, views, are_sparse)): @@ -372,7 +374,7 @@ class HDF5Dataset(Dataset): ------- A numpy.ndarray containing the view data for the needed examples """ - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) if type(example_indices) is int: return self.dataset["View" + str(view_index)][example_indices, :] else: @@ -383,7 +385,7 @@ class HDF5Dataset(Dataset): if not self.dataset["View" + str(view_index)].attrs["sparse"]: return self.dataset["View" + str(view_index)][()][ example_indices, :] # [np.argsort(sorted_indices), :] - else: + else: # pragma: no cover # Work in progress pass @@ -487,7 +489,7 @@ 
class HDF5Dataset(Dataset): int : The number of classes """ - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) return len(np.unique(self.dataset["Labels"][()][example_indices])) def get_labels(self, example_indices=None): @@ -501,10 +503,10 @@ class HDF5Dataset(Dataset): Returns ------- numpy.ndarray containing the labels of the asked examples""" - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) return self.dataset["Labels"][()][example_indices] - def rm(self): + def rm(self): # pragma: no cover """ Method used to delete the dataset file on the disk if the dataset is temporary. @@ -521,7 +523,7 @@ class HDF5Dataset(Dataset): def copy_view(self, target_dataset=None, source_view_name=None, target_view_index=None, example_indices=None): - example_indices = self.init_example_indces(example_indices) + example_indices = self.init_example_indices(example_indices) new_d_set = target_dataset.create_dataset( "View" + str(target_view_index), data=self.get_v(self.view_dict[source_view_name], @@ -602,12 +604,8 @@ class HDF5Dataset(Dataset): for view_index in range(noisy_dataset["Metadata"].attrs["nbView"]): view_key = "View" + str(view_index) view_dset = noisy_dataset[view_key] - try: - view_limits = self.dataset[ + view_limits = self.dataset[ "Metadata/View" + str(view_index) + "_limits"][()] - except: - import pdb; - pdb.set_trace() view_ranges = view_limits[:, 1] - view_limits[:, 0] normal_dist = random_state.normal(0, noise_std, view_dset[()].shape) noise = normal_dist * view_ranges @@ -640,38 +638,37 @@ def datasets_already_exist(pathF, name, nbCores): """Used to check if it's necessary to copy datasets""" allDatasetExist = True for coreIndex in range(nbCores): - import os.path - allDatasetExist *= os.path.isfile( - pathF + name + str(coreIndex) + ".hdf5") + allDatasetExist *= os.path.isfile(os.path.join( + pathF, name + str(coreIndex) + ".hdf5")) return allDatasetExist def extract_subset(matrix, used_indices): - """Used to extract a subset of a matrix even if it's sparse""" - if sparse.issparse(matrix): - new_indptr = np.zeros(len(used_indices) + 1, dtype=int) - oldindptr = matrix.indptr - for exampleIndexIndex, exampleIndex in enumerate(used_indices): - new_indptr[exampleIndexIndex + 1] = new_indptr[ - exampleIndexIndex] + ( - oldindptr[ - exampleIndex + 1] - - oldindptr[exampleIndex]) - new_data = np.ones(new_indptr[-1], dtype=bool) - new_indices = np.zeros(new_indptr[-1], dtype=int) - old_indices = matrix.indices - for exampleIndexIndex, exampleIndex in enumerate(used_indices): - new_indices[new_indptr[exampleIndexIndex]:new_indptr[ - exampleIndexIndex + 1]] = old_indices[ - oldindptr[exampleIndex]: - oldindptr[exampleIndex + 1]] - return sparse.csr_matrix((new_data, new_indices, new_indptr), - shape=(len(used_indices), matrix.shape[1])) - else: - return matrix[used_indices] - - -def init_multiple_datasets(path_f, name, nb_cores): + """Used to extract a subset of a matrix even if it's sparse WIP""" + # if sparse.issparse(matrix): + # new_indptr = np.zeros(len(used_indices) + 1, dtype=int) + # oldindptr = matrix.indptr + # for exampleIndexIndex, exampleIndex in enumerate(used_indices): + # new_indptr[exampleIndexIndex + 1] = new_indptr[ + # exampleIndexIndex] + ( + # oldindptr[ + # exampleIndex + 1] - + # oldindptr[exampleIndex]) + # new_data = np.ones(new_indptr[-1], dtype=bool) + # new_indices = np.zeros(new_indptr[-1], dtype=int) + # old_indices 
= matrix.indices + # for exampleIndexIndex, exampleIndex in enumerate(used_indices): + # new_indices[new_indptr[exampleIndexIndex]:new_indptr[ + # exampleIndexIndex + 1]] = old_indices[ + # oldindptr[exampleIndex]: + # oldindptr[exampleIndex + 1]] + # return sparse.csr_matrix((new_data, new_indices, new_indptr), + # shape=(len(used_indices), matrix.shape[1])) + # else: + return matrix[used_indices] + + +def init_multiple_datasets(path_f, name, nb_cores): # pragma: no cover r"""Used to create copies of the dataset if multicore computation is used. This is a temporary solution to fix the sharing memory issue with HDF5 datasets. @@ -696,19 +693,23 @@ def init_multiple_datasets(path_f, name, nb_cores): "Info:\t Enough copies of the dataset are already available") pass else: - logging.debug("Start:\t Creating " + str( - nb_cores) + " temporary datasets for multiprocessing") - logging.warning( - " WARNING : /!\ This may use a lot of HDD storage space : " + - str(os.path.getsize(path_f + name + ".hdf5") * nb_cores / float( - 1024) / 1000 / 1000) + " Gbytes /!\ ") - confirmation = confirm() - if not confirmation: - sys.exit(0) + if os.path.getsize(os.path.join(path_f, name + ".hdf5")) * nb_cores / float(1024) / 1000 / 1000 > 0.1: + logging.debug("Start:\t Creating " + str( + nb_cores) + " temporary datasets for multiprocessing") + logging.warning( + " WARNING : /!\ This may use a lot of HDD storage space : " + + str(os.path.getsize(os.path.join(path_f, name + ".hdf5")) * nb_cores / float( + 1024) / 1000 / 1000) + " Gbytes /!\ ") + confirmation = confirm() + if not confirmation: + sys.exit(0) + else: + pass else: - dataset_files = copy_hdf5(path_f, name, nb_cores) - logging.debug("Start:\t Creating datasets for multiprocessing") - return dataset_files + pass + dataset_files = copy_hdf5(path_f, name, nb_cores) + logging.debug("Start:\t Creating datasets for multiprocessing") + return dataset_files def copy_hdf5(pathF, name, nbCores): @@ -735,7 +736,7 @@ def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, dataset): dataset.rm() -def confirm(resp=True, timeout=15): +def confirm(resp=True, timeout=15): # pragma: no cover """Used to process answer""" ans = input_(timeout) if not ans: @@ -748,7 +749,7 @@ def confirm(resp=True, timeout=15): return False -def input_(timeout=15): +def input_(timeout=15): # pragma: no cover """used as a UI to stop if too much HDD space will be used""" logging.warning("You have " + str( timeout) + " seconds to stop the dataset copy by typing n") diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 019db875..fab4079b 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -357,7 +357,7 @@ def gen_argument_dictionaries(labels_dictionary, directories, hyper_param_search, args, k_folds, stats_iter_random_states, metrics, argument_dictionaries, - benchmark, views, views_indices,): + benchmark, views, views_indices,): # pragma: no cover r"""Used to generate a dictionary for each benchmark. 
One for each label combination (if multiclass), for each statistical iteration, generates an dictionary with diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index d1230433..a13f6cab 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -17,33 +17,6 @@ from .base import get_metric from .. import metrics -# def search_best_settings(dataset_var, labels, classifier_module, -# classifier_name, -# metrics, learning_indices, i_k_folds, random_state, -# directory, views_indices=None, nb_cores=1, -# searching_tool="randomized_search-equiv", n_iter=1, -# classifier_config=None): -# """Used to select the right hyper-parameter optimization function -# to optimize hyper parameters""" -# if views_indices is None: -# views_indices = list(range(dataset_var.get_nb_view)) -# output_file_name = directory -# thismodule = sys.modules[__name__] -# if searching_tool is not "None": -# searching_tool_method = getattr(thismodule, -# searching_tool.split("-")[0]) -# best_settings, scores, params = searching_tool_method( -# dataset_var, labels, "multiview", random_state, output_file_name, -# classifier_module, classifier_name, i_k_folds, -# nb_cores, metrics, n_iter, classifier_config, -# learning_indices=learning_indices, view_indices=views_indices, -# equivalent_draws=searching_tool.endswith("equiv")) -# gen_report(params, scores, directory, ) -# else: -# best_settings = classifier_config -# return best_settings # or well set clasifier ? - - class HPSearch: def get_scoring(self, metric): @@ -116,7 +89,7 @@ class HPSearch: return self @abstractmethod - def get_candidate_params(self, X): + def get_candidate_params(self, X): # pragma: no cover raise NotImplementedError def get_best_params(self): @@ -171,7 +144,7 @@ class Random(RandomizedSearchCV, HPSearch): else: return estimator.gen_distribs() - def fit(self, X, y=None, groups=None, **fit_params): + def fit(self, X, y=None, groups=None, **fit_params): # pragma: no cover if self.framework == "monoview": return RandomizedSearchCV.fit(self, X, y=y, groups=groups, **fit_params) @@ -323,60 +296,60 @@ class Grid(GridSearchCV, HPSearch): - -def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, - n_iter=1, - **kwargs): - """Used to perform spearmint on the classifiers to optimize hyper parameters, - longer than randomsearch (can't be parallelized)""" - pass - - -def gen_heat_maps(params, scores_array, output_file_name): - """Used to generate a heat map for each doublet of hyperparms - optimized on the previous function""" - nb_params = len(params) - if nb_params > 2: - combinations = itertools.combinations(range(nb_params), 2) - elif nb_params == 2: - combinations = [(0, 1)] - else: - combinations = [()] - for combination in combinations: - if combination: - param_name1, param_array1 = params[combination[0]] - param_name2, param_array2 = params[combination[1]] - else: - param_name1, param_array1 = params[0] - param_name2, param_array2 = ("Control", np.array([0])) - - param_array1_set = np.sort(np.array(list(set(param_array1)))) - param_array2_set = np.sort(np.array(list(set(param_array2)))) - - scores_matrix = np.zeros( - (len(param_array2_set), len(param_array1_set))) - 0.1 - for param1, param2, score in zip(param_array1, param_array2, - scores_array): - param1_index, = np.where(param_array1_set == param1) 
- param2_index, = np.where(param_array2_set == param2) - scores_matrix[int(param2_index), int(param1_index)] = score - - plt.figure(figsize=(8, 6)) - plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) - plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, - ) - plt.xlabel(param_name1) - plt.ylabel(param_name2) - plt.colorbar() - plt.xticks(np.arange(len(param_array1_set)), param_array1_set) - plt.yticks(np.arange(len(param_array2_set)), param_array2_set, - rotation=45) - plt.title('Validation metric') - plt.savefig( - output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", - transparent=True) - plt.close() - +# +# def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, +# n_iter=1, +# **kwargs): +# """Used to perform spearmint on the classifiers to optimize hyper parameters, +# longer than randomsearch (can't be parallelized)""" +# pass +# +# +# def gen_heat_maps(params, scores_array, output_file_name): +# """Used to generate a heat map for each doublet of hyperparms +# optimized on the previous function""" +# nb_params = len(params) +# if nb_params > 2: +# combinations = itertools.combinations(range(nb_params), 2) +# elif nb_params == 2: +# combinations = [(0, 1)] +# else: +# combinations = [()] +# for combination in combinations: +# if combination: +# param_name1, param_array1 = params[combination[0]] +# param_name2, param_array2 = params[combination[1]] +# else: +# param_name1, param_array1 = params[0] +# param_name2, param_array2 = ("Control", np.array([0])) +# +# param_array1_set = np.sort(np.array(list(set(param_array1)))) +# param_array2_set = np.sort(np.array(list(set(param_array2)))) +# +# scores_matrix = np.zeros( +# (len(param_array2_set), len(param_array1_set))) - 0.1 +# for param1, param2, score in zip(param_array1, param_array2, +# scores_array): +# param1_index, = np.where(param_array1_set == param1) +# param2_index, = np.where(param_array2_set == param2) +# scores_matrix[int(param2_index), int(param1_index)] = score +# +# plt.figure(figsize=(8, 6)) +# plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) +# plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, +# ) +# plt.xlabel(param_name1) +# plt.ylabel(param_name2) +# plt.colorbar() +# plt.xticks(np.arange(len(param_array1_set)), param_array1_set) +# plt.yticks(np.arange(len(param_array2_set)), param_array2_set, +# rotation=45) +# plt.title('Validation metric') +# plt.savefig( +# output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", +# transparent=True) +# plt.close() +# @@ -387,6 +360,8 @@ class CustomRandint: def __init__(self, low=0, high=0, multiplier=""): self.randint = randint(low, high) + self.low=low + self.high=high self.multiplier = multiplier def rvs(self, random_state=None): @@ -397,7 +372,10 @@ class CustomRandint: return randinteger def get_nb_possibilities(self): - return self.randint.b - self.randint.a + if self.multiplier == "e-": + return abs(10 ** -self.low - 10 ** -self.high) + else: + return self.high - self.low class CustomUniform: diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py index 07869d0f..0b7210a7 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py @@ -10,102 +10,6 @@ from sklearn.preprocessing import LabelBinarizer from .dataset import get_examples_views_indices -# def 
gen_multiclass_labels(labels, multiclass_method, splits): -# r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset -# to biclass algorithms. -# -# First, the function checks whether the dataset is really multiclass. -# -# Then, it generates all the possible couples of different labels in order to perform one versus one classification. -# -# For each combination, it selects the examples in the training sets (for each statistical iteration) that have their -# label in the combination and does the same for the testing set. It also saves the multiclass testing set in order to -# use multiclass metrics on the decisions. -# -# Lastly, it creates a new array of biclass labels (0/1) for the biclass classifications used in oneVersusOne -# -# Parameters -# ---------- -# labels : numpy.ndarray -# Name of the database. -# multiclass_method : string -# The name of the multiclass method used (oneVersusOne, oneVersusAll, ...). -# splits : list of lists of numpy.ndarray -# For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and -# the ones of the testing set. -# -# Returns -# ------- -# multiclass_labels : list of lists of numpy.ndarray -# For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the -# indices for the biclass training set, the ones for the biclass testing set and the ones for the -# multiclass testing set. -# -# labels_indices : list of lists of numpy.ndarray -# Each original couple of different labels. -# -# indices_multiclass : list of lists of numpy.ndarray -# For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination. -# """ -# if multiclass_method == "oneVersusOne": -# nb_labels = len(set(list(labels))) -# if nb_labels == 2: -# splits = [[trainIndices for trainIndices, _ in splits], -# [testIndices for _, testIndices in splits], -# [[] for _ in splits]] -# return [labels], [(0, 1)], [splits] -# else: -# combinations = itertools.combinations(np.arange(nb_labels), 2) -# multiclass_labels = [] -# labels_indices = [] -# indices_multiclass = [] -# for combination in combinations: -# labels_indices.append(combination) -# old_indices = [example_index -# for example_index, example_label in -# enumerate(labels) -# if example_label in combination] -# train_indices = [np.array([old_index for old_index in old_indices if -# old_index in iterIndices[0]]) -# for iterIndices in splits] -# test_indices = [np.array([old_index for old_index in old_indices if -# old_index in iterindices[1]]) -# for iterindices in splits] -# test_indices_multiclass = [np.array(iterindices[1]) for -# iterindices in splits] -# indices_multiclass.append( -# [train_indices, test_indices, test_indices_multiclass]) -# new_labels = np.zeros(len(labels), dtype=int) - 100 -# for labelIndex, label in enumerate(labels): -# if label == combination[0]: -# new_labels[labelIndex] = 1 -# elif label == combination[1]: -# new_labels[labelIndex] = 0 -# else: -# pass -# multiclass_labels.append(new_labels) -# -# elif multiclass_method == "oneVersusRest": -# # TODO : Implement one versus rest if probas are not a problem anymore -# pass -# return multiclass_labels, labels_indices, indices_multiclass - - -# def gen_multiclass_monoview_decision(monoview_result, classification_indices): -# learning_indices, validation_indices, test_indices_multiclass = classification_indices -# multiclass_monoview_decisions = monoview_result.full_labels_pred -# 
multiclass_monoview_decisions[ -# test_indices_multiclass] = monoview_result.y_test_multiclass_pred -# return multiclass_monoview_decisions -# -# -# def is_biclass(multiclass_preds): -# if multiclass_preds[0] is []: -# return True -# else: -# return False - - def get_mc_estim(estimator, random_state, y=None, multiview=False, multiclass=False): r"""Used to get a multiclass-compatible estimator if the one in param does not natively support multiclass. @@ -179,8 +83,6 @@ class MultiClassWrapper: def get_interpretation(self, directory, base_file_name, y_test=None): - # return self.estimator.get_interpretation(output_file_name, y_test, - # multi_class=True) # TODO : Multiclass interpretation return "Multiclass wrapper is not interpretable yet" @@ -286,7 +188,7 @@ class MultiviewOVRWrapper(MultiviewWrapper, OneVsRestClassifier): np.maximum(maxima, pred, out=maxima) argmaxima[maxima == pred] = i return self.classes_[argmaxima] - else: + else: # pragma: no cover if (hasattr(self.estimators_[0], "decision_function") and is_classifier(self.estimators_[0])): thresh = 0 @@ -394,7 +296,7 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): return self.classes_[(Y > 0).astype(np.int)] return self.classes_[Y.argmax(axis=1)] - def multiview_decision_function(self, X, example_indices, view_indices): + def multiview_decision_function(self, X, example_indices, view_indices): # pragma: no cover # check_is_fitted(self) indices = self.pairwise_indices_ diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py index d3a87d7f..a980b3be 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py @@ -1,54 +1,54 @@ -from .. import metrics - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def print_metric_score(metric_scores, metrics): - metric_score_string = "\n\n" - for metric in metrics: - metric_module = getattr(metrics, metric[0]) - if metric[1] is not None: - metric_kwargs = dict( - (index, metricConfig) for index, metricConfig in - enumerate(metric[1])) - else: - metric_kwargs = {} - metric_score_string += "\tFor " + metric_module.get_config( - **metric_kwargs) + " : " - metric_score_string += "\n\t\t- Score on train : " + str( - metric_scores[metric[0]][0]) - metric_score_string += "\n\t\t- Score on test : " + str( - metric_scores[metric[0]][1]) - metric_score_string += "\n\n" - return metric_score_string - - -def get_total_metric_scores(metric, train_labels, test_labels, - validation_indices, - learning_indices, labels): - metric_module = getattr(metrics, metric[0]) - if metric[1] is not None: - metric_kwargs = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) - else: - metric_kwargs = {} - train_score = metric_module.score(labels[learning_indices], train_labels, - **metric_kwargs) - test_score = metric_module.score(labels[validation_indices], test_labels, - **metric_kwargs) - return [train_score, test_score] - - -def get_metrics_scores(metrics_var, train_labels, test_labels, - validation_indices, learning_indices, labels): - metrics_scores = {} - for metric in metrics_var: - metrics_scores[metric[0]] = get_total_metric_scores(metric, - train_labels, - test_labels, - validation_indices, - learning_indices, - labels) - return metrics_scores +# from .. 
import metrics +# +# # Author-Info +# __author__ = "Baptiste Bauvin" +# __status__ = "Prototype" # Production, Development, Prototype +# +# +# def print_metric_score(metric_scores, metrics): +# metric_score_string = "\n\n" +# for metric in metrics: +# metric_module = getattr(metrics, metric[0]) +# if metric[1] is not None: +# metric_kwargs = dict( +# (index, metricConfig) for index, metricConfig in +# enumerate(metric[1])) +# else: +# metric_kwargs = {} +# metric_score_string += "\tFor " + metric_module.get_config( +# **metric_kwargs) + " : " +# metric_score_string += "\n\t\t- Score on train : " + str( +# metric_scores[metric[0]][0]) +# metric_score_string += "\n\t\t- Score on test : " + str( +# metric_scores[metric[0]][1]) +# metric_score_string += "\n\n" +# return metric_score_string +# +# +# def get_total_metric_scores(metric, train_labels, test_labels, +# validation_indices, +# learning_indices, labels): +# metric_module = getattr(metrics, metric[0]) +# if metric[1] is not None: +# metric_kwargs = dict((index, metricConfig) for index, metricConfig in +# enumerate(metric[1])) +# else: +# metric_kwargs = {} +# train_score = metric_module.score(labels[learning_indices], train_labels, +# **metric_kwargs) +# test_score = metric_module.score(labels[validation_indices], test_labels, +# **metric_kwargs) +# return [train_score, test_score] +# +# +# def get_metrics_scores(metrics_var, train_labels, test_labels, +# validation_indices, learning_indices, labels): +# metrics_scores = {} +# for metric in metrics_var: +# metrics_scores[metric[0]] = get_total_metric_scores(metric, +# train_labels, +# test_labels, +# validation_indices, +# learning_indices, +# labels) +# return metrics_scores diff --git a/multiview_platform/mono_multi_view_classifiers/utils/organization.py b/multiview_platform/mono_multi_view_classifiers/utils/organization.py index 663536eb..1fdc0ecf 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/organization.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/organization.py @@ -2,7 +2,7 @@ import os import errno -def secure_file_path(file_name): +def secure_file_path(file_name): # pragma: no cover if not os.path.exists(os.path.dirname(file_name)): try: os.makedirs(os.path.dirname(file_name)) diff --git a/multiview_platform/tests.py b/multiview_platform/tests.py deleted file mode 100644 index 90705cc4..00000000 --- a/multiview_platform/tests.py +++ /dev/null @@ -1,6 +0,0 @@ -# if __name__=="__main__": -# import unittest -# from .tests.test_ExecClassif import suite -# -# runner = unittest.TextTestRunner() -# runner.run(suite()) diff --git a/multiview_platform/tests/test_exec_classif.py b/multiview_platform/tests/test_exec_classif.py index 06276139..ef3ed117 100644 --- a/multiview_platform/tests/test_exec_classif.py +++ b/multiview_platform/tests/test_exec_classif.py @@ -9,20 +9,98 @@ from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset from multiview_platform.mono_multi_view_classifiers import exec_classif +class Test_execute(unittest.TestCase): + + @classmethod + def setUpClass(cls): + os.mkdir(tmp_path) + + def test_exec_simple(self): + exec_classif.exec_classif(["--config_path", os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_config_simple.yml")]) + + def test_exec_iter(self): + exec_classif.exec_classif(["--config_path", os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_config_iter.yml")]) + + def test_exec_hps(self): + exec_classif.exec_classif(["--config_path", 
os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_config_hps.yml")]) + + @classmethod + def tearDown(self): + rm_tmp() + class Test_initBenchmark(unittest.TestCase): def test_benchmark_wanted(self): - # benchmark_output = ExecClassif.init_benchmark(self.args) - self.assertEqual(1, 1) + benchmark_output = exec_classif.init_benchmark(cl_type=["monoview", "multiview"], monoview_algos=["decision_tree"], multiview_algos=["weighted_linear_late_fusion"]) + self.assertEqual(benchmark_output , {'monoview': ['decision_tree'], 'multiview': ['weighted_linear_late_fusion']}) + benchmark_output = exec_classif.init_benchmark( + cl_type=["monoview", "multiview"], monoview_algos=["all"], + multiview_algos=["all"]) + self.assertEqual(benchmark_output, {'monoview': ['adaboost', + 'decision_tree', + 'gradient_boosting', + 'knn', + 'lasso', + 'random_forest', + 'sgd', + 'svm_linear', + 'svm_poly', + 'svm_rbf'], + 'multiview': ['bayesian_inference_fusion', + 'difficulty_fusion', + 'disagree_fusion', + 'double_fault_fusion', + 'entropy_fusion', + 'majority_voting_fusion', + 'svm_jumbo_fusion', + 'weighted_linear_early_fusion', + 'weighted_linear_late_fusion']}) + + +class Test_Functs(unittest.TestCase): + @classmethod + def setUpClass(cls): + os.mkdir(tmp_path) -class Test_initKWARGS(unittest.TestCase): + @classmethod + def tearDownClass(cls): + rm_tmp() def test_initKWARGSFunc_no_monoview(self): benchmark = {"monoview": {}, "multiview": {}} args = exec_classif.init_kwargs_func({}, benchmark) self.assertEqual(args, {"monoview": {}, "multiview": {}}) + def test_init_kwargs(self): + kwargs = exec_classif.init_kwargs({"decision_tree":""},["decision_tree"]) + self.assertEqual(kwargs, {"decision_tree":""}) + kwargs = exec_classif.init_kwargs({"weighted_linear_late_fusion": ""}, + ["weighted_linear_late_fusion"], framework="multiview") + self.assertEqual(kwargs, {"weighted_linear_late_fusion": ""}) + kwargs = exec_classif.init_kwargs({}, ["decision_tree"],) + self.assertEqual(kwargs, {"decision_tree":{}}) + self.assertRaises(AttributeError, exec_classif.init_kwargs, {}, ["test"]) + + def test_arange_metrics(self): + metrics = exec_classif.arange_metrics({"accuracy_score":{}}, "accuracy_score") + self.assertEqual(metrics, {"accuracy_score*":{}}) + self.assertRaises(ValueError, exec_classif.arange_metrics, {"test1":{}}, "test") + + def test_banchmark_init(self): + from sklearn.model_selection import StratifiedKFold + folds = StratifiedKFold(n_splits=2) + res, lab_names = exec_classif.benchmark_init(directory=tmp_path, + classification_indices=[np.array([0,1,2,3]), np.array([4])], + labels=test_dataset.get_labels(), + labels_dictionary={"yes":0, "no":1}, + k_folds=folds, + dataset_var=test_dataset) + self.assertEqual(res, []) + self.assertEqual(lab_names, [0, 1]) + + + class Test_InitArgumentDictionaries(unittest.TestCase): @classmethod @@ -93,50 +171,6 @@ class Test_InitArgumentDictionaries(unittest.TestCase): },] self.assertEqual(arguments["multiview"][0], expected_output[0]) - # def test_init_argument_dictionaries_multiview_multiple(self): - # self.multiview_classifier_arg_value = ["fake_value_2", "fake_arg_value_3"] - # self.init_kwargs = { - # 'monoview': { - # self.monoview_classifier_name: - # { - # self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} - # }, - # "multiview": { - # self.multiview_classifier_name: { - # self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} - # } - # } - # self.benchmark["multiview"] = ["fake_multiview_classifier"] - # 
self.benchmark["monoview"] = {} - # arguments = exec_classif.init_argument_dictionaries(self.benchmark, - # self.views_dictionnary, - # self.nb_class, - # self.init_kwargs, - # "None", {}) - # expected_output = [{ - # "classifier_name": self.multiview_classifier_name+"_fake_value_2", - # "view_indices": [0,1], - # "view_names": ["test_view_0", "test_view"], - # "nb_class": self.nb_class, - # 'hps_kwargs': {}, - # "labels_names":None, - # self.multiview_classifier_name + "_fake_value_2": { - # self.multiview_classifier_arg_name: - # self.multiview_classifier_arg_value[0]}, - # }, - # { - # "classifier_name": self.multiview_classifier_name+"_fake_arg_value_3", - # "view_indices": [0, 1], - # "view_names": ["test_view_0", "test_view"], - # "nb_class": self.nb_class, - # 'hps_kwargs': {}, - # "labels_names": None, - # self.multiview_classifier_name+"_fake_arg_value_3": { - # self.multiview_classifier_arg_name: - # self.multiview_classifier_arg_value[1]}, - # } - # ] - # self.assertEqual(arguments["multiview"][0], expected_output[0]) def test_init_argument_dictionaries_multiview_complex(self): self.multiview_classifier_arg_value = {"fake_value_2":"plif", "plaf":"plouf"} @@ -171,51 +205,6 @@ class Test_InitArgumentDictionaries(unittest.TestCase): }] self.assertEqual(arguments["multiview"][0], expected_output[0]) - # def test_init_argument_dictionaries_multiview_multiple_complex(self): - # self.multiview_classifier_arg_value = {"fake_value_2":["plif", "pluf"], "plaf":"plouf"} - # self.init_kwargs = { - # 'monoview': { - # self.monoview_classifier_name: - # { - # self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} - # }, - # "multiview": { - # self.multiview_classifier_name: { - # self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} - # } - # } - # self.benchmark["multiview"] = ["fake_multiview_classifier"] - # self.benchmark["monoview"] = {} - # arguments = exec_classif.init_argument_dictionaries(self.benchmark, - # self.views_dictionnary, - # self.nb_class, - # self.init_kwargs, - # "None", {}) - # expected_output = [{ - # "classifier_name": self.multiview_classifier_name+"_plif_plouf", - # "view_indices": [0,1], - # "view_names": ["test_view_0", "test_view"], - # "nb_class": self.nb_class, - # "labels_names":None, - # 'hps_kwargs': {}, - # self.multiview_classifier_name + "_plif_plouf": { - # self.multiview_classifier_arg_name: - # {"fake_value_2": "plif", "plaf": "plouf"}}, - # }, - # { - # "classifier_name": self.multiview_classifier_name+"_pluf_plouf", - # "view_indices": [0, 1], - # "view_names": ["test_view_0", "test_view"], - # "nb_class": self.nb_class, - # "labels_names": None, - # 'hps_kwargs': {}, - # self.multiview_classifier_name+"_pluf_plouf": { - # self.multiview_classifier_arg_name: - # {"fake_value_2":"pluf", "plaf":"plouf"}}, - # } - # ] - # self.assertEqual(arguments["multiview"][0], expected_output[0]) - def fakeBenchmarkExec(core_index=-1, a=7, args=1): return [core_index, a] @@ -357,131 +346,6 @@ class FakeKfold(): return [([X[0], X[1]], [X[2], X[3]]), (([X[2], X[3]], [X[0], X[1]]))] -# class Test_execOneBenchmark(unittest.TestCase): -# -# @classmethod -# def setUp(cls): -# rm_tmp() -# os.mkdir(tmp_path) -# cls.args = { -# "Base": {"name": "chicken_is_heaven", "type": "type", -# "pathf": "pathF"}, -# "Classification": {"hps_iter": 1}} -# -# def test_simple(cls): -# flag, results = exec_classif.exec_one_benchmark(core_index=10, -# labels_dictionary={ -# 0: "a", -# 1: "b"}, -# directory=tmp_path, -# classification_indices=( -# [1, 2, 3, 
4], -# [0, 5, 6, 7, 8]), -# args=cls.args, -# k_folds=FakeKfold(), -# random_state="try", -# hyper_param_search="try", -# metrics="try", -# argument_dictionaries={ -# "Monoview": [ -# { -# "try": 0}, -# { -# "try2": 100}], -# "multiview":[{ -# "try3": 5}, -# { -# "try4": 10}]}, -# benchmark="try", -# views="try", -# views_indices="try", -# flag=None, -# labels=np.array( -# [0, 1, 2, 1, -# 2, 2, 2, 12, -# 1, 2, 1, 1, -# 2, 1, 21]), -# exec_monoview_multicore=fakeExecMono, -# exec_multiview_multicore=fakeExecMulti,) -# -# cls.assertEqual(flag, None) -# cls.assertEqual(results , -# [['Mono', {'try': 0}], ['Mono', {'try2': 100}], -# ['Multi', {'try3': 5}], ['Multi', {'try4': 10}]]) -# -# @classmethod -# def tearDown(cls): -# path = tmp_path -# for file_name in os.listdir(path): -# dir_path = os.path.join(path, file_name) -# if os.path.isdir(dir_path): -# for file_name in os.listdir(dir_path): -# os.remove(os.path.join(dir_path, file_name)) -# os.rmdir(dir_path) -# else: -# os.remove(os.path.join(path, file_name)) -# os.rmdir(path) -# -# -# class Test_execOneBenchmark_multicore(unittest.TestCase): -# -# @classmethod -# def setUpClass(cls): -# rm_tmp() -# os.mkdir(tmp_path) -# cls.args = { -# "Base": {"name": "chicken_is_heaven", "type": "type", -# "pathf": "pathF"}, -# "Classification": {"hps_iter": 1}} -# -# def test_simple(cls): -# flag, results = exec_classif.exec_one_benchmark_multicore( -# nb_cores=2, -# labels_dictionary={0: "a", 1: "b"}, -# directory=tmp_path, -# classification_indices=([1, 2, 3, 4], [0, 10, 20, 30, 40]), -# args=cls.args, -# k_folds=FakeKfold(), -# random_state="try", -# hyper_param_search="try", -# metrics="try", -# argument_dictionaries={ -# "monoview": [ -# { -# "try": 0}, -# { -# "try2": 100}], -# "multiview":[{ -# "try3": 5}, -# { -# "try4": 10}]}, -# benchmark="try", -# views="try", -# views_indices="try", -# flag=None, -# labels=np.array([0, 1, 2, 3, 4, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), -# exec_monoview_multicore=fakeExecMono, -# exec_multiview_multicore=fakeExecMulti,) -# -# cls.assertEqual(flag, None) -# cls.assertEqual(results , -# [['Mono', {'try': 0}], ['Mono', {'try2': 100}], -# ['Multi', {'try3': 5}], ['Multi', {'try4': 10}]]) -# -# @classmethod -# def tearDown(cls): -# path = tmp_path -# for file_name in os.listdir(path): -# dir_path = os.path.join(path, file_name) -# if os.path.isdir(dir_path): -# for file_name in os.listdir(dir_path): -# os.remove(os.path.join(dir_path, file_name)) -# os.rmdir(dir_path) -# else: -# os.remove(os.path.join(path, file_name)) -# os.rmdir(path) - - class Test_set_element(unittest.TestCase): @classmethod diff --git a/multiview_platform/tests/test_metrics/test_accuracy_score.py b/multiview_platform/tests/test_metrics/test_accuracy_score.py deleted file mode 100644 index 622141cf..00000000 --- a/multiview_platform/tests/test_metrics/test_accuracy_score.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest - - -# Tester que chaque metrique a bien les bonnes fonctions qui renvoient bien les bons types d'outputs avec les bons types d'inputs -# Faire de meme pour les differents classifeurs monovues et les differents classifeurs multivues - - -class Test_accuracy_score(unittest.TestCase): - - def setUpClass(cls): - pass - - def score_test(cls, metric_module): - pass diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py deleted file mode 100644 index e51c9e7b..00000000 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py 
+++ /dev/null @@ -1,208 +0,0 @@ -import os -import unittest - -import h5py -import numpy as np -from sklearn.model_selection import StratifiedKFold - -from multiview_platform.tests.utils import rm_tmp, tmp_path - -from multiview_platform.mono_multi_view_classifiers.monoview import exec_classif_mono_view -from multiview_platform.mono_multi_view_classifiers.monoview_classifiers import decision_tree - - -class Test_initConstants(unittest.TestCase): - - @classmethod - def setUpClass(cls): - rm_tmp() - os.mkdir(tmp_path) - cls.view_name="test_dataset" - cls.datasetFile = h5py.File( - tmp_path+"test.hdf5", "w") - cls.random_state = np.random.RandomState(42) - cls.args = {"classifier_name": "test_clf"} - cls.X_value = cls.random_state.randint(0, 500, (10, 20)) - cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value) - cls.X.attrs["name"] = "test_dataset" - cls.X.attrs["sparse"] = False - cls.classification_indices = [np.array([0, 2, 4, 6, 8]), - np.array([1, 3, 5, 7, 9]), - np.array([1, 3, 5, 7, 9])] - cls.labels_names = ["test_true", "test_false"] - cls.name = "test" - cls.directory = os.path.join(tmp_path, "test_dir/") - - def test_simple(cls): - kwargs, \ - t_start, \ - feat, \ - CL_type, \ - X, \ - learningRate, \ - labelsString, \ - output_file_name,\ - directory,\ - base_file_name = exec_classif_mono_view.init_constants(cls.args, - cls.X, - cls.classification_indices, - cls.labels_names, - cls.name, - cls.directory, - cls.view_name) - cls.assertEqual(kwargs, cls.args) - cls.assertEqual(feat, "test_dataset") - cls.assertEqual(CL_type, "test_clf") - np.testing.assert_array_equal(X, cls.X_value) - cls.assertEqual(learningRate, 0.5) - cls.assertEqual(labelsString, "test_true-test_false") - # cls.assertEqual(output_file_name, "Code/tests/temp_tests/test_dir/test_clf/test_dataset/results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-") - - @classmethod - def tearDownClass(cls): - os.remove(tmp_path+"test.hdf5") - os.rmdir( - tmp_path+"test_dir/test_clf/test_dataset") - os.rmdir(tmp_path+"test_dir/test_clf") - os.rmdir(tmp_path+"test_dir") - os.rmdir(tmp_path) - - -class Test_initTrainTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - rm_tmp() - cls.random_state = np.random.RandomState(42) - cls.X = cls.random_state.randint(0, 500, (10, 5)) - cls.Y = cls.random_state.randint(0, 2, 10) - cls.classification_indices = [np.array([0, 2, 4, 6, 8]), - np.array([1, 3, 5, 7, 9]), - ] - - def test_simple(cls): - X_train, y_train, X_test, y_test = exec_classif_mono_view.init_train_test( - cls.X, cls.Y, cls.classification_indices) - - np.testing.assert_array_equal(X_train, np.array( - [np.array([102, 435, 348, 270, 106]), - np.array([466, 214, 330, 458, 87]), - np.array([149, 308, 257, 343, 491]), - np.array([276, 160, 459, 313, 21]), - np.array([58, 169, 475, 187, 463])])) - np.testing.assert_array_equal(X_test, np.array( - [np.array([71, 188, 20, 102, 121]), - np.array([372, 99, 359, 151, 130]), - np.array([413, 293, 385, 191, 443]), - np.array([252, 235, 344, 48, 474]), - np.array([270, 189, 445, 174, 445])])) - np.testing.assert_array_equal(y_train, np.array([0, 0, 1, 0, 0])) - np.testing.assert_array_equal(y_test, np.array([1, 1, 0, 0, 0])) - - -class Test_getHPs(unittest.TestCase): - - @classmethod - def setUpClass(cls): - rm_tmp() - os.mkdir(tmp_path) - cls.classifierModule = decision_tree - cls.hyper_param_search = "Random" - cls.classifier_name = "decision_tree" - cls.random_state = np.random.RandomState(42) - cls.X = 
cls.random_state.randint(0,10,size=(10,5)) - cls.y = cls.random_state.randint(0,2,size=10) - cls.output_file_name = tmp_path - cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state, shuffle=True) - cls.nb_cores = 1 - cls.metrics = {"accuracy_score*": {}} - cls.kwargs = {"decision_tree" : {"max_depth": 1, - "criterion": "gini", - "splitter": "best"}} - cls.classifier_class_name = "DecisionTree" - cls.hps_kwargs = {"n_iter": 2} - - @classmethod - def tearDownClass(cls): - for file_name in os.listdir(tmp_path): - os.remove( - os.path.join(tmp_path, file_name)) - os.rmdir(tmp_path) - - def test_simple(self): - kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule, - self.hyper_param_search, - self.classifier_name, - self.classifier_class_name, - self.X, - self.y, - self.random_state, - self.output_file_name, - self.cv, - self.nb_cores, - self.metrics, - self.kwargs, - **self.hps_kwargs) - -# class Test_getKWARGS(unittest.TestCase): -# -# @classmethod -# def setUpClass(cls): -# cls.classifierModule = None -# cls.hyper_param_search = "None" -# cls.nIter = 2 -# cls.CL_type = "string" -# cls.X_train = np.zeros((10,20)) -# cls.y_train = np.zeros((10)) -# cls.random_state = np.random.RandomState(42) -# cls.outputFileName = "test_file" -# cls.KFolds = None -# cls.nbCores = 1 -# cls.metrics = {"accuracy_score":""} -# cls.kwargs = {} -# -# def test_simple(cls): -# clKWARGS = ExecClassifMonoView.getHPs(cls.classifierModule, -# cls.hyper_param_search, -# cls.nIter, -# cls.CL_type, -# cls.X_train, -# cls.y_train, -# cls.random_state, -# cls.outputFileName, -# cls.KFolds, -# cls.nbCores, -# cls.metrics, -# cls.kwargs) -# pass -# -# class Test_saveResults(unittest.TestCase): -# -# @classmethod -# def setUpClass(cls): -# cls.stringAnalysis = "string analysis" -# cls.outputFileName = "test_file" -# cls.full_labels_pred = np.zeros(10) -# cls.y_train_pred = np.ones(5) -# cls.y_train = np.zeros(5) -# cls.imagesAnalysis = {} -# -# def test_simple(cls): -# ExecClassifMonoView.saveResults(cls.stringAnalysis, -# cls.outputFileName, -# cls.full_labels_pred, -# cls.y_train_pred, -# cls.y_train, -# cls.imagesAnalysis) -# # Test if the files are created with the right content -# -# def test_with_image_analysis(cls): -# cls.imagesAnalysis = {"test_image":"image.png"} # Image to gen -# ExecClassifMonoView.saveResults(cls.stringAnalysis, -# cls.outputFileName, -# cls.full_labels_pred, -# cls.y_train_pred, -# cls.y_train, -# cls.imagesAnalysis) -# # Test if the files are created with the right content -# diff --git a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py b/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py deleted file mode 100644 index 61277552..00000000 --- a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py +++ /dev/null @@ -1,33 +0,0 @@ -import unittest - -import numpy as np -from sklearn.model_selection import StratifiedKFold -from sklearn.tree import DecisionTreeClassifier - -from multiview_platform.mono_multi_view_classifiers.monoview import monoview_utils - - -class Test_genTestFoldsPreds(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.random_state = np.random.RandomState(42) - cls.X_train = cls.random_state.random_sample((31, 10)) - cls.y_train = np.ones(31, dtype=int) - cls.KFolds = StratifiedKFold(n_splits=3,) - - cls.estimator = DecisionTreeClassifier(max_depth=1) - - cls.y_train[15:] = -1 - # print(cls.X_train) - # print(cls.y_train) - - def test_simple(cls): - testFoldsPreds = 
monoview_utils.gen_test_folds_preds(cls.X_train, - cls.y_train, - cls.KFolds, - cls.estimator) - cls.assertEqual(testFoldsPreds.shape, (3, 10)) - np.testing.assert_array_equal(testFoldsPreds[0], np.array( - [ 1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) - diff --git a/multiview_platform/tests/test_multi_view/test_multiview_utils.py b/multiview_platform/tests/test_multi_view/test_multiview_utils.py index e69de29b..f68daf67 100644 --- a/multiview_platform/tests/test_multi_view/test_multiview_utils.py +++ b/multiview_platform/tests/test_multi_view/test_multiview_utils.py @@ -0,0 +1,42 @@ +import os +import unittest + +import h5py +import numpy as np +from sklearn.model_selection import StratifiedKFold + +from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset + +from multiview_platform.mono_multi_view_classifiers.multiview import multiview_utils + + +class FakeMVClassif(multiview_utils.BaseMultiviewClassifier): + + def __init__(self, mc=True): + self.mc=mc + pass + + def fit(self, X, y): + if not self.mc: + raise ValueError + else: + pass + + + +class TestBaseMultiviewClassifier(unittest.TestCase): + + @classmethod + def setUpClass(cls): + os.mkdir(tmp_path) + + @classmethod + def tearDownClass(cls): + rm_tmp() + + def test_accepts_multiclass(self): + rs = np.random.RandomState(42) + accepts = FakeMVClassif().accepts_multi_class(rs) + self.assertEqual(accepts, True) + accepts = FakeMVClassif(mc=False).accepts_multi_class(rs) + self.assertEqual(accepts, False) diff --git a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py index a9f5dae8..c1068e7b 100644 --- a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py +++ b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py @@ -150,7 +150,7 @@ class Test_get_plausible_db_hdf5(unittest.TestCase): "", self.path, "", nb_class=self.nb_class, random_state=self.rs, nb_view=3, nb_examples=self.nb_examples, nb_features=self.nb_features) - self.assertEqual(dataset.init_example_indces(), range(5)) + self.assertEqual(dataset.init_example_indices(), range(5)) self.assertEqual(dataset.get_nb_class(), self.nb_class) def test_two_class(self): @@ -158,7 +158,7 @@ class Test_get_plausible_db_hdf5(unittest.TestCase): "", self.path, "", nb_class=2, random_state=self.rs, nb_view=3, nb_examples=self.nb_examples, nb_features=self.nb_features) - self.assertEqual(dataset.init_example_indces(), range(5)) + self.assertEqual(dataset.init_example_indices(), range(5)) self.assertEqual(dataset.get_nb_class(), 2) diff --git a/multiview_platform/tests/test_utils/test_base.py b/multiview_platform/tests/test_utils/test_base.py index 90532ce0..027da26f 100644 --- a/multiview_platform/tests/test_utils/test_base.py +++ b/multiview_platform/tests/test_utils/test_base.py @@ -10,8 +10,42 @@ from multiview_platform.tests.utils import rm_tmp, tmp_path from multiview_platform.mono_multi_view_classifiers.utils import base -class FakeClassifier(): - pass +class FakeClassifier(base.BaseClassifier): + def __init__(self, no_params=False, accepts_mc=True): + if no_params: + self.param_names = [] + self.classed_params = [] + else: + self.param_names = ["test1", "test2"] + self.classed_params = ["test2"] + self.weird_strings = [] + self.accepts_mc = accepts_mc + + def get_params(self, deep=True): + return {"test1": 10, + "test2": "test"} + + def fit(self, X, y): + if np.unique(y).shape[0]>2 and not self.accepts_mc: + raise ValueError('Does not accept MC') + else: + return self + + +class FakeDetector: + def 
__init__(self): + self.best_params_ = {"test1": 10, + "test2": "test"} + self.cv_results_ = {"param_test1": [10], + "param_test2": ["test"]} + +class FakeResultAnalyzer(base.ResultAnalyser): + + def get_view_specific_info(self): + return "test" + + def get_base_string(self): + return 'test2' class Test_ResultAnalyzer(unittest.TestCase): @@ -32,15 +66,15 @@ class Test_ResultAnalyzer(unittest.TestCase): cls.n_splits = 5 cls.k_folds = StratifiedKFold(n_splits=cls.n_splits, ) cls.hps_method = "randomized_search" - cls.metrics_list = {"accuracy_score": {}, "f1_score":{}} + cls.metrics_list = {"accuracy_score": {}, "f1_score*":{}} cls.n_iter = 6 cls.class_label_names = ["class{}".format(ind+1) for ind in range(cls.n_classes)] - cls.pred = np.random.randint(0, cls.n_classes, + cls.pred = cls.rs.randint(0, cls.n_classes, size=cls.n_examples) cls.directory = "fake_directory" cls.base_file_name = "fake_file" - cls.labels = np.random.randint(0, cls.n_classes, + cls.labels = cls.rs.randint(0, cls.n_classes, size=cls.n_examples) cls.database_name = "test_database" cls.nb_cores = 0.5 @@ -90,11 +124,61 @@ class Test_ResultAnalyzer(unittest.TestCase): self.train_accuracy) self.assertEqual(RA.metric_scores["accuracy_score"][1], self.test_accuracy) - self.assertEqual(RA.metric_scores["f1_score"][0], + self.assertEqual(RA.metric_scores["f1_score*"][0], self.train_f1) - self.assertEqual(RA.metric_scores["f1_score"][1], + self.assertEqual(RA.metric_scores["f1_score*"][1], self.test_f1) + def test_print_metrics_scores(self): + RA = base.ResultAnalyser(self.classifier, self.classification_indices, + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) + RA.get_all_metrics_scores() + string = RA.print_metric_score() + print(repr(string)) + self.assertEqual(string, '\n\n\tFor Accuracy score using {}, (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\n\tFor F1 score using average: micro, {} (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\nTest set confusion matrix : \n\n╒════════╤══════════╤══════════╤══════════╕\n│ │ class1 │ class2 │ class3 │\n╞════════╪══════════╪══════════╪══════════╡\n│ class1 │ 3 │ 1 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class2 │ 3 │ 2 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class3 │ 3 │ 8 │ 2 │\n╘════════╧══════════╧══════════╧══════════╛\n\n') + + def test_get_db_config_string(self): + RA = FakeResultAnalyzer(self.classifier, self.classification_indices, + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) + self.assertEqual(RA.get_db_config_string(), 'Database configuration : \n\t- Database name : test_database\ntest\t- Learning Rate : 0.48\n\t- Labels used : class1, class2, class3\n\t- Number of cross validation folds : 5\n\n') + + def test_get_classifier_config_string(self): + RA = base.ResultAnalyser(self.classifier, self.classification_indices, + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) + self.assertEqual(RA.get_classifier_config_string(), 'Classifier configuration : \n\t- FakeClassifier 
with test1 : 10, test2 : test\n\t- Executed on 0.5 core(s) \n\t- Got configuration using randomized search with 6 iterations \n') + + def test_analyze(self): + RA = FakeResultAnalyzer(self.classifier, self.classification_indices, + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) + str_analysis, img_analysis, metric_scores, class_metric_scores, conf_mat = RA.analyze() + print(repr(str_analysis)) + self.assertEqual(str_analysis, 'test2Database configuration : \n\t- Database name : test_database\ntest\t- Learning Rate : 0.48\n\t- Labels used : class1, class2, class3\n\t- Number of cross validation folds : 5\n\nClassifier configuration : \n\t- FakeClassifier with test1 : 10, test2 : test\n\t- Executed on 0.5 core(s) \n\t- Got configuration using randomized search with 6 iterations \n\n\n\tFor Accuracy score using {}, (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\n\tFor F1 score using average: micro, {} (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\nTest set confusion matrix : \n\n╒════════╤══════════╤══════════╤══════════╕\n│ │ class1 │ class2 │ class3 │\n╞════════╪══════════╪══════════╪══════════╡\n│ class1 │ 3 │ 1 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class2 │ 3 │ 2 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class3 │ 3 │ 8 │ 2 │\n╘════════╧══════════╧══════════╧══════════╛\n\n\n\n Classification took -1 day, 23:59:56\n\n Classifier Interpretation : \n') + + class Test_BaseClassifier(unittest.TestCase): @@ -104,6 +188,7 @@ class Test_BaseClassifier(unittest.TestCase): cls.base_estimator_config = {"max_depth":10, "splitter": "best"} cls.est = base.BaseClassifier() + cls.rs = np.random.RandomState(42) def test_simple(self): base_estim = self.est.get_base_estimator(self.base_estimator, @@ -112,6 +197,36 @@ class Test_BaseClassifier(unittest.TestCase): self.assertEqual(base_estim.max_depth, 10) self.assertEqual(base_estim.splitter, "best") + def test_gen_best_params(self): + fake_class = FakeClassifier() + best_params = fake_class.gen_best_params(FakeDetector()) + self.assertEqual(best_params, {"test1":10, "test2":"test"}) + + def test_gen_params_from_detector(self): + fake_class = FakeClassifier() + params = fake_class.gen_params_from_detector(FakeDetector()) + self.assertEqual(params, [("test1",np.array([10])), + ("test2",np.array(["str"], dtype='<U3'))]) + params = FakeClassifier(no_params=True).gen_params_from_detector(FakeDetector()) + self.assertEqual(params, [()]) + + def test_params_to_string(self): + fake_class = FakeClassifier() + string = fake_class.params_to_string() + self.assertEqual(string, "test1 : 10, test2 : test") + + def test_get_iterpret(self): + fake_class = FakeClassifier() + self.assertEqual("", fake_class.get_interpretation("", "", "",)) + + def test_accepts_mutliclass(self): + accepts = FakeClassifier().accepts_multi_class(self.rs) + self.assertEqual(accepts, True) + accepts = FakeClassifier(accepts_mc=False).accepts_multi_class(self.rs) + self.assertEqual(accepts, False) + self.assertRaises(ValueError, FakeClassifier().accepts_multi_class, self.rs, **{"n_samples":2}) + + def test_class(self): base_estimator = DecisionTreeClassifier(max_depth=15, splitter="random") base_estim = self.est.get_base_estimator(base_estimator, @@ -125,4 +240,22 @@ class Test_BaseClassifier(unittest.TestCase): 
"splitter": "best"} with self.assertRaises(TypeError): base_estim = self.est.get_base_estimator(self.base_estimator, - base_estimator_config) \ No newline at end of file + base_estimator_config) + + def test_get_config(self): + conf = FakeClassifier(no_params=True).get_config() + self.assertEqual(conf, 'FakeClassifier with no config.') + +class Test_Functions(unittest.TestCase): + + def test_get_name(self): + classed_list = ["test", 42] + np.testing.assert_array_equal(base.get_names(classed_list), + np.array(["str", "int"], dtype="<U3")) + + + def test_get_metric(self): + from multiview_platform.mono_multi_view_classifiers.metrics import accuracy_score + metrics_dict = {"accuracy_score*":{}} + self.assertEqual(base.get_metric(metrics_dict), (accuracy_score, {})) + diff --git a/multiview_platform/tests/test_utils/test_configuration.py b/multiview_platform/tests/test_utils/test_configuration.py index 3498329f..dc1fed6c 100644 --- a/multiview_platform/tests/test_utils/test_configuration.py +++ b/multiview_platform/tests/test_utils/test_configuration.py @@ -40,39 +40,24 @@ class Test_get_the_args(unittest.TestCase): self.assertEqual(config_dict["name"], [12.5, 1e-06]) self.assertEqual(config_dict["type"], True) -# class Test_format_the_args(unittest.TestCase): -# -# def test_bool(self): -# value = configuration.format_raw_arg("bool ; yes") -# self.assertEqual(value, True) -# -# def test_int(self): -# value = configuration.format_raw_arg("int ; 1") -# self.assertEqual(value, 1) -# -# def test_float(self): -# value = configuration.format_raw_arg("float ; 1.5") -# self.assertEqual(value, 1.5) -# -# def test_string(self): -# value = configuration.format_raw_arg("str ; chicken_is_heaven") -# self.assertEqual(value, "chicken_is_heaven") -# -# def test_list_bool(self): -# value = configuration.format_raw_arg("list_bool ; yes no yes yes") -# self.assertEqual(value, [True, False, True, True]) -# -# def test_list_int(self): -# value = configuration.format_raw_arg("list_int ; 1 2 3 4") -# self.assertEqual(value, [1,2,3,4]) -# -# def test_list_float(self): -# value = configuration.format_raw_arg("list_float ; 1.5 1.6 1.7") -# self.assertEqual(value, [1.5, 1.6, 1.7]) -# -# def test_list_string(self): -# value = configuration.format_raw_arg("list_str ; list string") -# self.assertEqual(value, ["list", "string"]) +class Test_save_config(unittest.TestCase): + @classmethod + def setUpClass(cls): + rm_tmp() + path_file = os.path.dirname(os.path.abspath(__file__)) + make_tmp_dir = os.path.join(path_file, "../tmp_tests") + os.mkdir(make_tmp_dir) + + def test_simple(self): + configuration.save_config(tmp_path, {"test":10}) + with open(os.path.join(tmp_path,"config_file.yml" ), 'r') as stream: + yaml_config = yaml.safe_load(stream) + self.assertEqual(yaml_config,{"test":10} ) + + @classmethod + def tearDownClass(cls): + os.remove(os.path.join(tmp_path, "config_file.yml")) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/multiview_platform/tests/test_utils/test_dataset.py b/multiview_platform/tests/test_utils/test_dataset.py index 1a41f0d9..76644bcb 100644 --- a/multiview_platform/tests/test_utils/test_dataset.py +++ b/multiview_platform/tests/test_utils/test_dataset.py @@ -47,6 +47,29 @@ class Test_Dataset(unittest.TestCase): def tearDownClass(cls): cls.dataset_file.close() + def test_get_shape(self): + dataset_object = dataset.HDF5Dataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + file_name="from_scratch" + self.file_name, + 
view_names=self.view_names, + path=tmp_path, + labels_names=self.labels_names) + shape = dataset_object.get_shape(0) + self.assertEqual(shape, (5,7)) + + def test_to_numpy_array(self): + dataset_object = dataset.HDF5Dataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + file_name="from_scratch" + self.file_name, + view_names=self.view_names, + path=tmp_path, + labels_names=self.labels_names) + array, limits = dataset_object.to_numpy_array(view_indices=[0,1,2]) + + self.assertEqual(array.shape, (5, 21)) + def test_filter(self): """Had to create a new dataset to aviod playing with the class one""" file_name = "test_filter.hdf5" @@ -90,15 +113,17 @@ class Test_Dataset(unittest.TestCase): labels_names=self.labels_names) nb_class = dataset_object.get_nb_class() self.assertEqual(nb_class, self.nb_class) - example_indices = dataset_object.init_example_indces() + example_indices = dataset_object.init_example_indices() self.assertEqual(example_indices, range(self.nb_examples)) view = dataset_object.get_v(0) np.testing.assert_array_equal(view, self.views[0]) def test_init_example_indices(self): - example_indices = dataset.HDF5Dataset(hdf5_file=self.dataset_file).init_example_indces() + example_indices = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).init_example_indices() self.assertEqual(example_indices, range(self.nb_examples)) - example_indices = dataset.HDF5Dataset(hdf5_file=self.dataset_file).init_example_indces([0,1,2]) + example_indices = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).init_example_indices([0, 1, 2]) self.assertEqual(example_indices, [0,1,2]) def test_get_v(self): @@ -235,6 +260,164 @@ class Test_Dataset(unittest.TestCase): os.remove(os.path.join(tmp_path, "test_noise_noised.hdf5")) os.remove(os.path.join(tmp_path, "test_noise.hdf5")) +class TestRAMDataset(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.rs = np.random.RandomState(42) + cls.nb_view = 3 + cls.file_name = "test.hdf5" + cls.nb_examples = 5 + cls.nb_attr = 7 + cls.nb_class = 3 + cls.views = [cls.rs.randint(0, 10, size=(cls.nb_examples, cls.nb_attr)) + for _ in range(cls.nb_view)] + cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_examples) + cls.view_names = ["ViewN" + str(index) for index in + range(len(cls.views))] + cls.are_sparse = [False for _ in cls.views] + cls.labels_names = [str(index) for index in np.unique(cls.labels)] + + def test_get_view_name(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + self.assertEqual(dataset_object.get_view_name(0), + "ViewN0") + + def test_init_attrs(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + + + dataset_object.init_attrs() + self.assertEqual(dataset_object.nb_view, 3) + + def test_get_label_names(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + shape = dataset_object.get_label_names() + self.assertEqual(shape, ['0'.encode('utf-8'), + '1'.encode('utf-8'), + '2'.encode('utf-8')]) + shape = dataset_object.get_label_names(decode=False) + self.assertEqual(shape, ['0'.encode('utf-8'), + '1'.encode('utf-8'), + '2'.encode('utf-8')]) + + def test_get_v(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, 
+ are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + data = dataset_object.get_v(0, 1) + np.testing.assert_array_equal(data, np.array([6, 7, 4, 3, 7, 7, 2])) + data = dataset_object.get_v(0, None) + np.testing.assert_array_equal(data, np.array([[6, 3, 7, 4, 6, 9, 2], + [6, 7, 4, 3, 7, 7, 2], + [5, 4, 1, 7, 5, 1, 4], + [0, 9, 5, 8, 0, 9, 2], + [6, 3, 8, 2, 4, 2, 6]])) + + def test_filter(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + dataset_object.filter("", "", np.array([1,2]), ["ViewN0", "ViewN1"], + path=None) + self.assertEqual(dataset_object.nb_view, 2) + self.assertEqual(dataset_object.labels.shape, (2,1)) + + def test_get_view_dict(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + d = dataset_object.get_view_dict() + self.assertEqual(d, {'ViewN0': 0, 'ViewN1': 1, 'ViewN2': 2}) + + def test_get_name(self): + dataset_object = dataset.RAMDataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) + n = dataset_object.get_name() + self.assertEqual(n, None) + +class Test_Functions(unittest.TestCase): + @classmethod + def setUpClass(cls): + rm_tmp() + os.mkdir(tmp_path) + cls.rs = np.random.RandomState(42) + cls.nb_view = 3 + cls.file_name = "test0.hdf5" + cls.nb_examples = 5 + cls.nb_attr = 7 + cls.nb_class = 3 + cls.views = [cls.rs.randint(0, 10, size=(cls.nb_examples, cls.nb_attr)) + for _ in range(cls.nb_view)] + cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_examples) + cls.dataset_file = h5py.File(os.path.join(tmp_path, cls.file_name), "w") + cls.view_names = ["ViewN" + str(index) for index in + range(len(cls.views))] + cls.are_sparse = [False for _ in cls.views] + for view_index, (view_name, view, is_sparse) in enumerate( + zip(cls.view_names, cls.views, cls.are_sparse)): + view_dataset = cls.dataset_file.create_dataset( + "View" + str(view_index), + view.shape, + data=view) + view_dataset.attrs["name"] = view_name + view_dataset.attrs["sparse"] = is_sparse + labels_dataset = cls.dataset_file.create_dataset("Labels", + shape=cls.labels.shape, + data=cls.labels) + cls.labels_names = [str(index) for index in np.unique(cls.labels)] + labels_dataset.attrs["names"] = [label_name.encode() + for label_name in cls.labels_names] + meta_data_grp = cls.dataset_file.create_group("Metadata") + meta_data_grp.attrs["nbView"] = len(cls.views) + meta_data_grp.attrs["nbClass"] = len(np.unique(cls.labels)) + meta_data_grp.attrs["datasetLength"] = len(cls.labels) + + @classmethod + def tearDownClass(cls): + cls.dataset_file.close() + rm_tmp() + + def test_datasets_already_exist(self): + self.assertEqual(True, dataset.datasets_already_exist(tmp_path, "test", 1)) + + def test_init_multiple_datasets(self): + dataset.init_multiple_datasets(tmp_path, "test0", 2) + self.assertTrue(os.path.isfile(os.path.join(tmp_path,'test00.hdf5'))) + dataset.delete_HDF5([{"args":{"pathf":tmp_path, "name":"test0"}}], + 2, dataset.HDF5Dataset(hdf5_file=self.dataset_file)) + self.assertFalse(os.path.isfile(os.path.join(tmp_path,'test00.hdf5'))) + + + + + + + + + + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/multiview_platform/tests/utils.py b/multiview_platform/tests/utils.py index 
c27c31bf..9a3f04cb 100644 --- a/multiview_platform/tests/utils.py +++ b/multiview_platform/tests/utils.py @@ -9,11 +9,14 @@ tmp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp_tests/" # TODO Convert to ram dataset test_dataset = HDF5Dataset(hdf5_file=h5py.File(os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_database.hdf5"), "r")) -def rm_tmp(): +def rm_tmp(path=tmp_path): try: - for file_name in os.listdir(tmp_path): - os.remove(os.path.join(tmp_path, file_name)) - os.rmdir(tmp_path) + for file_name in os.listdir(path): + if os.path.isdir(os.path.join(path, file_name)): + rm_tmp(os.path.join(path, file_name)) + else: + os.remove(os.path.join(path, file_name)) + os.rmdir(path) except: pass -- GitLab
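
Note on the reworked rm_tmp helper in the final hunk: the patch keeps an explicit loop over os.listdir and recurses into sub-directories, with the surrounding try/except silently ignoring a missing directory. The same cleanup can be expressed with the standard library's shutil.rmtree, which already walks nested directories and, with ignore_errors=True, reproduces the silent behaviour. The sketch below is illustrative only and is not part of the patch; it simply mirrors the tmp_path convention used in multiview_platform/tests/utils.py.

    import os
    import shutil

    # Mirrors the tmp_tests location used by the test helpers (assumed layout).
    tmp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp_tests")

    def rm_tmp(path=tmp_path):
        """Remove the temporary test directory and everything under it."""
        # shutil.rmtree walks sub-directories itself; ignore_errors=True keeps
        # the call silent when the directory does not exist, like the bare
        # except in the patched helper.
        shutil.rmtree(path, ignore_errors=True)

Keeping the explicit recursion, as the patch does, avoids pulling in shutil and makes each removal step visible; the rmtree variant is shorter and sidesteps the bare except. Either form leaves the calling tests unchanged.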