diff --git a/config_files/config_test.yml b/config_files/config_test.yml index 4eb8866752b17d41258ae6135ee4f4334d8fbea5..64cb127307de84cc7b746065b69b40061a39dad9 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -21,14 +21,28 @@ split: 0.49 nb_folds: 2 nb_class: 2 classes: -type: ["multiview","monoview"] +type: ["multiview", "monoview"] algos_monoview: ["decision_tree" ] algos_multiview: ["weighted_linear_early_fusion",] stats_iter: 2 metrics: ["accuracy_score", "f1_score"] metric_princ: "accuracy_score" -hps_type: "randomized_search-equiv" -hps_iter: 10 +hps_type: "Random" +hps_args: + n_iter: 4 + equivalent_draws: False + weighted_linear_early_fusion: + view_weights: [null] + monoview_classifier_name: ["decision_tree"] + monoview_classifier_config: + - decision_tree: + max_depth: 1 + criterion: "gini" + splitter: "best" + - decision_tree: + max_depth: 2 + criterion: "gini" + splitter: "best" ###################################### diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 39e8a09a4b3d391adfc2ffa38ce0ae7f418c7c19..2d6a1532f649598eb8dbfa8d791e8e68d088993e 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -76,46 +76,64 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): def init_argument_dictionaries(benchmark, views_dictionary, - nb_class, init_kwargs): + nb_class, init_kwargs, hps_method, hps_kwargs): argument_dictionaries = {"monoview": [], "multiview": []} if benchmark["monoview"]: argument_dictionaries["monoview"] = init_monoview_exps( benchmark["monoview"], views_dictionary, nb_class, - init_kwargs["monoview"]) + init_kwargs["monoview"], hps_method, hps_kwargs) if benchmark["multiview"]: argument_dictionaries["multiview"] = init_multiview_exps( benchmark["multiview"], views_dictionary, nb_class, - init_kwargs["multiview"]) + init_kwargs["multiview"], hps_method, hps_kwargs) return argument_dictionaries def init_multiview_exps(classifier_names, views_dictionary, nb_class, - kwargs_init): + kwargs_init, hps_method, hps_kwargs): multiview_arguments = [] for classifier_name in classifier_names: - if multiple_args(get_path_dict(kwargs_init[classifier_name])): - multiview_arguments += gen_multiple_args_dictionnaries( - nb_class, - kwargs_init, - classifier_name, - views_dictionary=views_dictionary, - framework="multiview") - else: - arguments = get_path_dict(kwargs_init[classifier_name]) + arguments = get_path_dict(kwargs_init[classifier_name]) + if hps_method == "Grid": + multiview_arguments += [ + gen_single_multiview_arg_dictionary(classifier_name, + arguments, + nb_class, + {"param_grid":hps_kwargs[classifier_name]}, + views_dictionary=views_dictionary)] + elif hps_method == "Random": + hps_kwargs = dict((key, value) + for key, value in hps_kwargs.items() + if key in ["n_iter", "equivalent_draws"]) + multiview_arguments += [ + gen_single_multiview_arg_dictionary(classifier_name, + arguments, + nb_class, + hps_kwargs, + views_dictionary=views_dictionary)] + elif hps_method == "None": multiview_arguments += [ gen_single_multiview_arg_dictionary(classifier_name, arguments, nb_class, + hps_kwargs, views_dictionary=views_dictionary)] + else: + raise ValueError('At the moment only "None", "Random" or "Grid" ' + 'are available as hyper-parameter search ' + 'methods, sadly "{}" is not'.format(hps_method) + ) + return multiview_arguments def 
init_monoview_exps(classifier_names, - views_dictionary, nb_class, kwargs_init): + views_dictionary, nb_class, kwargs_init, hps_method, + hps_kwargs): r"""Used to add each monoview exeperience args to the list of monoview experiences args. First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -141,25 +159,44 @@ def init_monoview_exps(classifier_names, """ monoview_arguments = [] for view_name, view_index in views_dictionary.items(): - for classifier in classifier_names: - if multiple_args(kwargs_init[classifier]): - monoview_arguments += gen_multiple_args_dictionnaries(nb_class, - kwargs_init, - classifier, - view_name, - view_index) - else: - arguments = gen_single_monoview_arg_dictionary(classifier, + for classifier_name in classifier_names: + if hps_method == "Grid": + arguments = gen_single_monoview_arg_dictionary(classifier_name, + kwargs_init, + nb_class, + view_index, + view_name, + {"param_grid": + hps_kwargs[classifier_name]}) + elif hps_method == "Random": + hps_kwargs = dict((key, value) + for key, value in hps_kwargs.items() + if key in ["n_iter", "equivalent_draws"]) + arguments = gen_single_monoview_arg_dictionary(classifier_name, kwargs_init, nb_class, view_index, - view_name) - monoview_arguments.append(arguments) + view_name, + hps_kwargs) + elif hps_method == "None": + arguments = gen_single_monoview_arg_dictionary(classifier_name, + kwargs_init, + nb_class, + view_index, + view_name, + hps_kwargs) + + else: + raise ValueError('At the moment only "None", "Random" or "Grid" ' + 'are available as hyper-parameter search ' + 'methods, sadly "{}" is not'.format(hps_method) + ) + monoview_arguments.append(arguments) return monoview_arguments def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, - view_index, view_name): + view_index, view_name, hps_kwargs): if classifier_name in arguments: classifier_config = dict((key, value[0]) for key, value in arguments[ classifier_name].items()) @@ -169,16 +206,18 @@ def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, "view_name": view_name, "view_index": view_index, "classifier_name": classifier_name, - "nb_class": nb_class} + "nb_class": nb_class, + "hps_kwargs":hps_kwargs } def gen_single_multiview_arg_dictionary(classifier_name, arguments, nb_class, - views_dictionary=None): + hps_kwargs, views_dictionary=None): return {"classifier_name": classifier_name, "view_names": list(views_dictionary.keys()), 'view_indices': list(views_dictionary.values()), "nb_class": nb_class, "labels_names": None, + "hps_kwargs": hps_kwargs, classifier_name: extract_dict(arguments) } @@ -208,14 +247,14 @@ def set_element(dictionary, path, value): return dictionary -def multiple_args(classifier_configuration): - """Checks if multiple values were provided for at least one arg""" - listed_args = [type(value) == list and len(value) > 1 for key, value in - classifier_configuration.items()] - if True in listed_args: - return True - else: - return False +# def multiple_args(classifier_configuration): +# """Checks if multiple values were provided for at least one arg""" +# listed_args = [type(value) == list and len(value) > 1 for key, value in +# classifier_configuration.items()] +# if True in listed_args: +# return True +# else: +# return False def get_path_dict(multiview_classifier_args): @@ -254,105 +293,105 @@ def is_dict_in(dictionary): return paths -def gen_multiple_kwargs_combinations(cl_kwrags): - """ - Generates all the possible combination of the asked 
args - - Parameters - ---------- - cl_kwrags : dict - The arguments, with one at least having multiple values - - Returns - ------- - kwargs_combination : list - The list of all the combinations of arguments - - reduced_kwargs_combination : list - The reduced names and values of the arguments will be used in the naming - process of the different classifiers - - """ - values = list(cl_kwrags.values()) - listed_values = [[_] if type(_) is not list else _ for _ in values] - values_cartesian_prod = [_ for _ in itertools.product(*listed_values)] - keys = cl_kwrags.keys() - kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) - for values in values_cartesian_prod] - - reduce_dict = {DecisionTreeClassifier: "DT", } - reduced_listed_values = [ - [_ if type(_) not in reduce_dict else reduce_dict[type(_)] for _ in - list_] for list_ in listed_values] - reduced_values_cartesian_prod = [_ for _ in - itertools.product(*reduced_listed_values)] - reduced_kwargs_combination = [ - dict((key, value) for key, value in zip(keys, values)) - for values in reduced_values_cartesian_prod] - return kwargs_combination, reduced_kwargs_combination - - -def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, - view_name=None, view_index=None, - views_dictionary=None, - framework="monoview"): - """ - Used in the case of mutliple arguments asked in the config file. - Will combine the arguments to explore all the possibilities. - - Parameters - ---------- - nb_class : int, - The number of classes in the dataset - - kwargs_init : dict - The arguments given in the config file - - classifier : str - The name of the classifier for which multiple arguments have been asked - - view_name : str - The name of the view in consideration. - - view_index : int - The index of the view in consideration - - views_dictionary : dict - The dictionary of all the views indices and their names - - framework : str - Either monoview or multiview - - Returns - ------- - args_dictionaries : list - The list of all the possible combination of asked arguments - - """ - if framework == "multiview": - classifier_config = get_path_dict(kwargs_init[classifier]) - else: - classifier_config = kwargs_init[classifier] - multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations( - classifier_config) - multiple_kwargs_dict = dict( - (classifier + "_" + "_".join( - map(str, list(reduced_dictionary.values()))), dictionary) - for reduced_dictionary, dictionary in - zip(reduced_multiple_kwargs_list, multiple_kwargs_list)) - args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, - arguments, - nb_class, - view_index=view_index, - view_name=view_name) - if framework == "monoview" else - gen_single_multiview_arg_dictionary(classifier_name, - arguments, - nb_class, - views_dictionary=views_dictionary) - for classifier_name, arguments - in multiple_kwargs_dict.items()] - return args_dictionnaries +# def gen_multiple_kwargs_combinations(cl_kwrags): +# """ +# Generates all the possible combination of the asked args +# +# Parameters +# ---------- +# cl_kwrags : dict +# The arguments, with one at least having multiple values +# +# Returns +# ------- +# kwargs_combination : list +# The list of all the combinations of arguments +# +# reduced_kwargs_combination : list +# The reduced names and values of the arguments will be used in the naming +# process of the different classifiers +# +# """ +# values = list(cl_kwrags.values()) +# listed_values = [[_] if type(_) is not list else _ for _ in 
values] +# values_cartesian_prod = [_ for _ in itertools.product(*listed_values)] +# keys = cl_kwrags.keys() +# kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) +# for values in values_cartesian_prod] +# +# reduce_dict = {DecisionTreeClassifier: "DT", } +# reduced_listed_values = [ +# [_ if type(_) not in reduce_dict else reduce_dict[type(_)] for _ in +# list_] for list_ in listed_values] +# reduced_values_cartesian_prod = [_ for _ in +# itertools.product(*reduced_listed_values)] +# reduced_kwargs_combination = [ +# dict((key, value) for key, value in zip(keys, values)) +# for values in reduced_values_cartesian_prod] +# return kwargs_combination, reduced_kwargs_combination + + +# def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, +# view_name=None, view_index=None, +# views_dictionary=None, +# framework="monoview"): +# """ +# Used in the case of mutliple arguments asked in the config file. +# Will combine the arguments to explore all the possibilities. +# +# Parameters +# ---------- +# nb_class : int, +# The number of classes in the dataset +# +# kwargs_init : dict +# The arguments given in the config file +# +# classifier : str +# The name of the classifier for which multiple arguments have been asked +# +# view_name : str +# The name of the view in consideration. +# +# view_index : int +# The index of the view in consideration +# +# views_dictionary : dict +# The dictionary of all the views indices and their names +# +# framework : str +# Either monoview or multiview +# +# Returns +# ------- +# args_dictionaries : list +# The list of all the possible combination of asked arguments +# +# """ +# if framework == "multiview": +# classifier_config = get_path_dict(kwargs_init[classifier]) +# else: +# classifier_config = kwargs_init[classifier] +# multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations( +# classifier_config) +# multiple_kwargs_dict = dict( +# (classifier + "_" + "_".join( +# map(str, list(reduced_dictionary.values()))), dictionary) +# for reduced_dictionary, dictionary in +# zip(reduced_multiple_kwargs_list, multiple_kwargs_list)) +# args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, +# arguments, +# nb_class, +# view_index=view_index, +# view_name=view_name) +# if framework == "monoview" else +# gen_single_multiview_arg_dictionary(classifier_name, +# arguments, +# nb_class, +# views_dictionary=views_dictionary) +# for classifier_name, arguments +# in multiple_kwargs_dict.items()] +# return args_dictionnaries def init_kwargs(args, classifiers_names, framework="monoview"): @@ -672,7 +711,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, 1, args["file_type"], args["pathf"], random_state, hyper_param_search=hyper_param_search, metrics=metrics, - n_iter=args["hps_iter"], **arguments)] + **arguments)] except: if track_tracebacks: traceback_outputs[ @@ -703,7 +742,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, k_folds, 1, args["file_type"], args["pathf"], labels_dictionary, random_state, labels, - hyper_param_search=hyper_param_search, + hps_method=hyper_param_search, metrics=metrics, n_iter=args["hps_iter"], **arguments)] except: @@ -829,7 +868,8 @@ def exec_classif(arguments): if nb_cores == 1: os.environ['OPENBLAS_NUM_THREADS'] = '1' stats_iter = args["stats_iter"] - hyper_param_search = args["hps_type"] + hps_method = args["hps_type"] + hps_kwargs = args["hps_args"] cl_type = args["type"] monoview_algos = 
args["algos_monoview"] multiview_algos = args["algos_multiview"] @@ -915,17 +955,17 @@ def exec_classif(arguments): data_base_time = time.time() - start argument_dictionaries = init_argument_dictionaries( benchmark, views_dictionary, - nb_class, init_kwargs) + nb_class, init_kwargs, hps_method, hps_kwargs) # argument_dictionaries = initMonoviewExps(benchmark, viewsDictionary, # NB_CLASS, initKWARGS) directories = execution.gen_direcorties_names(directory, stats_iter) benchmark_argument_dictionaries = execution.gen_argument_dictionaries( labels_dictionary, directories, splits, - hyper_param_search, args, k_folds, + hps_method, args, k_folds, stats_iter_random_states, metrics, argument_dictionaries, benchmark, - views, views_indices, ) + views, views_indices) results_mean_stds = exec_benchmark( nb_cores, stats_iter, benchmark_argument_dictionaries, directory, metrics, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 84d45a22a4c9bbb226ebf16e5b5c55413cdadcc6..719501d394bdae7ea36433052e98abd02e37cf6b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -54,7 +54,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i k_folds, nb_cores, databaseType, path, random_state, hyper_param_search="randomized_search", metrics=[["accuracy_score", None]], n_iter=30, view_name="", - **args): + hps_kwargs={}, **args): logging.debug("Start:\t Loading data") kwargs, \ t_start, \ @@ -65,7 +65,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i labelsString, \ output_file_name = init_constants(args, X, classification_indices, labels_names, - database_name, directory, view_name) + database_name, directory, view_name, ) logging.debug("Done:\t Loading data") logging.debug( @@ -89,12 +89,13 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i classifier_module = getattr(monoview_classifiers, classifier_name) classifier_class_name = classifier_module.classifier_class_name hyper_param_beg = time.monotonic() - cl_kwargs, test_folds_preds = get_hyper_params(classifier_module, hyper_param_search, - n_iter, classifier_name, + cl_kwargs = get_hyper_params(classifier_module, hyper_param_search, + classifier_name, classifier_class_name, X_train, y_train, random_state, output_file_name, - k_folds, nb_cores, metrics, kwargs) + k_folds, nb_cores, metrics, kwargs, + **hps_kwargs) hyper_param_duration = time.monotonic() - hyper_param_beg logging.debug("Done:\t Generate classifier args") @@ -156,11 +157,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i logging.info("Done:\t Saving results") view_index = args["view_index"] - if test_folds_preds is None: - test_folds_preds = train_pred return MonoviewResult(view_index, classifier_name, view_name, metrics_scores, full_pred, cl_kwargs, - test_folds_preds, classifier, X_train.shape[1], + classifier, X_train.shape[1], hyper_param_duration, fit_duration, pred_duration) @@ -192,35 +191,31 @@ def init_train_test(X, Y, classification_indices): return X_train, y_train, X_test, y_test -def get_hyper_params(classifier_module, hyper_param_search, nIter, classifier_module_name, +def get_hyper_params(classifier_module, search_method, classifier_module_name, classifier_class_name, X_train, y_train, 
random_state, - output_file_name, k_folds, nb_cores, metrics, kwargs): - if hyper_param_search != "None": + output_file_name, k_folds, nb_cores, metrics, kwargs, + **hps_kwargs): + if search_method != "None": logging.debug( - "Start:\t " + hyper_param_search + " best settings with " + str( - nIter) + " iterations for " + classifier_module_name) - classifier_hp_search = getattr(hyper_parameter_search, - hyper_param_search.split("-")[0]) - cl_kwargs, test_folds_preds, scores, params = classifier_hp_search(X_train, y_train, - "monoview", - random_state, - output_file_name, - classifier_module, - classifier_class_name, - folds=k_folds, - nb_cores=nb_cores, - metric=metrics[0], - n_iter=nIter, - classifier_kwargs= - kwargs[ - classifier_module_name]) - hyper_parameter_search.gen_report(params, scores, output_file_name) - logging.debug("Done:\t " + hyper_param_search + " best settings") + "Start:\t " + search_method + " best settings for " + classifier_module_name) + classifier_hp_search = getattr(hyper_parameter_search, search_method) + estimator = getattr(classifier_module, classifier_class_name)( + random_state=random_state, + **kwargs[classifier_module_name]) + estimator = get_mc_estim(estimator, random_state, + multiview=False, y=y_train) + hps = classifier_hp_search(estimator, scoring=metrics, cv=k_folds, + random_state=random_state, + framework="monoview", n_jobs=nb_cores, + **hps_kwargs) + hps.fit(X_train, y_train, **kwargs[classifier_module_name]) + cl_kwargs = hps.get_best_params() + hps.gen_report(output_file_name) + logging.debug("Done:\t " + search_method + " best settings") else: cl_kwargs = kwargs[classifier_module_name] - test_folds_preds = None - return cl_kwargs, test_folds_preds + return cl_kwargs def save_results(string_analysis, output_file_name, full_labels_pred, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 321f4195f5e1a196cfeeb00700b04b1fca2eeede..4af2ee3cbaeaf6443b1ce0d7349465c82b49c4f8 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -155,7 +155,7 @@ def percent(x, pos): class MonoviewResult(object): def __init__(self, view_index, classifier_name, view_name, metrics_scores, - full_labels_pred, classifier_config, test_folds_preds, + full_labels_pred, classifier_config, classifier, n_features, hps_duration, fit_duration, pred_duration): self.view_index = view_index @@ -164,7 +164,6 @@ class MonoviewResult(object): self.metrics_scores = metrics_scores self.full_labels_pred = full_labels_pred self.classifier_config = classifier_config - self.test_folds_preds = test_folds_preds self.clf = classifier self.n_features = n_features self.hps_duration = hps_duration diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 7e5d5e5287bbda178c0a27f85f5ba21bba08423e..9de4e48deb2733476e50b12c1ad937426df5db9a 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -176,7 +176,7 @@ def exec_multiview_multicore(directory, core_index, name, learning_rate, 1, database_type, path, labels_dictionary, random_state, labels, - hyper_param_search=hyper_param_search, + hps_method=hyper_param_search, metrics=metrics, n_iter=n_iter, 
**arguments) @@ -185,7 +185,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds, nb_cores, database_type, path, labels_dictionary, random_state, labels, - hyper_param_search=False, metrics=None, n_iter=30, **kwargs): + hps_method="None", hps_kwargs={}, metrics=None, + n_iter=30, **kwargs): """Used to execute multiview classification and result analysis Parameters @@ -216,7 +217,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, labels - hyper_param_search + hps_method metrics @@ -259,14 +260,29 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.debug("Start:\t Optimizing hyperparameters") hps_beg = time.monotonic() - if hyper_param_search != "None": - classifier_config = hyper_parameter_search.search_best_settings( - dataset_var, dataset_var.get_labels(), classifier_module, - classifier_name, - metrics[0], learning_indices, k_folds, random_state, - output_file_name, nb_cores=nb_cores, views_indices=views_indices, - searching_tool=hyper_param_search, n_iter=n_iter, - classifier_config=classifier_config) + if hps_method != "None": + hps_method_class = getattr(hyper_parameter_search, hps_method) + estimator = getattr(classifier_module, classifier_name)( + random_state=random_state, + **classifier_config) + estimator = get_mc_estim(estimator, random_state, + multiview=True, + y=dataset_var.get_labels()[learning_indices]) + hps = hps_method_class(estimator, scoring=metrics, cv=k_folds, + random_state=random_state, framework="multiview", + n_jobs=nb_cores, + learning_indices=learning_indices, + view_indices=views_indices, **hps_kwargs) + hps.fit(dataset_var, dataset_var.get_labels(), ) + classifier_config = hps.get_best_params() + hps.gen_report(output_file_name) + # classifier_config = hyper_parameter_search.search_best_settings( + # dataset_var, dataset_var.get_labels(), classifier_module, + # classifier_name, + # metrics[0], learning_indices, k_folds, random_state, + # output_file_name, nb_cores=nb_cores, views_indices=views_indices, + # searching_tool=hps_method, n_iter=n_iter, + # classifier_config=classifier_config) hps_duration = time.monotonic() - hps_beg classifier = get_mc_estim( getattr(classifier_module, classifier_name)(random_state=random_state, @@ -308,7 +324,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, classifier=classifier, classification_indices=classification_indices, k_folds=k_folds, - hps_method=hyper_param_search, + hps_method=hps_method, metrics_list=metrics, n_iter=n_iter, class_label_names=list(labels_dictionary.values()), diff --git a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py index 7544cdae992fc1d8d901b2c944e041b616a78b9b..9f3490847129dc3fc72e60103f6df95ab8834221 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py @@ -36,7 +36,7 @@ def pass_default_config(log=True, add_noise=False, noise_std=0.0, res_dir="../results/", - track_tracebacks=False, + track_tracebacks=True, split=0.49, nb_folds=5, nb_class=None, @@ -46,9 +46,11 @@ def pass_default_config(log=True, algos_multiview=["svm_jumbo_fusion", ], stats_iter=2, metrics=["accuracy_score", "f1_score"], - metric_princ="f1_score", - hps_type="randomized_search", - hps_iter=1, **kwargs): + metric_princ="accuracy_score", + hps_type="Random", + hps_iter=1, + hps_kwargs={'n_iter':10, 
"equivalent_draws":True}, + **kwargs): """ :param log: diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 0c82c6677c2976b90e408824338adaa0f95cebfc..9bf1b72d8728ce71eadb6dffc4a51141a846f9fa 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -346,7 +346,7 @@ def gen_argument_dictionaries(labels_dictionary, directories, hyper_param_search, args, k_folds, stats_iter_random_states, metrics, argument_dictionaries, - benchmark, views, views_indices): + benchmark, views, views_indices,): r"""Used to generate a dictionary for each benchmark. One for each label combination (if multiclass), for each statistical iteration, generates an dictionary with diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 621ee28050d0e075ff26cc18b83442993d5026cc..191029796b4b38a951643351c60d69803a61edf7 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -1,14 +1,18 @@ import itertools import sys import traceback +from abc import abstractmethod import matplotlib.pyplot as plt import numpy as np from scipy.stats import randint, uniform -from sklearn.model_selection import RandomizedSearchCV +from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ + ParameterGrid, ParameterSampler +from sklearn.base import clone -from .multiclass import get_mc_estim +from .multiclass import MultiClassWrapper from .organization import secure_file_path +from .base import get_metric from .. import metrics @@ -21,13 +25,13 @@ def search_best_settings(dataset_var, labels, classifier_module, """Used to select the right hyper-parameter optimization function to optimize hyper parameters""" if views_indices is None: - views_indices = range(dataset_var.get("Metadata").attrs["nbView"]) + views_indices = list(range(dataset_var.get_nb_view)) output_file_name = directory thismodule = sys.modules[__name__] if searching_tool is not "None": searching_tool_method = getattr(thismodule, searching_tool.split("-")[0]) - best_settings, test_folds_preds, scores, params = searching_tool_method( + best_settings, scores, params = searching_tool_method( dataset_var, labels, "multiview", random_state, output_file_name, classifier_module, classifier_name, i_k_folds, nb_cores, metrics, n_iter, classifier_config, @@ -39,199 +43,54 @@ def search_best_settings(dataset_var, labels, classifier_module, return best_settings # or well set clasifier ? -def grid_search(dataset, classifier_name, views_indices=None, k_folds=None, - n_iter=1, - **kwargs): - """Used to perfom gridsearch on the classifiers""" - pass - - -class CustomUniform: - """Used as a distribution returning a float between loc and loc + scale.. - It can be used with a multiplier agrument to be able to perform more complex generation - for example 10 e -(float)""" - - def __init__(self, loc=0, state=1, multiplier=""): - self.uniform = uniform(loc, state) - self.multiplier = multiplier - - def rvs(self, random_state=None): - unif = self.uniform.rvs(random_state=random_state) - if self.multiplier == 'e-': - return 10 ** -unif - else: - return unif - - -class CustomRandint: - """Used as a distribution returning a integer between low and high-1. 
- It can be used with a multiplier agrument to be able to perform more complex generation - for example 10 e -(randint)""" - - def __init__(self, low=0, high=0, multiplier=""): - self.randint = randint(low, high) - self.multiplier = multiplier - - def rvs(self, random_state=None): - randinteger = self.randint.rvs(random_state=random_state) - if self.multiplier == "e-": - return 10 ** -randinteger - else: - return randinteger - - def get_nb_possibilities(self): - return self.randint.b - self.randint.a - - -def compute_possible_combinations(params_dict): - n_possibs = np.ones(len(params_dict)) * np.inf - for value_index, value in enumerate(params_dict.values()): - if type(value) == list: - n_possibs[value_index] = len(value) - elif isinstance(value, CustomRandint): - n_possibs[value_index] = value.get_nb_possibilities() - return np.prod(n_possibs) - - -def get_test_folds_preds(X, y, cv, estimator, framework, - available_indices=None): - test_folds_prediction = [] - if framework == "monoview": - folds = cv.split(np.arange(len(y)), y) - if framework == "multiview": - folds = cv.split(available_indices, y[available_indices]) - fold_lengths = np.zeros(cv.n_splits, dtype=int) - for fold_idx, (train_indices, test_indices) in enumerate(folds): - fold_lengths[fold_idx] = len(test_indices) - if framework == "monoview": - estimator.fit(X[train_indices], y[train_indices]) - test_folds_prediction.append(estimator.predict(X[train_indices])) - if framework == "multiview": - estimator.fit(X, y, available_indices[train_indices]) - test_folds_prediction.append( - estimator.predict(X, available_indices[test_indices])) - min_fold_length = fold_lengths.min() - test_folds_prediction = np.array( - [test_fold_prediction[:min_fold_length] for test_fold_prediction in - test_folds_prediction]) - return test_folds_prediction - - -def randomized_search(X, y, framework, random_state, output_file_name, - classifier_module, - classifier_name, folds=4, nb_cores=1, - metric=["accuracy_score", None], - n_iter=30, classifier_kwargs=None, learning_indices=None, - view_indices=None, - equivalent_draws=True): - estimator = getattr(classifier_module, classifier_name)( - random_state=random_state, - **classifier_kwargs) - params_dict = estimator.gen_distribs() - estimator = get_mc_estim(estimator, random_state, - multiview=(framework == "multiview"), - y=y) - if params_dict: - metric_module = getattr(metrics, metric[0]) - if metric[1] is not None: - metric_kargs = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) +class HPSearch: + + # def __init__(self, y, framework, random_state, output_file_name, + # classifier_module, + # classifier_name, folds=4, nb_cores=1, + # metric = [["accuracy_score", None]], + # classifier_kwargs={}, learning_indices=None, + # view_indices=None, + # track_tracebacks=True): + # estimator = getattr(classifier_module, classifier_name)( + # random_state=random_state, + # **classifier_kwargs) + # self.init_params() + # self.estimator = get_mc_estim(estimator, random_state, + # multiview=(framework == "multiview"), + # y=y) + # self.folds = folds + # self.nb_cores = nb_cores + # self.clasifier_kwargs = classifier_kwargs + # self.learning_indices = learning_indices + # self.view_indices = view_indices + # self.output_file_name = output_file_name + # metric_module, metric_kwargs = get_metric(metric) + # self.scorer = metric_module.get_scorer(**metric_kwargs) + # self.track_tracebacks = track_tracebacks + + def get_scoring(self, metric): + if isinstance(metric, list): + 
metric_module, metric_kwargs = get_metric(metric) + return metric_module.get_scorer(**metric_kwargs) else: - metric_kargs = {} - - scorer = metric_module.get_scorer(**metric_kargs) - nb_possible_combinations = compute_possible_combinations(params_dict) - n_iter_real = min(n_iter, nb_possible_combinations) - - random_search = MultiviewCompatibleRandomizedSearchCV(estimator, - n_iter=int( - n_iter_real), - param_distributions=params_dict, - refit=True, - n_jobs=nb_cores, - scoring=scorer, - cv=folds, - random_state=random_state, - learning_indices=learning_indices, - view_indices=view_indices, - framework=framework, - equivalent_draws=equivalent_draws) - random_search.fit(X, y) - best_params = random_search.best_params_ - if "random_state" in best_params: - best_params.pop("random_state") - - scores_array = random_search.cv_results_['mean_test_score'] - sorted_indices = np.argsort(-scores_array) - params = [random_search.cv_results_["params"][score_index] - for score_index in sorted_indices] - scores_array = scores_array[sorted_indices] - # gen_heat_maps(params, scores_array, output_file_name) - best_estimator = random_search.best_estimator_ - else: - best_estimator = estimator - best_params = {} - scores_array = {} - params = {} - test_folds_preds = get_test_folds_preds(X, y, folds, best_estimator, - framework, learning_indices) - return best_params, test_folds_preds, scores_array, params + return metric - -from sklearn.base import clone - - -class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): - - def __init__(self, estimator, param_distributions, n_iter=10, - refit=True, n_jobs=1, scoring=None, cv=None, - random_state=None, learning_indices=None, view_indices=None, - framework="monoview", - equivalent_draws=True): - super(MultiviewCompatibleRandomizedSearchCV, self).__init__(estimator, - n_iter=n_iter, - param_distributions=param_distributions, - refit=refit, - n_jobs=n_jobs, - scoring=scoring, - cv=cv, - random_state=random_state) - self.framework = framework - self.available_indices = learning_indices - self.view_indices = view_indices - self.equivalent_draws = equivalent_draws - - def fit(self, X, y=None, groups=None, **fit_params): - if self.framework == "monoview": - return super(MultiviewCompatibleRandomizedSearchCV, self).fit(X, - y=y, - groups=groups, - **fit_params) - elif self.framework == "multiview": - return self.fit_multiview(X, y=y, groups=groups, **fit_params) - - def fit_multiview(self, X, y=None, groups=None, track_tracebacks=True, - **fit_params): + def fit_multiview(self, X, y, groups=None, **fit_params): n_splits = self.cv.get_n_splits(self.available_indices, y[self.available_indices]) folds = list( self.cv.split(self.available_indices, y[self.available_indices])) - if self.equivalent_draws: - self.n_iter = self.n_iter * X.nb_view - # Fix to allow sklearn > 0.19 - from sklearn.model_selection import ParameterSampler - candidate_params = list( - ParameterSampler(self.param_distributions, self.n_iter, - random_state=self.random_state)) + self.get_candidate_params(X) base_estimator = clone(self.estimator) results = {} self.cv_results_ = dict(("param_" + param_name, []) for param_name in - candidate_params[0].keys()) + self.candidate_params[0].keys()) self.cv_results_["mean_test_score"] = [] - self.cv_results_["params"]=[] + self.cv_results_["params"] = [] n_failed = 0 tracebacks = [] - for candidate_param_idx, candidate_param in enumerate(candidate_params): + for candidate_param_idx, candidate_param in enumerate(self.candidate_params): test_scores = 
np.zeros(n_splits) + 1000 try: for fold_idx, (train_indices, test_indices) in enumerate(folds): @@ -250,16 +109,17 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): test_prediction, **self.scoring._kwargs) test_scores[fold_idx] = test_score - self.cv_results_['params'].append(current_estimator.get_params()) + self.cv_results_['params'].append( + current_estimator.get_params()) cross_validation_score = np.mean(test_scores) self.cv_results_["mean_test_score"].append( cross_validation_score) results[candidate_param_idx] = cross_validation_score if cross_validation_score >= max(results.values()): - self.best_params_ = candidate_params[candidate_param_idx] + self.best_params_ = self.candidate_params[candidate_param_idx] self.best_score_ = cross_validation_score except: - if track_tracebacks: + if self.track_tracebacks: n_failed += 1 tracebacks.append(traceback.format_exc()) else: @@ -268,7 +128,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): raise ValueError( 'No fits were performed. All HP combination returned errors \n\n' + '\n'.join( tracebacks)) - self.cv_results_["mean_test_score"] = np.array(self.cv_results_["mean_test_score"]) + self.cv_results_["mean_test_score"] = np.array( + self.cv_results_["mean_test_score"]) if self.refit: self.best_estimator_ = clone(base_estimator).set_params( **self.best_params_) @@ -276,28 +137,293 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): self.n_splits_ = n_splits return self - def get_test_folds_preds(self, X, y, estimator): - test_folds_prediction = [] + @abstractmethod + def init_params(self): + self.params_dict = {} + raise NotImplementedError + + @abstractmethod + def get_candidate_params(self, X): + raise NotImplementedError + + def get_best_params(self): + best_params = self.best_params_ + if "random_state" in best_params: + best_params.pop("random_state") + return best_params + + def gen_report(self, output_file_name): + scores_array = self.cv_results_['mean_test_score'] + sorted_indices = np.argsort(-scores_array) + tested_params = [self.cv_results_["params"][score_index] + for score_index in sorted_indices] + scores_array = scores_array[sorted_indices] + output_string = "" + for parameters, score in zip(tested_params, scores_array): + if "random_state" in parameters: + parameters.pop("random_state") + output_string += "\n{}\t\t{}".format(parameters, score) + secure_file_path(output_file_name + "hps_report.txt") + with open(output_file_name + "hps_report.txt", "w") as output_file: + output_file.write(output_string) + + +class Random(RandomizedSearchCV, HPSearch): + + def __init__(self, estimator, param_distributions=None, n_iter=10, + refit=False, n_jobs=1, scoring=None, cv=None, + random_state=None, learning_indices=None, view_indices=None, + framework="monoview", + equivalent_draws=True, track_tracebacks=True): + if param_distributions is None: + param_distributions = self.get_param_distribs(estimator) + scoring = HPSearch.get_scoring(self, scoring) + RandomizedSearchCV.__init__(self, estimator, n_iter=n_iter, + param_distributions=param_distributions, + refit=refit, n_jobs=n_jobs, scoring=scoring, + cv=cv, random_state=random_state) + self.framework = framework + self.available_indices = learning_indices + self.view_indices = view_indices + self.equivalent_draws = equivalent_draws + self.track_tracebacks = track_tracebacks + + def get_param_distribs(self, estimator): + if isinstance(estimator, MultiClassWrapper): + return estimator.estimator.gen_distribs() + else: + return 
estimator.gen_distribs() + + def fit(self, X, y=None, groups=None, **fit_params): + if self.framework == "monoview": + return RandomizedSearchCV.fit(self, X, y=y, groups=groups, + **fit_params) + + elif self.framework == "multiview": + return HPSearch.fit_multiview(self, X, y=y, groups=groups, + **fit_params) + + # def init_params(self,): + # self.params_dict = self.estimator.gen_distribs() + + def get_candidate_params(self, X): + if self.equivalent_draws: + self.n_iter = self.n_iter * X.nb_view + self.candidate_params = list( + ParameterSampler(self.param_distributions, self.n_iter, + random_state=self.random_state)) + + # def fit_multiview(self, X, y=None, groups=None, track_tracebacks=True, + # **fit_params): + # n_splits = self.cv.get_n_splits(self.available_indices, + # y[self.available_indices]) + + + + +class Grid(GridSearchCV, HPSearch): + + def __init__(self, estimator, param_grid={}, refit=True, n_jobs=1, scoring=None, cv=None, + learning_indices=None, view_indices=None, framework="monoview", + random_state=None, track_tracebacks=True): + scoring = HPSearch.get_scoring(self, scoring) + GridSearchCV.__init__(self, estimator, param_grid, scoring=scoring, + n_jobs=n_jobs, iid='deprecated', refit=refit, + cv=cv) + self.framework = framework + self.available_indices = learning_indices + self.view_indices = view_indices + self.track_tracebacks = track_tracebacks + + def fit(self, X, y=None, groups=None, **fit_params): if self.framework == "monoview": - folds = self.cv.split(np.arange(len(y)), y) - if self.framework == "multiview": - folds = self.cv.split(self.available_indices, y) - fold_lengths = np.zeros(self.cv.n_splits, dtype=int) - for fold_idx, (train_indices, test_indices) in enumerate(folds): - fold_lengths[fold_idx] = len(test_indices) - if self.framework == "monoview": - estimator.fit(X[train_indices], y[train_indices]) - test_folds_prediction.append( - estimator.predict(X[train_indices])) - if self.framework == "multiview": - estimator.fit(X, y, self.available_indices[train_indices]) - test_folds_prediction.append( - estimator.predict(X, self.available_indices[test_indices])) - min_fold_length = fold_lengths.min() - test_folds_prediction = np.array( - [test_fold_prediction[:min_fold_length] for test_fold_prediction in - test_folds_prediction]) - return test_folds_prediction + return GridSearchCV.fit(self, X, y=y, groups=groups, + **fit_params) + elif self.framework == "multiview": + return HPSearch.fit_multiview(self, X, y=y, groups=groups, + **fit_params) + + def get_candidate_params(self, X): + self.candidate_params = list(ParameterGrid(self.param_grid)) + self.n_iter = len(self.candidate_params) + + + + + +# +# def hps_search(): +# pass +# +# def grid_search(X, y, framework, random_state, output_file_name, +# classifier_module, +# classifier_name, folds=4, nb_cores=1, +# metric=["accuracy_score", None], +# n_iter=30, classifier_kwargs={}, learning_indices=None, +# view_indices=None, +# equivalent_draws=True, grid_search_config=None): +# """Used to perfom gridsearch on the classifiers""" +# pass + + + +# class RS(HPSSearch): +# +# def __init__(self, X, y, framework, random_state, output_file_name, +# classifier_module, +# classifier_name, folds=4, nb_cores=1, +# metric=["accuracy_score", None], +# n_iter=30, classifier_kwargs={}, learning_indices=None, +# view_indices=None, +# equivalent_draws=True): +# HPSSearch.__init__() + + + +# def randomized_search(X, y, framework, random_state, output_file_name, +# classifier_module, +# classifier_name, folds=4, nb_cores=1, +# 
metric=["accuracy_score", None], +# n_iter=30, classifier_kwargs={}, learning_indices=None, +# view_indices=None, +# equivalent_draws=True): +# estimator = getattr(classifier_module, classifier_name)( +# random_state=random_state, +# **classifier_kwargs) +# params_dict = estimator.gen_distribs() +# estimator = get_mc_estim(estimator, random_state, +# multiview=(framework == "multiview"), +# y=y) +# if params_dict: +# metric_module, metric_kwargs = get_metric(metric) +# scorer = metric_module.get_scorer(**metric_kwargs) +# # nb_possible_combinations = compute_possible_combinations(params_dict) +# # n_iter_real = min(n_iter, nb_possible_combinations) +# +# random_search = MultiviewCompatibleRandomizedSearchCV(estimator, +# n_iter=n_iter, +# param_distributions=params_dict, +# refit=True, +# n_jobs=nb_cores, +# scoring=scorer, +# cv=folds, +# random_state=random_state, +# learning_indices=learning_indices, +# view_indices=view_indices, +# framework=framework, +# equivalent_draws=equivalent_draws) +# random_search.fit(X, y) +# return random_search.transform_results() +# else: +# best_estimator = estimator +# best_params = {} +# scores_array = {} +# params = {} +# test_folds_preds = np.zeros(10)#get_test_folds_preds(X, y, folds, best_estimator, +# # framework, learning_indices) +# return best_params, scores_array, params + + + + + + + + + + +def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, + n_iter=1, + **kwargs): + """Used to perform spearmint on the classifiers to optimize hyper parameters, + longer than randomsearch (can't be parallelized)""" + pass + + +def gen_heat_maps(params, scores_array, output_file_name): + """Used to generate a heat map for each doublet of hyperparms + optimized on the previous function""" + nb_params = len(params) + if nb_params > 2: + combinations = itertools.combinations(range(nb_params), 2) + elif nb_params == 2: + combinations = [(0, 1)] + else: + combinations = [()] + for combination in combinations: + if combination: + param_name1, param_array1 = params[combination[0]] + param_name2, param_array2 = params[combination[1]] + else: + param_name1, param_array1 = params[0] + param_name2, param_array2 = ("Control", np.array([0])) + + param_array1_set = np.sort(np.array(list(set(param_array1)))) + param_array2_set = np.sort(np.array(list(set(param_array2)))) + + scores_matrix = np.zeros( + (len(param_array2_set), len(param_array1_set))) - 0.1 + for param1, param2, score in zip(param_array1, param_array2, + scores_array): + param1_index, = np.where(param_array1_set == param1) + param2_index, = np.where(param_array2_set == param2) + scores_matrix[int(param2_index), int(param1_index)] = score + + plt.figure(figsize=(8, 6)) + plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) + plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, + ) + plt.xlabel(param_name1) + plt.ylabel(param_name2) + plt.colorbar() + plt.xticks(np.arange(len(param_array1_set)), param_array1_set) + plt.yticks(np.arange(len(param_array2_set)), param_array2_set, + rotation=45) + plt.title('Validation metric') + plt.savefig( + output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", + transparent=True) + plt.close() + + + + +class CustomRandint: + """Used as a distribution returning a integer between low and high-1. 
+ It can be used with a multiplier agrument to be able to perform more complex generation + for example 10 e -(randint)""" + + def __init__(self, low=0, high=0, multiplier=""): + self.randint = randint(low, high) + self.multiplier = multiplier + + def rvs(self, random_state=None): + randinteger = self.randint.rvs(random_state=random_state) + if self.multiplier == "e-": + return 10 ** -randinteger + else: + return randinteger + + def get_nb_possibilities(self): + return self.randint.b - self.randint.a + + +class CustomUniform: + """Used as a distribution returning a float between loc and loc + scale.. + It can be used with a multiplier agrument to be able to perform more complex generation + for example 10 e -(float)""" + + def __init__(self, loc=0, state=1, multiplier=""): + self.uniform = uniform(loc, state) + self.multiplier = multiplier + + def rvs(self, random_state=None): + unif = self.uniform.rvs(random_state=random_state) + if self.multiplier == 'e-': + return 10 ** -unif + else: + return unif + # def randomized_search_(dataset_var, labels, classifier_package, classifier_name, @@ -369,70 +495,39 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): # # return classifier - -def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, - n_iter=1, - **kwargs): - """Used to perform spearmint on the classifiers to optimize hyper parameters, - longer than randomsearch (can't be parallelized)""" - pass - - -def gen_heat_maps(params, scores_array, output_file_name): - """Used to generate a heat map for each doublet of hyperparms - optimized on the previous function""" - nb_params = len(params) - if nb_params > 2: - combinations = itertools.combinations(range(nb_params), 2) - elif nb_params == 2: - combinations = [(0, 1)] - else: - combinations = [()] - for combination in combinations: - if combination: - param_name1, param_array1 = params[combination[0]] - param_name2, param_array2 = params[combination[1]] - else: - param_name1, param_array1 = params[0] - param_name2, param_array2 = ("Control", np.array([0])) - - param_array1_set = np.sort(np.array(list(set(param_array1)))) - param_array2_set = np.sort(np.array(list(set(param_array2)))) - - scores_matrix = np.zeros( - (len(param_array2_set), len(param_array1_set))) - 0.1 - for param1, param2, score in zip(param_array1, param_array2, - scores_array): - param1_index, = np.where(param_array1_set == param1) - param2_index, = np.where(param_array2_set == param2) - scores_matrix[int(param2_index), int(param1_index)] = score - - plt.figure(figsize=(8, 6)) - plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) - plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, - ) - plt.xlabel(param_name1) - plt.ylabel(param_name2) - plt.colorbar() - plt.xticks(np.arange(len(param_array1_set)), param_array1_set) - plt.yticks(np.arange(len(param_array2_set)), param_array2_set, - rotation=45) - plt.title('Validation metric') - plt.savefig( - output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", - transparent=True) - plt.close() - - -def gen_report(params, scores_array, output_file_name): - output_string = "" - for parameters, score in zip(params, scores_array): - if "random_state" in parameters: - parameters.pop("random_state") - output_string+="\n{}\t\t{}".format(parameters, score) - secure_file_path(output_file_name + "hps_report.txt") - with open(output_file_name+"hps_report.txt", "w") as output_file: - output_file.write(output_string) +# +# def 
compute_possible_combinations(params_dict): +# n_possibs = np.ones(len(params_dict)) * np.inf +# for value_index, value in enumerate(params_dict.values()): +# if type(value) == list: +# n_possibs[value_index] = len(value) +# elif isinstance(value, CustomRandint): +# n_possibs[value_index] = value.get_nb_possibilities() +# return np.prod(n_possibs) + + +# def get_test_folds_preds(X, y, cv, estimator, framework, +# available_indices=None): +# test_folds_prediction = [] +# if framework == "monoview": +# folds = cv.split(np.arange(len(y)), y) +# if framework == "multiview": +# folds = cv.split(available_indices, y[available_indices]) +# fold_lengths = np.zeros(cv.n_splits, dtype=int) +# for fold_idx, (train_indices, test_indices) in enumerate(folds): +# fold_lengths[fold_idx] = len(test_indices) +# if framework == "monoview": +# estimator.fit(X[train_indices], y[train_indices]) +# test_folds_prediction.append(estimator.predict(X[train_indices])) +# if framework == "multiview": +# estimator.fit(X, y, available_indices[train_indices]) +# test_folds_prediction.append( +# estimator.predict(X, available_indices[test_indices])) +# min_fold_length = fold_lengths.min() +# test_folds_prediction = np.array( +# [test_fold_prediction[:min_fold_length] for test_fold_prediction in +# test_folds_prediction]) +# return test_folds_prediction # nohup python ~/dev/git/spearmint/spearmint/main.py . & diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 5b5726e2f867401c03b47b4f57286ae4a8324ae2..3179d209d5a14c570cc0c4bf3da9dff32103d5b6 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -52,17 +52,20 @@ class Test_InitArgumentDictionaries(unittest.TestCase): arguments = exec_classif.init_argument_dictionaries(self.benchmark, self.views_dictionnary, self.nb_class, - self.init_kwargs) + self.init_kwargs, + "None", {}) expected_output = [{ self.monoview_classifier_name: { self.monoview_classifier_arg_name:self.monoview_classifier_arg_value[0]}, "view_name": "test_view_0", + 'hps_kwargs': {}, "classifier_name": self.monoview_classifier_name, "nb_class": self.nb_class, "view_index": 0}, {self.monoview_classifier_name: { self.monoview_classifier_arg_name: self.monoview_classifier_arg_value[0]}, "view_name": "test_view", + 'hps_kwargs': {}, "classifier_name": self.monoview_classifier_name, "nb_class": self.nb_class, "view_index": 1}, @@ -75,12 +78,14 @@ class Test_InitArgumentDictionaries(unittest.TestCase): arguments = exec_classif.init_argument_dictionaries(self.benchmark, self.views_dictionnary, self.nb_class, - self.init_kwargs) + self.init_kwargs, + "None", {}) expected_output = [{ "classifier_name": self.multiview_classifier_name, "view_indices": [0,1], "view_names": ["test_view_0", "test_view"], "nb_class": self.nb_class, + 'hps_kwargs': {}, "labels_names":None, self.multiview_classifier_name: { self.multiview_classifier_arg_name: @@ -88,47 +93,50 @@ class Test_InitArgumentDictionaries(unittest.TestCase): },] self.assertEqual(arguments["multiview"][0], expected_output[0]) - def test_init_argument_dictionaries_multiview_multiple(self): - self.multiview_classifier_arg_value = ["fake_value_2", "fake_arg_value_3"] - self.init_kwargs = { - 'monoview': { - self.monoview_classifier_name: - { - self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} - }, - "multiview": { - self.multiview_classifier_name: { - self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} - } - } - 
self.benchmark["multiview"] = ["fake_multiview_classifier"] - self.benchmark["monoview"] = {} - arguments = exec_classif.init_argument_dictionaries(self.benchmark, - self.views_dictionnary, - self.nb_class, - self.init_kwargs) - expected_output = [{ - "classifier_name": self.multiview_classifier_name+"_fake_value_2", - "view_indices": [0,1], - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - "labels_names":None, - self.multiview_classifier_name + "_fake_value_2": { - self.multiview_classifier_arg_name: - self.multiview_classifier_arg_value[0]}, - }, - { - "classifier_name": self.multiview_classifier_name+"_fake_arg_value_3", - "view_indices": [0, 1], - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - "labels_names": None, - self.multiview_classifier_name+"_fake_arg_value_3": { - self.multiview_classifier_arg_name: - self.multiview_classifier_arg_value[1]}, - } - ] - self.assertEqual(arguments["multiview"][0], expected_output[0]) + # def test_init_argument_dictionaries_multiview_multiple(self): + # self.multiview_classifier_arg_value = ["fake_value_2", "fake_arg_value_3"] + # self.init_kwargs = { + # 'monoview': { + # self.monoview_classifier_name: + # { + # self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} + # }, + # "multiview": { + # self.multiview_classifier_name: { + # self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} + # } + # } + # self.benchmark["multiview"] = ["fake_multiview_classifier"] + # self.benchmark["monoview"] = {} + # arguments = exec_classif.init_argument_dictionaries(self.benchmark, + # self.views_dictionnary, + # self.nb_class, + # self.init_kwargs, + # "None", {}) + # expected_output = [{ + # "classifier_name": self.multiview_classifier_name+"_fake_value_2", + # "view_indices": [0,1], + # "view_names": ["test_view_0", "test_view"], + # "nb_class": self.nb_class, + # 'hps_kwargs': {}, + # "labels_names":None, + # self.multiview_classifier_name + "_fake_value_2": { + # self.multiview_classifier_arg_name: + # self.multiview_classifier_arg_value[0]}, + # }, + # { + # "classifier_name": self.multiview_classifier_name+"_fake_arg_value_3", + # "view_indices": [0, 1], + # "view_names": ["test_view_0", "test_view"], + # "nb_class": self.nb_class, + # 'hps_kwargs': {}, + # "labels_names": None, + # self.multiview_classifier_name+"_fake_arg_value_3": { + # self.multiview_classifier_arg_name: + # self.multiview_classifier_arg_value[1]}, + # } + # ] + # self.assertEqual(arguments["multiview"][0], expected_output[0]) def test_init_argument_dictionaries_multiview_complex(self): self.multiview_classifier_arg_value = {"fake_value_2":"plif", "plaf":"plouf"} @@ -148,10 +156,12 @@ class Test_InitArgumentDictionaries(unittest.TestCase): arguments = exec_classif.init_argument_dictionaries(self.benchmark, self.views_dictionnary, self.nb_class, - self.init_kwargs) + self.init_kwargs, + "None", {}) expected_output = [{ "classifier_name": self.multiview_classifier_name, "view_indices": [0,1], + 'hps_kwargs': {}, "view_names": ["test_view_0", "test_view"], "nb_class": self.nb_class, "labels_names":None, @@ -161,47 +171,50 @@ class Test_InitArgumentDictionaries(unittest.TestCase): }] self.assertEqual(arguments["multiview"][0], expected_output[0]) - def test_init_argument_dictionaries_multiview_multiple_complex(self): - self.multiview_classifier_arg_value = {"fake_value_2":["plif", "pluf"], "plaf":"plouf"} - self.init_kwargs = { - 'monoview': { - self.monoview_classifier_name: - { - 
self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} - }, - "multiview": { - self.multiview_classifier_name: { - self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} - } - } - self.benchmark["multiview"] = ["fake_multiview_classifier"] - self.benchmark["monoview"] = {} - arguments = exec_classif.init_argument_dictionaries(self.benchmark, - self.views_dictionnary, - self.nb_class, - self.init_kwargs) - expected_output = [{ - "classifier_name": self.multiview_classifier_name+"_plif_plouf", - "view_indices": [0,1], - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - "labels_names":None, - self.multiview_classifier_name + "_plif_plouf": { - self.multiview_classifier_arg_name: - {"fake_value_2": "plif", "plaf": "plouf"}}, - }, - { - "classifier_name": self.multiview_classifier_name+"_pluf_plouf", - "view_indices": [0, 1], - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - "labels_names": None, - self.multiview_classifier_name+"_pluf_plouf": { - self.multiview_classifier_arg_name: - {"fake_value_2":"pluf", "plaf":"plouf"}}, - } - ] - self.assertEqual(arguments["multiview"][0], expected_output[0]) + # def test_init_argument_dictionaries_multiview_multiple_complex(self): + # self.multiview_classifier_arg_value = {"fake_value_2":["plif", "pluf"], "plaf":"plouf"} + # self.init_kwargs = { + # 'monoview': { + # self.monoview_classifier_name: + # { + # self.monoview_classifier_arg_name: self.monoview_classifier_arg_value} + # }, + # "multiview": { + # self.multiview_classifier_name: { + # self.multiview_classifier_arg_name: self.multiview_classifier_arg_value} + # } + # } + # self.benchmark["multiview"] = ["fake_multiview_classifier"] + # self.benchmark["monoview"] = {} + # arguments = exec_classif.init_argument_dictionaries(self.benchmark, + # self.views_dictionnary, + # self.nb_class, + # self.init_kwargs, + # "None", {}) + # expected_output = [{ + # "classifier_name": self.multiview_classifier_name+"_plif_plouf", + # "view_indices": [0,1], + # "view_names": ["test_view_0", "test_view"], + # "nb_class": self.nb_class, + # "labels_names":None, + # 'hps_kwargs': {}, + # self.multiview_classifier_name + "_plif_plouf": { + # self.multiview_classifier_arg_name: + # {"fake_value_2": "plif", "plaf": "plouf"}}, + # }, + # { + # "classifier_name": self.multiview_classifier_name+"_pluf_plouf", + # "view_indices": [0, 1], + # "view_names": ["test_view_0", "test_view"], + # "nb_class": self.nb_class, + # "labels_names": None, + # 'hps_kwargs': {}, + # self.multiview_classifier_name+"_pluf_plouf": { + # self.multiview_classifier_arg_name: + # {"fake_value_2":"pluf", "plaf":"plouf"}}, + # } + # ] + # self.assertEqual(arguments["multiview"][0], expected_output[0]) def fakeBenchmarkExec(core_index=-1, a=7, args=1): diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_ResultAnalysis.py index 413c8d52260bc22d89119b4750bdb50c3ca25413..98e4cabf602f66505fc784e0bd66fd464f7656de 100644 --- a/multiview_platform/tests/test_ResultAnalysis.py +++ b/multiview_platform/tests/test_ResultAnalysis.py @@ -29,7 +29,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "0", {"accuracy_score":[0.9, 0.95], "f1_score":[0.91, 0.96]} - , "", "", "", "", "",)] + , "", "", "", "", "",0,0)] metrics_scores = result_analysis.get_metrics_scores(metrics, results) self.assertIsInstance(metrics_scores, dict) @@ -56,9 +56,11 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.91, 0.96]}, 
full_labels_pred="", classifier_config="", - test_folds_preds="", classifier="", - n_features=""), + n_features="", + hps_duration=0, + fit_duration=0, + pred_duration=0), MonoviewResult(view_index=0, classifier_name="dt", view_name="1", @@ -66,9 +68,11 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.81, 0.86]}, full_labels_pred="", classifier_config="", - test_folds_preds="", classifier="", - n_features="") + n_features="", + hps_duration=0, + fit_duration=0, + pred_duration=0) ] metrics_scores = result_analysis.get_metrics_scores(metrics, results) @@ -93,7 +97,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): def test_mutiview_result(self): metrics = [["accuracy_score"], ["f1_score"]] results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], - "f1_score": [0.71, 0.76]}, "", ), + "f1_score": [0.71, 0.76]}, "",0,0,0 ), MonoviewResult(view_index=0, classifier_name="dt", view_name="1", @@ -101,9 +105,11 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.81, 0.86]}, full_labels_pred="", classifier_config="", - test_folds_preds="", classifier="", - n_features="") + n_features="", + hps_duration=0, + fit_duration=0, + pred_duration=0) ] metrics_scores = result_analysis.get_metrics_scores(metrics, results) @@ -132,14 +138,14 @@ class Test_get_example_errors_biclass(unittest.TestCase): results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], "f1_score": [0.71, 0.76]}, np.array([0,0,0,0,1,1,1,1,1]), - ), + 0,0,0), MonoviewResult(0, "dt", "1", {"accuracy_score": [0.8, 0.85], "f1_score": [0.81, 0.86]} , np.array([0,0,1,1,0,0,1,1,0]), "", "", - "", "",) + "", "",0,0) ] example_errors = result_analysis.get_example_errors(ground_truth, results) @@ -196,7 +202,7 @@ class Test_gen_error_data(unittest.TestCase): class Test_format_previous_results(unittest.TestCase): def test_simple(self): - biclass_results = {"metrics_scores":[], "example_errors":[], "feature_importances":[], "labels":[]} + biclass_results = {"metrics_scores":[], "example_errors":[], "feature_importances":[], "labels":[], "durations":[]} random_state = np.random.RandomState(42) # Gen metrics data @@ -223,8 +229,18 @@ class Test_format_previous_results(unittest.TestCase): biclass_results["example_errors"][1]["ada-1"] = ada_error_data_2 biclass_results["example_errors"][1]["mv"] = mv_error_data_2 + biclass_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], + columns=["plif", "plaf"], + data=np.zeros((2,2)))) + biclass_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], + columns=["plif", + "plaf"], + data=np.ones((2, 2)))) + # Running the function - metric_analysis, error_analysis, feature_importances, feature_stds,labels = result_analysis.format_previous_results(biclass_results) + metric_analysis, error_analysis, \ + feature_importances, feature_stds, \ + labels, durations_mean, duration_std = result_analysis.format_previous_results(biclass_results) mean_df = pd.DataFrame(data=np.mean(np.array([metrics_1_data, metrics_2_data]), axis=0), @@ -247,6 +263,7 @@ class Test_format_previous_results(unittest.TestCase): std_df.loc["test"]) np.testing.assert_array_equal(ada_sum, error_analysis["ada-1"]) np.testing.assert_array_equal(mv_sum, error_analysis["mv"]) + self.assertEqual(durations_mean.at["ada-1", 'plif'], 0.5) class Test_gen_error_data_glob(unittest.TestCase): diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index 
d610ccb70b72ec9778b3029e9bebc85ecc6bbc01..055fa1cb80ef74d01ab3679cf2c3f95031517abe 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -105,8 +105,7 @@ class Test_getHPs(unittest.TestCase): rm_tmp() os.mkdir(tmp_path) cls.classifierModule = decision_tree - cls.hyper_param_search = "randomized_search" - cls.n_iter = 2 + cls.hyper_param_search = "Random" cls.classifier_name = "decision_tree" cls.random_state = np.random.RandomState(42) cls.X = cls.random_state.randint(0,10,size=(10,5)) @@ -119,6 +118,7 @@ class Test_getHPs(unittest.TestCase): "criterion": "gini", "splitter": "best"}} cls.classifier_class_name = "DecisionTree" + cls.hps_kwargs = {"n_iter": 2} @classmethod def tearDownClass(cls): @@ -130,7 +130,6 @@ class Test_getHPs(unittest.TestCase): def test_simple(self): kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule, self.hyper_param_search, - self.n_iter, self.classifier_name, self.classifier_class_name, self.X, @@ -140,7 +139,8 @@ class Test_getHPs(unittest.TestCase): self.cv, self.nb_cores, self.metrics, - self.kwargs) + self.kwargs, + **self.hps_kwargs) # class Test_getKWARGS(unittest.TestCase): # diff --git a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py index edbfb93f755336956cdf483f1b8b513819e95f6a..01a4230fe6c8b1e1b9ef0e0fd3bc9eca58de48a3 100644 --- a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py +++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py @@ -4,7 +4,10 @@ import unittest import h5py import numpy as np from sklearn.model_selection import StratifiedKFold +from sklearn.metrics import accuracy_score, make_scorer from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset +from sklearn.base import BaseEstimator +import sys from multiview_platform.mono_multi_view_classifiers.utils.dataset import HDF5Dataset @@ -12,65 +15,20 @@ from multiview_platform.mono_multi_view_classifiers.utils import hyper_parameter from multiview_platform.mono_multi_view_classifiers.multiview_classifiers import weighted_linear_early_fusion -class Test_randomized_search(unittest.TestCase): - - @classmethod - def setUpClass(cls): - rm_tmp() - cls.random_state = np.random.RandomState(42) - cls.view_weights = [0.5, 0.5] - os.mkdir(tmp_path) - cls.dataset_file = h5py.File( - tmp_path+"test_file.hdf5", "w") - cls.labels = cls.dataset_file.create_dataset("Labels", - data=np.array( - [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, ])) - cls.view0_data = cls.random_state.randint(1, 10, size=(10, 4)) - view0 = cls.dataset_file.create_dataset("View0", - data=cls.view0_data) - view0.attrs["sparse"] = False - view0.attrs["name"] = "ViewN0" - cls.view1_data = cls.random_state.randint(1, 10, size=(10, 4)) - view1 = cls.dataset_file.create_dataset("View1", - data=cls.view1_data) - view1.attrs["sparse"] = False - view1.attrs["name"] = "ViewN1" - metaDataGrp = cls.dataset_file.create_group("Metadata") - metaDataGrp.attrs["nbView"] = 2 - metaDataGrp.attrs["nbClass"] = 2 - metaDataGrp.attrs["datasetLength"] = 10 - cls.monoview_classifier_name = "decision_tree" - cls.monoview_classifier_config = {"max_depth": 1, - "criterion": "gini", - "splitter": "best"} - cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state, - shuffle=True) - cls.learning_indices = np.array([1,2,3,4, 5,6,7,8,9]) - cls.dataset = HDF5Dataset(hdf5_file=cls.dataset_file) - - 
@classmethod - def tearDownClass(cls): - cls.dataset_file.close() - rm_tmp() - - def test_simple(self): - best_params, _, params, scores = hyper_parameter_search.randomized_search( - self.dataset, self.labels[()], "multiview", self.random_state, tmp_path, - weighted_linear_early_fusion, "WeightedLinearEarlyFusion", self.k_folds, - 1, ["accuracy_score", None], 2, {}, learning_indices=self.learning_indices) - self.assertIsInstance(best_params, dict) -from sklearn.base import BaseEstimator class FakeEstim(BaseEstimator): - def __init__(self, param1=None, param2=None): + def __init__(self, param1=None, param2=None, random_state=None): self.param1 = param1 self.param2 = param2 def fit(self, X, y,): return self + def accepts_multi_class(self, rs): + return True + def predict(self, X): return np.zeros(X.shape[0]) @@ -89,8 +47,7 @@ class FakeEstimMV(BaseEstimator): else: return np.zeros(example_indices.shape[0]) -from sklearn.metrics import accuracy_score, make_scorer -from sklearn.model_selection import StratifiedKFold + class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): @@ -113,7 +70,7 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): cls.y = cls.random_state.randint(0,2, 10) def test_simple(self): - hyper_parameter_search.MultiviewCompatibleRandomizedSearchCV( + hyper_parameter_search.Random( self.estimator, self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, random_state=self.random_state, @@ -123,7 +80,7 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): ) def test_fit(self): - RSCV = hyper_parameter_search.MultiviewCompatibleRandomizedSearchCV( + RSCV = hyper_parameter_search.Random( self.estimator, self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, @@ -140,7 +97,7 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): tested_param1) def test_fit_multiview(self): - RSCV = hyper_parameter_search.MultiviewCompatibleRandomizedSearchCV( + RSCV = hyper_parameter_search.Random( FakeEstimMV(), self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, @@ -155,7 +112,7 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): def test_fit_multiview_equiv(self): self.n_iter=1 - RSCV = hyper_parameter_search.MultiviewCompatibleRandomizedSearchCV( + RSCV = hyper_parameter_search.Random( FakeEstimMV(), self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, @@ -171,7 +128,7 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): def test_gets_good_params(self): self.param_distributions["param1"].append('return exact') self.n_iter=6 - RSCV = hyper_parameter_search.MultiviewCompatibleRandomizedSearchCV( + RSCV = hyper_parameter_search.Random( FakeEstimMV(), self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, @@ -188,4 +145,52 @@ class Test_MultiviewCompatibleRandomizedSearchCV(unittest.TestCase): # if __name__ == '__main__': # # unittest.main() # suite = unittest.TestLoader().loadTestsFromTestCase(Test_randomized_search) -# unittest.TextTestRunner(verbosity=2).run(suite) \ No newline at end of file +# unittest.TextTestRunner(verbosity=2).run(suite) +# class Test_randomized_search(unittest.TestCase): +# +# @classmethod +# def setUpClass(cls): +# rm_tmp() +# cls.random_state = 
np.random.RandomState(42) +# cls.view_weights = [0.5, 0.5] +# os.mkdir(tmp_path) +# cls.dataset_file = h5py.File( +# tmp_path+"test_file.hdf5", "w") +# cls.labels = cls.dataset_file.create_dataset("Labels", +# data=np.array( +# [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, ])) +# cls.view0_data = cls.random_state.randint(1, 10, size=(10, 4)) +# view0 = cls.dataset_file.create_dataset("View0", +# data=cls.view0_data) +# view0.attrs["sparse"] = False +# view0.attrs["name"] = "ViewN0" +# cls.view1_data = cls.random_state.randint(1, 10, size=(10, 4)) +# view1 = cls.dataset_file.create_dataset("View1", +# data=cls.view1_data) +# view1.attrs["sparse"] = False +# view1.attrs["name"] = "ViewN1" +# metaDataGrp = cls.dataset_file.create_group("Metadata") +# metaDataGrp.attrs["nbView"] = 2 +# metaDataGrp.attrs["nbClass"] = 2 +# metaDataGrp.attrs["datasetLength"] = 10 +# cls.monoview_classifier_name = "decision_tree" +# cls.monoview_classifier_config = {"max_depth": 1, +# "criterion": "gini", +# "splitter": "best"} +# cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state, +# shuffle=True) +# cls.learning_indices = np.array([1,2,3,4, 5,6,7,8,9]) +# cls.dataset = HDF5Dataset(hdf5_file=cls.dataset_file) +# +# @classmethod +# def tearDownClass(cls): +# cls.dataset_file.close() +# rm_tmp() +# +# +# def test_simple(self): +# best_params, _, params, scores = hyper_parameter_search.randomized_search( +# self.dataset, self.labels[()], "multiview", self.random_state, tmp_path, +# weighted_linear_early_fusion, "WeightedLinearEarlyFusion", self.k_folds, +# 1, ["accuracy_score", None], 2, {}, learning_indices=self.learning_indices) +# self.assertIsInstance(best_params, dict)
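

Reviewer note on the Test_format_previous_results change above: format_previous_results is now expected to also return a per-classifier mean (and standard deviation) of the duration DataFrames collected over the statistical iterations, which is why the new assertion expects 0.5, the element-wise mean of the all-zeros and all-ones frames. A minimal standalone sketch follows, assuming a plain element-wise aggregation via pd.concat plus a groupby on the index; the actual aggregation lives in result_analysis.py and is not part of this diff.

import numpy as np
import pandas as pd

# Two fake per-iteration duration frames, mirroring the fixtures in
# Test_format_previous_results ("plif"/"plaf" are placeholder column names).
durations = [
    pd.DataFrame(index=["ada-1", "mv"], columns=["plif", "plaf"],
                 data=np.zeros((2, 2))),
    pd.DataFrame(index=["ada-1", "mv"], columns=["plif", "plaf"],
                 data=np.ones((2, 2))),
]

# Assumed aggregation: stack the frames and reduce per classifier (index label).
stacked = pd.concat(durations)
durations_mean = stacked.groupby(level=0).mean()
durations_std = stacked.groupby(level=0).std()

assert durations_mean.at["ada-1", "plif"] == 0.5  # mean of 0.0 and 1.0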
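
The hyper-parameter-search tests above now exercise the renamed hyper_parameter_search.Random class through lightweight BaseEstimator stubs. Since the full signature of Random is not visible in this diff, the sketch below reproduces the same fixture pattern with scikit-learn's own RandomizedSearchCV as a stand-in: FakeEstim, param1 and param2 mirror the test fixtures, the label vector reuses the one from the commented-out Test_randomized_search, and everything else is illustrative only, not the project's API.

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold


class FakeEstim(BaseEstimator):
    """Do-nothing estimator: just enough API surface for a randomized search."""

    def __init__(self, param1=None, param2=None, random_state=None):
        # Storing every constructor argument keeps clone()/get_params() happy.
        self.param1 = param1
        self.param2 = param2
        self.random_state = random_state

    def fit(self, X, y):
        return self

    def predict(self, X):
        # Constant integer predictions so accuracy_score can be computed.
        return np.zeros(X.shape[0], dtype=int)


random_state = np.random.RandomState(42)
X = random_state.randint(0, 10, size=(10, 5))
y = np.array([0, 1, 0, 0, 1, 0, 1, 0, 0, 1])

search = RandomizedSearchCV(
    FakeEstim(),
    param_distributions={"param1": ["a", "b"], "param2": ["c", "d"]},
    n_iter=2,
    scoring=make_scorer(accuracy_score),
    cv=StratifiedKFold(n_splits=2),
    random_state=random_state,
    refit=True,
)
search.fit(X, y)
print(search.best_params_)  # one of the sampled param1/param2 combinations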