diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py
index 6c75194aaf2c10a58cfdd38ecd9e2fa8e071b27b..fd0499edbb1c30d7d7373d7149de3f2e4451f002 100644
--- a/summit/multiview_platform/exec_classif.py
+++ b/summit/multiview_platform/exec_classif.py
@@ -203,6 +203,7 @@ def get_random_hps_args(hps_args, classifier_name):
             hps_dict["param_distributions"] = value
     return hps_dict
 
+
 def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class,
                                        view_index, view_name, hps_kwargs):
     if classifier_name in arguments:
@@ -430,116 +431,6 @@ def benchmark_init(directory, classification_indices, labels,
                    labels_dictionary, k_folds):
     return results_monoview, labels_names
 
-# def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
-#                        classification_indices=None, args=None,
-#                        k_folds=None, random_state=None, hyper_param_search=None,
-#                        metrics=None, argument_dictionaries=None,
-#                        benchmark=None, views=None, views_indices=None, flag=None,
-#                        labels=None,
-#                        exec_monoview_multicore=exec_monoview_multicore,
-#                        exec_multiview_multicore=exec_multiview_multicore,):
-#     """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
-#     exec_multiview_multicore args are only used for tests"""
-#
-#     results_monoview, labels_names = benchmark_init(directory,
-#                                                     classification_indices, labels,
-#                                                     labels_dictionary, k_folds)
-#
-#     logging.debug("Start:\t monoview benchmark")
-#     results_monoview += [
-#         exec_monoview_multicore(directory, args["name"], labels_names,
-#                                 classification_indices, k_folds,
-#                                 core_index, args["file_type"], args["pathf"], random_state,
-#                                 labels,
-#                                 hyper_param_search=hyper_param_search,
-#                                 metrics=metrics,
-#                                 n_iter=args["hps_iter"], **argument)
-#         for argument in argument_dictionaries["Monoview"]]
-#     logging.debug("Done:\t monoview benchmark")
-#
-#
-#     logging.debug("Start:\t multiview benchmark")
-#     results_multiview = [
-#         exec_multiview_multicore(directory, core_index, args["name"],
-#                                  classification_indices, k_folds, args["file_type"],
-#                                  args["pathf"], labels_dictionary, random_state,
-#                                  labels, hyper_param_search=hyper_param_search,
-#                                  metrics=metrics, n_iter=args["hps_iter"],
-#                                  **arguments)
-#         for arguments in argument_dictionaries["multiview"]]
-#     logging.debug("Done:\t multiview benchmark")
-#
-#     return [flag, results_monoview + results_multiview]
-#
-#
-# def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None,
-#                                  directory=None, classification_indices=None,
-#                                  args=None,
-#                                  k_folds=None, random_state=None,
-#                                  hyper_param_search=None, metrics=None,
-#                                  argument_dictionaries=None,
-#                                  benchmark=None, views=None, views_indices=None,
-#                                  flag=None, labels=None,
-#                                  exec_monoview_multicore=exec_monoview_multicore,
-#                                  exec_multiview_multicore=exec_multiview_multicore,):
-#     """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
-#     exec_multiview_multicore args are only used for tests"""
-#
-#     results_monoview, labels_names = benchmark_init(directory,
-#                                                     classification_indices, labels,
-#                                                     labels_dictionary, k_folds)
-#
-#     logging.debug("Start:\t monoview benchmark")
-#     nb_experiments = len(argument_dictionaries["monoview"])
-#     nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
-#     for step_index in range(nb_multicore_to_do):
-#         results_monoview += (Parallel(n_jobs=nb_cores)(
-#             delayed(exec_monoview_multicore)(directory, args["name"], labels_names,
-#                                              classification_indices, k_folds,
-#                                              core_index, args["file_type"], args["pathf"],
-#                                              random_state, labels,
-#                                              hyper_param_search=hyper_param_search,
-#                                              metrics=metrics,
-#                                              n_iter=args["hps_iter"],
-#                                              **argument_dictionaries["monoview"][
-#                                                  core_index + step_index * nb_cores])
-#             for core_index in
-#             range(min(nb_cores, nb_experiments - step_index * nb_cores))))
-#     logging.debug("Done:\t monoview benchmark")
-#
-#     logging.debug("Start:\t multiview arguments initialization")
-#     # argument_dictionaries = initMultiviewArguments(args, benchmark, views,
-#     #                                                views_indices,
-#     #                                                argument_dictionaries,
-#     #                                                random_state, directory,
-#     #                                                resultsMonoview,
-#     #                                                classification_indices)
-#     logging.debug("Done:\t multiview arguments initialization")
-#
-#     logging.debug("Start:\t multiview benchmark")
-#     results_multiview = []
-#     nb_experiments = len(argument_dictionaries["multiview"])
-#     nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
-#     for step_index in range(nb_multicore_to_do):
-#         results_multiview += Parallel(n_jobs=nb_cores)(
-#             delayed(exec_multiview_multicore)(directory, core_index, args["name"],
-#                                               classification_indices, k_folds,
-#                                               args["file_type"], args["Base"]["pathf"],
-#                                               labels_dictionary, random_state,
-#                                               labels,
-#                                               hyper_param_search=hyper_param_search,
-#                                               metrics=metrics,
-#                                               n_iter=args["hps_iter"],
-#                                               **
-#                                               argument_dictionaries["multiview"][
-#                                                   step_index * nb_cores + core_index])
-#             for core_index in
-#             range(min(nb_cores, nb_experiments - step_index * nb_cores)))
-#     logging.debug("Done:\t multiview benchmark")
-#
-#     return [flag, results_monoview + results_multiview]
-
-
 def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
                                  directory=None, classification_indices=None,
                                  args=None,
@@ -548,7 +439,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
                                  argument_dictionaries=None,
                                  benchmark=None, views=None, views_indices=None,
                                  flag=None, labels=None,
-                                 track_tracebacks=False):  # pragma: no cover
+                                 track_tracebacks=False, nb_cores=1):  # pragma: no cover
 
     results_monoview, labels_names = benchmark_init(directory,
                                                     classification_indices, labels,
@@ -564,7 +455,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
             results_monoview += [
                 exec_monoview(directory, X, Y, args["name"], labels_names,
                               classification_indices, k_folds,
-                              1, args["file_type"], args["pathf"], random_state,
+                              nb_cores, args["file_type"], args["pathf"], random_state,
                               hyper_param_search=hyper_param_search,
                               metrics=metrics,
                               **arguments)]
@@ -575,19 +466,8 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
                         "view_name"]] = traceback.format_exc()
                 else:
                     raise
-
     logging.info("Done:\t monoview benchmark")
 
-    logging.info("Start:\t multiview arguments initialization")
-
-    # argument_dictionaries = initMultiviewArguments(args, benchmark, views,
-    #                                                views_indices,
-    #                                                argument_dictionaries,
-    #                                                random_state, directory,
-    #                                                resultsMonoview,
-    #                                                classification_indices)
-    logging.info("Done:\t multiview arguments initialization")
-
     logging.info("Start:\t multiview benchmark")
     results_multiview = []
     for arguments in argument_dictionaries["multiview"]:
@@ -595,7 +475,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
             results_multiview += [
                 exec_multiview(directory, dataset_var, args["name"],
                                classification_indices,
-                               k_folds, 1, args["file_type"],
+                               k_folds, nb_cores, args["file_type"],
                                args["pathf"], labels_dictionary,
                                random_state, labels,
                                hps_method=hyper_param_search,
@@ -660,26 +540,10 @@ def exec_benchmark(nb_cores, stats_iter,
     """
     logging.info("Start:\t Executing all the needed benchmarks")
     results = []
-    # if nb_cores > 1:
-    #     if stats_iter > 1 or nb_multiclass > 1:
-    #         nb_exps_to_do = len(benchmark_arguments_dictionaries)
-    #         nb_multicore_to_do = range(int(math.ceil(float(nb_exps_to_do) / nb_cores)))
-    #         for step_index in nb_multicore_to_do:
-    #             results += (Parallel(n_jobs=nb_cores)(delayed(exec_one_benchmark)
-    #                                                   (core_index=core_index,
-    #                                                    **
-    #                                                    benchmark_arguments_dictionaries[
-    #                                                        core_index + step_index * nb_cores])
-    #                                                   for core_index in range(
-    #                 min(nb_cores, nb_exps_to_do - step_index * nb_cores))))
-    #     else:
-    #         results += [exec_one_benchmark_multicore(nb_cores=nb_cores, **
-    #                                                  benchmark_arguments_dictionaries[0])]
-    # else:
     for arguments in benchmark_arguments_dictionaries:
         benchmark_results = exec_one_benchmark_mono_core(
             dataset_var=dataset_var,
-            track_tracebacks=track_tracebacks,
+            track_tracebacks=track_tracebacks, nb_cores=nb_cores,
             **arguments)
         analyze_iterations([benchmark_results],
                            benchmark_arguments_dictionaries, stats_iter,
@@ -697,7 +561,6 @@ def exec_benchmark(nb_cores, stats_iter,
                                               dataset_var.sample_ids,
                                               dataset_var.get_labels())
     logging.info("Done:\t Analyzing predictions")
-    delete(benchmark_arguments_dictionaries, nb_cores, dataset_var)
     return results_mean_stds
 
 
@@ -816,11 +679,5 @@ def exec_classif(arguments):  # pragma: no cover
             stats_iter_random_states, metrics,
             argument_dictionaries, benchmark,
             views, views_indices)
-        results_mean_stds = exec_benchmark(
-            nb_cores, stats_iter,
-            benchmark_argument_dictionaries, directory, metrics,
-            dataset_var,
-            args["track_tracebacks"])
-        # noise_results.append([noise_std, results_mean_stds])
-        # plot_results_noise(directory, noise_results, metrics[0][0],
-        #                    dataset_name)
+        exec_benchmark(nb_cores, stats_iter, benchmark_argument_dictionaries,
+                       directory, metrics, dataset_var, args["track_tracebacks"])
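Note on the exec_classif.py hunks above: the deleted exec_one_benchmark / exec_one_benchmark_multicore runners dispatched experiments in chunks of nb_cores with joblib, while the surviving mono-core runner now accepts nb_cores and forwards it to exec_monoview / exec_multiview, moving the parallelism inside each classifier run. For reference, a minimal, self-contained sketch of the chunked-dispatch idiom the dead code used (run_one and the toy experiment list are hypothetical stand-ins, not the platform's API):

import math

from joblib import Parallel, delayed


def run_one(experiment):
    # Stand-in for exec_monoview_multicore / exec_multiview_multicore.
    return experiment ** 2


def run_in_chunks(experiments, nb_cores):
    """Dispatch experiments nb_cores at a time, as the removed code did."""
    results = []
    nb_experiments = len(experiments)
    nb_steps = int(math.ceil(float(nb_experiments) / nb_cores))
    for step_index in range(nb_steps):
        results += Parallel(n_jobs=nb_cores)(
            delayed(run_one)(experiments[step_index * nb_cores + core_index])
            for core_index in
            range(min(nb_cores, nb_experiments - step_index * nb_cores)))
    return results


if __name__ == "__main__":
    print(run_in_chunks(list(range(10)), nb_cores=4))  # [0, 1, 4, ..., 81]

Handing nb_cores down to each single benchmark run arguably avoids nesting one joblib pool inside another, at the cost of running the benchmarks themselves sequentially.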
diff --git a/summit/multiview_platform/monoview/exec_classif_mono_view.py b/summit/multiview_platform/monoview/exec_classif_mono_view.py
index 18d896bc353664555135950c21ffa4a137948b89..98b65e3dbfde3c8cbbbb51e6096065eb2ef0214e 100644
--- a/summit/multiview_platform/monoview/exec_classif_mono_view.py
+++ b/summit/multiview_platform/monoview/exec_classif_mono_view.py
@@ -36,8 +36,8 @@ def exec_monoview_multicore(directory, name, labels_names,
                             **args):  # pragma: no cover
     dataset_var = HDF5Dataset(
         hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r"))
-    neededViewIndex = args["view_index"]
-    X = dataset_var.get_v(neededViewIndex)
+    needed_view_index = args["view_index"]
+    X = dataset_var.get_v(needed_view_index)
     Y = labels
     return exec_monoview(directory, X, Y, name, labels_names,
                          classification_indices, k_folds, 1, database_type,
@@ -61,8 +61,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
     view_name, \
     classifier_name, \
     X, \
-    learningRate, \
-    labelsString, \
+    learning_rate, \
+    labels_string, \
     output_file_name, \
     directory, \
     base_file_name = init_constants(args, X, classification_indices,
@@ -74,7 +74,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
         "Info:\t Classification - Database:" + str(
             database_name) + " View:" + str(
             view_name) + " train ratio:"
-        + str(learningRate) + ", CrossValidation k-folds: " + str(
+        + str(learning_rate) + ", CrossValidation k-folds: " + str(
             k_folds.n_splits) + ", cores:" + str(
             nb_cores) + ", algorithm : " + classifier_name)
 
@@ -110,7 +110,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
                                              random_state, y=Y)
 
     fit_beg = time.monotonic()
-    classifier.fit(X_train, y_train)  # NB_CORES=nbCores,
+    classifier.fit(X_train, y_train)
     fit_duration = time.monotonic() - fit_beg
     logging.info("Done:\t Training")
 
@@ -122,10 +122,10 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
 
     # Filling the full prediction in the right order
     full_pred = np.zeros(Y.shape, dtype=int) - 100
-    for trainIndex, index in enumerate(classification_indices[0]):
-        full_pred[index] = train_pred[trainIndex]
-    for testIndex, index in enumerate(classification_indices[1]):
-        full_pred[index] = test_pred[testIndex]
+    for train_index, index in enumerate(classification_indices[0]):
+        full_pred[index] = train_pred[train_index]
+    for test_index, index in enumerate(classification_indices[1]):
+        full_pred[index] = test_pred[test_index]
 
     logging.info("Done:\t Predicting")
 
@@ -185,7 +185,8 @@ def init_constants(args, X, classification_indices, labels_names,
     base_file_name = cl_type_string + '-' + name + "-" + view_name + "-"
     output_file_name = os.path.join(directory, base_file_name)
     secure_file_path(output_file_name)
-    return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name, directory, base_file_name
+    return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string,\
+        output_file_name, directory, base_file_name
 
 
 def init_train_test(X, Y, classification_indices):
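Note on the exec_classif_mono_view.py hunks above: the renamed loop rebuilds one full-length prediction vector from the train/test split, with -100 left as a sentinel for samples that belong to neither split. A toy illustration of that scatter step (all indices and predictions invented for the example):

import numpy as np

# Hypothetical split of 6 samples into 4 train / 2 test indices.
train_indices = np.array([0, 2, 3, 5])
test_indices = np.array([1, 4])
train_pred = np.array([1, 0, 1, 1])
test_pred = np.array([0, 1])

# -100 marks positions covered by neither split (none in this toy case).
full_pred = np.zeros(6, dtype=int) - 100
for train_index, index in enumerate(train_indices):
    full_pred[index] = train_pred[train_index]
for test_index, index in enumerate(test_indices):
    full_pred[index] = test_pred[test_index]

print(full_pred)  # [1 0 0 1 1 1]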
diff --git a/summit/multiview_platform/monoview/monoview_utils.py b/summit/multiview_platform/monoview/monoview_utils.py
index c46c8b2e75c4e2b28ee2278fdd3f44316cbc5e44..2fbf53d3e077e013382663ceaa2af5ce57024531 100644
--- a/summit/multiview_platform/monoview/monoview_utils.py
+++ b/summit/multiview_platform/monoview/monoview_utils.py
@@ -13,8 +13,6 @@ __author__ = "Baptiste Bauvin"
 __status__ = "Prototype"  # Production, Development, Prototype
 
 
-# __date__ = 2016 - 03 - 25
-
 def change_label_to_minus(y):
     """
     Change the label 0 to minus one
@@ -76,43 +74,6 @@ def gen_test_folds_preds(X_train, y_train, KFolds, estimator):
     return test_folds_preds
 
 
-# class CustomRandint:
-#     """Used as a distribution returning a integer between low and high-1.
-#     It can be used with a multiplier agrument to be able to perform more complex generation
-#     for example 10 e -(randint)"""
-#
-#     def __init__(self, low=0, high=0, multiplier=""):
-#         self.randint = randint(low, high)
-#         self.multiplier = multiplier
-#
-#     def rvs(self, random_state=None):
-#         randinteger = self.randint.rvs(random_state=random_state)
-#         if self.multiplier == "e-":
-#             return 10 ** -randinteger
-#         else:
-#             return randinteger
-#
-#     def get_nb_possibilities(self):
-#         return self.randint.b - self.randint.a
-#
-#
-# class CustomUniform:
-#     """Used as a distribution returning a float between loc and loc + scale..
-#     It can be used with a multiplier agrument to be able to perform more complex generation
-#     for example 10 e -(float)"""
-#
-#     def __init__(self, loc=0, state=1, multiplier=""):
-#         self.uniform = uniform(loc, state)
-#         self.multiplier = multiplier
-#
-#     def rvs(self, random_state=None):
-#         unif = self.uniform.rvs(random_state=random_state)
-#         if self.multiplier == 'e-':
-#             return 10 ** -unif
-#         else:
-#             return unif
-
-
 class BaseMonoviewClassifier(BaseClassifier):
 
     def get_feature_importance(self, directory, base_file_name,
diff --git a/summit/multiview_platform/multiview/exec_multiview.py b/summit/multiview_platform/multiview/exec_multiview.py
index 1f3dcdc39b11f0d0e79c3f1629068bbfd72973b4..a594624c8f5aab3e332b6ef7d20776d91308500c 100644
--- a/summit/multiview_platform/multiview/exec_multiview.py
+++ b/summit/multiview_platform/multiview/exec_multiview.py
@@ -260,11 +260,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.info("Done:\t Getting train/test split")
 
     logging.info("Start:\t Getting classifiers modules")
-    # classifierPackage = getattr(multiview_classifiers,
-    #                             CL_type)  # Permet d'appeler un module avec une string
     classifier_module = getattr(multiview_classifiers, cl_type)
     classifier_name = classifier_module.classifier_class_name
-    # classifierClass = getattr(classifierModule, CL_type + "Class")
     logging.info("Done:\t Getting classifiers modules")
 
     logging.info("Start:\t Optimizing hyperparameters")
@@ -285,13 +282,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
         hps.fit(dataset_var, dataset_var.get_labels(), )
         classifier_config = hps.get_best_params()
         hps.gen_report(output_file_name)
-    # classifier_config = hyper_parameter_search.search_best_settings(
-    #     dataset_var, dataset_var.get_labels(), classifier_module,
-    #     classifier_name,
-    #     metrics[0], learning_indices, k_folds, random_state,
-    #     output_file_name, nb_cores=nb_cores, views_indices=views_indices,
-    #     searching_tool=hps_method, n_iter=n_iter,
-    #     classifier_config=classifier_config)
     hps_duration = time.monotonic() - hps_beg
     classifier = get_mc_estim(
         getattr(classifier_module, classifier_name)(random_state=random_state,
@@ -325,8 +315,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.info(
         "Info:\t Classification duration " + str(extraction_time) + "s")
 
-    # TODO: get better cltype
-
     logging.info("Start:\t Result Analysis for " + cl_type)
     times = (extraction_time, whole_duration)
     result_analyzer = MultiviewResultAnalyzer(view_names=views,
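Note on the monoview_utils.py hunk above: the deleted CustomRandint / CustomUniform comments described wrappers around scipy.stats distributions that let a random hyper-parameter search draw values such as 10**-k (e.g. learning rates). A self-contained sketch reconstructed from those deleted comments, kept here only to document what the dead code did (the platform's maintained distribution helpers are assumed to live elsewhere):

from scipy.stats import randint


class CustomRandint:
    """Draw an integer in [low, high); with multiplier="e-", return 10**-draw."""

    def __init__(self, low=0, high=0, multiplier=""):
        self.randint = randint(low, high)
        self.multiplier = multiplier

    def rvs(self, random_state=None):
        drawn = self.randint.rvs(random_state=random_state)
        return 10 ** -drawn if self.multiplier == "e-" else drawn


# Example: draws one of 1e-1 .. 1e-5 for a learning-rate-style parameter.
lr_dist = CustomRandint(low=1, high=6, multiplier="e-")
print([lr_dist.rvs(random_state=seed) for seed in range(3)])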
diff --git a/summit/multiview_platform/multiview/multiview_utils.py b/summit/multiview_platform/multiview/multiview_utils.py
index 88c4ac534a3623a67cfe7acdc5903edb04e41587..6a26aaa37623b1b1f265e10af39ecdc3bef513fa 100644
--- a/summit/multiview_platform/multiview/multiview_utils.py
+++ b/summit/multiview_platform/multiview/multiview_utils.py
@@ -7,13 +7,6 @@ from .. import monoview_classifiers
 from ..utils.base import BaseClassifier, ResultAnalyser
 from ..utils.dataset import RAMDataset
 
-
-# class FakeEstimator():
-#
-#     def predict(self, X, sample_indices=None, view_indices=None):
-#         return np.zeros(sample_indices.shape[0])
-
-
 class BaseMultiviewClassifier(BaseClassifier):
     """
     BaseMultiviewClassifier base of Multiview classifiers
@@ -49,21 +42,6 @@ class BaseMultiviewClassifier(BaseClassifier):
                 'Used {} views to fit, and trying to predict on {}'.format(
                     self.used_views, view_indices))
 
-    # def to_str(self, param_name):
-    #     if param_name in self.weird_strings:
-    #         string = ""
-    #         if "class_name" in self.weird_strings[param_name]:
-    #             string += self.get_params()[param_name].__class__.__name__
-    #         if "config" in self.weird_strings[param_name]:
-    #             string += "( with " + self.get_params()[
-    #                 param_name].params_to_string() + ")"
-    #         else:
-    #             string += self.weird_strings[param_name](
-    #                 self.get_params()[param_name])
-    #         return string
-    #     else:
-    #         return str(self.get_params()[param_name])
-
     def accepts_multi_class(self, random_state, n_samples=10, dim=2,
                             n_classes=3, n_views=2):
         if int(n_samples / n_classes) < 1:
diff --git a/summit/multiview_platform/result_analysis/error_analysis.py b/summit/multiview_platform/result_analysis/error_analysis.py
index 7ff5f06b9471ba08a15d2487e334285e1c6c5ef1..90d6b69756560a450b304db55abba3cea496f888 100644
--- a/summit/multiview_platform/result_analysis/error_analysis.py
+++ b/summit/multiview_platform/result_analysis/error_analysis.py
@@ -199,7 +199,6 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, dataset_name, la
     plt.close()
     # The following part is used to generate an interactive graph.
     if use_plotly:
-        # [np.where(labels==i)[0] for i in np.unique(labels)]
         hover_text = [[sample_ids[sample_index] + " failed " + str(
             stats_iter - data[
                 sample_index, classifier_index]) + " time(s), labelled " + str(
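Note on the error_analysis.py hunk above: the retained plotly branch builds one hover string per (sample, classifier) cell, where data[sample, classifier] counts how many of the stats_iter iterations that classifier got the sample right. A toy version of the hover-text construction (all values invented):

import numpy as np

stats_iter = 5
sample_ids = ["sample_0", "sample_1"]
labels = np.array([0, 1])
# data[i, j]: iterations (out of stats_iter) classifier j got sample i right.
data = np.array([[5, 3],
                 [0, 4]])

hover_text = [[sample_ids[sample_index] + " failed "
               + str(stats_iter - data[sample_index, classifier_index])
               + " time(s), labelled " + str(labels[sample_index])
               for classifier_index in range(data.shape[1])]
              for sample_index in range(data.shape[0])]
print(hover_text[1][0])  # sample_1 failed 5 time(s), labelled 1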