diff --git a/data/Plausible.hdf5 b/data/Plausible.hdf5 index 331fea8b825aa82ae829a32ffe09809f5e2880ff..4f10a2ad8f524e8692771be0ab2f3f3709f37c16 100644 Binary files a/data/Plausible.hdf5 and b/data/Plausible.hdf5 differ diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py index 53d4fcc9fdb31920c40ae1802db43fb17f7058e3..a142cb88fa45fb4f904bb71a97295b50f02195a5 100644 --- a/multiview_platform/execute.py +++ b/multiview_platform/execute.py @@ -2,12 +2,12 @@ def exec(): - import versions - versions.testVersions() + import multiview_platform.versions as versions + versions.test_versions() import sys - from mono_multi_view_classifiers import exec_classif - exec_classif.execClassif(sys.argv[1:]) + from multiview_platform.mono_multi_view_classifiers import exec_classif + exec_classif.exec_classif(sys.argv[1:]) if __name__ == "__main__": diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index a7d4eb653fa572477eb8b94c2d94b3e53ac44383..fd51e53012fe6a345aa592cbc6c124aed1317476 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -10,40 +10,43 @@ import itertools import numpy as np from joblib import Parallel, delayed from sklearn.tree import DecisionTreeClassifier - -matplotlib.use( - 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure - # Import own modules from . import monoview_classifiers from . import multiview_classifiers -from .multiview.exec_multiview import ExecMultiview, ExecMultiview_multicore -from .monoview.exec_classif_mono_view import ExecMonoview, ExecMonoview_multicore +from .multiview.exec_multiview import exec_multiview, exec_multiview_multicore +from .monoview.exec_classif_mono_view import exec_monoview, exec_monoview_multicore from .utils import get_multiview_db as DB -from .result_analysis import \ - getResults, plot_results_noise # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, +from .result_analysis import get_results +from .result_analysis import plot_results_noise +# resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, from .utils import execution, dataset, multiclass, configuration +matplotlib.use( + 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure + + + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): +def init_benchmark(cl_type, monoview_algos, multiview_algos, args): r"""Used to create a list of all the algorithm packages names used for the benchmark. - First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right + First this function will check if the benchmark need mono- or/and multiview + algorithms and adds to the right dictionary the asked algorithms. If none is asked by the user, all will be added. If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added. 
Parameters ---------- - CL_type : List of string + cl_type : List of string List of types of needed benchmark - multiviewAlgos : List of strings + multiview_algos : List of strings List of multiview algorithms needed for the benchmark - monoviewAlgos : Listof strings + monoview_algos : Listof strings List of monoview algorithms needed for the benchmark args : ParsedArgumentParser args All the input args (used to tune the algorithms) @@ -54,21 +57,21 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. """ benchmark = {"monoview": {}, "multiview": {}} - allMultiviewPackages = [name for _, name, isPackage + all_multiview_packages = [name for _, name, isPackage in pkgutil.iter_modules( ['./mono_multi_view_classifiers/multiview_classifiers/']) if isPackage] - if "monoview" in CL_type: - if monoviewAlgos == ['all']: + if "monoview" in cl_type: + if monoview_algos == ['all']: benchmark["monoview"] = [name for _, name, isPackage in pkgutil.iter_modules([ "./mono_multi_view_classifiers/monoview_classifiers"]) if not isPackage] else: - benchmark["monoview"] = monoviewAlgos + benchmark["monoview"] = monoview_algos - if "multiview" in CL_type: + if "multiview" in cl_type: benchmark["multiview"] = [name for _, name, isPackage in pkgutil.iter_modules([ "./mono_multi_view_classifiers/multiview_classifiers"]) @@ -76,12 +79,12 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): return benchmark -def genViewsDictionnary(DATASET, views): +def gen_views_dictionnary(dataset_var, views): r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value). Parameters ---------- - DATASET : `h5py` dataset file + dataset_var : `h5py` dataset file The full dataset on which the benchmark will be done views : List of strings Names of the selected views on which the banchmark will be done @@ -91,21 +94,21 @@ def genViewsDictionnary(DATASET, views): viewDictionary : Dictionary Dictionary mapping the view names totheir indexin the full dataset. 
""" - datasetsNames = DATASET.keys() - viewsDictionary = {} - for datasetName in datasetsNames: - if datasetName[:4] == "View": - viewName = DATASET.get(datasetName).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName in views: - viewsDictionary[viewName] = int(datasetName[4:]) + datasets_names = dataset_var.keys() + views_dictionary = {} + for dataset_name in datasets_names: + if dataset_name[:4] == "View": + view_name = dataset_var.get(dataset_name).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name in views: + views_dictionary[view_name] = int(dataset_name[4:]) - return viewsDictionary + return views_dictionary def init_argument_dictionaries(benchmark, views_dictionary, - nb_class, init_kwargs): + nb_class, init_kwargs): argument_dictionaries = {"monoview": [], "multiview": []} if benchmark["monoview"]: argument_dictionaries["monoview"] = init_monoview_exps( @@ -126,10 +129,10 @@ def init_multiview_exps(classifier_names, views_dictionary, nb_class, kwargs_ini for classifier_name in classifier_names: if multiple_args(classifier_name, kwargs_init): multiview_arguments += gen_multiple_args_dictionnaries(nb_class, - kwargs_init, - classifier_name, - views_dictionary=views_dictionary, - framework="multiview") + kwargs_init, + classifier_name, + views_dictionary=views_dictionary, + framework="multiview") else: multiview_arguments += [gen_single_multiview_arg_dictionary(classifier_name, kwargs_init, @@ -153,7 +156,7 @@ def init_monoview_exps(classifier_names, All types of monoview and multiview experiments that have to be benchmarked argument_dictionaries : dictionary Maps monoview and multiview experiments arguments. - viewDictionary : dictionary + views_dictionary : dictionary Maps the view names to their index in the HDF5 dataset nb_class : integer Number of different labels in the classification @@ -204,20 +207,20 @@ def gen_single_multiview_arg_dictionary(classifier_name,arguments,nb_class, } -def multiple_args(classifier, kwargsInit): +def multiple_args(classifier, kwargs_init): listed_args = [type(value) == list and len(value)>1 for key, value in - kwargsInit[classifier].items()] + kwargs_init[classifier].items()] if True in listed_args: return True else: return False -def gen_multiple_kwargs_combinations(clKWARGS): - values = list(clKWARGS.values()) +def gen_multiple_kwargs_combinations(cl_kwrags): + values = list(cl_kwrags.values()) listed_values = [[_] if type(_) is not list else _ for _ in values] values_cartesian_prod = [_ for _ in itertools.product(*listed_values)] - keys = clKWARGS.keys() + keys = cl_kwrags.keys() kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) for values in values_cartesian_prod] @@ -231,10 +234,11 @@ def gen_multiple_kwargs_combinations(clKWARGS): return kwargs_combination, reduced_kwargs_combination -def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier, +def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, view_name=None, view_index=None, views_indices=None, framework="monoview"): - multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier]) + multiple_kwargs_list, reduced_multiple_kwargs_list = \ + gen_multiple_kwargs_combinations(kwargs_init[classifier]) multiple_kwargs_dict = dict( (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) for reduced_dictionary, dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list 
))
@@ -253,14 +257,14 @@ def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier,
     return args_dictionnaries


-def init_monoview_kwargs(args, classifiersNames):
+def init_monoview_kwargs(args, classifiers_names):
     r"""Used to init kwargs thanks to a function in each monoview classifier package.

     Parameters
     ----------
     args : parsed args objects
         All the args passed by the user.
-    classifiersNames : list of strings
+    classifiers_names : list of strings
         List of the benchmarks's monoview classifiers names.

     Returns
@@ -272,7 +276,7 @@ def init_monoview_kwargs(args, classifiersNames):

     logging.debug("Start:\t Initializing monoview classifiers arguments")
     monoviewKWARGS = {}
-    for classifiersName in classifiersNames:
+    for classifiersName in classifiers_names:
         try:
             getattr(monoview_classifiers, classifiersName)
         except AttributeError:
@@ -309,43 +313,43 @@ def init_multiview_kwargs(args, classifiers_names):
     return multiview_kwargs


-def initMultiviewArguments(args, benchmark, views, viewsIndices,
-                           argumentDictionaries, randomState, directory,
-                           resultsMonoview, classificationIndices):
+def init_multiview_arguments(args, benchmark, views, views_indices,
+                             argument_dictionaries, random_state, directory,
+                             results_monoview, classification_indices):
     """Used to add each monoview exeperience args to the list of monoview experiences args"""
     logging.debug("Start:\t Initializing multiview classifiers arguments")
-    multiviewArguments = []
+    multiview_arguments = []
     if "multiview" in benchmark:
-        for multiviewAlgoName in benchmark["multiview"]:
-            mutliviewModule = getattr(multiview_classifiers,
-                                      multiviewAlgoName)
+        for multiview_algo_name in benchmark["multiview"]:
+            multiview_module = getattr(multiview_classifiers,
+                                       multiview_algo_name)

-            multiviewArguments += mutliviewModule.getArgs(args, benchmark,
-                                                          views, viewsIndices,
-                                                          randomState,
+            multiview_arguments += multiview_module.getArgs(args, benchmark,
+                                                            views, views_indices,
+                                                            random_state,
                                                           directory,
-                                                          resultsMonoview,
-                                                          classificationIndices)
-    argumentDictionaries["multiview"] = multiviewArguments
+                                                            results_monoview,
+                                                            classification_indices)
+    argument_dictionaries["multiview"] = multiview_arguments
     logging.debug("Start:\t Initializing multiview classifiers arguments")

-    return argumentDictionaries
+    return argument_dictionaries


-def arangeMetrics(metrics, metricPrinc):
+def arange_metrics(metrics, metric_princ):
     """Used to get the metrics list in the right order so that the first one is the principal metric specified in args"""
-    if [metricPrinc] in metrics:
-        metricIndex = metrics.index([metricPrinc])
-        firstMetric = metrics[0]
-        metrics[0] = [metricPrinc]
-        metrics[metricIndex] = firstMetric
+    if [metric_princ] in metrics:
+        metric_index = metrics.index([metric_princ])
+        first_metric = metrics[0]
+        metrics[0] = [metric_princ]
+        metrics[metric_index] = first_metric
     else:
-        raise AttributeError(metricPrinc + " not in metric pool")
+        raise AttributeError(metric_princ + " not in metric pool")
     return metrics


-def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY,
-                  kFolds):
+def benchmark_init(directory, classification_indices, labels, labels_dictionary,
+                   k_folds):
     logging.debug("Start:\t Benchmark initialization")
     if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
         try:
@@ -353,56 +357,56 @@ def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY,
         except OSError as exc:
             if exc.errno != errno.EEXIST:
                 raise
-    trainIndices = classificationIndices[0]
-    trainLabels =
labels[trainIndices] - np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",") - np.savetxt(directory + "train_indices.csv", classificationIndices[0], + train_indices = classification_indices[0] + train_labels = labels[train_indices] + np.savetxt(directory + "train_labels.csv", train_labels, delimiter=",") + np.savetxt(directory + "train_indices.csv", classification_indices[0], delimiter=",") - resultsMonoview = [] - folds = kFolds.split(np.arange(len(trainLabels)), trainLabels) - minFoldLen = int(len(trainLabels) / kFolds.n_splits) - for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds): - fileName = directory + "/folds/test_labels_fold_" + str( - foldIndex) + ".csv" - if not os.path.exists(os.path.dirname(fileName)): + results_monoview = [] + folds = k_folds.split(np.arange(len(train_labels)), train_labels) + min_fold_len = int(len(train_labels) / k_folds.n_splits) + for fold_index, (train_cv_indices, test_cv_indices) in enumerate(folds): + file_name = directory + "/folds/test_labels_fold_" + str( + fold_index) + ".csv" + if not os.path.exists(os.path.dirname(file_name)): try: - os.makedirs(os.path.dirname(fileName)) + os.makedirs(os.path.dirname(file_name)) except OSError as exc: if exc.errno != errno.EEXIST: raise - np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]], + np.savetxt(file_name, train_labels[test_cv_indices[:min_fold_len]], delimiter=",") - labelsNames = list(LABELS_DICTIONARY.values()) + labels_names = list(labels_dictionary.values()) logging.debug("Done:\t Benchmark initialization") - return resultsMonoview, labelsNames + return results_monoview, labels_names -def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, - classificationIndices=None, args=None, - kFolds=None, randomState=None, hyperParamSearch=None, - metrics=None, argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, flag=None, +def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None, + classification_indices=None, args=None, + k_folds=None, random_state=None, hyper_param_search=None, + metrics=None, argument_dictionaries=None, + benchmark=None, views=None, views_indices=None, flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, - ExecMultiview_multicore=ExecMultiview_multicore, - initMultiviewArguments=initMultiviewArguments): + exec_monoview_multicore=exec_monoview_multicore, + exec_multiview_multicore=exec_multiview_multicore, + init_multiview_arguments=init_multiview_arguments): """Used to run a benchmark using one core. 
ExecMonoview_multicore, initMultiviewArguments and
-    ExecMultiview_multicore args are only used for tests"""
+    exec_multiview_multicore args are only used for tests"""

-    resultsMonoview, labelsNames = benchmarkInit(directory,
-                                                 classificationIndices, labels,
-                                                 LABELS_DICTIONARY, kFolds)
+    results_monoview, labels_names = benchmark_init(directory,
+                                                    classification_indices, labels,
+                                                    labels_dictionary, k_folds)

     logging.debug("Start:\t monoview benchmark")
-    resultsMonoview += [
-        ExecMonoview_multicore(directory, args["Base"]["name"], labelsNames,
-                               classificationIndices, kFolds,
-                               coreIndex, args["Base"]["type"], args["Base"]["pathf"], randomState,
+    results_monoview += [
+        exec_monoview_multicore(directory, args["Base"]["name"], labels_names,
+                                classification_indices, k_folds,
+                                core_index, args["Base"]["type"], args["Base"]["pathf"], random_state,
                                labels,
-                               hyperParamSearch=hyperParamSearch,
+                               hyperParamSearch=hyper_param_search,
                                metrics=metrics,
                                nIter=args["Classification"]["hps_iter"],
                                **argument)
-        for argument in argumentDictionaries["Monoview"]]
+        for argument in argument_dictionaries["monoview"]]
     logging.debug("Done:\t monoview benchmark")

     logging.debug("Start:\t multiview arguments initialization")
@@ -415,53 +419,53 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None,
     logging.debug("Done:\t multiview arguments initialization")

     logging.debug("Start:\t multiview benchmark")
-    resultsMultiview = [
-        ExecMultiview_multicore(directory, coreIndex, args["Base"]["name"],
-                                classificationIndices, kFolds, args["Base"]["type"],
-                                args["Base"]["pathf"], LABELS_DICTIONARY, randomState,
-                                labels, hyperParamSearch=hyperParamSearch,
+    results_multiview = [
+        exec_multiview_multicore(directory, core_index, args["Base"]["name"],
+                                 classification_indices, k_folds, args["Base"]["type"],
+                                 args["Base"]["pathf"], labels_dictionary, random_state,
+                                 labels, hyperParamSearch=hyper_param_search,
                                 metrics=metrics,
                                 nIter=args["Classification"]["hps_iter"],
                                 **arguments)
-        for arguments in argumentDictionaries["multiview"]]
+        for arguments in argument_dictionaries["multiview"]]
     logging.debug("Done:\t multiview benchmark")
-    return [flag, resultsMonoview + resultsMultiview]
+    return [flag, results_monoview + results_multiview]


-def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None,
-                               directory=None, classificationIndices=None,
-                               args=None,
-                               kFolds=None, randomState=None,
-                               hyperParamSearch=None, metrics=None,
-                               argumentDictionaries=None,
-                               benchmark=None, views=None, viewsIndices=None,
-                               flag=None, labels=None,
-                               ExecMonoview_multicore=ExecMonoview_multicore,
-                               ExecMultiview_multicore=ExecMultiview_multicore,
-                               initMultiviewArguments=initMultiviewArguments):
+def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None,
+                                 directory=None, classification_indices=None,
+                                 args=None,
+                                 k_folds=None, random_state=None,
+                                 hyper_param_search=None, metrics=None,
+                                 argument_dictionaries=None,
+                                 benchmark=None, views=None, viewsIndices=None,
+                                 flag=None, labels=None,
+                                 exec_monoview_multicore=exec_monoview_multicore,
+                                 exec_multiview_multicore=exec_multiview_multicore,
+                                 init_multiview_arguments=init_multiview_arguments):
    """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
-    ExecMultiview_multicore args are only used for tests"""
+    exec_multiview_multicore args are only used for tests"""

-    resultsMonoview, labelsNames = benchmarkInit(directory,
-                                                 classificationIndices, labels,
-                                                 LABELS_DICTIONARY, kFolds)
+    results_monoview, labels_names = benchmark_init(directory,
+                                                    classification_indices, labels,
+                                                    labels_dictionary, k_folds)

     logging.debug("Start:\t monoview benchmark")
-    nbExperiments = len(argumentDictionaries["monoview"])
-    nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
-    for stepIndex in range(nbMulticoreToDo):
-        resultsMonoview += (Parallel(n_jobs=nbCores)(
-            delayed(ExecMonoview_multicore)(directory, args["Base"]["name"], labelsNames,
-                                            classificationIndices, kFolds,
-                                            coreIndex, args["Base"]["type"], args["Base"]["pathf"],
-                                            randomState, labels,
-                                            hyperParamSearch=hyperParamSearch,
+    nb_experiments = len(argument_dictionaries["monoview"])
+    nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
+    for step_index in range(nb_multicore_to_do):
+        results_monoview += (Parallel(n_jobs=nb_cores)(
+            delayed(exec_monoview_multicore)(directory, args["Base"]["name"], labels_names,
+                                             classification_indices, k_folds,
+                                             core_index, args["Base"]["type"], args["Base"]["pathf"],
+                                             random_state, labels,
+                                             hyperParamSearch=hyper_param_search,
                                             metrics=metrics,
                                             nIter=args["Classification"]["hps_iter"],
-                                            **argumentDictionaries["monoview"][
-                                                coreIndex + stepIndex * nbCores])
-            for coreIndex in
-            range(min(nbCores, nbExperiments - stepIndex * nbCores))))
+                                             **argument_dictionaries["monoview"][
+                                                 core_index + step_index * nb_cores])
+            for core_index in
+            range(min(nb_cores, nb_experiments - step_index * nb_cores))))
     logging.debug("Done:\t monoview benchmark")

     logging.debug("Start:\t multiview arguments initialization")
@@ -474,30 +478,30 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None,
     logging.debug("Done:\t multiview arguments initialization")

     logging.debug("Start:\t multiview benchmark")
-    resultsMultiview = []
-    nbExperiments = len(argumentDictionaries["multiview"])
-    nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
-    for stepIndex in range(nbMulticoreToDo):
-        resultsMultiview += Parallel(n_jobs=nbCores)(
-            delayed(ExecMultiview_multicore)(directory, coreIndex, args["Base"]["name"],
-                                             classificationIndices, kFolds,
+    results_multiview = []
+    nb_experiments = len(argument_dictionaries["multiview"])
+    nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
+    for step_index in range(nb_multicore_to_do):
+        results_multiview += Parallel(n_jobs=nb_cores)(
+            delayed(exec_multiview_multicore)(directory, core_index, args["Base"]["name"],
+                                              classification_indices, k_folds,
                                              args["Base"]["type"],
                                              args["Base"]["pathf"],
-                                             LABELS_DICTIONARY, randomState,
+                                             labels_dictionary, random_state,
                                              labels,
-                                             hyperParamSearch=hyperParamSearch,
+                                             hyperParamSearch=hyper_param_search,
                                              metrics=metrics,
                                              nIter=args["Classification"]["hps_iter"],
                                              **
-                                             argumentDictionaries["multiview"][
-                                                 stepIndex * nbCores + coreIndex])
-            for coreIndex in
-            range(min(nbCores, nbExperiments - stepIndex * nbCores)))
+                                             argument_dictionaries["multiview"][
+                                                 step_index * nb_cores + core_index])
+            for core_index in
+            range(min(nb_cores, nb_experiments - step_index * nb_cores)))
     logging.debug("Done:\t multiview benchmark")

-    return [flag, resultsMonoview + resultsMultiview]
+    return [flag, results_monoview + results_multiview]


-def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None,
+def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
                             directory=None, classificationIndices=None,
                             args=None,
                             kFolds=None, randomState=None,
@@ -505,22 +509,22 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None,
                             argumentDictionaries=None, benchmark=None,
                             views=None, viewsIndices=None,
                             flag=None, labels=None,
-                             ExecMonoview_multicore=ExecMonoview_multicore,
-                             ExecMultiview_multicore=ExecMultiview_multicore,
-                             initMultiviewArguments=initMultiviewArguments):
-    resultsMonoview, labelsNames = benchmarkInit(directory,
+                             exec_monoview_multicore=exec_monoview_multicore,
+                             exec_multiview_multicore=exec_multiview_multicore,
+                             init_multiview_arguments=init_multiview_arguments):
+    results_monoview, labels_names = benchmark_init(directory,
                                                  classificationIndices, labels,
-                                                 LABELS_DICTIONARY, kFolds)
+                                                 labels_dictionary, kFolds)

     logging.debug("Start:\t monoview benchmark")
     for arguments in argumentDictionaries["monoview"]:
-        X = DATASET.get("View" + str(arguments["view_index"]))
+        X = dataset_var.get("View" + str(arguments["view_index"]))
         Y = labels
-        resultsMonoview += [
-            ExecMonoview(directory, X, Y, args["Base"]["name"], labelsNames,
+        results_monoview += [
+            exec_monoview(directory, X, Y, args["Base"]["name"], labels_names,
                          classificationIndices, kFolds, 1, args["Base"]["type"],
                          args["Base"]["pathf"], randomState,
                          hyperParamSearch=hyperParamSearch, metrics=metrics,
                          nIter=args["Classification"]["hps_iter"], **arguments)]
     logging.debug("Done:\t monoview benchmark")

     logging.debug("Start:\t multiview arguments initialization")
@@ -534,46 +538,46 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None,
     logging.debug("Done:\t multiview arguments initialization")

     logging.debug("Start:\t multiview benchmark")
-    resultsMultiview = []
+    results_multiview = []
     for arguments in argumentDictionaries["multiview"]:
-        resultsMultiview += [
-            ExecMultiview(directory, DATASET, args["Base"]["name"], classificationIndices,
+        results_multiview += [
+            exec_multiview(directory, dataset_var, args["Base"]["name"], classificationIndices,
                           kFolds, 1, args["Base"]["type"],
-                          args["Base"]["pathf"], LABELS_DICTIONARY, randomState, labels,
+                          args["Base"]["pathf"], labels_dictionary, randomState, labels,
                           hyperParamSearch=hyperParamSearch,
                           metrics=metrics, nIter=args["Classification"]["hps_iter"], **arguments)]
     logging.debug("Done:\t multiview benchmark")
-    return [flag, resultsMonoview + resultsMultiview]
+    return [flag, results_monoview + results_multiview]


-def execBenchmark(nbCores, statsIter, nbMulticlass,
-                  benchmarkArgumentsDictionaries, classificationIndices,
+def exec_benchmark(nb_cores, stats_iter, nb_multiclass,
+                   benchmark_arguments_dictionaries, classification_indices,
                   directories,
-                  directory, multiClassLabels, metrics, labelsDictionary,
-                  nbLabels, DATASET,
-                  execOneBenchmark=execOneBenchmark,
-                  execOneBenchmark_multicore=execOneBenchmark_multicore,
-                  execOneBenchmarkMonoCore=execOneBenchmarkMonoCore,
-                  getResults=getResults, delete=DB.deleteHDF5):
+                  directory, multi_class_labels, metrics, labels_dictionary,
+                  nb_labels, dataset_var,
+                  exec_one_benchmark=exec_one_benchmark,
+                  exec_one_benchmark_multicore=exec_one_benchmark_multicore,
+                  exec_one_benchmark_mono_core=exec_one_benchmark_mono_core,
+                  get_results=get_results, delete=DB.deleteHDF5):
    r"""Used to
execute the needed benchmark(s) on multicore or mono-core functions. Parameters ---------- - nbCores : int + nb_cores : int Number of threads that the benchmarks can use. - statsIter : int + stats_iter : int Number of statistical iterations that have to be done. - benchmarkArgumentsDictionaries : list of dictionaries + benchmark_arguments_dictionaries : list of dictionaries All the needed arguments for the benchmarks. - classificationIndices : list of lists of numpy.ndarray + classification_indices : list of lists of numpy.ndarray For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and the ones of the testing set. directories : list of strings List of the paths to the result directories for each statistical iteration. directory : string Path to the main results directory. - multiClassLabels : ist of lists of numpy.ndarray + multi_class_labels : ist of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. @@ -583,7 +587,7 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, Dictionary mapping labels indices to labels names. nbLabels : int Total number of different labels in the dataset. - DATASET : HDF5 dataset file + dataset_var : HDF5 dataset file The full dataset that wil be used by the benchmark. classifiersNames : list of strings List of the benchmarks's monoview classifiers names. @@ -598,61 +602,61 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, """ logging.debug("Start:\t Executing all the needed biclass benchmarks") results = [] - if nbCores > 1: - if statsIter > 1 or nbMulticlass > 1: - nbExpsToDo = len(benchmarkArgumentsDictionaries) - nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores))) - for stepIndex in nbMulticoreToDo: - results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark) - (coreIndex=coreIndex, + if nb_cores > 1: + if stats_iter > 1 or nb_multiclass > 1: + nb_exps_to_do = len(benchmark_arguments_dictionaries) + nb_multicore_to_do = range(int(math.ceil(float(nb_exps_to_do) / nb_cores))) + for step_index in nb_multicore_to_do: + results += (Parallel(n_jobs=nb_cores)(delayed(exec_one_benchmark) + (core_index=coreIndex, ** - benchmarkArgumentsDictionaries[ - coreIndex + stepIndex * nbCores]) + benchmark_arguments_dictionaries[ + coreIndex + step_index * nb_cores]) for coreIndex in range( - min(nbCores, nbExpsToDo - stepIndex * nbCores)))) + min(nb_cores, nb_exps_to_do - step_index * nb_cores)))) else: - results += [execOneBenchmark_multicore(nbCores=nbCores, ** - benchmarkArgumentsDictionaries[0])] + results += [exec_one_benchmark_multicore(nb_cores=nb_cores, ** + benchmark_arguments_dictionaries[0])] else: - for arguments in benchmarkArgumentsDictionaries: - results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)] + for arguments in benchmark_arguments_dictionaries: + results += [exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") # Do everything with flagging - nbExamples = len(classificationIndices[0][0]) + len( - classificationIndices[0][1]) - multiclassGroundTruth = DATASET.get("Labels").value + nb_examples = len(classification_indices[0][0]) + len( + classification_indices[0][1]) + multiclass_ground_truth = dataset_var.get("Labels").value logging.debug("Start:\t Analyzing predictions") - 
results_mean_stds = getResults(results, statsIter, nbMulticlass, - benchmarkArgumentsDictionaries, - multiclassGroundTruth, + results_mean_stds = get_results(results, stats_iter, nb_multiclass, + benchmark_arguments_dictionaries, + multiclass_ground_truth, metrics, - classificationIndices, + classification_indices, directories, directory, - labelsDictionary, - nbExamples, - nbLabels) + labels_dictionary, + nb_examples, + nb_labels) logging.debug("Done:\t Analyzing predictions") - delete(benchmarkArgumentsDictionaries, nbCores, DATASET) + delete(benchmark_arguments_dictionaries, nb_cores, dataset_var) return results_mean_stds -def execClassif(arguments): +def exec_classif(arguments): """Main function to execute the benchmark""" start = time.time() - args = execution.parseTheArgs(arguments) + args = execution.parse_the_args(arguments) args = configuration.get_the_args(args.path_config) os.nice(args["Base"]["nice"]) - nbCores = args["Base"]["nb_cores"] - if nbCores == 1: + nb_cores = args["Base"]["nb_cores"] + if nb_cores == 1: os.environ['OPENBLAS_NUM_THREADS'] = '1' - statsIter = args["Classification"]["stats_iter"] - hyperParamSearch = args["Classification"]["hps_type"] - multiclassMethod = args["Classification"]["multiclass_method"] - CL_type = args["Classification"]["type"] - monoviewAlgos = args["Classification"]["algos_monoview"] - multiviewAlgos = args["Classification"]["algos_multiview"] + stats_iter = args["Classification"]["stats_iter"] + hyper_param_search = args["Classification"]["hps_type"] + multiclass_method = args["Classification"]["multiclass_method"] + cl_type = args["Classification"]["type"] + monoview_algos = args["Classification"]["algos_monoview"] + multiview_algos = args["Classification"]["algos_multiview"] dataset_list = execution.find_dataset_names(args["Base"]["pathf"], args["Base"]["type"], args["Base"]["name"]) @@ -663,76 +667,76 @@ def execClassif(arguments): noise_results = [] for noise_std in args["Base"]["noise_std"]: - directory = execution.initLogFile(dataset_name, args["Base"]["views"], args["Classification"]["type"], + directory = execution.init_log_file(dataset_name, args["Base"]["views"], args["Classification"]["type"], args["Base"]["log"], args["Base"]["debug"], args["Base"]["label"], args["Base"]["res_dir"], args["Base"]["add_noise"], noise_std) - randomState = execution.initRandomState(args["Base"]["random_state"], directory) - statsIterRandomStates = execution.initStatsIterRandomStates(statsIter, - randomState) + random_state = execution.init_random_state(args["Base"]["random_state"], directory) + stats_iter_random_states = execution.init_stats_iter_random_states(stats_iter, + random_state) - getDatabase = execution.getDatabaseFunction(dataset_name, args["Base"]["type"]) + get_database = execution.get_database_function(dataset_name, args["Base"]["type"]) - DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args["Base"]["views"], + dataset_var, labels_dictionary, datasetname = get_database(args["Base"]["views"], args["Base"]["pathf"], dataset_name, args["Classification"]["nb_class"], args["Classification"]["classes"], - randomState, + random_state, args["Base"]["full"], args["Base"]["add_noise"], noise_std) args["Base"]["name"] = datasetname - splits = execution.genSplits(DATASET.get("Labels").value, args["Classification"]["split"], - statsIterRandomStates) + splits = execution.gen_splits(dataset_var.get("Labels").value, args["Classification"]["split"], + stats_iter_random_states) - multiclassLabels, labelsCombinations, indicesMulticlass = 
multiclass.genMulticlassLabels(
-            DATASET.get("Labels").value, multiclassMethod, splits)
+        multiclass_labels, labels_combinations, indices_multiclass = multiclass.gen_multiclass_labels(
+            dataset_var.get("Labels").value, multiclass_method, splits)

-        kFolds = execution.genKFolds(statsIter, args["Classification"]["nb_folds"],
-                                     statsIterRandomStates)
+        k_folds = execution.gen_k_folds(stats_iter, args["Classification"]["nb_folds"],
+                                        stats_iter_random_states)

-        datasetFiles = dataset.init_multiple_datasets(args["Base"]["pathf"], args["Base"]["name"], nbCores)
+        dataset_files = dataset.init_multiple_datasets(args["Base"]["pathf"], args["Base"]["name"], nb_cores)

-        views, viewsIndices, allViews = execution.initViews(DATASET, args["Base"]["views"])
-        viewsDictionary = genViewsDictionnary(DATASET, views)
-        nbViews = len(views)
-        NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
+        views, views_indices, all_views = execution.init_views(dataset_var, args["Base"]["views"])
+        views_dictionary = gen_views_dictionnary(dataset_var, views)
+        nb_views = len(views)
+        nb_class = dataset_var.get("Metadata").attrs["nbClass"]

         metrics = [metric.split(":") for metric in args["Classification"]["metrics"]]
         if metrics == [["all"]]:
-            metricsNames = [name for _, name, isPackage
+            metrics_names = [name for _, name, isPackage
                             in pkgutil.iter_modules(
                     ['./mono_multi_view_classifiers/metrics'])
                             if not isPackage and name not in ["framework",
                                                               "log_loss",
                                                               "matthews_corrcoef",
                                                               "roc_auc_score"]]
-            metrics = [[metricName] for metricName in metricsNames]
-        metrics = arangeMetrics(metrics, args["Classification"]["metric_princ"])
+            metrics = [[metricName] for metricName in metrics_names]
+        metrics = arange_metrics(metrics, args["Classification"]["metric_princ"])
         for metricIndex, metric in enumerate(metrics):
             if len(metric) == 1:
                 metrics[metricIndex] = [metric[0], None]

-        benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args)
-        initKWARGS = initKWARGSFunc(args, benchmark)
-        dataBaseTime = time.time() - start
-        argumentDictionaries = init_argument_dictionaries(benchmark, viewsDictionary,
-                                                          NB_CLASS, initKWARGS)
+        benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos, args)
+        init_kwargs = init_kwargs_func(args, benchmark)
+        data_base_time = time.time() - start
+        argument_dictionaries = init_argument_dictionaries(benchmark, views_dictionary,
+                                                           nb_class, init_kwargs)
         # argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary,
         #                                         NB_CLASS, initKWARGS)
-        directories = execution.genDirecortiesNames(directory, statsIter)
-        benchmarkArgumentDictionaries = execution.genArgumentDictionaries(
-            LABELS_DICTIONARY, directories, multiclassLabels,
-            labelsCombinations, indicesMulticlass,
-            hyperParamSearch, args, kFolds,
-            statsIterRandomStates, metrics,
-            argumentDictionaries, benchmark, nbViews,
-            views, viewsIndices)
-        nbMulticlass = len(labelsCombinations)
-        results_mean_stds = execBenchmark(nbCores, statsIter, nbMulticlass,
-                                          benchmarkArgumentDictionaries, splits, directories,
-                                          directory, multiclassLabels, metrics, LABELS_DICTIONARY,
-                                          NB_CLASS, DATASET)
+        directories = execution.gen_direcorties_names(directory, stats_iter)
+        benchmark_argument_dictionaries = execution.gen_argument_dictionaries(
+            labels_dictionary, directories, multiclass_labels,
+            labels_combinations, indices_multiclass,
+            hyper_param_search, args, k_folds,
+            stats_iter_random_states, metrics,
+            argument_dictionaries, benchmark, nb_views,
+            views, views_indices)
+        nb_multiclass = len(labels_combinations)
+        results_mean_stds = exec_benchmark(nb_cores, stats_iter, nb_multiclass,
+
benchmark_argument_dictionaries, splits, directories, + directory, multiclass_labels, metrics, labels_dictionary, + nb_class, dataset_var) noise_results.append([noise_std, results_mean_stds]) plot_results_noise(directory, noise_results, metrics[0][0], dataset_name) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index bf1f766bc359b739a5c55bd903f58ac3ede9862b..5c807f3ac3523e21b9b17ec7c91b5112385fa564 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -27,7 +27,7 @@ __status__ = "Prototype" # Production, Development, Prototype # __date__ = 2016 - 03 - 25 -def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, +def exec_monoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, path, randomState, labels, hyperParamSearch="randomizedSearch", @@ -43,7 +43,7 @@ def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, +def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): @@ -164,7 +164,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName -def initTrainTest(X, Y, classificationIndices): +def init_train_test(X, Y, classificationIndices): trainIndices, testIndices, testIndicesMulticlass = classificationIndices X_train = extract_subset(X, trainIndices) X_test = extract_subset(X, testIndices) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index bcc9f201f61d1f26ccaa90d66196955bded3bc36..696c97e5cafd44e82d78bebbd9dd71d21d3405d8 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -77,7 +77,7 @@ def saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifier outputFileName + imageName + '.png', transparent=True) -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, +def exec_multiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=False, nbCores=1, metrics=None, @@ -91,7 +91,7 @@ def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, +def exec_multiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 50cb31a6078f9a577f8626dd74a460761e026b94..765e980612319c4b4e62752c5d62fb7b30fa9bd4 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ 
b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -98,7 +98,7 @@ def autolabel(rects, ax, set=1, std=None): ha='center', va='bottom', size="small") -def getMetricsScoresBiclass(metrics, results): +def get_metrics_scores_biclass(metrics, results): r"""Used to extract metrics scores in case of biclass classification Parameters @@ -117,22 +117,22 @@ def getMetricsScoresBiclass(metrics, results): -`metricScores[metric_name]["trainScores"]` is a list of all the available classifiers scores on the train set, -`metricScores[metric_name]["testScores"]` is a list of all the available classifiers scores on the test set. """ - metricsScores = {} + metrics_scores = {} for metric in metrics: - classifiersNames = [] - trainScores = [] - testScores = [] + classifiers_names = [] + train_scores = [] + test_scores = [] for classifierResult in results: - trainScores.append(classifierResult.metrics_scores[metric[0]][0]) - testScores.append(classifierResult.metrics_scores[metric[0]][1]) - classifiersNames.append(classifierResult.get_classifier_name()) - - metricsScores[metric[0]] = {"classifiersNames": classifiersNames, - "trainScores": trainScores, - "testScores": testScores} - return metricsScores + train_scores.append(classifierResult.metrics_scores[metric[0]][0]) + test_scores.append(classifierResult.metrics_scores[metric[0]][1]) + classifiers_names.append(classifierResult.get_classifier_name()) + + metrics_scores[metric[0]] = {"classifiersNames": classifiers_names, + "trainScores": train_scores, + "testScores": test_scores} + return metrics_scores def getExampleErrorsBiclass(groud_truth, results): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index b0e3779f3780def50e4e057b0e41693f79a3c6fe..5fe42ee0ccae9a636d1076002cf69e5d4d740ad4 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -10,7 +10,7 @@ import sklearn from . 
import get_multiview_db as DB -def parseTheArgs(arguments): +def parse_the_args(arguments): """Used to parse the args entered by the user""" parser = argparse.ArgumentParser( @@ -603,7 +603,8 @@ def parseTheArgs(arguments): # default=['']) # groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', # nargs='+', -# help='Configuration for the monoview classifier in Mumbo separate each classifier with sapce and each argument with:', +# help='Configuration for the monoview classifier in Mumbo' +# ' separate each classifier with sapce and each argument with:', # default=['']) # groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, # help='Max number of iteration, min number of iteration, convergence threshold', @@ -630,8 +631,10 @@ def parseTheArgs(arguments): # default=['']) # groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', # action='store', nargs='+', -# help='Configuration for the monoview classifiers used separate classifier by space ' -# 'and configs must be of form argument1_name:value,argument2_name:value', +# help='Configuration for the monoview classifiers ' +# ' used separate classifier by space ' +# 'and configs must be of form argument1_name:value,' +# 'argument2_name:value', # default=['']) # groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', # action='store', nargs='+', @@ -696,77 +699,79 @@ def parseTheArgs(arguments): return args -def initRandomState(randomStateArg, directory): +def init_random_state(random_state_arg, directory): r""" Used to init a random state. If no random state is specified, it will generate a 'random' seed. - If the `randomSateArg` is a string containing only numbers, it will be converted in an int to generate a seed. - If the `randomSateArg` is a string with letters, it must be a path to a pickled random state file that will be loaded. + If the `randomSateArg` is a string containing only numbers, it will be converted in + an int to generate a seed. + If the `randomSateArg` is a string with letters, it must be a path to a pickled random + state file that will be loaded. The function will also pickle the new random state in a file tobe able to retrieve it later. Tested Parameters ---------- - randomStateArg : None or string + random_state_arg : None or string See function description. directory : string Path to the results directory. Returns ------- - randomState : numpy.random.RandomState object + random_state : numpy.random.RandomState object This random state will be used all along the benchmark . 
""" - if randomStateArg is None: - randomState = np.random.RandomState(randomStateArg) + if random_state_arg is None: + random_state = np.random.RandomState(random_state_arg) else: try: - seed = int(randomStateArg) - randomState = np.random.RandomState(seed) + seed = int(random_state_arg) + random_state = np.random.RandomState(seed) except ValueError: - fileName = randomStateArg - with open(fileName, 'rb') as handle: - randomState = pickle.load(handle) + file_name = random_state_arg + with open(file_name, 'rb') as handle: + random_state = pickle.load(handle) with open(directory + "randomState.pickle", "wb") as handle: - pickle.dump(randomState, handle) - return randomState + pickle.dump(random_state, handle) + return random_state -def initStatsIterRandomStates(statsIter, randomState): +def init_stats_iter_random_states(stats_iter, random_state): r""" Used to initialize multiple random states if needed because of multiple statistical iteration of the same benchmark Parameters ---------- - statsIter : int + stats_iter : int Number of statistical iterations of the same benchmark done (with a different random state). - randomState : numpy.random.RandomState object + random_state : numpy.random.RandomState object The random state of the whole experimentation, that will be used to generate the ones for each statistical iteration. Returns ------- - statsIterRandomStates : list of numpy.random.RandomState objects + stats_iter_random_states : list of numpy.random.RandomState objects Multiple random states, one for each sattistical iteration of the same benchmark. """ - if statsIter > 1: - statsIterRandomStates = [ - np.random.RandomState(randomState.randint(5000)) for _ in - range(statsIter)] + if stats_iter > 1: + stats_iter_random_states = [ + np.random.RandomState(random_state.randint(5000)) for _ in + range(stats_iter)] else: - statsIterRandomStates = [randomState] - return statsIterRandomStates + stats_iter_random_states = [random_state] + return stats_iter_random_states -def getDatabaseFunction(name, type): +def get_database_function(name, type_var): r"""Used to get the right database extraction function according to the type of database and it's name Parameters ---------- name : string Name of the database. - type : string + type_var : string type of dataset hdf5 or csv Returns @@ -775,13 +780,13 @@ def getDatabaseFunction(name, type): The function that will be used to extract the database """ if name not in ["Fake", "Plausible"]: - getDatabase = getattr(DB, "getClassicDB" + type[1:]) + get_database = getattr(DB, "getClassicDB" + type_var[1:]) else: - getDatabase = getattr(DB, "get" + name + "DB" + type[1:]) - return getDatabase + get_database = getattr(DB, "get" + name + "DB" + type_var[1:]) + return get_database -def initLogFile(name, views, CL_type, log, debug, label, result_directory, add_noise, noise_std): +def init_log_file(name, views, cl_type, log, debug, label, result_directory, add_noise, noise_std): r"""Used to init the directory where the preds will be stored and the log file. First this function will check if the result directory already exists (only one per minute is allowed). @@ -794,50 +799,61 @@ def initLogFile(name, views, CL_type, log, debug, label, result_directory, add_n Name of the database. views : list of strings List of the view names that will be used in the benchmark. - CL_type : list of strings + cl_type : list of strings Type of benchmark that will be made . log : bool Whether to show the log file in console or hide it. 
+ debug : bool + for debug option + label : str for label + + result_directory : str name of the result directory + + add_noise : bool for add noise + + noise_std : level of std noise Returns ------- - resultsDirectory : string + results_directory : string Reference to the main results directory for the benchmark. """ noise_string = "/n_"+str(int(noise_std*100)) if debug: - resultDirectory = result_directory + name + noise_string +"/debug_started_" + time.strftime( - "%Y_%m_%d-%H_%M_%S") + "_" + label + "/" + result_directory = result_directory + name + noise_string + \ + "/debug_started_" + \ + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "_" + label + "/" else: - resultDirectory = result_directory + name + noise_string+ "/started_" + time.strftime( + result_directory = result_directory + name + noise_string+ "/started_" + time.strftime( "%Y_%m_%d-%H_%M") + "_" + label + "/" - logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join( - CL_type) + "-" + "_".join( + log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join( + cl_type) + "-" + "_".join( views) + "-" + name + "-LOG" - if os.path.exists(os.path.dirname(resultDirectory)): + if os.path.exists(os.path.dirname(result_directory)): raise NameError("The result dir already exists, wait 1 min and retry") - os.makedirs(os.path.dirname(resultDirectory + logFileName)) - logFile = resultDirectory + logFileName - logFile += ".log" + os.makedirs(os.path.dirname(result_directory + log_file_name)) + log_file = result_directory + log_file_name + log_file += ".log" logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', - filename=logFile, level=logging.DEBUG, + filename=log_file, level=logging.DEBUG, filemode='w') if log: logging.getLogger().addHandler(logging.StreamHandler()) - return resultDirectory + return result_directory -def genSplits(labels, splitRatio, statsIterRandomStates): - r"""Used to gen the train/test splits using one or multiple random states. +def gen_splits(labels, split_ratio, stats_iter_random_states): + r"""Used to _gen the train/test splits using one or multiple random states. Parameters ---------- labels : numpy.ndarray Name of the database. - splitRatio : float + split_ratio : float The ratio of examples between train and test set. - statsIterRandomStates : list of numpy.random.RandomState + stats_iter_random_states : list of numpy.random.RandomState The random states for each statistical iteration. Returns @@ -848,99 +864,100 @@ def genSplits(labels, splitRatio, statsIterRandomStates): """ indices = np.arange(len(labels)) splits = [] - for randomState in statsIterRandomStates: - foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, - random_state=randomState, - test_size=splitRatio) - folds = foldsObj.split(indices, labels) + for random_state in stats_iter_random_states: + folds_obj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, + random_state=random_state, + test_size=split_ratio) + folds = folds_obj.split(indices, labels) for fold in folds: train_fold, test_fold = fold - trainIndices = indices[train_fold] - testIndices = indices[test_fold] - splits.append([trainIndices, testIndices]) + train_indices = indices[train_fold] + test_indices = indices[test_fold] + splits.append([train_indices, test_indices]) return splits -def genKFolds(statsIter, nbFolds, statsIterRandomStates): +def gen_k_folds(stats_iter, nb_folds, stats_iter_random_states): r"""Used to generate folds indices for cross validation for each statistical iteration. 
Parameters ---------- - statsIter : integer + stats_iter : integer Number of statistical iterations of the benchmark. - nbFolds : integer + nb_folds : integer The number of cross-validation folds for the benchmark. - statsIterRandomStates : list of numpy.random.RandomState + stats_iter_random_states : list of numpy.random.RandomState The random states for each statistical iteration. Returns ------- - foldsList : list of list of sklearn.model_selection.StratifiedKFold + folds_list : list of list of sklearn.model_selection.StratifiedKFold For each statistical iteration a Kfold stratified (keeping the ratio between classes in each fold). """ - if statsIter > 1: - foldsList = [] - for randomState in statsIterRandomStates: - foldsList.append( - sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, - random_state=randomState)) + if stats_iter > 1: + folds_list = [] + for random_state in stats_iter_random_states: + folds_list.append( + sklearn.model_selection.StratifiedKFold(n_splits=nb_folds, + random_state=random_state)) else: - foldsList = [sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, - random_state=statsIterRandomStates)] - return foldsList + folds_list = [sklearn.model_selection.StratifiedKFold(n_splits=nb_folds, + random_state=stats_iter_random_states)] + return folds_list -def initViews(DATASET, argViews): - r"""Used to return the views names that will be used by the benchmark, their indices and all the views names. +def init_views(dataset, arg_views): + r"""Used to return the views names that will be used by the + benchmark, their indices and all the views names. Parameters ---------- - DATASET : HDF5 dataset file + datset : HDF5 dataset file The full dataset that wil be used by the benchmark. - argViews : list of strings + arg_views : list of strings The views that will be used by the benchmark (arg). Returns ------- views : list of strings Names of the views that will be used by the benchmark. - viewIndices : list of ints + view_indices : list of ints The list of the indices of the view that will be used in the benchmark (according to the dataset). - allViews : list of strings + all_views : list of strings Names of all the available views in the dataset. 
""" - NB_VIEW = DATASET.get("Metadata").attrs["nbView"] - if argViews != ["all"]: - allowedViews = argViews - allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) + nb_view = dataset.get("Metadata").attrs["nbView"] + if arg_views != ["all"]: + allowed_views = arg_views + all_views = [str(dataset.get("View" + str(view_index)).attrs["name"]) if type( - DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes - else DATASET.get("View" + str(viewIndex)).attrs[ + dataset.get("View" + str(view_index)).attrs["name"]) != bytes + else dataset.get("View" + str(view_index)).attrs[ "name"].decode("utf-8") - for viewIndex in range(NB_VIEW)] + for view_index in range(nb_view)] views = [] - viewsIndices = [] - for viewIndex in range(NB_VIEW): - viewName = DATASET.get("View" + str(viewIndex)).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName in allowedViews: - views.append(viewName) - viewsIndices.append(viewIndex) + views_indices = [] + for view_index in range(nb_view): + view_name = dataset.get("View" + str(view_index)).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name in allowed_views: + views.append(view_name) + views_indices.append(view_index) else: - views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) + views = [str(dataset.get("View" + str(viewIndex)).attrs["name"]) if type( - DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes - else DATASET.get("View" + str(viewIndex)).attrs["name"].decode( + dataset.get("View" + str(viewIndex)).attrs["name"]) != bytes + else dataset.get("View" + str(viewIndex)).attrs["name"].decode( "utf-8") - for viewIndex in range(NB_VIEW)] - viewsIndices = range(NB_VIEW) - allViews = views - return views, viewsIndices, allViews + for viewIndex in range(nb_view)] + views_indices = range(nb_view) + all_views = views + return views, views_indices, all_views -def genDirecortiesNames(directory, statsIter): +def gen_direcorties_names(directory, statsIter): r"""Used to generate the different directories of each iteration if needed. Parameters @@ -977,12 +994,13 @@ def find_dataset_names(path, type, names): else: return names -def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, - labelsCombinations, indicesMulticlass, - hyperParamSearch, args, kFolds, - statsIterRandomStates, metrics, - argumentDictionaries, - benchmark, nbViews, views, viewsIndices): + +def gen_argument_dictionaries(labels_dictionary, directories, multiclass_labels, + labels_combinations, indices_multiclass, + hyper_param_search, args, k_folds, + stats_iter_random_states, metrics, + argument_dictionaries, + benchmark, nb_views, views, views_indices): r"""Used to generate a dictionary for each benchmark. One for each label combination (if multiclass), for each statistical iteration, generates an dictionary with @@ -990,37 +1008,37 @@ def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, Parameters ---------- - labelsDictionary : dictionary + labels_dictionary : dictionary Dictionary mapping labels indices to labels names. directories : list of strings List of the paths to the result directories for each statistical iteration. 
- multiclassLabels : list of lists of numpy.ndarray + multiclass_labels : list of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. - labelsCombinations : list of lists of numpy.ndarray + labels_combinations : list of lists of numpy.ndarray Each original couple of different labels. - indicesMulticlass : list of lists of numpy.ndarray + indices_multiclass : list of lists of numpy.ndarray For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination. - hyperParamSearch : string + hyper_param_search : string Type of hyper parameter optimization method args : parsed args objects All the args passed by the user. - kFolds : list of list of sklearn.model_selection.StratifiedKFold + k_folds : list of list of sklearn.model_selection.StratifiedKFold For each statistical iteration a Kfold stratified (keeping the ratio between classes in each fold). - statsIterRandomStates : list of numpy.random.RandomState objects + stats_iter_random_states : list of numpy.random.RandomState objects Multiple random states, one for each sattistical iteration of the same benchmark. metrics : list of lists metrics that will be used to evaluate the algorithms performance. - argumentDictionaries : dictionary + argument_dictionaries : dictionary Dictionary resuming all the specific arguments for the benchmark, oe dictionary for each classifier. benchmark : dictionary Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. - nbViews : int + nb_views : int THe number of views used by the benchmark. views : list of strings List of the names of the used views. - viewsIndices : list of ints + views_indices : list of ints List of indices (according to the dataset) of the used views. Returns @@ -1029,31 +1047,31 @@ def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, All the needed arguments for the benchmarks. 
""" - benchmarkArgumentDictionaries = [] - for combinationIndex, labelsCombination in enumerate(labelsCombinations): - for iterIndex, iterRandomState in enumerate(statsIterRandomStates): - benchmarkArgumentDictionary = { - "LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]], - 1: labelsDictionary[ - labelsCombination[1]]}, - "directory": directories[iterIndex] + - labelsDictionary[labelsCombination[0]] + + benchmark_argument_dictionaries = [] + for combination_index, labels_combination in enumerate(labels_combinations): + for iter_index, iterRandomState in enumerate(stats_iter_random_states): + benchmark_argument_dictionary = { + "LABELS_DICTIONARY": {0: labels_dictionary[labels_combination[0]], + 1: labels_dictionary[ + labels_combination[1]]}, + "directory": directories[iter_index] + + labels_dictionary[labels_combination[0]] + "-vs-" + - labelsDictionary[labelsCombination[1]] + "/", + labels_dictionary[labels_combination[1]] + "/", "classificationIndices": [ - indicesMulticlass[combinationIndex][0][iterIndex], - indicesMulticlass[combinationIndex][1][iterIndex], - indicesMulticlass[combinationIndex][2][iterIndex]], + indices_multiclass[combination_index][0][iter_index], + indices_multiclass[combination_index][1][iter_index], + indices_multiclass[combination_index][2][iter_index]], "args": args, - "labels": multiclassLabels[combinationIndex], - "kFolds": kFolds[iterIndex], + "labels": multiclass_labels[combination_index], + "kFolds": k_folds[iter_index], "randomState": iterRandomState, - "hyperParamSearch": hyperParamSearch, + "hyperParamSearch": hyper_param_search, "metrics": metrics, - "argumentDictionaries": argumentDictionaries, + "argumentDictionaries": argument_dictionaries, "benchmark": benchmark, "views": views, - "viewsIndices": viewsIndices, - "flag": [iterIndex, labelsCombination]} - benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary) - return benchmarkArgumentDictionaries + "viewsIndices": views_indices, + "flag": [iter_index, labels_combination]} + benchmark_argument_dictionaries.append(benchmark_argument_dictionary) + return benchmark_argument_dictionaries diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 19966ac08ac3a36ff6bbbef537a5435bc846131c..0d86492787e2e290fc5d351ec446ada1c49c1049 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -250,186 +250,186 @@ class DatasetError(Exception): Exception.__init__(self, *args, **kwargs) -def getClasses(labels): - labelsSet = set(list(labels)) - nbLabels = len(labelsSet) - if nbLabels >= 2: - return labelsSet +def get_classes(labels): + labels_set = set(list(labels)) + nb_labels = len(labels_set) + if nb_labels >= 2: + return labels_set else: raise DatasetError("Dataset must have at least two different labels") -def allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): - for askedLabelName in askedLabelsNamesSet: - if askedLabelName in availableLabelsNames: +def all_asked_labels_are_available(asked_labels_names_set, available_labels_names): + for askedLabelName in asked_labels_names_set: + if askedLabelName in available_labels_names: pass else: return False return True -def fillLabelNames(NB_CLASS, askedLabelsNames, randomState, - availableLabelsNames): - if len(askedLabelsNames) < NB_CLASS: - nbLabelsToAdd = NB_CLASS - len(askedLabelsNames) - 
labelsNamesToChoose = [availableLabelName for availableLabelName in - availableLabelsNames - if availableLabelName not in askedLabelsNames] - addedLabelsNames = randomState.choice(labelsNamesToChoose, - nbLabelsToAdd, replace=False) - askedLabelsNames = list(askedLabelsNames) + list(addedLabelsNames) - askedLabelsNamesSet = set(askedLabelsNames) +def fill_label_names(nb_class, asked_labels_names, random_state, + available_labels_names): + if len(asked_labels_names) < nb_class: + nb_labels_to_add = nb_class - len(asked_labels_names) + labels_names_to_choose = [available_label_name for available_label_name in + available_labels_names + if available_label_name not in asked_labels_names] + added_labels_names = random_state.choice(labels_names_to_choose, + nb_labels_to_add, replace=False) + asked_labels_names = list(asked_labels_names) + list(added_labels_names) + asked_labels_names_set = set(asked_labels_names) - elif len(askedLabelsNames) > NB_CLASS: - askedLabelsNames = list( - randomState.choice(askedLabelsNames, NB_CLASS, replace=False)) - askedLabelsNamesSet = set(askedLabelsNames) + elif len(asked_labels_names) > nb_class: + asked_labels_names = list( + random_state.choice(asked_labels_names, nb_class, replace=False)) + asked_labels_names_set = set(asked_labels_names) else: - askedLabelsNamesSet = set(askedLabelsNames) - - return askedLabelsNames, askedLabelsNamesSet - - -def getAllLabels(fullLabels, availableLabelsNames): - newLabels = fullLabels - newLabelsNames = availableLabelsNames - usedIndices = np.arange(len(fullLabels)) - return newLabels, newLabelsNames, usedIndices - - -def selectAskedLabels(askedLabelsNamesSet, availableLabelsNames, - askedLabelsNames, fullLabels): - if allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): - usedLabels = [availableLabelsNames.index(askedLabelName) for - askedLabelName in askedLabelsNames] - usedIndices = np.array( - [labelIndex for labelIndex, label in enumerate(fullLabels) if - label in usedLabels]) - newLabels = np.array([usedLabels.index(label) for label in fullLabels if - label in usedLabels]) - newLabelsNames = [availableLabelsNames[usedLabel] for usedLabel in - usedLabels] - return newLabels, newLabelsNames, usedIndices + asked_labels_names_set = set(asked_labels_names) + + return asked_labels_names, asked_labels_names_set + + +def get_all_labels(full_labels, available_labels_names): + new_labels = full_labels + new_labels_names = available_labels_names + used_indices = np.arange(len(full_labels)) + return new_labels, new_labels_names, used_indices + + +def select_asked_labels(asked_labels_names_set, available_labels_names, + asked_labels_names, full_labels): + if all_asked_labels_are_available(asked_labels_names_set, available_labels_names): + used_labels = [available_labels_names.index(asked_label_name) for + asked_label_name in asked_labels_names] + used_indices = np.array( + [labelIndex for labelIndex, label in enumerate(full_labels) if + label in used_labels]) + new_labels = np.array([used_labels.index(label) for label in full_labels if + label in used_labels]) + new_labels_names = [available_labels_names[usedLabel] for usedLabel in + used_labels] + return new_labels, new_labels_names, used_indices else: raise DatasetError("Asked labels are not all available in the dataset") -def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, - availableLabelsNames, askedLabelsNames): - if len(labelsSet) > 2: - if askedLabelsNames == availableLabelsNames: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, 
- availableLabelsNames) - elif len(askedLabelsNamesSet) <= len(labelsSet): - newLabels, newLabelsNames, usedIndices = selectAskedLabels( - askedLabelsNamesSet, availableLabelsNames, - askedLabelsNames, fullLabels) +def filter_labels(labels_set, asked_labels_names_set, full_labels, + available_labels_names, asked_labels_names): + if len(labels_set) > 2: + if asked_labels_names == available_labels_names: + new_labels, new_labels_names, used_indices = \ + get_all_labels(full_labels, available_labels_names) + elif len(asked_labels_names_set) <= len(labels_set): + new_labels, new_labels_names, used_indices = select_asked_labels( + asked_labels_names_set, available_labels_names, + asked_labels_names, full_labels) else: raise DatasetError( "Asked more labels than available in the dataset. Available labels are : " + - ", ".join(availableLabelsNames)) + ", ".join(available_labels_names)) else: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, - availableLabelsNames) - return newLabels, newLabelsNames, usedIndices + new_labels, new_labels_names, used_indices = get_all_labels(full_labels, + available_labels_names) + return new_labels, new_labels_names, used_indices -def filterViews(datasetFile, temp_dataset, views, usedIndices): - newViewIndex = 0 +def filter_views(dataset_file, temp_dataset, views, used_indices): + new_view_index = 0 if views == [""]: - for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), - "View" + str(viewIndex), usedIndices) + for view_index in range(dataset_file.get("Metadata").attrs["nbView"]): + copyhdf5_dataset(dataset_file, temp_dataset, "View" + str(view_index), + "View" + str(view_index), used_indices) else: - for askedViewName in views: - for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName == askedViewName: - copyhdf5Dataset(datasetFile, temp_dataset, - "View" + str(viewIndex), - "View" + str(newViewIndex), usedIndices) - newViewName = \ - temp_dataset.get("View" + str(newViewIndex)).attrs["name"] - if type(newViewName) == bytes: - temp_dataset.get("View" + str(newViewIndex)).attrs[ - "name"] = newViewName.decode("utf-8") - - newViewIndex += 1 + for asked_view_name in views: + for view_index in range(dataset_file.get("Metadata").attrs["nbView"]): + view_name = dataset_file.get("View" + str(view_index)).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name == asked_view_name: + copyhdf5_dataset(dataset_file, temp_dataset, + "View" + str(view_index), + "View" + str(new_view_index), used_indices) + new_view_name = \ + temp_dataset.get("View" + str(new_view_index)).attrs["name"] + if type(new_view_name) == bytes: + temp_dataset.get("View" + str(new_view_index)).attrs[ + "name"] = new_view_name.decode("utf-8") + + new_view_index += 1 else: pass temp_dataset.get("Metadata").attrs["nbView"] = len(views) -def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, - destinationDatasetName, usedIndices): +def copyhdf5_dataset(source_data_file, destination_data_file, source_dataset_name, + destination_dataset_name, used_indices): """Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args""" - newDset = destinationDataFile.create_dataset(destinationDatasetName, - data=sourceDataFile.get( - 
sourceDatasetName).value[ - usedIndices, :]) - if "sparse" in sourceDataFile.get(sourceDatasetName).attrs.keys() and \ - sourceDataFile.get(sourceDatasetName).attrs["sparse"]: + new_d_set = destination_data_file.create_dataset(destination_dataset_name, + data=source_data_file.get( + source_dataset_name).value[ + used_indices, :]) + if "sparse" in source_data_file.get(source_dataset_name).attrs.keys() and \ + source_data_file.get(source_dataset_name).attrs["sparse"]: # TODO : Support sparse pass else: - for key, value in sourceDataFile.get(sourceDatasetName).attrs.items(): - newDset.attrs[key] = value + for key, value in source_data_file.get(source_dataset_name).attrs.items(): + new_d_set.attrs[key] = value -def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, - randomState, full=False, add_noise=False, noise_std=0.15,): +def get_classicDBhdf5(views, path_f, name_DB, nb_class, asked_labels_names, + random_state, full=False, add_noise=False, noise_std=0.15,): """Used to load a hdf5 database""" if full: - datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") - dataset_name = nameDB - labelsDictionary = dict( - (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) + dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset_name = name_DB + labels_dictionary = dict( + (label_index, label_name.decode("utf-8")) for label_index, label_name in + enumerate(dataset_file.get("Labels").attrs["names"])) else: - askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in - askedLabelsNames] - baseDatasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") - fullLabels = baseDatasetFile.get("Labels").value - datasetFile = h5py.File(pathF + nameDB + "_temp_view_label_select.hdf5", + asked_labels_names = [asked_label_name.encode("utf8") for asked_label_name in + asked_labels_names] + base_dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + full_labels = base_dataset_file.get("Labels").value + dataset_file = h5py.File(path_f + name_DB + "_temp_view_label_select.hdf5", "w") - dataset_name = nameDB + "_temp_view_label_select" - baseDatasetFile.copy("Metadata", datasetFile) - labelsSet = getClasses(fullLabels) - availableLabelsNames = list( - baseDatasetFile.get("Labels").attrs["names"]) - askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, - askedLabelsNames, - randomState, - availableLabelsNames) - - newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, - askedLabelsNamesSet, - fullLabels, - availableLabelsNames, - askedLabelsNames) - datasetFile.get("Metadata").attrs["datasetLength"] = len(usedIndices) - datasetFile.get("Metadata").attrs["nbClass"] = NB_CLASS - datasetFile.create_dataset("Labels", data=newLabels) - datasetFile.get("Labels").attrs["names"] = newLabelsNames - filterViews(baseDatasetFile, datasetFile, views, usedIndices) - - labelsDictionary = dict( + dataset_name = name_DB + "_temp_view_label_select" + base_dataset_file.copy("Metadata", dataset_file) + labels_set = get_classes(full_labels) + available_labels_names = list( + base_dataset_file.get("Labels").attrs["names"]) + asked_labels_names, asked_labels_names_set = fill_label_names(nb_class, + asked_labels_names, + random_state, + available_labels_names) + + new_labels, new_labels_names, used_indices = filter_labels(labels_set, + asked_labels_names_set, + full_labels, + available_labels_names, + asked_labels_names) + dataset_file.get("Metadata").attrs["datasetLength"] = len(used_indices) + 
dataset_file.get("Metadata").attrs["nbClass"] = nb_class + dataset_file.create_dataset("Labels", data=new_labels) + dataset_file.get("Labels").attrs["names"] = new_labels_names + filter_views(base_dataset_file, dataset_file, views, used_indices) + + labels_dictionary = dict( (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) - datasetFile.close() - datasetFile = h5py.File(pathF + nameDB + "_temp_view_label_select.hdf5", + enumerate(dataset_file.get("Labels").attrs["names"])) + dataset_file.close() + dataset_file = h5py.File(path_f + name_DB + "_temp_view_label_select.hdf5", "r") if add_noise: - datasetFile, dataset_name = add_gaussian_noise(datasetFile, randomState, - pathF, dataset_name, - noise_std) + dataset_file, dataset_name = add_gaussian_noise(dataset_file, random_state, + path_f, dataset_name, + noise_std) else: pass - return datasetFile, labelsDictionary, dataset_name + return dataset_file, labels_dictionary, dataset_name def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 6c066e1cdcf6ff4753179e2f138278f13c152185..8119b3eca8454fb33a62447446ce722d173ef860 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -32,8 +32,8 @@ def searchBestSettings(dataset, labels, classifier_module, classifier_name, return bestSettings # or well set clasifier ? -def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, - **kwargs): +def grid_search(dataset, classifier_name, views_indices=None, k_folds=None, n_iter=1, + **kwargs): """Used to perfom gridsearch on the classifiers""" pass @@ -82,9 +82,9 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) estimator.fit(X, y, available_indices[train_indices]) test_folds_prediction.append( estimator.predict(X, available_indices[test_indices])) - minFoldLength = fold_lengths.min() + min_fold_length = fold_lengths.min() test_folds_prediction = np.array( - [test_fold_prediction[:minFoldLength] for test_fold_prediction in + [test_fold_prediction[:min_fold_length] for test_fold_prediction in test_folds_prediction]) return test_folds_prediction @@ -93,21 +93,21 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): estimator = getattr(classifier_module, classifier_name)(random_state, - **classifier_kwargs) + **classifier_kwargs) params_dict = estimator.genDistribs() if params_dict: - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in + metric_kargs = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: - metricKWARGS = {} - scorer = metricModule.get_scorer(**metricKWARGS) + metric_kargs = {} + scorer = metric_module.get_scorer(**metric_kargs) nb_possible_combinations = compute_possible_combinations(params_dict) min_list = np.array( [min(nb_possible_combination, n_iter) for nb_possible_combination in nb_possible_combinations]) - randomSearch = 
MultiviewCompatibleRandomizedSearchCV(estimator, + random_search = MultiviewCompatibleRandomizedSearchCV(estimator, n_iter=int(np.sum(min_list)), param_distributions=params_dict, refit=True, @@ -115,24 +115,24 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie cv=folds, random_state=random_state, learning_indices=learning_indices, view_indices=view_indices, - framework = framework) - detector = randomSearch.fit(X, y) + framework=framework) + detector = random_search.fit(X, y) - bestParams = dict((key, value) for key, value in + best_params = dict((key, value) for key, value in estimator.genBestParams(detector).items() if key is not "random_state") - scoresArray = detector.cv_results_['mean_test_score'] + scores_array = detector.cv_results_['mean_test_score'] params = estimator.genParamsFromDetector(detector) - genHeatMaps(params, scoresArray, output_file_name) + gen_heat_maps(params, scores_array, output_file_name) best_estimator = detector.best_estimator_ else: best_estimator = estimator - bestParams = {} - testFoldsPreds = get_test_folds_preds(X, y, folds, best_estimator, + best_params = {} + test_folds_preds = get_test_folds_preds(X, y, folds, best_estimator, framework, learning_indices) - return bestParams, testFoldsPreds + return best_params, test_folds_preds from sklearn.base import clone @@ -210,131 +210,128 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): if self.framework =="multiview": estimator.fit(X, y, self.available_indices[train_indices]) test_folds_prediction.append(estimator.predict(X, self.available_indices[test_indices])) - minFoldLength = fold_lengths.min() + min_fold_length = fold_lengths.min() test_folds_prediction = np.array( - [test_fold_prediction[:minFoldLength] for test_fold_prediction in test_folds_prediction]) + [test_fold_prediction[:min_fold_length] for test_fold_prediction in test_folds_prediction]) return test_folds_prediction - - - -def randomizedSearch(dataset, labels, classifierPackage, classifierName, - metrics_list, learningIndices, KFolds, randomState, - viewsIndices=None, nIter=1, - nbCores=1, **classificationKWARGS): +def randomizedSearch(dataset, labels, classifier_package, classifier_name, + metrics_list, learning_indices, k_folds, random_state, + views_indices=None, n_iter=1, + nb_cores=1, **classification_kargs): """Used to perform a random search on the classifiers to optimize hyper parameters""" - if viewsIndices is None: - viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) + if views_indices is None: + views_indices = range(dataset.get("Metadata").attrs["nbView"]) metric = metrics_list[0] - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in + metric_kargs = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: - metricKWARGS = {} - classifierModule = getattr(classifierPackage, classifierName + "Module") - classifierClass = getattr(classifierModule, classifierName + "Class") - if classifierName != "Mumbo": - paramsSets = classifierModule.genParamsSets(classificationKWARGS, - randomState, nIter=nIter) - if metricModule.getConfig()[-14] == "h": - baseScore = -1000.0 - isBetter = "higher" + metric_kargs = {} + classifier_module = getattr(classifier_package, classifier_name + "Module") + classifier_class = getattr(classifier_module, classifier_name + "Class") + if classifier_name != "Mumbo": + params_sets = 
classifier_module.gen_params_sets(classification_kargs,
+                                                         random_state, n_iter=n_iter)
+        if metric_module.getConfig()[-14] == "h":
+            base_score = -1000.0
+            is_better = "higher"
         else:
-            baseScore = 1000.0
-            isBetter = "lower"
-        bestSettings = None
-        kFolds = KFolds.split(learningIndices, labels[learningIndices])
-        for paramsSet in paramsSets:
+            base_score = 1000.0
+            is_better = "lower"
+        best_settings = None
+        kk_folds = k_folds.split(learning_indices, labels[learning_indices])
+        for params_set in params_sets:
             scores = []
-            for trainIndices, testIndices in kFolds:
-                classifier = classifierClass(randomState, NB_CORES=nbCores,
-                                             **classificationKWARGS)
-                classifier.setParams(paramsSet)
+            for trainIndices, testIndices in kk_folds:
+                classifier = classifier_class(random_state, nb_cores=nb_cores,
+                                              **classification_kargs)
+                classifier.setParams(params_set)
                 classifier.fit_hdf5(dataset, labels,
-                                    trainIndices=learningIndices[trainIndices],
-                                    viewsIndices=viewsIndices)
-                testLabels = classifier.predict_hdf5(dataset, usedIndices=
-                                                     learningIndices[testIndices],
-                                                     viewsIndices=viewsIndices)
-                testScore = metricModule.score(
-                    labels[learningIndices[testIndices]], testLabels)
-                scores.append(testScore)
-            crossValScore = np.mean(np.array(scores))
-
-            if isBetter == "higher" and crossValScore > baseScore:
-                baseScore = crossValScore
-                bestSettings = paramsSet
-            elif isBetter == "lower" and crossValScore < baseScore:
-                baseScore = crossValScore
-                bestSettings = paramsSet
-        classifier = classifierClass(randomState, NB_CORES=nbCores,
-                                     **classificationKWARGS)
-        classifier.setParams(bestSettings)
+                                    trainIndices=learning_indices[trainIndices],
+                                    viewsIndices=views_indices)
+                test_labels = classifier.predict_hdf5(dataset,
+                                                      used_indices=learning_indices[testIndices],
+                                                      views_indices=views_indices)
+                test_score = metric_module.score(
+                    labels[learning_indices[testIndices]], test_labels)
+                scores.append(test_score)
+            cross_val_score = np.mean(np.array(scores))
+
+            if is_better == "higher" and cross_val_score > base_score:
+                base_score = cross_val_score
+                best_settings = params_set
+            elif is_better == "lower" and cross_val_score < base_score:
+                base_score = cross_val_score
+                best_settings = params_set
+        classifier = classifier_class(random_state, nb_cores=nb_cores,
+                                      **classification_kargs)
+        classifier.setParams(best_settings)
     # TODO : This must be corrected
     else:
-        bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, labels,
-                                                          viewsIndices,
-                                                          classificationKWARGS,
-                                                          learningIndices,
-                                                          randomState,
-                                                          metric=metric,
-                                                          nIter=nIter)
-        classificationKWARGS["classifiersConfigs"] = bestConfigs
-        classifier = classifierClass(randomState, NB_CORES=nbCores,
-                                     **classificationKWARGS)
+        best_configs, _ = classifier_module.grid_search_hdf5(dataset, labels,
+                                                             views_indices,
+                                                             classification_kargs,
+                                                             learning_indices,
+                                                             random_state,
+                                                             metric=metric,
+                                                             n_iter=n_iter)
+        classification_kargs["classifiersConfigs"] = best_configs
+        classifier = classifier_class(random_state, nb_cores=nb_cores,
+                                      **classification_kargs)
     return classifier


-def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1,
-              **kwargs):
+def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, n_iter=1,
+               **kwargs):
     """Used to perform spearmint on the classifiers to optimize hyper parameters,
     longer than randomsearch (can't be parallelized)"""
     pass


-def genHeatMaps(params, scoresArray, outputFileName):
+def gen_heat_maps(params, scores_array, output_file_name):
     """Used to generate a heat map for each doublet of 
hyperparms optimized on the previous function""" - nbParams = len(params) - if nbParams > 2: - combinations = itertools.combinations(range(nbParams), 2) - elif nbParams == 2: + nb_params = len(params) + if nb_params > 2: + combinations = itertools.combinations(range(nb_params), 2) + elif nb_params == 2: combinations = [(0, 1)] else: combinations = [()] for combination in combinations: if combination: - paramName1, paramArray1 = params[combination[0]] - paramName2, paramArray2 = params[combination[1]] + param_name1, param_array1 = params[combination[0]] + param_name2, param_array2 = params[combination[1]] else: - paramName1, paramArray1 = params[0] - paramName2, paramArray2 = ("Control", np.array([0])) + param_name1, param_array1 = params[0] + param_name2, param_array2 = ("Control", np.array([0])) - paramArray1Set = np.sort(np.array(list(set(paramArray1)))) - paramArray2Set = np.sort(np.array(list(set(paramArray2)))) + param_array1_set = np.sort(np.array(list(set(param_array1)))) + param_array2_set = np.sort(np.array(list(set(param_array2)))) - scoresMatrix = np.zeros( - (len(paramArray2Set), len(paramArray1Set))) - 0.1 - for param1, param2, score in zip(paramArray1, paramArray2, scoresArray): - param1Index, = np.where(paramArray1Set == param1) - param2Index, = np.where(paramArray2Set == param2) - scoresMatrix[int(param2Index), int(param1Index)] = score + scores_matrix = np.zeros( + (len(param_array2_set), len(param_array1_set))) - 0.1 + for param1, param2, score in zip(param_array1, param_array2, scores_array): + param1_index, = np.where(param_array1_set == param1) + param2_index, = np.where(param_array2_set == param2) + scores_matrix[int(param2_index), int(param1_index)] = score plt.figure(figsize=(8, 6)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) - plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot, + plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, ) - plt.xlabel(paramName1) - plt.ylabel(paramName2) + plt.xlabel(param_name1) + plt.ylabel(param_name2) plt.colorbar() - plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set) - plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) + plt.xticks(np.arange(len(param_array1_set)), param_array1_set) + plt.yticks(np.arange(len(param_array2_set)), param_array2_set, rotation=45) plt.title('Validation metric') plt.savefig( - outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png", transparent=True) + output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", transparent=True) plt.close() # nohup python ~/dev/git/spearmint/spearmint/main.py . & diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py index 9d4e19bdd0972dc8d7b4f428c906d42d9f1c1488..2e525f2983472feb1b78089bb06f5f7ddd55314d 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py @@ -3,7 +3,7 @@ import itertools import numpy as np -def genMulticlassLabels(labels, multiclassMethod, splits): +def gen_multiclass_labels(labels, multiclass_method, splits): r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset to biclass algorithms. @@ -21,7 +21,7 @@ def genMulticlassLabels(labels, multiclassMethod, splits): ---------- labels : numpy.ndarray Name of the database. 
- multiclassMethod : string + multiclass_method : string The name of the multiclass method used (oneVersusOne, oneVersusAll, ...). splits : list of lists of numpy.ndarray For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and @@ -29,70 +29,70 @@ def genMulticlassLabels(labels, multiclassMethod, splits): Returns ------- - multiclassLabels : list of lists of numpy.ndarray + multiclass_labels : list of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. - labelsIndices : list of lists of numpy.ndarray + labels_indices : list of lists of numpy.ndarray Each original couple of different labels. - indicesMulticlass : list of lists of numpy.ndarray + indices_multiclass : list of lists of numpy.ndarray For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination. """ - if multiclassMethod == "oneVersusOne": - nbLabels = len(set(list(labels))) - if nbLabels == 2: + if multiclass_method == "oneVersusOne": + nb_labels = len(set(list(labels))) + if nb_labels == 2: splits = [[trainIndices for trainIndices, _ in splits], [testIndices for _, testIndices in splits], [[] for _ in splits]] return [labels], [(0, 1)], [splits] else: - combinations = itertools.combinations(np.arange(nbLabels), 2) - multiclassLabels = [] - labelsIndices = [] - indicesMulticlass = [] + combinations = itertools.combinations(np.arange(nb_labels), 2) + multiclass_labels = [] + labels_indices = [] + indices_multiclass = [] for combination in combinations: - labelsIndices.append(combination) - oldIndices = [exampleIndex - for exampleIndex, exampleLabel in + labels_indices.append(combination) + old_indices = [example_index + for example_index, example_label in enumerate(labels) - if exampleLabel in combination] - trainIndices = [np.array([oldIndex for oldIndex in oldIndices if - oldIndex in iterIndices[0]]) + if example_label in combination] + train_indices = [np.array([old_index for old_index in old_indices if + old_index in iterIndices[0]]) for iterIndices in splits] - testIndices = [np.array([oldIndex for oldIndex in oldIndices if - oldIndex in iterindices[1]]) + test_indices = [np.array([old_index for old_index in old_indices if + old_index in iterindices[1]]) for iterindices in splits] - testIndicesMulticlass = [np.array(iterindices[1]) for + test_indices_multiclass = [np.array(iterindices[1]) for iterindices in splits] - indicesMulticlass.append( - [trainIndices, testIndices, testIndicesMulticlass]) - newLabels = np.zeros(len(labels), dtype=int) - 100 + indices_multiclass.append( + [train_indices, test_indices, test_indices_multiclass]) + new_labels = np.zeros(len(labels), dtype=int) - 100 for labelIndex, label in enumerate(labels): if label == combination[0]: - newLabels[labelIndex] = 1 + new_labels[labelIndex] = 1 elif label == combination[1]: - newLabels[labelIndex] = 0 + new_labels[labelIndex] = 0 else: pass - multiclassLabels.append(newLabels) + multiclass_labels.append(new_labels) - elif multiclassMethod == "oneVersusRest": + elif multiclass_method == "oneVersusRest": # TODO : Implement one versus rest if probas are not a problem anymore pass - return multiclassLabels, labelsIndices, indicesMulticlass + return multiclass_labels, labels_indices, indices_multiclass -def genMulticlassMonoviewDecision(monoviewResult, classificationIndices): - 
learningIndices, validationIndices, testIndicesMulticlass = classificationIndices - multiclassMonoviewDecisions = monoviewResult.full_labels_pred - multiclassMonoviewDecisions[ - testIndicesMulticlass] = monoviewResult.y_test_multiclass_pred - return multiclassMonoviewDecisions +def gen_multiclass_monoview_decision(monoview_result, classification_indices): + learning_indices, validation_indices, test_indices_multiclass = classification_indices + multiclass_monoview_decisions = monoview_result.full_labels_pred + multiclass_monoview_decisions[ + test_indices_multiclass] = monoview_result.y_test_multiclass_pred + return multiclass_monoview_decisions -def isBiclass(multiclass_preds): +def is_biclass(multiclass_preds): if multiclass_preds[0] is []: return True else: diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py index 43833e25ab266ec060dcbf24394c0717cf65abb8..372f62116eb4d2305f7cf5df4596fd02c26a3bc2 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py @@ -5,46 +5,46 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def printMetricScore(metricScores, metrics): - metricScoreString = "\n\n" +def print_metric_score(metric_scores, metrics): + metric_score_string = "\n\n" for metric in metrics: - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) + metric_kwargs = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: - metricKWARGS = {} - metricScoreString += "\tFor " + metricModule.getConfig( - **metricKWARGS) + " : " - metricScoreString += "\n\t\t- Score on train : " + str( - metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : " + str( - metricScores[metric[0]][1]) - metricScoreString += "\n\n" - return metricScoreString + metric_kwargs = {} + metric_score_string += "\tFor " + metric_module.getConfig( + **metric_kwargs) + " : " + metric_score_string += "\n\t\t- Score on train : " + str( + metric_scores[metric[0]][0]) + metric_score_string += "\n\t\t- Score on test : " + str( + metric_scores[metric[0]][1]) + metric_score_string += "\n\n" + return metric_score_string -def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, - learningIndices, labels): - metricModule = getattr(metrics, metric[0]) +def get_total_metric_scores(metric, train_labels, test_labels, validation_indices, + learning_indices, labels): + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) + metric_kwargs = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: - metricKWARGS = {} - trainScore = metricModule.score(labels[learningIndices], trainLabels, - **metricKWARGS) - testScore = metricModule.score(labels[validationIndices], testLabels, - **metricKWARGS) - return [trainScore, testScore] + metric_kwargs = {} + train_score = metric_module.score(labels[learning_indices], train_labels, + **metric_kwargs) + test_score = metric_module.score(labels[validation_indices], test_labels, + **metric_kwargs) + return [train_score, test_score] -def 
getMetricsScores(metrics, trainLabels, testLabels, - validationIndices, learningIndices, labels): - metricsScores = {} - for metric in metrics: - metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, - testLabels, - validationIndices, - learningIndices, labels) - return metricsScores +def get_metrics_scores(metrics_var, train_labels, test_labels, + validation_indices, learning_indices, labels): + metrics_scores = {} + for metric in metrics_var: + metrics_scores[metric[0]] = get_total_metric_scores(metric, train_labels, + test_labels, + validation_indices, + learning_indices, labels) + return metrics_scores diff --git a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py index 5d569addfa5f71256eb354f5ea0243be3d1c0657..9d26ddde8bd02fea2ef1176385ac251260027e40 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py @@ -1,7 +1,7 @@ import numpy as np -def signLabels(labels): +def sign_labels(labels): if set(labels) == (0, 1): return np.array([label if label != 0 else -1 for label in labels]) else: diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index cd9545cb9371f1b61afa9e709a919fb0f758f12b..3807ba6273c6a0b7367e96252ddafe3e62bf7dc9 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -10,7 +10,7 @@ from ..mono_multi_view_classifiers import exec_classif class Test_initBenchmark(unittest.TestCase): def test_benchmark_wanted(self): - # benchmark_output = ExecClassif.initBenchmark(self.args) + # benchmark_output = ExecClassif.init_benchmark(self.args) self.assertEqual(1, 1) @@ -124,7 +124,7 @@ class Test_execBenchmark(unittest.TestCase): "Classification":{"hps_iter": 1}} def test_simple(cls): - res = exec_classif.execBenchmark(1, 2, 3, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(1, 2, 3, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -137,7 +137,7 @@ class Test_execBenchmark(unittest.TestCase): def test_multiclass_no_iter(cls): cls.argumentDictionaries = [{"a": 10, "args": cls.args}, {"a": 4, "args": cls.args}] - res = exec_classif.execBenchmark(2, 1, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 2, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -152,7 +152,7 @@ class Test_execBenchmark(unittest.TestCase): {"a": 4, "args": cls.args}, {"a": 55, "args": cls.args}, {"a": 24, "args": cls.args}] - res = exec_classif.execBenchmark(2, 2, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 2, 2, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -163,7 +163,7 @@ class Test_execBenchmark(unittest.TestCase): cls.assertEqual(res, 3) def test_no_iter_biclass_multicore(cls): - res = exec_classif.execBenchmark(2, 1, 1, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 1, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -256,7 +256,7 @@ class Test_execOneBenchmark(unittest.TestCase): 1, 2, 1, 1, 2, 1, 21]), ExecMonoview_multicore=fakeExecMono, - ExecMultiview_multicore=fakeExecMulti, + exec_multiview_multicore=fakeExecMulti, 
initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) @@ -315,7 +315,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): flag=None, labels=np.array([0, 1, 2, 3, 4, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), ExecMonoview_multicore=fakeExecMono, - ExecMultiview_multicore=fakeExecMulti, + exec_multiview_multicore=fakeExecMulti, initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) diff --git a/multiview_platform/versions.py b/multiview_platform/versions.py index 39d9af6f5a9714a075348a622e087bf1249de355..23661197ccf84cbaa7fa3acd0f6fcc2376efc449 100644 --- a/multiview_platform/versions.py +++ b/multiview_platform/versions.py @@ -3,10 +3,10 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def testVersions(): +def test_versions(): """Used to test if all prerequisites are installed""" - isUpToDate = True - toInstall = [] + is_up_to_date = True + to_install = [] try: import sys @@ -16,81 +16,81 @@ def testVersions(): try: import cvxopt except ImportError: - isUpToDate = False - toInstall.append("cvxopt") + is_up_to_date = False + to_install.append("cvxopt") try: import pyscm except ImportError: - isUpToDate = False - toInstall.append("pyscm") + is_up_to_date = False + to_install.append("pyscm") try: import numpy except ImportError: - isUpToDate = False - toInstall.append("numpy") + is_up_to_date = False + to_install.append("numpy") try: import scipy except ImportError: - isUpToDate = False - toInstall.append("scipy") + is_up_to_date = False + to_install.append("scipy") try: import matplotlib except ImportError: - isUpToDate = False - toInstall.append("matplotlib") + is_up_to_date = False + to_install.append("matplotlib") try: import sklearn except ImportError: - isUpToDate = False - toInstall.append("sklearn") + is_up_to_date = False + to_install.append("sklearn") try: import logging except ImportError: - isUpToDate = False - toInstall.append("logging") + is_up_to_date = False + to_install.append("logging") try: import joblib except ImportError: - isUpToDate = False - toInstall.append("joblib") + is_up_to_date = False + to_install.append("joblib") try: import argparse except ImportError: - isUpToDate = False - toInstall.append("argparse") + is_up_to_date = False + to_install.append("argparse") try: import h5py # except ImportError: - isUpToDate = False - toInstall.append("h5py") + is_up_to_date = False + to_install.append("h5py") # try: # import graphviz # # except ImportError: - # isUpToDate = False - # toInstall.append("graphviz") + # is_up_to_date = False + # to_install.append("graphviz") try: import pickle # except ImportError: - isUpToDate = False - toInstall.append("pickle") + is_up_to_date = False + to_install.append("pickle") - if not isUpToDate: + if not is_up_to_date: print( "You can't run at the moment, please install the following modules : \n" + "\n".join( - toInstall)) + to_install)) quit() if __name__ == "__main__": - testVersions() + test_versions() diff --git a/requirements.txt b/requirements.txt index d64a21ee8f5a1a6af8b36de9bcdf97dbd96ad9aa..e165233e7213a44182001cf71dd874be8b6479f7 100755 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ six==1.12.0 pandas==0.23.3 m2r==0.2.1 docutils==0.12 -pyyaml==3.12 \ No newline at end of file +pyyaml==3.12 diff --git a/setup.py b/setup.py index 71baa7adce8f806d199d37837581f6fa74a39b2f..4879c8d6ccee7ec7bf363324f7fa8c88f5d2068e 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def setup_package(): # Une url qui pointe vers la page officielle de votre 
lib url='http://github.com/babau1/multiview-machine-learning-omis/', install_requires=['numpy>=1.8', 'scipy>=0.16','scikit-learn==0.19', - 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r', 'pyyaml'], + 'matplotlib', 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r', 'pyyaml'], # Il est d'usage de mettre quelques metadata à propos de sa lib # Pour que les robots puissent facilement la classer. # La liste des marqueurs autorisées est longue: