diff --git a/config_files/config.yml b/config_files/config.yml index bf55f8fd1a7241a3d07e4278e8e3a1b2d9a6eec5..b22cd1855bdc29cf024e1b6d565b464ce07abff2 100644 --- a/config_files/config.yml +++ b/config_files/config.yml @@ -5,7 +5,7 @@ Base : label: "_" type: ".hdf5" views: ["all"] - pathf: "../Data/" + pathf: "../data/" nice: 0 random_state: 42 nb_cores: 1 @@ -13,7 +13,7 @@ Base : debug: True add_noise: False noise_std: 0.0 - res_dir: "../Results/" + res_dir: "../results/" # All the classification-realted configuration options Classification: diff --git a/data/Plausible.hdf5 b/data/Plausible.hdf5 index 947a30ea7b8b7213f075b8c03149c4efd0e5df28..4f10a2ad8f524e8692771be0ab2f3f3709f37c16 100644 Binary files a/data/Plausible.hdf5 and b/data/Plausible.hdf5 differ diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py index 53d4fcc9fdb31920c40ae1802db43fb17f7058e3..a142cb88fa45fb4f904bb71a97295b50f02195a5 100644 --- a/multiview_platform/execute.py +++ b/multiview_platform/execute.py @@ -2,12 +2,12 @@ def exec(): - import versions - versions.testVersions() + import multiview_platform.versions as versions + versions.test_versions() import sys - from mono_multi_view_classifiers import exec_classif - exec_classif.execClassif(sys.argv[1:]) + from multiview_platform.mono_multi_view_classifiers import exec_classif + exec_classif.exec_classif(sys.argv[1:]) if __name__ == "__main__": diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index acec240c3933480dab089d2f00d21abec6569169..93c6f75964a477927c51adb142251d107d476e53 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -10,40 +10,43 @@ import itertools import numpy as np from joblib import Parallel, delayed from sklearn.tree import DecisionTreeClassifier - -matplotlib.use( - 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure - # Import own modules from . import monoview_classifiers from . import multiview_classifiers -from .multiview.exec_multiview import ExecMultiview, ExecMultiview_multicore -from .monoview.exec_classif_mono_view import ExecMonoview, ExecMonoview_multicore +from .multiview.exec_multiview import exec_multiview, exec_multiview_multicore +from .monoview.exec_classif_mono_view import exec_monoview, exec_monoview_multicore from .utils import get_multiview_db as DB -from .result_analysis import \ - getResults, plot_results_noise # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, +from .result_analysis import get_results +from .result_analysis import plot_results_noise +# resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, from .utils import execution, dataset, multiclass, configuration +matplotlib.use( + 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure + + + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): +def init_benchmark(cl_type, monoview_algos, multiview_algos, args): r"""Used to create a list of all the algorithm packages names used for the benchmark. 
- First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right + First this function will check if the benchmark need mono- or/and multiview + algorithms and adds to the right dictionary the asked algorithms. If none is asked by the user, all will be added. If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added. Parameters ---------- - CL_type : List of string + cl_type : List of string List of types of needed benchmark - multiviewAlgos : List of strings + multiview_algos : List of strings List of multiview algorithms needed for the benchmark - monoviewAlgos : Listof strings + monoview_algos : Listof strings List of monoview algorithms needed for the benchmark args : ParsedArgumentParser args All the input args (used to tune the algorithms) @@ -54,21 +57,21 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. """ benchmark = {"monoview": {}, "multiview": {}} - allMultiviewPackages = [name for _, name, isPackage + all_multiview_packages = [name for _, name, isPackage in pkgutil.iter_modules( ['./mono_multi_view_classifiers/multiview_classifiers/']) if isPackage] - if "monoview" in CL_type: - if monoviewAlgos == ['all']: + if "monoview" in cl_type: + if monoview_algos == ['all']: benchmark["monoview"] = [name for _, name, isPackage in pkgutil.iter_modules([ "./mono_multi_view_classifiers/monoview_classifiers"]) if not isPackage] else: - benchmark["monoview"] = monoviewAlgos + benchmark["monoview"] = monoview_algos - if "multiview" in CL_type: + if "multiview" in cl_type: benchmark["multiview"] = [name for _, name, isPackage in pkgutil.iter_modules([ "./mono_multi_view_classifiers/multiview_classifiers"]) @@ -76,12 +79,12 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): return benchmark -def genViewsDictionnary(DATASET, views): +def gen_views_dictionnary(dataset_var, views): r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value). Parameters ---------- - DATASET : `h5py` dataset file + dataset_var : `h5py` dataset file The full dataset on which the benchmark will be done views : List of strings Names of the selected views on which the banchmark will be done @@ -91,21 +94,21 @@ def genViewsDictionnary(DATASET, views): viewDictionary : Dictionary Dictionary mapping the view names totheir indexin the full dataset. 
""" - datasetsNames = DATASET.keys() - viewsDictionary = {} - for datasetName in datasetsNames: - if datasetName[:4] == "View": - viewName = DATASET.get(datasetName).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName in views: - viewsDictionary[viewName] = int(datasetName[4:]) + datasets_names = dataset_var.keys() + views_dictionary = {} + for dataset_name in datasets_names: + if dataset_name[:4] == "View": + view_name = dataset_var.get(dataset_name).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name in views: + views_dictionary[view_name] = int(dataset_name[4:]) - return viewsDictionary + return views_dictionary def init_argument_dictionaries(benchmark, views_dictionary, - nb_class, init_kwargs): + nb_class, init_kwargs): argument_dictionaries = {"monoview": [], "multiview": []} if benchmark["monoview"]: argument_dictionaries["monoview"] = init_monoview_exps( @@ -155,7 +158,7 @@ def init_monoview_exps(classifier_names, All types of monoview and multiview experiments that have to be benchmarked argument_dictionaries : dictionary Maps monoview and multiview experiments arguments. - viewDictionary : dictionary + views_dictionary : dictionary Maps the view names to their index in the HDF5 dataset nb_class : integer Number of different labels in the classification @@ -264,11 +267,11 @@ def is_dict_in(dictionary): return paths -def gen_multiple_kwargs_combinations(clKWARGS): - values = list(clKWARGS.values()) +def gen_multiple_kwargs_combinations(cl_kwrags): + values = list(cl_kwrags.values()) listed_values = [[_] if type(_) is not list else _ for _ in values] values_cartesian_prod = [_ for _ in itertools.product(*listed_values)] - keys = clKWARGS.keys() + keys = cl_kwrags.keys() kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) for values in values_cartesian_prod] @@ -282,14 +285,14 @@ def gen_multiple_kwargs_combinations(clKWARGS): return kwargs_combination, reduced_kwargs_combination -def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier, +def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, view_name=None, view_index=None, views_dictionary=None, framework="monoview"): if framework=="multiview": - classifier_config = get_path_dict(kwargsInit[classifier]) + classifier_config = get_path_dict(kwargs_init[classifier]) else: - classifier_config = kwargsInit[classifier] + classifier_config = kwargs_init[classifier] multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(classifier_config) multiple_kwargs_dict = dict( (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) @@ -309,14 +312,14 @@ def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier, return args_dictionnaries -def init_monoview_kwargs(args, classifiersNames): +def init_kwargs(args, classifiers_names): r"""Used to init kwargs thanks to a function in each monoview classifier package. Parameters ---------- args : parsed args objects All the args passed by the user. - classifiersNames : list of strings + classifiers-names : list of strings List of the benchmarks's monoview classifiers names. 
Returns @@ -328,7 +331,7 @@ def init_monoview_kwargs(args, classifiersNames): logging.debug("Start:\t Initializing monoview classifiers arguments") monoviewKWARGS = {} - for classifiersName in classifiersNames: + for classifiersName in classifiers_names: try: getattr(monoview_classifiers, classifiersName) except AttributeError: @@ -342,9 +345,9 @@ def init_monoview_kwargs(args, classifiersNames): return monoviewKWARGS -def initKWARGSFunc(args, benchmark): - monoview_kwargs = init_monoview_kwargs(args, benchmark["monoview"]) - multiview_kwargs = init_multiview_kwargs(args, benchmark["multiview"]) +def init_kwargs_func(args, benchmark): + monoview_kwargs = init_kwargs(args, benchmark["monoview"]) + multiview_kwargs = init_kwargs(args, benchmark["multiview"]) kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs} return kwargs @@ -365,43 +368,43 @@ def init_multiview_kwargs(args, classifiers_names): return multiview_kwargs -def initMultiviewArguments(args, benchmark, views, viewsIndices, - argumentDictionaries, randomState, directory, - resultsMonoview, classificationIndices): +def init_multiview_arguments(args, benchmark, views, views_indices, + argument_dictionaries, random_state, directory, + results_monoview, classification_indices): """Used to add each monoview exeperience args to the list of monoview experiences args""" logging.debug("Start:\t Initializing multiview classifiers arguments") - multiviewArguments = [] + multiview_arguments = [] if "multiview" in benchmark: - for multiviewAlgoName in benchmark["multiview"]: - mutliviewModule = getattr(multiview_classifiers, - multiviewAlgoName) + for multiview_algo_name in benchmark["multiview"]: + mutliview_module = getattr(multiview_classifiers, + multiview_algo_name) - multiviewArguments += mutliviewModule.getArgs(args, benchmark, - views, viewsIndices, - randomState, + multiview_arguments += mutliview_module.getArgs(args, benchmark, + views, views_indices, + random_state, directory, - resultsMonoview, - classificationIndices) - argumentDictionaries["multiview"] = multiviewArguments + results_monoview, + classification_indices) + argument_dictionaries["multiview"] = multiview_arguments logging.debug("Start:\t Initializing multiview classifiers arguments") - return argumentDictionaries + return argument_dictionaries -def arangeMetrics(metrics, metricPrinc): +def arange_metrics(metrics, metric_princ): """Used to get the metrics list in the right order so that the first one is the principal metric specified in args""" - if [metricPrinc] in metrics: - metricIndex = metrics.index([metricPrinc]) - firstMetric = metrics[0] - metrics[0] = [metricPrinc] - metrics[metricIndex] = firstMetric + if [metric_princ] in metrics: + metric_index = metrics.index([metric_princ]) + first_metric = metrics[0] + metrics[0] = [metric_princ] + metrics[metric_index] = first_metric else: - raise AttributeError(metricPrinc + " not in metric pool") + raise AttributeError(metric_princ + " not in metric pool") return metrics -def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, - kFolds): +def benchmark_init(directory, classification_indices, labels, labels_dictionary, + k_folds): logging.debug("Start:\t Benchmark initialization") if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): try: @@ -409,56 +412,56 @@ def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, except OSError as exc: if exc.errno != errno.EEXIST: raise - trainIndices = classificationIndices[0] - trainLabels = 
labels[trainIndices] - np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",") - np.savetxt(directory + "train_indices.csv", classificationIndices[0], + train_indices = classification_indices[0] + train_labels = labels[train_indices] + np.savetxt(directory + "train_labels.csv", train_labels, delimiter=",") + np.savetxt(directory + "train_indices.csv", classification_indices[0], delimiter=",") - resultsMonoview = [] - folds = kFolds.split(np.arange(len(trainLabels)), trainLabels) - minFoldLen = int(len(trainLabels) / kFolds.n_splits) - for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds): - fileName = directory + "/folds/test_labels_fold_" + str( - foldIndex) + ".csv" - if not os.path.exists(os.path.dirname(fileName)): + results_monoview = [] + folds = k_folds.split(np.arange(len(train_labels)), train_labels) + min_fold_len = int(len(train_labels) / k_folds.n_splits) + for fold_index, (train_cv_indices, test_cv_indices) in enumerate(folds): + file_name = directory + "/folds/test_labels_fold_" + str( + fold_index) + ".csv" + if not os.path.exists(os.path.dirname(file_name)): try: - os.makedirs(os.path.dirname(fileName)) + os.makedirs(os.path.dirname(file_name)) except OSError as exc: if exc.errno != errno.EEXIST: raise - np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]], + np.savetxt(file_name, train_labels[test_cv_indices[:min_fold_len]], delimiter=",") - labelsNames = list(LABELS_DICTIONARY.values()) + labels_names = list(labels_dictionary.values()) logging.debug("Done:\t Benchmark initialization") - return resultsMonoview, labelsNames + return results_monoview, labels_names -def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, - classificationIndices=None, args=None, - kFolds=None, randomState=None, hyperParamSearch=None, - metrics=None, argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, flag=None, +def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None, + classification_indices=None, args=None, + k_folds=None, random_state=None, hyper_param_search=None, + metrics=None, argument_dictionaries=None, + benchmark=None, views=None, views_indices=None, flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, - ExecMultiview_multicore=ExecMultiview_multicore, - initMultiviewArguments=initMultiviewArguments): + exec_monoview_multicore=exec_monoview_multicore, + exec_multiview_multicore=exec_multiview_multicore, + init_multiview_arguments=init_multiview_arguments): """Used to run a benchmark using one core. 
ExecMonoview_multicore, initMultiviewArguments and - ExecMultiview_multicore args are only used for tests""" + exec_multiview_multicore args are only used for tests""" - resultsMonoview, labelsNames = benchmarkInit(directory, - classificationIndices, labels, - LABELS_DICTIONARY, kFolds) + results_monoview, labels_names = benchmark_init(directory, + classification_indices, labels, + labels_dictionary, k_folds) logging.debug("Start:\t monoview benchmark") - resultsMonoview += [ - ExecMonoview_multicore(directory, args["Base"]["name"], labelsNames, - classificationIndices, kFolds, - coreIndex, args["Base"]["type"], args["Base"]["pathf"], randomState, + results_monoview += [ + exec_monoview_multicore(directory, args["Base"]["name"], labels_names, + classification_indices, k_folds, + core_index, args["Base"]["type"], args["Base"]["pathf"], random_state, labels, - hyperParamSearch=hyperParamSearch, + hyperParamSearch=hyper_param_search, metrics=metrics, nIter=args["Classification"]["hps_iter"], **argument) - for argument in argumentDictionaries["Monoview"]] + for argument in argument_dictionaries["Monoview"]] logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") @@ -471,53 +474,53 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") - resultsMultiview = [ - ExecMultiview_multicore(directory, coreIndex, args["Base"]["name"], - classificationIndices, kFolds, args["Base"]["type"], - args["Base"]["pathf"], LABELS_DICTIONARY, randomState, - labels, hyperParamSearch=hyperParamSearch, + results_multiview = [ + exec_multiview_multicore(directory, core_index, args["Base"]["name"], + classification_indices, k_folds, args["Base"]["type"], + args["Base"]["pathf"], labels_dictionary, random_state, + labels, hyper_param_search=hyper_param_search, metrics=metrics, nIter=args["Classification"]["hps_iter"], **arguments) - for arguments in argumentDictionaries["multiview"]] + for arguments in argument_dictionaries["multiview"]] logging.debug("Done:\t multiview benchmark") - return [flag, resultsMonoview + resultsMultiview] + return [flag, results_monoview + results_multiview] -def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, - directory=None, classificationIndices=None, - args=None, - kFolds=None, randomState=None, - hyperParamSearch=None, metrics=None, - argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, - flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, - ExecMultiview_multicore=ExecMultiview_multicore, - initMultiviewArguments=initMultiviewArguments): +def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None, + directory=None, classification_indices=None, + args=None, + k_folds=None, random_state=None, + hyper_param_search=None, metrics=None, + argument_dictionaries=None, + benchmark=None, views=None, viewsIndices=None, + flag=None, labels=None, + exec_monoview_multicore=exec_monoview_multicore, + exec_multiview_multicore=exec_multiview_multicore, + init_multiview_arguments=init_multiview_arguments): """Used to run a benchmark using multiple cores. 
ExecMonoview_multicore, initMultiviewArguments and - ExecMultiview_multicore args are only used for tests""" + exec_multiview_multicore args are only used for tests""" - resultsMonoview, labelsNames = benchmarkInit(directory, - classificationIndices, labels, - LABELS_DICTIONARY, kFolds) + results_monoview, labels_names = benchmark_init(directory, + classification_indices, labels, + labels_dictionary, k_folds) logging.debug("Start:\t monoview benchmark") - nbExperiments = len(argumentDictionaries["monoview"]) - nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) - for stepIndex in range(nbMulticoreToDo): - resultsMonoview += (Parallel(n_jobs=nbCores)( - delayed(ExecMonoview_multicore)(directory, args["Base"]["name"], labelsNames, - classificationIndices, kFolds, - coreIndex, args["Base"]["type"], args["Base"]["pathf"], - randomState, labels, - hyperParamSearch=hyperParamSearch, + nb_experiments = len(argument_dictionaries["monoview"]) + nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) + for step_index in range(nb_multicore_to_do): + results_monoview += (Parallel(n_jobs=nb_cores)( + delayed(exec_monoview_multicore)(directory, args["Base"]["name"], labels_names, + classification_indices, k_folds, + core_index, args["Base"]["type"], args["Base"]["pathf"], + random_state, labels, + hyper_param_search=hyper_param_search, metrics=metrics, nIter=args["Classification"]["hps_iter"], - **argumentDictionaries["monoview"][ - coreIndex + stepIndex * nbCores]) - for coreIndex in - range(min(nbCores, nbExperiments - stepIndex * nbCores)))) + **argument_dictionaries["monoview"][ + core_index + step_index * nb_cores]) + for core_index in + range(min(nb_cores, nb_experiments - step_index * nb_cores)))) logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") @@ -530,53 +533,53 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") - resultsMultiview = [] - nbExperiments = len(argumentDictionaries["multiview"]) - nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) - for stepIndex in range(nbMulticoreToDo): - resultsMultiview += Parallel(n_jobs=nbCores)( - delayed(ExecMultiview_multicore)(directory, coreIndex, args["Base"]["name"], - classificationIndices, kFolds, + results_multiview = [] + nb_experiments = len(argument_dictionaries["multiview"]) + nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) + for step_index in range(nb_multicore_to_do): + results_multiview += Parallel(n_jobs=nb_cores)( + delayed(exec_multiview_multicore)(directory, core_index, args["Base"]["name"], + classification_indices, k_folds, args["Base"]["type"], args["Base"]["pathf"], - LABELS_DICTIONARY, randomState, + labels_dictionary, random_state, labels, - hyperParamSearch=hyperParamSearch, + hyper_param_search=hyper_param_search, metrics=metrics, nIter=args["Classification"]["hps_iter"], ** - argumentDictionaries["multiview"][ - stepIndex * nbCores + coreIndex]) - for coreIndex in - range(min(nbCores, nbExperiments - stepIndex * nbCores))) + argument_dictionaries["multiview"][ + step_index * nb_cores + core_index]) + for core_index in + range(min(nb_cores, nb_experiments - step_index * nb_cores))) logging.debug("Done:\t multiview benchmark") - return [flag, resultsMonoview + resultsMultiview] + return [flag, results_monoview + results_multiview] -def execOneBenchmarkMonoCore(DATASET=None, 
LABELS_DICTIONARY=None, +def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, directory=None, classificationIndices=None, args=None, kFolds=None, randomState=None, - hyperParamSearch=None, metrics=None, + hyper_param_search=None, metrics=None, argumentDictionaries=None, benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, - ExecMultiview_multicore=ExecMultiview_multicore, - initMultiviewArguments=initMultiviewArguments): - resultsMonoview, labelsNames = benchmarkInit(directory, + exec_monoview_multicore=exec_monoview_multicore, + exec_multiview_multicore=exec_multiview_multicore, + init_multiview_arguments=init_multiview_arguments): + results_monoview, labels_names = benchmark_init(directory, classificationIndices, labels, - LABELS_DICTIONARY, kFolds) + labels_dictionary, kFolds) logging.debug("Start:\t monoview benchmark") for arguments in argumentDictionaries["monoview"]: - X = DATASET.get("View" + str(arguments["view_index"])) + X = dataset_var.get("View" + str(arguments["view_index"])) Y = labels - resultsMonoview += [ - ExecMonoview(directory, X, Y, args["Base"]["name"], labelsNames, + results_monoview += [ + exec_monoview(directory, X, Y, args["Base"]["name"], labels_names, classificationIndices, kFolds, 1, args["Base"]["type"], args["Base"]["pathf"], randomState, - hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args["Classification"]["hps_iter"], **arguments)] + hyper_param_search=hyper_param_search, metrics=metrics, + n_iter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") @@ -590,46 +593,46 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") - resultsMultiview = [] + results_multiview = [] for arguments in argumentDictionaries["multiview"]: - resultsMultiview += [ - ExecMultiview(directory, DATASET, args["Base"]["name"], classificationIndices, + results_multiview += [ + exec_multiview(directory, dataset_var, args["Base"]["name"], classificationIndices, kFolds, 1, args["Base"]["type"], - args["Base"]["pathf"], LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args["Classification"]["hps_iter"], **arguments)] + args["Base"]["pathf"], labels_dictionary, randomState, labels, + hyper_param_search=hyper_param_search, + metrics=metrics, n_iter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t multiview benchmark") - return [flag, resultsMonoview + resultsMultiview] + return [flag, results_monoview + results_multiview] -def execBenchmark(nbCores, statsIter, nbMulticlass, - benchmarkArgumentsDictionaries, classificationIndices, +def exec_benchmark(nb_cores, stats_iter, nb_multiclass, + benchmark_arguments_dictionaries, classification_indices, directories, - directory, multiClassLabels, metrics, labelsDictionary, - nbLabels, DATASET, - execOneBenchmark=execOneBenchmark, - execOneBenchmark_multicore=execOneBenchmark_multicore, - execOneBenchmarkMonoCore=execOneBenchmarkMonoCore, - getResults=getResults, delete=DB.deleteHDF5): + directory, multi_class_labels, metrics, labels_dictionary, + nb_labels, dataset_var, + exec_one_benchmark=exec_one_benchmark, + exec_one_benchmark_multicore=exec_one_benchmark_multicore, + exec_one_benchmark_mono_core=exec_one_benchmark_mono_core, + 
get_results=get_results, delete=DB.deleteHDF5): r"""Used to execute the needed benchmark(s) on multicore or mono-core functions. Parameters ---------- - nbCores : int + nb_cores : int Number of threads that the benchmarks can use. - statsIter : int + stats_iter : int Number of statistical iterations that have to be done. - benchmarkArgumentsDictionaries : list of dictionaries + benchmark_arguments_dictionaries : list of dictionaries All the needed arguments for the benchmarks. - classificationIndices : list of lists of numpy.ndarray + classification_indices : list of lists of numpy.ndarray For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and the ones of the testing set. directories : list of strings List of the paths to the result directories for each statistical iteration. directory : string Path to the main results directory. - multiClassLabels : ist of lists of numpy.ndarray + multi_class_labels : ist of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. @@ -639,7 +642,7 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, Dictionary mapping labels indices to labels names. nbLabels : int Total number of different labels in the dataset. - DATASET : HDF5 dataset file + dataset_var : HDF5 dataset file The full dataset that wil be used by the benchmark. classifiersNames : list of strings List of the benchmarks's monoview classifiers names. @@ -654,61 +657,61 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, """ logging.debug("Start:\t Executing all the needed biclass benchmarks") results = [] - if nbCores > 1: - if statsIter > 1 or nbMulticlass > 1: - nbExpsToDo = len(benchmarkArgumentsDictionaries) - nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores))) - for stepIndex in nbMulticoreToDo: - results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark) - (coreIndex=coreIndex, + if nb_cores > 1: + if stats_iter > 1 or nb_multiclass > 1: + nb_exps_to_do = len(benchmark_arguments_dictionaries) + nb_multicore_to_do = range(int(math.ceil(float(nb_exps_to_do) / nb_cores))) + for step_index in nb_multicore_to_do: + results += (Parallel(n_jobs=nb_cores)(delayed(exec_one_benchmark) + (core_index=coreIndex, ** - benchmarkArgumentsDictionaries[ - coreIndex + stepIndex * nbCores]) + benchmark_arguments_dictionaries[ + coreIndex + step_index * nb_cores]) for coreIndex in range( - min(nbCores, nbExpsToDo - stepIndex * nbCores)))) + min(nb_cores, nb_exps_to_do - step_index * nb_cores)))) else: - results += [execOneBenchmark_multicore(nbCores=nbCores, ** - benchmarkArgumentsDictionaries[0])] + results += [exec_one_benchmark_multicore(nb_cores=nb_cores, ** + benchmark_arguments_dictionaries[0])] else: - for arguments in benchmarkArgumentsDictionaries: - results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)] + for arguments in benchmark_arguments_dictionaries: + results += [exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") # Do everything with flagging - nbExamples = len(classificationIndices[0][0]) + len( - classificationIndices[0][1]) - multiclassGroundTruth = DATASET.get("Labels").value + nb_examples = len(classification_indices[0][0]) + len( + classification_indices[0][1]) + multiclass_ground_truth = dataset_var.get("Labels").value 
logging.debug("Start:\t Analyzing predictions") - results_mean_stds = getResults(results, statsIter, nbMulticlass, - benchmarkArgumentsDictionaries, - multiclassGroundTruth, + results_mean_stds = get_results(results, stats_iter, nb_multiclass, + benchmark_arguments_dictionaries, + multiclass_ground_truth, metrics, - classificationIndices, + classification_indices, directories, directory, - labelsDictionary, - nbExamples, - nbLabels) + labels_dictionary, + nb_examples, + nb_labels) logging.debug("Done:\t Analyzing predictions") - delete(benchmarkArgumentsDictionaries, nbCores, DATASET) + delete(benchmark_arguments_dictionaries, nb_cores, dataset_var) return results_mean_stds -def execClassif(arguments): +def exec_classif(arguments): """Main function to execute the benchmark""" start = time.time() - args = execution.parseTheArgs(arguments) + args = execution.parse_the_args(arguments) args = configuration.get_the_args(args.path_config) os.nice(args["Base"]["nice"]) - nbCores = args["Base"]["nb_cores"] - if nbCores == 1: + nb_cores = args["Base"]["nb_cores"] + if nb_cores == 1: os.environ['OPENBLAS_NUM_THREADS'] = '1' - statsIter = args["Classification"]["stats_iter"] - hyperParamSearch = args["Classification"]["hps_type"] - multiclassMethod = args["Classification"]["multiclass_method"] - CL_type = args["Classification"]["type"] - monoviewAlgos = args["Classification"]["algos_monoview"] - multiviewAlgos = args["Classification"]["algos_multiview"] + stats_iter = args["Classification"]["stats_iter"] + hyper_param_search = args["Classification"]["hps_type"] + multiclass_method = args["Classification"]["multiclass_method"] + cl_type = args["Classification"]["type"] + monoview_algos = args["Classification"]["algos_monoview"] + multiview_algos = args["Classification"]["algos_multiview"] dataset_list = execution.find_dataset_names(args["Base"]["pathf"], args["Base"]["type"], args["Base"]["name"]) @@ -719,76 +722,76 @@ def execClassif(arguments): noise_results = [] for noise_std in args["Base"]["noise_std"]: - directory = execution.initLogFile(dataset_name, args["Base"]["views"], args["Classification"]["type"], + directory = execution.init_log_file(dataset_name, args["Base"]["views"], args["Classification"]["type"], args["Base"]["log"], args["Base"]["debug"], args["Base"]["label"], args["Base"]["res_dir"], args["Base"]["add_noise"], noise_std) - randomState = execution.initRandomState(args["Base"]["random_state"], directory) - statsIterRandomStates = execution.initStatsIterRandomStates(statsIter, - randomState) + random_state = execution.init_random_state(args["Base"]["random_state"], directory) + stats_iter_random_states = execution.init_stats_iter_random_states(stats_iter, + random_state) - getDatabase = execution.getDatabaseFunction(dataset_name, args["Base"]["type"]) + get_database = execution.get_database_function(dataset_name, args["Base"]["type"]) - DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args["Base"]["views"], + dataset_var, labels_dictionary, datasetname = get_database(args["Base"]["views"], args["Base"]["pathf"], dataset_name, args["Classification"]["nb_class"], args["Classification"]["classes"], - randomState, + random_state, args["Base"]["full"], args["Base"]["add_noise"], noise_std) args["Base"]["name"] = datasetname - splits = execution.genSplits(DATASET.get("Labels").value, args["Classification"]["split"], - statsIterRandomStates) + splits = execution.gen_splits(dataset_var.get("Labels").value, args["Classification"]["split"], + stats_iter_random_states) - 
multiclassLabels, labelsCombinations, indicesMulticlass = multiclass.genMulticlassLabels( - DATASET.get("Labels").value, multiclassMethod, splits) + multiclass_labels, labels_combinations, indices_multiclass = multiclass.gen_multiclass_labels( + dataset_var.get("Labels").value, multiclass_method, splits) - kFolds = execution.genKFolds(statsIter, args["Classification"]["nb_folds"], - statsIterRandomStates) + k_folds = execution.gen_k_folds(stats_iter, args["Classification"]["nb_folds"], + stats_iter_random_states) - datasetFiles = dataset.initMultipleDatasets(args["Base"]["pathf"], args["Base"]["name"], nbCores) + dataset_files = dataset.init_multiple_datasets(args["Base"]["pathf"], args["Base"]["name"], nb_cores) - views, viewsIndices, allViews = execution.initViews(DATASET, args["Base"]["views"]) - viewsDictionary = genViewsDictionnary(DATASET, views) - nbViews = len(views) - NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] + views, views_indices, all_views = execution.init_views(dataset_var, args["Base"]["views"]) + views_dictionary = gen_views_dictionnary(dataset_var, views) + nb_views = len(views) + nb_class = dataset_var.get("Metadata").attrs["nbClass"] metrics = [metric.split(":") for metric in args["Classification"]["metrics"]] if metrics == [["all"]]: - metricsNames = [name for _, name, isPackage + metrics_names = [name for _, name, isPackage in pkgutil.iter_modules( ['./mono_multi_view_classifiers/metrics']) if not isPackage and name not in ["framework", "log_loss", "matthews_corrcoef", "roc_auc_score"]] - metrics = [[metricName] for metricName in metricsNames] - metrics = arangeMetrics(metrics, args["Classification"]["metric_princ"]) + metrics = [[metricName] for metricName in metrics_names] + metrics = arange_metrics(metrics, args["Classification"]["metric_princ"]) for metricIndex, metric in enumerate(metrics): if len(metric) == 1: metrics[metricIndex] = [metric[0], None] - benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args) - initKWARGS = initKWARGSFunc(args, benchmark) - dataBaseTime = time.time() - start - argumentDictionaries = init_argument_dictionaries(benchmark, viewsDictionary, - NB_CLASS, initKWARGS) + benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos, args) + init_kwargs= init_kwargs_func(args, benchmark) + data_base_time = time.time() - start + argument_dictionaries = init_argument_dictionaries(benchmark, views_dictionary, + nb_class, init_kwargs) # argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, # NB_CLASS, initKWARGS) - directories = execution.genDirecortiesNames(directory, statsIter) - benchmarkArgumentDictionaries = execution.genArgumentDictionaries( - LABELS_DICTIONARY, directories, multiclassLabels, - labelsCombinations, indicesMulticlass, - hyperParamSearch, args, kFolds, - statsIterRandomStates, metrics, - argumentDictionaries, benchmark, nbViews, - views, viewsIndices) - nbMulticlass = len(labelsCombinations) - results_mean_stds = execBenchmark(nbCores, statsIter, nbMulticlass, - benchmarkArgumentDictionaries, splits, directories, - directory, multiclassLabels, metrics, LABELS_DICTIONARY, - NB_CLASS, DATASET) + directories = execution.gen_direcorties_names(directory, stats_iter) + benchmark_argument_dictionaries = execution.gen_argument_dictionaries( + labels_dictionary, directories, multiclass_labels, + labels_combinations, indices_multiclass, + hyper_param_search, args, k_folds, + stats_iter_random_states, metrics, + argument_dictionaries, benchmark, nb_views, + views, views_indices) + 
nb_multiclass = len(labels_combinations) + results_mean_stds = exec_benchmark(nb_cores, stats_iter, nb_multiclass, + benchmark_argument_dictionaries, splits, directories, + directory, multiclass_labels, metrics, labels_dictionary, + nb_class, dataset_var) noise_results.append([noise_std, results_mean_stds]) plot_results_noise(directory, noise_results, metrics[0][0], dataset_name) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 5745e67a992696783362632e4c868be410cdcd2b..5c807f3ac3523e21b9b17ec7c91b5112385fa564 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -16,7 +16,7 @@ from . import monoview_utils from .analyze_result import execute # Import own modules from .. import monoview_classifiers -from ..utils.dataset import getValue, extractSubset +from ..utils.dataset import get_value, extract_subset from ..utils import hyper_parameter_search # Author-Info @@ -27,7 +27,7 @@ __status__ = "Prototype" # Production, Development, Prototype # __date__ = 2016 - 03 - 25 -def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, +def exec_monoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, path, randomState, labels, hyperParamSearch="randomizedSearch", @@ -43,7 +43,7 @@ def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, +def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): @@ -146,7 +146,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): else: feat = X.attrs["name"] CL_type = kwargs["classifier_name"] - X = getValue(X) + X = get_value(X) learningRate = float(len(classificationIndices[0])) / ( len(classificationIndices[0]) + len(classificationIndices[1])) labelsString = "-".join(labelsNames) @@ -164,12 +164,12 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName -def initTrainTest(X, Y, classificationIndices): +def init_train_test(X, Y, classificationIndices): trainIndices, testIndices, testIndicesMulticlass = classificationIndices - X_train = extractSubset(X, trainIndices) - X_test = extractSubset(X, testIndices) + X_train = extract_subset(X, trainIndices) + X_test = extract_subset(X, testIndices) if np.array(testIndicesMulticlass).size != 0: - X_test_multiclass = extractSubset(X, testIndicesMulticlass) + X_test_multiclass = extract_subset(X, testIndicesMulticlass) else: X_test_multiclass = [] y_train = Y[trainIndices] @@ -319,7 +319,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # databaseType = None # # # Extract the data using MPI -# X, Y = dataset.getMonoviewShared(path, name, viewName) +# X, Y = dataset.get_monoview_shared(path, name, viewName) # # # Init log # logFileName = time.strftime( diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 
02ba90af927f6fa24b101e88dc067d261850db6e..696c97e5cafd44e82d78bebbd9dd71d21d3405d8 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -11,7 +11,7 @@ from .multiview_utils import MultiviewResult from . import analyze_results from .. import multiview_classifiers from ..utils import hyper_parameter_search -from ..utils.dataset import getShape +from ..utils.dataset import get_shape # Author-Info __author__ = "Baptiste Bauvin" @@ -38,7 +38,7 @@ def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( - getShape(DATASET, viewIndex))) + get_shape(DATASET, viewIndex))) return classifier_name, t_start, viewsIndices, classifier_config, views, learningRate @@ -77,7 +77,7 @@ def saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifier outputFileName + imageName + '.png', transparent=True) -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, +def exec_multiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=False, nbCores=1, metrics=None, @@ -91,7 +91,7 @@ def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, +def exec_multiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py index 07a63a6f8e6380c6ff463a0359cd73a63625a34b..4bb84dc6daf14a9ee282f8f233018370b5d91c3d 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 import numpy as np -from ....utils.dataset import getV +from ....utils.dataset import get_v from .... import monoview_classifiers class EarlyFusionClassifier(object): @@ -38,5 +38,5 @@ class EarlyFusionClassifier(object): weights = np.array([1 / nbView for i in range(nbView)]) if sum(weights) != 1: weights = weights / sum(weights) - self.monoviewData = np.concatenate([getV(DATASET, viewIndex, usedIndices) + self.monoviewData = np.concatenate([get_v(DATASET, viewIndex, usedIndices) for index, viewIndex in enumerate(viewsIndices)], axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py index 8beb6a299404e32d8b1791da34cad45961f87668..314b11bb3b85b0fb570450fc93923997c47c58e9 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py @@ -9,7 +9,7 @@ import math from .... import monoview_classifiers from .... 
import metrics -from ....utils.dataset import getV +from ....utils.dataset import get_v # def canProbasClassifier(classifierConfig): @@ -142,7 +142,7 @@ class LateFusionClassifier(object): trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( delayed(fitMonoviewClassifier)(self.monoviewClassifiers[index], - getV(DATASET, viewIndex, trainIndices), + get_v(DATASET, viewIndex, trainIndices), labels[trainIndices], self.needProbas, self.randomState) for index, viewIndex in enumerate(viewsIndices)) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py index 2f6d9b723fedf68094fe591b37b638fe81f729f5..61ec3838f238adeafa98a02471890e58eb1ccbd2 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py @@ -2,7 +2,7 @@ import numpy as np from sklearn.metrics import accuracy_score import pkgutil -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..... import monoview_classifiers from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig @@ -78,7 +78,7 @@ class BayesianInference(LateFusionClassifier): viewScores = []#np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): viewScores.append(np.power( - self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), + self.monoviewClassifiers[index].predict_proba(get_v(DATASET, viewIndex, usedIndices)), self.weights[index])) viewScores = np.array(viewScores) predictedLabels = np.argmax(np.prod(viewScores, axis=0), axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py index bef30f9cd687e389745a8b1f117718b84faeb2d4..bcdbfa8222d9f4aa79333dbf52132dff757da227 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -2,7 +2,7 @@ import numpy as np # from sklearn.metrics import accuracy_score # import pkgutil -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig from ..... 
import monoview_classifiers @@ -78,7 +78,7 @@ class MajorityVoting(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) for exampleIndex in range(datasetLength): for viewIndex, featureClassification in enumerate(monoViewDecisions[exampleIndex, :]): votes[exampleIndex, featureClassification] += self.weights[viewIndex] diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py index c5e7b9acab1504a506751722ebfbd13aa8637a9f..739ba0233224cd84057d66ec2b9442a72cbf69b2 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -9,7 +9,7 @@ import itertools from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig from ..... import monoview_classifiers -from .....utils.dataset import getV +from .....utils.dataset import get_v class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): @@ -119,7 +119,7 @@ class SCMForLinear(LateFusionClassifier): if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) for index, viewIndex in enumerate(viewsIndices): - self.monoviewClassifiers[index].fit(getV(DATASET, viewIndex, trainIndices), + self.monoviewClassifiers[index].fit(get_v(DATASET, viewIndex, trainIndices), labels[trainIndices]) self.SCMForLinearFusionFit(DATASET, labels, usedIndices=trainIndices, viewsIndices=viewsIndices) @@ -133,7 +133,7 @@ class SCMForLinear(LateFusionClassifier): # accus = [] for index, viewIndex in enumerate(viewsIndices): monoviewDecision = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) # accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) monoviewDecisions[:, index] = monoviewDecision features = self.generateInteractions(monoviewDecisions) @@ -150,7 +150,7 @@ class SCMForLinear(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) features = self.generateInteractions(monoViewDecisions) features = np.array([np.array([feat for feat in feature]) for feature in features]) self.SCMClassifier.fit(features, labels[usedIndices].astype(int)) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py index 8b4c984039282d8aa49e91a6328663f0a48768e8..509256190aaa545ae1dbae70083c183ff24f4ec8 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -4,7 +4,7 @@ from sklearn.svm import SVC 
import pkgutil from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..... import monoview_classifiers @@ -63,7 +63,7 @@ class SVMForLinear(LateFusionClassifier): if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) for index, viewIndex in enumerate(viewsIndices): - self.monoviewClassifiers[index].fit(getV(DATASET, viewIndex, trainIndices), + self.monoviewClassifiers[index].fit(get_v(DATASET, viewIndex, trainIndices), labels[trainIndices]) self.SVMForLinearFusionFit(DATASET, labels, usedIndices=trainIndices, viewsIndices=viewsIndices) @@ -79,7 +79,7 @@ class SVMForLinear(LateFusionClassifier): monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) predictedLabels = self.SVMClassifier.predict(monoviewDecisions) return predictedLabels @@ -91,7 +91,7 @@ class SVMForLinear(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) self.SVMClassifier.fit(monoViewDecisions, labels[usedIndices]) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py index d7d9f418bdaeccf190e9534e2ea407b71fdc1eef..baf9c56b9fab46b7702b0754da894a4e053e044f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -4,7 +4,7 @@ import pkgutil from ..... import monoview_classifiers from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig -from .....utils.dataset import getV +from .....utils.dataset import get_v def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -76,7 +76,7 @@ class WeightedLinear(LateFusionClassifier): viewScores = []#np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): viewScores.append(np.array(self.monoviewClassifiers[index].predict_proba( - getV(DATASET, viewIndex, usedIndices))) * self.weights[index]) + get_v(DATASET, viewIndex, usedIndices))) * self.weights[index]) viewScores = np.array(viewScores) predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py index f3d586fb424079f418525963737065397f42c1e0..c26387fb1057cb648ad97a8be10d0e54da1a5082 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py @@ -11,7 +11,7 @@ except ValueError: import pdb;pdb.set_trace() from ... 
import monoview_classifiers -from ...utils.dataset import getV +from ...utils.dataset import get_v # Author-Info __author__ = "Baptiste Bauvin" @@ -111,7 +111,7 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= weights = np.array([1 / NB_VIEW for i in range(NB_VIEW)]) if sum(weights) != 1: weights = weights / sum(weights) - monoviewData = np.concatenate([weights[index] * getV(DATASET, viewIndex, usedIndices) + monoviewData = np.concatenate([weights[index] * get_v(DATASET, viewIndex, usedIndices) for index, viewIndex in enumerate(viewsIndices)], axis=1) return monoviewData @@ -140,7 +140,7 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # classifierModule = getattr(monoview_classifiers, classifierName) # classifierMethod = getattr(classifierModule, "hyperParamSearch") # if fusionTypeName == "LateFusion": -# bestSettings.append(classifierMethod(getV(DATASET, viewsIndices[classifierIndex], learningIndices), +# bestSettings.append(classifierMethod(get_v(DATASET, viewsIndices[classifierIndex], learningIndices), # DATASET.get("Labels")[learningIndices], metric=metric, # nIter=nIter)) # else: diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index a851a609a1221a8560ec16c278ce0fb8ca9338b2..e3b93cf133397e36085a8be638408a06fc44bdf4 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -1,8 +1,9 @@ import numpy as np import inspect -from ..utils.dataset import getV +from ..utils.dataset import get_v from ..multiview.multiview_utils import BaseMultiviewClassifier, get_train_views_indices, ConfigGenerator + from .. import monoview_classifiers classifier_class_name = "WeightedLinearEarlyFusion" @@ -79,7 +80,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): def hdf5_to_monoview(self, dataset, exmaples): """Here, we concatenate the views for the asked examples """ monoview_data = np.concatenate( - [getV(dataset, view_idx, exmaples) + [get_v(dataset, view_idx, exmaples) for view_weight, (index, view_idx) in zip(self.view_weights, enumerate(self.view_indices))] , axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 50cb31a6078f9a577f8626dd74a460761e026b94..b7126de5a9e152c9066672cab7798f663ad1bd20 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -98,7 +98,7 @@ def autolabel(rects, ax, set=1, std=None): ha='center', va='bottom', size="small") -def getMetricsScoresBiclass(metrics, results): +def get_metrics_scores_biclass(metrics, results): r"""Used to extract metrics scores in case of biclass classification Parameters @@ -117,22 +117,22 @@ def getMetricsScoresBiclass(metrics, results): -`metricScores[metric_name]["trainScores"]` is a list of all the available classifiers scores on the train set, -`metricScores[metric_name]["testScores"]` is a list of all the available classifiers scores on the test set. 
""" - metricsScores = {} + metrics_scores = {} for metric in metrics: - classifiersNames = [] - trainScores = [] - testScores = [] + classifiers_names = [] + train_scores = [] + test_scores = [] for classifierResult in results: - trainScores.append(classifierResult.metrics_scores[metric[0]][0]) - testScores.append(classifierResult.metrics_scores[metric[0]][1]) - classifiersNames.append(classifierResult.get_classifier_name()) + train_scores.append(classifierResult.metrics_scores[metric[0]][0]) + test_scores.append(classifierResult.metrics_scores[metric[0]][1]) + classifiers_names.append(classifierResult.get_classifier_name()) - metricsScores[metric[0]] = {"classifiersNames": classifiersNames, - "trainScores": trainScores, - "testScores": testScores} - return metricsScores + metrics_scores[metric[0]] = {"classifiersNames": classifiers_names, + "trainScores": train_scores, + "testScores": test_scores} + return metrics_scores def getExampleErrorsBiclass(groud_truth, results): @@ -1006,7 +1006,7 @@ def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, return results -def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, +def get_results(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py index 2a37b3d12408777f022309c12706d174ea2abcc1..c236a6a6b1601cdae70f689e8588126f203eab11 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py @@ -11,87 +11,86 @@ from . 
import get_multiview_db as DB - -def getV(DATASET, viewIndex, usedIndices=None): +def get_v(dataset, view_index, used_indices=None): """Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset""" - if usedIndices is None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(usedIndices) is int: - return DATASET.get("View" + str(viewIndex))[usedIndices, :] + if used_indices is None: + used_indices = range(dataset.get("Metadata").attrs["datasetLength"]) + if type(used_indices) is int: + return dataset.get("View" + str(view_index))[used_indices, :] else: - usedIndices = np.array(usedIndices) - sortedIndices = np.argsort(usedIndices) - usedIndices = usedIndices[sortedIndices] + used_indices = np.array(used_indices) + sorted_indices = np.argsort(used_indices) + used_indices = used_indices[sorted_indices] - if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: - return DATASET.get("View" + str(viewIndex))[usedIndices, :][ - np.argsort(sortedIndices), :] + if not dataset.get("View" + str(view_index)).attrs["sparse"]: + return dataset.get("View" + str(view_index))[used_indices, :][ + np.argsort(sorted_indices), :] else: sparse_mat = sparse.csr_matrix( - (DATASET.get("View" + str(viewIndex)).get("data").value, - DATASET.get("View" + str(viewIndex)).get("indices").value, - DATASET.get("View" + str(viewIndex)).get("indptr").value), - shape=DATASET.get("View" + str(viewIndex)).attrs["shape"])[ - usedIndices, :][ - np.argsort(sortedIndices), :] + (dataset.get("View" + str(view_index)).get("data").value, + dataset.get("View" + str(view_index)).get("indices").value, + dataset.get("View" + str(view_index)).get("indptr").value), + shape=dataset.get("View" + str(view_index)).attrs["shape"])[ + used_indices, :][ + np.argsort(sorted_indices), :] return sparse_mat -def getShape(DATASET, viewIndex): +def get_shape(dataset, view_index): """Used to get the dataset shape even if it's sparse""" - if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: - return DATASET.get("View" + str(viewIndex)).shape + if not dataset.get("View" + str(view_index)).attrs["sparse"]: + return dataset.get("View" + str(view_index)).shape else: - return DATASET.get("View" + str(viewIndex)).attrs["shape"] + return dataset.get("View" + str(view_index)).attrs["shape"] -def getValue(DATASET): +def get_value(dataset): """Used to get the value of a view in the HDF5 dataset even if it sparse""" - if not DATASET.attrs["sparse"]: - return DATASET.value + if not dataset.attrs["sparse"]: + return dataset.value else: - sparse_mat = sparse.csr_matrix((DATASET.get("data").value, - DATASET.get("indices").value, - DATASET.get("indptr").value), - shape=DATASET.attrs["shape"]) + sparse_mat = sparse.csr_matrix((dataset.get("data").value, + dataset.get("indices").value, + dataset.get("indptr").value), + shape=dataset.attrs["shape"]) return sparse_mat -def extractSubset(matrix, usedIndices): +def extract_subset(matrix, used_indices): """Used to extract a subset of a matrix even if it's sparse""" if sparse.issparse(matrix): - newIndptr = np.zeros(len(usedIndices) + 1, dtype=int) + new_indptr = np.zeros(len(used_indices) + 1, dtype=int) oldindptr = matrix.indptr - for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndptr[exampleIndexIndex + 1] = newIndptr[exampleIndexIndex] + ( + for exampleIndexIndex, exampleIndex in enumerate(used_indices): + new_indptr[exampleIndexIndex + 1] = new_indptr[exampleIndexIndex] + ( oldindptr[exampleIndex + 1] - oldindptr[exampleIndex]) - newData = 
np.ones(newIndptr[-1], dtype=bool) - newIndices = np.zeros(newIndptr[-1], dtype=int) - oldIndices = matrix.indices - for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndices[newIndptr[exampleIndexIndex]:newIndptr[ - exampleIndexIndex + 1]] = oldIndices[ + new_data = np.ones(new_indptr[-1], dtype=bool) + new_indices = np.zeros(new_indptr[-1], dtype=int) + old_indices = matrix.indices + for exampleIndexIndex, exampleIndex in enumerate(used_indices): + new_indices[new_indptr[exampleIndexIndex]:new_indptr[ + exampleIndexIndex + 1]] = old_indices[ oldindptr[exampleIndex]: oldindptr[exampleIndex + 1]] - return sparse.csr_matrix((newData, newIndices, newIndptr), - shape=(len(usedIndices), matrix.shape[1])) + return sparse.csr_matrix((new_data, new_indices, new_indptr), + shape=(len(used_indices), matrix.shape[1])) else: - return matrix[usedIndices] + return matrix[used_indices] -def initMultipleDatasets(pathF, name, nbCores): +def init_multiple_datasets(path_f, name, nb_cores): r"""Used to create copies of the dataset if multicore computation is used. This is a temporary solution to fix the sharing memory issue with HDF5 datasets. Parameters ---------- - pathF : string + path_f : string Path to the original dataset directory name : string Name of the dataset - nbCores : int + nb_cores : int The number of threads that the benchmark can use Returns @@ -99,25 +98,25 @@ def initMultipleDatasets(pathF, name, nbCores): datasetFiles : None Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. """ - if nbCores > 1: - if DB.datasetsAlreadyExist(pathF, name, nbCores): + if nb_cores > 1: + if DB.datasetsAlreadyExist(path_f, name, nb_cores): logging.debug( "Info:\t Enough copies of the dataset are already available") pass else: logging.debug("Start:\t Creating " + str( - nbCores) + " temporary datasets for multiprocessing") + nb_cores) + " temporary datasets for multiprocessing") logging.warning( " WARNING : /!\ This may use a lot of HDD storage space : " + - str(os.path.getsize(pathF + name + ".hdf5") * nbCores / float( + str(os.path.getsize(path_f + name + ".hdf5") * nb_cores / float( 1024) / 1000 / 1000) + " Gbytes /!\ ") confirmation = confirm() if not confirmation: sys.exit(0) else: - datasetFiles = DB.copyHDF5(pathF, name, nbCores) + dataset_files = DB.copyHDF5(path_f, name, nb_cores) logging.debug("Start:\t Creating datasets for multiprocessing") - return datasetFiles + return dataset_files def confirm(resp=True, timeout=15): @@ -143,10 +142,9 @@ def input_(timeout=15): else: return "y" - -def getMonoviewShared(path, name, viewName, labelsNames, classificationIndices): +def get_monoview_shared(path, name, view_name, labels_names, classification_indices): """ATM is not used with shared memory, but soon :)""" - HDF5_dataset_file = h5py.File(path + name + ".hdf5", "w") - X = HDF5_dataset_file.get(viewName).value - Y = HDF5_dataset_file.get("Labels").value - return X, Y + hdf5_dataset_file = h5py.File(path + name + ".hdf5", "w") + X = hdf5_dataset_file.get(view_name).value + y = hdf5_dataset_file.get("Labels").value + return X, y diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index b0e3779f3780def50e4e057b0e41693f79a3c6fe..e5bfe258bccf5dc9dfd36c6c7ce43c6590ca0418 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -10,7 +10,7 @@ import sklearn from 
. import get_multiview_db as DB -def parseTheArgs(arguments): +def parse_the_args(arguments): """Used to parse the args entered by the user""" parser = argparse.ArgumentParser( @@ -24,749 +24,83 @@ def parseTheArgs(arguments): help='Path to the hdf5 dataset or database ' 'folder (default: %(default)s)', default='../config_files/config.yml') -# groupStandard.add_argument('-log', action='store_true', -# help='Use option to activate logging to console') -# groupStandard.add_argument('--name', metavar='STRING', nargs='+', action='store', -# help='Name of Database (default: %(default)s)', -# default=['Plausible']) -# groupStandard.add_argument('--label', metavar='STRING', action='store', -# help='Labeling the results directory (default: ' -# '%(default)s)', -# default='') -# groupStandard.add_argument('--type', metavar='STRING', action='store', -# help='Type of database : .hdf5 or .csv (' -# 'default: %(default)s)', -# default='.hdf5') -# groupStandard.add_argument('--views', metavar='STRING', action='store', -# nargs="+", -# help='Name of the views selected for learning ' -# '(default: %(default)s)', -# default=['']) -# groupStandard.add_argument('--pathF', metavar='STRING', action='store', -# help='Path to the hdf5 dataset or database ' -# 'folder (default: %(default)s)', -# default='../data/') -# groupStandard.add_argument('--nice', metavar='INT', action='store', -# type=int, -# help='Niceness for the processes', default=0) -# groupStandard.add_argument('--randomState', metavar='STRING', -# action='store', -# help="The random state seed to use or the path " -# "to a pickle file where it is stored", -# default=None) -# groupStandard.add_argument('--nbCores', metavar='INT', action='store', -# help='Number of cores to use for parallel ' -# 'computing, -1 for all', -# type=int, default=2) -# groupStandard.add_argument('--machine', metavar='STRING', action='store', -# help='Type of machine on which the script runs', -# default="PC") -# groupStandard.add_argument('-full', action='store_true', -# help='Use option to use full dataset and no ' -# 'labels or view filtering') -# groupStandard.add_argument('-debug', action='store_true', -# help='Use option to bebug implemented algorithms') -# groupStandard.add_argument('-add_noise', action='store_true', -# help='Use option to add noise to the data') -# groupStandard.add_argument('--noise_std', metavar='FLOAT', nargs="+", action='store', -# help='The std of the gaussian noise that will ' -# 'be added to the data.', -# type=float, default=[0.0]) -# groupStandard.add_argument('--res_dir', metavar='STRING', action='store', -# help='The path to the result directory', -# default="../results/") -# -# groupClass = parser.add_argument_group('Classification arguments') -# groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', -# action='store', -# help='Determine which multiclass method to use if ' -# 'the dataset is multiclass', -# default="oneVersusOne") -# groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', -# help='Determine the split ratio between learning ' -# 'and validation sets', -# type=float, -# default=0.2) -# groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', -# help='Number of folds in cross validation', -# type=int, default=2) -# groupClass.add_argument('--CL_nbClass', metavar='INT', action='store', -# help='Number of classes, -1 for all', type=int, -# default=2) -# groupClass.add_argument('--CL_classes', metavar='STRING', action='store', -# nargs="+", -# help='Classes used in the dataset (names of 
the ' -# 'folders) if not filled, random classes will ' -# 'be ' -# 'selected', default=["yes", "no"]) -# groupClass.add_argument('--CL_type', metavar='STRING', action='store', -# nargs="+", -# help='Determine whether to use multiview and/or ' -# 'monoview, or Benchmark classification', -# default=['monoview', 'multiview']) -# groupClass.add_argument('--CL_algos_monoview', metavar='STRING', -# action='store', nargs="+", -# help='Determine which monoview classifier to use ' -# 'if empty, considering all', -# default=['']) -# groupClass.add_argument('--CL_algos_multiview', metavar='STRING', -# action='store', nargs="+", -# help='Determine which multiview classifier to use ' -# 'if empty, considering all', -# default=['']) -# groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', -# help="Number of iteration for each algorithm to " -# "mean preds on different random states. " -# "If using multiple cores, it's highly " -# "recommended to use statsiter mod nbCores == " -# "0", -# type=int, -# default=2) -# groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', -# nargs="+", -# help='Determine which metrics to use, separate ' -# 'metric and configuration with ":". ' -# 'If multiple, separate with space. If no ' -# 'metric is specified, ' -# 'considering all' -# , default=['']) -# groupClass.add_argument('--CL_metric_princ', metavar='STRING', -# action='store', -# help='Determine which metric to use for ' -# 'randomSearch and optimization', -# default="f1_score") -# groupClass.add_argument('--CL_HPS_iter', metavar='INT', action='store', -# help='Determine how many hyper parameters ' -# 'optimization tests to do', -# type=int, default=2) -# groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store', -# help='Determine which hyperparamter search ' -# 'function use', -# default="randomizedSearch") -# -# groupRF = parser.add_argument_group('Random Forest arguments') -# groupRF.add_argument('--RF_trees', metavar='INT', type=int, action='store', -# help='Number max trees',nargs="+", -# default=[25]) -# groupRF.add_argument('--RF_max_depth', metavar='INT', type=int, -# action='store',nargs="+", -# help='Max depth for the trees', -# default=[5]) -# groupRF.add_argument('--RF_criterion', metavar='STRING', action='store', -# help='Criterion for the trees',nargs="+", -# default=["entropy"]) -# -# groupSVMLinear = parser.add_argument_group('Linear SVM arguments') -# groupSVMLinear.add_argument('--SVML_C', metavar='INT', type=int, -# action='store', nargs="+", help='Penalty parameter used', -# default=[1]) -# -# groupSVMRBF = parser.add_argument_group('SVW-RBF arguments') -# groupSVMRBF.add_argument('--SVMRBF_C', metavar='INT', type=int, -# action='store', nargs="+", help='Penalty parameter used', -# default=[1]) -# -# groupSVMPoly = parser.add_argument_group('Poly SVM arguments') -# groupSVMPoly.add_argument('--SVMPoly_C', metavar='INT', type=int, -# action='store', nargs="+", help='Penalty parameter used', -# default=[1]) -# groupSVMPoly.add_argument('--SVMPoly_deg', nargs="+", metavar='INT', type=int, -# action='store', help='Degree parameter used', -# default=[2]) -# -# groupAdaboost = parser.add_argument_group('Adaboost arguments') -# groupAdaboost.add_argument('--Ada_n_est', metavar='INT', type=int, -# action='store', nargs="+", help='Number of estimators', -# default=[2]) -# groupAdaboost.add_argument('--Ada_b_est', metavar='STRING', action='store', -# help='Estimators',nargs="+", -# default=['DecisionTreeClassifier']) -# -# groupAdaboostPregen = 
parser.add_argument_group('AdaboostPregen arguments') -# groupAdaboostPregen.add_argument('--AdP_n_est', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of estimators', -# default=[100]) -# groupAdaboostPregen.add_argument('--AdP_b_est', metavar='STRING', -# action='store',nargs="+", -# help='Estimators', -# default=['DecisionTreeClassifier']) -# groupAdaboostPregen.add_argument('--AdP_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps inthe ' -# 'pregenerated dataset', -# default=[1]) -# -# groupAdaboostGraalpy = parser.add_argument_group( -# 'AdaboostGraalpy arguments') -# groupAdaboostGraalpy.add_argument('--AdG_n_iter', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of estimators', -# default=[100]) -# groupAdaboostGraalpy.add_argument('--AdG_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps inthe ' -# 'pregenerated dataset', -# default=[1]) -# -# groupDT = parser.add_argument_group('Decision Trees arguments') -# groupDT.add_argument('--DT_depth', metavar='INT', type=int, action='store', -# help='Determine max depth for Decision Trees',nargs="+", -# default=[3]) -# groupDT.add_argument('--DT_criterion', metavar='STRING', action='store', -# help='Determine max depth for Decision Trees',nargs="+", -# default=["entropy"]) -# groupDT.add_argument('--DT_splitter', metavar='STRING', action='store', -# help='Determine criterion for Decision Trees',nargs="+", -# default=["random"]) -# -# groupDTP = parser.add_argument_group('Decision Trees pregen arguments') -# groupDTP.add_argument('--DTP_depth', metavar='INT', type=int, -# action='store',nargs="+", -# help='Determine max depth for Decision Trees', -# default=[3]) -# groupDTP.add_argument('--DTP_criterion', metavar='STRING', action='store', -# help='Determine max depth for Decision Trees',nargs="+", -# default=["entropy"]) -# groupDTP.add_argument('--DTP_splitter', metavar='STRING', action='store', -# help='Determine criterion for Decision Trees',nargs="+", -# default=["random"]) -# groupDTP.add_argument('--DTP_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Determine the number of stumps for Decision ' -# 'Trees pregen', -# default=[1]) -# -# groupSGD = parser.add_argument_group('SGD arguments') -# groupSGD.add_argument('--SGD_alpha', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Determine alpha for SGDClassifier', default=[0.1]) -# groupSGD.add_argument('--SGD_loss', metavar='STRING', action='store', -# help='Determine loss for SGDClassifier',nargs="+", -# default=['log']) -# groupSGD.add_argument('--SGD_penalty', metavar='STRING', action='store', -# help='Determine penalty for SGDClassifier', nargs="+", -# default=['l2']) -# -# groupKNN = parser.add_argument_group('KNN arguments') -# groupKNN.add_argument('--KNN_neigh', metavar='INT', type=int, -# action='store',nargs="+", -# help='Determine number of neighbors for KNN', -# default=[1]) -# groupKNN.add_argument('--KNN_weights', nargs="+", -# metavar='STRING', action='store', -# help='Determine number of neighbors for KNN', -# default=["distance"]) -# groupKNN.add_argument('--KNN_algo', metavar='STRING', action='store', -# help='Determine number of neighbors for KNN', -# default=["auto"],nargs="+", ) -# groupKNN.add_argument('--KNN_p', metavar='INT', nargs="+", -# type=int, action='store', -# help='Determine number of neighbors for KNN', -# default=[1]) -# -# groupSCM = parser.add_argument_group('SCM arguments') -# 
groupSCM.add_argument('--SCM_max_rules', metavar='INT', type=int, -# action='store', nargs="+", -# help='Max number of rules for SCM', default=[1]) -# groupSCM.add_argument('--SCM_p', metavar='FLOAT', type=float, -# action='store', nargs="+", -# help='Max number of rules for SCM', default=[1.0]) -# groupSCM.add_argument('--SCM_model_type', metavar='STRING', action='store', -# help='Max number of rules for SCM', nargs="+", -# default=["conjunction"]) -# -# groupSCMPregen = parser.add_argument_group('SCMPregen arguments') -# groupSCMPregen.add_argument('--SCP_max_rules', metavar='INT', type=int, -# action='store',nargs="+", -# help='Max number of rules for SCM', default=[1]) -# groupSCMPregen.add_argument('--SCP_p', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Max number of rules for SCM', default=[1.0]) -# groupSCMPregen.add_argument('--SCP_model_type', metavar='STRING', -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=["conjunction"]) -# groupSCMPregen.add_argument('--SCP_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps per attribute', -# default=[1]) -# -# groupSCMSparsity = parser.add_argument_group('SCMSparsity arguments') -# groupSCMSparsity.add_argument('--SCS_max_rules', metavar='INT', type=int, -# action='store',nargs="+", -# help='Max number of rules for SCM', default=[1]) -# groupSCMSparsity.add_argument('--SCS_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps', default=[1]) -# groupSCMSparsity.add_argument('--SCS_p', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=[1.0]) -# groupSCMSparsity.add_argument('--SCS_model_type', metavar='STRING', -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=["conjunction"]) -# -# groupCQBoost = parser.add_argument_group('CQBoost arguments') -# groupCQBoost.add_argument('--CQB_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for CQBoost', -# default=[0.001]) -# groupCQBoost.add_argument('--CQB_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for CQBoost', -# default=[1e-06]) -# groupCQBoost.add_argument('--CQB_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the number of stumps for CQBoost', -# default=[1]) -# groupCQBoost.add_argument('--CQB_n_iter', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the maximum number of iteration in ' -# 'CQBoost', -# default=[None]) -# -# groupCQBoostv2 = parser.add_argument_group('CQBoostv2 arguments') -# groupCQBoostv2.add_argument('--CQB2_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for CQBoostv2', -# default=[0.002]) -# groupCQBoostv2.add_argument('--CQB2_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for CQBoostv2', -# default=[1e-08]) -# -# groupCQBoostv21 = parser.add_argument_group('CQBoostv21 arguments') -# groupCQBoostv21.add_argument('--CQB21_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for CQBoostv2', -# default=[0.001]) -# groupCQBoostv21.add_argument('--CQB21_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for CQBoostv2', -# default=[1e-08]) -# -# groupQarBoost = parser.add_argument_group('QarBoost arguments') -# 
groupQarBoost.add_argument('--QarB_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for QarBoost', -# default=[0.001]) -# groupQarBoost.add_argument('--QarB_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for QarBoost', -# default=[1e-08]) -# -# groupCGreed = parser.add_argument_group('CGreed arguments') -# groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the n_stumps_per_attribute parameter ' -# 'for CGreed', -# default=[1]) -# groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the n_max_iterations parameter for ' -# 'CGreed', -# default=[100]) -# -# groupCGDesc = parser.add_argument_group('CGDesc arguments') -# groupCGDesc.add_argument('--CGD_stumps', nargs="+", metavar='INT', type=int, -# action='store', -# help='Set the n_stumps_per_attribute parameter ' -# 'for CGreed', -# default=[1]) -# groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set the n_max_iterations parameter for ' -# 'CGreed', -# default=[10]) -# -# groupCBBoost= parser.add_argument_group('CBBoost arguments') -# groupCBBoost.add_argument('--CBB_stumps', nargs="+", metavar='INT', type=int, -# action='store', -# help='Set the n_stumps_per_attribute parameter ' -# 'for CBBoost', -# default=[1]) -# groupCBBoost.add_argument('--CBB_n_iter', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set the n_max_iterations parameter for ' -# 'CBBoost', -# default=[100]) -# -# groupCGDescTree = parser.add_argument_group('CGDesc arguments') -# groupCGDescTree.add_argument('--CGDT_trees', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set thenumber of trees for CGreed', -# default=[100]) -# groupCGDescTree.add_argument('--CGDT_n_iter', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set the n_max_iterations parameter for ' -# 'CGreed', -# default=[100]) -# groupCGDescTree.add_argument('--CGDT_max_depth', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set the n_max_iterations parameter for CGreed', -# default=[2]) -# -# groupMinCQGraalpyTree = parser.add_argument_group( -# 'MinCQGraalpyTree arguments') -# groupMinCQGraalpyTree.add_argument('--MCGT_mu', metavar='FLOAT', type=float, -# action='store', nargs="+", -# help='Set the mu_parameter for MinCQGraalpy', -# default=[0.05]) -# groupMinCQGraalpyTree.add_argument('--MCGT_trees', metavar='INT', type=int, -# action='store', nargs="+", -# help='Set the n trees parameter for MinCQGraalpy', -# default=[100]) -# groupMinCQGraalpyTree.add_argument('--MCGT_max_depth', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Set the n_stumps_per_attribute parameter for MinCQGraalpy', -# default=[2]) -# -# groupCQBoostTree = parser.add_argument_group('CQBoostTree arguments') -# groupCQBoostTree.add_argument('--CQBT_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for CQBoost', -# default=[0.001]) -# groupCQBoostTree.add_argument('--CQBT_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for CQBoost', -# default=[1e-06]) -# groupCQBoostTree.add_argument('--CQBT_trees', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the number of trees for CQBoost', -# default=[100]) -# groupCQBoostTree.add_argument('--CQBT_max_depth', metavar='INT', type=int, -# 
action='store',nargs="+", -# help='Set the number of stumps for CQBoost', -# default=[2]) -# groupCQBoostTree.add_argument('--CQBT_n_iter', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the maximum number of iteration in CQBoostTree', -# default=[None]) -# -# groupSCMPregenTree = parser.add_argument_group('SCMPregenTree arguments') -# groupSCMPregenTree.add_argument('--SCPT_max_rules', metavar='INT', type=int, -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=[1]) -# groupSCMPregenTree.add_argument('--SCPT_p', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=[1.0]) -# groupSCMPregenTree.add_argument('--SCPT_model_type', metavar='STRING', -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=["conjunction"]) -# groupSCMPregenTree.add_argument('--SCPT_trees', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps per attribute', -# default=[100]) -# groupSCMPregenTree.add_argument('--SCPT_max_depth', metavar='INT', type=int, -# action='store',nargs="+", -# help='Max_depth of the trees', -# default=[1]) -# -# groupSCMSparsityTree = parser.add_argument_group( -# 'SCMSparsityTree arguments') -# groupSCMSparsityTree.add_argument('--SCST_max_rules', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Max number of rules for SCM', -# default=[1]) -# groupSCMSparsityTree.add_argument('--SCST_p', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=[1.0]) -# groupSCMSparsityTree.add_argument('--SCST_model_type', metavar='STRING', -# action='store',nargs="+", -# help='Max number of rules for SCM', -# default=["conjunction"]) -# groupSCMSparsityTree.add_argument('--SCST_trees', metavar='INT', type=int, -# action='store',nargs="+", -# help='Number of stumps per attribute', -# default=[100]) -# groupSCMSparsityTree.add_argument('--SCST_max_depth', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Max_depth of the trees', -# default=[1]) -# -# groupAdaboostPregenTree = parser.add_argument_group( -# 'AdaboostPregenTrees arguments') -# groupAdaboostPregenTree.add_argument('--AdPT_n_est', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Number of estimators', -# default=[100]) -# groupAdaboostPregenTree.add_argument('--AdPT_b_est', metavar='STRING', -# action='store',nargs="+", -# help='Estimators', -# default=['DecisionTreeClassifier']) -# groupAdaboostPregenTree.add_argument('--AdPT_trees', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Number of trees in the pregenerated dataset', -# default=[100]) -# groupAdaboostPregenTree.add_argument('--AdPT_max_depth', metavar='INT', -# type=int,nargs="+", -# action='store', -# help='Number of stumps inthe pregenerated dataset', -# default=[3]) -# -# groupLasso = parser.add_argument_group('Lasso arguments') -# groupLasso.add_argument('--LA_n_iter', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the max_iter parameter for Lasso', -# default=[1]) -# groupLasso.add_argument('--LA_alpha', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the alpha parameter for Lasso', -# default=[1.0]) -# -# groupGradientBoosting = parser.add_argument_group( -# 'Gradient Boosting arguments') -# groupGradientBoosting.add_argument('--GB_n_est', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the n_estimators_parameter for Gradient Boosting', -# 
default=[100]) -# -# groupMinCQ = parser.add_argument_group('MinCQ arguments') -# groupMinCQ.add_argument('--MCQ_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu_parameter for MinCQ', -# default=[0.05]) -# groupMinCQ.add_argument('--MCQ_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the n_stumps_per_attribute parameter for MinCQ', -# default=[1]) -# -# groupMinCQGraalpy = parser.add_argument_group('MinCQGraalpy arguments') -# groupMinCQGraalpy.add_argument('--MCG_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu_parameter for MinCQGraalpy', -# default=[0.05]) -# groupMinCQGraalpy.add_argument('--MCG_stumps', metavar='INT', type=int, -# action='store',nargs="+", -# help='Set the n_stumps_per_attribute parameter for MinCQGraalpy', -# default=[1]) -# -# groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments') -# groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for QarBoostv3', -# default=[0.001]) -# groupQarBoostv3.add_argument('--QarB3_epsilon', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the epsilon parameter for QarBoostv3', -# default=[1e-08]) -# -# groupQarBoostNC = parser.add_argument_group('QarBoostNC arguments') -# groupQarBoostNC.add_argument('--QarBNC_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for QarBoostNC', -# default=[0.001]) -# groupQarBoostNC.add_argument('--QarBNC_epsilon', metavar='FLOAT', -# type=float, action='store',nargs="+", -# help='Set the epsilon parameter for QarBoostNC', -# default=[1e-08]) -# -# groupQarBoostNC2 = parser.add_argument_group('QarBoostNC2 arguments') -# groupQarBoostNC2.add_argument('--QarBNC2_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for QarBoostNC2', -# default=[0.001]) -# groupQarBoostNC2.add_argument('--QarBNC2_epsilon', metavar='FLOAT', -# type=float, action='store',nargs="+", -# help='Set the epsilon parameter for QarBoostNC2', -# default=[1e-08]) -# -# groupQarBoostNC3 = parser.add_argument_group('QarBoostNC3 arguments') -# groupQarBoostNC3.add_argument('--QarBNC3_mu', metavar='FLOAT', type=float, -# action='store',nargs="+", -# help='Set the mu parameter for QarBoostNC3', -# default=[0.001]) -# groupQarBoostNC3.add_argument('--QarBNC3_epsilon', metavar='FLOAT', -# type=float, action='store',nargs="+", -# help='Set the epsilon parameter for QarBoostNC3', -# default=[1e-08]) -# -# # -# # multiview args -# # -# -# groupMumbo = parser.add_argument_group('Mumbo arguments') -# groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', -# nargs="+", -# help='Determine which monoview classifier to use with Mumbo', -# default=['']) -# groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', -# nargs='+', -# help='Configuration for the monoview classifier in Mumbo separate each classifier with sapce and each argument with:', -# default=['']) -# groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, -# help='Max number of iteration, min number of iteration, convergence threshold', -# type=float, -# default=[10, 1, 0.01]) -# groupMumbo.add_argument('--MU_combination', action='store_true', -# help='Try all the monoview classifiers combinations for each view', -# default=False) -# -# groupFusion = parser.add_argument_group('fusion arguments') -# groupFusion.add_argument('--FU_types', 
metavar='STRING', action='store', -# nargs="+", -# help='Determine which type of fusion to use', -# default=['']) -# groupEarlyFusion = parser.add_argument_group('Early fusion arguments') -# groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', -# action='store', nargs="+", -# help='Determine which early fusion method of fusion to use', -# default=['']) -# groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', -# action='store', nargs='+', -# help='Configuration for the early fusion methods separate ' -# 'method by space and values by :', -# default=['']) -# groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', -# action='store', nargs='+', -# help='Configuration for the monoview classifiers used separate classifier by space ' -# 'and configs must be of form argument1_name:value,argument2_name:value', -# default=['']) -# groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', -# action='store', nargs='+', -# help='Name of the classifiers used for each early fusion method', -# default=['']) -# -# groupLateFusion = parser.add_argument_group('Late fusion arguments') -# groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', -# action='store', nargs="+", -# help='Determine which late fusion method of fusion to use', -# default=['']) -# groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', -# action='store', nargs='+', -# help='Configuration for the fusion method', -# default=['']) -# groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', -# action='store', nargs='+', -# help='Configuration for the monoview classifiers used', -# default=['']) -# groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', -# action='store', nargs="+", -# help='Names of the classifier used for late fusion', -# default=['']) -# groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', -# action='store', -# help='Determine which method to use to select the monoview classifiers', -# default="intersect") -# -# groupFatLateFusion = parser.add_argument_group('Fat Late fusion arguments') -# groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', -# action='store', nargs="+", -# help='Determine the weights of each monoview decision for FLF', -# type=float, -# default=[]) -# -# groupFatSCMLateFusion = parser.add_argument_group( -# 'Fat SCM Late fusion arguments') -# groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', -# action='store', -# help='Determine the p argument of the SCM', -# type=float, -# default=0.5) -# groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', -# action='store', -# help='Determine the maximum number of aibutes used by the SCM', -# type=int, -# default=4) -# groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', -# action='store', -# help='Determine the model type of the SCM', -# default="conjunction") -# -# groupDisagreeFusion = parser.add_argument_group( -# 'Disagreement based fusion arguments') -# groupDisagreeFusion.add_argument('--DGF_weights', metavar='FLOAT', -# action='store', nargs="+", -# help='Determine the weights of each monoview decision for DFG', -# type=float, -# default=[]) - args = parser.parse_args(arguments) return args -def initRandomState(randomStateArg, directory): +def init_random_state(random_state_arg, directory): r""" Used to init a random state. If no random state is specified, it will generate a 'random' seed. 
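The bulk of the deletions above are the old command-line options, which now live in the YAML config file shown at the top of this patch; the parser keeps essentially one argument pointing to that file. Below is a minimal, illustrative reconstruction of the trimmed-down parser. The flag name --config_path is only a placeholder, since the hunk above is cut before the actual argument name.

import argparse


def parse_the_args(arguments):
    # Illustrative only: the real parser lives in utils/execution.py and the
    # actual flag name is not visible in this hunk ("--config_path" is a guess).
    parser = argparse.ArgumentParser(
        description="Run a mono/multiview benchmark described by a YAML config file.")
    parser.add_argument("--config_path", metavar="STRING", action="store",
                        help="Path to the config file",
                        default="../config_files/config.yml")
    return parser.parse_args(arguments)


args = parse_the_args([])
print(args.config_path)  # ../config_files/config.yml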
-    If the `randomSateArg` is a string containing only numbers, it will be converted in an int to generate a seed.
-    If the `randomSateArg` is a string with letters, it must be a path to a pickled random state file that will be loaded.
+    If the `random_state_arg` is a string containing only numbers, it will be converted into
+    an int to generate a seed.
+    If the `random_state_arg` is a string with letters, it must be a path to a pickled random
+    state file that will be loaded.
     The function will also pickle the new random state in a file tobe able to retrieve it later. Tested

     Parameters
     ----------
-    randomStateArg : None or string
+    random_state_arg : None or string
         See function description.
     directory : string
         Path to the results directory.

     Returns
     -------
-    randomState : numpy.random.RandomState object
+    random_state : numpy.random.RandomState object
         This random state will be used all along the benchmark .
     """
-    if randomStateArg is None:
-        randomState = np.random.RandomState(randomStateArg)
+    if random_state_arg is None:
+        random_state = np.random.RandomState(random_state_arg)
     else:
         try:
-            seed = int(randomStateArg)
-            randomState = np.random.RandomState(seed)
+            seed = int(random_state_arg)
+            random_state = np.random.RandomState(seed)
         except ValueError:
-            fileName = randomStateArg
-            with open(fileName, 'rb') as handle:
-                randomState = pickle.load(handle)
+            file_name = random_state_arg
+            with open(file_name, 'rb') as handle:
+                random_state = pickle.load(handle)
     with open(directory + "randomState.pickle", "wb") as handle:
-        pickle.dump(randomState, handle)
-    return randomState
+        pickle.dump(random_state, handle)
+    return random_state


-def initStatsIterRandomStates(statsIter, randomState):
+def init_stats_iter_random_states(stats_iter, random_state):
     r""" Used to initialize multiple random states if needed because of multiple statistical iteration of the same benchmark

     Parameters
     ----------
-    statsIter : int
+    stats_iter : int
         Number of statistical iterations of the same benchmark done (with a different random state).
-    randomState : numpy.random.RandomState object
+    random_state : numpy.random.RandomState object
         The random state of the whole experimentation, that will be used to generate the ones for each statistical iteration.

     Returns
     -------
-    statsIterRandomStates : list of numpy.random.RandomState objects
+    stats_iter_random_states : list of numpy.random.RandomState objects
         Multiple random states, one for each sattistical iteration of the same benchmark.
     """
-    if statsIter > 1:
-        statsIterRandomStates = [
-            np.random.RandomState(randomState.randint(5000)) for _ in
-            range(statsIter)]
+    if stats_iter > 1:
+        stats_iter_random_states = [
+            np.random.RandomState(random_state.randint(5000)) for _ in
+            range(stats_iter)]
     else:
-        statsIterRandomStates = [randomState]
-    return statsIterRandomStates
+        stats_iter_random_states = [random_state]
+    return stats_iter_random_states


-def getDatabaseFunction(name, type):
+def get_database_function(name, type_var):
     r"""Used to get the right database extraction function according to the type of database and it's name

     Parameters
     ----------
     name : string
         Name of the database.
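To make the seeding behaviour of the two renamed helpers concrete, here is a small standalone sketch, not part of the patch: an integer-like string seeds a RandomState, any other string is treated as a pickle path, and one derived state is drawn per statistical iteration. make_random_state is a hypothetical name and the on-disk pickling of the state is left out.

import pickle

import numpy as np


def make_random_state(random_state_arg):
    # Mirrors init_random_state, minus the pickling of the state to disk.
    if random_state_arg is None:
        return np.random.RandomState()                        # unseeded
    try:
        return np.random.RandomState(int(random_state_arg))   # "42" -> seeded
    except ValueError:
        with open(random_state_arg, "rb") as handle:          # path to a pickled state
            return pickle.load(handle)


random_state = make_random_state("42")
# One derived state per statistical iteration, as in init_stats_iter_random_states.
stats_iter_random_states = [np.random.RandomState(random_state.randint(5000))
                            for _ in range(5)]
print(len(stats_iter_random_states))  # 5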
-    type : string
+    type_var : string
         type of dataset hdf5 or csv

     Returns
@@ -775,13 +109,13 @@ def getDatabaseFunction(name, type):
         The function that will be used to extract the database
     """
     if name not in ["Fake", "Plausible"]:
-        getDatabase = getattr(DB, "getClassicDB" + type[1:])
+        get_database = getattr(DB, "getClassicDB" + type_var[1:])
     else:
-        getDatabase = getattr(DB, "get" + name + "DB" + type[1:])
-    return getDatabase
+        get_database = getattr(DB, "get" + name + "DB" + type_var[1:])
+    return get_database


-def initLogFile(name, views, CL_type, log, debug, label, result_directory, add_noise, noise_std):
+def init_log_file(name, views, cl_type, log, debug, label, result_directory, add_noise, noise_std):
     r"""Used to init the directory where the preds will be stored and the log file.

     First this function will check if the result directory already exists (only one per minute is allowed).

@@ -794,50 +128,61 @@ def initLogFile(name, views, CL_type, log, debug, label, result_directory, add_n
         Name of the database.
     views : list of strings
         List of the view names that will be used in the benchmark.
-    CL_type : list of strings
+    cl_type : list of strings
         Type of benchmark that will be made .
     log : bool
         Whether to show the log file in console or hide it.
+    debug : bool
+        Whether the benchmark is run in debug mode (the results are then stored
+        in a "debug_started_..." directory).
+    label : str
+        Label appended to the name of the result directory.
+    result_directory : str
+        Path to the main result directory.
+    add_noise : bool
+        Whether gaussian noise is added to the data.
+    noise_std : float
+        Standard deviation of the added gaussian noise.

     Returns
     -------
-    resultsDirectory : string
+    results_directory : string
         Reference to the main results directory for the benchmark.
     """
     noise_string = "/n_"+str(int(noise_std*100))
     if debug:
-        resultDirectory = result_directory + name + noise_string +"/debug_started_" + time.strftime(
-            "%Y_%m_%d-%H_%M_%S") + "_" + label + "/"
+        result_directory = result_directory + name + noise_string + \
+                           "/debug_started_" + \
+                           time.strftime(
+            "%Y_%m_%d-%H_%M_%S") + "_" + label + "/"
     else:
-        resultDirectory = result_directory + name + noise_string+ "/started_" + time.strftime(
+        result_directory = result_directory + name + noise_string+ "/started_" + time.strftime(
             "%Y_%m_%d-%H_%M") + "_" + label + "/"
-    logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(
-        CL_type) + "-" + "_".join(
+    log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(
+        cl_type) + "-" + "_".join(
         views) + "-" + name + "-LOG"
-    if os.path.exists(os.path.dirname(resultDirectory)):
+    if os.path.exists(os.path.dirname(result_directory)):
         raise NameError("The result dir already exists, wait 1 min and retry")
-    os.makedirs(os.path.dirname(resultDirectory + logFileName))
-    logFile = resultDirectory + logFileName
-    logFile += ".log"
+    os.makedirs(os.path.dirname(result_directory + log_file_name))
+    log_file = result_directory + log_file_name
+    log_file += ".log"
     logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
-                        filename=logFile, level=logging.DEBUG,
+                        filename=log_file, level=logging.DEBUG,
                         filemode='w')
     if log:
         logging.getLogger().addHandler(logging.StreamHandler())
-    return resultDirectory
+    return result_directory


-def genSplits(labels, splitRatio, statsIterRandomStates):
-    r"""Used to gen the train/test splits using one or multiple random states.
+def gen_splits(labels, split_ratio, stats_iter_random_states):
+    r"""Used to generate the train/test splits using one or multiple random states.

     Parameters
     ----------
     labels : numpy.ndarray
         Name of the database.
-    splitRatio : float
+    split_ratio : float
         The ratio of examples between train and test set.
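The getattr dispatch in get_database_function is easier to see on a toy module. DummyDB below is an invented stand-in for the get_multiview_db module (aliased DB in the real code); only the suffix handling (".hdf5" -> "hdf5") and the Fake/Plausible special case follow the patched function.

# Toy illustration of the loader lookup; DummyDB is not the real DB module.
class DummyDB(object):
    @staticmethod
    def getClassicDBhdf5(*args, **kwargs):
        return "classic hdf5 loader called"

    @staticmethod
    def getPlausibleDBhdf5(*args, **kwargs):
        return "Plausible generator called"


def get_database_function(name, type_var, db_module=DummyDB):
    if name not in ["Fake", "Plausible"]:
        return getattr(db_module, "getClassicDB" + type_var[1:])
    return getattr(db_module, "get" + name + "DB" + type_var[1:])


print(get_database_function("Plausible", ".hdf5")())  # Plausible generator called
print(get_database_function("MyDataset", ".hdf5")())  # classic hdf5 loader called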
-    statsIterRandomStates : list of numpy.random.RandomState
+    stats_iter_random_states : list of numpy.random.RandomState
         The random states for each statistical iteration.

     Returns
@@ -848,99 +193,100 @@ def genSplits(labels, splitRatio, statsIterRandomStates):
     """
     indices = np.arange(len(labels))
     splits = []
-    for randomState in statsIterRandomStates:
-        foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
-                                                                  random_state=randomState,
-                                                                  test_size=splitRatio)
-        folds = foldsObj.split(indices, labels)
+    for random_state in stats_iter_random_states:
+        folds_obj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
+                                                                  random_state=random_state,
+                                                                  test_size=split_ratio)
+        folds = folds_obj.split(indices, labels)
         for fold in folds:
             train_fold, test_fold = fold
-            trainIndices = indices[train_fold]
-            testIndices = indices[test_fold]
-            splits.append([trainIndices, testIndices])
+            train_indices = indices[train_fold]
+            test_indices = indices[test_fold]
+            splits.append([train_indices, test_indices])

     return splits


-def genKFolds(statsIter, nbFolds, statsIterRandomStates):
+def gen_k_folds(stats_iter, nb_folds, stats_iter_random_states):
     r"""Used to generate folds indices for cross validation for each statistical iteration.

     Parameters
     ----------
-    statsIter : integer
+    stats_iter : integer
         Number of statistical iterations of the benchmark.
-    nbFolds : integer
+    nb_folds : integer
         The number of cross-validation folds for the benchmark.
-    statsIterRandomStates : list of numpy.random.RandomState
+    stats_iter_random_states : list of numpy.random.RandomState
         The random states for each statistical iteration.

     Returns
     -------
-    foldsList : list of list of sklearn.model_selection.StratifiedKFold
+    folds_list : list of list of sklearn.model_selection.StratifiedKFold
         For each statistical iteration a Kfold stratified (keeping the ratio between classes in each fold).
     """
-    if statsIter > 1:
-        foldsList = []
-        for randomState in statsIterRandomStates:
-            foldsList.append(
-                sklearn.model_selection.StratifiedKFold(n_splits=nbFolds,
-                                                        random_state=randomState))
+    if stats_iter > 1:
+        folds_list = []
+        for random_state in stats_iter_random_states:
+            folds_list.append(
+                sklearn.model_selection.StratifiedKFold(n_splits=nb_folds,
                                                        random_state=random_state))
     else:
-        foldsList = [sklearn.model_selection.StratifiedKFold(n_splits=nbFolds,
-                                                             random_state=statsIterRandomStates)]
-    return foldsList
+        folds_list = [sklearn.model_selection.StratifiedKFold(n_splits=nb_folds,
                                                              random_state=stats_iter_random_states)]
+    return folds_list


-def initViews(DATASET, argViews):
-    r"""Used to return the views names that will be used by the benchmark, their indices and all the views names.
+def init_views(dataset, arg_views):
+    r"""Used to return the views names that will be used by the
+    benchmark, their indices and all the views names.

     Parameters
     ----------
-    DATASET : HDF5 dataset file
+    dataset : HDF5 dataset file
         The full dataset that wil be used by the benchmark.
-    argViews : list of strings
+    arg_views : list of strings
         The views that will be used by the benchmark (arg).

     Returns
     -------
     views : list of strings
         Names of the views that will be used by the benchmark.
-    viewIndices : list of ints
+    view_indices : list of ints
         The list of the indices of the view that will be used in the benchmark (according to the dataset).
-    allViews : list of strings
+    all_views : list of strings
         Names of all the available views in the dataset.
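A runnable sketch of the two split helpers just above, on synthetic labels. Recent scikit-learn versions require shuffle=True when a random_state is passed to StratifiedKFold, so it is added here; the patched code itself only passes random_state.

import numpy as np
import sklearn.model_selection

labels = np.array([0] * 10 + [1] * 10)        # synthetic labels, for illustration
stats_iter_random_states = [np.random.RandomState(seed) for seed in (1, 2, 3)]
indices = np.arange(len(labels))

# One stratified train/test split per statistical iteration, as in gen_splits.
splits = []
for random_state in stats_iter_random_states:
    folds_obj = sklearn.model_selection.StratifiedShuffleSplit(
        n_splits=1, random_state=random_state, test_size=0.2)
    for train_fold, test_fold in folds_obj.split(indices, labels):
        splits.append([indices[train_fold], indices[test_fold]])

# One stratified k-fold splitter per iteration, as in gen_k_folds.
folds_list = [sklearn.model_selection.StratifiedKFold(n_splits=2, shuffle=True,
                                                      random_state=random_state)
              for random_state in stats_iter_random_states]
print(len(splits), len(folds_list))  # 3 3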
""" - NB_VIEW = DATASET.get("Metadata").attrs["nbView"] - if argViews != ["all"]: - allowedViews = argViews - allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) + nb_view = dataset.get("Metadata").attrs["nbView"] + if arg_views != ["all"]: + allowed_views = arg_views + all_views = [str(dataset.get("View" + str(view_index)).attrs["name"]) if type( - DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes - else DATASET.get("View" + str(viewIndex)).attrs[ + dataset.get("View" + str(view_index)).attrs["name"]) != bytes + else dataset.get("View" + str(view_index)).attrs[ "name"].decode("utf-8") - for viewIndex in range(NB_VIEW)] + for view_index in range(nb_view)] views = [] - viewsIndices = [] - for viewIndex in range(NB_VIEW): - viewName = DATASET.get("View" + str(viewIndex)).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName in allowedViews: - views.append(viewName) - viewsIndices.append(viewIndex) + views_indices = [] + for view_index in range(nb_view): + view_name = dataset.get("View" + str(view_index)).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name in allowed_views: + views.append(view_name) + views_indices.append(view_index) else: - views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) + views = [str(dataset.get("View" + str(viewIndex)).attrs["name"]) if type( - DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes - else DATASET.get("View" + str(viewIndex)).attrs["name"].decode( + dataset.get("View" + str(viewIndex)).attrs["name"]) != bytes + else dataset.get("View" + str(viewIndex)).attrs["name"].decode( "utf-8") - for viewIndex in range(NB_VIEW)] - viewsIndices = range(NB_VIEW) - allViews = views - return views, viewsIndices, allViews + for viewIndex in range(nb_view)] + views_indices = range(nb_view) + all_views = views + return views, views_indices, all_views -def genDirecortiesNames(directory, statsIter): +def gen_direcorties_names(directory, statsIter): r"""Used to generate the different directories of each iteration if needed. Parameters @@ -977,12 +323,13 @@ def find_dataset_names(path, type, names): else: return names -def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, - labelsCombinations, indicesMulticlass, - hyperParamSearch, args, kFolds, - statsIterRandomStates, metrics, - argumentDictionaries, - benchmark, nbViews, views, viewsIndices): + +def gen_argument_dictionaries(labels_dictionary, directories, multiclass_labels, + labels_combinations, indices_multiclass, + hyper_param_search, args, k_folds, + stats_iter_random_states, metrics, + argument_dictionaries, + benchmark, nb_views, views, views_indices): r"""Used to generate a dictionary for each benchmark. One for each label combination (if multiclass), for each statistical iteration, generates an dictionary with @@ -990,37 +337,37 @@ def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, Parameters ---------- - labelsDictionary : dictionary + labels_dictionary : dictionary Dictionary mapping labels indices to labels names. directories : list of strings List of the paths to the result directories for each statistical iteration. 
- multiclassLabels : list of lists of numpy.ndarray + multiclass_labels : list of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. - labelsCombinations : list of lists of numpy.ndarray + labels_combinations : list of lists of numpy.ndarray Each original couple of different labels. - indicesMulticlass : list of lists of numpy.ndarray + indices_multiclass : list of lists of numpy.ndarray For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination. - hyperParamSearch : string + hyper_param_search : string Type of hyper parameter optimization method args : parsed args objects All the args passed by the user. - kFolds : list of list of sklearn.model_selection.StratifiedKFold + k_folds : list of list of sklearn.model_selection.StratifiedKFold For each statistical iteration a Kfold stratified (keeping the ratio between classes in each fold). - statsIterRandomStates : list of numpy.random.RandomState objects + stats_iter_random_states : list of numpy.random.RandomState objects Multiple random states, one for each sattistical iteration of the same benchmark. metrics : list of lists metrics that will be used to evaluate the algorithms performance. - argumentDictionaries : dictionary + argument_dictionaries : dictionary Dictionary resuming all the specific arguments for the benchmark, oe dictionary for each classifier. benchmark : dictionary Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. - nbViews : int + nb_views : int THe number of views used by the benchmark. views : list of strings List of the names of the used views. - viewsIndices : list of ints + views_indices : list of ints List of indices (according to the dataset) of the used views. Returns @@ -1029,31 +376,31 @@ def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, All the needed arguments for the benchmarks. 
""" - benchmarkArgumentDictionaries = [] - for combinationIndex, labelsCombination in enumerate(labelsCombinations): - for iterIndex, iterRandomState in enumerate(statsIterRandomStates): - benchmarkArgumentDictionary = { - "LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]], - 1: labelsDictionary[ - labelsCombination[1]]}, - "directory": directories[iterIndex] + - labelsDictionary[labelsCombination[0]] + + benchmark_argument_dictionaries = [] + for combination_index, labels_combination in enumerate(labels_combinations): + for iter_index, iterRandomState in enumerate(stats_iter_random_states): + benchmark_argument_dictionary = { + "LABELS_DICTIONARY": {0: labels_dictionary[labels_combination[0]], + 1: labels_dictionary[ + labels_combination[1]]}, + "directory": directories[iter_index] + + labels_dictionary[labels_combination[0]] + "-vs-" + - labelsDictionary[labelsCombination[1]] + "/", + labels_dictionary[labels_combination[1]] + "/", "classificationIndices": [ - indicesMulticlass[combinationIndex][0][iterIndex], - indicesMulticlass[combinationIndex][1][iterIndex], - indicesMulticlass[combinationIndex][2][iterIndex]], + indices_multiclass[combination_index][0][iter_index], + indices_multiclass[combination_index][1][iter_index], + indices_multiclass[combination_index][2][iter_index]], "args": args, - "labels": multiclassLabels[combinationIndex], - "kFolds": kFolds[iterIndex], + "labels": multiclass_labels[combination_index], + "kFolds": k_folds[iter_index], "randomState": iterRandomState, - "hyperParamSearch": hyperParamSearch, + "hyperParamSearch": hyper_param_search, "metrics": metrics, - "argumentDictionaries": argumentDictionaries, + "argumentDictionaries": argument_dictionaries, "benchmark": benchmark, "views": views, - "viewsIndices": viewsIndices, - "flag": [iterIndex, labelsCombination]} - benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary) - return benchmarkArgumentDictionaries + "viewsIndices": views_indices, + "flag": [iter_index, labels_combination]} + benchmark_argument_dictionaries.append(benchmark_argument_dictionary) + return benchmark_argument_dictionaries diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 19966ac08ac3a36ff6bbbef537a5435bc846131c..0d86492787e2e290fc5d351ec446ada1c49c1049 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -250,186 +250,186 @@ class DatasetError(Exception): Exception.__init__(self, *args, **kwargs) -def getClasses(labels): - labelsSet = set(list(labels)) - nbLabels = len(labelsSet) - if nbLabels >= 2: - return labelsSet +def get_classes(labels): + labels_set = set(list(labels)) + nb_labels = len(labels_set) + if nb_labels >= 2: + return labels_set else: raise DatasetError("Dataset must have at least two different labels") -def allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): - for askedLabelName in askedLabelsNamesSet: - if askedLabelName in availableLabelsNames: +def all_asked_labels_are_available(asked_labels_names_set, available_labels_names): + for askedLabelName in asked_labels_names_set: + if askedLabelName in available_labels_names: pass else: return False return True -def fillLabelNames(NB_CLASS, askedLabelsNames, randomState, - availableLabelsNames): - if len(askedLabelsNames) < NB_CLASS: - nbLabelsToAdd = NB_CLASS - len(askedLabelsNames) - 
labelsNamesToChoose = [availableLabelName for availableLabelName in - availableLabelsNames - if availableLabelName not in askedLabelsNames] - addedLabelsNames = randomState.choice(labelsNamesToChoose, - nbLabelsToAdd, replace=False) - askedLabelsNames = list(askedLabelsNames) + list(addedLabelsNames) - askedLabelsNamesSet = set(askedLabelsNames) +def fill_label_names(nb_class, asked_labels_names, random_state, + available_labels_names): + if len(asked_labels_names) < nb_class: + nb_labels_to_add = nb_class - len(asked_labels_names) + labels_names_to_choose = [available_label_name for available_label_name in + available_labels_names + if available_label_name not in asked_labels_names] + added_labels_names = random_state.choice(labels_names_to_choose, + nb_labels_to_add, replace=False) + asked_labels_names = list(asked_labels_names) + list(added_labels_names) + asked_labels_names_set = set(asked_labels_names) - elif len(askedLabelsNames) > NB_CLASS: - askedLabelsNames = list( - randomState.choice(askedLabelsNames, NB_CLASS, replace=False)) - askedLabelsNamesSet = set(askedLabelsNames) + elif len(asked_labels_names) > nb_class: + asked_labels_names = list( + random_state.choice(asked_labels_names, nb_class, replace=False)) + asked_labels_names_set = set(asked_labels_names) else: - askedLabelsNamesSet = set(askedLabelsNames) - - return askedLabelsNames, askedLabelsNamesSet - - -def getAllLabels(fullLabels, availableLabelsNames): - newLabels = fullLabels - newLabelsNames = availableLabelsNames - usedIndices = np.arange(len(fullLabels)) - return newLabels, newLabelsNames, usedIndices - - -def selectAskedLabels(askedLabelsNamesSet, availableLabelsNames, - askedLabelsNames, fullLabels): - if allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): - usedLabels = [availableLabelsNames.index(askedLabelName) for - askedLabelName in askedLabelsNames] - usedIndices = np.array( - [labelIndex for labelIndex, label in enumerate(fullLabels) if - label in usedLabels]) - newLabels = np.array([usedLabels.index(label) for label in fullLabels if - label in usedLabels]) - newLabelsNames = [availableLabelsNames[usedLabel] for usedLabel in - usedLabels] - return newLabels, newLabelsNames, usedIndices + asked_labels_names_set = set(asked_labels_names) + + return asked_labels_names, asked_labels_names_set + + +def get_all_labels(full_labels, available_labels_names): + new_labels = full_labels + new_labels_names = available_labels_names + used_indices = np.arange(len(full_labels)) + return new_labels, new_labels_names, used_indices + + +def select_asked_labels(asked_labels_names_set, available_labels_names, + asked_labels_names, full_labels): + if all_asked_labels_are_available(asked_labels_names_set, available_labels_names): + used_labels = [available_labels_names.index(asked_label_name) for + asked_label_name in asked_labels_names] + used_indices = np.array( + [labelIndex for labelIndex, label in enumerate(full_labels) if + label in used_labels]) + new_labels = np.array([used_labels.index(label) for label in full_labels if + label in used_labels]) + new_labels_names = [available_labels_names[usedLabel] for usedLabel in + used_labels] + return new_labels, new_labels_names, used_indices else: raise DatasetError("Asked labels are not all available in the dataset") -def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, - availableLabelsNames, askedLabelsNames): - if len(labelsSet) > 2: - if askedLabelsNames == availableLabelsNames: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, 
- availableLabelsNames) - elif len(askedLabelsNamesSet) <= len(labelsSet): - newLabels, newLabelsNames, usedIndices = selectAskedLabels( - askedLabelsNamesSet, availableLabelsNames, - askedLabelsNames, fullLabels) +def filter_labels(labels_set, asked_labels_names_set, full_labels, + available_labels_names, asked_labels_names): + if len(labels_set) > 2: + if asked_labels_names == available_labels_names: + new_labels, new_labels_names, used_indices = \ + get_all_labels(full_labels, available_labels_names) + elif len(asked_labels_names_set) <= len(labels_set): + new_labels, new_labels_names, used_indices = select_asked_labels( + asked_labels_names_set, available_labels_names, + asked_labels_names, full_labels) else: raise DatasetError( "Asked more labels than available in the dataset. Available labels are : " + - ", ".join(availableLabelsNames)) + ", ".join(available_labels_names)) else: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, - availableLabelsNames) - return newLabels, newLabelsNames, usedIndices + new_labels, new_labels_names, used_indices = get_all_labels(full_labels, + available_labels_names) + return new_labels, new_labels_names, used_indices -def filterViews(datasetFile, temp_dataset, views, usedIndices): - newViewIndex = 0 +def filter_views(dataset_file, temp_dataset, views, used_indices): + new_view_index = 0 if views == [""]: - for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), - "View" + str(viewIndex), usedIndices) + for view_index in range(dataset_file.get("Metadata").attrs["nbView"]): + copyhdf5_dataset(dataset_file, temp_dataset, "View" + str(view_index), + "View" + str(view_index), used_indices) else: - for askedViewName in views: - for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName == askedViewName: - copyhdf5Dataset(datasetFile, temp_dataset, - "View" + str(viewIndex), - "View" + str(newViewIndex), usedIndices) - newViewName = \ - temp_dataset.get("View" + str(newViewIndex)).attrs["name"] - if type(newViewName) == bytes: - temp_dataset.get("View" + str(newViewIndex)).attrs[ - "name"] = newViewName.decode("utf-8") - - newViewIndex += 1 + for asked_view_name in views: + for view_index in range(dataset_file.get("Metadata").attrs["nbView"]): + view_name = dataset_file.get("View" + str(view_index)).attrs["name"] + if type(view_name) == bytes: + view_name = view_name.decode("utf-8") + if view_name == asked_view_name: + copyhdf5_dataset(dataset_file, temp_dataset, + "View" + str(view_index), + "View" + str(new_view_index), used_indices) + new_view_name = \ + temp_dataset.get("View" + str(new_view_index)).attrs["name"] + if type(new_view_name) == bytes: + temp_dataset.get("View" + str(new_view_index)).attrs[ + "name"] = new_view_name.decode("utf-8") + + new_view_index += 1 else: pass temp_dataset.get("Metadata").attrs["nbView"] = len(views) -def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, - destinationDatasetName, usedIndices): +def copyhdf5_dataset(source_data_file, destination_data_file, source_dataset_name, + destination_dataset_name, used_indices): """Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args""" - newDset = destinationDataFile.create_dataset(destinationDatasetName, - data=sourceDataFile.get( - 
sourceDatasetName).value[ - usedIndices, :]) - if "sparse" in sourceDataFile.get(sourceDatasetName).attrs.keys() and \ - sourceDataFile.get(sourceDatasetName).attrs["sparse"]: + new_d_set = destination_data_file.create_dataset(destination_dataset_name, + data=source_data_file.get( + source_dataset_name).value[ + used_indices, :]) + if "sparse" in source_data_file.get(source_dataset_name).attrs.keys() and \ + source_data_file.get(source_dataset_name).attrs["sparse"]: # TODO : Support sparse pass else: - for key, value in sourceDataFile.get(sourceDatasetName).attrs.items(): - newDset.attrs[key] = value + for key, value in source_data_file.get(source_dataset_name).attrs.items(): + new_d_set.attrs[key] = value -def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, - randomState, full=False, add_noise=False, noise_std=0.15,): +def get_classicDBhdf5(views, path_f, name_DB, nb_class, asked_labels_names, + random_state, full=False, add_noise=False, noise_std=0.15,): """Used to load a hdf5 database""" if full: - datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") - dataset_name = nameDB - labelsDictionary = dict( - (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) + dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset_name = name_DB + labels_dictionary = dict( + (label_index, label_name.decode("utf-8")) for label_index, label_name in + enumerate(dataset_file.get("Labels").attrs["names"])) else: - askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in - askedLabelsNames] - baseDatasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") - fullLabels = baseDatasetFile.get("Labels").value - datasetFile = h5py.File(pathF + nameDB + "_temp_view_label_select.hdf5", + asked_labels_names = [asked_label_name.encode("utf8") for asked_label_name in + asked_labels_names] + base_dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + full_labels = base_dataset_file.get("Labels").value + dataset_file = h5py.File(path_f + name_DB + "_temp_view_label_select.hdf5", "w") - dataset_name = nameDB + "_temp_view_label_select" - baseDatasetFile.copy("Metadata", datasetFile) - labelsSet = getClasses(fullLabels) - availableLabelsNames = list( - baseDatasetFile.get("Labels").attrs["names"]) - askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, - askedLabelsNames, - randomState, - availableLabelsNames) - - newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, - askedLabelsNamesSet, - fullLabels, - availableLabelsNames, - askedLabelsNames) - datasetFile.get("Metadata").attrs["datasetLength"] = len(usedIndices) - datasetFile.get("Metadata").attrs["nbClass"] = NB_CLASS - datasetFile.create_dataset("Labels", data=newLabels) - datasetFile.get("Labels").attrs["names"] = newLabelsNames - filterViews(baseDatasetFile, datasetFile, views, usedIndices) - - labelsDictionary = dict( + dataset_name = name_DB + "_temp_view_label_select" + base_dataset_file.copy("Metadata", dataset_file) + labels_set = get_classes(full_labels) + available_labels_names = list( + base_dataset_file.get("Labels").attrs["names"]) + asked_labels_names, asked_labels_names_set = fill_label_names(nb_class, + asked_labels_names, + random_state, + available_labels_names) + + new_labels, new_labels_names, used_indices = filter_labels(labels_set, + asked_labels_names_set, + full_labels, + available_labels_names, + asked_labels_names) + dataset_file.get("Metadata").attrs["datasetLength"] = len(used_indices) + 
dataset_file.get("Metadata").attrs["nbClass"] = nb_class + dataset_file.create_dataset("Labels", data=new_labels) + dataset_file.get("Labels").attrs["names"] = new_labels_names + filter_views(base_dataset_file, dataset_file, views, used_indices) + + labels_dictionary = dict( (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) - datasetFile.close() - datasetFile = h5py.File(pathF + nameDB + "_temp_view_label_select.hdf5", + enumerate(dataset_file.get("Labels").attrs["names"])) + dataset_file.close() + dataset_file = h5py.File(path_f + name_DB + "_temp_view_label_select.hdf5", "r") if add_noise: - datasetFile, dataset_name = add_gaussian_noise(datasetFile, randomState, - pathF, dataset_name, - noise_std) + dataset_file, dataset_name = add_gaussian_noise(dataset_file, random_state, + path_f, dataset_name, + noise_std) else: pass - return datasetFile, labelsDictionary, dataset_name + return dataset_file, labels_dictionary, dataset_name def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 6bbf2eb4acb77a67d069bcbb25fa86710ea640d6..bb1c8ddffda8f9031c6d1b09268a3cc613e2eb4e 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -32,8 +32,8 @@ def searchBestSettings(dataset, labels, classifier_module, classifier_name, return bestSettings # or well set clasifier ? -def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, - **kwargs): +def grid_search(dataset, classifier_name, views_indices=None, k_folds=None, n_iter=1, + **kwargs): """Used to perfom gridsearch on the classifiers""" pass @@ -82,9 +82,9 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) estimator.fit(X, y, available_indices[train_indices]) test_folds_prediction.append( estimator.predict(X, available_indices[test_indices])) - minFoldLength = fold_lengths.min() + min_fold_length = fold_lengths.min() test_folds_prediction = np.array( - [test_fold_prediction[:minFoldLength] for test_fold_prediction in + [test_fold_prediction[:min_fold_length] for test_fold_prediction in test_folds_prediction]) return test_folds_prediction @@ -93,16 +93,16 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): estimator = getattr(classifier_module, classifier_name)(random_state, - **classifier_kwargs) + **classifier_kwargs) params_dict = estimator.genDistribs() if params_dict: - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in + metric_kargs = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: - metricKWARGS = {} - scorer = metricModule.get_scorer(**metricKWARGS) + metric_kargs = {} + scorer = metric_module.get_scorer(**metric_kargs) nb_possible_combinations = compute_possible_combinations(params_dict) min_list = np.array( [min(nb_possible_combination, n_iter) for nb_possible_combination in @@ -121,13 +121,9 @@ def randomized_search(X, y, framework, random_state, 
output_file_name, classifie if "random_state" in best_params: best_params.pop("random_state") - # bestParams = dict((key, value) for key, value in - # estimator.genBestParams(detector).items() if - # key is not "random_state") - scoresArray = random_search.cv_results_['mean_test_score'] params = [(key[6:], value ) for key, value in random_search.cv_results_.items() if key.startswith("param_")] - # genHeatMaps(params, scoresArray, output_file_name) + # gen_heat_maps(params, scores_array, output_file_name) best_estimator = random_search.best_estimator_ else: best_estimator = estimator @@ -213,131 +209,128 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): if self.framework =="multiview": estimator.fit(X, y, self.available_indices[train_indices]) test_folds_prediction.append(estimator.predict(X, self.available_indices[test_indices])) - minFoldLength = fold_lengths.min() + min_fold_length = fold_lengths.min() test_folds_prediction = np.array( - [test_fold_prediction[:minFoldLength] for test_fold_prediction in test_folds_prediction]) + [test_fold_prediction[:min_fold_length] for test_fold_prediction in test_folds_prediction]) return test_folds_prediction - - - -def randomizedSearch(dataset, labels, classifierPackage, classifierName, - metrics_list, learningIndices, KFolds, randomState, - viewsIndices=None, nIter=1, - nbCores=1, **classificationKWARGS): +def randomizedSearch(dataset, labels, classifier_package, classifier_name, + metrics_list, learning_indices, k_folds, random_state, + views_indices=None, n_iter=1, + nb_cores=1, **classification_kargs): """Used to perform a random search on the classifiers to optimize hyper parameters""" - if viewsIndices is None: - viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) + if views_indices is None: + views_indices = range(dataset.get("Metadata").attrs["nbView"]) metric = metrics_list[0] - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in + metric_kargs = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: - metricKWARGS = {} - classifierModule = getattr(classifierPackage, classifierName + "Module") - classifierClass = getattr(classifierModule, classifierName + "Class") - if classifierName != "Mumbo": - paramsSets = classifierModule.genParamsSets(classificationKWARGS, - randomState, nIter=nIter) - if metricModule.getConfig()[-14] == "h": - baseScore = -1000.0 - isBetter = "higher" + metric_kargs = {} + classifier_module = getattr(classifier_package, classifier_name + "Module") + classifier_class = getattr(classifier_module, classifier_name + "Class") + if classifier_name != "Mumbo": + params_sets = classifier_module.gen_params_sets(classification_kargs, + random_state, n_iter=n_iter) + if metric_module.getConfig()[-14] == "h": + base_score = -1000.0 + is_better = "higher" else: - baseScore = 1000.0 - isBetter = "lower" - bestSettings = None - kFolds = KFolds.split(learningIndices, labels[learningIndices]) - for paramsSet in paramsSets: + base_score = 1000.0 + is_better = "lower" + best_settings = None + kk_folds = k_folds.split(learning_indices, labels[learning_indices]) + for params_set in params_sets: scores = [] - for trainIndices, testIndices in kFolds: - classifier = classifierClass(randomState, NB_CORES=nbCores, - **classificationKWARGS) - classifier.setParams(paramsSet) + for trainIndices, testIndices in kk_folds: + classifier = 
classifier_class(random_state, nb_cores=nb_cores,
+                                             **classification_kargs)
+                classifier.setParams(params_set)
                 classifier.fit_hdf5(dataset, labels,
-                                    trainIndices=learningIndices[trainIndices],
-                                    viewsIndices=viewsIndices)
-                testLabels = classifier.predict_hdf5(dataset, usedIndices=
-                learningIndices[testIndices],
-                                                     viewsIndices=viewsIndices)
-                testScore = metricModule.score(
-                    labels[learningIndices[testIndices]], testLabels)
-                scores.append(testScore)
-                crossValScore = np.mean(np.array(scores))
-
-                if isBetter == "higher" and crossValScore > baseScore:
-                    baseScore = crossValScore
-                    bestSettings = paramsSet
-                elif isBetter == "lower" and crossValScore < baseScore:
-                    baseScore = crossValScore
-                    bestSettings = paramsSet
-                classifier = classifierClass(randomState, NB_CORES=nbCores,
-                                             **classificationKWARGS)
-                classifier.setParams(bestSettings)
+                                    trainIndices=learning_indices[trainIndices],
+                                    viewsIndices=views_indices)
+                test_labels = classifier.predict_hdf5(dataset,
+                                        used_indices=learning_indices[testIndices],
+                                        views_indices=views_indices)
+                test_score = metric_module.score(
+                    labels[learning_indices[testIndices]], test_labels)
+                scores.append(test_score)
+                cross_val_score = np.mean(np.array(scores))
+
+                if is_better == "higher" and cross_val_score > base_score:
+                    base_score = cross_val_score
+                    best_settings = params_set
+                elif is_better == "lower" and cross_val_score < base_score:
+                    base_score = cross_val_score
+                    best_settings = params_set
+                classifier = classifier_class(random_state, nb_cores=nb_cores,
+                                              **classification_kargs)
+                classifier.setParams(best_settings)
         # TODO : This must be corrected
     else:
-        bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, labels,
-                                                          viewsIndices,
-                                                          classificationKWARGS,
-                                                          learningIndices,
-                                                          randomState,
-                                                          metric=metric,
-                                                          nIter=nIter)
-        classificationKWARGS["classifiersConfigs"] = bestConfigs
-        classifier = classifierClass(randomState, NB_CORES=nbCores,
-                                     **classificationKWARGS)
+        best_configs, _ = classifier_module.grid_search_hdf5(dataset, labels,
+                                                             views_indices,
+                                                             classification_kargs,
+                                                             learning_indices,
+                                                             random_state,
+                                                             metric=metric,
+                                                             n_iter=n_iter)
+        classification_kargs["classifiersConfigs"] = best_configs
+        classifier = classifier_class(random_state, nb_cores=nb_cores,
+                                      **classification_kargs)
     return classifier
 
 
-def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1,
-              **kwargs):
+def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, n_iter=1,
+               **kwargs):
     """Used to perform spearmint on the classifiers to optimize hyper parameters,
     longer than randomsearch (can't be parallelized)"""
     pass
 
 
-def genHeatMaps(params, scoresArray, outputFileName):
+def gen_heat_maps(params, scores_array, output_file_name):
     """Used to generate a heat map for each doublet of hyperparms optimized on the previous function"""
-    nbParams = len(params)
-    if nbParams > 2:
-        combinations = itertools.combinations(range(nbParams), 2)
-    elif nbParams == 2:
+    nb_params = len(params)
+    if nb_params > 2:
+        combinations = itertools.combinations(range(nb_params), 2)
+    elif nb_params == 2:
         combinations = [(0, 1)]
     else:
         combinations = [()]
     for combination in combinations:
         if combination:
-            paramName1, paramArray1 = params[combination[0]]
-            paramName2, paramArray2 = params[combination[1]]
+            param_name1, param_array1 = params[combination[0]]
+            param_name2, param_array2 = params[combination[1]]
         else:
-            paramName1, paramArray1 = params[0]
-            paramName2, paramArray2 = ("Control", np.array([0]))
+            param_name1, param_array1 = params[0]
+            
param_name2, param_array2 = ("Control", np.array([0])) - paramArray1Set = np.sort(np.array(list(set(paramArray1)))) - paramArray2Set = np.sort(np.array(list(set(paramArray2)))) + param_array1_set = np.sort(np.array(list(set(param_array1)))) + param_array2_set = np.sort(np.array(list(set(param_array2)))) - scoresMatrix = np.zeros( - (len(paramArray2Set), len(paramArray1Set))) - 0.1 - for param1, param2, score in zip(paramArray1, paramArray2, scoresArray): - param1Index, = np.where(paramArray1Set == param1) - param2Index, = np.where(paramArray2Set == param2) - scoresMatrix[int(param2Index), int(param1Index)] = score + scores_matrix = np.zeros( + (len(param_array2_set), len(param_array1_set))) - 0.1 + for param1, param2, score in zip(param_array1, param_array2, scores_array): + param1_index, = np.where(param_array1_set == param1) + param2_index, = np.where(param_array2_set == param2) + scores_matrix[int(param2_index), int(param1_index)] = score plt.figure(figsize=(8, 6)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) - plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot, + plt.imshow(scores_matrix, interpolation='nearest', cmap=plt.cm.hot, ) - plt.xlabel(paramName1) - plt.ylabel(paramName2) + plt.xlabel(param_name1) + plt.ylabel(param_name2) plt.colorbar() - plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set) - plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) + plt.xticks(np.arange(len(param_array1_set)), param_array1_set) + plt.yticks(np.arange(len(param_array2_set)), param_array2_set, rotation=45) plt.title('Validation metric') plt.savefig( - outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png", transparent=True) + output_file_name + "heat_map-" + param_name1 + "-" + param_name2 + ".png", transparent=True) plt.close() # nohup python ~/dev/git/spearmint/spearmint/main.py . 
& diff --git a/multiview_platform/mono_multi_view_classifiers/utils/make_file_config.py b/multiview_platform/mono_multi_view_classifiers/utils/make_file_config.py index 121e1f869c321aabd6a287632c7401c67210257f..ac53853a52f0338b3bbf1e289a2582f5f4d2707a 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/make_file_config.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/make_file_config.py @@ -4,39 +4,20 @@ import os, sys, inspect import importlib -classifier_dict = {"0": ['mono', 'Adaboost', - 'multiview_platform.mono_multi_view_classifiers.monoview_classifiers.adaboost']} -val = classifier_dict["0"] -mymodule = importlib.import_module(val[2]) - -for name in dir(mymodule): - att = getattr(mymodule, name) - try: - getattr(att, "__module__") - if att.__module__.startswith(mymodule.__name__): - if inspect.isclass(att): - print(att) - print(name) - except Exception: - pass - - -parameter = {"0":[]} -instring = "multiview_platform/mono_multi_view_classifiers/monoview_classifiers/" - -if instring in mymodule.__file__: - monInstance = getattr(mymodule, 'Adaboost') - sig = inspect.signature(monInstance.__init__) - for arg_idx, name in enumerate(sig.parameters): - param= sig.parameters[name] - if not name.startswith('self'): - parameter["0"].append(name) - - if param.default is not inspect.Parameter.empty: - value_default = param.default - else: - value_default = 'None' - print() +# +# if instring in mymodule.__file__: +# +# sig = inspect.signature(monInstance.__init__) +# for arg_idx, name in enumerate(sig.parameters): +# param= sig.parameters[name] +# if not name.startswith('self'): +# parameter["0"].append(name) +# +# if param.default is not inspect.Parameter.empty: +# value_default = param.default +# else: +# value_default = 'None' +# print() class ConfigurationMaker(): @@ -57,6 +38,7 @@ class ConfigurationMaker(): for key, val in classifier_dict.items(): mymodule = importlib.import_module(val[2]) names.append(self._get_module_name(mymodule)) + monInstance = getattr(mymodule, val[1]) def _get_module_name(self, mymodule): @@ -72,81 +54,6 @@ class ConfigurationMaker(): return None return None -# mymodule = importlib.import_module(val[2]) -# module_file = mymodule.__file__ -# getattr(self._path_classifier_mono, module_file[:-3]) -# -# #__import__(val[1], locals(), globals(), [], 1) -# sig = inspect.signature(val[1]+"."+val[0]) -# print(sig) -# for arg_idx, name in enumerate(sig.parameters): -# print(arg_idx) -# print(name) -# -# -# def make(dir='.', output=None): -# """ -# Generate file config from classifier files -# :param dir: (default'.' 
-# :dir type: str or list of str -# :return: -# """ -# -# currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) -# parentdir = os.path.dirname(currentdir) -# sys.path.insert(0, parentdir) -# -# -# # calling_module = inspect.getmodule(stack_frame[0]) -# -# -# -# path = os.getcwd() + '/multiview_platform/' -# files = [] -# # r=root, d=directories, f = files -# for r, d, f in os.walk(path): -# print('dir', d) -# print('root', r) -# for file in f: -# if '.py' in file and not file.startswith('__init__'): -# print("file", file) -# files.append(os.path.join(r, file)) -# -# for f in files: -# print(f) -# -# for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): -# if module == '__init__.py' or module[-3:] != '.py': -# continue -# print(module) -# __import__(module[:-3], locals(), globals(), [], 1) -# -# import glob -# -# path = 'c:\\projects\\hc2\\' -# -# files = [f for f in glob.glob(path + "**/*.txt", recursive=True)] -# -# for f in files: -# print(f) -# -# import inspect -# -# -# # Import this to other module and call it -# def print_caller_info(): -# # Get the full stack -# stack = inspect.stack() -# -# # Get one level up from current -# previous_stack_frame = stack[1] -# print(previous_stack_frame.filename) # Filename where caller lives -# -# # Get the module object of the caller -# calling_module = inspect.getmodule(stack_frame[0]) -# print(calling_module) -# print(calling_module.__file__) -# -# -# if __name__ == '__main__': -# print_caller_info() \ No newline at end of file + +if __name__ == '__main__': + ConfigurationMaker() diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py index 9d4e19bdd0972dc8d7b4f428c906d42d9f1c1488..2e525f2983472feb1b78089bb06f5f7ddd55314d 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py @@ -3,7 +3,7 @@ import itertools import numpy as np -def genMulticlassLabels(labels, multiclassMethod, splits): +def gen_multiclass_labels(labels, multiclass_method, splits): r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset to biclass algorithms. @@ -21,7 +21,7 @@ def genMulticlassLabels(labels, multiclassMethod, splits): ---------- labels : numpy.ndarray Name of the database. - multiclassMethod : string + multiclass_method : string The name of the multiclass method used (oneVersusOne, oneVersusAll, ...). splits : list of lists of numpy.ndarray For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and @@ -29,70 +29,70 @@ def genMulticlassLabels(labels, multiclassMethod, splits): Returns ------- - multiclassLabels : list of lists of numpy.ndarray + multiclass_labels : list of lists of numpy.ndarray For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the indices for the biclass training set, the ones for the biclass testing set and the ones for the multiclass testing set. - labelsIndices : list of lists of numpy.ndarray + labels_indices : list of lists of numpy.ndarray Each original couple of different labels. - indicesMulticlass : list of lists of numpy.ndarray + indices_multiclass : list of lists of numpy.ndarray For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination. 
""" - if multiclassMethod == "oneVersusOne": - nbLabels = len(set(list(labels))) - if nbLabels == 2: + if multiclass_method == "oneVersusOne": + nb_labels = len(set(list(labels))) + if nb_labels == 2: splits = [[trainIndices for trainIndices, _ in splits], [testIndices for _, testIndices in splits], [[] for _ in splits]] return [labels], [(0, 1)], [splits] else: - combinations = itertools.combinations(np.arange(nbLabels), 2) - multiclassLabels = [] - labelsIndices = [] - indicesMulticlass = [] + combinations = itertools.combinations(np.arange(nb_labels), 2) + multiclass_labels = [] + labels_indices = [] + indices_multiclass = [] for combination in combinations: - labelsIndices.append(combination) - oldIndices = [exampleIndex - for exampleIndex, exampleLabel in + labels_indices.append(combination) + old_indices = [example_index + for example_index, example_label in enumerate(labels) - if exampleLabel in combination] - trainIndices = [np.array([oldIndex for oldIndex in oldIndices if - oldIndex in iterIndices[0]]) + if example_label in combination] + train_indices = [np.array([old_index for old_index in old_indices if + old_index in iterIndices[0]]) for iterIndices in splits] - testIndices = [np.array([oldIndex for oldIndex in oldIndices if - oldIndex in iterindices[1]]) + test_indices = [np.array([old_index for old_index in old_indices if + old_index in iterindices[1]]) for iterindices in splits] - testIndicesMulticlass = [np.array(iterindices[1]) for + test_indices_multiclass = [np.array(iterindices[1]) for iterindices in splits] - indicesMulticlass.append( - [trainIndices, testIndices, testIndicesMulticlass]) - newLabels = np.zeros(len(labels), dtype=int) - 100 + indices_multiclass.append( + [train_indices, test_indices, test_indices_multiclass]) + new_labels = np.zeros(len(labels), dtype=int) - 100 for labelIndex, label in enumerate(labels): if label == combination[0]: - newLabels[labelIndex] = 1 + new_labels[labelIndex] = 1 elif label == combination[1]: - newLabels[labelIndex] = 0 + new_labels[labelIndex] = 0 else: pass - multiclassLabels.append(newLabels) + multiclass_labels.append(new_labels) - elif multiclassMethod == "oneVersusRest": + elif multiclass_method == "oneVersusRest": # TODO : Implement one versus rest if probas are not a problem anymore pass - return multiclassLabels, labelsIndices, indicesMulticlass + return multiclass_labels, labels_indices, indices_multiclass -def genMulticlassMonoviewDecision(monoviewResult, classificationIndices): - learningIndices, validationIndices, testIndicesMulticlass = classificationIndices - multiclassMonoviewDecisions = monoviewResult.full_labels_pred - multiclassMonoviewDecisions[ - testIndicesMulticlass] = monoviewResult.y_test_multiclass_pred - return multiclassMonoviewDecisions +def gen_multiclass_monoview_decision(monoview_result, classification_indices): + learning_indices, validation_indices, test_indices_multiclass = classification_indices + multiclass_monoview_decisions = monoview_result.full_labels_pred + multiclass_monoview_decisions[ + test_indices_multiclass] = monoview_result.y_test_multiclass_pred + return multiclass_monoview_decisions -def isBiclass(multiclass_preds): +def is_biclass(multiclass_preds): if multiclass_preds[0] is []: return True else: diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py index 43833e25ab266ec060dcbf24394c0717cf65abb8..372f62116eb4d2305f7cf5df4596fd02c26a3bc2 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py @@ -5,46 +5,46 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def printMetricScore(metricScores, metrics): - metricScoreString = "\n\n" +def print_metric_score(metric_scores, metrics): + metric_score_string = "\n\n" for metric in metrics: - metricModule = getattr(metrics, metric[0]) + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) + metric_kwargs = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: - metricKWARGS = {} - metricScoreString += "\tFor " + metricModule.getConfig( - **metricKWARGS) + " : " - metricScoreString += "\n\t\t- Score on train : " + str( - metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : " + str( - metricScores[metric[0]][1]) - metricScoreString += "\n\n" - return metricScoreString + metric_kwargs = {} + metric_score_string += "\tFor " + metric_module.getConfig( + **metric_kwargs) + " : " + metric_score_string += "\n\t\t- Score on train : " + str( + metric_scores[metric[0]][0]) + metric_score_string += "\n\t\t- Score on test : " + str( + metric_scores[metric[0]][1]) + metric_score_string += "\n\n" + return metric_score_string -def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, - learningIndices, labels): - metricModule = getattr(metrics, metric[0]) +def get_total_metric_scores(metric, train_labels, test_labels, validation_indices, + learning_indices, labels): + metric_module = getattr(metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) + metric_kwargs = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: - metricKWARGS = {} - trainScore = metricModule.score(labels[learningIndices], trainLabels, - **metricKWARGS) - testScore = metricModule.score(labels[validationIndices], testLabels, - **metricKWARGS) - return [trainScore, testScore] + metric_kwargs = {} + train_score = metric_module.score(labels[learning_indices], train_labels, + **metric_kwargs) + test_score = metric_module.score(labels[validation_indices], test_labels, + **metric_kwargs) + return [train_score, test_score] -def getMetricsScores(metrics, trainLabels, testLabels, - validationIndices, learningIndices, labels): - metricsScores = {} - for metric in metrics: - metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, - testLabels, - validationIndices, - learningIndices, labels) - return metricsScores +def get_metrics_scores(metrics_var, train_labels, test_labels, + validation_indices, learning_indices, labels): + metrics_scores = {} + for metric in metrics_var: + metrics_scores[metric[0]] = get_total_metric_scores(metric, train_labels, + test_labels, + validation_indices, + learning_indices, labels) + return metrics_scores diff --git a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py index 5d569addfa5f71256eb354f5ea0243be3d1c0657..9d26ddde8bd02fea2ef1176385ac251260027e40 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py @@ -1,7 +1,7 @@ import numpy as np 
-def signLabels(labels): +def sign_labels(labels): if set(labels) == (0, 1): return np.array([label if label != 0 else -1 for label in labels]) else: diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 90fd24342a047bf39b26de5c9e99164dce705ecf..07ba4ecc0dce6ad16f6924625072b273bc5dd53c 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -12,7 +12,7 @@ from ..mono_multi_view_classifiers import exec_classif class Test_initBenchmark(unittest.TestCase): def test_benchmark_wanted(self): - # benchmark_output = ExecClassif.initBenchmark(self.args) + # benchmark_output = ExecClassif.init_benchmark(self.args) self.assertEqual(1, 1) @@ -244,7 +244,7 @@ class Test_execBenchmark(unittest.TestCase): "Classification":{"hps_iter": 1}} def test_simple(cls): - res = exec_classif.execBenchmark(1, 2, 3, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(1, 2, 3, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -257,7 +257,7 @@ class Test_execBenchmark(unittest.TestCase): def test_multiclass_no_iter(cls): cls.argumentDictionaries = [{"a": 10, "args": cls.args}, {"a": 4, "args": cls.args}] - res = exec_classif.execBenchmark(2, 1, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 2, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -272,7 +272,7 @@ class Test_execBenchmark(unittest.TestCase): {"a": 4, "args": cls.args}, {"a": 55, "args": cls.args}, {"a": 24, "args": cls.args}] - res = exec_classif.execBenchmark(2, 2, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 2, 2, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -283,7 +283,7 @@ class Test_execBenchmark(unittest.TestCase): cls.assertEqual(res, 3) def test_no_iter_biclass_multicore(cls): - res = exec_classif.execBenchmark(2, 1, 1, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 1, cls.argumentDictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, @@ -377,7 +377,7 @@ class Test_execOneBenchmark(unittest.TestCase): 1, 2, 1, 1, 2, 1, 21]), ExecMonoview_multicore=fakeExecMono, - ExecMultiview_multicore=fakeExecMulti, + exec_multiview_multicore=fakeExecMulti, initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) @@ -437,7 +437,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): flag=None, labels=np.array([0, 1, 2, 3, 4, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), ExecMonoview_multicore=fakeExecMono, - ExecMultiview_multicore=fakeExecMulti, + exec_multiview_multicore=fakeExecMulti, initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) diff --git a/multiview_platform/versions.py b/multiview_platform/versions.py index 39d9af6f5a9714a075348a622e087bf1249de355..23661197ccf84cbaa7fa3acd0f6fcc2376efc449 100644 --- a/multiview_platform/versions.py +++ b/multiview_platform/versions.py @@ -3,10 +3,10 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def testVersions(): +def test_versions(): """Used to test if all prerequisites are installed""" - isUpToDate = True - toInstall = [] + is_up_to_date = True + to_install = [] try: import sys @@ -16,81 +16,81 @@ def testVersions(): try: import cvxopt except ImportError: - isUpToDate = False - 
toInstall.append("cvxopt") + is_up_to_date = False + to_install.append("cvxopt") try: import pyscm except ImportError: - isUpToDate = False - toInstall.append("pyscm") + is_up_to_date = False + to_install.append("pyscm") try: import numpy except ImportError: - isUpToDate = False - toInstall.append("numpy") + is_up_to_date = False + to_install.append("numpy") try: import scipy except ImportError: - isUpToDate = False - toInstall.append("scipy") + is_up_to_date = False + to_install.append("scipy") try: import matplotlib except ImportError: - isUpToDate = False - toInstall.append("matplotlib") + is_up_to_date = False + to_install.append("matplotlib") try: import sklearn except ImportError: - isUpToDate = False - toInstall.append("sklearn") + is_up_to_date = False + to_install.append("sklearn") try: import logging except ImportError: - isUpToDate = False - toInstall.append("logging") + is_up_to_date = False + to_install.append("logging") try: import joblib except ImportError: - isUpToDate = False - toInstall.append("joblib") + is_up_to_date = False + to_install.append("joblib") try: import argparse except ImportError: - isUpToDate = False - toInstall.append("argparse") + is_up_to_date = False + to_install.append("argparse") try: import h5py # except ImportError: - isUpToDate = False - toInstall.append("h5py") + is_up_to_date = False + to_install.append("h5py") # try: # import graphviz # # except ImportError: - # isUpToDate = False - # toInstall.append("graphviz") + # is_up_to_date = False + # to_install.append("graphviz") try: import pickle # except ImportError: - isUpToDate = False - toInstall.append("pickle") + is_up_to_date = False + to_install.append("pickle") - if not isUpToDate: + if not is_up_to_date: print( "You can't run at the moment, please install the following modules : \n" + "\n".join( - toInstall)) + to_install)) quit() if __name__ == "__main__": - testVersions() + test_versions() diff --git a/requirements.txt b/requirements.txt index d64a21ee8f5a1a6af8b36de9bcdf97dbd96ad9aa..e165233e7213a44182001cf71dd874be8b6479f7 100755 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ six==1.12.0 pandas==0.23.3 m2r==0.2.1 docutils==0.12 -pyyaml==3.12 \ No newline at end of file +pyyaml==3.12 diff --git a/setup.py b/setup.py index 4b3122f16a18b34b26c7d4c58e1d2bc552b82a62..4879c8d6ccee7ec7bf363324f7fa8c88f5d2068e 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def setup_package(): # Une url qui pointe vers la page officielle de votre lib url='http://github.com/babau1/multiview-machine-learning-omis/', install_requires=['numpy>=1.8', 'scipy>=0.16','scikit-learn==0.19', - 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r', 'yaml'], + 'matplotlib', 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r', 'pyyaml'], # Il est d'usage de mettre quelques metadata à propos de sa lib # Pour que les robots puissent facilement la classer. # La liste des marqueurs autorisées est longue: