From d2df1de0634c0f654eea3a0136d384eb5ccaa77b Mon Sep 17 00:00:00 2001
From: Dominique BENIELLI <dominique.benielli@univ-amu.fr>
Date: Wed, 2 Oct 2019 17:31:02 +0200
Subject: [PATCH] pep8 not working

---
 multiview_platform/execute.py                      |   8 +-
 .../exec_classif.py                                | 182 +++---
 .../monoview/analyze_result.py                     |   8 +-
 .../monoview/exec_classif_mono_view.py             |  50 +-
 .../monoview/monoview_utils.py                     |   8 +-
 .../monoview_classifiers/adaboost_graalpy.py       |   4 +-
 .../monoview_classifiers/cb_boost.py               |   2 +-
 .../monoview_classifiers/cg_desc.py                |   2 +-
 .../decision_tree_pregen.py                        |   8 +-
 .../monoview_classifiers/knn.py                    |  10 +-
 .../monoview_classifiers/min_cq_graalpy.py         |   2 +-
 .../monoview_classifiers/random_forest.py          |   8 +-
 .../monoview_classifiers/scm.py                    |   8 +-
 .../monoview_classifiers/sgd.py                    |   8 +-
 .../multiview/additions/diversity_utils.py         |   6 +-
 .../multiview/analyze_results.py                   |   8 +-
 .../multiview/exec_multiview.py                    | 242 ++++---
 .../difficulty_fusion/analyze_results.py           |  16 +-
 .../difficulty_fusion/difficulty_fusion.py         |  16 +-
 .../disagree_fusion/analyze_results.py             |  12 +-
 .../disagree_fusion/disagree_fusion.py             |   8 +-
 .../double_fault_fusion/analyze_results.py         |  16 +-
 .../double_fault_fusion.py                         |  14 +-
 .../entropy_fusion/analyze_results.py              |  16 +-
 .../entropy_fusion/entropy_fusion.py               |   8 +-
 .../fat_late_fusion/analyze_results.py             |  14 +-
 .../fat_late_fusion/fat_late_fusion.py             |   8 +-
 .../fat_scm_late_fusion/analyze_results.py         |  16 +-
 .../fat_scm_late_fusion.py                         |  24 +-
 .../fusion/Methods/EarlyFusion.py                  |  10 +-
 .../EarlyFusionPackage/WeightedLinear.py           |  12 +-
 .../fusion/Methods/LateFusion.py                   |  32 +-
 .../LateFusionPackage/BayesianInference.py         |   6 +-
 .../LateFusionPackage/MajorityVoting.py            |   6 +-
 .../Methods/LateFusionPackage/SCMForLinear.py      |  18 +-
 .../Methods/LateFusionPackage/SVMForLinear.py      |   4 +-
 .../LateFusionPackage/WeightedLinear.py            |   8 +-
 .../fusion/analyze_results.py                      |   8 +-
 .../multiview_classifiers/fusion/fusion.py         |  40 +-
 .../pseudo_cq_fusion/analyze_results.py            |  12 +-
 .../pseudo_cq_fusion/pseudo_cq_fusion.py           |   6 +-
 .../result_analysis.py                             | 616 +++++++++---------
 .../utils/configuration.py                         |   5 +
 .../utils/execution.py                             |  24 +-
 .../utils/get_multiview_db.py                      |  80 +--
 .../utils/hyper_parameter_search.py                |  80 +--
 multiview_platform/tests/test_ExecClassif.py       | 132 ++--
 .../tests/test_ResultAnalysis.py                   |  18 +-
 .../test_ExecClassifMonoView.py                    |  24 +-
 .../test_compatibility.py                          |   2 +-
 .../test_DisagreeFusionModule.py                   |  28 +-
 .../test_DoubleFaultFusionModule.py                |  26 +-
 .../Test_Fusion/test_FusionModule.py               |   2 +-
 .../test_diversity_utils.py                        |  16 +-
 .../tests/test_utils/test_GetMultiviewDB.py        |   8 +-
 .../tests/test_utils/test_execution.py             |  18 +-
 .../tests/test_utils/test_multiclass.py            |   4 +-
 57 files changed, 995 insertions(+), 982 deletions(-)

diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py
index a142cb88..5772286a 100644
--- a/multiview_platform/execute.py
+++ b/multiview_platform/execute.py
@@ -1,9 +1,9 @@
 """This is the execution module, used to execute the code"""
 
 
-def exec():
-    import multiview_platform.versions as versions
-    versions.test_versions()
+def execute():
+    import multiview_platform.versions as vs
+    vs.test_versions()
     import sys
     from multiview_platform.mono_multi_view_classifiers import exec_classif
@@ -11,4 +11,4 @@ def exec():
 
 
 if __name__ == "__main__":
-    exec()
+    execute()
diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index fd51e530..868891c9 100644
---
a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -243,17 +243,16 @@ def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier, (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) for reduced_dictionary, dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list )) args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, - arguments, - nb_class, - view_index=view_index, - view_name=view_name) + arguments, + nb_class, + view_index=view_index, + view_name=view_name) if framework=="monoview" else gen_single_multiview_arg_dictionary(classifier_name, - arguments, - nb_class, - views_indices=views_indices) - for classifier_name, arguments - in multiple_kwargs_dict.items()] + arguments, + nb_class, + views_indices=views_indices) + for classifier_name, arguments in multiple_kwargs_dict.items()] return args_dictionnaries @@ -275,22 +274,22 @@ def init_monoview_kwargs(args, classifiers_names): For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`""" logging.debug("Start:\t Initializing monoview classifiers arguments") - monoviewKWARGS = {} - for classifiersName in classifiers_names: + monoview_kwargs = {} + for classifiers_name in classifiers_names: try: - getattr(monoview_classifiers, classifiersName) + getattr(monoview_classifiers, classifiers_name) except AttributeError: raise AttributeError( - classifiersName + " is not implemented in monoview_classifiers, " + classifiers_name + " is not implemented in monoview_classifiers, " "please specify the name of the file in monoview_classifiers") - monoviewKWARGS[ - classifiersName] = args[classifiersName] + monoview_kwargs[ + classifiers_name] = args[classifiers_name] logging.debug("Done:\t Initializing monoview classifiers arguments") - return monoviewKWARGS + return monoview_kwargs -def initKWARGSFunc(args, benchmark): +def init_kwargs_func(args, benchmark): monoview_kwargs = init_monoview_kwargs(args, benchmark["monoview"]) multiview_kwargs = init_multiview_kwargs(args, benchmark["multiview"]) kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs} @@ -399,32 +398,32 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None, logging.debug("Start:\t monoview benchmark") results_monoview += [ - ExecMonoview_multicore(directory, args["Base"]["name"], labels_names, + exec_monoview_multicore(directory, args["Base"]["name"], labels_names, classification_indices, k_folds, core_index, args["Base"]["type"], args["Base"]["pathf"], random_state, labels, - hyperParamSearch=hyper_param_search, + hyper_param_search=hyper_param_search, metrics=metrics, - nIter=args["Classification"]["hps_iter"], **argument) + n_iter=args["Classification"]["hps_iter"], **argument) for argument in argument_dictionaries["Monoview"]] logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - # argumentDictionaries = initMultiviewArguments(args, benchmark, views, - # viewsIndices, - # argumentDictionaries, - # randomState, directory, + # argument_dictionaries = initMultiviewArguments(args, benchmark, views, + # views_indices, + # argument_dictionaries, + # random_state, directory, # resultsMonoview, - # classificationIndices) + # classification_indices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") results_multiview = [ 
exec_multiview_multicore(directory, core_index, args["Base"]["name"], - classification_indices, kFolds, args["Base"]["type"], + classification_indices, k_folds, args["Base"]["type"], args["Base"]["pathf"], labels_dictionary, random_state, labels, hyper_param_search=hyper_param_search, - metrics=metrics, nIter=args["Classification"]["hps_iter"], + metrics=metrics, n_iter=args["Classification"]["hps_iter"], **arguments) for arguments in argument_dictionaries["multiview"]] logging.debug("Done:\t multiview benchmark") @@ -432,13 +431,13 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None, return [flag, results_monoview + results_multiview] -def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, +def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None, directory=None, classification_indices=None, args=None, k_folds=None, random_state=None, hyper_param_search=None, metrics=None, argument_dictionaries=None, - benchmark=None, views=None, viewsIndices=None, + benchmark=None, views=None, views_indices=None, flag=None, labels=None, exec_monoview_multicore=exec_monoview_multicore, exec_multiview_multicore=exec_multiview_multicore, @@ -452,7 +451,7 @@ def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, logging.debug("Start:\t monoview benchmark") nb_experiments = len(argument_dictionaries["monoview"]) - nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nbCores)) + nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) for step_index in range(nb_multicore_to_do): results_monoview += (Parallel(n_jobs=nb_cores)( delayed(exec_monoview_multicore)(directory, args["Base"]["name"], labels_names, @@ -461,7 +460,7 @@ def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, random_state, labels, hyper_param_search=hyper_param_search, metrics=metrics, - nIter=args["Classification"]["hps_iter"], + n_iter=args["Classification"]["hps_iter"], **argument_dictionaries["monoview"][ core_index + step_index * nb_cores]) for core_index in @@ -469,21 +468,21 @@ def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - # argumentDictionaries = initMultiviewArguments(args, benchmark, views, - # viewsIndices, - # argumentDictionaries, - # randomState, directory, + # argument_dictionaries = initMultiviewArguments(args, benchmark, views, + # views_indices, + # argument_dictionaries, + # random_state, directory, # resultsMonoview, - # classificationIndices) + # classification_indices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") results_multiview = [] nb_experiments = len(argument_dictionaries["multiview"]) - nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nbCores)) + nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) for step_index in range(nb_multicore_to_do): - results_multiview += Parallel(n_jobs=nbCores)( - delayed(exec_multiview_multicore)(directory, coreIndex, args["Base"]["name"], + results_multiview += Parallel(n_jobs=nb_cores)( + delayed(exec_multiview_multicore)(directory, core_index, args["Base"]["name"], classification_indices, k_folds, args["Base"]["type"], args["Base"]["pathf"], labels_dictionary, random_state, @@ -494,7 +493,7 @@ def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, ** argument_dictionaries["multiview"][ step_index * nb_cores + core_index]) - for coreIndex in + for 
core_index in range(min(nb_cores, nb_experiments - step_index * nb_cores))) logging.debug("Done:\t multiview benchmark") @@ -502,48 +501,48 @@ def exec_one_benchmark_multicore(nbCores=-1, labels_dictionary=None, def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, - directory=None, classificationIndices=None, - args=None, - kFolds=None, randomState=None, - hyperParamSearch=None, metrics=None, - argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, - flag=None, labels=None, - exec_monoview_multicore=exec_monoview_multicore, - exec_multiview_multicore=exec_multiview_multicore, - init_multiview_arguments=init_multiview_arguments): + directory=None, classification_indices=None, + args=None, + k_folds=None, random_state=None, + hyper_param_search=None, metrics=None, + argument_dictionaries=None, + benchmark=None, views=None, views_indices=None, + flag=None, labels=None, + exec_monoview_multicore=exec_monoview_multicore, + exec_multiview_multicore=exec_multiview_multicore, + init_multiview_arguments=init_multiview_arguments): results_monoview, labels_names = benchmark_init(directory, - classificationIndices, labels, - labels_dictionary, kFolds) + classification_indices, labels, + labels_dictionary, k_folds) logging.debug("Start:\t monoview benchmark") - for arguments in argumentDictionaries["monoview"]: + for arguments in argument_dictionaries["monoview"]: X = dataset_var.get("View" + str(arguments["view_index"])) Y = labels results_monoview += [ exec_monoview(directory, X, Y, args["Base"]["name"], labels_names, - classificationIndices, kFolds, - 1, args["Base"]["type"], args["Base"]["pathf"], randomState, - hyper_param_search=hyper_param_search, metrics=metrics, - n_iter=args["Classification"]["hps_iter"], **arguments)] + classification_indices, k_folds, + 1, args["Base"]["type"], args["Base"]["pathf"], random_state, + hyper_param_search=hyper_param_search, metrics=metrics, + n_iter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - # argumentDictionaries = initMultiviewArguments(args, benchmark, views, - # viewsIndices, - # argumentDictionaries, - # randomState, directory, + # argument_dictionaries = initMultiviewArguments(args, benchmark, views, + # views_indices, + # argument_dictionaries, + # random_state, directory, # resultsMonoview, - # classificationIndices) + # classification_indices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") results_multiview = [] - for arguments in argumentDictionaries["multiview"]: - resultsMultiview += [ - exec_multiview(directory, dataset_var, args["Base"]["name"], classificationIndices, - kFolds, 1, args["Base"]["type"], - args["Base"]["pathf"], labels_dictionary, randomState, labels, + for arguments in argument_dictionaries["multiview"]: + results_multiview += [ + exec_multiview(directory, dataset_var, args["Base"]["name"], classification_indices, + k_folds, 1, args["Base"]["type"], + args["Base"]["pathf"], labels_dictionary, random_state, labels, hyper_param_search=hyper_param_search, metrics=metrics, n_iter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t multiview benchmark") @@ -583,13 +582,13 @@ def exec_benchmark(nb_cores, stats_iter, nb_multiclass, multiclass testing set. metrics : list of lists metrics that will be used to evaluate the algorithms performance. 
- labelsDictionary : dictionary + labels_dictionary : dictionary Dictionary mapping labels indices to labels names. - nbLabels : int + nb_labels : int Total number of different labels in the dataset. dataset_var : HDF5 dataset file The full dataset that wil be used by the benchmark. - classifiersNames : list of strings + classifiers_names : list of strings List of the benchmarks's monoview classifiers names. rest_of_the_args : Just used for testing purposes @@ -608,17 +607,18 @@ def exec_benchmark(nb_cores, stats_iter, nb_multiclass, nb_multicore_to_do = range(int(math.ceil(float(nb_exps_to_do) / nb_cores))) for step_index in nb_multicore_to_do: results += (Parallel(n_jobs=nb_cores)(delayed(exec_one_benchmark) - (core_index=coreIndex, + (core_index=core_index, ** benchmark_arguments_dictionaries[ - coreIndex + step_index * nb_cores]) - for coreIndex in range( + core_index + step_index * nb_cores]) + for core_index in range( min(nb_cores, nb_exps_to_do - step_index * nb_cores)))) else: results += [exec_one_benchmark_multicore(nb_cores=nb_cores, ** benchmark_arguments_dictionaries[0])] else: for arguments in benchmark_arguments_dictionaries: + print(arguments) results += [exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") @@ -628,15 +628,15 @@ def exec_benchmark(nb_cores, stats_iter, nb_multiclass, multiclass_ground_truth = dataset_var.get("Labels").value logging.debug("Start:\t Analyzing predictions") results_mean_stds = get_results(results, stats_iter, nb_multiclass, - benchmark_arguments_dictionaries, - multiclass_ground_truth, - metrics, - classification_indices, - directories, - directory, - labels_dictionary, - nb_examples, - nb_labels) + benchmark_arguments_dictionaries, + multiclass_ground_truth, + metrics, + classification_indices, + directories, + directory, + labels_dictionary, + nb_examples, + nb_labels) logging.debug("Done:\t Analyzing predictions") delete(benchmark_arguments_dictionaries, nb_cores, dataset_var) return results_mean_stds @@ -712,7 +712,7 @@ def exec_classif(arguments): "matthews_corrcoef", "roc_auc_score"]] metrics = [[metricName] for metricName in metrics_names] - metrics = arangeMetrics(metrics, args["Classification"]["metric_princ"]) + metrics = arange_metrics(metrics, args["Classification"]["metric_princ"]) for metricIndex, metric in enumerate(metrics): if len(metric) == 1: metrics[metricIndex] = [metric[0], None] @@ -720,9 +720,10 @@ def exec_classif(arguments): benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos, args) init_kwargs= init_kwargs_func(args, benchmark) data_base_time = time.time() - start - argument_dictionaries = init_argument_dictionaries(benchmark, views_dictionary, - nb_class, init_kwargs) - # argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + argument_dictionaries = init_argument_dictionaries( + benchmark, views_dictionary, + nb_class, init_kwargs) + # argument_dictionaries = initMonoviewExps(benchmark, viewsDictionary, # NB_CLASS, initKWARGS) directories = execution.gen_direcorties_names(directory, stats_iter) benchmark_argument_dictionaries = execution.gen_argument_dictionaries( @@ -733,10 +734,11 @@ def exec_classif(arguments): argument_dictionaries, benchmark, nb_views, views, views_indices) nb_multiclass = len(labels_combinations) - results_mean_stds = exec_benchmark(nb_cores, stats_iter, nb_multiclass, - benchmark_argument_dictionaries, splits, directories, - directory, multiclass_labels, metrics, 
labels_dictionary, - nb_class, dataset_var) + results_mean_stds = exec_benchmark( + nb_cores, stats_iter, nb_multiclass, + benchmark_argument_dictionaries, splits, directories, + directory, multiclass_labels, metrics, labels_dictionary, + nb_class, dataset_var) noise_results.append([noise_std, results_mean_stds]) plot_results_noise(directory, noise_results, metrics[0][0], dataset_name) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py index 87e50317..cb93b1bf 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py @@ -3,10 +3,10 @@ from datetime import timedelta as hms from .. import metrics -def getDBConfigString(name, feat, classificationIndices, shape, +def getDBConfigString(name, feat, classification_indices, shape, classLabelsNames, KFolds): - learningRate = float(len(classificationIndices[0])) / ( - len(classificationIndices[0]) + len(classificationIndices[1])) + learningRate = float(len(classification_indices[0])) / ( + len(classification_indices[0]) + len(classification_indices[1])) dbConfigString = "Database configuration : \n" dbConfigString += "\t- Database name : " + name + "\n" dbConfigString += "\t- View name : " + feat + "\t View shape : " + str( @@ -51,7 +51,7 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics_list, nIter, feat, CL_type, clKWARGS, classLabelsNames, shape, y_train, y_train_pred, y_test, y_test_pred, time, - randomState, classifier, directory): + random_state, classifier, directory): metricsScores = {} metricModule = getattr(metrics, metrics_list[0][0]) trainScore = metricModule.score(y_train, y_train_pred) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 5c807f3a..8b096d47 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -27,25 +27,25 @@ __status__ = "Prototype" # Production, Development, Prototype # __date__ = 2016 - 03 - 25 -def exec_monoview_multicore(directory, name, labelsNames, classificationIndices, +def exec_monoview_multicore(directory, name, labels_names, classificationIndices, KFolds, datasetFileIndex, databaseType, - path, randomState, labels, - hyperParamSearch="randomizedSearch", + path, random_state, labels, + hyper_param_search="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r") neededViewIndex = args["viewIndex"] X = DATASET.get("View" + str(neededViewIndex)) Y = labels - return ExecMonoview(directory, X, Y, name, labelsNames, + return ExecMonoview(directory, X, Y, name, labels_names, classificationIndices, KFolds, 1, databaseType, path, - randomState, hyperParamSearch=hyperParamSearch, + random_state, hyper_param_search=hyper_param_search, metrics=metrics, nIter=nIter, **args) -def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, +def exec_monoview(directory, X, Y, name, labels_names, classificationIndices, KFolds, nbCores, databaseType, path, - randomState, hyperParamSearch="randomizedSearch", + randomState, 
hyper_param_search="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): logging.debug("Start:\t Loading data") kwargs, \ @@ -55,7 +55,7 @@ def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, X, \ learningRate, \ labelsString, \ - outputFileName = initConstants(args, X, classificationIndices, labelsNames, + outputFileName = initConstants(args, X, classificationIndices, labels_names, name, directory) logging.debug("Done:\t Loading data") @@ -79,7 +79,7 @@ def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, logging.debug("Start:\t Generate classifier args") classifierModule = getattr(monoview_classifiers, CL_type) classifier_class_name = classifierModule.classifier_class_name - clKWARGS, testFoldsPreds = getHPs(classifierModule, hyperParamSearch, + clKWARGS, testFoldsPreds = getHPs(classifierModule, hyper_param_search, nIter, CL_type, classifier_class_name, X_train, y_train, randomState, outputFileName, @@ -114,8 +114,8 @@ def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, stringAnalysis, \ imagesAnalysis, \ metricsScores = execute(name, classificationIndices, KFolds, nbCores, - hyperParamSearch, metrics, nIter, feat, CL_type, - clKWARGS, labelsNames, X.shape, + hyper_param_search, metrics, nIter, feat, CL_type, + clKWARGS, labels_names, X.shape, y_train, y_train_pred, y_test, y_test_pred, t_end, randomState, classifier, outputFileName) # cl_desc = [value for key, value in sorted(clKWARGS.items())] @@ -135,7 +135,7 @@ def exec_monoview(directory, X, Y, name, labelsNames, classificationIndices, # return viewIndex, [CL_type, feat, metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred, testFoldsPreds] -def initConstants(args, X, classificationIndices, labelsNames, name, directory): +def initConstants(args, X, classificationIndices, labels_names, name, directory): try: kwargs = args["args"] except KeyError: @@ -149,7 +149,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): X = get_value(X) learningRate = float(len(classificationIndices[0])) / ( len(classificationIndices[0]) + len(classificationIndices[1])) - labelsString = "-".join(labelsNames) + labelsString = "-".join(labels_names) CL_type_string = CL_type timestr = time.strftime("%Y_%m_%d-%H_%M_%S") outputFileName = directory + CL_type_string + "/" + feat + "/" + timestr + "-results-" + CL_type_string + "-" + labelsString + \ @@ -177,15 +177,15 @@ def init_train_test(X, Y, classificationIndices): return X_train, y_train, X_test, y_test, X_test_multiclass -def getHPs(classifierModule, hyperParamSearch, nIter, classifier_module_name, +def getHPs(classifierModule, hyper_param_search, nIter, classifier_module_name, classifier_class_name, X_train, y_train, randomState, outputFileName, KFolds, nbCores, metrics, kwargs): - if hyperParamSearch != "None": + if hyper_param_search != "None": logging.debug( - "Start:\t " + hyperParamSearch + " best settings with " + str( + "Start:\t " + hyper_param_search + " best settings with " + str( nIter) + " iterations for " + classifier_module_name) - classifierHPSearch = getattr(hyper_parameter_search, hyperParamSearch) + classifierHPSearch = getattr(hyper_parameter_search, hyper_param_search) clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, "monoview", randomState, outputFileName, @@ -197,7 +197,7 @@ def getHPs(classifierModule, hyperParamSearch, nIter, classifier_module_name, n_iter=nIter, classifier_kwargs=kwargs[ classifier_module_name]) - 
logging.debug("Done:\t " + hyperParamSearch + " best settings") + logging.debug("Done:\t " + hyper_param_search + " best settings") else: clKWARGS = kwargs[classifier_module_name + "KWARGS"] testFoldsPreds = None @@ -262,7 +262,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # default='../../../data/Plausible') # groupStandard.add_argument('--directory', metavar='STRING', action='store', # help='Path of the output directory', default='') -# groupStandard.add_argument('--labelsNames', metavar='STRING', +# groupStandard.add_argument('--labels_names', metavar='STRING', # action='store', nargs='+', # help='Name of the labels used for classification', # default=['Yes', 'No']) @@ -271,7 +271,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # help='Path to the classificationIndices pickle file', # default='') # groupStandard.add_argument('--KFolds', metavar='STRING', action='store', -# help='Path to the kFolds pickle file', +# help='Path to the k_folds pickle file', # default='') # groupStandard.add_argument('--nbCores', metavar='INT', action='store', # help='Number of cores, -1 for all', @@ -279,7 +279,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # groupStandard.add_argument('--randomState', metavar='INT', action='store', # help='Seed for the random state or pickable randomstate file', # default=42) -# groupStandard.add_argument('--hyperParamSearch', metavar='STRING', +# groupStandard.add_argument('--hyper_param_search', metavar='STRING', # action='store', # help='The type of method used to search the best set of hyper parameters', # default='randomizedSearch') @@ -299,7 +299,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # directory = args.directory # name = args.name # classifierName = args.cl_name -# labelsNames = args.labelsNames +# labels_names = args.labels_names # viewName = args.view # with open(args.classificationIndices, 'rb') as handle: # classificationIndices = pickle.load(handle) @@ -309,7 +309,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # path = args.pathF # with open(args.randomState, 'rb') as handle: # randomState = pickle.load(handle) -# hyperParamSearch = args.hyperParamSearch +# hyper_param_search = args.hyper_param_search # with open(args.metrics, 'rb') as handle: # metrics = pickle.load(handle) # nIter = args.nIter @@ -346,10 +346,10 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # logging.getLogger().addHandler(logging.StreamHandler()) # # # Computing on multiple cores -# res = ExecMonoview(directory, X, Y, name, labelsNames, +# res = ExecMonoview(directory, X, Y, name, labels_names, # classificationIndices, KFolds, nbCores, databaseType, # path, -# randomState, hyperParamSearch=hyperParamSearch, +# randomState, hyper_param_search=hyper_param_search, # metrics=metrics, nIter=nIter, **kwargs) # # with open(directory + "res.pickle", "wb") as handle: diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index d152c7d6..4ba9f364 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -210,20 +210,20 @@ class MonoviewResult(object): # return supports, dict((label, index) for label, index in zip(labels, range(len(labels)))) # # -# def 
splitDataset(LABELS, NB_CLASS, LEARNING_RATE, DATASET_LENGTH, randomState): -# validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState) +# def splitDataset(LABELS, NB_CLASS, LEARNING_RATE, DATASET_LENGTH, random_state): +# validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, random_state) # validationIndices.sort() # return validationIndices # # -# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState): +# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, random_state): # labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS)) # nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports] # trainingExamplesIndices = [] # usedIndices = [] # while nbTrainingExamples != [0 for i in range(NB_CLASS)]: # isUseFull = False -# index = int(randomState.randint(0, DATASET_LENGTH - 1)) +# index = int(random_state.randint(0, DATASET_LENGTH - 1)) # if index not in usedIndices: # isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict) # if isUseFull: diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py index 3ffd5e23..6052032a 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py @@ -202,9 +202,9 @@ class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - paramsSet.append({"n_iterations": randomState.randint(1, 500), }) + paramsSet.append({"n_iterations": random_state.randint(1, 500), }) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py index c9340c4d..b86aee42 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py @@ -42,7 +42,7 @@ class CBBoost(CBBoostClassifier, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py index 7a881285..e5ef93cd 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py @@ -48,7 +48,7 @@ class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py index 124d7a90..5f5ee1c3 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py @@ -80,10 +80,10 @@ class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): paramsSet = [] for _ in range(nIter): - paramsSet.append({"max_depth": randomState.randint(1, 300), - "criterion": randomState.choice(["gini", "entropy"]), - "splitter": randomState.choice(["best", "random"])}) + paramsSet.append({"max_depth": random_state.randint(1, 300), + "criterion": random_state.choice(["gini", "entropy"]), + "splitter": random_state.choice(["best", "random"])}) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py index 8ecb4139..5a08934d 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py @@ -47,13 +47,13 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): paramsSet = [] for _ in range(nIter): - paramsSet.append({"n_neighbors": randomState.randint(1, 20), - "weights": randomState.choice( + paramsSet.append({"n_neighbors": random_state.randint(1, 20), + "weights": random_state.choice( ["uniform", "distance"]), - "algorithm": randomState.choice( + "algorithm": random_state.choice( ["auto", "ball_tree", "kd_tree", "brute"]), - "p": randomState.choice([1, 2])}) + "p": random_state.choice([1, 2])}) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py index c5318746..969b1571 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py @@ -62,7 +62,7 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py index 6fe0dcad..54eb985b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py @@ -45,10 +45,10 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): paramsSet = [] for _ in range(nIter): - paramsSet.append({"n_estimators": randomState.randint(1, 300), - "max_depth": randomState.randint(1, 300), - "criterion": randomState.choice(["gini", "entropy"])}) + paramsSet.append({"n_estimators": random_state.randint(1, 300), + "max_depth": random_state.randint(1, 300), + "criterion": 
random_state.choice(["gini", "entropy"])}) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py index f5c3df8f..ab9afe67 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py @@ -77,11 +77,11 @@ class SCM(scm, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): paramsSet = [] for _ in range(nIter): paramsSet.append( - {"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + {"model_type": random_state.choice(["conjunction", "disjunction"]), + "max_rules": random_state.randint(1, 15), + "p": random_state.random_sample()}) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py index d43d372c..22bbad8d 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py @@ -42,11 +42,11 @@ class SGD(SGDClassifier, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): +def paramsToSet(nIter, random_state): paramsSet = [] for _ in range(nIter): - paramsSet.append({"loss": randomState.choice(['log', 'modified_huber']), - "penalty": randomState.choice( + paramsSet.append({"loss": random_state.choice(['log', 'modified_huber']), + "penalty": random_state.choice( ["l1", "l2", "elasticnet"]), - "alpha": randomState.random_sample()}) + "alpha": random_state.random_sample()}) return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py index 97773018..3fdbacd1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py @@ -7,14 +7,14 @@ import numpy as np from ...utils.multiclass import isBiclass, genMulticlassMonoviewDecision -def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): +def getClassifiersDecisions(allClassifersNames, views_indices, resultsMonoview): """ This function gets the monoview classifiers decisions from resultsMonoview. If no HP optimization is done, there is just one fold, the training set. 
The classifiersDecisions variable is ordered as : classifiersDecisions[viewIndex, classifierIndex, foldIndex, exampleIndex] - And the classifiersNames variable is ordered as : - classifiersNames[viewIndex][classifierIndex] + And the classifiers_names variable is ordered as : + classifiers_names[viewIndex][classifierIndex] """ nbViews = len(viewsIndices) nbClassifiers = len(allClassifersNames) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py index 460bcf7a..2c4a90e1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py @@ -59,10 +59,10 @@ def getMetricsScores(metrics, trainLabels, testLabels, def execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metric_list, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metric_list, + views_indices, random_state, labels, classifierModule): classifier_name = classifier.short_name learningIndices, validationIndices, testIndicesMulticlass = classificationIndices @@ -84,7 +84,7 @@ def execute(classifier, trainLabels, scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ ', '.join( - LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join( + labels_dictionary.values()) + "\n\t-Views : " + ', '.join( views) + "\n\t-" + str( KFolds.n_splits) + \ " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.getConfig() diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 696c97e5..76c89056 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -18,177 +18,171 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, - DATASET): +def init_constants(kwargs, classification_indices, metrics, name, nb_cores, k_folds, + dataset_var): """Used to init the constants""" views = kwargs["view_names"] - viewsIndices = kwargs["view_indices"] + views_indices = kwargs["view_indices"] if not metrics: metrics = [["f1_score", None]] classifier_name = kwargs["classifier_name"] classifier_config = kwargs[classifier_name] - learningRate = len(classificationIndices[0]) / float( - (len(classificationIndices[0]) + len(classificationIndices[1]))) + learning_rate = len(classification_indices[0]) / float( + (len(classification_indices[0]) + len(classification_indices[1]))) t_start = time.time() logging.info("Info\t: Classification - Database : " + str( name) + " ; Views : " + ", ".join(views) + " ; Algorithm : " + classifier_name + " ; Cores : " + str( - nbCores) + ", Train ratio : " + str(learningRate) + - ", CV on " + str(KFolds.n_splits) + " folds") - - for viewIndex, viewName in zip(viewsIndices, views): - logging.info("Info:\t Shape of " + str(viewName) + " :" + str( - get_shape(DATASET, viewIndex))) - return classifier_name, t_start, viewsIndices, classifier_config, views, learningRate - - -def 
saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifierModule, - classificationKWARGS, directory, learningRate, name, - imagesAnalysis): - labelsSet = set(LABELS_DICTIONARY.values()) - logging.info(stringAnalysis) - viewsString = "-".join(views) - labelsString = "-".join(labelsSet) + nbCores) + ", Train ratio : " + str(learning_rate) + + ", CV on " + str(k_folds.n_splits) + " folds") + + for view_index, view_name in zip(views_indices, views): + logging.info("Info:\t Shape of " + str(view_name) + " :" + str( + get_shape(dataset_var, view_index))) + return classifier_name, t_start, views_indices, classifier_config, views, learning_rate + + +def save_results(classifier, labels_dictionary, string_analysis, views, classifier_module, + classification_kargs, directory, learning_rate, name, + images_analysis): + labels_set = set(labels_dictionary.values()) + logging.info(string_analysis) + views_string = "-".join(views) + labels_string = "-".join(labels_set) timestr = time.strftime("%Y_%m_%d-%H_%M_%S") - CL_type_string = classifier.short_name - outputFileName = directory + "/" + CL_type_string + "/" + timestr + "-results-" + CL_type_string + "-" + viewsString + '-' + labelsString + \ - '-learnRate_{0:.2f}'.format(learningRate) + '-' + name - if not os.path.exists(os.path.dirname(outputFileName)): + cl_type_string = classifier.short_name + output_file_name = directory + "/" + cl_type_string + "/" + timestr + \ + "-results-" + cl_type_string + "-" + views_string + '-' + labels_string + \ + '-learnRate_{0:.2f}'.format(learning_rate) + '-' + name + if not os.path.exists(os.path.dirname(output_file_name)): try: - os.makedirs(os.path.dirname(outputFileName)) + os.makedirs(os.path.dirname(output_file_name)) except OSError as exc: if exc.errno != errno.EEXIST: raise - outputTextFile = open(outputFileName + '.txt', 'w') - outputTextFile.write(stringAnalysis) - outputTextFile.close() + output_text_file = open(output_file_name + '.txt', 'w') + output_text_file.write(string_analysis) + output_text_file.close() - if imagesAnalysis is not None: - for imageName in imagesAnalysis.keys(): - if os.path.isfile(outputFileName + imageName + ".png"): + if images_analysis is not None: + for image_name in images_analysis.keys(): + if os.path.isfile(output_file_name + image_name + ".png"): for i in range(1, 20): - testFileName = outputFileName + imageName + "-" + str( + test_file_name = output_file_name + image_name + "-" + str( i) + ".png" if not os.path.isfile(testFileName): - imagesAnalysis[imageName].savefig(testFileName, transparent=True) + images_analysis[image_name].savefig(test_file_name, transparent=True) break - imagesAnalysis[imageName].savefig( - outputFileName + imageName + '.png', transparent=True) + images_analysis[image_name].savefig( + output_file_name + image_name + '.png', transparent=True) -def exec_multiview_multicore(directory, coreIndex, name, learningRate, nbFolds, - databaseType, path, LABELS_DICTIONARY, - randomState, labels, - hyperParamSearch=False, nbCores=1, metrics=None, - nIter=30, **arguments): - """Used to load an HDF5 dataset for each parallel job and execute multiview classification""" - DATASET = h5py.File(path + name + str(coreIndex) + ".hdf5", "r") - return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, - databaseType, path, LABELS_DICTIONARY, - randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=nIter, **arguments) +def exec_multiview_multicore(directory, core_index, name, learning_rate, nb_folds, + 
database_type, path, labels_dictionary, + random_state, labels, + hyper_param_search=False, nb_cores=1, metrics=None, + n_iter=30, **arguments): + """Used to load an HDF5 dataset_var for each parallel job and execute multiview classification""" + dataset_var = h5py.File(path + name + str(core_index) + ".hdf5", "r") + return exec_multiview(directory, dataset_var, name, learning_rate, nb_folds, 1, + database_type, path, labels_dictionary, + random_state, labels, + hyper_param_search=hyper_param_search, metrics=metrics, + n_iter=n_iter, **arguments) -def exec_multiview(directory, DATASET, name, classificationIndices, KFolds, - nbCores, databaseType, path, - LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=False, metrics=None, nIter=30, **kwargs): +def exec_multiview(directory, dataset_var, name, classification_indices, k_folds, + nb_cores, database_type, path, + labels_dictionary, random_state, labels, + hyper_param_search=False, metrics=None, n_iter=30, **kwargs): """Used to execute multiview classification and result analysis""" logging.debug("Start:\t Initialize constants") - CL_type, \ + cl_type, \ t_start, \ - viewsIndices, \ + views_indices, \ classifier_config, \ views, \ - learningRate = initConstants(kwargs, classificationIndices, metrics, name, - nbCores, KFolds, DATASET) + learning_rate = init_constants(kwargs, classification_indices, metrics, name, + nb_cores, k_folds, dataset_var) logging.debug("Done:\t Initialize constants") - extractionTime = time.time() - t_start - logging.info("Info:\t Extraction duration " + str(extractionTime) + "s") + extraction_time = time.time() - t_start + logging.info("Info:\t Extraction duration " + str(extraction_time) + "s") logging.debug("Start:\t Getting train/test split") - learningIndices, validationIndices, testIndicesMulticlass = classificationIndices + learning_indices, validation_indices, test_indices_multiclass = classification_indices logging.debug("Done:\t Getting train/test split") logging.debug("Start:\t Getting classifiers modules") # classifierPackage = getattr(multiview_classifiers, # CL_type) # Permet d'appeler un module avec une string - classifier_module = getattr(multiview_classifiers, CL_type) + classifier_module = getattr(multiview_classifiers, cl_type) classifier_name = classifier_module.classifier_class_name # classifierClass = getattr(classifierModule, CL_type + "Class") logging.debug("Done:\t Getting classifiers modules") logging.debug("Start:\t Optimizing hyperparameters") - if hyperParamSearch != "None": - classifier_config = hyper_parameter_search.searchBestSettings(DATASET, labels, - classifier_module, - classifier_name, - metrics[0], - learningIndices, - KFolds, - randomState, - directory, - nb_cores=nbCores, - viewsIndices=viewsIndices, - searchingTool=hyperParamSearch, - n_iter=nIter, - classifier_config=classifier_config) - - classifier = getattr(classifier_module, classifier_name)(randomState, + if hyper_param_search != "None": + classifier_config = hyper_parameter_search.search_best_settings( + dataset_var, labels, classifier_module, classifier_name, + metrics[0], learning_indices, k_folds, random_state, + directory, nb_cores=nb_cores, views_indices=views_indices, + searching_tool=hyper_param_search, n_iter=n_iter, + classifier_config=classifier_config) + + classifier = getattr(classifier_module, classifier_name)(random_state, **classifier_config) logging.debug("Done:\t Optimizing hyperparameters") logging.debug("Start:\t Fitting classifier") - classifier.fit(DATASET, labels, 
train_indices=learningIndices, - view_indices=viewsIndices) + classifier.fit(dataset_var, labels, train_indices=learning_indices, + view_indices=views_indices) logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - trainLabels = classifier.predict(DATASET, predict_indices=learningIndices, - view_indices=viewsIndices) - testLabels = classifier.predict(DATASET, predict_indices=validationIndices, - view_indices=viewsIndices) - fullLabels = np.zeros(labels.shape, dtype=int) - 100 - for trainIndex, index in enumerate(learningIndices): - fullLabels[index] = trainLabels[trainIndex] - for testIndex, index in enumerate(validationIndices): - fullLabels[index] = testLabels[testIndex] - if testIndicesMulticlass != []: - testLabelsMulticlass = classifier.predict_hdf5(DATASET, - usedIndices=testIndicesMulticlass, - viewsIndices=viewsIndices) + train_labels = classifier.predict(dataset_var, predict_indices=learning_indices, + view_indices=views_indices) + test_labels = classifier.predict(dataset_var, predict_indices=validation_indices, + view_indices=views_indices) + full_labels = np.zeros(labels.shape, dtype=int) - 100 + for train_index, index in enumerate(learning_indices): + full_labels[index] = train_labels[train_index] + for test_index, index in enumerate(validation_indices): + full_labels[index] = test_labels[test_index] + if test_indices_multiclass != []: + test_labels_multiclass = classifier.predict_hdf5(dataset_var, + used_indices=test_indices_multiclass, + views_indices=views_indices) else: - testLabelsMulticlass = [] + test_labels_multiclass = [] logging.info("Done:\t Pertidcting") - classificationTime = time.time() - t_start - logging.info("Info:\t Classification duration " + str(extractionTime) + "s") + classification_time = time.time() - t_start + logging.info("Info:\t Classification duration " + str(extraction_time) + "s") # TODO: get better cltype - logging.info("Start:\t Result Analysis for " + CL_type) - times = (extractionTime, classificationTime) - stringAnalysis, imagesAnalysis, metricsScores = analyze_results.execute( - classifier, trainLabels, - testLabels, DATASET, + logging.info("Start:\t Result Analysis for " + cl_type) + times = (extraction_time, classification_time) + string_analysis, images_analysis, metrics_scores = analyze_results.execute( + classifier, train_labels, + test_labels, dataset_var, classifier_config, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, - name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifier_module) - logging.info("Done:\t Result Analysis for " + CL_type) + labels_dictionary, views, nb_cores, times, + name, k_folds, + hyper_param_search, n_iter, metrics, + views_indices, random_state, labels, classifier_module) + logging.info("Done:\t Result Analysis for " + cl_type) logging.debug("Start:\t Saving preds") - saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifier_module, - classifier_config, directory, - learningRate, name, imagesAnalysis) + save_results(classifier, labels_dictionary, string_analysis, views, classifier_module, + classifier_config, directory, + learning_rate, name, images_analysis) logging.debug("Start:\t Saving preds") - return MultiviewResult(CL_type, classifier_config, metricsScores, - fullLabels, testLabelsMulticlass) + return MultiviewResult(cl_type, classifier_config, metrics_scores, + full_labels, test_labels_multiclass) # return CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass @@ -204,7 
+198,7 @@ if __name__ == "__main__": groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') groupStandard.add_argument('--type', metavar='STRING', action='store', - help='Type of Dataset', default=".hdf5") + help='Type of dataset', default=".hdf5") groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', default='DB') @@ -217,7 +211,7 @@ if __name__ == "__main__": groupStandard.add_argument('--directory', metavar='STRING', action='store', help='Path to the views (default: %(default)s)', default='results-FeatExtr/') - groupStandard.add_argument('--LABELS_DICTIONARY', metavar='STRING', + groupStandard.add_argument('--labels_dictionary', metavar='STRING', action='store', nargs='+', help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') @@ -231,7 +225,7 @@ if __name__ == "__main__": groupStandard.add_argument('--randomState', metavar='INT', action='store', help='Seed for the random state or pickable randomstate file', default=42) - groupStandard.add_argument('--hyperParamSearch', metavar='STRING', + groupStandard.add_argument('--hyper_param_search', metavar='STRING', action='store', help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch') @@ -248,20 +242,20 @@ if __name__ == "__main__": directory = args.directory name = args.name - LABELS_DICTIONARY = args.LABELS_DICTIONARY + labels_dictionary = args.labels_dictionary classificationIndices = args.classificationIndices - KFolds = args.KFolds + k_folds = args.k_folds nbCores = args.nbCores databaseType = None - path = args.pathF - randomState = args.randomState - hyperParamSearch = args.hyperParamSearch + path = args.path_f + random_state = args.random_state + hyper_param_search = args.hyper_param_search metrics = args.metrics - nIter = args.nIter + n_iter = args.n_iter kwargs = args.kwargs # Extract the data using MPI ? - DATASET = None + dataset_var = None labels = None # (get from CSV ?) 
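# Hypothetical sketch (not part of the patch): init_constants() earlier in this
# file reads the multiview **kwargs through the keys "view_names",
# "view_indices", "classifier_name" and kwargs[<classifier_name>] for the
# classifier's own configuration. Assuming the "fusion" classifier shipped in
# this diff, the kwargs passed to the exec_multiview call below could look like
# this; only the key layout comes from the code, the values are invented here.
kwargs = {
    "view_names": ["View0", "View1"],   # names of the HDF5 views to classify
    "view_indices": [0, 1],             # their indices in the dataset
    "classifier_name": "fusion",        # any module of multiview_classifiers
    "fusion": {},                       # that classifier's own configuration dict
}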
logfilename = "gen a good logfilename" @@ -283,11 +277,11 @@ if __name__ == "__main__": if args.log: logging.getLogger().addHandler(logging.StreamHandler()) - res = ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, - nbCores, databaseType, path, - LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=nIter, **kwargs) + res = exec_multiview(directory, dataset_var, name, classification_indices, k_folds, + nb_cores, databaseType, path, + labels_dictionary, random_state, labels, + hyper_param_search=hyper_param_search, metrics=metrics, + n_iter=n_iter, **kwargs) # Pickle the res # Go put your token diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/analyze_results.py index 07cf0a7b..d5fcd8a9 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/analyze_results.py @@ -7,15 +7,15 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/difficulty_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/difficulty_fusion.py index 007f9f00..2eb07e77 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/difficulty_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/difficulty_fusion.py @@ -31,24 +31,24 @@ def difficulty(classifiersDecisions, combination, foldsGroudTruth, foldsLen): return difficulty_score -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, random_state, directory, resultsMonoview, classificationIndices): return diversity_utils.getArgs(args, benchmark, views, - viewsIndices, randomState, directory, - resultsMonoview, classificationIndices, + views_indices, random_state, directory, + resultsMonoview, classification_indices, difficulty, "difficulty_fusion") -def genParamsSets(classificationKWARGS, randomState, nIter=1): - return diversity_utils.genParamsSets(classificationKWARGS, randomState, nIter=nIter) +def genParamsSets(classificationKWARGS, random_state, nIter=1): + return 
diversity_utils.genParamsSets(classificationKWARGS, random_state, nIter=nIter) class DifficultyFusionClass(diversity_utils.DiversityFusionClass): - def __init__(self, randomState, NB_CORES=1, **kwargs): - diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs) + def __init__(self, random_state, NB_CORES=1, **kwargs): + diversity_utils.DiversityFusionClass.__init__(self, random_state, NB_CORES=1, **kwargs) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\ + stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiers_names)+\ ', with a difficulty of '+str(self.div_measure) return stringAnalysis \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/analyze_results.py index 07cf0a7b..3823e687 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/analyze_results.py @@ -8,14 +8,14 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/disagree_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/disagree_fusion.py index 035bd22f..08fbaea5 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/disagree_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/disagree_fusion.py @@ -16,8 +16,8 @@ def disagree(classifierDecision1, classifierDecision2, ground_truth): return np.logical_xor(classifierDecision1, classifierDecision2) -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): - return diversity_utils.getArgs(args, benchmark, views, viewsIndices, +def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): + return diversity_utils.getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices, disagree, "disagree_fusion") @@ -25,7 +25,7 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result def genParamsSets(classificationKWARGS, randomState, nIter=1): return diversity_utils.genParamsSets(classificationKWARGS, randomState, nIter=nIter) # """Used to generate 
parameters sets for the random hyper parameters optimization function""" - # weights = [randomState.random_sample(len(classificationKWARGS["classifiersNames"])) for _ in range(nIter)] + # weights = [randomState.random_sample(len(classificationKWARGS["classifiers_names"])) for _ in range(nIter)] # nomralizedWeights = [[weightVector/np.sum(weightVector)] for weightVector in weights] # return nomralizedWeights @@ -36,6 +36,6 @@ class DisagreeFusionClass(diversity_utils.DiversityFusionClass): diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\ + stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiers_names)+\ ', with a disagreement of '+str(self.div_measure) return stringAnalysis diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/analyze_results.py index 07cf0a7b..d5fcd8a9 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/analyze_results.py @@ -7,15 +7,15 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/double_fault_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/double_fault_fusion.py index a08e4807..e48f3a62 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/double_fault_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/double_fault_fusion.py @@ -17,24 +17,24 @@ def doubleFault(classifierDecision1, classifierDecision2, ground_truth): np.logical_xor(classifierDecision2, ground_truth)) -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, random_state, directory, resultsMonoview, classificationIndices): return diversity_utils.getArgs(args, benchmark, views, - viewsIndices, randomState, directory, + views_indices, random_state, directory, resultsMonoview, classificationIndices, doubleFault, "double_fault_fusion") -def 
genParamsSets(classificationKWARGS, randomState, nIter=1): - return diversity_utils.genParamsSets(classificationKWARGS, randomState, nIter=nIter) +def genParamsSets(classificationKWARGS, random_state, nIter=1): + return diversity_utils.genParamsSets(classificationKWARGS, random_state, nIter=nIter) class DoubleFaultFusionClass(diversity_utils.DiversityFusionClass): - def __init__(self, randomState, NB_CORES=1, **kwargs): - diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs) + def __init__(self, random_state, NB_CORES=1, **kwargs): + diversity_utils.DiversityFusionClass.__init__(self, random_state, NB_CORES=1, **kwargs) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\ + stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiers_names)+\ ', with a double fault ratio of '+str(self.div_measure) return stringAnalysis \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/analyze_results.py index 07cf0a7b..bdd63730 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/analyze_results.py @@ -7,15 +7,15 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/entropy_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/entropy_fusion.py index eb467c8f..348bcc1f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/entropy_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/entropy_fusion.py @@ -26,10 +26,10 @@ def entropy(classifiersDecisions, combination, foldsGroudTruth, foldsLen): return entropy_score -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): return diversity_utils.getArgs(args, benchmark, views, - viewsIndices, randomState, directory, - resultsMonoview, classificationIndices, + views_indices, randomState, directory, + resultsMonoview, classification_indices, 
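The disagree and doubleFault measures touched in the hunks above are plain element-wise operations on boolean decision vectors: disagreement is the XOR of the two classifiers' decisions, and a double fault is an example on which both classifiers differ from the ground truth. A small self-contained NumPy sketch with toy 0/1 decisions (the vectors are made up; only the formulas come from the module):

    import numpy as np

    # Toy binary decisions for two classifiers and the ground truth (5 examples).
    decisions_1 = np.array([0, 1, 1, 0, 1])
    decisions_2 = np.array([0, 0, 1, 1, 1])
    ground_truth = np.array([0, 1, 0, 1, 1])

    # Disagreement: examples on which the two classifiers differ.
    disagree = np.logical_xor(decisions_1, decisions_2)

    # Double fault: examples on which both classifiers are wrong.
    double_fault = np.logical_and(np.logical_xor(decisions_1, ground_truth),
                                  np.logical_xor(decisions_2, ground_truth))

    print(disagree.mean())      # 0.4: the pair disagrees on 2 of 5 examples
    print(double_fault.mean())  # 0.2: both are wrong on 1 of 5 examples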
entropy, "entropy_fusion") @@ -44,6 +44,6 @@ class EntropyFusionClass(diversity_utils.DiversityFusionClass): diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\ + stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiers_names)+\ ', with an entropy of '+str(self.div_measure) return stringAnalysis \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py index 07cf0a7b..6e58780d 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py @@ -7,15 +7,15 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py index 041b679c..cb6b810f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py @@ -12,7 +12,7 @@ def getBenchmark(benchmark, args=None): return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): argumentsList = [] multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult in resultsMonoview] if isBiclass(multiclass_preds): @@ -26,7 +26,7 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result arguments = {"CL_type": "fat_late_fusion", "views": views, "NB_VIEW": len(resultsMonoview), - "viewsIndices": range(len(resultsMonoview)), + "views_indices": range(len(resultsMonoview)), "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FatLateFusionKWARGS": { @@ -58,10 +58,10 @@ class FatLateFusionClass: def setParams(self, paramsSet): self.weights = paramsSet[0] - def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): + def fit_hdf5(self, DATASET, labels, 
trainIndices=None, views_indices=None, metric=["f1_score", None]): pass - def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): + def predict_hdf5(self, DATASET, usedIndices=None, views_indices=None): if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) votes = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbClass"]), dtype=float) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py index 07cf0a7b..d5fcd8a9 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py @@ -7,15 +7,15 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + classificationKWARGS, classification_indices, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, random_state, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py index 205b360e..34d3e982 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py @@ -16,19 +16,19 @@ def getBenchmark(benchmark, args=None): -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, random_state, directory, resultsMonoview, classificationIndices): argumentsList = [] multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult in resultsMonoview] if isBiclass(multiclass_preds): monoviewDecisions = np.array([monoviewResult.full_labels_pred for monoviewResult in resultsMonoview]) else: - monoviewDecisions = np.array([genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in resultsMonoview]) + monoviewDecisions = np.array([genMulticlassMonoviewDecision(monoviewResult, classification_indices) for monoviewResult in resultsMonoview]) monoviewDecisions = np.transpose(monoviewDecisions) #monoviewDecisions = np.transpose(np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview])) arguments = {"CL_type": "fat_scm_late_fusion", "views": ["all"], "NB_VIEW": len(resultsMonoview), - "viewsIndices": 
range(len(resultsMonoview)), + "views_indices": range(len(resultsMonoview)), "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FatSCMLateFusionKWARGS": { @@ -42,13 +42,13 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result return argumentsList -def genParamsSets(classificationKWARGS, randomState, nIter=1): +def genParamsSets(classificationKWARGS, random_state, nIter=1): """Used to generate parameters sets for the random hyper parameters optimization function""" paramsSets = [] for _ in range(nIter): - max_attributes = randomState.randint(1, 20) - p = randomState.random_sample() - model = randomState.choice(["conjunction", "disjunction"]) + max_attributes = random_state.randint(1, 20) + p = random_state.random_sample() + model = random_state.choice(["conjunction", "disjunction"]) paramsSets.append([p, max_attributes, model]) return paramsSets @@ -56,7 +56,7 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): class FatSCMLateFusionClass: - def __init__(self, randomState, NB_CORES=1, **kwargs): + def __init__(self, random_state, NB_CORES=1, **kwargs): if kwargs["p"]: self.p = kwargs["p"] else: @@ -70,20 +70,20 @@ class FatSCMLateFusionClass: else: self.model = "conjunction" self.monoviewDecisions = kwargs["monoviewDecisions"] - self.randomState = randomState + self.random_state = random_state def setParams(self, paramsSet): self.p = paramsSet[0] self.max_attributes = paramsSet[1] self.model = paramsSet[2] - def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): + def fit_hdf5(self, DATASET, labels, trainIndices=None, views_indices=None, metric=["f1_score", None]): features = self.monoviewDecisions[trainIndices] self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model, - random_state=self.randomState) + random_state=self.random_state) self.SCMClassifier.fit(features, labels[trainIndices].astype(int)) - def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): + def predict_hdf5(self, DATASET, usedIndices=None, views_indices=None): if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) predictedLabels = self.SCMClassifier.predict(self.monoviewDecisions[usedIndices]) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py index 4bb84dc6..9f48dc28 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py @@ -28,10 +28,10 @@ class EarlyFusionClassifier(object): self.monoviewData = None self.randomState = randomState - def makeMonoviewData_hdf5(self, DATASET, weights=None, usedIndices=None, viewsIndices=None): - if type(viewsIndices) == type(None): - viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - nbView = len(viewsIndices) + def makeMonoviewData_hdf5(self, DATASET, weights=None, usedIndices=None, views_indices=None): + if type(views_indices) == type(None): + views_indices = np.arange(DATASET.get("Metadata").attrs["nbView"]) + nbView = len(views_indices) if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(weights) == type(None): @@ -39,4 +39,4 @@ class EarlyFusionClassifier(object): if sum(weights) != 1: weights = 
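genParamsSets in the fat_scm_late_fusion hunk above draws each candidate hyper-parameter set from the numpy RandomState now passed as random_state. A standalone sketch of the same sampling pattern, using the ranges visible in the hunk (the function name here is illustrative):

    import numpy as np

    def gen_params_sets(random_state, n_iter=1):
        """Draw n_iter random (p, max_attributes, model) triplets for the SCM."""
        params_sets = []
        for _ in range(n_iter):
            max_attributes = random_state.randint(1, 20)   # maximum number of rules
            p = random_state.random_sample()               # SCM p parameter in [0, 1)
            model = random_state.choice(["conjunction", "disjunction"])
            params_sets.append([p, max_attributes, model])
        return params_sets

    print(gen_params_sets(np.random.RandomState(42), n_iter=3))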
weights / sum(weights) self.monoviewData = np.concatenate([get_v(DATASET, viewIndex, usedIndices) - for index, viewIndex in enumerate(viewsIndices)], axis=1) + for index, viewIndex in enumerate(views_indices)], axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusionPackage/WeightedLinear.py index 79059d5c..a4f7e9ff 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -7,7 +7,7 @@ from ..... import monoview_classifiers def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] if classificationKWARGS["classifiersConfigs"] is None: - monoviewClassifierModule = getattr(monoview_classifiers, classificationKWARGS["classifiersNames"]) + monoviewClassifierModule = getattr(monoview_classifiers, classificationKWARGS["classifiers_names"]) paramsMonoview = monoviewClassifierModule.paramsToSet(nIter, randomState) paramsSets = [] for iterIndex in range(nIter): @@ -36,7 +36,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": "WeightedLinear", - "classifiersNames": classifierName, + "classifiers_names": classifierName, "classifiersConfigs": monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split( @@ -52,7 +52,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": "WeightedLinear", - "classifiersNames": classifierName, + "classifiers_names": classifierName, "classifiersConfigs": None, 'fusionMethodConfig': args.FU_E_method_configs, "nbView": (len(viewsIndices))}} @@ -62,12 +62,12 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class WeightedLinear(EarlyFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - EarlyFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + EarlyFusionClassifier.__init__(self, randomState, kwargs['classifiers_names'], kwargs['classifiersConfigs'], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'] is None: - self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) + self.weights = np.ones(len(kwargs["classifiers_names"]), dtype=float) elif kwargs['fusionMethodConfig'] == ['']: - self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) + self.weights = np.ones(len(kwargs["classifiers_names"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) self.weights /= float(max(self.weights)) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py index 314b11bb..4373ec14 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py @@ -44,21 +44,21 @@ def getScores(LateFusionClassifiers): def 
intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): wrongSets = [[] for _ in viewsIndices] # wrongSets = [0 for _ in allClassifersNames] - classifiersNames = [[] for _ in viewsIndices] + classifiers_names = [[] for _ in viewsIndices] nbViews = len(viewsIndices) trainLabels = np.genfromtxt(directory + "train_labels.csv", delimiter=",").astype(np.int16) length = len(trainLabels) for resultMonoview in resultsMonoview: - if resultMonoview.classifier_name in classifiersNames[viewsIndices.index(resultMonoview.view_index)]: - classifierIndex = classifiersNames.index(resultMonoview.classifier_name) + if resultMonoview.classifier_name in classifiers_names[viewsIndices.index(resultMonoview.view_index)]: + classifierIndex = classifiers_names.index(resultMonoview.classifier_name) wrongSets[resultMonoview.view_index][classifierIndex] = np.where( trainLabels + resultMonoview.full_labels_pred[classificationIndices[0]] == 1)[0] else: - classifiersNames[viewsIndices.index(resultMonoview.view_index)].append(resultMonoview.classifier_name) + classifiers_names[viewsIndices.index(resultMonoview.view_index)].append(resultMonoview.classifier_name) wrongSets[viewsIndices.index(resultMonoview.view_index)].append( np.where(trainLabels + resultMonoview.full_labels_pred[classificationIndices[0]] == 1)[0]) - combinations = itertools.combinations_with_replacement(range(len(classifiersNames[0])), nbViews) + combinations = itertools.combinations_with_replacement(range(len(classifiers_names[0])), nbViews) bestLen = length bestCombination = None for combination in combinations: @@ -68,14 +68,14 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas if len(intersect) < bestLen: bestLen = len(intersect) bestCombination = combination - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] + return [classifiers_names[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): nbViews = len(viewsIndices) nbClassifiers = len(allClassifersNames) scores = np.zeros((nbViews, nbClassifiers)) - classifiersNames = [[] for _ in viewsIndices] + classifiers_names = [[] for _ in viewsIndices] metricName = resultsMonoview[0].metrics_scores.keys()[0] metricModule = getattr(metrics, metricName) if metricModule.getConfig()[-14] == "h": @@ -83,30 +83,30 @@ def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, clas else: betterHigh = False for resultMonoview in resultsMonoview: - if resultMonoview.classifier_name not in classifiersNames[resultMonoview.view_index]: - classifiersNames[resultMonoview.view_index].append(resultMonoview.classifier_name) - classifierIndex = classifiersNames[resultMonoview.view_index].index(resultMonoview.classifier_name) + if resultMonoview.classifier_name not in classifiers_names[resultMonoview.view_index]: + classifiers_names[resultMonoview.view_index].append(resultMonoview.classifier_name) + classifierIndex = classifiers_names[resultMonoview.view_index].index(resultMonoview.classifier_name) scores[resultMonoview.view_index, classifierIndex] = resultMonoview.metrics_scores.values()[0][0] if betterHigh: classifierIndices = np.argmax(scores, axis=1) else: classifierIndices = np.argmin(scores, axis=1) - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(classifierIndices)] + return [classifiers_names[viewIndex][index] for viewIndex, index in 
enumerate(classifierIndices)] def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices): thismodule = sys.modules[__name__] selectionMethod = getattr(thismodule, selectionMethodName) - classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices, resultsMonoview, + classifiers_names = selectionMethod(allClassifiersNames, directory, viewsIndices, resultsMonoview, classificationIndices) - return classifiersNames + return classifiers_names -def getConfig(classifiersNames, resultsMonoview, viewsIndices): - classifiersConfigs = [0 for _ in range(len(classifiersNames))] - for classifierIndex, classifierName in enumerate(classifiersNames): +def getConfig(classifiers_names, resultsMonoview, viewsIndices): + classifiersConfigs = [0 for _ in range(len(classifiers_names))] + for classifierIndex, classifierName in enumerate(classifiers_names): for resultMonoview in resultsMonoview: if resultMonoview.view_index == viewsIndices[classifierIndex] and resultMonoview.classifier_name == classifierName: classifiersConfigs[classifierIndex] = resultMonoview.classifier_config diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py index 61ec3838..c671cb3a 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py @@ -44,7 +44,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "BayesianInference", - "classifiersNames": args.FU_L_cl_names, + "classifiers_names": args.FU_L_cl_names, "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, @@ -54,11 +54,11 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class BayesianInference(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiers_names'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: - self.weights = np.array([1.0 for _ in kwargs['classifiersNames']]) + self.weights = np.array([1.0 for _ in kwargs['classifiers_names']]) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.needProbas = True diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py index bcdbfa82..6a010aeb 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -44,7 +44,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, 
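bestScore above fills a (number of views, number of classifiers) score matrix from the monoview results and keeps, for each view, the classifier with the best score (argmax or argmin depending on whether a higher metric value is better). A sketch of that selection step with hypothetical names and scores:

    import numpy as np

    # Hypothetical test scores: one row per view, one column per monoview classifier.
    classifiers_names = [["adaboost", "decision_tree"],
                         ["adaboost", "decision_tree"]]
    scores = np.array([[0.72, 0.80],
                       [0.65, 0.61]])

    better_high = True  # True for metrics such as accuracy, False for loss-like metrics
    best_indices = np.argmax(scores, axis=1) if better_high else np.argmin(scores, axis=1)

    # One classifier kept per view, as bestScore returns.
    selected = [classifiers_names[view_index][index]
                for view_index, index in enumerate(best_indices)]
    print(selected)  # ['decision_tree', 'adaboost']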
resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "MajorityVoting", - "classifiersNames": args.FU_L_cl_names, + "classifiers_names": args.FU_L_cl_names, "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, @@ -54,11 +54,11 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class MajorityVoting(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiers_names'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: - self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) + self.weights = np.ones(len(kwargs["classifiers_names"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py index 739ba023..0b5bb772 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -44,13 +44,13 @@ class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): return {"Binary_attributes": self.clf.model_.rules} -def genParamsSets(classificationKWARGS, randomState, nIter=1): +def genParamsSets(classificationKWARGS, random_state, nIter=1): paramsSets = [] for _ in range(nIter): - max_attributes = randomState.randint(1, 20) - p = randomState.random_sample() - model = randomState.choice(["conjunction", "disjunction"]) - order = randomState.randint(1, 10) + max_attributes = random_state.randint(1, 20) + p = random_state.random_sample() + model = random_state.choice(["conjunction", "disjunction"]) + order = random_state.randint(1, 10) paramsSets.append([p, max_attributes, model, order]) return paramsSets @@ -82,7 +82,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "SCMForLinear", - "classifiersNames": args.FU_L_cl_names, + "classifiers_names": args.FU_L_cl_names, "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, @@ -91,8 +91,8 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class SCMForLinear(LateFusionClassifier): - def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + def __init__(self, random_state, NB_CORES=1, **kwargs): + LateFusionClassifier.__init__(self, random_state, kwargs['classifiers_names'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SCMClassifier = None @@ -146,7 +146,7 @@ class SCMForLinear(LateFusionClassifier): nbView = len(viewsIndices) self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.maxAttributes, 
model_type=self.modelType, - random_state=self.randomState) + random_state=self.random_state) monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py index 50925619..9fa55f10 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -42,7 +42,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "SVMForLinear", - "classifiersNames": args.FU_L_cl_names, + "classifiers_names": args.FU_L_cl_names, "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, @@ -52,7 +52,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class SVMForLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiers_names'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SVMClassifier = None diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py index baf9c56b..e32369b3 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -23,7 +23,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl else: monoviewClassifierModulesNames = benchmark["monoview"] args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, - viewsIndices, resultsMonoview, classificationIndices) + viewsIndices, resultsMonoview, classification_indices) monoviewClassifierModules = [getattr(monoview_classifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_names == [""] and args.CL_type == ["multiview"]: @@ -44,7 +44,7 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "WeightedLinear", - "classifiersNames": args.FU_L_cl_names, + "classifiers_names": args.FU_L_cl_names, "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, @@ -54,11 +54,11 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl class WeightedLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], 
kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, randomState, kwargs['classifiers_names'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) if kwargs['fusionMethodConfig'][0] is None or kwargs['fusionMethodConfig'] == ['']: - self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) + self.weights = np.ones(len(kwargs["classifiers_names"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.needProbas = True diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/analyze_results.py index 70b11657..e225b3b3 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/analyze_results.py @@ -8,14 +8,14 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, + hyper_param_search, nIter, metrics, viewsIndices, randomState, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, + hyper_param_search, nIter, metrics, viewsIndices, randomState, labels, classifierModule) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py index c26387fb..b80aa9a6 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py @@ -85,7 +85,7 @@ def getBenchmark(benchmark, args=None): return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): """Used to generate the list of arguments for each fusion experimentation""" if not "monoview" in benchmark and not args.FU_L_select_monoview in ["randomClf", "Determined"]: args.FU_L_select_monoview = "randomClf" @@ -94,25 +94,25 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, directory, result fusionTypePackage = getattr(Methods, fusionType + "Package") for fusionMethod in benchmark["multiview"]["fusion"]["Methods"][fusionType]: fusionMethodModule = getattr(fusionTypePackage, fusionMethod) - arguments = fusionMethodModule.getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, + arguments = fusionMethodModule.getArgs(benchmark, args, views, views_indices, directory, resultsMonoview, classificationIndices) argumentsList += arguments return argumentsList -def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices=None): +def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, views_indices=None): """Used to concatenate the viewsin one big monoview dataset""" - if type(viewsIndices) == type(None): - viewsIndices = 
np.arange(DATASET.get("Metadata").attrs["nbView"]) + if type(views_indices) == type(None): + views_indices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if not usedIndices: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - NB_VIEW = len(viewsIndices) + NB_VIEW = len(views_indices) if weights is None: weights = np.array([1 / NB_VIEW for i in range(NB_VIEW)]) if sum(weights) != 1: weights = weights / sum(weights) monoviewData = np.concatenate([weights[index] * get_v(DATASET, viewIndex, usedIndices) - for index, viewIndex in enumerate(viewsIndices)], axis=1) + for index, viewIndex in enumerate(views_indices)], axis=1) return monoviewData @@ -126,9 +126,9 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return fusionMethodConfig -# def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices, metric=None, nIter=30): -# if type(viewsIndices) == type(None): -# viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) +# def gridSearch_hdf5(DATASET, views_indices, classificationKWARGS, learningIndices, metric=None, nIter=30): +# if type(views_indices) == type(None): +# views_indices = np.arange(DATASET.get("Metadata").attrs["nbView"]) # fusionTypeName = classificationKWARGS["fusionType"] # fusionTypePackage = globals()[fusionTypeName + "Package"] # fusionMethodModuleName = classificationKWARGS["fusionMethod"] @@ -138,21 +138,21 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # for classifierIndex, classifierName in enumerate(classifiersNames): # logging.debug("\tStart:\t Random search for " + classifierName + " with " + str(nIter) + " iterations") # classifierModule = getattr(monoview_classifiers, classifierName) -# classifierMethod = getattr(classifierModule, "hyperParamSearch") +# classifierMethod = getattr(classifierModule, "hyper_param_search") # if fusionTypeName == "LateFusion": -# bestSettings.append(classifierMethod(get_v(DATASET, viewsIndices[classifierIndex], learningIndices), +# bestSettings.append(classifierMethod(get_v(DATASET, views_indices[classifierIndex], learningIndices), # DATASET.get("Labels")[learningIndices], metric=metric, # nIter=nIter)) # else: # bestSettings.append( -# classifierMethod(makeMonoviewData_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices), +# classifierMethod(makeMonoviewData_hdf5(DATASET, usedIndices=learningIndices, views_indices=views_indices), # DATASET.get("Labels")[learningIndices], metric=metric, # nIter=nIter)) # logging.debug("\tDone:\t Random search for " + classifierName) # classificationKWARGS["classifiersConfigs"] = bestSettings # logging.debug("\tStart:\t Random search for " + fusionMethodModuleName) # fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS, learningIndices, nIter=nIter, -# viewsIndices=viewsIndices) +# views_indices=views_indices) # logging.debug("\tDone:\t Random search for " + fusionMethodModuleName) # return bestSettings, fusionMethodConfig @@ -174,15 +174,15 @@ class FusionClass: def setParams(self, paramsSet): self.classifier.setParams(paramsSet) - def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): - self.classifier.fit_hdf5(DATASET, labels, trainIndices=trainIndices, viewsIndices=viewsIndices) + def fit_hdf5(self, DATASET, labels, trainIndices=None, views_indices=None, metric=["f1_score", None]): + self.classifier.fit_hdf5(DATASET, labels, trainIndices=trainIndices, views_indices=views_indices) - def predict_hdf5(self, DATASET, 
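makeMonoviewData_hdf5 in the fusion hunk above normalizes the view weights and concatenates the weighted views into a single monoview matrix. A minimal NumPy sketch of that weighted early-fusion step, with in-memory arrays standing in for the HDF5 views (get_v and the dataset layout are the project's own and are not reproduced here):

    import numpy as np

    # Two toy views describing the same 4 examples.
    views = [np.ones((4, 3)), 2 * np.ones((4, 2))]
    weights = np.array([1.0, 3.0])

    # Normalize the weights when they do not sum to one, as in the patch.
    if weights.sum() != 1:
        weights = weights / weights.sum()

    # Weighted early fusion: one concatenated monoview dataset of shape (4, 5).
    monoview_data = np.concatenate(
        [weight * view for weight, view in zip(weights, views)], axis=1)
    print(monoview_data.shape)  # (4, 5)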
usedIndices=None, viewsIndices=None): + def predict_hdf5(self, DATASET, usedIndices=None, views_indices=None): if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(viewsIndices) == type(None): - viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - predictedLabels = self.classifier.predict_hdf5(DATASET, usedIndices=usedIndices, viewsIndices=viewsIndices) + if type(views_indices) == type(None): + views_indices = np.arange(DATASET.get("Metadata").attrs["nbView"]) + predictedLabels = self.classifier.predict_hdf5(DATASET, usedIndices=usedIndices, views_indices=views_indices) return predictedLabels def predict_probas_hdf5(self, DATASET, usedIndices=None): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py index 07cf0a7b..3823e687 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py @@ -8,14 +8,14 @@ __status__ = "Prototype" # Production, Development, Prototype def execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule): + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule): return analyze_results.execute(classifier, trainLabels, testLabels, DATASET, classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, + labels_dictionary, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) \ No newline at end of file + hyper_param_search, nIter, metrics, + views_indices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py index 441593f5..1b83dae3 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py @@ -16,9 +16,9 @@ def pseudoCQ(difficulty, doubleFlaut): return difficulty/float(doubleFlaut) -def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): +def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): return diversity_utils.getArgs(args, benchmark, views, - viewsIndices, randomState, directory, + views_indices, randomState, directory, resultsMonoview, classificationIndices, [doubleFault, difficulty], "pseudo_cq_fusion") @@ -34,6 +34,6 @@ class PseudoCQFusionClass(diversity_utils.DiversityFusionClass): diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiersNames)+\ + stringAnalysis = "Classifiers used for each view : "+ ', 
'.join(self.classifiers_names)+\ ', with a pseudo CQ of '+str(self.div_measure) return stringAnalysis \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 765e9806..6cc3f41b 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -113,9 +113,9 @@ def get_metrics_scores_biclass(metrics, results): metricsScores : dict of dict of list Regroups all the scores for each metrics for each classifier and for the train and test sets. organized as : - -`metricScores[metric_name]["classifiersNames"]` is a list of all the classifiers available for this metric, - -`metricScores[metric_name]["trainScores"]` is a list of all the available classifiers scores on the train set, - -`metricScores[metric_name]["testScores"]` is a list of all the available classifiers scores on the test set. + -`metricScores[metric_name]["classifiers_names"]` is a list of all the classifiers available for this metric, + -`metricScores[metric_name]["train_scores"]` is a list of all the available classifiers scores on the train set, + -`metricScores[metric_name]["test_scores"]` is a list of all the available classifiers scores on the test set. """ metrics_scores = {} @@ -129,9 +129,9 @@ def get_metrics_scores_biclass(metrics, results): test_scores.append(classifierResult.metrics_scores[metric[0]][1]) classifiers_names.append(classifierResult.get_classifier_name()) - metrics_scores[metric[0]] = {"classifiersNames": classifiers_names, - "trainScores": train_scores, - "testScores": test_scores} + metrics_scores[metric[0]] = {"classifiers_names": classifiers_names, + "train_scores": train_scores, + "test_scores": test_scores} return metrics_scores @@ -239,24 +239,24 @@ def sort_by_test_score(train_scores, test_scores, names, train_STDs=None, return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs -def plotMetricScores(trainScores, testScores, names, nbResults, metricName, - fileName, +def plotMetricScores(train_scores, test_scores, names, nb_results, metric_name, + file_name, tag="", train_STDs=None, test_STDs=None): r"""Used to plot and save the score barplot for a specific metric. Parameters ---------- - trainScores : list or np.array of floats + train_scores : list or np.array of floats The scores of each classifier on the training set. - testScores : list or np.array of floats + test_scores : list or np.array of floats The scores of each classifier on the testing set. names : list or np.array of strs The names of all the classifiers. - nbResults: int + nb_results: int The number of classifiers to plot. - metricName : str + metric_name : str The plotted metric's name - fileName : str + file_name : str The name of the file where the figure will be saved. tag : str Some text to personalize the title, must start with a whitespace. 
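The get_metrics_scores_biclass docstring earlier in this hunk describes the returned structure: one entry per metric, holding parallel lists of classifier names and their train and test scores. A toy instance of that layout, with hypothetical classifier names and values:

    # Hypothetical contents of the dict documented above.
    metrics_scores = {
        "accuracy_score": {
            "classifiers_names": ["adaboost", "weighted_linear_fusion"],
            "train_scores": [0.95, 0.97],
            "test_scores": [0.81, 0.88],
        },
        "f1_score": {
            "classifiers_names": ["adaboost", "weighted_linear_fusion"],
            "train_scores": [0.94, 0.96],
            "test_scores": [0.79, 0.86],
        },
    }

    # The i-th entry of each list refers to the same classifier.
    for metric_name, metric_scores in metrics_scores.items():
        best_score, best_name = max(zip(metric_scores["test_scores"],
                                        metric_scores["classifiers_names"]))
        print(metric_name, "best on test:", best_name, best_score)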
@@ -269,87 +269,90 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, ------- """ - figKW, barWidth = get_fig_size(nbResults) + figKW, barWidth = get_fig_size(nb_results) - names, trainScores, testScores, train_STDs, test_STDs = sort_by_test_score( - trainScores, testScores, names, + names, train_scores, test_scores, train_STDs, test_STDs = sort_by_test_score( + train_scores, test_scores, names, train_STDs, test_STDs) f, ax = plt.subplots(nrows=1, ncols=1, **figKW) - ax.set_title(metricName + "\n" + tag + " scores for each classifier") + ax.set_title(metric_name + "\n" + tag + " scores for each classifier") - rects = ax.bar(range(nbResults), testScores, barWidth, color="0.1", + rects = ax.bar(range(nb_results), test_scores, barWidth, color="0.1", yerr=test_STDs) - rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, + rect2 = ax.bar(np.arange(nb_results) + barWidth, train_scores, barWidth, color="0.8", yerr=train_STDs) autolabel(rects, ax, set=1, std=test_STDs) autolabel(rect2, ax, set=2, std=train_STDs) - + print("nb_results", nb_results) ax.legend((rects[0], rect2[0]), ('Test', 'Train')) ax.set_ylim(-0.1, 1.1) - ax.set_xticks(np.arange(nbResults) + barWidth) + ax.set_xticks(np.arange(nb_results) + barWidth) ax.set_xticklabels(names, rotation="vertical") try: plt.tight_layout() except: pass - f.savefig(fileName + '.png', transparent=True) + f.savefig(file_name + '.png', transparent=True) plt.close() import pandas as pd if train_STDs is None: dataframe = pd.DataFrame(np.transpose(np.concatenate(( - trainScores.reshape((trainScores.shape[0], 1)), - testScores.reshape((trainScores.shape[0], 1))), axis=1)), + train_scores.reshape((train_scores.shape[0], 1)), + test_scores.reshape((train_scores.shape[0], 1))), axis=1)), columns=names) else: dataframe = pd.DataFrame(np.transpose(np.concatenate(( - trainScores.reshape((trainScores.shape[0], 1)), - train_STDs.reshape((trainScores.shape[0], 1)), - testScores.reshape((trainScores.shape[0], 1)), - test_STDs.reshape((trainScores.shape[0], 1))), axis=1)), + train_scores.reshape((train_scores.shape[0], 1)), + train_STDs.reshape((train_scores.shape[0], 1)), + test_scores.reshape((train_scores.shape[0], 1)), + test_STDs.reshape((train_scores.shape[0], 1))), axis=1)), columns=names) - dataframe.to_csv(fileName + ".csv") + dataframe.to_csv(file_name + ".csv") -def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames): +def publishMetricsGraphs(metrics_scores, directory, database_name, labels_names): r"""Used to sort the results (names and both scores) in descending test score order. Parameters ---------- - metricsScores : dict of dicts of lists or np.arrays + metrics_scores : dict of dicts of lists or np.arrays Keys : The names of the metrics. Values : The scores and names of each classifier . directory : str The path to the directory where the figures will be saved. - databaseName : str + database_name : str The name of the database on which the experiments where conducted. - labelsNames : list of strs + labels_names : list of strs The name corresponding to each numerical label. 
Returns ------- + results """ results=[] - for metricName, metricScores in metricsScores.items(): + for metric_name, metric_scores in metrics_scores.items(): logging.debug( - "Start:\t Biclass score graph generation for " + metricName) - - nbResults = len(metricScores["testScores"]) + "Start:\t Biclass score graph generation for " + metric_name) - fileName = directory + time.strftime( - "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( - labelsNames) + "-" + metricName + nb_results = len(metric_scores["test_scores"]) + file_name = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + database_name + "-" + "_vs_".join( + labels_names) + "-" + metric_name - plotMetricScores(np.array(metricScores["trainScores"]), - np.array(metricScores["testScores"]), - np.array(metricScores["classifiersNames"]), nbResults, - metricName, fileName, - tag=" " + " vs ".join(labelsNames)) + plotMetricScores(np.array(metric_scores["train_scores"]), + np.array(metric_scores["test_scores"]), + np.array(metric_scores["classifiers_names"]), nb_results, + metric_name, file_name, + tag=" " + " vs ".join(labels_names)) logging.debug( - "Done:\t Biclass score graph generation for " + metricName) - results+=[[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(np.array(metricScores["classifiersNames"]), np.array(metricScores["testScores"]), np.zeros(len(np.array(metricScores["testScores"]))))] + "Done:\t Biclass score graph generation for " + metric_name) + results+=[[classifiers_name, metric_name, testMean, testSTD] + for classifiers_name, testMean, testSTD in zip(np.array(metric_scores["classifiers_names"]), + np.array(metric_scores["test_scores"]), + np.zeros(len(np.array(metric_scores["test_scores"]))))] return results def iterCmap(statsIter): @@ -378,7 +381,7 @@ def iterCmap(statsIter): return cmap, norm -def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, +def publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nbCopies, fileName, minSize=10, width_denominator=2.0, height_denominator=20.0, statsIter=1): r"""Used to generate a 2D plot of the errors. @@ -388,7 +391,7 @@ def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, data : np.array of shape `(nbClassifiers, nbExamples)` A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well and -100 if the example was not classified. - classifiersNames : list of str + classifiers_names : list of str The names of the classifiers. nbClassifiers : int The number of classifiers. @@ -419,7 +422,7 @@ def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, aspect='auto') plt.title('Errors depending on the classifier') ticks = np.arange(nbCopies / 2 - 0.5, nbClassifiers * nbCopies, nbCopies) - labels = classifiersNames + labels = classifiers_names plt.xticks(ticks, labels, rotation="vertical") cbar = fig.colorbar(cax, ticks=[-100 * statsIter / 2, 0, statsIter]) cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) @@ -435,7 +438,7 @@ def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): ---------- errorOnExamples : np.array of shape `(nbExamples,)` An array counting how many classifiers failed to classifiy each examples. - classifiersNames : list of str + classifiers_names : list of str The names of the classifiers. nbClassifiers : int The number of classifiers. 
@@ -482,7 +485,7 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): NUmber of examples. nbCopies : int The number of times the data is copied (classifier wise) in order for the figure to be more readable. - classifiersNames : list of strs + classifiers_names : list of strs The names fo the classifiers. data : np.array of shape `(nbClassifiers, nbExamples)` A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well @@ -492,14 +495,14 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): """ nbClassifiers = len(example_errors) nbExamples = len(list(example_errors.values())[0]["errorOnExamples"]) - classifiersNames = example_errors.keys() + classifiers_names = example_errors.keys() data = np.zeros((nbExamples, nbClassifiers * nbCopies)) temp_data = np.zeros((nbExamples, nbClassifiers)) - for classifierIndex, (classifierName, errorOnExamples) in enumerate( + for classifierIndex, (classifier_name, errorOnExamples) in enumerate( example_errors.items()): - for iterIndex in range(nbCopies): - data[:, classifierIndex * nbCopies + iterIndex] = errorOnExamples[ + for iter_index in range(nbCopies): + data[:, classifierIndex * nbCopies + iter_index] = errorOnExamples[ "errorOnExamples"] temp_data[:, classifierIndex] = errorOnExamples["errorOnExamples"] errorOnExamples = -1 * np.sum(data, axis=1) / nbCopies + nbClassifiers @@ -507,21 +510,21 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): np.savetxt(base_file_name + "2D_plot_data.csv", data, delimiter=",") np.savetxt(base_file_name + "bar_plot_data.csv", temp_data, delimiter=",") - return nbClassifiers, nbExamples, nbCopies, classifiersNames, data, errorOnExamples + return nbClassifiers, nbExamples, nbCopies, classifiers_names, data, errorOnExamples -def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames): +def publishExampleErrors(exampleErrors, directory, databaseName, labels_names): logging.debug("Start:\t Biclass Label analysis figure generation") base_file_name = directory + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( - labelsNames) + "-" + labels_names) + "-" - nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data( + nbClassifiers, nbExamples, nCopies, classifiers_names, data, errorOnExamples = gen_error_data( exampleErrors, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nCopies, + publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nCopies, base_file_name) publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, @@ -530,7 +533,7 @@ def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames): logging.debug("Done:\t Biclass Label analysis figures generation") -def get_arguments(benchmarkArgumentDictionaries, flag): +def get_arguments(benchmark_argument_dictionaries, flag): r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of a biclass experimentation. @@ -538,7 +541,7 @@ def get_arguments(benchmarkArgumentDictionaries, flag): ---------- flag : list The needed experimentation's flag. - benchmarkArgumentDictionaries : list of dicts + benchmark_argument_dictionaries : list of dicts The list of all the arguments passed to the benchmark executing functions. 
Returns @@ -546,12 +549,12 @@ def get_arguments(benchmarkArgumentDictionaries, flag): benchmarkArgumentDictionary : dict All the arguments passed to the benchmark executing function for the needed experimentation. """ - for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: + for benchmarkArgumentDictionary in benchmark_argument_dictionaries: if benchmarkArgumentDictionary["flag"] == flag: return benchmarkArgumentDictionary -def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): +def analyze_biclass(results, benchmark_argument_dictionaries, stats_iter, metrics): r"""Used to extract and format the results of the different biclass experimentations performed. Parameters @@ -560,11 +563,11 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): The result list returned by the bencmark execution function. For each executed benchmark, contains a flag & a result element. The flag is a way to identify to which benchmark the results belong, formatted this way : - `flag = iterIndex, [classifierPositive, classifierNegative]` with - - `iterIndex` the index of the statistical iteration + `flag = iter_index, [classifierPositive, classifierNegative]` with + - `iter_index` the index of the statistical iteration - `[classifierPositive, classifierNegative]` the indices of the labels considered positive and negative by the classifier (mainly useful for one versus one multiclass classification). - benchmarkArgumentDictionaries : list of dicts + benchmark_argument_dictionaries : list of dicts The list of all the arguments passed to the benchmark executing functions. statsIter : int The number of statistical iterations. @@ -578,133 +581,133 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): label combination, regrouping the scores for each metrics and the information useful to plot errors on examples. 
""" logging.debug("Srart:\t Analzing all biclass resuls") - biclassResults = [{} for _ in range(statsIter)] + biclass_results = [{} for _ in range(stats_iter)] for flag, result in results: iteridex, [classifierPositive, classifierNegative] = flag - arguments = get_arguments(benchmarkArgumentDictionaries, flag) + arguments = get_arguments(benchmark_argument_dictionaries, flag) - metricsScores = getMetricsScoresBiclass(metrics, result) - exampleErrors = getExampleErrorsBiclass(arguments["labels"], result) + metrics_scores = get_metrics_scores_biclass(metrics, result) + example_errors = getExampleErrorsBiclass(arguments["labels"], result) directory = arguments["directory"] - databaseName = arguments["args"]["Base"]["name"] - labelsNames = [arguments["LABELS_DICTIONARY"][0], - arguments["LABELS_DICTIONARY"][1]] + database_name = arguments["args"]["Base"]["name"] + labels_names = [arguments["labels_dictionary"][0], + arguments["labels_dictionary"][1]] - results = publishMetricsGraphs(metricsScores, directory, databaseName, - labelsNames) - publishExampleErrors(exampleErrors, directory, databaseName, - labelsNames) + results = publishMetricsGraphs(metrics_scores, directory, database_name, + labels_names) + publishExampleErrors(example_errors, directory, database_name, + labels_names) - biclassResults[iteridex][ + biclass_results[iteridex][ str(classifierPositive) + str(classifierNegative)] = { - "metricsScores": metricsScores, - "exampleErrors": exampleErrors} + "metricsScores": metrics_scores, + "exampleErrors": example_errors} logging.debug("Done:\t Analzing all biclass resuls") - return results, biclassResults + return results, biclass_results -def genMetricsScoresMulticlass(results, trueLabels, metrics, - argumentsDictionaries): +def gen_metrics_scores_multiclass(results, true_labels, metrics, + arguments_dictionaries): """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" logging.debug("Start:\t Getting multiclass scores for each metric") for metric in metrics: - metricModule = getattr(metrics, metric[0]) - for iterIndex, iterResults in enumerate(results): + metric_module = getattr(metrics, metric[0]) + for iter_index, iter_results in enumerate(results): - for argumentsDictionary in argumentsDictionaries: - if argumentsDictionary["flag"][0] == iterIndex: - classificationIndices = argumentsDictionary[ - "classificationIndices"] - trainIndices, testIndices, multiclassTestIndices = classificationIndices + for argumentsDictionary in arguments_dictionaries: + if argumentsDictionary["flag"][0] == iter_index: + classification_indices = argumentsDictionary[ + "classification_indices"] + train_indices, test_indices, multiclass_test_indices = classification_indices - for classifierName, resultDictionary in iterResults.items(): + for classifier_name, resultDictionary in iter_results.items(): if not "metricsScores" in resultDictionary: - results[iterIndex][classifierName]["metricsScores"] = {} - trainScore = metricModule.score(trueLabels[trainIndices], + results[iter_index][classifier_name]["metricsScores"] = {} + train_score = metric_module.score(true_labels[train_indices], resultDictionary["labels"][ - trainIndices], + train_indices], multiclass=True) - testScore = metricModule.score( - trueLabels[multiclassTestIndices], - resultDictionary["labels"][multiclassTestIndices], + test_score = metric_module.score( + true_labels[multiclass_test_indices], + resultDictionary["labels"][multiclass_test_indices], multiclass=True) - 
results[iterIndex][classifierName]["metricsScores"][ - metric[0]] = [trainScore, testScore] + results[iter_index][classifier_name]["metrics_scores"][ + metric[0]] = [train_score, test_score] logging.debug("Done:\t Getting multiclass scores for each metric") return results -def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels): +def get_error_on_labels_multiclass(multiclass_results, multiclass_labels): """Used to add all the arrays showing on which example there is an error for each clf and each iteration""" logging.debug("Start:\t Getting errors on each example for each classifier") - for iterIndex, iterResults in enumerate(multiclassResults): - for classifierName, classifierResults in iterResults.items(): - errorOnExamples = classifierResults["labels"] == multiclassLabels - multiclassResults[iterIndex][classifierName][ - "errorOnExamples"] = errorOnExamples.astype(int) + for iter_index, iter_results in enumerate(multiclass_results): + for classifier_name, classifier_results in iter_results.items(): + error_on_examples = classifier_results["labels"] == multiclass_labels + multiclass_results[iter_index][classifier_name][ + "errorOnExamples"] = error_on_examples.astype(int) logging.debug("Done:\t Getting errors on each example for each classifier") - return multiclassResults + return multiclass_results -def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, +def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories, databaseName): results=[] - for iterIndex in range(statsIter): - directory = direcories[iterIndex] + for iter_index in range(stats_iter): + directory = direcories[iter_index] for metric in metrics: logging.debug( "Start:\t Multiclass score graph generation for " + metric[0]) - classifiersNames = np.array([classifierName for classifierName in - multiclassResults[iterIndex].keys()]) - trainScores = np.array([multiclassResults[iterIndex][ - classifierName]["metricsScores"][ + classifiers_names = np.array([classifier_name for classifier_name in + multiclass_results[iter_index].keys()]) + train_scores = np.array([multiclass_results[iter_index][ + classifier_name]["metricsScores"][ metric[0]][0] - for classifierName in classifiersNames]) - validationScores = np.array([multiclassResults[iterIndex][ - classifierName]["metricsScores"][ + for classifier_name in classifiers_names]) + validationScores = np.array([multiclass_results[iter_index][ + classifier_name]["metricsScores"][ metric[0]][1] - for classifierName in - classifiersNames]) + for classifier_name in + classifiers_names]) - nbResults = classifiersNames.shape[0] + nbResults = classifiers_names.shape[0] fileName = directory + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[ 0] + ".png" - plotMetricScores(trainScores, validationScores, classifiersNames, + plotMetricScores(train_scores, validationScores, classifiers_names, nbResults, metric[0], fileName, tag=" multiclass") logging.debug( "Done:\t Multiclass score graph generation for " + metric[0]) - results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiersNames, validationScores, np.zeros(len(validationScores)))] + results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))] return results -def publishMulticlassExmapleErrors(multiclassResults, directories, +def publishMulticlassExmapleErrors(multiclass_results, directories, 
databaseName): - for iterIndex, multiclassResult in enumerate(multiclassResults): - directory = directories[iterIndex] + for iter_index, multiclassResult in enumerate(multiclass_results): + directory = directories[iter_index] logging.debug("Start:\t Multiclass Label analysis figure generation") base_file_name = directory + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" - nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data( + nbClassifiers, nbExamples, nCopies, classifiers_names, data, errorOnExamples = gen_error_data( multiclassResult, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, + publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nCopies, base_file_name) publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, @@ -713,75 +716,75 @@ def publishMulticlassExmapleErrors(multiclassResults, directories, logging.debug("Done:\t Multiclass Label analysis figure generation") -def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, - nbExamples, nbLabels, multiclassLabels, - metrics, classificationIndices, directories): +def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries, + nb_examples, nb_labels, multiclass_labels, + metrics, classification_indices, directories): """Used to transform one versus one results in multiclass results and to publish it""" - multiclassResults = [{} for _ in range(statsIter)] + multiclass_results = [{} for _ in range(stats_iter)] for flag, result in results: - iterIndex = flag[0] + iter_index = flag[0] classifierPositive = flag[1][0] classifierNegative = flag[1][1] - for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: + for benchmarkArgumentDictionary in benchmark_argument_dictionaries: if benchmarkArgumentDictionary["flag"] == flag: trainIndices, testIndices, testMulticlassIndices = \ - benchmarkArgumentDictionary["classificationIndices"] + benchmarkArgumentDictionary["classification_indices"] for classifierResult in result: - classifierName = classifierResult.get_classifier_name() - if classifierName not in multiclassResults[iterIndex]: - multiclassResults[iterIndex][classifierName] = np.zeros( - (nbExamples, nbLabels), dtype=int) + classifier_name = classifierResult.get_classifier_name() + if classifier_name not in multiclass_results[iter_index]: + multiclass_results[iter_index][classifier_name] = np.zeros( + (nb_examples, nb_labels), dtype=int) for exampleIndex in trainIndices: label = classifierResult.full_labels_pred[exampleIndex] if label == 1: - multiclassResults[iterIndex][classifierName][ + multiclass_results[iter_index][classifier_name][ exampleIndex, classifierPositive] += 1 else: - multiclassResults[iterIndex][classifierName][ + multiclass_results[iter_index][classifier_name][ exampleIndex, classifierNegative] += 1 for multiclassIndex, exampleIndex in enumerate( testMulticlassIndices): label = classifierResult.y_test_multiclass_pred[multiclassIndex] if label == 1: - multiclassResults[iterIndex][classifierName][ + multiclass_results[iter_index][classifier_name][ exampleIndex, classifierPositive] += 1 else: - multiclassResults[iterIndex][classifierName][ + multiclass_results[iter_index][classifier_name][ exampleIndex, classifierNegative] += 1 - for iterIndex, multiclassiterResult in enumerate(multiclassResults): + for iter_index, multiclassiterResult in enumerate(multiclass_results): for key, value in multiclassiterResult.items(): - multiclassResults[iterIndex][key] = { + 
multiclass_results[iter_index][key] = { "labels": np.argmax(value, axis=1)} - multiclassResults = genMetricsScoresMulticlass(multiclassResults, - multiclassLabels, metrics, - benchmarkArgumentDictionaries) - multiclassResults = getErrorOnLabelsMulticlass(multiclassResults, - multiclassLabels) + multiclass_results = gen_metrics_scores_multiclass(multiclass_results, + multiclass_labels, metrics, + benchmark_argument_dictionaries) + multiclass_results = get_error_on_labels_multiclass(multiclass_results, + multiclass_labels) - results = publishMulticlassScores(multiclassResults, metrics, statsIter, directories, - benchmarkArgumentDictionaries[0]["args"]["Base"]["name"]) - publishMulticlassExmapleErrors(multiclassResults, directories, - benchmarkArgumentDictionaries[0][ + results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories, + benchmark_argument_dictionaries[0]["args"]["Base"]["name"]) + publishMulticlassExmapleErrors(multiclass_results, directories, + benchmark_argument_dictionaries[0][ "args"].name) - return results, multiclassResults + return results, multiclass_results def numpy_mean_and_std(scores_array): return np.mean(scores_array, axis=1), np.std(scores_array, axis=1) -def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, - classifiersDict, dataBaseName, statsIter, - minSize=10): +def publish_iter_biclass_metrics_scores(iter_results, directory, labels_dictionary, + classifiers_dict, data_base_name, stats_iter, + min_size=10): results=[] - for labelsCombination, iterResult in iterResults.items(): - currentDirectory = directory + labelsDictionary[ - int(labelsCombination[0])] + "-vs-" + labelsDictionary[ + for labelsCombination, iterResult in iter_results.items(): + currentDirectory = directory + labels_dictionary[ + int(labelsCombination[0])] + "-vs-" + labels_dictionary[ int(labelsCombination[1])] + "/" if not os.path.exists(os.path.dirname(currentDirectory + "a")): try: @@ -791,16 +794,16 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, raise for metricName, scores in iterResult["metricsScores"].items(): - trainMeans, trainSTDs = numpy_mean_and_std(scores["trainScores"]) - testMeans, testSTDs = numpy_mean_and_std(scores["testScores"]) + trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) + testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) - names = np.array([name for name in classifiersDict.keys()]) + names = np.array([name for name in classifiers_dict.keys()]) fileName = currentDirectory + time.strftime( - "%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str( - statsIter) + "_iter-" + metricName + ".png" + "%Y_%m_%d-%H_%M_%S") + "-" + data_base_name + "-Mean_on_" + str( + stats_iter) + "_iter-" + metricName + ".png" nbResults = names.shape[0] - plotMetricScores(trainScores=trainMeans, testScores=testMeans, + plotMetricScores(train_scores=trainMeans, test_scores=testMeans, names=names, nbResults=nbResults, metricName=metricName, fileName=fileName, tag=" averaged", @@ -809,92 +812,92 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, return results -def gen_error_dat_glob(combiResults, statsIter, base_file_name): - nbExamples = combiResults["errorOnExamples"].shape[1] - nbClassifiers = combiResults["errorOnExamples"].shape[0] - data = np.transpose(combiResults["errorOnExamples"]) - errorOnExamples = -1 * np.sum(data, axis=1) + (nbClassifiers * statsIter) +def gen_error_dat_glob(combi_results, stats_iter, base_file_name): + 
nbExamples = combi_results["errorOnExamples"].shape[1] + nbClassifiers = combi_results["errorOnExamples"].shape[0] + data = np.transpose(combi_results["errorOnExamples"]) + errorOnExamples = -1 * np.sum(data, axis=1) + (nbClassifiers * stats_iter) np.savetxt(base_file_name + "clf_errors.csv", data, delimiter=",") np.savetxt(base_file_name + "example_errors.csv", errorOnExamples, delimiter=",") return nbExamples, nbClassifiers, data, errorOnExamples -def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary, - classifiersDict, statsIter, minSize=10): - for labelsCombination, combiResults in iterResults.items(): - base_file_name = directory + labelsDictionary[ +def publish_iter_biclass_example_errors(iter_results, directory, labels_dictionary, + classifiers_dict, stats_iter, min_size=10): + for labelsCombination, combiResults in iter_results.items(): + base_file_name = directory + labels_dictionary[ int(labelsCombination[0])] + "-vs-" + \ - labelsDictionary[ + labels_dictionary[ int(labelsCombination[1])] + "/" + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" - classifiersNames = [classifierName for classifierName in - classifiersDict.values()] + classifiers_names = [classifier_name for classifier_name in + classifiers_dict.values()] logging.debug( "Start:\t Global biclass label analysis figure generation") nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob( - combiResults, statsIter, base_file_name) + combiResults, stats_iter, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, - base_file_name, statsIter=statsIter) + publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, 1, + base_file_name, stats_iter=stats_iter) - publishErrorsBarPlot(errorOnExamples, nbClassifiers * statsIter, + publishErrorsBarPlot(errorOnExamples, nbClassifiers * stats_iter, nbExamples, base_file_name) logging.debug( "Done:\t Global biclass label analysis figures generation") -def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, - dataBaseName, directory, statsIter, - minSize=10): +def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names, + data_base_name, directory, stats_iter, + min_size=10): results = [] - for metricName, scores in iterMulticlassResults["metricsScores"].items(): - trainMeans, trainSTDs = numpy_mean_and_std(scores["trainScores"]) - testMeans, testSTDs = numpy_mean_and_std(scores["testScores"]) + for metric_name, scores in iter_multiclass_results["metricsScores"].items(): + trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) + testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) - nbResults = classifiersNames.shape[0] + nb_results = classifiers_names.shape[0] - fileName = directory + time.strftime( - "%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str( - statsIter) + "_iter-" + metricName + ".png" + file_name = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + data_base_name + "-Mean_on_" + str( + stats_iter) + "_iter-" + metric_name + ".png" - plotMetricScores(trainScores=trainMeans, testScores=testMeans, - names=classifiersNames, nbResults=nbResults, - metricName=metricName, fileName=fileName, + plotMetricScores(train_scores=trainMeans, test_scores=testMeans, + names=classifiers_names, nbResults=nb_results, + metricName=metric_name, fileName=file_name, tag=" averaged multiclass", train_STDs=trainSTDs, test_STDs=testSTDs) - results+=[[classifiersName, metricName,testMean, testSTD] for classifiersName, testMean, testSTD in 
zip(classifiersNames, testMeans, testSTDs)] + results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)] return results -def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, - classifiersNames, statsIter, minSize=10): +def publish_iter_multiclass_example_errors(iter_multiclass_results, directory, + classifiers_names, stats_iter, min_size=10): logging.debug( "Start:\t Global multiclass label analysis figures generation") base_file_name = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" - nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob( - iterMulticlassResults, statsIter, base_file_name) + nb_examples, nb_classifiers, data, error_on_examples = gen_error_dat_glob( + iter_multiclass_results, stats_iter, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, - base_file_name, statsIter=statsIter) + publish2Dplot(data, classifiers_names, nb_classifiers, nb_examples, 1, + base_file_name, stats_iter=stats_iter) - publishErrorsBarPlot(errorOnExamples, nbClassifiers * statsIter, nbExamples, + publishErrorsBarPlot(error_on_examples, nb_classifiers * stats_iter, nb_examples, base_file_name) logging.debug("Done:\t Global multiclass label analysis figures generation") def gen_classifiers_dict(results, metrics): - classifiersDict = dict((classifierName, classifierIndex) - for classifierIndex, classifierName + classifiers_dict = dict((classifier_name, classifierIndex) + for classifierIndex, classifier_name in enumerate( results[0][list(results[0].keys())[0]]["metricsScores"][metrics[0][0]][ - "classifiersNames"])) - return classifiersDict, len(classifiersDict) + "classifiers_names"])) + return classifiers_dict, len(classifiers_dict) def add_new_labels_combination(iterBiclassResults, labelsComination, @@ -910,122 +913,125 @@ def add_new_labels_combination(iterBiclassResults, labelsComination, return iterBiclassResults -def add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, - statsIter): - if metric[0] not in iterBiclassResults[labelsComination]["metricsScores"]: - iterBiclassResults[labelsComination]["metricsScores"][metric[0]] = { - "trainScores": - np.zeros((nbClassifiers, statsIter)), - "testScores": - np.zeros((nbClassifiers, statsIter))} - return iterBiclassResults +def add_new_metric(iter_biclass_results, metric, labels_comination, nb_classifiers, + stats_iter): + if metric[0] not in iter_biclass_results[labels_comination]["metrics_scores"]: + iter_biclass_results[labels_comination]["metrics_scores"][metric[0]] = { + "train_scores": + np.zeros((nb_classifiers, stats_iter)), + "test_scores": + np.zeros((nb_classifiers, stats_iter))} + return iter_biclass_results -def analyzebiclassIter(biclassResults, metrics, statsIter, directory, - labelsDictionary, dataBaseName, nbExamples): +def analyzebiclass_iter(biclass_results, metrics, stats_iter, directory, + labels_dictionary, data_base_name, nb_examples): """Used to format the results in order to plot the mean results on the iterations""" - iterBiclassResults = {} - classifiersDict, nbClassifiers = gen_classifiers_dict(biclassResults, + iter_biclass_results = {} + classifiers_dict, nb_classifiers = gen_classifiers_dict(biclass_results, metrics) - for iterIndex, biclassResult in enumerate(biclassResults): - for labelsComination, results in biclassResult.items(): + for iter_index, biclass_result in enumerate(biclass_results): + for labelsComination, results in 
biclass_result.items(): for metric in metrics: - iterBiclassResults = add_new_labels_combination( - iterBiclassResults, labelsComination, nbClassifiers, - nbExamples) - iterBiclassResults = add_new_metric(iterBiclassResults, metric, + iter_biclass_results = add_new_labels_combination( + iter_biclass_results, labelsComination, nb_classifiers, + nb_examples) + iter_biclass_results = add_new_metric(iter_biclass_results, metric, labelsComination, - nbClassifiers, statsIter) - - metric_results = results["metricsScores"][metric[0]] - for classifierName, trainScore, testScore in zip( - metric_results["classifiersNames"], - metric_results["trainScores"], - metric_results["testScores"], ): - iterBiclassResults[labelsComination]["metricsScores"][ - metric[0]]["trainScores"][ - classifiersDict[classifierName], iterIndex] = trainScore - iterBiclassResults[labelsComination]["metricsScores"][ - metric[0]]["testScores"][ - classifiersDict[classifierName], iterIndex] = testScore - - for classifierName, errorOnExample in results[ - "exampleErrors"].items(): - iterBiclassResults[labelsComination]["errorOnExamples"][ - classifiersDict[classifierName], :] += errorOnExample[ - "errorOnExamples"] - - results = publishIterBiclassMetricsScores(iterBiclassResults, directory, - labelsDictionary, classifiersDict, - dataBaseName, statsIter) - publishIterBiclassExampleErrors(iterBiclassResults, directory, - labelsDictionary, classifiersDict, - statsIter) + nb_classifiers, stats_iter) + + metric_results = results["metrics_scores"][metric[0]] + for classifier_name, trainScore, testScore in zip( + metric_results["classifiers_names"], + metric_results["train_scores"], + metric_results["test_scores"], ): + iter_biclass_results[labelsComination]["metrics_scores"][ + metric[0]]["train_scores"][ + classifiers_dict[classifier_name], iter_index] = trainScore + iter_biclass_results[labelsComination]["metrics_scores"][ + metric[0]]["test_scores"][ + classifiers_dict[classifier_name], iter_index] = testScore + + for classifier_name, errorOnExample in results[ + "example_errors"].items(): + iter_biclass_results[labelsComination]["error_on_examples"][ + classifiers_dict[classifier_name], :] += errorOnExample[ + "error_on_examples"] + + results = publish_iter_biclass_metrics_scores( + iter_biclass_results, directory, + labels_dictionary, classifiers_dict, + data_base_name, stats_iter) + publish_iter_biclass_example_errors(iter_biclass_results, directory, + labels_dictionary, classifiers_dict, + stats_iter) return results -def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, - dataBaseName, nbExamples): +def analyze_iter_multiclass(multiclass_results, directory, stats_iter, metrics, + data_base_name, nb_examples): """Used to mean the multiclass results on the iterations executed with different random states""" logging.debug("Start:\t Getting mean results for multiclass classification") - iterMulticlassResults = {} - nbClassifiers = len(multiclassResults[0]) - iterMulticlassResults["errorOnExamples"] = np.zeros( - (nbClassifiers, nbExamples), dtype=int) - iterMulticlassResults["metricsScores"] = {} - classifiersNames = [] - for iterIndex, multiclassResult in enumerate(multiclassResults): - for classifierName, classifierResults in multiclassResult.items(): - if classifierName not in classifiersNames: - classifiersNames.append(classifierName) - classifierIndex = classifiersNames.index(classifierName) + iter_multiclass_results = {} + nb_classifiers = len(multiclass_results[0]) + 
iter_multiclass_results["error_on_examples"] = np.zeros( + (nb_classifiers, nb_examples), dtype=int) + iter_multiclass_results["metrics_scores"] = {} + classifiers_names = [] + for iter_index, multiclass_result in enumerate(multiclass_results): + for classifier_name, classifier_results in multiclass_result.items(): + if classifier_name not in classifiers_names: + classifiers_names.append(classifier_name) + classifier_index = classifiers_names.index(classifier_name) for metric in metrics: - if metric[0] not in iterMulticlassResults["metricsScores"]: - iterMulticlassResults["metricsScores"][metric[0]] = { - "trainScores": - np.zeros((nbClassifiers, statsIter)), - "testScores": - np.zeros((nbClassifiers, statsIter))} - iterMulticlassResults["metricsScores"][metric[0]][ - "trainScores"][classifierIndex, iterIndex] = \ - classifierResults["metricsScores"][metric[0]][0] - iterMulticlassResults["metricsScores"][metric[0]]["testScores"][ - classifierIndex, iterIndex] = \ - classifierResults["metricsScores"][metric[0]][1] - iterMulticlassResults["errorOnExamples"][classifierIndex, :] += \ - classifierResults["errorOnExamples"] + if metric[0] not in iter_multiclass_results["metrics_scores"]: + iter_multiclass_results["metrics_scores"][metric[0]] = { + "train_scores": + np.zeros((nb_classifiers, stats_iter)), + "test_scores": + np.zeros((nb_classifiers, stats_iter))} + iter_multiclass_results["metrics_scores"][metric[0]][ + "train_scores"][classifier_index, iter_index] = \ + classifier_results["metrics_scores"][metric[0]][0] + iter_multiclass_results["metrics_scores"][metric[0]]["test_scores"][ + classifier_index, iter_index] = \ + classifier_results["metrics_scores"][metric[0]][1] + iter_multiclass_results["error_on_examples"][classifier_index, :] += \ + classifier_results["error_on_examples"] logging.debug("Start:\t Getting mean results for multiclass classification") - classifiersNames = np.array(classifiersNames) - results = publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, - dataBaseName, directory, statsIter) - publishIterMulticlassExampleErrors(iterMulticlassResults, directory, - classifiersNames, statsIter) + classifiers_names = np.array(classifiers_names) + results = publish_iter_multiclass_metrics_scores( + iter_multiclass_results, classifiers_names, + data_base_name, directory, stats_iter) + publish_iter_multiclass_example_errors(iter_multiclass_results, directory, + classifiers_names, stats_iter) return results -def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, - multiclassLabels, metrics, - classificationIndices, directories, directory, labelsDictionary, - nbExamples, nbLabels): +def get_results(results, stats_iter, nb_multiclass, benchmark_argument_dictionaries, + multiclass_labels, metrics, + classification_indices, directories, directory, labels_dictionary, + nb_examples, nb_labels): """Used to analyze the results of the previous benchmarks""" - dataBaseName = benchmarkArgumentDictionaries[0]["args"]["Base"]["name"] - results_means_std, biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, - statsIter, metrics) - - if nbMulticlass > 1: - results_means_std, multiclassResults = analyzeMulticlass(results, statsIter, - benchmarkArgumentDictionaries, - nbExamples, nbLabels, - multiclassLabels, metrics, - classificationIndices, + data_base_name = benchmark_argument_dictionaries[0]["args"]["Base"]["name"] + results_means_std, biclass_results = analyze_biclass(results, benchmark_argument_dictionaries, + stats_iter, 
metrics) + + if nb_multiclass > 1: + results_means_std, multiclass_results = analyzeMulticlass(results, stats_iter, + benchmark_argument_dictionaries, + nb_examples, nb_labels, + multiclass_labels, metrics, + classification_indices, directories) - if statsIter > 1: - results_means_std = analyzebiclassIter(biclassResults, metrics, statsIter, directory, - labelsDictionary, dataBaseName, nbExamples) - if nbMulticlass > 1: - results_means_std = analyzeIterMulticlass(multiclassResults, directory, statsIter, - metrics, dataBaseName, nbExamples) + if stats_iter > 1: + results_means_std = analyzebiclass_iter( + biclass_results, metrics, stats_iter, directory, + labels_dictionary, data_base_name, nb_examples) + if nb_multiclass > 1: + results_means_std = analyze_iter_multiclass(multiclass_results, directory, stats_iter, + metrics, data_base_name, nb_examples) return results_means_std diff --git a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py index 4534c685..7046491a 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py @@ -2,10 +2,15 @@ import configparser import builtins from distutils.util import strtobool as tobool import yaml +import os def get_the_args(path_to_config_file="../config_files/config.yml"): """This is the main function for extracting the args for a '.ini' file""" + config_path = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(config_path, "../..") + path_to_config_file = os.path.join(config_path, path_to_config_file) + with open(path_to_config_file, 'r') as stream: yaml_config = yaml.safe_load(stream) return yaml_config diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 5fe42ee0..b9d65979 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -49,7 +49,7 @@ def parse_the_args(arguments): # groupStandard.add_argument('--nice', metavar='INT', action='store', # type=int, # help='Niceness for the processes', default=0) -# groupStandard.add_argument('--randomState', metavar='STRING', +# groupStandard.add_argument('--random_state', metavar='STRING', # action='store', # help="The random state seed to use or the path " # "to a pickle file where it is stored", @@ -733,7 +733,7 @@ def init_random_state(random_state_arg, directory): file_name = random_state_arg with open(file_name, 'rb') as handle: random_state = pickle.load(handle) - with open(directory + "randomState.pickle", "wb") as handle: + with open(directory + "random_state.pickle", "wb") as handle: pickle.dump(random_state, handle) return random_state @@ -982,8 +982,12 @@ def gen_direcorties_names(directory, statsIter): def find_dataset_names(path, type, names): - """This function goal is to browse the dataset directory and extarcts all + """This function goal is to browse the dataset directory and extrats all the needed dataset names.""" + config_path = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(config_path, "../..") + path = os.path.join(config_path, path) + available_file_names = [file_name.strip().split(".")[0] for file_name in os.listdir(path) if file_name.endswith(type)] @@ -1051,27 +1055,27 @@ def gen_argument_dictionaries(labels_dictionary, directories, multiclass_labels, for 
combination_index, labels_combination in enumerate(labels_combinations): for iter_index, iterRandomState in enumerate(stats_iter_random_states): benchmark_argument_dictionary = { - "LABELS_DICTIONARY": {0: labels_dictionary[labels_combination[0]], + "labels_dictionary": {0: labels_dictionary[labels_combination[0]], 1: labels_dictionary[ labels_combination[1]]}, "directory": directories[iter_index] + labels_dictionary[labels_combination[0]] + "-vs-" + labels_dictionary[labels_combination[1]] + "/", - "classificationIndices": [ + "classification_indices": [ indices_multiclass[combination_index][0][iter_index], indices_multiclass[combination_index][1][iter_index], indices_multiclass[combination_index][2][iter_index]], "args": args, "labels": multiclass_labels[combination_index], - "kFolds": k_folds[iter_index], - "randomState": iterRandomState, - "hyperParamSearch": hyper_param_search, + "k_folds": k_folds[iter_index], + "random_state": iterRandomState, + "hyper_param_search": hyper_param_search, "metrics": metrics, - "argumentDictionaries": argument_dictionaries, + "argument_dictionaries": argument_dictionaries, "benchmark": benchmark, "views": views, - "viewsIndices": views_indices, + "views_indices": views_indices, "flag": [iter_index, labels_combination]} benchmark_argument_dictionaries.append(benchmark_argument_dictionary) return benchmark_argument_dictionaries diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 0d864927..2401ab64 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -75,7 +75,7 @@ def deleteHDF5(benchmarkArgumentsDictionaries, nbCores, DATASET): os.remove(filename) -def makeMeNoisy(viewData, randomState, percentage=5): +def makeMeNoisy(viewData, random_state, percentage=5): """used to introduce some noise in the generated data""" viewData = viewData.astype(bool) nbNoisyCoord = int( @@ -83,15 +83,15 @@ def makeMeNoisy(viewData, randomState, percentage=5): rows = range(viewData.shape[0]) cols = range(viewData.shape[1]) for _ in range(nbNoisyCoord): - rowIdx = randomState.choice(rows) - colIdx = randomState.choice(cols) + rowIdx = random_state.choice(rows) + colIdx = random_state.choice(cols) viewData[rowIdx, colIdx] = 0 noisyViewData = viewData.astype(np.uint8) return noisyViewData def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", - randomState=None, full=True, add_noise=False, + random_state=None, full=True, add_noise=False, noise_std=0.15, nbView=3, nbClass=2, datasetLength=100, randomStateInt=42, nbFeatures = 10): """Used to generate a plausible dataset to test the algorithms""" @@ -112,9 +112,9 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", [np.zeros(nbFeatures) for _ in range(int(datasetLength / 2))] + [np.ones(nbFeatures) for _ in range(datasetLength - int(datasetLength / 2))]) - fakeOneIndices = randomState.randint(0, int(datasetLength / 2), + fakeOneIndices = random_state.randint(0, int(datasetLength / 2), int(datasetLength / 12)) - fakeZeroIndices = randomState.randint(int(datasetLength / 2), + fakeZeroIndices = random_state.randint(int(datasetLength / 2), datasetLength, int(datasetLength / 12)) @@ -122,7 +122,7 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", (len(fakeOneIndices), nbFeatures)) viewData[fakeZeroIndices] = np.zeros( (len(fakeZeroIndices), 
nbFeatures)) - viewData = makeMeNoisy(viewData, randomState) + viewData = makeMeNoisy(viewData, random_state) viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype( @@ -139,8 +139,8 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) datasetFile.close() datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") - LABELS_DICTIONARY = {0: "No", 1: "Yes"} - return datasetFile, LABELS_DICTIONARY, "Plausible" + labels_dictionary = {0: "No", 1: "Yes"} + return datasetFile, labels_dictionary, "Plausible" elif NB_CLASS >= 3: firstBound = int(datasetLength / 3) rest = datasetLength - 2 * int(datasetLength / 3) @@ -158,11 +158,11 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", [np.zeros(nbFeatures) for _ in range(firstBound)] + [np.ones(nbFeatures) for _ in range(firstBound)] + [np.ones(nbFeatures) + 1 for _ in range(rest)]) - fakeOneIndices = randomState.randint(0, firstBound, + fakeOneIndices = random_state.randint(0, firstBound, int(datasetLength / 12)) - fakeTwoIndices = randomState.randint(firstBound, scndBound, + fakeTwoIndices = random_state.randint(firstBound, scndBound, int(datasetLength / 12)) - fakeZeroIndices = randomState.randint(scndBound, thrdBound, + fakeZeroIndices = random_state.randint(scndBound, thrdBound, int(datasetLength / 12)) viewData[fakeOneIndices] = np.ones( @@ -171,7 +171,7 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", (len(fakeZeroIndices), nbFeatures)) viewData[fakeTwoIndices] = np.ones( (len(fakeTwoIndices), nbFeatures)) + 1 - viewData = makeMeNoisy(viewData, randomState) + viewData = makeMeNoisy(viewData, random_state) viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype( @@ -189,28 +189,28 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) datasetFile.close() datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") - LABELS_DICTIONARY = {0: "No", 1: "Yes", 2: "Maybe"} - return datasetFile, LABELS_DICTIONARY, "Plausible" + labels_dictionary = {0: "No", 1: "Yes", 2: "Maybe"} + return datasetFile, labels_dictionary, "Plausible" -# def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, randomState): +# def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, random_state): # """Was used to generateafake dataset to run tests""" # NB_VIEW = 4 # DATASET_LENGTH = 30 # NB_CLASS = 2 -# VIEW_DIMENSIONS = randomState.random_integers(5, 20, NB_VIEW) +# VIEW_DIMENSIONS = random_state.random_integers(5, 20, NB_VIEW) # # DATA = dict((indx, # np.array([ -# randomState.normal(0.0, 2, viewDimension) +# random_state.normal(0.0, 2, viewDimension) # for i in np.arange(DATASET_LENGTH)])) # for indx, viewDimension in enumerate(VIEW_DIMENSIONS)) # -# CLASS_LABELS = randomState.random_integers(0, NB_CLASS - 1, DATASET_LENGTH) +# CLASS_LABELS = random_state.random_integers(0, NB_CLASS - 1, DATASET_LENGTH) # datasetFile = h5py.File(pathF + "Fake.hdf5", "w") # for index, viewData in enumerate(DATA.values()): # if index == 0: -# viewData = randomState.randint(0, 1, (DATASET_LENGTH, 300)).astype( +# viewData = random_state.randint(0, 1, (DATASET_LENGTH, 300)).astype( # np.uint8) # # np.zeros(viewData.shape, dtype=bool)+np.ones((viewData.shape[0], viewData.shape[1]/2), dtype=bool) # viewDset = datasetFile.create_dataset("View" + str(index), viewData.shape) @@ -239,10 +239,10 @@ 
def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", # metaDataGrp.attrs["nbView"] = NB_VIEW # metaDataGrp.attrs["nbClass"] = NB_CLASS # metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) -# LABELS_DICTIONARY = {0: "No", 1: "Yes"} +# labels_dictionary = {0: "No", 1: "Yes"} # datasetFile.close() # datasetFile = h5py.File(pathF + "Fake.hdf5", "r") -# return datasetFile, LABELS_DICTIONARY +# return datasetFile, labels_dictionary class DatasetError(Exception): @@ -468,16 +468,16 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, - randomState, full=False, add_noise=False, noise_std=0.15, + random_state, full=False, add_noise=False, noise_std=0.15, delimiter=","): # TODO : Update this one - labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", + labels_names = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter) datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") labels = np.genfromtxt(pathF + nameDB + "-labels.csv", delimiter=delimiter) labelsDset = datasetFile.create_dataset("Labels", labels.shape, data=labels) labelsDset.attrs["names"] = [labelName.encode() for labelName in - labelsNames] + labels_names] viewFileNames = [viewFileName for viewFileName in os.listdir(pathF + "Views/")] for viewIndex, viewFileName in enumerate(os.listdir(pathF + "Views/")): @@ -494,12 +494,12 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, pass metaDataGrp = datasetFile.create_group("Metadata") metaDataGrp.attrs["nbView"] = len(viewFileNames) - metaDataGrp.attrs["nbClass"] = len(labelsNames) + metaDataGrp.attrs["nbClass"] = len(labels_names) metaDataGrp.attrs["datasetLength"] = len(labels) datasetFile.close() datasetFile, labelsDictionary, dataset_name = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, - randomState, full) + random_state, full) return datasetFile, labelsDictionary, dataset_name @@ -518,22 +518,22 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # return False, labelSupports -# def splitDataset(DATASET, LEARNING_RATE, DATASET_LENGTH, randomState): +# def splitDataset(DATASET, LEARNING_RATE, DATASET_LENGTH, random_state): # LABELS = DATASET.get("Labels")[...] 
# NB_CLASS = int(DATASET["Metadata"].attrs["nbClass"]) -# validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState) +# validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, random_state) # validationIndices.sort() # return validationIndices -# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState): +# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, random_state): # labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS)) # nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports] # trainingExamplesIndices = [] # usedIndices = [] # while nbTrainingExamples != [0 for i in range(NB_CLASS)]: # isUseFull = False -# index = int(randomState.randint(0, DATASET_LENGTH - 1)) +# index = int(random_state.randint(0, DATASET_LENGTH - 1)) # if index not in usedIndices: # isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict) # if isUseFull: @@ -542,7 +542,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # return trainingExamplesIndices -# def getKFoldIndices(nbFolds, CLASS_LABELS, NB_CLASS, learningIndices, randomState): +# def getKFoldIndices(nbFolds, CLASS_LABELS, NB_CLASS, learningIndices, random_state): # labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS[learningIndices])) # nbTrainingExamples = [[int(support / nbFolds) for support in labelSupports] for fold in range(nbFolds)] # trainingExamplesIndices = [] @@ -550,7 +550,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # for foldIndex, fold in enumerate(nbTrainingExamples): # trainingExamplesIndices.append([]) # while fold != [0 for i in range(NB_CLASS)]: -# index = randomState.randint(0, len(learningIndices)) +# index = random_state.randint(0, len(learningIndices)) # if learningIndices[index] not in usedIndices: # isUseFull, fold = isUseful(fold, learningIndices[index], CLASS_LABELS, labelDict) # if isUseFull: @@ -567,7 +567,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # return usedIndices -# def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState): +# def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, random_state): # datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") # labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv') # if len(LABELS_NAMES) != NB_CLASS: @@ -575,7 +575,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # for l in labelsNamesFile: # nbLabelsAvailable += 1 # LABELS_NAMES = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if -# lineIdx in randomState.randint(nbLabelsAvailable, size=NB_CLASS)] +# lineIdx in random_state.randint(nbLabelsAvailable, size=NB_CLASS)] # fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=';').astype(int) # labelsDictionary = dict((classIndice, labelName) for (classIndice, labelName) in # [(int(line.strip().split(";")[0]), line.strip().split(";")[1]) for lineIndex, line in @@ -607,7 +607,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # --------------------------------------------# -# def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, randomState): +# def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, random_state): # datasetFile = h5py.File(path + "MultiOmic.hdf5", "w") # # 
logging.debug("Start:\t Getting Methylation data") @@ -746,7 +746,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # return factorLeft # # -# def findParams(arrayLen, nbPatients, randomState, maxNbBins=2000, minNbBins=10, maxLenBin=70000, minOverlapping=1, +# def findParams(arrayLen, nbPatients, random_state, maxNbBins=2000, minNbBins=10, maxLenBin=70000, minOverlapping=1, # minNbBinsOverlapped=0, maxNbSolutions=30): # results = [] # if arrayLen * arrayLen * 10 / 100 > minNbBinsOverlapped * nbPatients: @@ -762,7 +762,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # if arrayLen == (nbBins - 1) * (lenBin - overlapping) + lenBin: # results.append({"nbBins": nbBins, "overlapping": overlapping, "lenBin": lenBin}) # if len(results) == maxNbSolutions: -# params = preds[randomState.randrange(len(preds))] +# params = preds[random_state.randrange(len(preds))] # return params # # @@ -794,9 +794,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # return sortedBinsMatrix # # -# def makeSparseTotalMatrix(sortedRNASeq, randomState): +# def makeSparseTotalMatrix(sortedRNASeq, random_state): # nbPatients, nbGenes = sortedRNASeq.shape -# params = findParams(nbGenes, nbPatients, randomState) +# params = findParams(nbGenes, nbPatients, random_state) # nbBins = params["nbBins"] # overlapping = params["overlapping"] # lenBin = params["lenBin"] diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 8119b3ec..4398e612 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -10,26 +10,27 @@ from sklearn.model_selection import RandomizedSearchCV from .. 
import metrics -def searchBestSettings(dataset, labels, classifier_module, classifier_name, - metrics, learning_indices, iKFolds, random_state, - directory, viewsIndices=None, nb_cores=1, - searchingTool="randomized_search", n_iter=1, - classifier_config=None): +def search_best_settings(dataset_var, labels, classifier_module, classifier_name, + metrics, learning_indices, i_k_folds, random_state, + directory, views_indices=None, nb_cores=1, + searching_tool="randomized_search", n_iter=1, + classifier_config=None): """Used to select the right hyper-parameter optimization function to optimize hyper parameters""" - if viewsIndices is None: - viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) + if views_indices is None: + views_indices = range(dataset_var.get("Metadata").attrs["nbView"]) output_file_name = directory thismodule = sys.modules[__name__] - if searchingTool is not "None": - searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings, test_folds_preds = searchingToolMethod(dataset, labels, "multiview", random_state, output_file_name, - classifier_module, classifier_name, iKFolds, - nb_cores, metrics, n_iter, classifier_config, - learning_indices=learning_indices, view_indices=viewsIndices,) + if searching_tool is not "None": + searching_tool_method = getattr(thismodule, searching_tool) + best_settings, test_folds_preds = searching_tool_method( + dataset_var, labels, "multiview", random_state, output_file_name, + classifier_module, classifier_name, i_k_folds, + nb_cores, metrics, n_iter, classifier_config, + learning_indices=learning_indices, view_indices=views_indices,) else: - bestSettings = classifier_config - return bestSettings # or well set clasifier ? + best_settings = classifier_config + return best_settings # or well set clasifier ? 
def grid_search(dataset, classifier_name, views_indices=None, k_folds=None, n_iter=1, @@ -89,9 +90,9 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) return test_folds_prediction -def randomized_search(X, y, framework, random_state, output_file_name, classifier_module, - classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, - classifier_kwargs =None, learning_indices=None, view_indices=None): +def randomized_search_x(X, y, framework, random_state, output_file_name, classifier_module, + classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], + n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): estimator = getattr(classifier_module, classifier_name)(random_state, **classifier_kwargs) params_dict = estimator.genDistribs() @@ -107,15 +108,16 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie min_list = np.array( [min(nb_possible_combination, n_iter) for nb_possible_combination in nb_possible_combinations]) - random_search = MultiviewCompatibleRandomizedSearchCV(estimator, - n_iter=int(np.sum(min_list)), - param_distributions=params_dict, - refit=True, - n_jobs=nb_cores, scoring=scorer, - cv=folds, random_state=random_state, - learning_indices=learning_indices, - view_indices=view_indices, - framework=framework) + random_search = MultiviewCompatibleRandomizedSearchCV( + estimator, + n_iter=int(np.sum(min_list)), + param_distributions=params_dict, + refit=True, + n_jobs=nb_cores, scoring=scorer, + cv=folds, random_state=random_state, + learning_indices=learning_indices, + view_indices=view_indices, + framework=framework) detector = random_search.fit(X, y) best_params = dict((key, value) for key, value in @@ -131,7 +133,7 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie best_estimator = estimator best_params = {} test_folds_preds = get_test_folds_preds(X, y, folds, best_estimator, - framework, learning_indices) + framework, learning_indices) return best_params, test_folds_preds @@ -216,13 +218,13 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): return test_folds_prediction -def randomizedSearch(dataset, labels, classifier_package, classifier_name, - metrics_list, learning_indices, k_folds, random_state, - views_indices=None, n_iter=1, - nb_cores=1, **classification_kargs): +def randomized_search(dataset_var, labels, classifier_package, classifier_name, + metrics_list, learning_indices, k_folds, random_state, + views_indices=None, n_iter=1, + nb_cores=1, **classification_kargs): """Used to perform a random search on the classifiers to optimize hyper parameters""" if views_indices is None: - views_indices = range(dataset.get("Metadata").attrs["nbView"]) + views_indices = range(dataset_var.get("Metadata").attrs["nbView"]) metric = metrics_list[0] metric_module = getattr(metrics, metric[0]) if metric[1] is not None: @@ -246,13 +248,13 @@ def randomizedSearch(dataset, labels, classifier_package, classifier_name, for params_set in params_sets: scores = [] for trainIndices, testIndices in kk_folds: - classifier = classifier_class(random_state, nb_scors=nb_cores, + classifier = classifier_class(random_state, nb_scores=nb_cores, **classification_kargs) classifier.setParams(params_set) - classifier.fit_hdf5(dataset, labels, - trainIndices=learning_indices[trainIndices], - viewsIndices=views_indices) - test_labels = classifier.predict_hdf5(dataset, + classifier.fit_hdf5(dataset_var, labels, + 
train_indices=learning_indices[trainIndices], + views_indices=views_indices) + test_labels = classifier.predict_hdf5(dataset_var, used_indices=learning_indices[testIndices], views_indices=views_indices) test_score = metric_module.score( @@ -272,7 +274,7 @@ def randomizedSearch(dataset, labels, classifier_package, classifier_name, # TODO : This must be corrected else: - best_configs, _ = classifier_module.grid_search_hdf5(dataset, labels, + best_configs, _ = classifier_module.grid_search_hdf5(dataset_var, labels, views_indices, classification_kargs, learning_indices, diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 3807ba62..91bd39d6 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -18,11 +18,11 @@ class Test_initKWARGS(unittest.TestCase): def test_initKWARGSFunc_no_monoview(self): benchmark = {"monoview": {}, "multiview": {}} - args = exec_classif.initKWARGSFunc({}, benchmark) + args = exec_classif.init_kwargs_func({}, benchmark) self.assertEqual(args, {"monoview": {}, "multiview": {}}) -class Test_init_argument_dictionaries(unittest.TestCase): +class Test_InitArgumentDictionaries(unittest.TestCase): @classmethod def setUpClass(cls): cls.benchmark = {"monoview": ["fake_monoview_classifier"], "multiview": {}} @@ -86,22 +86,22 @@ class Test_init_argument_dictionaries(unittest.TestCase): self.assertEqual(arguments["multiview"][0], expected_output[0]) -def fakeBenchmarkExec(coreIndex=-1, a=7, args=1): - return [coreIndex, a] +def fakeBenchmarkExec(core_index=-1, a=7, args=1): + return [core_index, a] -def fakeBenchmarkExec_mutlicore(nbCores=-1, a=6, args=1): - return [nbCores, a] +def fakeBenchmarkExec_mutlicore(nb_cores=-1, a=6, args=1): + return [nb_cores, a] def fakeBenchmarkExec_monocore(DATASET=1, a=4, args=1): return [a] -def fakegetResults(results, statsIter, nbMulticlass, - benchmarkArgumentsDictionaries, multiClassLabels, metrics, - classificationIndices, directories, directory, - labelsDictionary, nbExamples, nbLabels): +def fakegetResults(results, stats_iter, nb_multiclass, + benchmark_arguments_dictionaries, multi_class_labels, metrics, + classification_indices, directories, directory, + labels_dictionary, nb_examples, nb_labels): return 3 @@ -118,58 +118,58 @@ class Test_execBenchmark(unittest.TestCase): "multiview_platform/tests/tmp_tests/test_file.hdf5", "w") cls.labels = cls.Dataset.create_dataset("Labels", data=np.array([0, 1, 2])) - cls.argumentDictionaries = [{"a": 4, "args": {}}] + cls.argument_dictionaries = [{"a": 4, "args": {}}] cls.args = { "Base":{"name": "chicken_is_heaven", "type": "type", "pathf": "pathF"}, "Classification":{"hps_iter": 1}} def test_simple(cls): - res = exec_classif.exec_benchmark(1, 2, 3, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(1, 2, 3, cls.argument_dictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, - execOneBenchmark=fakeBenchmarkExec, - execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, - execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, + exec_one_benchmark=fakeBenchmarkExec, + exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + get_results=fakegetResults, delete=fakeDelete) cls.assertEqual(res, 3) def test_multiclass_no_iter(cls): - cls.argumentDictionaries = [{"a": 10, "args": cls.args}, + cls.argument_dictionaries = [{"a": 10, "args": cls.args}, {"a": 4, "args": cls.args}] - res 
= exec_classif.exec_benchmark(2, 1, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 2, cls.argument_dictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, - execOneBenchmark=fakeBenchmarkExec, - execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, - execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, + exec_one_benchmark=fakeBenchmarkExec, + exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + get_results=fakegetResults, delete=fakeDelete) cls.assertEqual(res, 3) def test_multiclass_and_iter(cls): - cls.argumentDictionaries = [{"a": 10, "args": cls.args}, + cls.argument_dictionaries = [{"a": 10, "args": cls.args}, {"a": 4, "args": cls.args}, {"a": 55, "args": cls.args}, {"a": 24, "args": cls.args}] - res = exec_classif.exec_benchmark(2, 2, 2, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 2, 2, cls.argument_dictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, - execOneBenchmark=fakeBenchmarkExec, - execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, - execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, + exec_one_benchmark=fakeBenchmarkExec, + exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_one_benchmark_monoCore=fakeBenchmarkExec_monocore, + get_results=fakegetResults, delete=fakeDelete) cls.assertEqual(res, 3) def test_no_iter_biclass_multicore(cls): - res = exec_classif.exec_benchmark(2, 1, 1, cls.argumentDictionaries, + res = exec_classif.exec_benchmark(2, 1, 1, cls.argument_dictionaries, [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, - execOneBenchmark=fakeBenchmarkExec, - execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, - execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, + exec_one_benchmark=fakeBenchmarkExec, + exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_oneBenchmark_mono_core=fakeBenchmarkExec_monocore, + get_results=fakegetResults, delete=fakeDelete) cls.assertEqual(res, 3) @@ -182,22 +182,22 @@ class Test_execBenchmark(unittest.TestCase): os.rmdir(path) -def fakeExecMono(directory, name, labelsNames, classificationIndices, kFolds, - coreIndex, type, pathF, randomState, labels, - hyperParamSearch="try", metrics="try", nIter=1, **arguments): +def fakeExecMono(directory, name, labels_names, classification_indices, k_folds, + coreIndex, type, pathF, random_state, labels, + hyper_param_search="try", metrics="try", nIter=1, **arguments): return ["Mono", arguments] -def fakeExecMulti(directory, coreIndex, name, classificationIndices, kFolds, - type, pathF, LABELS_DICTIONARY, - randomState, labels, hyperParamSearch="", metrics=None, +def fakeExecMulti(directory, coreIndex, name, classification_indices, k_folds, + type, pathF, labels_dictionary, + random_state, labels, hyper_param_search="", metrics=None, nIter=1, **arguments): return ["Multi", arguments] -def fakeInitMulti(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, - classificationIndices): +def fakeInitMulti(args, benchmark, views, views_indices, argument_dictionaries, + random_state, directory, resultsMonoview, + classification_indices): return {"monoview": [{"try": 0}, {"try2": 100}], "multiview": [{"try3": 5}, {"try4": 10}]} @@ -223,20 +223,20 @@ class Test_execOneBenchmark(unittest.TestCase): "Classification": {"hps_iter": 1}} def test_simple(cls): - flag, results = 
exec_classif.execOneBenchmark(coreIndex=10, - LABELS_DICTIONARY={ + flag, results = exec_classif.exec_one_benchmark(core_index=10, + labels_dictionary={ 0: "a", 1: "b"}, directory="multiview_platform/tests/tmp_tests/", - classificationIndices=( + classification_indices=( [1, 2, 3, 4], [0, 5, 6, 7, 8]), args=cls.args, - kFolds=FakeKfold(), - randomState="try", - hyperParamSearch="try", + k_folds=FakeKfold(), + random_state="try", + hyper_param_search="try", metrics="try", - argumentDictionaries={ + argument_dictionaries={ "Monoview": [ { "try": 0}, @@ -248,16 +248,16 @@ class Test_execOneBenchmark(unittest.TestCase): "try4": 10}]}, benchmark="try", views="try", - viewsIndices="try", + views_indices="try", flag=None, labels=np.array( [0, 1, 2, 1, 2, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), - ExecMonoview_multicore=fakeExecMono, + exec_monoview_multicore=fakeExecMono, exec_multiview_multicore=fakeExecMulti, - initMultiviewArguments=fakeInitMulti) + init_multiview_arguments=fakeInitMulti) cls.assertEqual(flag, None) cls.assertEqual(results , @@ -289,17 +289,17 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): "Classification": {"hps_iter": 1}} def test_simple(cls): - flag, results = exec_classif.execOneBenchmark_multicore( + flag, results = exec_classif.exec_one_benchmark_multicore( nbCores=2, - LABELS_DICTIONARY={0: "a", 1: "b"}, + labels_dictionary={0: "a", 1: "b"}, directory="multiview_platform/tests/tmp_tests/", - classificationIndices=([1, 2, 3, 4], [0, 10, 20, 30, 40]), + classification_indices=([1, 2, 3, 4], [0, 10, 20, 30, 40]), args=cls.args, - kFolds=FakeKfold(), - randomState="try", - hyperParamSearch="try", + k_folds=FakeKfold(), + random_state="try", + hyper_param_search="try", metrics="try", - argumentDictionaries={ + argument_dictionaries={ "monoview": [ { "try": 0}, @@ -311,12 +311,12 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): "try4": 10}]}, benchmark="try", views="try", - viewsIndices="try", + views_indices="try", flag=None, labels=np.array([0, 1, 2, 3, 4, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), - ExecMonoview_multicore=fakeExecMono, + exec_monoview_multicore=fakeExecMono, exec_multiview_multicore=fakeExecMulti, - initMultiviewArguments=fakeInitMulti) + init_multiview_arguments=fakeInitMulti) cls.assertEqual(flag, None) cls.assertEqual(results , @@ -350,7 +350,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): # cls.preds2 = [np.array([0 in range(5)]) for i in range(6)] + \ # [np.array([1, 0, 1, 1, 1]), np.array([1,0,0,1,1]), # np.array([1,0,0,0,1]), np.array([1,1,0,1,1]), np.array([1,1,0,0,1]), np.array([1,1,1,0,1])] -# cls.classifiersNames = ["chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", +# cls.classifiers_names = ["chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", # "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", # "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven", "chicken_is_heaven",] # cls.classifiersNames2 = ["cheese_is_no_disease", "cheese_is_no_disease", "cheese_is_no_disease", @@ -358,10 +358,10 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): # "cheese_is_no_disease", "cheese_is_no_disease", "cheese_is_no_disease", # "cheese_is_no_disease", "cheese_is_no_disease", "cheese_is_no_disease"] # cls.results = [[flag, [["", [name, "", "", pred]], ["", [name1, "", "", pred1]]], ["", ""]] -# for flag, name, pred, name1, pred1 in zip(cls.flags, cls.classifiersNames, cls.preds, +# for flag, name, pred, name1, pred1 in 
zip(cls.flags, cls.classifiers_names, cls.preds, # cls.classifiersNames2, cls.preds2)] # # cls.results = [[flag, ["", ["", name, "", pred]], ""] for flag, pred, name in -# # zip(cls.flags, cls.preds, cls.classifiersNames)] +# # zip(cls.flags, cls.preds, cls.classifiers_names)] # cls.statsIter = 2 # cls.nbExample = 5 # cls.nbLabels = 4 @@ -480,7 +480,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): # default='/home/bbauvin/Documents/data/Data_multi_omics/') # groupStandard.add_argument('--nice', metavar='INT', action='store', type=int, # help='Niceness for the process', default=0) -# groupStandard.add_argument('--randomState', metavar='STRING', action='store', +# groupStandard.add_argument('--random_state', metavar='STRING', action='store', # help="The random state seed to use or a file where we can find it's get_state", default=None) # # groupClass = parser.add_argument_group('Classification arguments') diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_ResultAnalysis.py index be59cce4..bc739072 100644 --- a/multiview_platform/tests/test_ResultAnalysis.py +++ b/multiview_platform/tests/test_ResultAnalysis.py @@ -16,27 +16,27 @@ # res = ResultAnalysis.getMetricsScoresBiclass(cls.metrics, cls.monoViewResults, cls.multiviewResults) # cls.assertIn("accuracy_score",res) # cls.assertEqual(type(res["accuracy_score"]), dict) -# cls.assertEqual(res["accuracy_score"]["classifiersNames"], ["chicken_is_heaven-View0", "Mumbo"]) -# cls.assertEqual(res["accuracy_score"]["trainScores"], [0.5, 0.6]) -# cls.assertEqual(res["accuracy_score"]["testScores"], [0.7, 0.8]) +# cls.assertEqual(res["accuracy_score"]["classifiers_names"], ["chicken_is_heaven-View0", "Mumbo"]) +# cls.assertEqual(res["accuracy_score"]["train_scores"], [0.5, 0.6]) +# cls.assertEqual(res["accuracy_score"]["test_scores"], [0.7, 0.8]) # # def test_only_multiview(cls): # cls.monoViewResults = [] # res = ResultAnalysis.getMetricsScoresBiclass(cls.metrics, cls.monoViewResults, cls.multiviewResults) # cls.assertIn("accuracy_score",res) # cls.assertEqual(type(res["accuracy_score"]), dict) -# cls.assertEqual(res["accuracy_score"]["classifiersNames"], ["Mumbo"]) -# cls.assertEqual(res["accuracy_score"]["trainScores"], [0.6]) -# cls.assertEqual(res["accuracy_score"]["testScores"], [0.8]) +# cls.assertEqual(res["accuracy_score"]["classifiers_names"], ["Mumbo"]) +# cls.assertEqual(res["accuracy_score"]["train_scores"], [0.6]) +# cls.assertEqual(res["accuracy_score"]["test_scores"], [0.8]) # # def test_only_monoview(cls): # cls.multiviewResults = [] # res = ResultAnalysis.getMetricsScoresBiclass(cls.metrics, cls.monoViewResults, cls.multiviewResults) # cls.assertIn("accuracy_score",res) # cls.assertEqual(type(res["accuracy_score"]), dict) -# cls.assertEqual(res["accuracy_score"]["classifiersNames"], ["chicken_is_heaven-View0"]) -# cls.assertEqual(res["accuracy_score"]["trainScores"], [0.5]) -# cls.assertEqual(res["accuracy_score"]["testScores"], [0.7]) +# cls.assertEqual(res["accuracy_score"]["classifiers_names"], ["chicken_is_heaven-View0"]) +# cls.assertEqual(res["accuracy_score"]["train_scores"], [0.5]) +# cls.assertEqual(res["accuracy_score"]["test_scores"], [0.7]) # # # class Test_getExampleErrorsBiclass(unittest.TestCase): diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index 78a4bdb2..8bfc9fa4 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ 
b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -22,10 +22,10 @@ class Test_initConstants(unittest.TestCase): cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value) cls.X.attrs["name"] = "test_dataset" cls.X.attrs["sparse"] = False - cls.classificationIndices = [np.array([0, 2, 4, 6, 8]), + cls.classification_indices = [np.array([0, 2, 4, 6, 8]), np.array([1, 3, 5, 7, 9]), np.array([1, 3, 5, 7, 9])] - cls.labelsNames = ["test_true", "test_false"] + cls.labels_names = ["test_true", "test_false"] cls.name = "test" cls.directory = "multiview_platform/tests/temp_tests/test_dir/" @@ -39,8 +39,8 @@ class Test_initConstants(unittest.TestCase): labelsString, \ outputFileName = exec_classif_mono_view.initConstants(cls.args, cls.X, - cls.classificationIndices, - cls.labelsNames, + cls.classification_indices, + cls.labels_names, cls.name, cls.directory) cls.assertEqual(kwargs, cls.args) @@ -68,13 +68,13 @@ class Test_initTrainTest(unittest.TestCase): cls.random_state = np.random.RandomState(42) cls.X = cls.random_state.randint(0, 500, (10, 5)) cls.Y = cls.random_state.randint(0, 2, 10) - cls.classificationIndices = [np.array([0, 2, 4, 6, 8]), + cls.classification_indices = [np.array([0, 2, 4, 6, 8]), np.array([1, 3, 5, 7, 9]), np.array([1, 3, 5, 7, 9])] def test_simple(cls): X_train, y_train, X_test, y_test, X_test_multiclass = exec_classif_mono_view.initTrainTest( - cls.X, cls.Y, cls.classificationIndices) + cls.X, cls.Y, cls.classification_indices) np.testing.assert_array_equal(X_train, np.array( [np.array([102, 435, 348, 270, 106]), np.array([466, 214, 330, 458, 87]), @@ -97,7 +97,7 @@ class Test_getHPs(unittest.TestCase): def setUpClass(cls): os.mkdir("multiview_platform/tests/tmp_tests") cls.classifierModule = decision_tree - cls.hyperParamSearch = "randomized_search" + cls.hyper_param_search = "randomized_search" cls.n_iter = 2 cls.classifier_name = "decision_tree" cls.random_state = np.random.RandomState(42) @@ -121,7 +121,7 @@ class Test_getHPs(unittest.TestCase): def test_simple(self): kwargs, test_folds_predictions = exec_classif_mono_view.getHPs(self.classifierModule, - self.hyperParamSearch, + self.hyper_param_search, self.n_iter, self.classifier_name, self.classifier_class_name, @@ -139,12 +139,12 @@ class Test_getHPs(unittest.TestCase): # @classmethod # def setUpClass(cls): # cls.classifierModule = None -# cls.hyperParamSearch = "None" +# cls.hyper_param_search = "None" # cls.nIter = 2 # cls.CL_type = "string" # cls.X_train = np.zeros((10,20)) # cls.y_train = np.zeros((10)) -# cls.randomState = np.random.RandomState(42) +# cls.random_state = np.random.RandomState(42) # cls.outputFileName = "test_file" # cls.KFolds = None # cls.nbCores = 1 @@ -153,12 +153,12 @@ class Test_getHPs(unittest.TestCase): # # def test_simple(cls): # clKWARGS = ExecClassifMonoView.getHPs(cls.classifierModule, -# cls.hyperParamSearch, +# cls.hyper_param_search, # cls.nIter, # cls.CL_type, # cls.X_train, # cls.y_train, -# cls.randomState, +# cls.random_state, # cls.outputFileName, # cls.KFolds, # cls.nbCores, diff --git a/multiview_platform/tests/test_monoview_classifiers/test_compatibility.py b/multiview_platform/tests/test_monoview_classifiers/test_compatibility.py index 5feb9f10..91c566df 100644 --- a/multiview_platform/tests/test_monoview_classifiers/test_compatibility.py +++ b/multiview_platform/tests/test_monoview_classifiers/test_compatibility.py @@ -89,7 +89,7 @@ # cls.labels = cls.random_state.randint(0, 2, 10) # # # def test_inputs(cls): -# # # DATASET, CLASS_LABELS, 
randomState, NB_CORES=1, **kwargs +# # # DATASET, CLASS_LABELS, random_state, NB_CORES=1, **kwargs # # for fileName in os.listdir("Code/mono_multi_view_classifiers/monoview_classifiers"): # # if fileName[-3:] == ".py" and fileName != "__init__.py": # # monoview_classifier_module = getattr(monoview_classifiers, fileName[:-3]) diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py index 373cb629..528cd15d 100644 --- a/multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py +++ b/multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py @@ -12,19 +12,19 @@ class Test_disagreement(unittest.TestCase): @classmethod def setUpClass(cls): - cls.randomState = np.random.RandomState(42) + cls.random_state = np.random.RandomState(42) cls.allClassifiersNames = [["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"]] - cls.viewsIndices = np.array([0, 1]) - cls.classifiersDecisions = np.zeros((cls.viewsIndices.shape[0], len(cls.allClassifiersNames), 3, 6), + cls.views_indices = np.array([0, 1]) + cls.classifiersDecisions = np.zeros((cls.views_indices.shape[0], len(cls.allClassifiersNames), 3, 6), dtype=int) for classifer_index, classifier in enumerate(cls.allClassifiersNames): - for view_index, view in enumerate(cls.viewsIndices): + for view_index, view in enumerate(cls.views_indices): cls.classifiersDecisions[view_index, classifer_index] = np.array([ - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6)]) + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6)]) cls.folds_ground_truth = np.array([np.array([1,1,1,0,0,0]) for _ in range(3)]) - cls.classificationIndices = np.array([]) + cls.classification_indices = np.array([]) def test_simple(cls): bestCombi, disagreement = diversity_utils.couple_div_measure( @@ -33,18 +33,18 @@ class Test_disagreement(unittest.TestCase): cls.assertEqual(len(bestCombi), 2) def test_multipleViews(cls): - cls.viewsIndices = np.array([0, 6, 18]) + cls.views_indices = np.array([0, 6, 18]) cls.allClassifiersNames = [["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"]] cls.classifiersDecisions = np.zeros( - (cls.viewsIndices.shape[0], len(cls.allClassifiersNames), 3, 6), + (cls.views_indices.shape[0], len(cls.allClassifiersNames), 3, 6), dtype=int) for classifer_index, classifier in enumerate(cls.allClassifiersNames): - for view_index, view in enumerate(cls.viewsIndices): + for view_index, view in enumerate(cls.views_indices): cls.classifiersDecisions[ view_index, classifer_index] = np.array([ - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6)]) + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6)]) bestCombi, disagreement = diversity_utils.couple_div_measure( cls.allClassifiersNames, cls.classifiersDecisions, diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py index 77c73435..676dac18 100644 --- a/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py +++ 
b/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py @@ -12,20 +12,20 @@ class Test_doubleFaultRatio(unittest.TestCase): @classmethod def setUpClass(cls): - cls.randomState = np.random.RandomState(42) + cls.random_state = np.random.RandomState(42) cls.allClassifiersNames = [["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"]] cls.directory = "" - cls.viewsIndices = np.array([0, 1]) + cls.views_indices = np.array([0, 1]) cls.classifiersDecisions = np.zeros( - (cls.viewsIndices.shape[0], len(cls.allClassifiersNames), 3, 6), + (cls.views_indices.shape[0], len(cls.allClassifiersNames), 3, 6), dtype=int) for classifer_index, classifier in enumerate(cls.allClassifiersNames): - for view_index, view in enumerate(cls.viewsIndices): + for view_index, view in enumerate(cls.views_indices): cls.classifiersDecisions[ view_index, classifer_index] = np.array([ - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6)]) + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6)]) cls.folds_ground_truth = np.array([np.array([1,1,1,0,0,0]) for _ in range(3)]) def test_simple(cls): @@ -36,18 +36,18 @@ class Test_doubleFaultRatio(unittest.TestCase): cls.assertEqual(len(bestCombi), 2) def test_multipleViews(cls): - cls.viewsIndices = np.array([0, 6, 18]) + cls.views_indices = np.array([0, 6, 18]) cls.allClassifiersNames = [["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"]] cls.classifiersDecisions = np.zeros( - (cls.viewsIndices.shape[0], len(cls.allClassifiersNames), 3, 6), + (cls.views_indices.shape[0], len(cls.allClassifiersNames), 3, 6), dtype=int) for classifer_index, classifier in enumerate(cls.allClassifiersNames): - for view_index, view in enumerate(cls.viewsIndices): + for view_index, view in enumerate(cls.views_indices): cls.classifiersDecisions[ view_index, classifer_index] = np.array([ - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6)]) + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6)]) bestCombi, disagreement = diversity_utils.couple_div_measure( cls.allClassifiersNames, cls.classifiersDecisions, double_fault_fusion.doubleFault, cls.folds_ground_truth) diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_Fusion/test_FusionModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_Fusion/test_FusionModule.py index 3155ad23..369fb4e8 100644 --- a/multiview_platform/tests/test_multiview_classifiers/Test_Fusion/test_FusionModule.py +++ b/multiview_platform/tests/test_multiview_classifiers/Test_Fusion/test_FusionModule.py @@ -9,6 +9,6 @@ class Test_genName(unittest.TestCase): def test_late(self): self.config = {"fusionType": "LateFusion", "fusionMethod": "chicken_is_heaven", - "classifiersNames": ["cheese", "is", "no", "disease"]} + "classifiers_names": ["cheese", "is", "no", "disease"]} res = fusion.genName(self.config) self.assertEqual(res, "Late-chic") diff --git a/multiview_platform/tests/test_multiview_classifiers/test_diversity_utils.py b/multiview_platform/tests/test_multiview_classifiers/test_diversity_utils.py index 1b706091..ac51698f 100644 --- a/multiview_platform/tests/test_multiview_classifiers/test_diversity_utils.py +++ b/multiview_platform/tests/test_multiview_classifiers/test_diversity_utils.py @@ -14,22 +14,22 @@ class Test_global_div_measure(unittest.TestCase): @classmethod def 
setUpClass(cls): - cls.randomState = np.random.RandomState(42) + cls.random_state = np.random.RandomState(42) cls.allClassifiersNames = [["SCM", "SVM", "DT"], ["SCM", "SVM", "DT"]] - cls.viewsIndices = np.array([0, 1]) + cls.views_indices = np.array([0, 1]) cls.classifiersDecisions = np.zeros( - (cls.viewsIndices.shape[0], len(cls.allClassifiersNames), 3, 6), + (cls.views_indices.shape[0], len(cls.allClassifiersNames), 3, 6), dtype=int) for classifer_index, classifier in enumerate(cls.allClassifiersNames): - for view_index, view in enumerate(cls.viewsIndices): + for view_index, view in enumerate(cls.views_indices): cls.classifiersDecisions[ view_index, classifer_index] = np.array([ - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6), - cls.randomState.randint(0, 2, 6)]) + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6), + cls.random_state.randint(0, 2, 6)]) cls.folds_ground_truth = np.array( [np.array([1, 1, 1, 0, 0, 0]) for _ in range(3)]) - cls.classificationIndices = np.array([]) + cls.classification_indices = np.array([]) cls.measurement = fake_measure def test_simple(cls): diff --git a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py index e59ce5fb..65be8762 100644 --- a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py +++ b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py @@ -237,14 +237,14 @@ class Test_fillLabelNames(unittest.TestCase): def setUpClass(cls): cls.NB_CLASS = 2 cls.askedLabelsNames = ["test_label_1", "test_label_3"] - cls.randomState = np.random.RandomState(42) + cls.random_state = np.random.RandomState(42) cls.availableLabelsNames = ["test_label_" + str(_) for _ in range(40)] def test_simple(cls): askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( cls.NB_CLASS, cls.askedLabelsNames, - cls.randomState, + cls.random_state, cls.availableLabelsNames) cls.assertEqual(askedLabelsNames, cls.askedLabelsNames) cls.assertEqual(askedLabelsNamesSet, set(cls.askedLabelsNames)) @@ -254,7 +254,7 @@ class Test_fillLabelNames(unittest.TestCase): askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( cls.NB_CLASS, cls.askedLabelsNames, - cls.randomState, + cls.random_state, cls.availableLabelsNames) cls.assertEqual(askedLabelsNames, @@ -282,7 +282,7 @@ class Test_fillLabelNames(unittest.TestCase): askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( cls.NB_CLASS, cls.askedLabelsNames, - cls.randomState, + cls.random_state, cls.availableLabelsNames) cls.assertEqual(askedLabelsNames, ["test_label_3", "test_label_6"]) cls.assertEqual(askedLabelsNamesSet, {"test_label_3", "test_label_6"}) diff --git a/multiview_platform/tests/test_utils/test_execution.py b/multiview_platform/tests/test_utils/test_execution.py index 003b928b..ac84d5f7 100644 --- a/multiview_platform/tests/test_utils/test_execution.py +++ b/multiview_platform/tests/test_utils/test_execution.py @@ -20,20 +20,20 @@ class Test_initStatsIterRandomStates(unittest.TestCase): @classmethod def setUpClass(cls): - cls.randomState = np.random.RandomState(42) + cls.random_state = np.random.RandomState(42) cls.statsIter = 1 def test_one_statiter(cls): - cls.state = cls.randomState.get_state()[1] + cls.state = cls.random_state.get_state()[1] statsIterRandomStates = execution.initStatsIterRandomStates( - cls.statsIter, cls.randomState) + cls.statsIter, cls.random_state) np.testing.assert_array_equal(statsIterRandomStates[0].get_state()[1], cls.state) 
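# Editor's sketch. The renamed tests around this hunk expect one RandomState per
# statistical iteration: a single iteration reuses the master state, while several
# iterations receive independently seeded, mutually distinct states. A minimal
# sketch of that behaviour, inferred from the assertions only (hypothetical helper
# name, not the platform's implementation):
import numpy as np


def init_stats_iter_random_states(stats_iter, random_state):
    """Spawn one RandomState per statistical iteration."""
    if stats_iter > 1:
        # Seed each iteration from the master state so runs stay reproducible.
        return [np.random.RandomState(random_state.randint(5000))
                for _ in range(stats_iter)]
    return [random_state]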
def test_multiple_iter(cls): cls.statsIter = 3 statsIterRandomStates = execution.initStatsIterRandomStates( - cls.statsIter, cls.randomState) + cls.statsIter, cls.random_state) cls.assertAlmostEqual(len(statsIterRandomStates), 3) cls.assertNotEqual(statsIterRandomStates[0].randint(5000), statsIterRandomStates[1].randint(5000)) @@ -82,19 +82,19 @@ class Test_initRandomState(unittest.TestCase): def test_random_state_42(self): randomState_42 = np.random.RandomState(42) - randomState = execution.initRandomState("42", + random_state = execution.initRandomState("42", "multiview_platform/tests/temp_tests/") - os.remove("multiview_platform/tests/temp_tests/randomState.pickle") - np.testing.assert_array_equal(randomState.beta(1, 100, 100), + os.remove("multiview_platform/tests/temp_tests/random_state.pickle") + np.testing.assert_array_equal(random_state.beta(1, 100, 100), randomState_42.beta(1, 100, 100)) def test_random_state_pickle(self): randomState_to_pickle = execution.initRandomState(None, "multiview_platform/tests/temp_tests/") pickled_randomState = execution.initRandomState( - "multiview_platform/tests/temp_tests/randomState.pickle", + "multiview_platform/tests/temp_tests/random_state.pickle", "multiview_platform/tests/temp_tests/") - os.remove("multiview_platform/tests/temp_tests/randomState.pickle") + os.remove("multiview_platform/tests/temp_tests/random_state.pickle") np.testing.assert_array_equal(randomState_to_pickle.beta(1, 100, 100), pickled_randomState.beta(1, 100, 100)) diff --git a/multiview_platform/tests/test_utils/test_multiclass.py b/multiview_platform/tests/test_utils/test_multiclass.py index a73e9f2b..4dd535b9 100644 --- a/multiview_platform/tests/test_utils/test_multiclass.py +++ b/multiview_platform/tests/test_utils/test_multiclass.py @@ -14,7 +14,7 @@ class Test_genMulticlassLabels(unittest.TestCase): cls.testIndices = [ cls.random_state.choice(np.arange(50), size=10, replace=False), cls.random_state.choice(np.arange(50), size=10, replace=False)] - cls.classificationIndices = [ + cls.classification_indices = [ [np.array([_ for _ in range(50) if _ not in cls.testIndices[0]]), cls.testIndices[0]], [np.array([_ for _ in range(50) if _ not in cls.testIndices[1]]), @@ -22,7 +22,7 @@ class Test_genMulticlassLabels(unittest.TestCase): def test_one_versus_one(cls): multiclassLabels, labelsIndices, oldIndicesMulticlass = multiclass.genMulticlassLabels( - cls.labels, "oneVersusOne", cls.classificationIndices) + cls.labels, "oneVersusOne", cls.classification_indices) cls.assertEqual(len(multiclassLabels), 10) cls.assertEqual(labelsIndices, [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), -- GitLab
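Editor's note: the execution-utils tests above now look for `random_state.pickle` and accept either a seed string or a pickle path. A minimal sketch of an initialiser consistent with those assertions follows; the helper name and exact behaviour are assumptions drawn from the tests, not the platform's code.

    import os
    import pickle

    import numpy as np


    def init_random_state(random_state_arg, directory):
        """Build a RandomState from a seed string, a pickle path, or None,
        and persist it next to the results for reproducibility."""
        if random_state_arg is None:
            random_state = np.random.RandomState()
        elif os.path.isfile(str(random_state_arg)):
            # A path to a previously pickled state: reload it as-is.
            with open(random_state_arg, "rb") as handle:
                random_state = pickle.load(handle)
        else:
            # A seed given as a string, e.g. "42".
            random_state = np.random.RandomState(int(random_state_arg))
        with open(os.path.join(directory, "random_state.pickle"), "wb") as handle:
            pickle.dump(random_state, handle)
        return random_state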