diff --git a/config_files/config.ini b/config_files/config.ini
index f77c2e4acc63a755b6db59aebbc635f4da62bf51..fa2c84d9f058c2b605dbabfb14e444e9584e7bde 100644
--- a/config_files/config.ini
+++ b/config_files/config.ini
@@ -23,7 +23,7 @@ split = float ; 0.8
 nbFolds = int ; 2
 nbClass = int ; 2
 classes = list_str ; yes no
-type = list_str ; monoview multiview
+type = list_str ; multiview
 algos_monoview = list_str ; all
 algos_multiview = list_str ; all
 statsiter = int ; 2
@@ -137,4 +137,6 @@ n_stumps_per_attribute = list_int ; 1
 # The Multiview Classifier arguments #
 ######################################
 
-#TODO
\ No newline at end of file
+[weighted_linear_early_fusion]
+view_weights = list_str ; None
+monoview_classifier = list_str ; decision_tree
diff --git a/config_files/config.yml b/config_files/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5c3f48fc24d18b5e334592eea9446d6045da8efe
--- /dev/null
+++ b/config_files/config.yml
@@ -0,0 +1,140 @@
+# The base configuration of the benchmark
+Base :
+  log: true
+  name: ["Plausible"]
+  label: "_"
+  type: ".hdf5"
+  views: ["all"]
+  pathf: "../Data/"
+  nice: 0
+  random_state: 42
+  nb_cores: 1
+  full: False
+  debug: True
+  add_noise: False
+  noise_std: 0.0
+  res_dir: "../Results/"
+
+# All the classification-realted configuration options
+Classification:
+  multiclass_method: "oneVersusOne"
+  split: 0.8
+  nb_folds: 2
+  nb_class: 2
+  classes: ["yes", "no"]
+  type: ["multiview", "monoview"]
+  algos_monoview: ["all"]
+  algos_multiview: ["all"]
+  stats_iter: 2
+  metrics: ["accuracy_score", "f1_score"]
+  metric_princ: "f1_score"
+  hps_type: "randomized_search"
+  hps_iter: 2
+
+
+#####################################
+# The Monoview Classifier arguments #
+#####################################
+
+random_forest:
+  n_estimators: [25]
+  max_depth: [3]
+  criterion: ["entropy"]
+
+svm_linear:
+  C: [1]
+
+svm_rbf:
+  C: [1]
+
+svm_poly:
+  C: [1]
+  degree: [2]
+
+adaboost:
+  n_estimators: [50]
+  base_estimator: ["DecisionTreeClassifier"]
+
+adaboost_pregen:
+  n_estimators: [50]
+  base_estimator: ["DecisionTreeClassifier"]
+  n_stumps: [1]
+
+adaboost_graalpy:
+  n_iterations: [50]
+  n_stumps: [1]
+
+decision_tree:
+  max_depth: [10]
+  criterion: ["gini"]
+  splitter: ["best"]
+
+decision_tree_pregen:
+  max_depth: [10]
+  criterion: ["gini"]
+  splitter: ["best"]
+  n_stumps: [1]
+
+sgd:
+  loss: ["hinge"]
+  penalty: [l2]
+  alpha: [0.0001]
+
+knn:
+  n_neighbors: [5]
+  weights: ["uniform"]
+  algorithm: ["auto"]
+
+scm:
+  model_type: ["conjunction"]
+  max_rules: [10]
+  p: [0.1]
+
+scm_pregen:
+  model_type: ["conjunction"]
+  max_rules: [10]
+  p: [0.1]
+  n_stumps: [1]
+
+cq_boost:
+  mu: [0.01]
+  epsilon: [1e-06]
+  n_max_iterations: [5]
+  n_stumps: [1]
+
+cg_desc:
+  n_max_iterations: [10]
+  n_stumps: [1]
+
+cb_boost:
+  n_max_iterations: [10]
+  n_stumps: [1]
+
+min_cq_graalpy:
+  mu: [0.01]
+  n_stumps_per_attribute: [1]
+
+min_cq_graalpy_tree:
+  mu: [0.01]
+  n_stumps_per_attribute: [1]
+  max_depth: [2]
+
+lasso:
+  alpha: [1]
+  max_iter: [2]
+
+gradient_boosting:
+  n_estimators: [2]
+
+min_cq:
+  mu: [0.01]
+  n_stumps_per_attribute: [1]
+
+
+######################################
+# The Multiview Classifier arguments #
+######################################
+
+weighted_linear_early_fusion:
+  view_weights: [None]
+  monoview_classifier: ["decision_tree"]
diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index
6dc0d255fe233bfc660e20d3c29dc68211705ae9..cd066e7274fbda67eeffe406339c0decae445a55 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -73,35 +73,6 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): pkgutil.iter_modules([ "./mono_multi_view_classifiers/multiview_classifiers"]) if not isPackage] - # benchmark["multiview"] = {} - # if multiviewAlgos == ["all"]: - # algosMutliview = allMultiviewPackages - # else: - # algosMutliview = multiviewAlgos - # for multiviewPackageName in allMultiviewPackages: - # if multiviewPackageName in algosMutliview: - # multiviewPackage = getattr(multiview_classifiers, - # multiviewPackageName) - # multiviewModule = getattr(multiviewPackage, - # multiviewPackageName + "Module") - # benchmark = multiviewModule.getBenchmark(benchmark, args=args) - - # if CL_type == ["Benchmark"]: - # allMonoviewAlgos = [name for _, name, isPackage in - # pkgutil.iter_modules([ - # './mono_multi_view_classifiers/monoview_classifiers']) - # if (not isPackage) and name not in ["framework"]] - # benchmark["monoview"] = allMonoviewAlgos - # benchmark["multiview"] = dict( - # (multiviewPackageName, "_") for multiviewPackageName in - # allMultiviewPackages) - # for multiviewPackageName in allMultiviewPackages: - # multiviewPackage = getattr(multiview_classifiers, - # multiviewPackageName) - # multiviewModule = getattr(multiviewPackage, - # multiviewPackageName + "Module") - # benchmark = multiviewModule.getBenchmark(benchmark, args=args) - return benchmark @@ -133,7 +104,42 @@ def genViewsDictionnary(DATASET, views): return viewsDictionary -def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): +def init_argument_dictionaries(benchmark, views_dictionary, + nb_class, init_kwargs): + argument_dictionaries = {"monoview": [], "multiview": []} + if benchmark["monoview"]: + argument_dictionaries["monoview"] = init_monoview_exps( + benchmark["monoview"], + views_dictionary, + nb_class, + init_kwargs["monoview"]) + if benchmark["multiview"]: + argument_dictionaries["multiview"] = init_multiview_exps(benchmark["multiview"], + views_dictionary, + nb_class, + init_kwargs["multiview"]) + return argument_dictionaries + + +def init_multiview_exps(classifier_names, views_dictionary, nb_class, kwargs_init): + multiview_arguments = [] + for classifier_name in classifier_names: + if multiple_args(classifier_name, kwargs_init): + multiview_arguments += gen_multiple_args_dictionnaries(nb_class, + kwargs_init, + classifier_name, + views_dictionary=views_dictionary, + framework="multiview") + else: + multiview_arguments += [gen_single_multiview_arg_dictionary(classifier_name, + kwargs_init, + nb_class, + views_dictionary=views_dictionary)] + return multiview_arguments + + +def init_monoview_exps(classifier_names, + views_dictionary, nb_class, kwargs_init): r"""Used to add each monoview exeperience args to the list of monoview experiences args. First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -143,13 +149,13 @@ def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): Parameters ---------- - benchmark : dictionary + classifier_names : dictionary All types of monoview and multiview experiments that have to be benchmarked - argumentDictionaries : dictionary + argument_dictionaries : dictionary Maps monoview and multiview experiments arguments. 
viewDictionary : dictionary Maps the view names to their index in the HDF5 dataset - nbClass : integer + nb_class : integer Number of different labels in the classification Returns @@ -157,30 +163,56 @@ def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): benchmark : Dictionary of dictionaries Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. """ - argumentDictionaries = {"monoview": [], "multiview": []} - if benchmark["monoview"]: - argumentDictionaries["monoview"] = [] - for viewName, viewIndex in viewsDictionary.items(): - for classifier in benchmark["monoview"]: - if multiple_args(classifier, kwargsInit): - argumentDictionaries["monoview"] += gen_multiple_args_dictionnaries(nbClass, kwargsInit, classifier, viewName, viewIndex) - else: - arguments = { - "args": {classifier + "KWARGS": dict((key, value[0]) for key, value in kwargsInit[ - classifier + "KWARGSInit"].items()), "feat": viewName, - "CL_type": classifier, "nbClass": nbClass}, - "viewIndex": viewIndex} - argumentDictionaries["monoview"].append(arguments) - return argumentDictionaries + monoview_arguments = [] + for view_name, view_index in views_dictionary.items(): + for classifier in classifier_names: + if multiple_args(classifier, kwargs_init): + monoview_arguments += gen_multiple_args_dictionnaries(nb_class, + kwargs_init, + classifier, + view_name, + view_index) + else: + arguments = gen_single_monoview_arg_dictionary(classifier, + kwargs_init, + nb_class, + view_index, + view_name) + monoview_arguments.append(arguments) + return monoview_arguments + + +def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, + view_index, view_name): + return {classifier_name: dict((key, value[0]) for key, value in arguments[ + classifier_name].items()), + "view_name": view_name, + "view_index": view_index, + "classifier_name": classifier_name, + "nb_class": nb_class} + + +def gen_single_multiview_arg_dictionary(classifier_name,arguments,nb_class, + views_dictionary=None): + return {"classifier_name": classifier_name, + "view_names": list(views_dictionary.keys()), + 'view_indices': list(views_dictionary.values()), + "nb_class": nb_class, + "labels_names": None, + classifier_name: dict((key, value[0]) for key, value in arguments[ + classifier_name].items()) + } + def multiple_args(classifier, kwargsInit): - listed_args = [type(value) == list and len(value)>1 for key, value in - kwargsInit[classifier + "KWARGSInit"].items()] + listed_args = [type(value) == list and len(value)>1 for key, value in + kwargsInit[classifier].items()] if True in listed_args: return True else: return False + def gen_multiple_kwargs_combinations(clKWARGS): values = list(clKWARGS.values()) listed_values = [[_] if type(_) is not list else _ for _ in values] @@ -199,23 +231,29 @@ def gen_multiple_kwargs_combinations(clKWARGS): return kwargs_combination, reduced_kwargs_combination -def gen_multiple_args_dictionnaries(nbClass, kwargsInit, - classifier, viewName, viewIndex): - multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier + "KWARGSInit"]) +def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier, + view_name=None, view_index=None, views_indices=None, + framework="monoview"): + multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier]) multiple_kwargs_dict = dict( (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) for reduced_dictionary, 
dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list )) - args_dictionnaries = [{ - "args": {classifier_name + "KWARGS": arguments, - "feat": viewName, - "CL_type": classifier_name, - "nbClass": nbClass}, - "viewIndex": viewIndex} - for classifier_name, arguments in multiple_kwargs_dict.items()] + args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, + arguments, + nb_class, + view_index=view_index, + view_name=view_name) + if framework=="monoview" else + gen_single_multiview_arg_dictionary(classifier_name, + arguments, + nb_class, + views_indices=views_indices) + for classifier_name, arguments + in multiple_kwargs_dict.items()] return args_dictionnaries -def initMonoviewKWARGS(args, classifiersNames): +def init_monoview_kwargs(args, classifiersNames): r"""Used to init kwargs thanks to a function in each monoview classifier package. Parameters @@ -236,21 +274,39 @@ def initMonoviewKWARGS(args, classifiersNames): monoviewKWARGS = {} for classifiersName in classifiersNames: try: - classifierModule = getattr(monoview_classifiers, classifiersName) + getattr(monoview_classifiers, classifiersName) except AttributeError: raise AttributeError( classifiersName + " is not implemented in monoview_classifiers, " "please specify the name of the file in monoview_classifiers") monoviewKWARGS[ - classifiersName + "KWARGSInit"] = args[classifiersName] + classifiersName] = args[classifiersName] logging.debug("Done:\t Initializing monoview classifiers arguments") return monoviewKWARGS def initKWARGSFunc(args, benchmark): - monoviewKWARGS = initMonoviewKWARGS(args, benchmark["monoview"]) - return monoviewKWARGS + monoview_kwargs = init_monoview_kwargs(args, benchmark["monoview"]) + multiview_kwargs = init_multiview_kwargs(args, benchmark["multiview"]) + kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs} + return kwargs + + +def init_multiview_kwargs(args, classifiers_names): + logging.debug("Start:\t Initializing multiview classifiers arguments") + multiview_kwargs = {} + for classifiers_name in classifiers_names: + try: + getattr(multiview_classifiers, classifiers_name) + except AttributeError: + raise AttributeError( + classifiers_name + " is not implemented in mutliview_classifiers, " + "please specify the name of the coressponding .py " + "file in mutliview_classifiers") + multiview_kwargs[classifiers_name] = args[classifiers_name] + logging.debug("Done:\t Initializing multiview classifiers arguments") + return multiview_kwargs def initMultiviewArguments(args, benchmark, views, viewsIndices, @@ -261,9 +317,8 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, multiviewArguments = [] if "multiview" in benchmark: for multiviewAlgoName in benchmark["multiview"]: - multiviewPackage = getattr(multiview_classifiers, multiviewAlgoName) - mutliviewModule = getattr(multiviewPackage, - multiviewAlgoName + "Module") + mutliviewModule = getattr(multiview_classifiers, + multiviewAlgoName) multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, @@ -351,12 +406,12 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # 
argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -410,12 +465,12 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -458,7 +513,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, LABELS_DICTIONARY, kFolds) logging.debug("Start:\t monoview benchmark") for arguments in argumentDictionaries["monoview"]: - X = DATASET.get("View" + str(arguments["viewIndex"])) + X = DATASET.get("View" + str(arguments["view_index"])) Y = labels resultsMonoview += [ ExecMonoview(directory, X, Y, args["Base"]["name"], labelsNames, @@ -470,12 +525,12 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -568,9 +623,16 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, classificationIndices[0][1]) multiclassGroundTruth = DATASET.get("Labels").value logging.debug("Start:\t Analyzing predictions") - results_mean_stds =getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, - multiclassGroundTruth, metrics, classificationIndices, - directories, directory, labelsDictionary, nbExamples, nbLabels) + results_mean_stds = getResults(results, statsIter, nbMulticlass, + benchmarkArgumentsDictionaries, + multiclassGroundTruth, + metrics, + classificationIndices, + directories, + directory, + labelsDictionary, + nbExamples, + nbLabels) logging.debug("Done:\t Analyzing predictions") delete(benchmarkArgumentsDictionaries, nbCores, DATASET) return results_mean_stds @@ -581,14 +643,13 @@ def execClassif(arguments): start = time.time() args = execution.parseTheArgs(arguments) args = configuration.get_the_args(args.path_config) - os.nice(args["Base"]["nice"]) - nbCores = args["Base"]["nbcores"] + nbCores = args["Base"]["nb_cores"] if nbCores == 1: os.environ['OPENBLAS_NUM_THREADS'] = '1' - statsIter = args["Classification"]["statsiter"] + statsIter = args["Classification"]["stats_iter"] hyperParamSearch = args["Classification"]["hps_type"] - multiclassMethod = args["Classification"]["multiclassmethod"] + multiclassMethod = args["Classification"]["multiclass_method"] CL_type = args["Classification"]["type"] monoviewAlgos = args["Classification"]["algos_monoview"] multiviewAlgos = args["Classification"]["algos_multiview"] @@ -605,7 +666,7 @@ def execClassif(arguments): directory = 
execution.initLogFile(dataset_name, args["Base"]["views"], args["Classification"]["type"], args["Base"]["log"], args["Base"]["debug"], args["Base"]["label"], args["Base"]["res_dir"], args["Base"]["add_noise"], noise_std) - randomState = execution.initRandomState(args["Base"]["randomstate"], directory) + randomState = execution.initRandomState(args["Base"]["random_state"], directory) statsIterRandomStates = execution.initStatsIterRandomStates(statsIter, randomState) @@ -613,7 +674,7 @@ def execClassif(arguments): DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args["Base"]["views"], args["Base"]["pathf"], dataset_name, - args["Classification"]["nbclass"], + args["Classification"]["nb_class"], args["Classification"]["classes"], randomState, args["Base"]["full"], @@ -627,7 +688,7 @@ def execClassif(arguments): multiclassLabels, labelsCombinations, indicesMulticlass = multiclass.genMulticlassLabels( DATASET.get("Labels").value, multiclassMethod, splits) - kFolds = execution.genKFolds(statsIter, args["Classification"]["nbfolds"], + kFolds = execution.genKFolds(statsIter, args["Classification"]["nb_folds"], statsIterRandomStates) datasetFiles = dataset.initMultipleDatasets(args["Base"]["pathf"], args["Base"]["name"], nbCores) @@ -655,8 +716,10 @@ def execClassif(arguments): benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args) initKWARGS = initKWARGSFunc(args, benchmark) dataBaseTime = time.time() - start - argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + argumentDictionaries = init_argument_dictionaries(benchmark, viewsDictionary, NB_CLASS, initKWARGS) + # argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + # NB_CLASS, initKWARGS) directories = execution.genDirecortiesNames(directory, statsIter) benchmarkArgumentDictionaries = execution.genArgumentDictionaries( LABELS_DICTIONARY, directories, multiclassLabels, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 635e09c63fa4c995ab78e37e3e46be083f35ac3f..5707f61374156ac72478f3d3d8e64d30e69ae2a6 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -126,7 +126,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, y_train, imagesAnalysis, y_test) logging.info("Done:\t Saving results") - viewIndex = args["viewIndex"] + viewIndex = args["view_index"] if testFoldsPreds is None: testFoldsPreds = y_train_pred return monoview_utils.MonoviewResult(viewIndex, CL_type, feat, metricsScores, @@ -145,7 +145,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): feat = X.attrs["name"].decode("utf-8") else: feat = X.attrs["name"] - CL_type = kwargs["CL_type"] + CL_type = kwargs["classifier_name"] X = getValue(X) learningRate = float(len(classificationIndices[0])) / ( len(classificationIndices[0]) + len(classificationIndices[1])) @@ -196,7 +196,7 @@ def getHPs(classifierModule, hyperParamSearch, nIter, classifier_module_name, metric=metrics[0], n_iter=nIter, classifier_kwargs=kwargs[ - classifier_module_name + "KWARGS"]) + classifier_module_name]) logging.debug("Done:\t " + hyperParamSearch + " best settings") else: clKWARGS = kwargs[classifier_module_name + "KWARGS"] diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py 
b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 8f9d6b103bc3fa447842fc104cb50872aad5a724..b7968e2f456664e94ea7ba1d2e1999a19fc1d278 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -116,11 +116,14 @@ class BaseMonoviewClassifier(BaseEstimator, ClassifierMixin): return dict((param_name, distrib) for param_name, distrib in zip(self.param_names, self.distribs)) - def getConfig(self): - if self.param_names: - return "\n\t\t- " + self.__class__.__name__ + "with " + ", ".join( + def params_to_string(self): + return ", ".join( [param_name + " : " + self.to_str(param_name) for param_name in self.param_names]) + + def getConfig(self): + if self.param_names: + return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() else: return "\n\t\t- " + self.__class__.__name__ + "with no config." diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py index 6160c013ce2633fbc431d92b2de8bdfa6a14ab10..f9e3aec53a437b9e4ba9855a364d3d06ce1c1e0d 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py @@ -44,10 +44,6 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [estim.predict(X) for estim in self.estimators_]) self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) - self.bounds = np.array([np.prod( - np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) - for i in - range(self.estimator_errors_.shape[0])]) def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -77,9 +73,6 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): get_accuracy_graph(step_test_metrics, "Adaboost", directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "Adaboost", directory + "metrics.png", - self.plotted_metric_name, bounds=list(self.bounds), - bound_name="boosting bound") np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py index ec3fa8387d94365cb3537c1e6734851831678787..ab2abe79688017f69097c26ad4acb41c3ac8c895 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py @@ -103,10 +103,10 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, get_accuracy_graph(step_test_metrics, "AdaboostPregen", directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "AdaboostPregen", - directory + "metrics.png", self.plotted_metric_name, - bounds=list(self.bounds), - bound_name="boosting bound") + # get_accuracy_graph(self.metrics, "AdaboostPregen", + # directory + "metrics.png", self.plotted_metric_name, + # bounds=list(self.bounds), + # bound_name="boosting bound") np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, 
delimiter=',') diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py index 68ecfa692da5ac88051a8993df40dd0581887ac9..8ecb413962252e281298e2d64d16cf71cd22ddee 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py @@ -9,6 +9,7 @@ __status__ = "Prototype" # Production, Development, Prototype classifier_class_name = "KNN" + class KNN(KNeighborsClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, n_neighbors=5, diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py index 4f746c5f2c12c0018e7286b77770e1c54a0866d9..460bcf7a172b4c3d991af8ee599a806b82e74b15 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py @@ -63,8 +63,7 @@ def execute(classifier, trainLabels, name, KFolds, hyperParamSearch, nIter, metric_list, viewsIndices, randomState, labels, classifierModule): - classifierNameString = classifierModule.genName(classificationKWARGS) - CLASS_LABELS = labels + classifier_name = classifier.short_name learningIndices, validationIndices, testIndicesMulticlass = classificationIndices metricModule = getattr(metrics, metric_list[0][0]) @@ -73,15 +72,13 @@ def execute(classifier, trainLabels, enumerate(metric_list[0][1])) else: metricKWARGS = {} - scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], - CLASS_LABELS[learningIndices], + scoreOnTrain = metricModule.score(labels[learningIndices], + labels[learningIndices], **metricKWARGS) - scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], + scoreOnTest = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) - classifierConfiguration = classifier.getConfigString(classificationKWARGS) - - stringAnalysis = "\t\tResult for multiview classification with " + classifierNameString + \ + stringAnalysis = "\t\tResult for multiview classification with " + classifier_name + \ "\n\n" + metric_list[0][0] + " :\n\t-On Train : " + str( scoreOnTrain) + "\n\t-On Test : " + str( scoreOnTest) + \ @@ -90,12 +87,11 @@ def execute(classifier, trainLabels, LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join( views) + "\n\t-" + str( KFolds.n_splits) + \ - " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifierNameString + " with : " + classifierConfiguration + " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.getConfig() metricsScores = getMetricsScores(metric_list, trainLabels, testLabels, validationIndices, learningIndices, labels) stringAnalysis += printMetricScore(metricsScores, metric_list) - stringAnalysis += "\n\n Interpretation : \n\n" + classifier.getSpecificAnalysis( - classificationKWARGS) + stringAnalysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation() imagesAnalysis = {} return stringAnalysis, imagesAnalysis, metricsScores diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 6f9569bd37ff76af745bd628f30f20781815df8e..02ba90af927f6fa24b101e88dc067d261850db6e 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -8,6 +8,7 @@ import h5py import numpy as np from .multiview_utils import MultiviewResult +from . import analyze_results from .. import multiview_classifiers from ..utils import hyper_parameter_search from ..utils.dataset import getShape @@ -20,28 +21,28 @@ __status__ = "Prototype" # Production, Development, Prototype def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, DATASET): """Used to init the constants""" - views = kwargs["views"] - viewsIndices = kwargs["viewsIndices"] + views = kwargs["view_names"] + viewsIndices = kwargs["view_indices"] if not metrics: metrics = [["f1_score", None]] - CL_type = kwargs["CL_type"] - classificationKWARGS = kwargs[CL_type + "KWARGS"] + classifier_name = kwargs["classifier_name"] + classifier_config = kwargs[classifier_name] learningRate = len(classificationIndices[0]) / float( (len(classificationIndices[0]) + len(classificationIndices[1]))) t_start = time.time() logging.info("Info\t: Classification - Database : " + str( name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + CL_type + " ; Cores : " + str( + " ; Algorithm : " + classifier_name + " ; Cores : " + str( nbCores) + ", Train ratio : " + str(learningRate) + ", CV on " + str(KFolds.n_splits) + " folds") for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( getShape(DATASET, viewIndex))) - return CL_type, t_start, viewsIndices, classificationKWARGS, views, learningRate + return classifier_name, t_start, viewsIndices, classifier_config, views, learningRate -def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, +def saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifierModule, classificationKWARGS, directory, learningRate, name, imagesAnalysis): labelsSet = set(LABELS_DICTIONARY.values()) @@ -49,7 +50,7 @@ def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, viewsString = "-".join(views) labelsString = "-".join(labelsSet) timestr = time.strftime("%Y_%m_%d-%H_%M_%S") - CL_type_string = classifierModule.genName(classificationKWARGS) + CL_type_string = classifier.short_name outputFileName = directory + "/" + CL_type_string + "/" + timestr + "-results-" + CL_type_string + "-" + viewsString + '-' + labelsString + \ '-learnRate_{0:.2f}'.format(learningRate) + '-' + name if not os.path.exists(os.path.dirname(outputFileName)): @@ -99,7 +100,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, CL_type, \ t_start, \ viewsIndices, \ - classificationKWARGS, \ + classifier_config, \ views, \ learningRate = initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, DATASET) @@ -115,37 +116,41 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, logging.debug("Start:\t Getting classifiers modules") # classifierPackage = getattr(multiview_classifiers, # CL_type) # Permet d'appeler un module avec une string - classifierModule = getattr(multiview_classifiers, CL_type) - classifierClass = getattr(classifierModule, CL_type + "Class") + classifier_module = getattr(multiview_classifiers, CL_type) + classifier_name = classifier_module.classifier_class_name + # classifierClass = getattr(classifierModule, CL_type + "Class") logging.debug("Done:\t Getting classifiers modules") logging.debug("Start:\t Optimizing hyperparameters") if 
hyperParamSearch != "None": - classifier = hyper_parameter_search.searchBestSettings(DATASET, labels, - classifierPackage, - CL_type, metrics, + classifier_config = hyper_parameter_search.searchBestSettings(DATASET, labels, + classifier_module, + classifier_name, + metrics[0], learningIndices, KFolds, randomState, + directory, + nb_cores=nbCores, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, - nIter=nIter, - **classificationKWARGS) - else: - classifier = classifierClass(randomState, NB_CORES=nbCores, - **classificationKWARGS) + n_iter=nIter, + classifier_config=classifier_config) + + classifier = getattr(classifier_module, classifier_name)(randomState, + **classifier_config) logging.debug("Done:\t Optimizing hyperparameters") logging.debug("Start:\t Fitting classifier") - classifier.fit_hdf5(DATASET, labels, trainIndices=learningIndices, - viewsIndices=viewsIndices, metric=metrics[0]) + classifier.fit(DATASET, labels, train_indices=learningIndices, + view_indices=viewsIndices) logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, - viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, - viewsIndices=viewsIndices) + trainLabels = classifier.predict(DATASET, predict_indices=learningIndices, + view_indices=viewsIndices) + testLabels = classifier.predict(DATASET, predict_indices=validationIndices, + view_indices=viewsIndices) fullLabels = np.zeros(labels.shape, dtype=int) - 100 for trainIndex, index in enumerate(learningIndices): fullLabels[index] = trainLabels[trainIndex] @@ -166,23 +171,23 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, logging.info("Start:\t Result Analysis for " + CL_type) times = (extractionTime, classificationTime) - stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute( + stringAnalysis, imagesAnalysis, metricsScores = analyze_results.execute( classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, + classifier_config, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, name, KFolds, hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) + viewsIndices, randomState, labels, classifier_module) logging.info("Done:\t Result Analysis for " + CL_type) logging.debug("Start:\t Saving preds") - saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, - classificationKWARGS, directory, + saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifier_module, + classifier_config, directory, learningRate, name, imagesAnalysis) logging.debug("Start:\t Saving preds") - return MultiviewResult(CL_type, classificationKWARGS, metricsScores, + return MultiviewResult(CL_type, classifier_config, metricsScores, fullLabels, testLabelsMulticlass) # return CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index 724900fe3dca82577dfc38c1662da4fcaa7fa2a8..7e6baf50b0f90bd460a0cb7e6b4ec2890c6bb3e1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -1,6 +1,10 @@ +from sklearn.base import BaseEstimator, ClassifierMixin +import numpy as np + from .. 
import multiview_classifiers + class MultiviewResult(object): def __init__(self, classifier_name, classifier_config, metrics_scores, full_labels, test_labels_multiclass): @@ -11,8 +15,80 @@ class MultiviewResult(object): self.y_test_multiclass_pred = test_labels_multiclass def get_classifier_name(self): - multiviewClassifierPackage = getattr(multiview_classifiers, - self.classifier_name) - multiviewClassifierModule = getattr(multiviewClassifierPackage, - self.classifier_name + "Module") - return multiviewClassifierModule.genName(self.classifier_config) + multiview_classifier_module = getattr(multiview_classifiers, + self.classifier_name) + multiview_classifier = getattr(multiview_classifier_module, + multiview_classifier_module.classifier_class_name)() + return multiview_classifier.short_name + + +def get_names(classed_list): + return np.array([object_.__class__.__name__ for object_ in classed_list]) + + +class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): + + def __init__(self, random_state): + self.random_state = random_state + self.short_name = self.__class__.__name__ + + def genBestParams(self, detector): + return dict((param_name, detector.best_params_[param_name]) + for param_name in self.param_names) + + def genParamsFromDetector(self, detector): + if self.classed_params: + classed_dict = dict((classed_param, get_names( + detector.cv_results_["param_" + classed_param])) + for classed_param in self.classed_params) + if self.param_names: + return [(param_name, + np.array(detector.cv_results_["param_" + param_name])) + if param_name not in self.classed_params else ( + param_name, classed_dict[param_name]) + for param_name in self.param_names] + else: + return [()] + + def genDistribs(self): + return dict((param_name, distrib) for param_name, distrib in + zip(self.param_names, self.distribs)) + + def params_to_string(self): + return ", ".join( + [param_name + " : " + self.to_str(param_name) for param_name in + self.param_names]) + + def getConfig(self): + if self.param_names: + return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() + else: + return "\n\t\t- " + self.__class__.__name__ + "with no config." 
+ + def to_str(self, param_name): + if param_name in self.weird_strings: + string = "" + if "class_name" in self.weird_strings[param_name] : + string+=self.get_params()[param_name].__class__.__name__ + if "config" in self.weird_strings[param_name]: + string += "( with "+ self.get_params()[param_name].params_to_string()+")" + else: + string+=self.weird_strings[param_name]( + self.get_params()[param_name]) + return string + else: + return str(self.get_params()[param_name]) + + def get_interpretation(self): + return "No detailed interpretation function" + + + + +def get_train_views_indices(dataset, train_indices, view_indices,): + """This function is used to get all the examples indices and view indices if needed""" + if view_indices is None: + view_indices = np.arange(dataset["Metadata"].attrs["nbView"]) + if train_indices is None: + train_indices = range(dataset["Metadata"].attrs["datasetLength"]) + return train_indices, view_indices \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py index ab051c97118c8fb7de2bd5ec2aaa75ea007c0dd4..1ea37fbbff987b6220a141dcc6dd5853b5a40482 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py @@ -1,9 +1,8 @@ import os for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): - if module == '__init__.py' or module[-3:] == '.py' or module[ - -4:] == '.pyc' or module == '__pycache__': + if module == '__init__.py' or module[-4:] == '.pyc' or module == '__pycache__' or module[-3:] != '.py': continue - __import__(module, locals(), globals(), [], 1) + __import__(module[:-3], locals(), globals(), [], 1) del module del os diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py index a1905ac68e0444f82ba14040c7aee980deb2cba7..8dbec755f0c66bf5549a4b94c81ff93ead587628 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py @@ -65,7 +65,7 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): return "No detailed interpretation function" -def get_train_views_indices(dataset, train_indices, view_indices): +def get_train_views_indices(dataset, train_indices, view_indices,): """This function is used to get all the examples indices and view indices if needed""" if view_indices is None: view_indices = np.arange(dataset["Metadata"].attrs["nbView"]) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index 7e0fd7e925eaf974571b1f18a83cf3d9ffff6905..bb7fcbe09ccb9e93d3cf6c2dc147415e8a5ee88c 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -2,32 +2,40 @@ import numpy as np import pkgutil from ..utils.dataset import getV -from .additions.utils import BaseMultiviewClassifier, get_train_views_indices +from ..multiview.multiview_utils import BaseMultiviewClassifier, 
get_train_views_indices from .. import monoview_classifiers +classifier_class_name = "WeightedLinearEarlyFusion" + class WeightedLinearEarlyFusion(BaseMultiviewClassifier): - def __init__(self, random_state=None, view_weights=None, monoview_classifier="decision_tree", monoview_classifier_config={}): + def __init__(self, random_state=None, view_weights=None, + monoview_classifier="decision_tree", + monoview_classifier_config={}): super(WeightedLinearEarlyFusion, self).__init__(random_state=random_state) self.view_weights = view_weights if isinstance(monoview_classifier, str): + self.short_name = "early fusion "+monoview_classifier monoview_classifier_module = getattr(monoview_classifiers, monoview_classifier) monoview_classifier_class = getattr(monoview_classifier_module, monoview_classifier_module.classifier_class_name) - self.monoview_classifier = monoview_classifier_class(**monoview_classifier_config) + self.monoview_classifier = monoview_classifier_class(random_state=random_state, + **monoview_classifier_config) else: self.monoview_classifier = monoview_classifier - self.param_names = ["monoview_classifier",] + self.short_name = "early fusion "+self.monoview_classifier.__class__.__name__ + self.param_names = ["monoview_classifier","random_state"] classifier_classes = [] for name in dir(monoview_classifiers): - module = getattr(monoview_classifiers, name) - if name == "decision_tree": + if not name.startswith("__"): + module = getattr(monoview_classifiers, name) classifier_class = getattr(module, module.classifier_class_name)() classifier_classes.append(classifier_class) - self.distribs = [classifier_classes] + self.distribs = [classifier_classes, [self.random_state]] self.classed_params = ["monoview_classifier"] + self.weird_strings={"monoview_classifier":["class_name", "config"]} def fit(self, X, y, train_indices=None, view_indices=None): train_indices, X = self.transform_data_to_monoview(X, train_indices, view_indices) @@ -44,21 +52,21 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): example_indices, self.view_indices = get_train_views_indices(dataset, example_indices, view_indices) - if self.view_weights is None: + if self.view_weights is None or self.view_weights=="None": self.view_weights = np.ones(len(self.view_indices), dtype=float) else: self.view_weights = np.array(self.view_weights) self.view_weights /= float(np.sum(self.view_weights)) - X = self.hdf5_to_monoview(dataset, example_indices, self.view_indices) + X = self.hdf5_to_monoview(dataset, example_indices) return example_indices, X - def hdf5_to_monoview(self, dataset, exmaples, view_indices): + def hdf5_to_monoview(self, dataset, exmaples): """Here, we concatenate the views for the asked examples """ monoview_data = np.concatenate( [getV(dataset, view_idx, exmaples) for view_weight, (index, view_idx) - in zip(self.view_weights, enumerate(view_indices))] + in zip(self.view_weights, enumerate(self.view_indices))] , axis=1) return monoview_data @@ -66,3 +74,4 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): + diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 59ed1d8ed9e3583aef28fc0541fafef296a8982b..50cb31a6078f9a577f8626dd74a460761e026b94 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -590,7 +590,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): directory = 
arguments["directory"] - databaseName = arguments["args"].name + databaseName = arguments["args"]["Base"]["name"] labelsNames = [arguments["LABELS_DICTIONARY"][0], arguments["LABELS_DICTIONARY"][1]] @@ -764,7 +764,7 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels) results = publishMulticlassScores(multiclassResults, metrics, statsIter, directories, - benchmarkArgumentDictionaries[0]["args"].name) + benchmarkArgumentDictionaries[0]["args"]["Base"]["name"]) publishMulticlassExmapleErrors(multiclassResults, directories, benchmarkArgumentDictionaries[0][ "args"].name) @@ -1011,7 +1011,7 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): """Used to analyze the results of the previous benchmarks""" - dataBaseName = benchmarkArgumentDictionaries[0]["args"].name + dataBaseName = benchmarkArgumentDictionaries[0]["args"]["Base"]["name"] results_means_std, biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py index 3cd0e6e9dda29f38f45f86a94793982274eeffef..4534c685529b9978ba135ad54121f3b42340b737 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py @@ -1,19 +1,24 @@ import configparser import builtins from distutils.util import strtobool as tobool +import yaml -def get_the_args(path_to_config_file="../config_files/config.ini"): +def get_the_args(path_to_config_file="../config_files/config.yml"): """This is the main function for extracting the args for a '.ini' file""" - config_parser = configparser.ConfigParser(comment_prefixes=('#')) - config_parser.read(path_to_config_file) - config_dict = {} - for section in config_parser: - config_dict[section] = {} - for key in config_parser[section]: - value = format_raw_arg(config_parser[section][key]) - config_dict[section][key] = value - return config_dict + with open(path_to_config_file, 'r') as stream: + yaml_config = yaml.safe_load(stream) + return yaml_config + + # config_parser = configparser.ConfigParser(comment_prefixes=('#')) + # config_parser.read(path_to_config_file) + # config_dict = {} + # for section in config_parser: + # config_dict[section] = {} + # for key in config_parser[section]: + # value = format_raw_arg(config_parser[section][key]) + # config_dict[section][key] = value + # return config_dict def format_raw_arg(raw_arg): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 5a32172bc7b1d4f4cd24bacf6fa9cc86ffd373d4..c0a458742dd2a3226f6bf0b5339de6cb3520cc9e 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -23,7 +23,7 @@ def parseTheArgs(arguments): groupStandard.add_argument('--path_config', metavar='STRING', action='store', help='Path to the hdf5 dataset or database ' 'folder (default: %(default)s)', - default='../config_files/config.ini') + default='../config_files/config.yml') # groupStandard.add_argument('-log', action='store_true', # help='Use option to activate logging to console') # groupStandard.add_argument('--name', metavar='STRING', nargs='+', action='store', diff --git 
a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 250bdac228cf94b5db5d6832f412816f5b2ff0d9..6c066e1cdcf6ff4753179e2f138278f13c152185 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -10,21 +10,25 @@ from sklearn.model_selection import RandomizedSearchCV from .. import metrics -def searchBestSettings(dataset, labels, classifierPackage, classifierName, - metrics, iLearningIndices, iKFolds, randomState, - viewsIndices=None, - searchingTool="randomizedSearch", nIter=1, **kwargs): - """Used to select the right hyperparam optimization function to optimize hyper parameters""" +def searchBestSettings(dataset, labels, classifier_module, classifier_name, + metrics, learning_indices, iKFolds, random_state, + directory, viewsIndices=None, nb_cores=1, + searchingTool="randomized_search", n_iter=1, + classifier_config=None): + """Used to select the right hyper-parameter optimization function + to optimize hyper parameters""" if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) + output_file_name = directory thismodule = sys.modules[__name__] - searchingTool = "randomizedSearch" # Todo find a nice way to configure multiview classifier without hp search - searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings = searchingToolMethod(dataset, labels, classifierPackage, - classifierName, metrics, - iLearningIndices, iKFolds, randomState, - viewsIndices=viewsIndices, nIter=nIter, - **kwargs) + if searchingTool is not "None": + searchingToolMethod = getattr(thismodule, searchingTool) + bestSettings, test_folds_preds = searchingToolMethod(dataset, labels, "multiview", random_state, output_file_name, + classifier_module, classifier_name, iKFolds, + nb_cores, metrics, n_iter, classifier_config, + learning_indices=learning_indices, view_indices=viewsIndices,) + else: + bestSettings = classifier_config return bestSettings # or well set clasifier ? 
@@ -67,7 +71,6 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) if framework == "monoview": folds = cv.split(np.arange(len(y)), y) if framework == "multiview": - y = y.value folds = cv.split(available_indices, y[available_indices]) fold_lengths = np.zeros(cv.n_splits, dtype=int) for fold_idx, (train_indices, test_indices) in enumerate(folds): @@ -88,7 +91,7 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) def randomized_search(X, y, framework, random_state, output_file_name, classifier_module, classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, - classifier_kwargs =None, learning_indices=None): + classifier_kwargs =None, learning_indices=None, view_indices=None): estimator = getattr(classifier_module, classifier_name)(random_state, **classifier_kwargs) params_dict = estimator.genDistribs() @@ -111,6 +114,7 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie n_jobs=nb_cores, scoring=scorer, cv=folds, random_state=random_state, learning_indices=learning_indices, + view_indices=view_indices, framework = framework) detector = randomSearch.fit(X, y) @@ -138,7 +142,7 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): def __init__(self, estimator, param_distributions, n_iter=10, refit=True, n_jobs=1, scoring=None, cv=None, - random_state=None, learning_indices=None, framework="monoview"): + random_state=None, learning_indices=None, view_indices=None, framework="monoview"): super(MultiviewCompatibleRandomizedSearchCV, self).__init__(estimator, n_iter=n_iter, param_distributions=param_distributions, @@ -147,12 +151,13 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): cv=cv, random_state=random_state) self.framework = framework self.available_indices = learning_indices + self.view_indices = view_indices def fit(self, X, y=None, groups=None, **fit_params): if self.framework == "monoview": return super(MultiviewCompatibleRandomizedSearchCV, self).fit(X, y=y, groups=groups, **fit_params) elif self.framework == "multiview": - return self.fit_multiview(X, y=y.value, groups=groups,**fit_params) + return self.fit_multiview(X, y=y, groups=groups,**fit_params) def fit_multiview(self, X, y=None, groups=None, **fit_params): n_splits = self.cv.get_n_splits(self.available_indices, y[self.available_indices]) @@ -168,10 +173,12 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): current_estimator = clone(base_estimator) current_estimator.set_params(**candidate_param) current_estimator.fit(X, y, - train_indices=self.available_indices[train_indices]) + train_indices=self.available_indices[train_indices], + view_indices=self.view_indices) test_prediction = current_estimator.predict( X, - self.available_indices[test_indices]) + self.available_indices[test_indices], + view_indices=self.view_indices) test_score = self.scoring._score_func(y[self.available_indices[test_indices]], test_prediction) test_scores[fold_idx] = test_score @@ -188,7 +195,6 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): self.n_splits_ = n_splits return self - def get_test_folds_preds(self, X, y, estimator): test_folds_prediction = [] if self.framework=="monoview": @@ -214,13 +220,13 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): def randomizedSearch(dataset, labels, classifierPackage, classifierName, - metrics, learningIndices, KFolds, randomState, + metrics_list, learningIndices, KFolds, randomState, viewsIndices=None, 
nIter=1, nbCores=1, **classificationKWARGS): """Used to perform a random search on the classifiers to optimize hyper parameters""" if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) - metric = metrics[0] + metric = metrics_list[0] metricModule = getattr(metrics, metric[0]) if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 9da27eefe43dec9c27030a01ca07fb3f819519e8..239b0acb7f257e590dc1d7d005a07e2f34539547 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -19,19 +19,71 @@ class Test_initKWARGS(unittest.TestCase): def test_initKWARGSFunc_no_monoview(self): benchmark = {"monoview": {}, "multiview": {}} args = exec_classif.initKWARGSFunc({}, benchmark) - self.assertEqual(args, {}) + self.assertEqual(args, {"monoview": {}, "multiview": {}}) -class Test_initMonoviewArguments(unittest.TestCase): - - def test_initMonoviewArguments_no_monoview(self): - benchmark = {"monoview": {}, "multiview": {}} - arguments = exec_classif.initMonoviewExps(benchmark, {}, 0, {}) - self.assertEqual(arguments, {'monoview': [], 'multiview': []}) - - def test_initMonoviewArguments_empty(self): - benchmark = {"monoview": {}, "multiview": {}} - arguments = exec_classif.initMonoviewExps(benchmark, {}, 0, {}) +class Test_init_argument_dictionaries(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.benchmark = {"monoview": ["fake_monoview_classifier"], "multiview": {}} + cls.views_dictionnary = {'test_view_0': 0, 'test_view': 1} + cls.nb_class = 2 + cls.monoview_classifier_name = "fake_monoview_classifier" + cls.monoview_classifier_arg_name = "fake_arg" + cls.monoview_classifier_arg_value = ["fake_value_1"] + cls.multiview_classifier_name = "fake_multiview_classifier" + cls.multiview_classifier_arg_name = "fake_arg_mv" + cls.multiview_classifier_arg_value = ["fake_value_2"] + cls.init_kwargs = { + 'monoview':{ + cls.monoview_classifier_name: + {cls.monoview_classifier_arg_name:cls.monoview_classifier_arg_value} + }, + "multiview":{ + cls.multiview_classifier_name:{ + cls.multiview_classifier_arg_name:cls.multiview_classifier_arg_value} + } + } + + def test_init_argument_dictionaries_monoview(self): + arguments = exec_classif.init_argument_dictionaries(self.benchmark, + self.views_dictionnary, + self.nb_class, + self.init_kwargs) + expected_output = [{ + self.monoview_classifier_name: { + self.monoview_classifier_arg_name:self.monoview_classifier_arg_value[0]}, + "view_name": "test_view_0", + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 0}, + {self.monoview_classifier_name: { + self.monoview_classifier_arg_name: self.monoview_classifier_arg_value[0]}, + "view_name": "test_view", + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 1}, + ] + self.assertEqual(arguments["monoview"], expected_output) + + def test_init_argument_dictionaries_multiview(self): + self.benchmark["multiview"] = ["fake_multiview_classifier"] + self.benchmark["monoview"] = {} + arguments = exec_classif.init_argument_dictionaries(self.benchmark, + self.views_dictionnary, + self.nb_class, + self.init_kwargs) + expected_output = [{ + "classifier_name": self.multiview_classifier_name, + "view_indices": [0,1], + "view_names": ["test_view_0", "test_view"], + "nb_class": self.nb_class, + "labels_names":None, 
+ self.multiview_classifier_name: { + self.multiview_classifier_arg_name: + self.multiview_classifier_arg_value[0]}, + },] + self.assertEqual(arguments["multiview"][0], expected_output[0]) def fakeBenchmarkExec(coreIndex=-1, a=7, args=1): diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index 04ad40d4ce5eeb9d1e3d0def002151ce7b66c547..78a4bdb2a570e48aadef94a4b9138dcbd74bc7f4 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -17,7 +17,7 @@ class Test_initConstants(unittest.TestCase): cls.datasetFile = h5py.File( "multiview_platform/tests/temp_tests/test.hdf5", "w") cls.random_state = np.random.RandomState(42) - cls.args = {"CL_type": "test_clf"} + cls.args = {"classifier_name": "test_clf"} cls.X_value = cls.random_state.randint(0, 500, (10, 20)) cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value) cls.X.attrs["name"] = "test_dataset" @@ -99,7 +99,7 @@ class Test_getHPs(unittest.TestCase): cls.classifierModule = decision_tree cls.hyperParamSearch = "randomized_search" cls.n_iter = 2 - cls.classifier_name = "DecisionTree" + cls.classifier_name = "decision_tree" cls.random_state = np.random.RandomState(42) cls.X = cls.random_state.randint(0,10,size=(10,5)) cls.y = cls.random_state.randint(0,2,size=10) @@ -107,9 +107,10 @@ class Test_getHPs(unittest.TestCase): cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state) cls.nb_cores = 1 cls.metrics = [["accuracy_score", None]] - cls.kwargs = {"DecisionTreeKWARGS" : {"max_depth": 1, + cls.kwargs = {"decision_tree" : {"max_depth": 1, "criterion": "gini", "splitter": "best"}} + cls.classifier_class_name = "DecisionTree" @classmethod def tearDownClass(cls): @@ -123,6 +124,7 @@ class Test_getHPs(unittest.TestCase): self.hyperParamSearch, self.n_iter, self.classifier_name, + self.classifier_class_name, self.X, self.y, self.random_state, diff --git a/multiview_platform/tests/test_utils/test_configuration.py b/multiview_platform/tests/test_utils/test_configuration.py index ce04975c2b5991ce70c7a842f16b0b7c314e9fe8..c1e8c3b47125380c120e8516c6880f115b6f6bc4 100644 --- a/multiview_platform/tests/test_utils/test_configuration.py +++ b/multiview_platform/tests/test_utils/test_configuration.py @@ -1,6 +1,6 @@ import os import unittest - +import yaml import numpy as np from multiview_platform.mono_multi_view_classifiers.utils import configuration @@ -8,14 +8,14 @@ from multiview_platform.mono_multi_view_classifiers.utils import configuration class Test_get_the_args(unittest.TestCase): def setUp(self): - self.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.ini" + self.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.yml" os.mkdir("multiview_platform/tests/tmp_tests") - config_file = open(self.path_to_config_file, "w") - config_file.write("[Base]\nfirst_arg = int ; 10\nsecond_arg = list_float ; 12.5 1e-06\n[Classification]\nthird_arg = bool ; yes") - config_file.close() + data = {"Base":{"first_arg": 10, "second_arg":[12.5, 1e-06]}, "Classification":{"third_arg":True}} + with open(self.path_to_config_file, "w") as config_file: + yaml.dump(data, config_file) def tearDown(self): - os.remove("multiview_platform/tests/tmp_tests/config_temp.ini") + os.remove("multiview_platform/tests/tmp_tests/config_temp.yml") os.rmdir("multiview_platform/tests/tmp_tests") def test_file_loading(self): diff --git 
a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
index aaf11dd65db022f1717ab6c7461dca705e99506d..0024a1427a85b07adbbd4f4ebee038fcf75cc28d 100644
--- a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
+++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
@@ -50,6 +50,6 @@ class Test_randomized_search(unittest.TestCase):
     def test_simple(self):
         best_params, test_folds_preds = hyper_parameter_search.randomized_search(
-            self.dataset_file, self.labels, "multiview", self.random_state, "multiview_platform/tests/tmp_tests/",
+            self.dataset_file, self.labels.value, "multiview", self.random_state, "multiview_platform/tests/tmp_tests/",
             weighted_linear_early_fusion, "WeightedLinearEarlyFusion", self.k_folds, 1,
             ["accuracy_score", None], 2, {},
             learning_indices=self.learning_indices)
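
Reviewer note: a minimal usage sketch (not part of the patch) of the YAML-based configuration entry point introduced above, based on the rewritten get_the_args() and the config_files/config.yml added in this change set; the relative path assumes the script is run from the repository root and is illustrative only.

# Load the benchmark configuration with the new YAML-based get_the_args(),
# which replaces the old .ini parser, and read a few of the keys that
# exec_classif.py now expects (nb_cores, stats_iter, per-classifier sections).
from multiview_platform.mono_multi_view_classifiers.utils import configuration

args = configuration.get_the_args("config_files/config.yml")
print(args["Base"]["nb_cores"])              # 1, per the shipped config.yml
print(args["Classification"]["stats_iter"])  # 2
print(args["weighted_linear_early_fusion"]["monoview_classifier"])  # ['decision_tree']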