diff --git a/config_files/config.ini b/config_files/config.ini index 027ca27a52ab8706682e1dd4770a103aa8cce11f..fa2c84d9f058c2b605dbabfb14e444e9584e7bde 100644 --- a/config_files/config.ini +++ b/config_files/config.ini @@ -7,7 +7,7 @@ type = str ; .hdf5 views = list_str ; all pathF = str ; ../Data/ nice = int ; 0 -randomState = str ; None +randomState = str ; 42 nbCores = int ; 1 full = bool ; yes debug = bool ; yes @@ -23,13 +23,13 @@ split = float ; 0.8 nbFolds = int ; 2 nbClass = int ; 2 classes = list_str ; yes no -type = list_str ; Monoview Multiview +type = list_str ; multiview algos_monoview = list_str ; all algos_multiview = list_str ; all statsiter = int ; 2 metrics = list_str ; accuracy_score f1_score metric_princ = str ; f1_score -HPS_type = str ; randomizedSearch +HPS_type = str ; randomized_search HPS_iter = int ; 2 @@ -38,150 +38,97 @@ HPS_iter = int ; 2 # The Monoview Classifier arguments # ##################################### -[RandomForest] +[random_forest] n_estimators = list_int ; 25 max_depth = list_int ; 3 criterion = list_str ; entropy -[SVMLinear] +[svm_linear] C = list_float ; 1 -[SVMRBF] +[svm_rbf] C = list_float ; 1 -[SVMPoly] +[svm_poly] C = list_float ; 1 degree = list_int ; 2 -[Adaboost] +[adaboost] n_estimators = list_int ; 50 base_estimator = list_str ; DecisionTreeClassifier -[AdaboostPregen] +[adaboost_pregen] n_estimators = list_int ; 50 base_estimator = list_str ; DecisionTreeClassifier n_stumps = list_int ; 1 -[AdaboostPregen10] -n_estimators = list_int ; 50 -base_estimator = list_str ; DecisionTreeClassifier -n_stumps = list_int ; 1 - -[AdaboostGraalpy] +[adaboost_graalpy] n_iterations = list_int ; 50 n_stumps = list_int ; 1 -[DecisionTree] +[decision_tree] max_depth = list_int ; 10 criterion = list_str ; gini splitter = list_str ; best -[DecisionTreePregen] +[decision_tree_pregen] max_depth = list_int ; 10 criterion = list_str ; gini splitter = list_str ; best n_stumps = list_int ; 1 -[SGD] +[sgd] loss = list_str ; hinge 
penalty = list_str ; l2 alpha = list_float ; 0.0001 -[KNN] +[knn] n_neighbors = list_int ; 5 weights = list_str ; uniform algorithm = list_str ; auto -[SCM] +[scm] model_type = list_str ; conjunction max_rules = list_int ; 10 p = list_float ; 0.1 -[SCMPregen] +[scm_pregen] model_type = list_str ; conjunction max_rules = list_int ; 10 p = list_float ; 0.1 n_stumps = list_int ; 1 -[CQBoost] +[cq_boost] mu = list_float ; 0.01 epsilon = list_float ; 1e-06 n_max_iterations = list_int ; 5 n_stumps = list_int ; 1 -[CGDesc] +[cg_desc] n_max_iterations = list_int ; 10 n_stumps = list_int ; 1 -[CGDesc10] +[cb_boost] n_max_iterations = list_int ; 10 n_stumps = list_int ; 1 -[CGreed] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[QarBoost] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[QarBoostNC3] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[QarBoostv2] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[QarBoostv3] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[CBBoost] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 - -[CGDescTree] -n_max_iterations = list_int ; 10 -n_stumps = list_int ; 1 -max_depth = list_int ; 2 - -[MinCQGraalpy] +[min_cq_graalpy] mu = list_float ; 0.01 n_stumps_per_attribute = list_int ; 1 -[MinCQGraalpyTree] +[min_cq_graalpy_tree] mu = list_float ; 0.01 n_stumps_per_attribute = list_int ; 1 max_depth = list_int ; 2 -[CQBoostTree] -mu = list_float ; 0.01 -epsilon = list_float ; 1e-06 -n_max_iterations = list_int ; 5 -n_stumps = list_int ; 1 -max_depth = list_int ; 2 - -[SCMPregenTree] -max_rules = list_int ; 5 -model_type = list_str ; conjunction -n_stumps = list_int ; 1 -max_depth = list_int ; 2 - -[AdaboostPregenTree] -n_estimators = list_int ; 50 -base_estimator = list_str ; DecisionTreeClassifier -n_stumps = list_int ; 1 -max_depth = list_int ; 2 - -[Lasso] +[lasso] alpha = list_float ; 1 max_iter = list_int ; 2 -[GradientBoosting] +[gradient_boosting] 
n_estimators = list_int ; 2 -[MinCQ] +[min_cq] mu = list_float ; 0.01 n_stumps_per_attribute = list_int ; 1 @@ -190,4 +137,6 @@ n_stumps_per_attribute = list_int ; 1 # The Multiview Classifier arguments # ###################################### -#TODO \ No newline at end of file +[weighted_linear_early_fusion] +view_weights = list_str ; None +monoview_classifier = list_str ; decision_tree diff --git a/config_files/config.yml b/config_files/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c3f48fc24d18b5e334592eea9446d6045da8efe --- /dev/null +++ b/config_files/config.yml @@ -0,0 +1,140 @@ +# The base configuration of the benchmark +Base : + log: true + name: ["Plausible"] + label: "_" + type: ".hdf5" + views: ["all"] + pathf: "../Data/" + nice: 0 + random_state: 42 + nb_cores: 1 + full: False + debug: True + add_noise: False + noise_std: 0.0 + res_dir: "../Results/" + +# All the classification-realted configuration options +Classification: + multiclass_method: "oneVersusOne" + split: 0.8 + nb_folds: 2 + nb_class: 2 + classes: ["yes", "no"] + type: ["multiview", "monoview"] + algos_monoview: ["all"] + algos_multiview: ["all"] + stats_iter: 2 + metrics: ["accuracy_score", "f1_score"] + metric_princ: "f1_score" + hps_type: "randomized_search" + hps_iter: 2 + + +##################################### +# The Monoview Classifier arguments # +##################################### + +random_forest: + n_estimators: [25] + max_depth: [3] + criterion: ["entropy"] + +svm_linear: + C: [1] + +svm_rbf: + C: [1] + +svm_poly: + C: [1] + degree: [2] + +adaboost: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + +adaboost_pregen: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + n_stumps: [1] + +adaboost_graalpy: + n_iterations: [50] + n_stumps: [1] + +decision_tree: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + +decision_tree_pregen: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + 
n_stumps: [1] + +sgd: + loss: ["hinge"] + penalty: [l2] + alpha: [0.0001] + +knn: + n_neighbors: [5] + weights: ["uniform"] + algorithm: ["auto"] + +scm: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + +scm_pregen: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + n_stumps: [1] + +cq_boost: + mu: [0.01] + epsilon: [1e-06] + n_max_iterations: [5] + n_stumps: [1] + +cg_desc: + n_max_iterations: [10] + n_stumps: [1] + +cb_boost: + n_max_iterations: [10] + n_stumps: [1] + +min_cq_graalpy: + mu: [0.01] + n_stumps_per_attribute: [1] + +min_cq_graalpy_tree: + mu: [0.01] + n_stumps_per_attribute: [1] + max_depth: [2] + +lasso: + alpha: [1] + max_iter: [2] + +gradient_boosting: + n_estimators: [2] + +min_cq: + mu: [0.01] + n_stumps_per_attribute: [1] + + +###################################### +# The Multiview Classifier arguments # +###################################### + +weighted_linear_early_fusion: + view_weights: [None] + monoview_classifier: ["decision_tree"] diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index ae958ca5a67de50c615ed282579c00eb52bbde66..cd066e7274fbda67eeffe406339c0decae445a55 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -69,35 +69,10 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): benchmark["monoview"] = monoviewAlgos if "multiview" in CL_type: - benchmark["multiview"] = {} - if multiviewAlgos == ["all"]: - algosMutliview = allMultiviewPackages - else: - algosMutliview = multiviewAlgos - for multiviewPackageName in allMultiviewPackages: - if multiviewPackageName in algosMutliview: - multiviewPackage = getattr(multiview_classifiers, - multiviewPackageName) - multiviewModule = getattr(multiviewPackage, - multiviewPackageName + "Module") - benchmark = multiviewModule.getBenchmark(benchmark, args=args) - - if CL_type 
== ["Benchmark"]: - allMonoviewAlgos = [name for _, name, isPackage in - pkgutil.iter_modules([ - './mono_multi_view_classifiers/monoview_classifiers']) - if (not isPackage) and name not in ["framework"]] - benchmark["monoview"] = allMonoviewAlgos - benchmark["multiview"] = dict( - (multiviewPackageName, "_") for multiviewPackageName in - allMultiviewPackages) - for multiviewPackageName in allMultiviewPackages: - multiviewPackage = getattr(multiview_classifiers, - multiviewPackageName) - multiviewModule = getattr(multiviewPackage, - multiviewPackageName + "Module") - benchmark = multiviewModule.getBenchmark(benchmark, args=args) - + benchmark["multiview"] = [name for _, name, isPackage in + pkgutil.iter_modules([ + "./mono_multi_view_classifiers/multiview_classifiers"]) + if not isPackage] return benchmark @@ -129,7 +104,42 @@ def genViewsDictionnary(DATASET, views): return viewsDictionary -def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): +def init_argument_dictionaries(benchmark, views_dictionary, + nb_class, init_kwargs): + argument_dictionaries = {"monoview": [], "multiview": []} + if benchmark["monoview"]: + argument_dictionaries["monoview"] = init_monoview_exps( + benchmark["monoview"], + views_dictionary, + nb_class, + init_kwargs["monoview"]) + if benchmark["multiview"]: + argument_dictionaries["multiview"] = init_multiview_exps(benchmark["multiview"], + views_dictionary, + nb_class, + init_kwargs["multiview"]) + return argument_dictionaries + + +def init_multiview_exps(classifier_names, views_dictionary, nb_class, kwargs_init): + multiview_arguments = [] + for classifier_name in classifier_names: + if multiple_args(classifier_name, kwargs_init): + multiview_arguments += gen_multiple_args_dictionnaries(nb_class, + kwargs_init, + classifier_name, + views_dictionary=views_dictionary, + framework="multiview") + else: + multiview_arguments += [gen_single_multiview_arg_dictionary(classifier_name, + kwargs_init, + nb_class, + 
views_dictionary=views_dictionary)] + return multiview_arguments + + +def init_monoview_exps(classifier_names, + views_dictionary, nb_class, kwargs_init): r"""Used to add each monoview exeperience args to the list of monoview experiences args. First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -139,13 +149,13 @@ def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): Parameters ---------- - benchmark : dictionary + classifier_names : dictionary All types of monoview and multiview experiments that have to be benchmarked - argumentDictionaries : dictionary + argument_dictionaries : dictionary Maps monoview and multiview experiments arguments. viewDictionary : dictionary Maps the view names to their index in the HDF5 dataset - nbClass : integer + nb_class : integer Number of different labels in the classification Returns @@ -153,30 +163,56 @@ def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): benchmark : Dictionary of dictionaries Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. 
""" - argumentDictionaries = {"monoview": [], "multiview": []} - if benchmark["monoview"]: - argumentDictionaries["monoview"] = [] - for viewName, viewIndex in viewsDictionary.items(): - for classifier in benchmark["monoview"]: - if multiple_args(classifier, kwargsInit): - argumentDictionaries["monoview"] += gen_multiple_args_dictionnaries(nbClass, kwargsInit, classifier, viewName, viewIndex) - else: - arguments = { - "args": {classifier + "KWARGS": dict((key, value[0]) for key, value in kwargsInit[ - classifier + "KWARGSInit"].items()), "feat": viewName, - "CL_type": classifier, "nbClass": nbClass}, - "viewIndex": viewIndex} - argumentDictionaries["monoview"].append(arguments) - return argumentDictionaries + monoview_arguments = [] + for view_name, view_index in views_dictionary.items(): + for classifier in classifier_names: + if multiple_args(classifier, kwargs_init): + monoview_arguments += gen_multiple_args_dictionnaries(nb_class, + kwargs_init, + classifier, + view_name, + view_index) + else: + arguments = gen_single_monoview_arg_dictionary(classifier, + kwargs_init, + nb_class, + view_index, + view_name) + monoview_arguments.append(arguments) + return monoview_arguments + + +def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, + view_index, view_name): + return {classifier_name: dict((key, value[0]) for key, value in arguments[ + classifier_name].items()), + "view_name": view_name, + "view_index": view_index, + "classifier_name": classifier_name, + "nb_class": nb_class} + + +def gen_single_multiview_arg_dictionary(classifier_name,arguments,nb_class, + views_dictionary=None): + return {"classifier_name": classifier_name, + "view_names": list(views_dictionary.keys()), + 'view_indices': list(views_dictionary.values()), + "nb_class": nb_class, + "labels_names": None, + classifier_name: dict((key, value[0]) for key, value in arguments[ + classifier_name].items()) + } + def multiple_args(classifier, kwargsInit): - listed_args = [type(value) 
== list and len(value)>1 for key, value in - kwargsInit[classifier + "KWARGSInit"].items()] + listed_args = [type(value) == list and len(value)>1 for key, value in + kwargsInit[classifier].items()] if True in listed_args: return True else: return False + def gen_multiple_kwargs_combinations(clKWARGS): values = list(clKWARGS.values()) listed_values = [[_] if type(_) is not list else _ for _ in values] @@ -195,23 +231,29 @@ def gen_multiple_kwargs_combinations(clKWARGS): return kwargs_combination, reduced_kwargs_combination -def gen_multiple_args_dictionnaries(nbClass, kwargsInit, - classifier, viewName, viewIndex): - multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier + "KWARGSInit"]) +def gen_multiple_args_dictionnaries(nb_class, kwargsInit, classifier, + view_name=None, view_index=None, views_indices=None, + framework="monoview"): + multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier]) multiple_kwargs_dict = dict( (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) for reduced_dictionary, dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list )) - args_dictionnaries = [{ - "args": {classifier_name + "KWARGS": arguments, - "feat": viewName, - "CL_type": classifier_name, - "nbClass": nbClass}, - "viewIndex": viewIndex} - for classifier_name, arguments in multiple_kwargs_dict.items()] + args_dictionnaries = [gen_single_monoview_arg_dictionary(classifier_name, + arguments, + nb_class, + view_index=view_index, + view_name=view_name) + if framework=="monoview" else + gen_single_multiview_arg_dictionary(classifier_name, + arguments, + nb_class, + views_indices=views_indices) + for classifier_name, arguments + in multiple_kwargs_dict.items()] return args_dictionnaries -def initMonoviewKWARGS(args, classifiersNames): +def init_monoview_kwargs(args, classifiersNames): r"""Used to init kwargs thanks to a function in each 
monoview classifier package. Parameters @@ -232,21 +274,39 @@ def initMonoviewKWARGS(args, classifiersNames): monoviewKWARGS = {} for classifiersName in classifiersNames: try: - classifierModule = getattr(monoview_classifiers, classifiersName) + getattr(monoview_classifiers, classifiersName) except AttributeError: raise AttributeError( classifiersName + " is not implemented in monoview_classifiers, " "please specify the name of the file in monoview_classifiers") monoviewKWARGS[ - classifiersName + "KWARGSInit"] = args[classifiersName] + classifiersName] = args[classifiersName] logging.debug("Done:\t Initializing monoview classifiers arguments") return monoviewKWARGS def initKWARGSFunc(args, benchmark): - monoviewKWARGS = initMonoviewKWARGS(args, benchmark["monoview"]) - return monoviewKWARGS + monoview_kwargs = init_monoview_kwargs(args, benchmark["monoview"]) + multiview_kwargs = init_multiview_kwargs(args, benchmark["multiview"]) + kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs} + return kwargs + + +def init_multiview_kwargs(args, classifiers_names): + logging.debug("Start:\t Initializing multiview classifiers arguments") + multiview_kwargs = {} + for classifiers_name in classifiers_names: + try: + getattr(multiview_classifiers, classifiers_name) + except AttributeError: + raise AttributeError( + classifiers_name + " is not implemented in mutliview_classifiers, " + "please specify the name of the coressponding .py " + "file in mutliview_classifiers") + multiview_kwargs[classifiers_name] = args[classifiers_name] + logging.debug("Done:\t Initializing multiview classifiers arguments") + return multiview_kwargs def initMultiviewArguments(args, benchmark, views, viewsIndices, @@ -257,9 +317,8 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, multiviewArguments = [] if "multiview" in benchmark: for multiviewAlgoName in benchmark["multiview"]: - multiviewPackage = getattr(multiview_classifiers, multiviewAlgoName) - mutliviewModule = 
getattr(multiviewPackage, - multiviewAlgoName + "Module") + mutliviewModule = getattr(multiview_classifiers, + multiviewAlgoName) multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, @@ -347,12 +406,12 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -406,12 +465,12 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, logging.debug("Done:\t monoview benchmark") logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -454,7 +513,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, LABELS_DICTIONARY, kFolds) logging.debug("Start:\t monoview benchmark") for arguments in argumentDictionaries["monoview"]: - X = DATASET.get("View" + str(arguments["viewIndex"])) + X = DATASET.get("View" + str(arguments["view_index"])) Y = labels resultsMonoview += [ ExecMonoview(directory, X, Y, args["Base"]["name"], labelsNames, @@ 
-466,12 +525,12 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, logging.debug("Start:\t multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, - viewsIndices, - argumentDictionaries, - randomState, directory, - resultsMonoview, - classificationIndices) + # argumentDictionaries = initMultiviewArguments(args, benchmark, views, + # viewsIndices, + # argumentDictionaries, + # randomState, directory, + # resultsMonoview, + # classificationIndices) logging.debug("Done:\t multiview arguments initialization") logging.debug("Start:\t multiview benchmark") @@ -564,9 +623,16 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, classificationIndices[0][1]) multiclassGroundTruth = DATASET.get("Labels").value logging.debug("Start:\t Analyzing predictions") - results_mean_stds =getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, - multiclassGroundTruth, metrics, classificationIndices, - directories, directory, labelsDictionary, nbExamples, nbLabels) + results_mean_stds = getResults(results, statsIter, nbMulticlass, + benchmarkArgumentsDictionaries, + multiclassGroundTruth, + metrics, + classificationIndices, + directories, + directory, + labelsDictionary, + nbExamples, + nbLabels) logging.debug("Done:\t Analyzing predictions") delete(benchmarkArgumentsDictionaries, nbCores, DATASET) return results_mean_stds @@ -577,14 +643,13 @@ def execClassif(arguments): start = time.time() args = execution.parseTheArgs(arguments) args = configuration.get_the_args(args.path_config) - os.nice(args["Base"]["nice"]) - nbCores = args["Base"]["nbcores"] + nbCores = args["Base"]["nb_cores"] if nbCores == 1: os.environ['OPENBLAS_NUM_THREADS'] = '1' - statsIter = args["Classification"]["statsiter"] + statsIter = args["Classification"]["stats_iter"] hyperParamSearch = args["Classification"]["hps_type"] - multiclassMethod = args["Classification"]["multiclassmethod"] + multiclassMethod = 
args["Classification"]["multiclass_method"] CL_type = args["Classification"]["type"] monoviewAlgos = args["Classification"]["algos_monoview"] multiviewAlgos = args["Classification"]["algos_multiview"] @@ -601,7 +666,7 @@ def execClassif(arguments): directory = execution.initLogFile(dataset_name, args["Base"]["views"], args["Classification"]["type"], args["Base"]["log"], args["Base"]["debug"], args["Base"]["label"], args["Base"]["res_dir"], args["Base"]["add_noise"], noise_std) - randomState = execution.initRandomState(args["Base"]["randomstate"], directory) + randomState = execution.initRandomState(args["Base"]["random_state"], directory) statsIterRandomStates = execution.initStatsIterRandomStates(statsIter, randomState) @@ -609,7 +674,7 @@ def execClassif(arguments): DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args["Base"]["views"], args["Base"]["pathf"], dataset_name, - args["Classification"]["nbclass"], + args["Classification"]["nb_class"], args["Classification"]["classes"], randomState, args["Base"]["full"], @@ -623,17 +688,14 @@ def execClassif(arguments): multiclassLabels, labelsCombinations, indicesMulticlass = multiclass.genMulticlassLabels( DATASET.get("Labels").value, multiclassMethod, splits) - kFolds = execution.genKFolds(statsIter, args["Classification"]["nbfolds"], + kFolds = execution.genKFolds(statsIter, args["Classification"]["nb_folds"], statsIterRandomStates) datasetFiles = dataset.initMultipleDatasets(args["Base"]["pathf"], args["Base"]["name"], nbCores) - # if not views: - # raise ValueError("Empty views list, modify selected views to match dataset " + args["Base"]["views) views, viewsIndices, allViews = execution.initViews(DATASET, args["Base"]["views"]) viewsDictionary = genViewsDictionnary(DATASET, views) - print(viewsDictionary) nbViews = len(views) NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] @@ -654,8 +716,10 @@ def execClassif(arguments): benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args) initKWARGS 
= initKWARGSFunc(args, benchmark) dataBaseTime = time.time() - start - argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + argumentDictionaries = init_argument_dictionaries(benchmark, viewsDictionary, NB_CLASS, initKWARGS) + # argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + # NB_CLASS, initKWARGS) directories = execution.genDirecortiesNames(directory, statsIter) benchmarkArgumentDictionaries = execution.genArgumentDictionaries( LABELS_DICTIONARY, directories, multiclassLabels, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/additions/CGDescUtils.py b/multiview_platform/mono_multi_view_classifiers/monoview/additions/CGDescUtils.py index c17b30783d844ddde1ba3927d574277929d83c8b..ee90e90bb8175f1c5aacbb658d0dd1063a414b3a 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/additions/CGDescUtils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/additions/CGDescUtils.py @@ -17,7 +17,7 @@ from ... import metrics # Used for QarBoost and CGreed class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, n_max_iterations=None, estimators_generator="Stumps", + def __init__(self, n_max_iterations=None, estimators_generator="Stumps", max_depth=1, random_state=42, self_complemented=True, twice_the_same=False, c_bound_choice=True, random_start=True, n_stumps=1, use_r=True, c_bound_sol=True, @@ -72,17 +72,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): "c_bound_choice", "random_start", "n_stumps", "use_r", "c_bound_sol"] self.mincq_tracking = mincq_tracking + self.max_depth = max_depth def fit(self, X, y): - ones = [] - tows = [] - threes = [] - fours = [] - fives = [] - sixes = [] - sevens = [] - eights = [] - formatted_X, formatted_y = self.format_X_y(X, y) @@ -351,11 +343,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def init_hypotheses(self, X, y): """Inintialization for 
the hyptotheses used to build the boosted vote""" - if self.estimators_generator is "Stumps": + if self.estimators_generator == "Stumps": self.estimators_generator = StumpsClassifiersGenerator( n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) - if self.estimators_generator is "Trees": + if self.estimators_generator == "Trees": self.estimators_generator = TreeClassifiersGenerator( n_trees=self.n_stumps, max_depth=self.max_depth, self_complemented=self.self_complemented) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/additions/CQBoostUtils.py b/multiview_platform/mono_multi_view_classifiers/monoview/additions/CQBoostUtils.py index df9f23f1f3756bcda9124069d5b99f9a87270dab..40122b1d542b9091e2e7142326b26fce50be7bb9 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/additions/CQBoostUtils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/additions/CQBoostUtils.py @@ -17,7 +17,7 @@ from ... import metrics class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=100, - estimators_generator="Stumps", dual_constraint_rhs=0, + estimators_generator="Stumps", dual_constraint_rhs=0, max_depth=1, save_iteration_as_hyperparameter_each=None, random_state=None): super(ColumnGenerationClassifier, self).__init__() self.epsilon = epsilon @@ -25,6 +25,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.estimators_generator = estimators_generator self.dual_constraint_rhs = dual_constraint_rhs self.mu = mu + self.max_depth=max_depth self.train_time = 0 self.plotted_metric = metrics.zero_one_loss self.random_state = random_state @@ -79,15 +80,18 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): np.squeeze(np.array((alpha).T.dot(y_kernel_matrix).T)), fill_value=-np.inf) - h_values[self.chosen_columns_] = ma.masked + if self.chosen_columns_: + 
h_values[self.chosen_columns_] = ma.masked + worst_h_index = ma.argmax(h_values) # Check for optimal solution. We ensure at least one complete iteration is done as the initialization # values might provide a degenerate initial solution. - if h_values[ - worst_h_index] <= self.dual_constraint_rhs + self.epsilon and len( - self.chosen_columns_) > 0: - break + if self.chosen_columns_: + if h_values[ + worst_h_index] <= self.dual_constraint_rhs + self.epsilon and len( + self.chosen_columns_) > 0: + break # Append the weak hypothesis. self.chosen_columns_.append(worst_h_index) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py index 9e6a5db61ddbaf10b31a4dcbadc29c27b44629b3..87e50317d40f97a791cf349324370cbf6f739c3e 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py @@ -48,24 +48,24 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): return metricScoreString, [metricScoreTrain, metricScoreTest] -def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, +def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics_list, nIter, feat, CL_type, clKWARGS, classLabelsNames, shape, y_train, y_train_pred, y_test, y_test_pred, time, randomState, classifier, directory): metricsScores = {} - metricModule = getattr(metrics, metrics[0][0]) + metricModule = getattr(metrics, metrics_list[0][0]) trainScore = metricModule.score(y_train, y_train_pred) testScore = metricModule.score(y_test, y_test_pred) stringAnalysis = "Classification on " + name + " database for " + feat + " with " + CL_type + ".\n\n" - stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + \ - metrics[0][0] + " on test : " + str( + stringAnalysis += metrics_list[0][0] + " on train : " + str(trainScore) + "\n" + \ + 
metrics_list[0][0] + " on test : " + str( testScore) + "\n\n" stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) classifierConfigString, classifierIntepretString = getClassifierConfigString( gridSearch, nbCores, nIter, clKWARGS, classifier, directory, y_test) stringAnalysis += classifierConfigString - for metric in metrics: + for metric in metrics_list: metricString, metricScore = getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 1870e9b74a855c6e89fe6a5170d06b0bc5c2d3c7..5745e67a992696783362632e4c868be410cdcd2b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -17,6 +17,7 @@ from .analyze_result import execute # Import own modules from .. 
import monoview_classifiers from ..utils.dataset import getValue, extractSubset +from ..utils import hyper_parameter_search # Author-Info __author__ = "Nikolas Huelsmann, Baptiste BAUVIN" @@ -76,16 +77,17 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, logging.debug("Done:\t Determine Train/Test split") logging.debug("Start:\t Generate classifier args") - classifierModuleName = CL_type.split("_")[0] - classifierModule = getattr(monoview_classifiers, classifierModuleName) + classifierModule = getattr(monoview_classifiers, CL_type) + classifier_class_name = classifierModule.classifier_class_name clKWARGS, testFoldsPreds = getHPs(classifierModule, hyperParamSearch, - nIter, CL_type, X_train, y_train, + nIter, CL_type, classifier_class_name, + X_train, y_train, randomState, outputFileName, KFolds, nbCores, metrics, kwargs) logging.debug("Done:\t Generate classifier args") logging.debug("Start:\t Training") - classifier = getattr(classifierModule, classifierModuleName)(randomState, **clKWARGS) + classifier = getattr(classifierModule, classifier_class_name)(randomState, **clKWARGS) classifier.fit(X_train, y_train) # NB_CORES=nbCores, logging.debug("Done:\t Training") @@ -124,7 +126,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, y_train, imagesAnalysis, y_test) logging.info("Done:\t Saving results") - viewIndex = args["viewIndex"] + viewIndex = args["view_index"] if testFoldsPreds is None: testFoldsPreds = y_train_pred return monoview_utils.MonoviewResult(viewIndex, CL_type, feat, metricsScores, @@ -143,7 +145,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): feat = X.attrs["name"].decode("utf-8") else: feat = X.attrs["name"] - CL_type = kwargs["CL_type"] + CL_type = kwargs["classifier_name"] X = getValue(X) learningRate = float(len(classificationIndices[0])) / ( len(classificationIndices[0]) + len(classificationIndices[1])) @@ -175,27 +177,29 @@ def initTrainTest(X, Y, 
classificationIndices): return X_train, y_train, X_test, y_test, X_test_multiclass -def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, +def getHPs(classifierModule, hyperParamSearch, nIter, classifier_module_name, + classifier_class_name, X_train, y_train, randomState, outputFileName, KFolds, nbCores, metrics, kwargs): if hyperParamSearch != "None": logging.debug( "Start:\t " + hyperParamSearch + " best settings with " + str( - nIter) + " iterations for " + CL_type) - classifierHPSearch = getattr(monoview_utils, hyperParamSearch) - clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, + nIter) + " iterations for " + classifier_module_name) + classifierHPSearch = getattr(hyper_parameter_search, hyperParamSearch) + clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, "monoview", randomState, outputFileName, - classifierModule, CL_type, - KFolds=KFolds, - nbCores=nbCores, + classifierModule, + classifier_class_name, + folds=KFolds, + nb_cores=nbCores, metric=metrics[0], - nIter=nIter, - classifier_KWARGS=kwargs[ - CL_type + "KWARGS"]) + n_iter=nIter, + classifier_kwargs=kwargs[ + classifier_module_name]) logging.debug("Done:\t " + hyperParamSearch + " best settings") else: - clKWARGS = kwargs[CL_type + "KWARGS"] + clKWARGS = kwargs[classifier_module_name + "KWARGS"] testFoldsPreds = None return clKWARGS, testFoldsPreds diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 4f3500d77a90381a74f759687a029ca9166f6199..d152c7d6ae0146ca829ae2b1b3d37c90f621a392 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -17,50 +17,6 @@ __status__ = "Prototype" # Production, Development, Prototype # __date__ = 2016 - 03 - 25 - -def randomizedSearch(X_train, y_train, randomState, outputFileName, - 
classifierModule, CL_type, KFolds=4, nbCores=1, - metric=["accuracy_score", None], nIter=30, - classifier_KWARGS=None): - estimator = getattr(classifierModule, CL_type)(randomState, - **classifier_KWARGS) - params_dict = estimator.genDistribs() - if params_dict: - metricModule = getattr(metrics, metric[0]) - if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metric[1])) - else: - metricKWARGS = {} - scorer = metricModule.get_scorer(**metricKWARGS) - nb_possible_combinations = compute_possible_combinations(params_dict) - min_list = np.array( - [min(nb_possible_combination, nIter) for nb_possible_combination in - nb_possible_combinations]) - randomSearch = RandomizedSearchCV(estimator, - n_iter=int(np.sum(min_list)), - param_distributions=params_dict, - refit=True, - n_jobs=nbCores, scoring=scorer, - cv=KFolds, random_state=randomState) - detector = randomSearch.fit(X_train, y_train) - - bestParams = dict((key, value) for key, value in - estimator.genBestParams(detector).items() if - key is not "random_state") - - scoresArray = detector.cv_results_['mean_test_score'] - params = estimator.genParamsFromDetector(detector) - - hyper_parameter_search.genHeatMaps(params, scoresArray, outputFileName) - best_estimator = detector.best_estimator_ - else: - best_estimator = estimator - bestParams = {} - testFoldsPreds = genTestFoldsPreds(X_train, y_train, KFolds, best_estimator) - return bestParams, testFoldsPreds - - def change_label_to_minus(y): minus_y = np.copy(y) minus_y[np.where(y == 0)] = -1 @@ -103,7 +59,7 @@ class CustomRandint: It can be used with a multiplier agrument to be able to perform more complex generation for example 10 e -(randint)""" - def __init__(self, low=0, high=0, multiplier=""): + def __init__(self,low=0, high=0, multiplier=""): self.randint = randint(low, high) self.multiplier = multiplier @@ -160,11 +116,14 @@ class BaseMonoviewClassifier(BaseEstimator, ClassifierMixin): return 
dict((param_name, distrib) for param_name, distrib in zip(self.param_names, self.distribs)) - def getConfig(self): - if self.param_names: - return "\n\t\t- " + self.__class__.__name__ + "with " + ", ".join( + def params_to_string(self): + return ", ".join( [param_name + " : " + self.to_str(param_name) for param_name in self.param_names]) + + def getConfig(self): + if self.param_names: + return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() else: return "\n\t\t- " + self.__class__.__name__ + "with no config." diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py index b8deb248f31d907887c72e5b6c1eb2ca531cec5b..3b2952e4481e0ca2bf1a4510751c742f7ef2699e 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py @@ -12,6 +12,8 @@ from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "Adaboost" + class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): """ @@ -46,10 +48,6 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [estim.predict(X) for estim in self.estimators_]) self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) - self.bounds = np.array([np.prod( - np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) - for i in - range(self.estimator_errors_.shape[0])]) def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -79,9 +77,6 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): get_accuracy_graph(step_test_metrics, "Adaboost", directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "Adaboost", directory 
+ "metrics.png", - self.plotted_metric_name, bounds=list(self.bounds), - bound_name="boosting bound") np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graal.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py similarity index 99% rename from multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graal.py rename to multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py index 03618cad734d057633be43d714864f8f54cfb9ab..3ffd5e232e665e03baadcba0c67b361c379a07b3 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graal.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py @@ -10,6 +10,7 @@ from ..monoview.additions.BoostUtils import StumpsClassifiersGenerator, \ from ..monoview.monoview_utils import CustomRandint, \ BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero +classifier_class_name = "AdaboostGraalpy" class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): """Scikit-Learn compatible AdaBoost classifier. Original code by Pascal Germain, adapted by Jean-Francis Roy. 
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py index ba960986d94872a8899f04e03ecf215f1fd6b9c4..511a5320da3e857974c946412a9c39c2458eb4ac 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen.py @@ -15,11 +15,14 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "AdaboostPregen" + class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifier): def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True, + base_estimator=None, n_stumps=1, estimators_generator="Stumps", + max_depth=1, self_complemeted=True, **kwargs): super(AdaboostPregen, self).__init__( random_state=random_state, @@ -28,16 +31,19 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, algorithm="SAMME" ) self.param_names = ["n_estimators", "base_estimator", "n_stumps", + "estimators_generator", "max_depth", "random_state"] self.classed_params = ["base_estimator"] self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)], [n_stumps], + ["Stumps", "Tree"], CustomRandint(low=1, high=5), [random_state]] self.weird_strings = {"base_estimator": "class_name"} self.plotted_metric = metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" self.step_predictions = None - self.estimators_generator = "Stumps" + self.estimators_generator = estimators_generator + self.max_depth = max_depth self.n_stumps = n_stumps self.self_complemented = self_complemeted @@ -97,10 +103,10 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, get_accuracy_graph(step_test_metrics, "AdaboostPregen", directory + "test_metrics.png", self.plotted_metric_name, 
set="test") - get_accuracy_graph(self.metrics, "AdaboostPregen", - directory + "metrics.png", self.plotted_metric_name, - bounds=list(self.bounds), - bound_name="boosting bound") + # get_accuracy_graph(self.metrics, "AdaboostPregen", + # directory + "metrics.png", self.plotted_metric_name, + # bounds=list(self.bounds), + # bound_name="boosting bound") np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') @@ -110,19 +116,6 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, np.array([self.train_time, len(self.estimator_weights_)]), delimiter=',') return interpretString - # def pregen_voters(self, X, y=None): - # if y is not None: - # neg_y = change_label_to_minus(y) - # if self.estimators_generator is None: - # self.estimators_generator = StumpsClassifiersGenerator( - # n_stumps_per_attribute=self.n_stumps, - # self_complemented=self.self_complemented) - # self.estimators_generator.fit(X, neg_y) - # else: - # neg_y=None - # classification_matrix = self._binary_classification_matrix(X) - - # def formatCmdArgs(args): # """Used to format kwargs for the parsed args""" # kwargsDict = {'n_estimators': args.AdP_n_est, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen10.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen10.py deleted file mode 100644 index e7d0e21f78b3b7a257c1e87b5b4690de8c0c8dc9..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen10.py +++ /dev/null @@ -1,38 +0,0 @@ -from sklearn.tree import DecisionTreeClassifier - -from .adaboost_pregen import AdaboostPregen - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -class AdaboostPregen10(AdaboostPregen): - - def __init__(self, random_state=None, n_estimators=50, - 
base_estimator=None, n_stumps=1, self_complemeted=True, - **kwargs): - super(AdaboostPregen10, self).__init__( - random_state=random_state, - n_estimators=100, - base_estimator=base_estimator, - n_stumps=10, - self_complemeted=self_complemeted - ) - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {'n_estimators': args.AdP_n_est, -# 'base_estimator': [DecisionTreeClassifier(max_depth=1)], -# } -# return kwargsDict - - -def paramsToSet(nIter, random_state): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"n_estimators": random_state.randint(1, 500), - "base_estimator": None}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen_tree.py deleted file mode 100644 index 8276f764477d299d7566903358768a2150410035..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_pregen_tree.py +++ /dev/null @@ -1,123 +0,0 @@ -import time - -import numpy as np -from sklearn.ensemble import AdaBoostClassifier -from sklearn.tree import DecisionTreeClassifier - -from .. 
import metrics -from ..monoview.additions.BoostUtils import get_accuracy_graph -from ..monoview.additions.PregenUtils import PregenClassifier -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \ - change_label_to_zero - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, - PregenClassifier): - - def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True, - max_depth=2, **kwargs): - super(AdaboostPregenTree, self).__init__( - random_state=random_state, - n_estimators=n_estimators, - base_estimator=base_estimator, - algorithm="SAMME" - ) - self.param_names = ["n_estimators", "base_estimator", "n_stumps", - "random_state", "max_depth"] - self.classed_params = ["base_estimator"] - self.distribs = [CustomRandint(low=1, high=500), - [DecisionTreeClassifier(max_depth=1)], [n_stumps], - [random_state], [max_depth]] - self.weird_strings = {"base_estimator": "class_name"} - self.plotted_metric = metrics.zero_one_loss - self.plotted_metric_name = "zero_one_loss" - self.step_predictions = None - self.estimators_generator = "Trees" - self.n_stumps = n_stumps - self.max_depth = max_depth - self.self_complemented = self_complemeted - self.random_state = random_state - - def fit(self, X, y, sample_weight=None): - pregen_X, pregen_y = self.pregen_voters(X, y, - generator=self.estimators_generator) - begin = time.time() - super(AdaboostPregenTree, self).fit(pregen_X, pregen_y, - sample_weight=sample_weight) - end = time.time() - self.train_time = end - begin - self.train_shape = pregen_X.shape - self.base_predictions = np.array( - [change_label_to_zero(estim.predict(pregen_X)) for estim in - self.estimators_]) - self.metrics = np.array( - [self.plotted_metric.score(change_label_to_zero(pred), y) for pred - in self.staged_predict(pregen_X)]) - self.bounds = np.array([np.prod( 
- np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) - for i in - range(self.estimator_errors_.shape[0])]) - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def predict(self, X): - begin = time.time() - pregen_X, _ = self.pregen_voters(X) - pred = super(AdaboostPregenTree, self).predict(pregen_X) - end = time.time() - self.pred_time = end - begin - if pregen_X.shape != self.train_shape: - self.step_predictions = np.array( - [change_label_to_zero(step_pred) for step_pred in - self.staged_predict(pregen_X)]) - return change_label_to_zero(pred) - - def getInterpret(self, directory, y_test): - interpretString = "" - interpretString += self.getFeatureImportance(directory) - interpretString += "\n\n Estimator error | Estimator weight\n" - interpretString += "\n".join( - [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for - error, weight in - zip(self.estimator_errors_, self.estimator_weights_)]) - step_test_metrics = np.array( - [self.plotted_metric.score(y_test, step_pred) for step_pred in - self.step_predictions]) - get_accuracy_graph(step_test_metrics, "AdaboostPregen", - directory + "test_metrics.png", - self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "AdaboostPregen", - directory + "metrics.png", self.plotted_metric_name, - bounds=list(self.bounds), - bound_name="boosting bound") - np.savetxt(directory + "test_metrics.csv", step_test_metrics, - delimiter=',') - np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') - np.savetxt(directory + "times.csv", - np.array([self.train_time, self.pred_time]), delimiter=',') - return interpretString - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {'n_estimators': args.AdPT_n_est, -# 'base_estimator': [DecisionTreeClassifier(max_depth=1)], -# 'n_stumps': args.AdPT_trees, -# "max_depth": args.AdPT_max_depth} -# return kwargsDict - - -def 
paramsToSet(nIter, random_state): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"n_estimators": random_state.randint(1, 500), - "base_estimator": None}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/c_greed.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/c_greed.py deleted file mode 100644 index 823776895c48af7a6b3ac46e5b698c8fc96a2598..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/c_greed.py +++ /dev/null @@ -1,50 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomRandint - - -class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, n_max_iterations=500, n_stumps=10, - **kwargs): - super(CGreed, self).__init__(n_max_iterations=n_max_iterations, - random_state=random_state, - self_complemented=True, - twice_the_same=False, - c_bound_choice=True, - random_start=False, - n_stumps=n_stumps, - use_r=True, - c_bound_sol=True, - estimators_generator="Stumps" - ) - - self.param_names = ["n_max_iterations", "n_stumps", "random_state"] - self.distribs = [CustomRandint(low=2, high=1000), [n_stumps], - [random_state]] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "CGr" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"n_stumps": args.CGR_stumps, -# "n_max_iterations": args.CGR_n_iter} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early 
fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py index df7329ac376ec440bc9507bb3a4e710e4e6f44b0..c9340c4d2e8ef1cece11d30f249ad2b789b28af7 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cb_boost.py @@ -2,6 +2,8 @@ from ..monoview.additions.CBBoostUtils import CBBoostClassifier from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomRandint +classifier_class_name = "CBBoost" + class CBBoost(CBBoostClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py index 66f4d287aba2821be2d597fc183ac6e7d9e2d351..7a881285807f714f5b5b493ccc0af073f7aeefb6 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc.py @@ -2,25 +2,31 @@ from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomRandint +classifier_class_name = "CGDesc" + class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, + estimators_generator="Stumps", twice_the_same=True, max_depth=1, **kwargs): super(CGDesc, self).__init__(n_max_iterations=n_max_iterations, random_state=random_state, self_complemented=True, - twice_the_same=True, + twice_the_same=twice_the_same, c_bound_choice=True, random_start=False, n_stumps=n_stumps, use_r=False, 
c_bound_sol=True, - estimators_generator="Stumps", + estimators_generator=estimators_generator, + max_depth=max_depth, mincq_tracking=False, ) - self.param_names = ["n_max_iterations", "n_stumps", "random_state"] + self.param_names = ["n_max_iterations", "n_stumps", + "estimators_generator", "max_depth", "random_state", "twice_the_same"] self.distribs = [CustomRandint(low=2, high=500), [n_stumps], - [random_state]] + ["Stumps", "Trees"], CustomRandint(low=1, high=5), + [random_state], [True, False]] self.classed_params = [] self.weird_strings = {} diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc10.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc10.py deleted file mode 100644 index f29d5dd39cce3cd2683dd7440c2f005403754387..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc10.py +++ /dev/null @@ -1,25 +0,0 @@ -from .cg_desc import CGDesc - - -class CGDesc10(CGDesc): - - def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, - **kwargs): - super(CGDesc10, self).__init__(n_max_iterations=100, - random_state=random_state, - n_stumps=10, ) - - -# def formatCmdArgs(args): - # """Used to format kwargs for the parsed args""" - # kwargsDict = {"n_stumps": args.CGD_stumps, - # "n_max_iterations": args.CGD_n_iter} - # return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc_tree.py deleted file mode 100644 index 3694f79d9a47bec0580074a45ed756f34c40f678..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cg_desc_tree.py +++ 
/dev/null @@ -1,52 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomRandint - - -class CGDescTree(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, - max_depth=2, **kwargs): - super(CGDescTree, self).__init__(n_max_iterations=n_max_iterations, - random_state=random_state, - self_complemented=True, - twice_the_same=True, - c_bound_choice=True, - random_start=False, - n_stumps=n_stumps, - use_r=True, - c_bound_sol=True, - estimators_generator="Trees" - ) - self.max_depth = max_depth - self.param_names = ["n_max_iterations", "n_stumps", "random_state", - "max_depth"] - self.distribs = [CustomRandint(low=2, high=1000), [n_stumps], - [random_state], [max_depth]] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "CGDT" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"n_stumps": args.CGDT_trees, -# "n_max_iterations": args.CGDT_n_iter, -# "max_depth": args.CGDT_max_depth} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py index 057df43e6c40f0755eeefeb75a3e7f7778240e1a..7984a428a16b77ef1a293f4812cd3085a2156bad 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py +++ 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py @@ -5,23 +5,28 @@ from ..monoview.additions.CQBoostUtils import ColumnGenerationClassifier from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ BaseMonoviewClassifier +classifier_class_name = "CQBoost" class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, - n_max_iterations=None, **kwargs): + n_max_iterations=None, estimators_generator="Stumps", + max_depth=1, **kwargs): super(CQBoost, self).__init__( random_state=random_state, mu=mu, epsilon=epsilon, - estimators_generator="Stumps", - n_max_iterations=n_max_iterations + estimators_generator=estimators_generator, + n_max_iterations=n_max_iterations, + max_depth=max_depth ) self.param_names = ["mu", "epsilon", "n_stumps", "random_state", - "n_max_iterations"] + "n_max_iterations", "estimators_generator", + "max_depth"] self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), CustomRandint(low=1, high=15, multiplier="e-"), - [n_stumps], [random_state], [n_max_iterations]] + [n_stumps], [random_state], [n_max_iterations], + ["Stumps", "Trees"], CustomRandint(low=1, high=5)] self.classed_params = [] self.weird_strings = {} self.n_stumps = n_stumps diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boosttree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boosttree.py deleted file mode 100644 index b999867ae0acb61eccf2515603527dfbdddc5a13..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boosttree.py +++ /dev/null @@ -1,74 +0,0 @@ -import numpy as np - -from ..monoview.additions.BoostUtils import getInterpretBase -from ..monoview.additions.CQBoostUtils import ColumnGenerationClassifier -from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ - BaseMonoviewClassifier - - -class 
CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, - max_depth=2, n_max_iterations=100, **kwargs): - super(CQBoostTree, self).__init__( - random_state=random_state, - mu=mu, - epsilon=epsilon, - estimators_generator="Trees", - n_max_iterations=n_max_iterations - ) - self.param_names = ["mu", "epsilon", "n_stumps", "random_state", - "max_depth", "n_max_iterations"] - self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-"), - [n_stumps], [random_state], [max_depth], - [n_max_iterations]] - self.classed_params = [] - self.weird_strings = {} - self.n_stumps = n_stumps - self.max_depth = max_depth - if "nbCores" not in kwargs: - self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - np.savetxt(directory + "train_metrics.csv", self.train_metrics, - delimiter=',') - np.savetxt(directory + "c_bounds.csv", self.c_bounds, - delimiter=',') - np.savetxt(directory + "y_test_step.csv", self.step_decisions, - delimiter=',') - step_metrics = [] - for step_index in range(self.step_decisions.shape[1] - 1): - step_metrics.append(self.plotted_metric.score(y_test, - self.step_decisions[:, - step_index])) - step_metrics = np.array(step_metrics) - np.savetxt(directory + "step_test_metrics.csv", step_metrics, - delimiter=',') - return getInterpretBase(self, directory, "CQBoost", self.weights_, - y_test) - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.CQBT_mu, -# "epsilon": args.CQBT_epsilon, -# "n_stumps": args.CQBT_trees, -# "max_depth": args.CQBT_max_depth, -# "n_max_iterations": args.CQBT_n_iter} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate 
random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), - "epsilon": 10 ** -randomState.randint(1, 15)}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv2.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv2.py deleted file mode 100644 index 0ac2d774114ff8c90d1f61a9822126f3877cf64b..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv2.py +++ /dev/null @@ -1,233 +0,0 @@ -import numpy as np - -from ..monoview.additions.BoostUtils import getInterpretBase -from ..monoview.additions.CQBoostUtils import ColumnGenerationClassifier -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - - -class ColumnGenerationClassifierv2(ColumnGenerationClassifier): - - def __init__(self, mu=0.01, epsilon=1e-06, random_state=None): - super(ColumnGenerationClassifierv2, self).__init__(mu=mu, - epsilon=epsilon, - random_state=random_state) - - def initialize(self): - self.weights_ = [] - self.edge_scores = [] - self.alphas = [] - - def update_values(self, h_values=None, worst_h_index=None, alpha=None, - w=None): - self.edge_scores.append(h_values[worst_h_index]) - self.alphas.append(alpha) - self.weights_.append(w[-1]) - - def get_margins(self, w=None): - self.weights = np.array(self.weights_) - self.final_vote_weights = np.array( - [np.prod(1 - self.weights[t + 1:]) * self.weights_[t] if t < - self.weights.shape[ - 0] - 1 else - self.weights[t] for t in range(self.weights.shape[0])]) - margins = np.squeeze(np.asarray( - np.matmul(self.classification_matrix[:, self.chosen_columns_], - self.final_vote_weights))) - return margins - - def compute_weights_(self, w=None): - self.weights_ = np.array(self.weights_) - self.final_vote_weights = np.array( - [np.prod(1 - self.weights_[t + 1:]) * self.weights_[t] if t < - 
self.weights_.shape[ - 0] - 1 else - self.weights_[t] for t in range(self.weights_.shape[0])]) - self.weights_ = self.final_vote_weights - - def get_matrix_to_optimize(self, y_kernel_matrix, w=None): - m = self.n_total_examples - if w is not None: - matrix_to_optimize = np.concatenate( - (np.matmul(self.matrix_to_optimize, w).reshape((m, 1)), - y_kernel_matrix[:, self.chosen_columns_[-1]].reshape((m, 1))), - axis=1) - else: - matrix_to_optimize = y_kernel_matrix[:, - self.chosen_columns_[-1]].reshape((m, 1)) - return matrix_to_optimize - - -class CQBoostv2(ColumnGenerationClassifierv2, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, **kwargs): - super(CQBoostv2, self).__init__( - random_state=random_state, - mu=mu, - epsilon=epsilon - ) - self.param_names = ["mu", "epsilon"] - self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-")] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return getInterpretBase(self, directory, "CQBoostv2", self.weights_, ) - - def get_name_for_fusion(self): - return "CQB2" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.CQB_mu, -# "epsilon": args.CQB_epsilon} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), - "epsilon": 10 ** -randomState.randint(1, 15)}) - return paramsSet - -# class CQBoostv2(CqBoostClassifierv2): -# -# def __init__(self, random_state, **kwargs): -# super(CQBoostv2, self).__init__( -# mu=kwargs['mu'], -# epsilon=kwargs['epsilon'], -# n_max_iterations= kwargs['n_max_iterations'], -# ) -# -# def 
canProbas(self): -# """Used to know if the classifier can return label probabilities""" -# return False -# -# def paramsToSrt(self, nIter=1): -# """Used for weighted linear early fusion to generate random search sets""" -# paramsSet = [] -# for _ in range(nIter): -# paramsSet.append({"mu": 0.001, -# "epsilon": 1e-08, -# "n_max_iterations": None}) -# return paramsSet -# -# def getKWARGS(self, args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# kwargsDict['mu'] = 0.001 -# kwargsDict['epsilon'] = 1e-08 -# kwargsDict['n_max_iterations'] = None -# return kwargsDict -# -# def genPipeline(self): -# return Pipeline([('classifier', CqBoostClassifierv2())]) -# -# def genParamsDict(self, randomState): -# return {"classifier__mu": [0.001], -# "classifier__epsilon": [1e-08], -# "classifier__n_max_iterations": [None]} -# -# def genBestParams(self, detector): -# return {"mu": detector.best_params_["classifier__mu"], -# "epsilon": detector.best_params_["classifier__epsilon"], -# "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]} -# -# def genParamsFromDetector(self, detector): -# nIter = len(detector.cv_results_['param_classifier__mu']) -# return [("mu", np.array([0.001 for _ in range(nIter)])), -# ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])), -# ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))] -# -# def getConfig(self, config): -# if type(config) is not dict: # Used in late fusion when config is a classifier -# return "\n\t\t- CQBoost with mu : " + str(config.mu) + ", epsilon : " + str( -# config.epsilon + ", n_max_iterations : " + str(config.n_max_iterations)) -# else: -# return "\n\t\t- CQBoost with mu : " + str(config["mu"]) + ", epsilon : " + str( -# config["epsilon"] + ", n_max_iterations : " + str(config["n_max_iterations"])) -# -# -# def getInterpret(self, classifier, directory): -# interpretString = "" -# return interpretString -# -# -# def 
canProbas(): -# return False -# -# -# def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): -# """Used to fit the monoview classifier with the args stored in kwargs""" -# start = time.time() -# classifier = CqBoostClassifierv2(mu=kwargs['mu'], -# epsilon=kwargs['epsilon'], -# n_max_iterations=kwargs["n_max_iterations"],) -# # random_state=randomState) -# classifier.fit(DATASET, CLASS_LABELS) -# end = time.time() -# classifier.train_time =end-start -# return classifier -# -# -# def paramsToSet(nIter, randomState): -# """Used for weighted linear early fusion to generate random search sets""" -# paramsSet = [] -# for _ in range(nIter): -# paramsSet.append({"mu": randomState.uniform(1e-02, 10**(-0.5)), -# "epsilon": 10**-randomState.randint(1, 15), -# "n_max_iterations": None}) -# return paramsSet -# -# -# def getKWARGS(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# kwargsDict['mu'] = args.CQB2_mu -# kwargsDict['epsilon'] = args.CQB2_epsilon -# kwargsDict['n_max_iterations'] = None -# return kwargsDict -# -# -# def genPipeline(): -# return Pipeline([('classifier', CqBoostClassifierv2())]) -# -# -# def genParamsDict(randomState): -# return {"classifier__mu": CustomUniform(loc=.5, state=2, multiplier='e-'), -# "classifier__epsilon": CustomRandint(low=1, high=15, multiplier='e-'), -# "classifier__n_max_iterations": [None]} -# -# -# def genBestParams(detector): -# return {"mu": detector.best_params_["classifier__mu"], -# "epsilon": detector.best_params_["classifier__epsilon"], -# "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]} -# -# -# def genParamsFromDetector(detector): -# nIter = len(detector.cv_results_['param_classifier__mu']) -# return [("mu", np.array([0.001 for _ in range(nIter)])), -# ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])), -# ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))] -# -# -# def getConfig(config): -# if 
type(config) is not dict: # Used in late fusion when config is a classifier -# return "\n\t\t- CQBoostv2 with mu : " + str(config.mu) + ", epsilon : " + str( -# config.epsilon) + ", n_max_iterations : " + str(config.n_max_iterations) -# else: -# return "\n\t\t- CQBoostv2 with mu : " + str(config["mu"]) + ", epsilon : " + str( -# config["epsilon"]) + ", n_max_iterations : " + str(config["n_max_iterations"]) -# -# -# def getInterpret(classifier, directory): -# return getInterpretBase(classifier, directory, "CQBoostv2", classifier.final_vote_weights) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv21.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv21.py deleted file mode 100644 index 45ae008f03476c0d3342742938e12071cd0ae754..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boostv21.py +++ /dev/null @@ -1,327 +0,0 @@ -import logging -import time -from collections import defaultdict - -import numpy as np -import numpy.ma as ma -import scipy -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.metrics import accuracy_score -from sklearn.utils.validation import check_is_fitted - -from ..monoview.additions.BoostUtils import StumpsClassifiersGenerator, sign, \ - getInterpretBase, BaseBoost -from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ - BaseMonoviewClassifier - - -class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, epsilon=1e-06, n_max_iterations=None, - estimators_generator=None, dual_constraint_rhs=0, - save_iteration_as_hyperparameter_each=None, random_state=42): - super(ColumnGenerationClassifierv21, self).__init__() - self.epsilon = epsilon - self.n_max_iterations = n_max_iterations - self.estimators_generator = estimators_generator - self.dual_constraint_rhs = dual_constraint_rhs - self.save_iteration_as_hyperparameter_each = 
save_iteration_as_hyperparameter_each - self.random_state = random_state - - def fit(self, X, y): - if scipy.sparse.issparse(X): - logging.info('Converting to dense matrix.') - X = np.array(X.todense()) - - if self.estimators_generator is None: - self.estimators_generator = StumpsClassifiersGenerator( - n_stumps_per_attribute=self.n_stumps, self_complemented=True) - - y[y == 0] = -1 - - self.estimators_generator.fit(X, y) - self.classification_matrix = self._binary_classification_matrix(X) - - self.weights_ = [] - self.infos_per_iteration_ = defaultdict(list) - - m, n = self.classification_matrix.shape - y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), - self.classification_matrix) - - # Initialization - - w = None - self.collected_weight_vectors_ = {} - self.collected_dual_constraint_violations_ = {} - - example_weights = self._initialize_alphas(m).reshape((m, 1)) - - self.chosen_columns_ = [] - self.fobidden_columns = [] - self.edge_scores = [] - self.example_weights_ = [example_weights] - self.train_accuracies = [] - self.previous_votes = [] - - self.n_total_hypotheses_ = n - self.n_total_examples = m - # print("\n \t\t Start fit\n") - for k in range(min(n, - self.n_max_iterations if self.n_max_iterations is not None else np.inf)): - # Find worst weak hypothesis given alpha. - new_voter_index, criterion = self._find_new_voter(example_weights, - y_kernel_matrix, - "pseudo_h") - - # Append the weak hypothesis. 
- self.chosen_columns_.append(new_voter_index) - self.fobidden_columns.append(new_voter_index) - new_voter_margin = y_kernel_matrix[:, - self.chosen_columns_[-1]].reshape((m, 1)) - self.edge_scores.append(criterion) - - if w is None: - self.previous_vote = new_voter_margin - w = 1 - self.weights_.append(w) - example_weights = self._update_example_weights(example_weights, - y_kernel_matrix, - m) - self.example_weights_.append(example_weights) - self.train_accuracies.append( - accuracy_score(y, np.sign(self.previous_vote))) - continue - - # ---- On resoud le probleme a deux votants analytiquement. - w = self._solve_two_weights_min_c(new_voter_margin, example_weights) - if w[0] == "break": - self.chosen_columns_.pop() - self.break_cause = w[1] - break - self.previous_vote = np.matmul( - np.concatenate((self.previous_vote, new_voter_margin), axis=1), - w).reshape((m, 1)) - - # We collect iteration information for later evaluation. - self.weights_.append(w[-1]) - - self.weights = np.array(self.weights_) - self.final_vote_weights = np.array( - [np.prod(1 - self.weights[t + 1:]) * self.weights[t] if t < - self.weights.shape[ - 0] - 1 else - self.weights[t] for t in range(self.weights.shape[0])]) - margins = np.squeeze(np.asarray( - np.matmul(self.classification_matrix[:, self.chosen_columns_], - self.final_vote_weights))) - signs_array = np.array([int(x) for x in sign(margins)]) - self.train_accuracies.append(accuracy_score(y, signs_array)) - - # ---- On change l'edge - example_weights = self._update_example_weights(example_weights, - y_kernel_matrix, m) - self.example_weights_.append(example_weights) - - self.nb_opposed_voters = self.check_opposed_voters() - self.estimators_generator.estimators_ = \ - self.estimators_generator.estimators_[self.chosen_columns_] - - y[y == -1] = 0 - - return self - - def predict(self, X): - start = time.time() - check_is_fitted(self, 'weights_') - - if scipy.sparse.issparse(X): - logging.warning('Converting sparse matrix to dense matrix.') 
- X = np.array(X.todense()) - classification_matrix = self._binary_classification_matrix(X) - self.weights_ = np.array(self.weights_) - self.final_vote_weights = np.array([np.prod(1 - self.weights_[t + 1:]) * - self.weights_[t] if t < - self.weights_.shape[ - 0] - 1 else - self.weights_[t] for t in - range(self.weights_.shape[0])]) - margins = np.squeeze(np.asarray( - np.matmul(classification_matrix, self.final_vote_weights))) - signs_array = np.array([int(x) for x in sign(margins)]) - signs_array[signs_array == -1] = 0 - end = time.time() - self.predict_time = end - start - return signs_array - - def _find_new_voter(self, example_weights, y_kernel_matrix, - type="pseudo_h"): - if type == "pseudo_h": - pseudo_h_values = ma.array( - np.squeeze(np.array(example_weights.T.dot(y_kernel_matrix).T)), - fill_value=-np.inf) - pseudo_h_values[self.fobidden_columns] = ma.masked - worst_h_index = ma.argmax(pseudo_h_values) - return worst_h_index, pseudo_h_values[worst_h_index] - elif type == "random": - new_index = self.random_state.choice( - np.arange(self.n_total_hypotheses_)) - while new_index in self.fobidden_columns: - new_index = self.random_state.choice( - np.arange(self.n_total_hypotheses_)) - return new_index, 100 - - def _update_example_weights(self, example_weights, y_kernel_matrix, m): - if len(self.weights_) == 1: - example_weights[self.previous_vote == -1] *= 2 - example_weights[self.previous_vote == 1] /= 2 - pass - else: - weights = np.array(self.weights_) - current_vote_weights = np.array( - [np.prod(1 - weights[t + 1:]) * weights[t] if t < - weights.shape[ - 0] - 1 else - weights[t] for t in range(weights.shape[0])]).reshape( - (weights.shape[0], 1)) - weighted_margin = np.matmul( - y_kernel_matrix[:, self.chosen_columns_], current_vote_weights) - example_weights = np.multiply(example_weights, - np.exp((1 - np.sum(weighted_margin, - axis=1) / - np.sum(weighted_margin, - axis=1))).reshape( - (m, 1))) - return example_weights - - def 
_solve_two_weights_min_c(self, next_column, example_weights): - m = next_column.shape[0] - zero_diag = np.ones((m, m)) - np.identity(m) - - weighted_previous_vote = self.previous_vote.reshape((m, 1)) - weighted_next_column = next_column.reshape((m, 1)) - - mat_prev = np.repeat(weighted_previous_vote, m, axis=1) * zero_diag - mat_next = np.repeat(weighted_next_column, m, axis=1) * zero_diag - - self.B2 = np.sum((weighted_previous_vote - weighted_next_column) ** 2) - self.B1 = np.sum(2 * weighted_next_column * ( - weighted_previous_vote - 2 * weighted_next_column * weighted_next_column)) - self.B0 = np.sum(weighted_next_column * weighted_next_column) - - self.A2 = self.B2 + np.sum( - (mat_prev - mat_next) * np.transpose(mat_prev - mat_next)) - self.A1 = self.B1 + np.sum( - mat_prev * np.transpose(mat_next) - mat_next * np.transpose( - mat_prev) - 2 * mat_next * np.transpose(mat_next)) - self.A0 = self.B0 + np.sum(mat_next * np.transpose(mat_next)) - - C2 = (self.A1 * self.B2 - self.A2 * self.B1) - C1 = 2 * (self.A0 * self.B2 - self.A2 * self.B0) - C0 = self.A0 * self.B1 - self.A1 * self.B0 - - if C2 == 0: - if C1 == 0: - return np.array([0.5, 0.5]) - elif abs(C1) > 0: - return np.array([0., 1.]) - else: - return ['break', "the derivate was constant."] - elif C2 == 0: - return ["break", "the derivate was affine."] - - sols = np.roots(np.array([C2, C1, C0])) - - is_acceptable, sol = self._analyze_solutions(sols) - if is_acceptable: - # print("cb", self._cborn(sol)) - return np.array([sol, 1 - sol]) - else: - return ["break", sol] - - def _analyze_solutions(self, sols): - if sols.shape[0] == 1: - if self._cborn(sols[0]) < self._cborn(sols[0] + 1): - best_sol = sols[0] - else: - return False, " the only solution was a maximum." - elif sols.shape[0] == 2: - best_sol = self._best_sol(sols) - else: - return False, " no solution were found." - - if 0 < best_sol < 1: - return True, self._best_sol(sols) - - elif best_sol <= 0: - return False, " the minimum was below 0." 
- else: - return False, " the minimum was over 1." - - def _cborn(self, sol): - return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ( - self.B2 * sol ** 2 + self.B1 * sol + self.B0) - - def _best_sol(self, sols): - values = np.array([self._cborn(sol) for sol in sols]) - return sols[np.argmin(values)] - - def _restricted_master_problem(self, y_kernel_matrix): - raise NotImplementedError("Restricted master problem not implemented.") - - -class CqBoostClassifierv21(ColumnGenerationClassifierv21): - def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, - estimators_generator=None, - save_iteration_as_hyperparameter_each=None, random_state=42): - super(CqBoostClassifierv21, self).__init__(epsilon, n_max_iterations, - estimators_generator, - dual_constraint_rhs=0, - save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, - random_state=random_state) - self.train_time = 0 - self.mu = mu - - def _initialize_alphas(self, n_examples): - return 1.0 / n_examples * np.ones((n_examples,)) - - -class CQBoostv21(CqBoostClassifierv21, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, **kwargs): - super(CQBoostv21, self).__init__( - random_state=random_state, - mu=mu, - epsilon=epsilon - ) - self.param_names = ["mu", "epsilon"] - self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-")] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return getInterpretBase(self, directory, "CQBoostv21", self.weights_, - self.break_cause) - - def get_name_for_fusion(self): - return "CQ21" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.CQB2_mu, -# "epsilon": args.CQB2_epsilon} -# return kwargsDict - - -def paramsToSet(nIter, 
randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), - "epsilon": 10 ** -randomState.randint(1, 15)}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py index c0b320a8343d96fb682905269bd9cc9e1430d8e3..7684d579508564fe9570ad048edf410528f91946 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py @@ -6,6 +6,8 @@ from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "DecisionTree" + class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py index f02b476d244c04dc53ca56bd867213aa359e5697..124d7a9066f9d5ea480474134d10148f803e1120 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree_pregen.py @@ -11,6 +11,7 @@ from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "DecisionTreePregen" class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, PregenClassifier): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py index 5327aae252b6f2fbcc09f21b98ee077997700046..7fd870f08577bfb4b13698284b0d8434c8ad205c 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py @@ -13,6 +13,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "GradientBoosting" + class CustomDecisionTree(DecisionTreeClassifier): def predict(self, X, check_input=True): y_pred = super(CustomDecisionTree, self).predict(X, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py index 7e7fbb3d816efb5f45390667178bfdb60d286165..8ecb413962252e281298e2d64d16cf71cd22ddee 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py @@ -7,6 +7,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "KNN" + + class KNN(KNeighborsClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, n_neighbors=5, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py index 1e5c13e468e5525ac90343f2424c61fc794494b0..30af6f5b1839a68ab13bfe7dab37bda9eb3db1d3 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py @@ -9,6 +9,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "Lasso" + class Lasso(LassoSK, BaseMonoviewClassifier): def __init__(self, 
random_state=None, alpha=1.0, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py index d5be24075fa9f6dc7db56403a4e66c3c2a14147e..21345552b14dc0c2493e3d92d88a152b46de5a80 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py @@ -21,6 +21,8 @@ from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, \ from ..monoview.additions.BoostUtils import ConvexProgram as QP +classifier_class_name = "MinCQ" + # from majority_vote import MajorityVote # from voter import StumpsVotersGenerator, KernelVotersGenerator diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py index 3ffa26d2f13f97c67311dfb25e21347b8ba74630..c53187463a1c3045300f7887739a870b194cb7cb 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py @@ -5,6 +5,8 @@ from ..monoview.additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomUniform +classifier_class_name = "MinCQGraalpy" + class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, mu=0.01, self_complemented=True, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py index 096bc62ef181b596de6499f89d3876fc02ea74dd..0bc6b73d2f9561ad669b1207781cf613bc88d2b9 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py +++ 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py @@ -4,6 +4,7 @@ from ..monoview.additions.BoostUtils import TreeClassifiersGenerator from ..monoview.additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomUniform +classifier_class_name = "MinCQGraalpyTree" class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost.py deleted file mode 100644 index fe02c038307329280be0dc7ab13082a08c1850ba..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost.py +++ /dev/null @@ -1,46 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier - - -class QarBoost(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, **kwargs): - super(QarBoost, self).__init__(n_max_iterations=500, - random_state=random_state, - self_complemented=True, - twice_the_same=True, - c_bound_choice=True, - random_start=False, - n_stumps=10, - use_r=True, - c_bound_sol=False - ) - # n_stumps_per_attribute=10, - self.param_names = [] - self.distribs = [] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "QB" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - 
paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost_nc3.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost_nc3.py deleted file mode 100644 index 06a9c186f24a71a7c1113e9cda3a01a916896c97..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boost_nc3.py +++ /dev/null @@ -1,46 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier - - -class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, **kwargs): - super(QarBoostNC3, self).__init__(n_max_iterations=300, - random_state=random_state, - self_complemented=True, - twice_the_same=False, - c_bound_choice=True, - random_start=False, - n_stumps=1, - use_r=True, - c_bound_sol=True - ) - # n_stumps_per_attribute=1, - self.param_names = [] - self.distribs = [] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "QBN3" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv2.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv2.py deleted file mode 100644 index 
c3936a3cbbd6344a6c76413d266c73f5732fa952..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv2.py +++ /dev/null @@ -1,46 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier - - -class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, **kwargs): - super(QarBoostv2, self).__init__(n_max_iterations=300, - random_state=random_state, - self_complemented=True, - twice_the_same=False, - c_bound_choice=True, - random_start=False, - n_stumps=1, - use_r=True, - c_bound_sol=False - ) - # n_stumps_per_attribute = 1, - self.param_names = [] - self.distribs = [] - self.classed_params = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "QBv2" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv3.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv3.py deleted file mode 100644 index 954ab0b5362715c53a241870416975fbf6d19b53..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/qar_boostv3.py +++ /dev/null @@ -1,49 +0,0 @@ -from ..monoview.additions.CGDescUtils import ColumnGenerationClassifierQar -from ..monoview.monoview_utils import BaseMonoviewClassifier - - -class 
QarBoostv3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - - def __init__(self, random_state=None, **kwargs): - super(QarBoostv3, self).__init__( - random_state=random_state, - self_complemented=False, - twice_the_same=False, - c_bound_choice=True, - random_start=True, - n_stumps=1, - use_r=False - ) - # old_fashioned=False - # previous_vote_weighted=False, - # two_wieghts_problem=False, - # divided_ponderation=True, - # n_stumps_per_attribute=1, - self.param_names = [] - self.classed_params = [] - self.distribs = [] - self.weird_strings = {} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - return self.getInterpretQar(directory, y_test) - - def get_name_for_fusion(self): - return "QBv3" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py index 2a2f067f6bf9ef6b4c7f32291b69d3602177cdcb..6fe0dcadea76b63220d06eac9204fe1d83c02fb6 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py @@ -7,6 +7,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "RandomForest" + class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, n_estimators=10, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py index 1265410bbcb9643af005bda1b8737a8b992ed553..f5c3df8f2a4bcccaf15ed0f0a63e00a859b368f2 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py @@ -41,6 +41,8 @@ __status__ = "Prototype" # Production, Development, Prototype # # return {"Binary_attributes": self.clf.model_.rules} +classifier_class_name = "SCM" + class SCM(scm, BaseMonoviewClassifier): def __init__(self, random_state=None, model_type="conjunction", diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py index 46fc905a283214f38695d8f8b290360b91b95e57..ced608365e87a9f0e80daf5f2a6db941b5062808 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py @@ -11,12 +11,13 @@ from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "SCMPregen" -class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): +class SCMPregen(BaseMonoviewClassifier, PregenClassifier, scm): def __init__(self, random_state=None, model_type="conjunction", max_rules=10, p=0.1, n_stumps=10, self_complemented=True, - **kwargs): + estimators_generator="Stumps", max_depth=1, **kwargs): super(SCMPregen, self).__init__( random_state=random_state, model_type=model_type, @@ -24,16 +25,25 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): p=p ) self.param_names = ["model_type", "max_rules", "p", "n_stumps", - "random_state"] + "random_state", "estimators_generator", "max_depth"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), 
CustomUniform(loc=0, state=1), [n_stumps], - [random_state]] + [random_state], ["Stumps", "Tree"], + CustomRandint(low=1, high=5)] self.classed_params = [] self.weird_strings = {} self.self_complemented = self_complemented self.n_stumps = n_stumps - self.estimators_generator = "Stumps" + self.estimators_generator = estimators_generator + self.max_depth=1 + + def get_params(self, deep=True): + params = super(SCMPregen, self).get_params(deep) + params["estimators_generator"] = self.estimators_generator + params["max_depth"] = self.max_depth + params["n_stumps"] = self.n_stumps + return params def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y) @@ -72,11 +82,6 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): os.remove(file_name) return self.classes_[self.model_.predict(place_holder)] - def get_params(self, deep=True): - return {"p": self.p, "model_type": self.model_type, - "max_rules": self.max_rules, - "random_state": self.random_state, "n_stumps": self.n_stumps} - def canProbas(self): """Used to know if the classifier can return label probabilities""" return False diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen_tree.py deleted file mode 100644 index 1b85c4fc3009269f695d45ef438c5b55c1dcae3e..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen_tree.py +++ /dev/null @@ -1,109 +0,0 @@ -import os - -import numpy as np -from pyscm.scm import SetCoveringMachineClassifier as scm - -from ..monoview.additions.PregenUtils import PregenClassifier -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -class SCMPregenTree(scm, 
BaseMonoviewClassifier, PregenClassifier): - - def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, n_stumps=10, self_complemented=True, - max_depth=2, **kwargs): - super(SCMPregenTree, self).__init__( - random_state=random_state, - model_type=model_type, - max_rules=max_rules, - p=p - ) - self.param_names = ["model_type", "max_rules", "p", "n_stumps", - "random_state", "max_depth"] - self.distribs = [["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [n_stumps], - [random_state], [max_depth]] - self.classed_params = [] - self.weird_strings = {} - self.max_depth = max_depth - self.self_complemented = self_complemented - self.random_state = random_state - self.n_stumps = n_stumps - self.estimators_generator = "Stumps" - - def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): - pregen_X, _ = self.pregen_voters(X, y, generator="Trees") - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - super(SCMPregenTree, self).fit(place_holder, y, tiebreaker=tiebreaker, - iteration_callback=iteration_callback, - **fit_params) - return self - - def predict(self, X): - pregen_X, _ = self.pregen_voters(X, ) - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - 
file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - return self.classes_[self.model_.predict(place_holder)] - - def get_params(self, deep=True): - return {"p": self.p, "model_type": self.model_type, - "max_rules": self.max_rules, - "random_state": self.random_state, "n_stumps": self.n_stumps, - "max_depth": self.max_depth} - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return False - - def getInterpret(self, directory, y_test): - interpretString = "Model used : " + str(self.model_) - return interpretString - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCPT_model_type, -# "p": args.SCPT_p, -# "max_rules": args.SCPT_max_rules, -# "n_stumps": args.SCPT_trees, -# "max_depth": args.SCPT_max_depth} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - paramsSet = [] - for _ in range(nIter): - paramsSet.append( - {"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity.py deleted file mode 100644 index 57feac12373a23d05019bf98f55699a1f9012571..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity.py +++ /dev/null @@ -1,122 +0,0 @@ -import os -import time - -import numpy as np -from pyscm.scm import SetCoveringMachineClassifier as scm - -from ..metrics import zero_one_loss -from ..monoview.additions.PregenUtils import PregenClassifier -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - -# Author-Info -__author__ = 
"Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -class SCMSparsity(BaseMonoviewClassifier, PregenClassifier): - - def __init__(self, random_state=None, model_type="disjunction", - max_rules=10, p=0.1, n_stumps=1, self_complemented=True, - **kwargs): - self.scm_estimators = [scm( - random_state=random_state, - model_type=model_type, - max_rules=max_rule + 1, - p=p - ) for max_rule in range(max_rules)] - self.model_type = model_type - self.self_complemented = self_complemented - self.n_stumps = n_stumps - self.p = p - self.random_state = random_state - self.max_rules = max_rules - self.param_names = ["model_type", "max_rules", "p", "random_state", - "n_stumps"] - self.distribs = [["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [random_state], - [n_stumps]] - self.classed_params = [] - self.weird_strings = {} - - def get_params(self): - return {"model_type": self.model_type, "p": self.p, - "max_rules": self.max_rules, "random_state": self.random_state, - "n_stumps": self.n_stumps} - - def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): - pregen_X, _ = self.pregen_voters(X, y) - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - for scm_estimator in self.scm_estimators: - beg = time.time() - scm_estimator.fit(place_holder, y, tiebreaker=None, - iteration_callback=None, **fit_params) - end = time.time() - self.times = np.array([end - beg, 0]) - self.train_metrics = [ - zero_one_loss.score(y, 
scm_estimator.predict(place_holder)) for - scm_estimator in self.scm_estimators] - return self.scm_estimators[-1] - - def predict(self, X): - pregen_X, _ = self.pregen_voters(X, ) - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - self.preds = [scm_estimator.predict(place_holder) for scm_estimator in - self.scm_estimators] - return self.preds[-1] - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - interpretString = "" - np.savetxt(directory + "test_metrics.csv", np.array( - [zero_one_loss.score(y_test, pred) for pred in self.preds])) - np.savetxt(directory + "times.csv", self.times) - np.savetxt(directory + "train_metrics.csv", self.train_metrics) - return interpretString - -# -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCS_model_type, -# "p": args.SCS_p, -# "max_rules": args.SCS_max_rules, -# "n_stumps": args.SCS_stumps} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - paramsSet = [] - for _ in range(nIter): - paramsSet.append( - {"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity_ttee.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity_ttee.py deleted file mode 100644 index 
25c91fca68d909a6bc947c33e92cb9bf228dec5e..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_sparsity_ttee.py +++ /dev/null @@ -1,122 +0,0 @@ -import os -import time - -import numpy as np -from pyscm.scm import SetCoveringMachineClassifier as scm - -from ..metrics import zero_one_loss -from ..monoview.additions.PregenUtils import PregenClassifier -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -class SCMSparsityTree(BaseMonoviewClassifier, PregenClassifier): - - def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, n_stumps=1, max_depth=2, **kwargs): - self.scm_estimators = [scm( - random_state=random_state, - model_type=model_type, - max_rules=max_rule + 1, - p=p - ) for max_rule in range(max_rules)] - self.model_type = model_type - self.max_depth = max_depth - self.p = p - self.n_stumps = n_stumps - self.random_state = random_state - self.max_rules = max_rules - self.param_names = ["model_type", "max_rules", "p", "random_state", - "max_depth"] - self.distribs = [["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [random_state], - [max_depth]] - self.classed_params = [] - self.weird_strings = {} - - def get_params(self): - return {"model_type": self.model_type, "p": self.p, - "max_rules": self.max_rules, "random_state": self.random_state, - "max_depth": self.max_depth, "n_stumps": self.n_stumps} - - def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): - pregen_X, _ = self.pregen_voters(X, y, generator="Trees") - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name 
in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - for scm_estimator in self.scm_estimators: - beg = time.time() - scm_estimator.fit(place_holder, y, tiebreaker=None, - iteration_callback=None, **fit_params) - end = time.time() - self.times = np.array([end - beg, 0]) - self.train_metrics = [ - zero_one_loss.score(y, scm_estimator.predict(place_holder)) for - scm_estimator in self.scm_estimators] - return self.scm_estimators[-1] - - def predict(self, X): - pregen_X, _ = self.pregen_voters(X, generator="Trees") - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - self.preds = [scm_estimator.predict(place_holder) for scm_estimator in - self.scm_estimators] - return self.preds[-1] - - def canProbas(self): - """Used to know if the classifier can return label probabilities""" - return True - - def getInterpret(self, directory, y_test): - interpretString = "" - np.savetxt(directory + "test_metrics.csv", np.array( - [zero_one_loss.score(y_test, pred) for pred in self.preds])) - np.savetxt(directory + "times.csv", self.times) - np.savetxt(directory + "train_metrics.csv", self.train_metrics) - return interpretString - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCST_model_type, -# "p": args.SCST_p, -# "max_rules": args.SCST_max_rules, 
-# "n_stumps": args.SCST_trees, -# "max_depth": args.SCST_max_depth} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - paramsSet = [] - for _ in range(nIter): - paramsSet.append( - {"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py index d0546f409615ee9b6cd932509454b5643755bf9e..d43d372c0c0adca31236c56947af00c01ab6b42b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py @@ -6,6 +6,7 @@ from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "SGD" class SGD(SGDClassifier, BaseMonoviewClassifier): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py index 53ac5226e81914c207474b74bd9e7f0c4b31916d..87f4c4ed4511f41e160223178f023780fa87f6e9 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py @@ -6,6 +6,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "SVMLinear" + class SVMLinear(SVCClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, C=1.0, **kwargs): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_poly.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_poly.py index 
52a24a29b53402b6495bb526f4294d70a35d982d..386868656e84286bb1979539f1c93e197a4f011a 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_poly.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_poly.py @@ -6,6 +6,7 @@ from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "SVMPoly" class SVMPoly(SVCClassifier, BaseMonoviewClassifier): diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_rbf.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_rbf.py index 9a5853cfa55a93bcac0f9782e4b3e43ec04bbce1..f2ac82543e90b47a2e9126116ef98846ef765740 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_rbf.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_rbf.py @@ -6,6 +6,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +classifier_class_name = "SVMRBF" + class SVMRBF(SVCClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, C=1.0, **kwargs): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py index 0deca9b6c96419d815de93a6d585385fefee6e40..460bcf7a172b4c3d991af8ee599a806b82e74b15 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py @@ -5,9 +5,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def printMetricScore(metricScores, metrics): +def printMetricScore(metricScores, metric_list): metricScoreString = "\n\n" - for metric in metrics: + for metric in metric_list: metricModule = getattr(metrics, metric[0]) if metric[1] is not 
None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in @@ -61,28 +61,25 @@ def execute(classifier, trainLabels, classificationKWARGS, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, name, KFolds, - hyperParamSearch, nIter, metrics, + hyperParamSearch, nIter, metric_list, viewsIndices, randomState, labels, classifierModule): - classifierNameString = classifierModule.genName(classificationKWARGS) - CLASS_LABELS = labels + classifier_name = classifier.short_name learningIndices, validationIndices, testIndicesMulticlass = classificationIndices - metricModule = getattr(metrics, metrics[0][0]) - if metrics[0][1] is not None: + metricModule = getattr(metrics, metric_list[0][0]) + if metric_list[0][1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in - enumerate(metrics[0][1])) + enumerate(metric_list[0][1])) else: metricKWARGS = {} - scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], - CLASS_LABELS[learningIndices], + scoreOnTrain = metricModule.score(labels[learningIndices], + labels[learningIndices], **metricKWARGS) - scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], + scoreOnTest = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) - classifierConfiguration = classifier.getConfigString(classificationKWARGS) - - stringAnalysis = "\t\tResult for multiview classification with " + classifierNameString + \ - "\n\n" + metrics[0][0] + " :\n\t-On Train : " + str( + stringAnalysis = "\t\tResult for multiview classification with " + classifier_name + \ + "\n\n" + metric_list[0][0] + " :\n\t-On Train : " + str( scoreOnTrain) + "\n\t-On Test : " + str( scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ @@ -90,12 +87,11 @@ def execute(classifier, trainLabels, LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join( views) + "\n\t-" + str( KFolds.n_splits) + \ - " folds\n\nClassification configuration : \n\t-Algorithm used 
: " + classifierNameString + " with : " + classifierConfiguration + " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.getConfig() - metricsScores = getMetricsScores(metrics, trainLabels, testLabels, + metricsScores = getMetricsScores(metric_list, trainLabels, testLabels, validationIndices, learningIndices, labels) - stringAnalysis += printMetricScore(metricsScores, metrics) - stringAnalysis += "\n\n Interpretation : \n\n" + classifier.getSpecificAnalysis( - classificationKWARGS) + stringAnalysis += printMetricScore(metricsScores, metric_list) + stringAnalysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation() imagesAnalysis = {} return stringAnalysis, imagesAnalysis, metricsScores diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index c6eaf6e83d437c1a699e28b278c11a0aa8bdc59d..02ba90af927f6fa24b101e88dc067d261850db6e 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -8,6 +8,7 @@ import h5py import numpy as np from .multiview_utils import MultiviewResult +from . import analyze_results from .. 
import multiview_classifiers from ..utils import hyper_parameter_search from ..utils.dataset import getShape @@ -20,28 +21,28 @@ __status__ = "Prototype" # Production, Development, Prototype def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, DATASET): """Used to init the constants""" - views = kwargs["views"] - viewsIndices = kwargs["viewsIndices"] + views = kwargs["view_names"] + viewsIndices = kwargs["view_indices"] if not metrics: metrics = [["f1_score", None]] - CL_type = kwargs["CL_type"] - classificationKWARGS = kwargs[CL_type + "KWARGS"] + classifier_name = kwargs["classifier_name"] + classifier_config = kwargs[classifier_name] learningRate = len(classificationIndices[0]) / float( (len(classificationIndices[0]) + len(classificationIndices[1]))) t_start = time.time() logging.info("Info\t: Classification - Database : " + str( name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + CL_type + " ; Cores : " + str( + " ; Algorithm : " + classifier_name + " ; Cores : " + str( nbCores) + ", Train ratio : " + str(learningRate) + ", CV on " + str(KFolds.n_splits) + " folds") for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( getShape(DATASET, viewIndex))) - return CL_type, t_start, viewsIndices, classificationKWARGS, views, learningRate + return classifier_name, t_start, viewsIndices, classifier_config, views, learningRate -def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, +def saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifierModule, classificationKWARGS, directory, learningRate, name, imagesAnalysis): labelsSet = set(LABELS_DICTIONARY.values()) @@ -49,7 +50,7 @@ def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, viewsString = "-".join(views) labelsString = "-".join(labelsSet) timestr = time.strftime("%Y_%m_%d-%H_%M_%S") - CL_type_string = classifierModule.genName(classificationKWARGS) + 
CL_type_string = classifier.short_name outputFileName = directory + "/" + CL_type_string + "/" + timestr + "-results-" + CL_type_string + "-" + viewsString + '-' + labelsString + \ '-learnRate_{0:.2f}'.format(learningRate) + '-' + name if not os.path.exists(os.path.dirname(outputFileName)): @@ -99,7 +100,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, CL_type, \ t_start, \ viewsIndices, \ - classificationKWARGS, \ + classifier_config, \ views, \ learningRate = initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, DATASET) @@ -113,40 +114,43 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, logging.debug("Done:\t Getting train/test split") logging.debug("Start:\t Getting classifiers modules") - classifierPackage = getattr(multiview_classifiers, - CL_type) # Permet d'appeler un module avec une string - classifierModule = getattr(classifierPackage, CL_type + "Module") - classifierClass = getattr(classifierModule, CL_type + "Class") - analysisModule = getattr(classifierPackage, "analyzeResults") + # classifierPackage = getattr(multiview_classifiers, + # CL_type) # Permet d'appeler un module avec une string + classifier_module = getattr(multiview_classifiers, CL_type) + classifier_name = classifier_module.classifier_class_name + # classifierClass = getattr(classifierModule, CL_type + "Class") logging.debug("Done:\t Getting classifiers modules") logging.debug("Start:\t Optimizing hyperparameters") if hyperParamSearch != "None": - classifier = hyper_parameter_search.searchBestSettings(DATASET, labels, - classifierPackage, - CL_type, metrics, + classifier_config = hyper_parameter_search.searchBestSettings(DATASET, labels, + classifier_module, + classifier_name, + metrics[0], learningIndices, KFolds, randomState, + directory, + nb_cores=nbCores, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, - nIter=nIter, - **classificationKWARGS) - else: - classifier = 
classifierClass(randomState, NB_CORES=nbCores, - **classificationKWARGS) + n_iter=nIter, + classifier_config=classifier_config) + + classifier = getattr(classifier_module, classifier_name)(randomState, + **classifier_config) logging.debug("Done:\t Optimizing hyperparameters") logging.debug("Start:\t Fitting classifier") - classifier.fit_hdf5(DATASET, labels, trainIndices=learningIndices, - viewsIndices=viewsIndices, metric=metrics[0]) + classifier.fit(DATASET, labels, train_indices=learningIndices, + view_indices=viewsIndices) logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, - viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, - viewsIndices=viewsIndices) + trainLabels = classifier.predict(DATASET, predict_indices=learningIndices, + view_indices=viewsIndices) + testLabels = classifier.predict(DATASET, predict_indices=validationIndices, + view_indices=viewsIndices) fullLabels = np.zeros(labels.shape, dtype=int) - 100 for trainIndex, index in enumerate(learningIndices): fullLabels[index] = trainLabels[trainIndex] @@ -167,23 +171,23 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, logging.info("Start:\t Result Analysis for " + CL_type) times = (extractionTime, classificationTime) - stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute( + stringAnalysis, imagesAnalysis, metricsScores = analyze_results.execute( classifier, trainLabels, testLabels, DATASET, - classificationKWARGS, classificationIndices, + classifier_config, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, name, KFolds, hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) + viewsIndices, randomState, labels, classifier_module) logging.info("Done:\t Result Analysis for " + CL_type) logging.debug("Start:\t Saving preds") - 
saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, - classificationKWARGS, directory, + saveResults(classifier, LABELS_DICTIONARY, stringAnalysis, views, classifier_module, + classifier_config, directory, learningRate, name, imagesAnalysis) logging.debug("Start:\t Saving preds") - return MultiviewResult(CL_type, classificationKWARGS, metricsScores, + return MultiviewResult(CL_type, classifier_config, metricsScores, fullLabels, testLabelsMulticlass) # return CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index 724900fe3dca82577dfc38c1662da4fcaa7fa2a8..7e6baf50b0f90bd460a0cb7e6b4ec2890c6bb3e1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -1,6 +1,10 @@ +from sklearn.base import BaseEstimator, ClassifierMixin +import numpy as np + from .. 
import multiview_classifiers + class MultiviewResult(object): def __init__(self, classifier_name, classifier_config, metrics_scores, full_labels, test_labels_multiclass): @@ -11,8 +15,80 @@ class MultiviewResult(object): self.y_test_multiclass_pred = test_labels_multiclass def get_classifier_name(self): - multiviewClassifierPackage = getattr(multiview_classifiers, - self.classifier_name) - multiviewClassifierModule = getattr(multiviewClassifierPackage, - self.classifier_name + "Module") - return multiviewClassifierModule.genName(self.classifier_config) + multiview_classifier_module = getattr(multiview_classifiers, + self.classifier_name) + multiview_classifier = getattr(multiview_classifier_module, + multiview_classifier_module.classifier_class_name)() + return multiview_classifier.short_name + + +def get_names(classed_list): + return np.array([object_.__class__.__name__ for object_ in classed_list]) + + +class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): + + def __init__(self, random_state): + self.random_state = random_state + self.short_name = self.__class__.__name__ + + def genBestParams(self, detector): + return dict((param_name, detector.best_params_[param_name]) + for param_name in self.param_names) + + def genParamsFromDetector(self, detector): + if self.classed_params: + classed_dict = dict((classed_param, get_names( + detector.cv_results_["param_" + classed_param])) + for classed_param in self.classed_params) + if self.param_names: + return [(param_name, + np.array(detector.cv_results_["param_" + param_name])) + if param_name not in self.classed_params else ( + param_name, classed_dict[param_name]) + for param_name in self.param_names] + else: + return [()] + + def genDistribs(self): + return dict((param_name, distrib) for param_name, distrib in + zip(self.param_names, self.distribs)) + + def params_to_string(self): + return ", ".join( + [param_name + " : " + self.to_str(param_name) for param_name in + self.param_names]) + + def 
getConfig(self): + if self.param_names: + return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() + else: + return "\n\t\t- " + self.__class__.__name__ + "with no config." + + def to_str(self, param_name): + if param_name in self.weird_strings: + string = "" + if "class_name" in self.weird_strings[param_name] : + string+=self.get_params()[param_name].__class__.__name__ + if "config" in self.weird_strings[param_name]: + string += "( with "+ self.get_params()[param_name].params_to_string()+")" + else: + string+=self.weird_strings[param_name]( + self.get_params()[param_name]) + return string + else: + return str(self.get_params()[param_name]) + + def get_interpretation(self): + return "No detailed interpretation function" + + + + +def get_train_views_indices(dataset, train_indices, view_indices,): + """This function is used to get all the examples indices and view indices if needed""" + if view_indices is None: + view_indices = np.arange(dataset["Metadata"].attrs["nbView"]) + if train_indices is None: + train_indices = range(dataset["Metadata"].attrs["datasetLength"]) + return train_indices, view_indices \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py index ab051c97118c8fb7de2bd5ec2aaa75ea007c0dd4..1ea37fbbff987b6220a141dcc6dd5853b5a40482 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/__init__.py @@ -1,9 +1,8 @@ import os for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): - if module == '__init__.py' or module[-3:] == '.py' or module[ - -4:] == '.pyc' or module == '__pycache__': + if module == '__init__.py' or module[-4:] == '.pyc' or module == '__pycache__' or module[-3:] != '.py': continue - __import__(module, locals(), globals(), [], 1) + 
__import__(module[:-3], locals(), globals(), [], 1) del module del os diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbec755f0c66bf5549a4b94c81ff93ead587628 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py @@ -0,0 +1,74 @@ +import logging +import math +import time +from collections import defaultdict + +import numpy as np +import numpy.ma as ma +import scipy +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_is_fitted + +from ... 
import metrics + + +def get_names(classed_list): + return np.array([object_.__class__.__name__ for object_ in classed_list]) + + +class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): + + def __init__(self, random_state): + self.random_state = random_state + + def genBestParams(self, detector): + return dict((param_name, detector.best_params_[param_name]) + for param_name in self.param_names) + + def genParamsFromDetector(self, detector): + if self.classed_params: + classed_dict = dict((classed_param, get_names( + detector.cv_results_["param_" + classed_param])) + for classed_param in self.classed_params) + if self.param_names: + return [(param_name, + np.array(detector.cv_results_["param_" + param_name])) + if param_name not in self.classed_params else ( + param_name, classed_dict[param_name]) + for param_name in self.param_names] + else: + return [()] + + def genDistribs(self): + return dict((param_name, distrib) for param_name, distrib in + zip(self.param_names, self.distribs)) + + def getConfig(self): + if self.param_names: + return "\n\t\t- " + self.__class__.__name__ + "with " + ", ".join( + [param_name + " : " + self.to_str(param_name) for param_name in + self.param_names]) + else: + return "\n\t\t- " + self.__class__.__name__ + "with no config." 
+ + def to_str(self, param_name): + if param_name in self.weird_strings: + if self.weird_strings[param_name] == "class_name": + return self.get_params()[param_name].__class__.__name__ + else: + return self.weird_strings[param_name]( + self.get_params()[param_name]) + else: + return str(self.get_params()[param_name]) + + def get_interpretation(self): + return "No detailed interpretation function" + + +def get_train_views_indices(dataset, train_indices, view_indices,): + """This function is used to get all the examples indices and view indices if needed""" + if view_indices is None: + view_indices = np.arange(dataset["Metadata"].attrs["nbView"]) + if train_indices is None: + train_indices = range(dataset["Metadata"].attrs["datasetLength"]) + return train_indices, view_indices diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py new file mode 100644 index 0000000000000000000000000000000000000000..bb7fcbe09ccb9e93d3cf6c2dc147415e8a5ee88c --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -0,0 +1,77 @@ +import numpy as np +import pkgutil + +from ..utils.dataset import getV +from ..multiview.multiview_utils import BaseMultiviewClassifier, get_train_views_indices +from .. 
import monoview_classifiers + +classifier_class_name = "WeightedLinearEarlyFusion" + + +class WeightedLinearEarlyFusion(BaseMultiviewClassifier): + + def __init__(self, random_state=None, view_weights=None, + monoview_classifier="decision_tree", + monoview_classifier_config={}): + super(WeightedLinearEarlyFusion, self).__init__(random_state=random_state) + self.view_weights = view_weights + if isinstance(monoview_classifier, str): + self.short_name = "early fusion "+monoview_classifier + monoview_classifier_module = getattr(monoview_classifiers, + monoview_classifier) + monoview_classifier_class = getattr(monoview_classifier_module, + monoview_classifier_module.classifier_class_name) + self.monoview_classifier = monoview_classifier_class(random_state=random_state, + **monoview_classifier_config) + else: + self.monoview_classifier = monoview_classifier + self.short_name = "early fusion "+self.monoview_classifier.__class__.__name__ + self.param_names = ["monoview_classifier","random_state"] + classifier_classes = [] + for name in dir(monoview_classifiers): + if not name.startswith("__"): + module = getattr(monoview_classifiers, name) + classifier_class = getattr(module, module.classifier_class_name)() + classifier_classes.append(classifier_class) + self.distribs = [classifier_classes, [self.random_state]] + self.classed_params = ["monoview_classifier"] + self.weird_strings={"monoview_classifier":["class_name", "config"]} + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, view_indices) + self.monoview_classifier.fit(X, y[train_indices]) + + def predict(self, X, predict_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, predict_indices, view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, example_indices, view_indices): + """Here, we extract the data from the HDF5 
dataset file and store all + the concatenated views in one variable""" + example_indices, self.view_indices = get_train_views_indices(dataset, + example_indices, + view_indices) + if self.view_weights is None or self.view_weights=="None": + self.view_weights = np.ones(len(self.view_indices), dtype=float) + else: + self.view_weights = np.array(self.view_weights) + self.view_weights /= float(np.sum(self.view_weights)) + + X = self.hdf5_to_monoview(dataset, example_indices) + return example_indices, X + + def hdf5_to_monoview(self, dataset, exmaples): + """Here, we concatenate the views for the asked examples """ + monoview_data = np.concatenate( + [getV(dataset, view_idx, exmaples) + for view_weight, (index, view_idx) + in zip(self.view_weights, enumerate(self.view_indices))] + , axis=1) + return monoview_data + + + + + + diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 59ed1d8ed9e3583aef28fc0541fafef296a8982b..50cb31a6078f9a577f8626dd74a460761e026b94 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -590,7 +590,7 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): directory = arguments["directory"] - databaseName = arguments["args"].name + databaseName = arguments["args"]["Base"]["name"] labelsNames = [arguments["LABELS_DICTIONARY"][0], arguments["LABELS_DICTIONARY"][1]] @@ -764,7 +764,7 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels) results = publishMulticlassScores(multiclassResults, metrics, statsIter, directories, - benchmarkArgumentDictionaries[0]["args"].name) + benchmarkArgumentDictionaries[0]["args"]["Base"]["name"]) publishMulticlassExmapleErrors(multiclassResults, directories, benchmarkArgumentDictionaries[0][ "args"].name) @@ -1011,7 +1011,7 @@ def getResults(results, 
statsIter, nbMulticlass, benchmarkArgumentDictionaries, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): """Used to analyze the results of the previous benchmarks""" - dataBaseName = benchmarkArgumentDictionaries[0]["args"].name + dataBaseName = benchmarkArgumentDictionaries[0]["args"]["Base"]["name"] results_means_std, biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py index 3cd0e6e9dda29f38f45f86a94793982274eeffef..4534c685529b9978ba135ad54121f3b42340b737 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py @@ -1,19 +1,24 @@ import configparser import builtins from distutils.util import strtobool as tobool +import yaml -def get_the_args(path_to_config_file="../config_files/config.ini"): +def get_the_args(path_to_config_file="../config_files/config.yml"): """This is the main function for extracting the args for a '.ini' file""" - config_parser = configparser.ConfigParser(comment_prefixes=('#')) - config_parser.read(path_to_config_file) - config_dict = {} - for section in config_parser: - config_dict[section] = {} - for key in config_parser[section]: - value = format_raw_arg(config_parser[section][key]) - config_dict[section][key] = value - return config_dict + with open(path_to_config_file, 'r') as stream: + yaml_config = yaml.safe_load(stream) + return yaml_config + + # config_parser = configparser.ConfigParser(comment_prefixes=('#')) + # config_parser.read(path_to_config_file) + # config_dict = {} + # for section in config_parser: + # config_dict[section] = {} + # for key in config_parser[section]: + # value = format_raw_arg(config_parser[section][key]) + # config_dict[section][key] = value + # return config_dict def 
format_raw_arg(raw_arg): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index da36fb45b55050c03a11e6472938d069954c367f..b0e3779f3780def50e4e057b0e41693f79a3c6fe 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -23,7 +23,7 @@ def parseTheArgs(arguments): groupStandard.add_argument('--path_config', metavar='STRING', action='store', help='Path to the hdf5 dataset or database ' 'folder (default: %(default)s)', - default='../config_files/config.ini') + default='../config_files/config.yml') # groupStandard.add_argument('-log', action='store_true', # help='Use option to activate logging to console') # groupStandard.add_argument('--name', metavar='STRING', nargs='+', action='store', diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 22246c81090ba680fda86bfb9f508f6d51090390..19966ac08ac3a36ff6bbbef537a5435bc846131c 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -93,7 +93,7 @@ def makeMeNoisy(viewData, randomState, percentage=5): def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, add_noise=False, noise_std=0.15, nbView=3, - nbClass=2, datasetLength=100, randomStateInt=42, nbFeatures = 5): + nbClass=2, datasetLength=100, randomStateInt=42, nbFeatures = 10): """Used to generate a plausible dataset to test the algorithms""" if not os.path.exists(os.path.dirname(pathF + "Plausible.hdf5")): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 
7124c2654125c1b454f5a0ecec05a0277533f833..6c066e1cdcf6ff4753179e2f138278f13c152185 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -3,25 +3,32 @@ import sys import matplotlib.pyplot as plt import numpy as np +from scipy.stats import randint +from sklearn.model_selection import RandomizedSearchCV + from .. import metrics -def searchBestSettings(dataset, labels, classifierPackage, classifierName, - metrics, iLearningIndices, iKFolds, randomState, - viewsIndices=None, - searchingTool="randomizedSearch", nIter=1, **kwargs): - """Used to select the right hyperparam optimization function to optimize hyper parameters""" +def searchBestSettings(dataset, labels, classifier_module, classifier_name, + metrics, learning_indices, iKFolds, random_state, + directory, viewsIndices=None, nb_cores=1, + searchingTool="randomized_search", n_iter=1, + classifier_config=None): + """Used to select the right hyper-parameter optimization function + to optimize hyper parameters""" if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) + output_file_name = directory thismodule = sys.modules[__name__] - searchingTool = "randomizedSearch" # Todo find a nice way to configure multiview classifier without hp search - searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings = searchingToolMethod(dataset, labels, classifierPackage, - classifierName, metrics, - iLearningIndices, iKFolds, randomState, - viewsIndices=viewsIndices, nIter=nIter, - **kwargs) + if searchingTool is not "None": + searchingToolMethod = getattr(thismodule, searchingTool) + bestSettings, test_folds_preds = searchingToolMethod(dataset, labels, "multiview", random_state, output_file_name, + classifier_module, classifier_name, iKFolds, + nb_cores, metrics, n_iter, classifier_config, + learning_indices=learning_indices, view_indices=viewsIndices,) + else: 
+ bestSettings = classifier_config return bestSettings # or well set clasifier ? @@ -30,15 +37,196 @@ def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, """Used to perfom gridsearch on the classifiers""" pass +class CustomRandint: + """Used as a distribution returning a integer between low and high-1. + It can be used with a multiplier agrument to be able to perform more complex generation + for example 10 e -(randint)""" + + def __init__(self, low=0, high=0, multiplier=""): + self.randint = randint(low, high) + self.multiplier = multiplier + + def rvs(self, random_state=None): + randinteger = self.randint.rvs(random_state=random_state) + if self.multiplier == "e-": + return 10 ** -randinteger + else: + return randinteger + + def get_nb_possibilities(self): + return self.randint.b - self.randint.a + +def compute_possible_combinations(params_dict): + n_possibs = np.ones(len(params_dict)) * np.inf + for value_index, value in enumerate(params_dict.values()): + if type(value) == list: + n_possibs[value_index] = len(value) + elif isinstance(value, CustomRandint): + n_possibs[value_index] = value.get_nb_possibilities() + return n_possibs + + +def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None): + test_folds_prediction = [] + if framework == "monoview": + folds = cv.split(np.arange(len(y)), y) + if framework == "multiview": + folds = cv.split(available_indices, y[available_indices]) + fold_lengths = np.zeros(cv.n_splits, dtype=int) + for fold_idx, (train_indices, test_indices) in enumerate(folds): + fold_lengths[fold_idx] = len(test_indices) + if framework == "monoview": + estimator.fit(X[train_indices], y[train_indices]) + test_folds_prediction.append(estimator.predict(X[train_indices])) + if framework == "multiview": + estimator.fit(X, y, available_indices[train_indices]) + test_folds_prediction.append( + estimator.predict(X, available_indices[test_indices])) + minFoldLength = fold_lengths.min() + 
test_folds_prediction = np.array( + [test_fold_prediction[:minFoldLength] for test_fold_prediction in + test_folds_prediction]) + return test_folds_prediction + + +def randomized_search(X, y, framework, random_state, output_file_name, classifier_module, + classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, + classifier_kwargs =None, learning_indices=None, view_indices=None): + estimator = getattr(classifier_module, classifier_name)(random_state, + **classifier_kwargs) + params_dict = estimator.genDistribs() + if params_dict: + metricModule = getattr(metrics, metric[0]) + if metric[1] is not None: + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) + else: + metricKWARGS = {} + scorer = metricModule.get_scorer(**metricKWARGS) + nb_possible_combinations = compute_possible_combinations(params_dict) + min_list = np.array( + [min(nb_possible_combination, n_iter) for nb_possible_combination in + nb_possible_combinations]) + randomSearch = MultiviewCompatibleRandomizedSearchCV(estimator, + n_iter=int(np.sum(min_list)), + param_distributions=params_dict, + refit=True, + n_jobs=nb_cores, scoring=scorer, + cv=folds, random_state=random_state, + learning_indices=learning_indices, + view_indices=view_indices, + framework = framework) + detector = randomSearch.fit(X, y) + + bestParams = dict((key, value) for key, value in + estimator.genBestParams(detector).items() if + key is not "random_state") + + scoresArray = detector.cv_results_['mean_test_score'] + params = estimator.genParamsFromDetector(detector) + + genHeatMaps(params, scoresArray, output_file_name) + best_estimator = detector.best_estimator_ + else: + best_estimator = estimator + bestParams = {} + testFoldsPreds = get_test_folds_preds(X, y, folds, best_estimator, + framework, learning_indices) + return bestParams, testFoldsPreds + + +from sklearn.base import clone + + +class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): + + def 
__init__(self, estimator, param_distributions, n_iter=10, + refit=True, n_jobs=1, scoring=None, cv=None, + random_state=None, learning_indices=None, view_indices=None, framework="monoview"): + super(MultiviewCompatibleRandomizedSearchCV, self).__init__(estimator, + n_iter=n_iter, + param_distributions=param_distributions, + refit=refit, + n_jobs=n_jobs, scoring=scoring, + cv=cv, random_state=random_state) + self.framework = framework + self.available_indices = learning_indices + self.view_indices = view_indices + + def fit(self, X, y=None, groups=None, **fit_params): + if self.framework == "monoview": + return super(MultiviewCompatibleRandomizedSearchCV, self).fit(X, y=y, groups=groups, **fit_params) + elif self.framework == "multiview": + return self.fit_multiview(X, y=y, groups=groups,**fit_params) + + def fit_multiview(self, X, y=None, groups=None, **fit_params): + n_splits = self.cv.get_n_splits(self.available_indices, y[self.available_indices]) + folds = self.cv.split(self.available_indices, y[self.available_indices]) + candidate_params = list(self._get_param_iterator()) + base_estimator = clone(self.estimator) + results = {} + self.cv_results_ = dict(("param_"+param_name, []) for param_name in candidate_params[0].keys()) + self.cv_results_["mean_test_score"] = [] + for candidate_param_idx, candidate_param in enumerate(candidate_params): + test_scores = np.zeros(n_splits)+1000 + for fold_idx, (train_indices, test_indices) in enumerate(folds): + current_estimator = clone(base_estimator) + current_estimator.set_params(**candidate_param) + current_estimator.fit(X, y, + train_indices=self.available_indices[train_indices], + view_indices=self.view_indices) + test_prediction = current_estimator.predict( + X, + self.available_indices[test_indices], + view_indices=self.view_indices) + test_score = self.scoring._score_func(y[self.available_indices[test_indices]], + test_prediction) + test_scores[fold_idx] = test_score + for param_name, param in candidate_param.items(): 
+ self.cv_results_["param_"+param_name].append(param) + cross_validation_score = np.mean(test_scores) + self.cv_results_["mean_test_score"].append(cross_validation_score) + results[candidate_param_idx] = cross_validation_score + if cross_validation_score <= min(results.values()): + self.best_params_ = candidate_params[candidate_param_idx] + self.best_score_ = cross_validation_score + if self.refit: + self.best_estimator_ = clone(base_estimator).set_params(**self.best_params_) + self.n_splits_ = n_splits + return self + + def get_test_folds_preds(self, X, y, estimator): + test_folds_prediction = [] + if self.framework=="monoview": + folds = self.cv.split(np.arange(len(y)), y) + if self.framework=="multiview": + folds = self.cv.split(self.available_indices, y) + fold_lengths = np.zeros(self.cv.n_splits, dtype=int) + for fold_idx, (train_indices, test_indices) in enumerate(folds): + fold_lengths[fold_idx] = len(test_indices) + if self.framework == "monoview": + estimator.fit(X[train_indices], y[train_indices]) + test_folds_prediction.append(estimator.predict(X[train_indices])) + if self.framework =="multiview": + estimator.fit(X, y, self.available_indices[train_indices]) + test_folds_prediction.append(estimator.predict(X, self.available_indices[test_indices])) + minFoldLength = fold_lengths.min() + test_folds_prediction = np.array( + [test_fold_prediction[:minFoldLength] for test_fold_prediction in test_folds_prediction]) + return test_folds_prediction + + + + def randomizedSearch(dataset, labels, classifierPackage, classifierName, - metrics, learningIndices, KFolds, randomState, + metrics_list, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): """Used to perform a random search on the classifiers to optimize hyper parameters""" if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) - metric = metrics[0] + metric = metrics_list[0] metricModule = getattr(metrics, metric[0]) if metric[1] 
is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py index 5036232294ec6abac6551bd34356f2a8de16fe0c..43833e25ab266ec060dcbf24394c0717cf65abb8 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/multiview_result_analysis.py @@ -32,14 +32,8 @@ def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, enumerate(metric[1])) else: metricKWARGS = {} - try: - trainScore = metricModule.score(labels[learningIndices], trainLabels, + trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS) - except: - print(labels[learningIndices]) - print(trainLabels) - import pdb; - pdb.set_trace() testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) return [trainScore, testScore] diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 3420821a5dfe1b8cff3d4f0b8ffde77d1f12f5c5..cd9545cb9371f1b61afa9e709a919fb0f758f12b 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -19,19 +19,71 @@ class Test_initKWARGS(unittest.TestCase): def test_initKWARGSFunc_no_monoview(self): benchmark = {"monoview": {}, "multiview": {}} args = exec_classif.initKWARGSFunc({}, benchmark) - self.assertEqual(args, {}) + self.assertEqual(args, {"monoview": {}, "multiview": {}}) -class Test_initMonoviewArguments(unittest.TestCase): - - def test_initMonoviewArguments_no_monoview(self): - benchmark = {"monoview": {}, "multiview": {}} - arguments = exec_classif.initMonoviewExps(benchmark, {}, 0, {}) - self.assertEqual(arguments, {'monoview': [], 'multiview': []}) - - def test_initMonoviewArguments_empty(self): - benchmark = 
{"monoview": {}, "multiview": {}} - arguments = exec_classif.initMonoviewExps(benchmark, {}, 0, {}) +class Test_init_argument_dictionaries(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.benchmark = {"monoview": ["fake_monoview_classifier"], "multiview": {}} + cls.views_dictionnary = {'test_view_0': 0, 'test_view': 1} + cls.nb_class = 2 + cls.monoview_classifier_name = "fake_monoview_classifier" + cls.monoview_classifier_arg_name = "fake_arg" + cls.monoview_classifier_arg_value = ["fake_value_1"] + cls.multiview_classifier_name = "fake_multiview_classifier" + cls.multiview_classifier_arg_name = "fake_arg_mv" + cls.multiview_classifier_arg_value = ["fake_value_2"] + cls.init_kwargs = { + 'monoview':{ + cls.monoview_classifier_name: + {cls.monoview_classifier_arg_name:cls.monoview_classifier_arg_value} + }, + "multiview":{ + cls.multiview_classifier_name:{ + cls.multiview_classifier_arg_name:cls.multiview_classifier_arg_value} + } + } + + def test_init_argument_dictionaries_monoview(self): + arguments = exec_classif.init_argument_dictionaries(self.benchmark, + self.views_dictionnary, + self.nb_class, + self.init_kwargs) + expected_output = [{ + self.monoview_classifier_name: { + self.monoview_classifier_arg_name:self.monoview_classifier_arg_value[0]}, + "view_name": "test_view_0", + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 0}, + {self.monoview_classifier_name: { + self.monoview_classifier_arg_name: self.monoview_classifier_arg_value[0]}, + "view_name": "test_view", + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 1}, + ] + self.assertEqual(arguments["monoview"], expected_output) + + def test_init_argument_dictionaries_multiview(self): + self.benchmark["multiview"] = ["fake_multiview_classifier"] + self.benchmark["monoview"] = {} + arguments = exec_classif.init_argument_dictionaries(self.benchmark, + self.views_dictionnary, + self.nb_class, + 
self.init_kwargs) + expected_output = [{ + "classifier_name": self.multiview_classifier_name, + "view_indices": [0,1], + "view_names": ["test_view_0", "test_view"], + "nb_class": self.nb_class, + "labels_names":None, + self.multiview_classifier_name: { + self.multiview_classifier_arg_name: + self.multiview_classifier_arg_value[0]}, + },] + self.assertEqual(arguments["multiview"][0], expected_output[0]) def fakeBenchmarkExec(coreIndex=-1, a=7, args=1): @@ -189,7 +241,11 @@ class Test_execOneBenchmark(unittest.TestCase): { "try": 0}, { - "try2": 100}]}, + "try2": 100}], + "multiview":[{ + "try3": 5}, + { + "try4": 10}]}, benchmark="try", views="try", viewsIndices="try", @@ -243,7 +299,16 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): randomState="try", hyperParamSearch="try", metrics="try", - argumentDictionaries={"monoview": [{"try": 0}, {"try2": 100}]}, + argumentDictionaries={ + "monoview": [ + { + "try": 0}, + { + "try2": 100}], + "multiview":[{ + "try3": 5}, + { + "try4": 10}]}, benchmark="try", views="try", viewsIndices="try", diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index 9f380ca9b73695a4bdb414e7103f112ac6211573..78a4bdb2a570e48aadef94a4b9138dcbd74bc7f4 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -3,8 +3,10 @@ import unittest import h5py import numpy as np +from sklearn.model_selection import StratifiedKFold from ...mono_multi_view_classifiers.monoview import exec_classif_mono_view +from ...mono_multi_view_classifiers.monoview_classifiers import decision_tree class Test_initConstants(unittest.TestCase): @@ -15,7 +17,7 @@ class Test_initConstants(unittest.TestCase): cls.datasetFile = h5py.File( "multiview_platform/tests/temp_tests/test.hdf5", "w") cls.random_state = np.random.RandomState(42) - cls.args = {"CL_type": "test_clf"} 
+ cls.args = {"classifier_name": "test_clf"} cls.X_value = cls.random_state.randint(0, 500, (10, 20)) cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value) cls.X.attrs["name"] = "test_dataset" @@ -88,6 +90,50 @@ class Test_initTrainTest(unittest.TestCase): np.testing.assert_array_equal(y_train, np.array([0, 0, 1, 0, 0])) np.testing.assert_array_equal(y_test, np.array([1, 1, 0, 0, 0])) + +class Test_getHPs(unittest.TestCase): + + @classmethod + def setUpClass(cls): + os.mkdir("multiview_platform/tests/tmp_tests") + cls.classifierModule = decision_tree + cls.hyperParamSearch = "randomized_search" + cls.n_iter = 2 + cls.classifier_name = "decision_tree" + cls.random_state = np.random.RandomState(42) + cls.X = cls.random_state.randint(0,10,size=(10,5)) + cls.y = cls.random_state.randint(0,2,size=10) + cls.output_file_name = "multiview_platform/tests/tmp_tests/" + cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state) + cls.nb_cores = 1 + cls.metrics = [["accuracy_score", None]] + cls.kwargs = {"decision_tree" : {"max_depth": 1, + "criterion": "gini", + "splitter": "best"}} + cls.classifier_class_name = "DecisionTree" + + @classmethod + def tearDownClass(cls): + for file_name in os.listdir("multiview_platform/tests/tmp_tests"): + os.remove( + os.path.join("multiview_platform/tests/tmp_tests", file_name)) + os.rmdir("multiview_platform/tests/tmp_tests") + + def test_simple(self): + kwargs, test_folds_predictions = exec_classif_mono_view.getHPs(self.classifierModule, + self.hyperParamSearch, + self.n_iter, + self.classifier_name, + self.classifier_class_name, + self.X, + self.y, + self.random_state, + self.output_file_name, + self.cv, + self.nb_cores, + self.metrics, + self.kwargs) + # class Test_getKWARGS(unittest.TestCase): # # @classmethod diff --git a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py b/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py index 
da1136c66cd8c639a9c4155a261b7426384a06e2..a68c710a446bf09dbee7acc542564a775423b3b7 100644 --- a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py +++ b/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py @@ -30,3 +30,4 @@ class Test_genTestFoldsPreds(unittest.TestCase): cls.assertEqual(testFoldsPreds.shape, (3, 10)) np.testing.assert_array_equal(testFoldsPreds[0], np.array( [1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) + diff --git a/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py new file mode 100644 index 0000000000000000000000000000000000000000..d78acf8020081205e42e36d79cc936d8511be72c --- /dev/null +++ b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py @@ -0,0 +1,84 @@ +import unittest + +import numpy as np +import h5py +import os + +from multiview_platform.mono_multi_view_classifiers.multiview_classifiers import \ + weighted_linear_early_fusion + +class Test_WeightedLinearEarlyFusion(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.random_state = np.random.RandomState(42) + cls.view_weights = [0.5, 0.5] + os.mkdir("multiview_platform/tests/tmp_tests") + cls.dataset_file = h5py.File( + "multiview_platform/tests/tmp_tests/test_file.hdf5", "w") + cls.labels = cls.dataset_file.create_dataset("Labels", + data=np.array([0, 1, 0, 0, 1])) + cls.view0_data = cls.random_state.randint(1,10,size=(5, 4)) + view0 = cls.dataset_file.create_dataset("View0", data=cls.view0_data) + view0.attrs["sparse"] = False + cls.view1_data = cls.random_state.randint(1, 10, size=(5, 4)) + view1 = cls.dataset_file.create_dataset("View1", data=cls.view1_data) + view1.attrs["sparse"] = False + metaDataGrp = cls.dataset_file.create_group("Metadata") + metaDataGrp.attrs["nbView"] = 2 + metaDataGrp.attrs["nbClass"] = 2 + metaDataGrp.attrs["datasetLength"] = 5 + cls.monoview_classifier_name = 
"decision_tree" + cls.monoview_classifier_config = {"max_depth":1, "criterion": "gini", "splitter": "best"} + cls.classifier = weighted_linear_early_fusion.WeightedLinearEarlyFusion( + random_state=cls.random_state, view_weights=cls.view_weights, + monoview_classifier=cls.monoview_classifier_name, + monoview_classifier_config=cls.monoview_classifier_config) + + @classmethod + def tearDownClass(cls): + cls.dataset_file.close() + for file_name in os.listdir("multiview_platform/tests/tmp_tests"): + os.remove(os.path.join("multiview_platform/tests/tmp_tests", file_name)) + os.rmdir("multiview_platform/tests/tmp_tests") + + def test_simple(self): + np.testing.assert_array_equal(self.view_weights, self.classifier.view_weights) + + def test_fit(self): + self.assertRaises(AttributeError, getattr, + self.classifier.monoview_classifier, "classes_") + self.classifier.fit(self.dataset_file, self.labels, None, None) + np.testing.assert_array_equal(self.classifier.monoview_classifier.classes_, + np.array([0,1])) + + def test_predict(self): + self.classifier.fit(self.dataset_file, self.labels, None, None) + predicted_labels = self.classifier.predict(self.dataset_file, None, None) + np.testing.assert_array_equal(predicted_labels, self.labels) + + def test_transform_data_to_monoview_simple(self): + + + example_indices, X = self.classifier.transform_data_to_monoview(self.dataset_file, + None, None) + self.assertEqual(X.shape, (5,8)) + np.testing.assert_array_equal(X, np.concatenate((self.view0_data, self.view1_data), axis=1)) + np.testing.assert_array_equal(example_indices, np.arange(5)) + + def test_transform_data_to_monoview_view_select(self): + example_indices, X = self.classifier.transform_data_to_monoview( + self.dataset_file, + None, np.array([0])) + self.assertEqual(X.shape, (5, 4)) + np.testing.assert_array_equal(X, self.view0_data) + np.testing.assert_array_equal(example_indices, np.arange(5)) + + def test_transform_data_to_monoview_view_select(self): + example_indices, X = 
self.classifier.transform_data_to_monoview( + self.dataset_file, + np.array([1,2,3]), np.array([0])) + self.assertEqual(X.shape, (3, 4)) + np.testing.assert_array_equal(X, self.view0_data[np.array([1,2,3]), :]) + np.testing.assert_array_equal(example_indices, np.array([1,2,3])) + diff --git a/multiview_platform/tests/test_utils/test_configuration.py b/multiview_platform/tests/test_utils/test_configuration.py index ce04975c2b5991ce70c7a842f16b0b7c314e9fe8..c1e8c3b47125380c120e8516c6880f115b6f6bc4 100644 --- a/multiview_platform/tests/test_utils/test_configuration.py +++ b/multiview_platform/tests/test_utils/test_configuration.py @@ -1,6 +1,6 @@ import os import unittest - +import yaml import numpy as np from multiview_platform.mono_multi_view_classifiers.utils import configuration @@ -8,14 +8,14 @@ from multiview_platform.mono_multi_view_classifiers.utils import configuration class Test_get_the_args(unittest.TestCase): def setUp(self): - self.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.ini" + self.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.yml" os.mkdir("multiview_platform/tests/tmp_tests") - config_file = open(self.path_to_config_file, "w") - config_file.write("[Base]\nfirst_arg = int ; 10\nsecond_arg = list_float ; 12.5 1e-06\n[Classification]\nthird_arg = bool ; yes") - config_file.close() + data = {"Base":{"first_arg": 10, "second_arg":[12.5, 1e-06]}, "Classification":{"third_arg":True}} + with open(self.path_to_config_file, "w") as config_file: + yaml.dump(data, config_file) def tearDown(self): - os.remove("multiview_platform/tests/tmp_tests/config_temp.ini") + os.remove("multiview_platform/tests/tmp_tests/config_temp.yml") os.rmdir("multiview_platform/tests/tmp_tests") def test_file_loading(self): diff --git a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py new file mode 100644 index 
0000000000000000000000000000000000000000..0024a1427a85b07adbbd4f4ebee038fcf75cc28d --- /dev/null +++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py @@ -0,0 +1,55 @@ +import os +import unittest + +import h5py +import numpy as np +from sklearn.model_selection import StratifiedKFold + +from ...mono_multi_view_classifiers.utils import hyper_parameter_search +from ...mono_multi_view_classifiers.multiview_classifiers import weighted_linear_early_fusion + +class Test_randomized_search(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.random_state = np.random.RandomState(42) + cls.view_weights = [0.5, 0.5] + os.mkdir("multiview_platform/tests/tmp_tests") + cls.dataset_file = h5py.File( + "multiview_platform/tests/tmp_tests/test_file.hdf5", "w") + cls.labels = cls.dataset_file.create_dataset("Labels", + data=np.array( + [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, ])) + cls.view0_data = cls.random_state.randint(1, 10, size=(10, 4)) + view0 = cls.dataset_file.create_dataset("View0", + data=cls.view0_data) + view0.attrs["sparse"] = False + cls.view1_data = cls.random_state.randint(1, 10, size=(10, 4)) + view1 = cls.dataset_file.create_dataset("View1", + data=cls.view1_data) + view1.attrs["sparse"] = False + metaDataGrp = cls.dataset_file.create_group("Metadata") + metaDataGrp.attrs["nbView"] = 2 + metaDataGrp.attrs["nbClass"] = 2 + metaDataGrp.attrs["datasetLength"] = 10 + cls.monoview_classifier_name = "decision_tree" + cls.monoview_classifier_config = {"max_depth": 1, + "criterion": "gini", + "splitter": "best"} + cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state) + cls.learning_indices = np.array([1,2,3,4, 5,6,7,8,9]) + + @classmethod + def tearDownClass(cls): + cls.dataset_file.close() + for file_name in os.listdir("multiview_platform/tests/tmp_tests"): + os.remove( + os.path.join("multiview_platform/tests/tmp_tests", file_name)) + os.rmdir("multiview_platform/tests/tmp_tests") + + + def test_simple(self): + best_params, 
test_folds_preds = hyper_parameter_search.randomized_search( + self.dataset_file, self.labels.value, "multiview", self.random_state, "multiview_platform/tests/tmp_tests/", + weighted_linear_early_fusion, "WeightedLinearEarlyFusion", self.k_folds, + 1, ["accuracy_score", None], 2, {}, learning_indices=self.learning_indices) diff --git a/requirements.txt b/requirements.txt index 6ec2e80e7033b2c3f10c83526a6b556245a90fac..5e3248cae3b35153a6d1aa3eca2725add2d75b69 100755 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ scipy==1.3.0 six==1.12.0 pandas==0.23.3 m2r==0.2.1 -docutils==0.12 \ No newline at end of file +docutils==0.12 +PyYAML==3.12 \ No newline at end of file diff --git a/setup.py b/setup.py index 8ed4b6c46acd183045a7327669cd8b14acfc34a5..4b3122f16a18b34b26c7d4c58e1d2bc552b82a62 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def setup_package(): # Une url qui pointe vers la page officielle de votre lib url='http://github.com/babau1/multiview-machine-learning-omis/', install_requires=['numpy>=1.8', 'scipy>=0.16','scikit-learn==0.19', - 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r'], + 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r', 'pyyaml'], # Il est d'usage de mettre quelques metadata à propos de sa lib # Pour que les robots puissent facilement la classer. # La liste des marqueurs autorisées est longue: