diff --git a/config_files/config.yml b/config_files/config.yml
index a10fa2737f24f3477fdd112482875136aa09af69..318ef51d36e0a6d7fe96dc0201c48c638fb12bbd 100644
--- a/config_files/config.yml
+++ b/config_files/config.yml
@@ -22,7 +22,7 @@ Classification:
   nb_folds: 2
   nb_class: 2
   classes: ["yes", "no"]
-  type: ["multiview", "monoview"]
+  type: ["multiview",]
   algos_monoview: ["all"]
   algos_multiview: ["all"]
   stats_iter: 2
@@ -137,4 +137,42 @@ min_cq:
 weighted_linear_early_fusion:
   view_weights: [None]
-  monoview_classifier: ["decision_tree"]
+  monoview_classifier_name: ["decision_tree"]
+  monoview_classifier_config:
+    decision_tree:
+      max_depth: [1]
+      criterion: ["gini"]
+      splitter: ["best"]
+
+entropy_fusion:
+  classifier_names: ["decision_tree"]
+  classifier_configs:
+    decision_tree:
+      max_depth: [1]
+      criterion: ["gini"]
+      splitter: ["best"]
+
+disagree_fusion:
+  classifier_names: ["decision_tree"]
+  classifier_configs:
+    decision_tree:
+      max_depth: [1]
+      criterion: ["gini"]
+      splitter: ["best"]
+
+
+double_fault_fusion:
+  classifier_names: ["decision_tree"]
+  classifier_configs:
+    decision_tree:
+      max_depth: [1]
+      criterion: ["gini"]
+      splitter: ["best"]
+
+difficulty_fusion:
+  classifier_names: ["decision_tree"]
+  classifier_configs:
+    decision_tree:
+      max_depth: [1]
+      criterion: ["gini"]
+      splitter: ["best"]
\ No newline at end of file
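Note: these YAML blocks are read back through the platform's loader (get_the_args, updated later in this diff). A minimal sketch of pulling one classifier block out of the parsed config; the path and keys mirror the file above, nothing else is assumed:

    import yaml

    # Minimal sketch, assuming the config.yml layout shown above.
    with open("config_files/config.yml", "r") as stream:
        config = yaml.safe_load(stream)

    # Every hyper-parameter is wrapped in a list of candidate values.
    entropy_conf = config["entropy_fusion"]
    print(entropy_conf["classifier_names"])                     # ['decision_tree']
    print(entropy_conf["classifier_configs"]["decision_tree"])  # {'max_depth': [1], ...}
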
diff --git a/ipynb/FeatureExtraction-All_unix.ipynb b/ipynb/FeatureExtraction-All_unix.ipynb
index 42b4e2e64409c15503e9af8034c09798f00a90f1..fd972a80342a04b04fbd8a6b243fd211667c6e5e 100644
--- a/ipynb/FeatureExtraction-All_unix.ipynb
+++ b/ipynb/FeatureExtraction-All_unix.ipynb
@@ -480,7 +480,6 @@
     {
      "ename": "ValueError",
      "evalue": "all the input array dimensions except for the concatenation axis must match exactly",
-     "output_type": "error",
      "traceback": [
       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
@@ -488,7 +487,8 @@
       "\u001b[1;32m<ipython-input-13-c8d37ffc0446>\u001b[0m in \u001b[0;36mcalcSurfHisto\u001b[1;34m(dfImages_, k_)\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mdescriptor\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdes_list\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 40\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 41\u001b[1;33m \u001b[0mdescriptors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvstack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdescriptors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdescriptor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 42\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[1;31m#### Bag of Words Approach\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
       "\u001b[1;32m/home/doob/anaconda2/lib/python2.7/site-packages/numpy/core/shape_base.pyc\u001b[0m in \u001b[0;36mvstack\u001b[1;34m(tup)\u001b[0m\n\u001b[0;32m 228\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 229\u001b[0m \"\"\"\n\u001b[1;32m--> 230\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_nx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0matleast_2d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_m\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0m_m\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtup\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 231\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mhstack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
       "\u001b[1;31mValueError\u001b[0m: all the input array dimensions except for the concatenation axis must match exactly"
-     ]
+     ],
+     "output_type": "error"
     }
    ],
    "source": [
diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py
index 5772286a7c562fed60466478956d673d4bd04145..47f485fbdb3e729872d402f92e2b3bc76c85449c 100644
--- a/multiview_platform/execute.py
+++ b/multiview_platform/execute.py
@@ -1,9 +1,15 @@
 """This is the execution module, used to execute the code"""

 def execute():
     import multiview_platform.versions as vs
     vs.test_versions()
     import sys

     from multiview_platform.mono_multi_view_classifiers import exec_classif
diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index 868891c9c4e8a72a82d6a0309b3f3337df08e6aa..79a8dc98bf0734ddf0f271fca729926ee3099eb1 100644
--- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py
+++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
@@ -127,15 +127,17 @@ def init_argument_dictionaries(benchmark, views_dictionary,
 def init_multiview_exps(classifier_names, views_dictionary, nb_class, kwargs_init):
     multiview_arguments = []
     for classifier_name in classifier_names:
-        if multiple_args(classifier_name, kwargs_init):
-            multiview_arguments += gen_multiple_args_dictionnaries(nb_class,
-                                                                   kwargs_init,
-                                                                   classifier_name,
-                                                                   views_dictionary=views_dictionary,
-                                                                   framework="multiview")
+        if multiple_args(get_path_dict(kwargs_init[classifier_name])):
+            multiview_arguments += gen_multiple_args_dictionnaries(
+                nb_class,
+                kwargs_init,
+                classifier_name,
+                views_dictionary=views_dictionary,
+                framework="multiview")
         else:
+            arguments = get_path_dict(kwargs_init[classifier_name])
             multiview_arguments += [gen_single_multiview_arg_dictionary(classifier_name,
-                                                                        kwargs_init,
+                                                                        arguments,
                                                                         nb_class,
                                                                         views_dictionary=views_dictionary)]
     return multiview_arguments
@@ -169,7 +171,7 @@ def init_monoview_exps(classifier_names,
     monoview_arguments = []
     for view_name, view_index in views_dictionary.items():
         for classifier in classifier_names:
-            if multiple_args(classifier, kwargs_init):
+            if multiple_args(kwargs_init[classifier]):
                 monoview_arguments += gen_multiple_args_dictionnaries(nb_class,
                                                                       kwargs_init,
                                                                       classifier,
@@ -202,20 +204,69 @@ def gen_single_multiview_arg_dictionary(classifier_name,arguments,nb_class,
             'view_indices': list(views_dictionary.values()),
             "nb_class": nb_class,
             "labels_names": None,
-            classifier_name: dict((key, value[0]) for key, value in arguments[
-                classifier_name].items())
+            classifier_name: extract_dict(arguments)
             }


-def multiple_args(classifier, kwargs_init):
+def extract_dict(classifier_config):
+    """Reverse function of get_path_dict"""
+    extracted_dict = {}
+    for key, value in classifier_config.items():
+        if isinstance(value, list):
+            extracted_dict = set_element(extracted_dict, key, value[0])
+        else:
+            extracted_dict = set_element(extracted_dict, key, value)
+    return extracted_dict
+
+
+def set_element(dictionary, path, value):
+    """Set value in dictionary at the location indicated by path"""
+    existing_keys = path.split(".")[:-1]
+    dict_state = dictionary
+    for existing_key in existing_keys:
+        if existing_key in dict_state:
+            dict_state = dict_state[existing_key]
+        else:
+            dict_state[existing_key] = {}
+            dict_state = dict_state[existing_key]
+    dict_state[path.split(".")[-1]] = value
+    return dictionary
+
+
+def multiple_args(classifier_configuration):
+    """Checks if multiple values were provided for at least one arg"""
     listed_args = [type(value) == list and len(value)>1 for key, value in
-                   kwargs_init[classifier].items()]
+                   classifier_configuration.items()]
     if True in listed_args:
         return True
     else:
         return False


+def get_path_dict(multiview_classifier_args):
+    """This function is used to generate a dictionary with each key being
+    the path to the value.
+    If given {"key1":{"key1_1":value1}, "key2":value2}, it will return
+    {"key1.key1_1":value1, "key2":value2}"""
+    path_dict = dict((key, value) for key, value in multiview_classifier_args.items())
+    paths = is_dict_in(path_dict)
+    while paths:
+        for path in paths:
+            for key, value in path_dict[path].items():
+                path_dict[".".join([path, key])] = value
+            path_dict.pop(path)
+        paths = is_dict_in(path_dict)
+    return path_dict
+
+
+def is_dict_in(dictionary):
+    paths = []
+    for key, value in dictionary.items():
+        if isinstance(value, dict):
+            paths.append(key)
+    return paths
+
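Note: get_path_dict and extract_dict are inverses of each other, up to the value[0] collapse. A quick round trip with toy values (the dictionaries are invented for illustration):

    # Toy round trip for get_path_dict / extract_dict defined above.
    nested = {"key1": {"key1_1": [1]}, "key2": [2]}

    flat = get_path_dict(nested)
    # {'key2': [2], 'key1.key1_1': [1]} -- each key is the dotted path to a leaf

    rebuilt = extract_dict(flat)
    # {'key1': {'key1_1': 1}, 'key2': 2} -- set_element rebuilds the nesting,
    # and lists collapse to their first element (value[0])
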
 def gen_multiple_kwargs_combinations(cl_kwrags):
     values = list(cl_kwrags.values())
     listed_values = [[_] if type(_) is not list else _ for _ in values]
@@ -235,10 +286,14 @@ def gen_multiple_kwargs_combinations(cl_kwrags):

 def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier,
-                                    view_name=None, view_index=None, views_indices=None,
+                                    view_name=None, view_index=None,
+                                    views_dictionary=None,
                                     framework="monoview"):
-    multiple_kwargs_list, reduced_multiple_kwargs_list = \
-        gen_multiple_kwargs_combinations(kwargs_init[classifier])
+    if framework=="multiview":
+        classifier_config = get_path_dict(kwargs_init[classifier])
+    else:
+        classifier_config = kwargs_init[classifier]
+    multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(classifier_config)
     multiple_kwargs_dict = dict(
         (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary)
         for reduced_dictionary, dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list ))
@@ -249,14 +304,15 @@
                                                  view_name=view_name)
                          if framework=="monoview" else
                          gen_single_multiview_arg_dictionary(classifier_name,
-                                                             arguments,
-                                                             nb_class,
-                                                             views_indices=views_indices)
-                         for classifier_name, arguments in multiple_kwargs_dict.items()]
+                                                             arguments,
+                                                             nb_class,
+                                                             views_dictionary=views_dictionary)
+                         for classifier_name, arguments
+                         in multiple_kwargs_dict.items()]
     return args_dictionnaries


-def init_monoview_kwargs(args, classifiers_names):
+def init_kwargs(args, classifiers_names):
     r"""Used to init kwargs thanks to a function in each monoview classifier package.

     Parameters
@@ -290,8 +346,8 @@ def init_kwargs_func(args, benchmark):
-    monoview_kwargs = init_monoview_kwargs(args, benchmark["monoview"])
-    multiview_kwargs = init_multiview_kwargs(args, benchmark["multiview"])
+    monoview_kwargs = init_kwargs(args, benchmark["monoview"])
+    multiview_kwargs = init_kwargs(args, benchmark["multiview"])
     kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs}
     return kwargs
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
index 8b096d473330db994c51edf65f6838777fb8f5a6..6d9ebf6f5551f12a9262eada53191124c4ea3004 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
@@ -27,25 +27,26 @@ __status__ = "Prototype"  # Production, Development, Prototype
 # __date__ = 2016 - 03 - 25

-def exec_monoview_multicore(directory, name, labels_names, classificationIndices,
-                            KFolds, datasetFileIndex, databaseType,
+def exec_monoview_multicore(directory, name, labels_names, classification_indices,
+                            k_folds, dataset_file_index, database_type,
                             path, random_state, labels,
-                            hyper_param_search="randomizedSearch",
-                            metrics=[["accuracy_score", None]], nIter=30,
+                            hyper_param_search="randomized_search",
+                            metrics=[["accuracy_score", None]], n_iter=30,
                             **args):
-    DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r")
+    dataset_var = h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")
     neededViewIndex = args["viewIndex"]
-    X = DATASET.get("View" + str(neededViewIndex))
+    X = dataset_var.get("View" + str(neededViewIndex))
     Y = labels
-    return ExecMonoview(directory, X, Y, name, labels_names,
-                        classificationIndices, KFolds, 1, databaseType, path,
-                        random_state, hyper_param_search=hyper_param_search,
-                        metrics=metrics, nIter=nIter, **args)
+    return exec_monoview(directory, X, Y, name, labels_names,
+                         classification_indices, k_folds, 1, database_type, path,
+                         random_state, hyper_param_search=hyper_param_search,
+                         metrics=metrics, n_iter=n_iter, **args)


 def exec_monoview(directory, X, Y, name, labels_names, classificationIndices,
                   KFolds, nbCores, databaseType, path,
-                  randomState, hyper_param_search="randomizedSearch",
+                  randomState, hyper_param_search="randomized_search",
                   metrics=[["accuracy_score", None]], nIter=30, **args):
     logging.debug("Start:\t Loading data")
     kwargs, \
@@ -67,7 +68,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classificationIndices,
           + str(nbCores) + ", algorithm : " + CL_type)

     logging.debug("Start:\t Determine Train/Test split")
-    X_train, y_train, X_test, y_test, X_test_multiclass = initTrainTest(X, Y,
+    X_train, y_train, X_test, y_test, X_test_multiclass = init_train_test(X, Y,
                                                                         classificationIndices)

     logging.debug("Info:\t Shape X_train:" + str(
@@ -79,7 +80,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classificationIndices,
     logging.debug("Start:\t Generate classifier args")
     classifierModule = getattr(monoview_classifiers, CL_type)
     classifier_class_name = classifierModule.classifier_class_name
     clKWARGS, testFoldsPreds = getHPs(classifierModule, hyper_param_search,
                                       nIter, CL_type, classifier_class_name,
                                       X_train, y_train,
                                       randomState, outputFileName,
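Note: the getHPs call relies on the plugin convention used throughout the platform: every classifier module exposes a classifier_class_name attribute naming its main class. A minimal sketch of that lookup; "decision_tree" is just an example module name following the convention:

    from multiview_platform.mono_multi_view_classifiers import monoview_classifiers

    # Convention-based lookup: module name -> class named by classifier_class_name.
    classifier_module = getattr(monoview_classifiers, "decision_tree")
    classifier_class = getattr(classifier_module,
                               classifier_module.classifier_class_name)
    classifier = classifier_class(random_state=42)  # ready for fit / predict
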
@@ -114,7 +115,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classificationIndices,
     stringAnalysis, \
     imagesAnalysis, \
     metricsScores = execute(name, classificationIndices, KFolds, nbCores,
                             hyper_param_search, metrics, nIter, feat, CL_type,
                             clKWARGS, labels_names, X.shape,
                             y_train, y_train_pred, y_test, y_test_pred, t_end,
                             randomState, classifier, outputFileName)
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
index 3b2952e4481e0ca2bf1a4510751c742f7ef2699e..e079dae7b54fda07d3e036bf21d0d762f6f47d30 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
@@ -84,12 +84,6 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
                    np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString

-#
-# def formatCmdArgs(args):
-#     """Used to format kwargs for the parsed args"""
-#     kwargsDict = {'n_estimators': args.Ada_n_est,
-#                   'base_estimator': [DecisionTreeClassifier(max_depth=1)]}
-#     return kwargsDict

 def paramsToSet(nIter, random_state):
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/additions/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview/additions/__init__.py
index ded01232c360476be91c1eeba56bcb76af045be6..8b137891791fe96927ad78e64b0aad7bded08bdc 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/additions/__init__.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/additions/__init__.py
@@ -1 +1 @@
-from . import diversity_utils
+
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py
index 3fdbacd1d66f56d26fed107f8d3043a23f106599..e4919f20b48c08205b7a5aa2dec1890a1c5a3424 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/additions/diversity_utils.py
@@ -16,9 +16,9 @@ def getClassifiersDecisions(allClassifersNames, views_indices, resultsMonoview):
     And the classifiers_names variable is ordered as :
     classifiers_names[viewIndex][classifierIndex]
     """
-    nbViews = len(viewsIndices)
+    nbViews = len(views_indices)
     nbClassifiers = len(allClassifersNames)
-    classifiersNames = [[] for _ in viewsIndices]
+    classifiersNames = [[] for _ in views_indices]
     more_than_one_fold = len(resultsMonoview[0].test_folds_preds.shape) is not 1
     if more_than_one_fold:
         nbFolds = resultsMonoview[0].test_folds_preds.shape[0]
@@ -31,16 +31,16 @@ def getClassifiersDecisions(allClassifersNames, views_indices, resultsMonoview):

     for resultMonoview in resultsMonoview:
         if resultMonoview.classifier_name in classifiersNames[
-            viewsIndices.index(resultMonoview.view_index)]:
+            views_indices.index(resultMonoview.view_index)]:
             pass
         else:
             classifiersNames[
-                viewsIndices.index(resultMonoview.view_index)].append(
+                views_indices.index(resultMonoview.view_index)].append(
                 resultMonoview.classifier_name)
         classifierIndex = classifiersNames[
-            viewsIndices.index(resultMonoview.view_index)].index(
+            views_indices.index(resultMonoview.view_index)].index(
             resultMonoview.classifier_name)
-        classifiersDecisions[viewsIndices.index(
+        classifiersDecisions[views_indices.index(
             resultMonoview.view_index), classifierIndex] = resultMonoview.test_folds_preds
     # else:
     #     train_len = resultsMonoview[0].test_folds_preds.shape[0]
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
index 76c890561fa44a2e05ea7385f1a93cc1276f4a72..08288d55a6eb2e29faa0d2273c23e94636d21623 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
@@ -243,9 +243,9 @@ if __name__ == "__main__":
     directory = args.directory
     name = args.name
     labels_dictionary = args.labels_dictionary
-    classificationIndices = args.classificationIndices
+    classification_indices = args.classification_indices
     k_folds = args.k_folds
-    nbCores = args.nbCores
+    nb_cores = args.nb_cores
     databaseType = None
     path = args.path_f
     random_state = args.random_state
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
index 7e6baf50b0f90bd460a0cb7e6b4ec2890c6bb3e1..3370cd9938477baac14c645812a26d56045c9ae1 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
@@ -2,6 +2,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin
 import numpy as np

 from .. import multiview_classifiers
+from .. import monoview_classifiers
@@ -83,12 +84,46 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin):
         return "No detailed interpretation function"

-
-
-def get_train_views_indices(dataset, train_indices, view_indices,):
+def get_examples_views_indices(dataset, examples_indices, view_indices, ):
     """This function is used to get all the examples indices and view indices if needed"""
     if view_indices is None:
         view_indices = np.arange(dataset["Metadata"].attrs["nbView"])
-    if train_indices is None:
-        train_indices = range(dataset["Metadata"].attrs["datasetLength"])
-    return train_indices, view_indices
\ No newline at end of file
+    if examples_indices is None:
+        examples_indices = range(dataset["Metadata"].attrs["datasetLength"])
+    return examples_indices, view_indices
+
+
+class ConfigGenerator():
+
+    def __init__(self, classifier_names):
+        self.distribs = {}
+        for classifier_name in classifier_names:
+            classifier_class = get_monoview_classifier(classifier_name)
+            self.distribs[classifier_name] = dict((param_name, param_distrib)
+                                                  for param_name, param_distrib in
+                                                  zip(classifier_class().param_names,
+                                                      classifier_class().distribs))
+
+    def rvs(self, random_state=None):
+        config_sample = {}
+        for classifier_name, classifier_config in self.distribs.items():
+            config_sample[classifier_name] = {}
+            for param_name, param_distrib in classifier_config.items():
+                if hasattr(param_distrib, "rvs"):
+                    config_sample[classifier_name][param_name]=param_distrib.rvs(random_state=random_state)
+                else:
+                    config_sample[classifier_name][
+                        param_name] = param_distrib[random_state.randint(len(param_distrib))]
+        return config_sample
+
+
+def get_available_monoview_classifiers():
+    classifiers_names = [module_name
+                         for module_name in dir(monoview_classifiers)
+                         if not module_name.startswith("__")]
+    return classifiers_names
+
+def get_monoview_classifier(classifier_name):
+    classifier_module = getattr(monoview_classifiers, classifier_name)
+    classifier_class = getattr(classifier_module, classifier_module.classifier_class_name)
+    return classifier_class
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a72d82aa1370d82c72234e5f7c694872b283f882
--- /dev/null
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py
@@ -0,0 +1,194 @@
+import itertools
+import math
+import inspect
+import os
+
+import numpy as np
+
+from ...multiview.multiview_utils import ConfigGenerator, \
+    get_monoview_classifier, get_examples_views_indices, \
+    get_available_monoview_classifiers, BaseMultiviewClassifier
+
+
+class DiversityFusion(BaseMultiviewClassifier):
+    """This is the base class for all the diversity fusion based classifiers."""
+
+    def __init__(self, random_state=None, classifier_names=None,
+                 monoview_estimators=None, classifiers_configs=None):
+        """Used to init the instances"""
+        super(DiversityFusion, self).__init__(random_state)
+        if classifier_names is None:
+            classifier_names = get_available_monoview_classifiers()
+        self.classifier_names = classifier_names
+        self.param_names = ["classifiers_configs"]
+        self.distribs = [ConfigGenerator(get_available_monoview_classifiers())]
+        self.estimator_pool = monoview_estimators
+        self.classifiers_configs = classifiers_configs
+
+    def fit(self, X, y, train_indices=None, views_indices=None):
+        train_indices, views_indices = get_examples_views_indices(X,
+                                                                  train_indices,
+                                                                  views_indices)
+        if self.estimator_pool is None:
+            self.estimator_pool = []
+            for classifier_idx, classifier_name in enumerate(self.classifier_names):
+                self.estimator_pool.append([])
+                if self.classifiers_configs is not None and classifier_name in self.classifiers_configs:
+                    if 'random_state' in inspect.getfullargspec(get_monoview_classifier(classifier_name).__init__).args:
+                        estimator = get_monoview_classifier(classifier_name)(random_state=self.random_state,
+                                                                             **self.classifiers_configs[classifier_name])
+                    else:
+                        estimator = get_monoview_classifier(classifier_name)(
+                            **self.classifiers_configs[classifier_name])
+                else:
+                    if 'random_state' in inspect.getfullargspec(get_monoview_classifier(classifier_name).__init__).args:
+                        estimator = get_monoview_classifier(classifier_name)(random_state=self.random_state)
+                    else:
+                        estimator = get_monoview_classifier(classifier_name)()
+                for idx, view_idx in enumerate(views_indices):
+                    estimator.fit(X.get_v(view_idx, train_indices), y[train_indices])
+                    self.estimator_pool[classifier_idx].append(estimator)
+        else:
+            pass #Todo
+        self.choose_combination(X, y, train_indices, views_indices)
+        return self
+
+    def predict(self, X, example_indices=None, views_indices=None):
+        """Just a majority vote"""
+        example_indices, views_indices = get_examples_views_indices(X,
+                                                                    example_indices,
+                                                                    views_indices)
+        nb_class = X.get_nb_class(example_indices)
+        votes = np.zeros((len(example_indices), nb_class), dtype=float)
+        monoview_predictions = [monoview_estimator.predict(X.get_v(view_idx, example_indices))
+                                for view_idx, monoview_estimator
+                                in zip(views_indices, self.monoview_estimators)]
+        for idx, example_index in enumerate(example_indices):
+            for monoview_estimator_index, monoview_prediction in enumerate(monoview_predictions):
+                votes[idx, monoview_prediction[idx]] += 1
+        predicted_labels = np.argmax(votes, axis=1)
+        return predicted_labels
+
+    def get_classifiers_decisions(self, X, view_indices, examples_indices):
+        classifiers_decisions = np.zeros((len(self.estimator_pool),
+                                          len(view_indices),
+                                          len(examples_indices)))
+        for estimator_idx, estimator in enumerate(self.estimator_pool):
+            for idx, view_index in enumerate(view_indices):
+                classifiers_decisions[estimator_idx, idx, :] = estimator[
+                    idx].predict(X.get_v(view_index, examples_indices))
+        return classifiers_decisions
+
+    def init_combinations(self, X, example_indices, view_indices):
+        classifiers_decisions = self.get_classifiers_decisions(X, view_indices,
+                                                               example_indices)
+        nb_classifiers, nb_views, n_examples = classifiers_decisions.shape
+        combinations = itertools.combinations_with_replacement(
+            range(nb_classifiers),
+            nb_views)
+        nb_combinations = int(
+            math.factorial(nb_classifiers + nb_views - 1) / math.factorial(
+                nb_views) / math.factorial(
+                nb_classifiers - 1))
+        div_measure = np.zeros(nb_combinations)
+        combis = np.zeros((nb_combinations, nb_views), dtype=int)
+        return combinations, combis, div_measure, classifiers_decisions, nb_views
+
+
+class GlobalDiversityFusion(DiversityFusion):
+
+    def choose_combination(self, X, y, examples_indices, view_indices):
+        combinations, combis, div_measure, classifiers_decisions, nb_views = self.init_combinations(
+            X, examples_indices, view_indices)
+        for combinationsIndex, combination in enumerate(combinations):
+            combis[combinationsIndex] = combination
+            div_measure[combinationsIndex] = self.diversity_measure(
+                classifiers_decisions,
+                combination,
+                y[examples_indices])
+        best_combi_index = np.argmax(div_measure)
+        best_combination = combis[best_combi_index]
+        self.monoview_estimators = [self.estimator_pool[classifier_index][view_index]
+                                    for view_index, classifier_index
+                                    in enumerate(best_combination)]
+
+
+class CoupleDiversityFusion(DiversityFusion):
+
+    def choose_combination(self, X, y, examples_indices, view_indices):
+        combinations, combis, div_measure, classifiers_decisions, nb_views = self.init_combinations(
+            X, examples_indices, view_indices)
+        for combinations_index, combination in enumerate(combinations):
+            combis[combinations_index] = combination
+            combi_with_view = [(viewIndex, combiIndex) for viewIndex, combiIndex
+                               in
+                               enumerate(combination)]
+            binomes = itertools.combinations(combi_with_view, 2)
+            nb_binomes = int(
+                math.factorial(nb_views) / 2 / math.factorial(nb_views - 2))
+            couple_diversities = np.zeros(nb_binomes)
+            for binome_index, binome in enumerate(binomes):
+                (view_index_1, classifier_index_1), (
+                    view_index_2, classifier_index_2) = binome
+                couple_diversity = np.mean(
+                    self.diversity_measure(
+                        classifiers_decisions[classifier_index_1, view_index_1],
+                        classifiers_decisions[classifier_index_2, view_index_2],
+                        y[examples_indices])
+                )
+                couple_diversities[binome_index] = couple_diversity
+            div_measure[combinations_index] = np.mean(couple_diversities)
+        best_combi_index = np.argmax(div_measure)
+        best_combination = combis[best_combi_index]
+        self.monoview_estimators = [self.estimator_pool[classifier_index][view_index]
+                                    for view_index, classifier_index
+                                    in enumerate(best_combination)]
+
+
+#
+# def CQ_div_measure(classifiersNames, classifiersDecisions, measurement,
+#                    foldsGroudTruth):
+#     """
+#     This function is used to measure a pseudo-CQ measurement based on the minCq algorithm.
+#     It's a mix between couple_div_measure and global_div_measure that uses multiple measurements.
+#     """
+#     nbViews, nbClassifiers, nbFolds, foldsLen = classifiersDecisions.shape
+#     combinations = itertools.combinations_with_replacement(range(nbClassifiers),
+#                                                            nbViews)
+#     nbCombinations = int(
+#         math.factorial(nbClassifiers + nbViews - 1) / math.factorial(
+#             nbViews) / math.factorial(nbClassifiers - 1))
+#     div_measure = np.zeros(nbCombinations)
+#     combis = np.zeros((nbCombinations, nbViews), dtype=int)
+#
+#     for combinationsIndex, combination in enumerate(combinations):
+#         combis[combinationsIndex] = combination
+#         combiWithView = [(viewIndex, combiIndex) for viewIndex, combiIndex in
+#                          enumerate(combination)]
+#         binomes = itertools.combinations(combiWithView, 2)
+#         nbBinomes = int(
+#             math.factorial(nbViews) / 2 / math.factorial(nbViews - 2))
+#         disagreement = np.zeros(nbBinomes)
+#         div_measure[combinationsIndex] = measurement[1](classifiersDecisions,
+#                                                         combination,
+#                                                         foldsGroudTruth,
+#                                                         foldsLen)
+#         for binomeIndex, binome in enumerate(binomes):
+#             (viewIndex1, classifierIndex1), (
+#                 viewIndex2, classifierIndex2) = binome
+#             nbDisagree = np.sum(measurement[0](
+#                 classifiersDecisions[viewIndex1, classifierIndex1],
+#                 classifiersDecisions[viewIndex2, classifierIndex2],
+#                 foldsGroudTruth)
+#                                 , axis=1) / float(foldsLen)
+#             disagreement[binomeIndex] = np.mean(nbDisagree)
+#         div_measure[combinationsIndex] /= float(np.mean(disagreement))
+#     bestCombiIndex = np.argmin(div_measure)
+#     bestCombination = combis[bestCombiIndex]
+#
+#     return [classifiersNames[viewIndex][index] for viewIndex, index in
+#             enumerate(bestCombination)], div_measure[
+#                bestCombiIndex]
+#
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py
index 8dbec755f0c66bf5549a4b94c81ff93ead587628..6d3c8a7b493dfdf2943e92e594e6f196c51449df 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/utils.py
@@ -10,6 +10,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils.validation import check_is_fitted

 from ... import metrics
+from ... import monoview_classifiers


 def get_names(classed_list):
@@ -72,3 +73,6 @@ def get_train_views_indices(dataset, train_indices, view_indices,):
     if train_indices is None:
         train_indices = range(dataset["Metadata"].attrs["datasetLength"])
     return train_indices, view_indices
+
+
+
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e45437746be19489f1552c67f173a73f6d45b53
--- /dev/null
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion.py
@@ -0,0 +1,34 @@
+import numpy as np
+
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.diversity_utils import GlobalDiversityFusion
+
+
+classifier_class_name = "DifficultyFusion"
+
+
+class DifficultyFusion(GlobalDiversityFusion):
+
+    def diversity_measure(self, classifiers_decisions, combination, y):
+        _, nb_view, nb_examples = classifiers_decisions.shape
+        scores = np.zeros((nb_view, nb_examples), dtype=int)
+        for view_index, classifier_index in enumerate(combination):
+            scores[view_index] = np.logical_not(
+                np.logical_xor(classifiers_decisions[classifier_index,
+                                                     view_index],
+                               y)
+            )
+        # difficulty_scores = np.sum(scores, axis=0)
+        # TODO : Check computing method
+        difficulty_score = np.mean(
+            np.var(
+                np.array([
+                    np.sum((scores==view_index), axis=1)/float(nb_view)
+                    for view_index in range(len(combination)+1)])
+                , axis=0)
+            )
+        return difficulty_score
+
+
+
+
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/__init__.py
deleted file mode 100644
index 5c0d0a9eb5152a3f5c88ac99fd0fc83b1982873f..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/difficulty_fusion/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import difficulty_fusion, analyze_results
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..445e4f8532e1557cd4aa9f16efc49f923c9b5342
--- /dev/null
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion.py
@@ -0,0 +1,12 @@
+import numpy as np
+
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.diversity_utils import CoupleDiversityFusion
+
+
+classifier_class_name = "DisagreeFusion"
+
+
+class DisagreeFusion(CoupleDiversityFusion):
+
+    def diversity_measure(self, first_classifier_decision, second_classifier_decision, _):
+        return np.logical_xor(first_classifier_decision, second_classifier_decision)
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/__init__.py
deleted file mode 100644
index b8459c144e15702c25f6e01a4418aabed408b6a3..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/disagree_fusion/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import disagree_fusion,analyze_results
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..15ad8b6ae073f999642d5e987ea6aff57c9327c6
--- /dev/null
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.diversity_utils import \
+    CoupleDiversityFusion
+
+classifier_class_name = "DoubleFaultFusion"
+
+
+class DoubleFaultFusion(CoupleDiversityFusion):
+
+    def diversity_measure(self, first_classifier_decision,
+                          second_classifier_decision, y):
+        return np.logical_and(np.logical_xor(first_classifier_decision, y),
+                              np.logical_xor(second_classifier_decision, y))
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/__init__.py
deleted file mode 100644
index bb9fd7705bad32bf53c4e5588f49d0e3846065b9..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/double_fault_fusion/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import analyze_results, double_fault_fusion
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..54986816d569bc9ec5761dafe3dced75467fa766
--- /dev/null
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion.py
@@ -0,0 +1,24 @@
+import numpy as np
+
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.diversity_utils import GlobalDiversityFusion
+
+
+classifier_class_name = "EntropyFusion"
+
+
+class EntropyFusion(GlobalDiversityFusion):
+
+    def diversity_measure(self, classifiers_decisions, combination, y):
+        _, nb_view, nb_examples = classifiers_decisions.shape
+        scores = np.zeros((nb_view, nb_examples), dtype=int)
+        for view_index, classifier_index in enumerate(combination):
+            scores[view_index] = np.logical_not(
+                np.logical_xor(classifiers_decisions[classifier_index, view_index],
+                               y)
+            )
+        entropy_scores = np.sum(scores, axis=0)
+        nb_view_matrix = np.zeros((nb_examples),
+                                  dtype=int) + nb_view - entropy_scores
+        entropy_score = np.mean(np.minimum(entropy_scores, nb_view_matrix).astype(float) / (
+            nb_view - int(nb_view / 2)))
+        return entropy_score
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/__init__.py
deleted file mode 100644
index f111304ec1a97d3f03b21250b681f0da2c1c7be3..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/entropy_fusion/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import analyze_results, entropy_fusion
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py
index 1b83dae3ee401fe1e618f1dd481f37fbdfe0c90c..fbe0276ab205b08b1215e76c3580aa056ad5d928 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py
@@ -1,6 +1,7 @@
-from ...multiview.additions import diversity_utils
-from ..difficulty_fusion.difficulty_fusion import difficulty
-from ..double_fault_fusion.double_fault_fusion import doubleFault
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions import \
+    diversity_utils
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion_old import difficulty
+from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion_old import doubleFault


 def genName(config):
@@ -34,6 +35,11 @@ class PseudoCQFusionClass(diversity_utils.DiversityFusionClass):
         diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs)

     def getSpecificAnalysis(self, classificationKWARGS):
         stringAnalysis = "Classifiers used for each view : "+ ', '.join(self.classifiers_names)+\
                          ', with a pseudo CQ of '+str(self.div_measure)
         return stringAnalysis
\ No newline at end of file
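Note: to make the pairwise measures above concrete, here is a toy run of the disagreement (DisagreeFusion) and double-fault (DoubleFaultFusion) scores; the decision vectors are invented for illustration:

    import numpy as np

    # Invented toy decisions for two classifiers, plus the ground truth.
    decisions_a = np.array([1, 0, 1, 1])
    decisions_b = np.array([1, 1, 0, 1])
    y = np.array([1, 0, 0, 0])

    # DisagreeFusion's measure: XOR flags the examples where the two differ.
    disagreement = np.logical_xor(decisions_a, decisions_b)  # [F, T, T, F]

    # DoubleFaultFusion's measure: flags the examples where both are wrong.
    double_fault = np.logical_and(np.logical_xor(decisions_a, y),
                                  np.logical_xor(decisions_b, y))  # [F, F, F, T]

    # CoupleDiversityFusion averages such scores over all classifier pairs to
    # rank view/classifier combinations (argmax = most diverse ensemble).
    print(disagreement.mean(), double_fault.mean())  # 0.5 0.25
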
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py
index c21ee488974c8663fd863cacc1fe2572f03d59f3..25bd9c3e95dd2fadd5f556c55bb1423aeb630aa4 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py
@@ -1,8 +1,10 @@
 import numpy as np
-import pkgutil
+import inspect

 from ..utils.dataset import get_v
-from ..multiview.multiview_utils import BaseMultiviewClassifier, get_train_views_indices
+from ..multiview.multiview_utils import BaseMultiviewClassifier, get_examples_views_indices, ConfigGenerator, get_available_monoview_classifiers
+
+
 from .. import monoview_classifiers

 classifier_class_name = "WeightedLinearEarlyFusion"
@@ -11,35 +13,48 @@ classifier_class_name = "WeightedLinearEarlyFusion"
 class WeightedLinearEarlyFusion(BaseMultiviewClassifier):

     def __init__(self, random_state=None, view_weights=None,
-                 monoview_classifier="decision_tree",
+                 monoview_classifier_name="decision_tree",
                  monoview_classifier_config={}):
         super(WeightedLinearEarlyFusion, self).__init__(random_state=random_state)
         self.view_weights = view_weights
-        if isinstance(monoview_classifier, str):
-            self.short_name = "early fusion "+monoview_classifier
-            monoview_classifier_module = getattr(monoview_classifiers,
-                                                 monoview_classifier)
-            monoview_classifier_class = getattr(monoview_classifier_module,
-                                                monoview_classifier_module.classifier_class_name)
-            self.monoview_classifier = monoview_classifier_class(random_state=random_state,
-                                                                 **monoview_classifier_config)
-        else:
-            self.monoview_classifier = monoview_classifier
-            self.short_name = "early fusion "+self.monoview_classifier.__class__.__name__
-        self.param_names = ["monoview_classifier","random_state"]
-        classifier_classes = []
-        for name in dir(monoview_classifiers):
-            if not name.startswith("__"):
-                module = getattr(monoview_classifiers, name)
-                classifier_class = getattr(module, module.classifier_class_name)()
-                classifier_classes.append(classifier_class)
-        self.distribs = [classifier_classes, [self.random_state]]
-        self.classed_params = ["monoview_classifier"]
-        self.weird_strings={"monoview_classifier":["class_name", "config"]}
+        self.monoview_classifier_name = monoview_classifier_name
+        self.short_name = "early fusion " + monoview_classifier_name
+        if monoview_classifier_name in monoview_classifier_config:
+            self.monoview_classifier_config = monoview_classifier_config[monoview_classifier_name]
+        else:
+            self.monoview_classifier_config = monoview_classifier_config
+        monoview_classifier_module = getattr(monoview_classifiers,
+                                             self.monoview_classifier_name)
+        monoview_classifier_class = getattr(monoview_classifier_module,
+                                            monoview_classifier_module.classifier_class_name)
+        self.monoview_classifier = monoview_classifier_class(random_state=random_state,
+                                                             **self.monoview_classifier_config)
+        self.param_names = ["monoview_classifier_name", "monoview_classifier_config"]
+        self.distribs = [get_available_monoview_classifiers(),
+                         ConfigGenerator(get_available_monoview_classifiers())]
+        self.classed_params = []
+        self.weird_strings={}
+
+    def set_params(self, monoview_classifier_name=None, monoview_classifier_config=None, **params):
+        self.monoview_classifier_name = monoview_classifier_name
+        monoview_classifier_module = getattr(monoview_classifiers,
+                                             self.monoview_classifier_name)
+        monoview_classifier_class = getattr(monoview_classifier_module,
+                                            monoview_classifier_module.classifier_class_name)
+        self.monoview_classifier = monoview_classifier_class()
+        self.set_monoview_classifier_config(monoview_classifier_name,
+                                            monoview_classifier_config)
+        return self
+
+    def get_params(self, deep=True):
+        return {"random_state":self.random_state,
+                "view_weights":self.view_weights,
+                "monoview_classifier_name":self.monoview_classifier_name,
+                "monoview_classifier_config":self.monoview_classifier_config}

     def fit(self, X, y, train_indices=None, view_indices=None):
         train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                            view_indices)
         self.monoview_classifier.fit(X, y[train_indices])
+        return self

     def predict(self, X, predict_indices=None, view_indices=None):
         _, X = self.transform_data_to_monoview(X, predict_indices, view_indices)
@@ -49,7 +64,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier):
     def transform_data_to_monoview(self, dataset, example_indices, view_indices):
         """Here, we extract the data from the HDF5 dataset file and store all
         the concatenated views in one variable"""
-        example_indices, self.view_indices = get_train_views_indices(dataset,
+        example_indices, self.view_indices = get_examples_views_indices(dataset,
                                                                      example_indices,
                                                                      view_indices)
         if self.view_weights is None or self.view_weights=="None":
@@ -70,6 +85,12 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier):
                                        , axis=1)
         return monoview_data

+    def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
+        if monoview_classifier_name in monoview_classifier_config:
+            self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
+        else:
+            self.monoview_classifier.set_params(**monoview_classifier_config)
+
diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
index 6cc3f41b3ada63a9c1403a9f4d9af4e96c9497a0..6dcbb4364d9861bbb2661405df6a8079a3dc1e10 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
@@ -1015,6 +1015,7 @@ def get_results(results, stats_iter, nb_multiclass, benchmark_argument_dictionaries,
                 multiclass_labels, metrics,
                 classification_indices, directories,
                 directory, labels_dictionary, nb_examples, nb_labels):
+    """Used to analyze the results of the previous benchmarks"""
     data_base_name = benchmark_argument_dictionaries[0]["args"]["Base"]["name"]
     results_means_std, biclass_results = analyze_biclass(results,
                                                          benchmark_argument_dictionaries,
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py
index 7046491aff1547d7a0dc65e0b6316fd902791ea7..5db045d77f506425adfae0ed0ea03d3a8de841e9 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/configuration.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/configuration.py
@@ -1,4 +1,3 @@
-import configparser
 import builtins
 from distutils.util import strtobool as tobool
 import yaml
@@ -14,34 +13,3 @@ def get_the_args(path_to_config_file="../config_files/config.yml"):
     with open(path_to_config_file, 'r') as stream:
         yaml_config = yaml.safe_load(stream)
     return yaml_config
-
-    # config_parser = configparser.ConfigParser(comment_prefixes=('#'))
-    # config_parser.read(path_to_config_file)
-    # config_dict = {}
-    # for section in config_parser:
-    #     config_dict[section] = {}
-    #     for key in config_parser[section]:
-    #         value = format_raw_arg(config_parser[section][key])
-    #         config_dict[section][key] = value
-    # return config_dict
-
-
-def format_raw_arg(raw_arg):
-    """This function is used to convert the raw arg in a types value.
-    For example, 'list_int ; 10 20' will be formatted in [10,20]"""
-    function_name, raw_value = raw_arg.split(" ; ")
-    if function_name.startswith("list"):
-        function_name = function_name.split("_")[1]
-        raw_values = raw_value.split(" ")
-        value = [getattr(builtins, function_name)(raw_value)
-                 if function_name != "bool" else bool(tobool(raw_value))
-                 for raw_value in raw_values]
-    else:
-        if raw_value == "None":
-            value = None
-        else:
-            if function_name=="bool":
-                value = bool(tobool(raw_value))
-            else:
-                value = getattr(builtins, function_name)(raw_value)
-    return value
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
index 82467346c53d9661c0aa13fe4e53be8ab6b93deb..c889c251b82f904655f90d7565fd84dc805dc74f 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
@@ -10,6 +10,45 @@ from scipy import sparse
 from . import get_multiview_db as DB

+class Dataset():
+
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def init_example_indices(self, example_indices=None):
+        if example_indices is None:
+            return range(self.dataset.get("Metadata").attrs["datasetLength"])
+        else:
+            return example_indices
+
+    def get_v(self, view_index, example_indices=None):
+        example_indices = self.init_example_indices(example_indices)
+        if type(example_indices) is int:
+            return self.dataset.get("View" + str(view_index))[example_indices, :]
+        else:
+            example_indices = np.array(example_indices)
+            sorted_indices = np.argsort(example_indices)
+            example_indices = example_indices[sorted_indices]
+
+            if not self.dataset.get("View" + str(view_index)).attrs["sparse"]:
+                return self.dataset.get("View" + str(view_index))[example_indices, :][
+                       np.argsort(sorted_indices), :]
+            else:
+                sparse_mat = sparse.csr_matrix(
+                    (self.dataset.get("View" + str(view_index)).get("data").value,
+                     self.dataset.get("View" + str(view_index)).get("indices").value,
+                     self.dataset.get("View" + str(view_index)).get("indptr").value),
+                    shape=self.dataset.get("View" + str(view_index)).attrs["shape"])[
+                    example_indices, :][
+                    np.argsort(sorted_indices), :]
+
+                return sparse_mat
+
+    def get_nb_class(self, example_indices=None):
+        example_indices = self.init_example_indices(example_indices)
+        return len(np.unique(self.dataset.get("Labels").value[example_indices]))
+
+
 def get_v(dataset, view_index, used_indices=None):
     """Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset"""
     if used_indices is None:
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
index b9d65979620c01bc91b6f89f4fc25f6f9b283cde..7850b6a00dd1615dd3d5ee7ff5b76dcf076b7f1c 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
@@ -24,6 +24,7 @@ def parse_the_args(arguments):
                                help='Path to the hdf5 dataset or database '
                                     'folder (default: %(default)s)',
                                default='../config_files/config.yml')
     # groupStandard.add_argument('-log', action='store_true',
     #                            help='Use option to activate logging to console')
     # groupStandard.add_argument('--name', metavar='STRING', nargs='+', action='store',
@@ -695,6 +696,8 @@ def parse_the_args(arguments):
     #                            type=float,
     #                            default=[])

     args = parser.parse_args(arguments)
     return args
@@ -779,10 +782,10 @@ def get_database_function(name, type_var):
     getDatabase : function
         The function that will be used to extract the database
     """
-    if name not in ["Fake", "Plausible"]:
-        get_database = getattr(DB, "getClassicDB" + type_var[1:])
+    if name not in ["fake", "plausible"]:
+        get_database = getattr(DB, "get_classic_db_" + type_var[1:])
     else:
-        get_database = getattr(DB, "get" + name + "DB" + type_var[1:])
+        get_database = getattr(DB, "get_" + name + "_db_" + type_var[1:])
     return get_database
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py
index 2401ab64dc0ce35eec1fd57911a1a92e714b2a02..d91d1b51d5d7ef23dd2a66c656013052ee50068f 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py
@@ -90,8 +90,13 @@ def makeMeNoisy(viewData, random_state, percentage=5):
     return noisyViewData

-def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="",
-                       random_state=None, full=True, add_noise=False,
+def get_plausible_db_hdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="",
+                          random_state=None, full=True, add_noise=False,
                        noise_std=0.15, nbView=3,
                        nbClass=2, datasetLength=100, randomStateInt=42, nbFeatures = 10):
     """Used to generate a plausible dataset to test the algorithms"""
@@ -380,7 +385,7 @@ def copyhdf5_dataset(source_data_file, destination_data_file, source_dataset_name,
         new_d_set.attrs[key] = value

-def get_classicDBhdf5(views, path_f, name_DB, nb_class, asked_labels_names,
+def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names,
                     random_state, full=False, add_noise=False, noise_std=0.15,):
     """Used to load a hdf5 database"""
     if full:
@@ -467,8 +472,13 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name,
     return noisy_dataset, dataset_name + "_noised"

-def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames,
-                    random_state, full=False, add_noise=False, noise_std=0.15,
+def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames,
+                       random_state, full=False, add_noise=False, noise_std=0.15,
                     delimiter=","):
     # TODO : Update this one
     labels_names = np.genfromtxt(pathF + nameDB + "-labels-names.csv",
@@ -497,7 +507,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames,
     metaDataGrp.attrs["nbClass"] = len(labels_names)
     metaDataGrp.attrs["datasetLength"] = len(labels)
     datasetFile.close()
-    datasetFile, labelsDictionary, dataset_name = getClassicDBhdf5(views, pathF, nameDB,
+    datasetFile, labelsDictionary, dataset_name = get_classic_db_hdf5(views, pathF, nameDB,
                                                    NB_CLASS, askedLabelsNames,
                                                    random_state, full)
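Note: the renamed loaders above are resolved by name in execution.get_database_function (updated earlier in this diff); a small sketch of that convention-based dispatch, with illustrative values:

    from multiview_platform.mono_multi_view_classifiers.utils import \
        get_multiview_db as DB

    # "plausible" / ".hdf5" are example values; type_var[1:] strips the dot.
    name, type_var = "plausible", ".hdf5"
    if name not in ["fake", "plausible"]:
        get_database = getattr(DB, "get_classic_db_" + type_var[1:])
    else:
        get_database = getattr(DB, "get_" + name + "_db_" + type_var[1:])
    # get_database now points at get_plausible_db_hdf5, matching the rename above.
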
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
index 4398e6123edec53f42669487becb39c4c43f99bb..dee2cd7d8dbab3a000cad4216e8e67156d3110af 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py
@@ -108,6 +108,7 @@ def randomized_search_x(X, y, framework, random_state, output_file_name, classifier_module,
         min_list = np.array(
             [min(nb_possible_combination, n_iter) for nb_possible_combination in
              nb_possible_combinations])
         random_search = MultiviewCompatibleRandomizedSearchCV(
             estimator,
             n_iter=int(np.sum(min_list)),
@@ -135,6 +136,32 @@ def randomized_search_x(X, y, framework, random_state, output_file_name, classifier_module,
         test_folds_preds = get_test_folds_preds(X, y, folds, best_estimator,
                                                 framework, learning_indices)
     return best_params, test_folds_preds

 from sklearn.base import clone

@@ -194,6 +221,7 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
                     self.best_score_ = cross_validation_score
         if self.refit:
             self.best_estimator_ = clone(base_estimator).set_params(**self.best_params_)
+            self.best_estimator_.fit(X, y, **fit_params)
         self.n_splits_ = n_splits
         return self
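Note: MultiviewCompatibleRandomizedSearchCV samples each param_distributions entry either through an rvs method or by indexing a list, which is exactly the contract ConfigGenerator (earlier in this diff) implements. A stripped-down sketch of that contract, with a toy distribution class:

    import numpy as np

    class ToyIntDistribution:
        """Toy rvs-style distribution: anything with .rvs(random_state=...) works."""
        def __init__(self, low, high):
            self.low, self.high = low, high

        def rvs(self, random_state=None):
            return random_state.randint(self.low, self.high)

    param_distributions = {
        "max_depth": ToyIntDistribution(1, 10),  # sampled through rvs
        "criterion": ["gini", "entropy"],        # sampled by random indexing
    }
    rng = np.random.RandomState(42)
    sample = {name: (distrib.rvs(random_state=rng) if hasattr(distrib, "rvs")
                     else distrib[rng.randint(len(distrib))])
              for name, distrib in param_distributions.items()}
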
diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py
index 91bd39d6c589804e5b6658d850e27071380d46ea..acb10c4f4432c667fb08873f7671483808f52ecb 100644
--- a/multiview_platform/tests/test_ExecClassif.py
+++ b/multiview_platform/tests/test_ExecClassif.py
@@ -4,6 +4,8 @@ import unittest
 import h5py
 import numpy as np

+from .utils import rm_tmp
+
 from ..mono_multi_view_classifiers import exec_classif

@@ -25,6 +27,7 @@ class Test_initKWARGS(unittest.TestCase):
 class Test_InitArgumentDictionaries(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         cls.benchmark = {"monoview": ["fake_monoview_classifier"], "multiview": {}}
         cls.views_dictionnary = {'test_view_0': 0, 'test_view': 1}
         cls.nb_class = 2
@@ -85,16 +88,142 @@ class Test_InitArgumentDictionaries(unittest.TestCase):
                            },]
         self.assertEqual(arguments["multiview"][0], expected_output[0])

+    def test_init_argument_dictionaries_multiview_multiple(self):
+        self.multiview_classifier_arg_value = ["fake_value_2", "fake_arg_value_3"]
+        self.init_kwargs = {
+            'monoview': {
+                self.monoview_classifier_name:
+                    {
+                        self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}
+            },
+            "multiview": {
+                self.multiview_classifier_name: {
+                    self.multiview_classifier_arg_name: self.multiview_classifier_arg_value}
+            }
+        }
+        self.benchmark["multiview"] = ["fake_multiview_classifier"]
+        self.benchmark["monoview"] = {}
+        arguments = exec_classif.init_argument_dictionaries(self.benchmark,
+                                                            self.views_dictionnary,
+                                                            self.nb_class,
+                                                            self.init_kwargs)
+        expected_output = [{
+            "classifier_name": self.multiview_classifier_name+"_fake_value_2",
+            "view_indices": [0,1],
+            "view_names": ["test_view_0", "test_view"],
+            "nb_class": self.nb_class,
+            "labels_names":None,
+            self.multiview_classifier_name + "_fake_value_2": {
+                self.multiview_classifier_arg_name:
+                    self.multiview_classifier_arg_value[0]},
+        },
+            {
+                "classifier_name": self.multiview_classifier_name+"_fake_arg_value_3",
+                "view_indices": [0, 1],
+                "view_names": ["test_view_0", "test_view"],
+                "nb_class": self.nb_class,
+                "labels_names": None,
+                self.multiview_classifier_name+"_fake_arg_value_3": {
+                    self.multiview_classifier_arg_name:
+                        self.multiview_classifier_arg_value[1]},
+            }
+        ]
+        self.assertEqual(arguments["multiview"][0], expected_output[0])
+
+    def test_init_argument_dictionaries_multiview_complex(self):
+        self.multiview_classifier_arg_value = {"fake_value_2":"plif", "plaf":"plouf"}
+        self.init_kwargs = {
+            'monoview': {
+                self.monoview_classifier_name:
+                    {
+                        self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}
+            },
+            "multiview": {
+                self.multiview_classifier_name: {
+                    self.multiview_classifier_arg_name: self.multiview_classifier_arg_value}
+            }
+        }
+        self.benchmark["multiview"] = ["fake_multiview_classifier"]
+        self.benchmark["monoview"] = {}
+        arguments = exec_classif.init_argument_dictionaries(self.benchmark,
+                                                            self.views_dictionnary,
+                                                            self.nb_class,
+                                                            self.init_kwargs)
+        expected_output = [{
+            "classifier_name": self.multiview_classifier_name,
+            "view_indices": [0,1],
+            "view_names": ["test_view_0", "test_view"],
+            "nb_class": self.nb_class,
+            "labels_names":None,
+            self.multiview_classifier_name: {
+                self.multiview_classifier_arg_name:
+                    self.multiview_classifier_arg_value},
+        }]
+        self.assertEqual(arguments["multiview"][0], expected_output[0])
+
+    def test_init_argument_dictionaries_multiview_multiple_complex(self):
+        self.multiview_classifier_arg_value = {"fake_value_2":["plif", "pluf"], "plaf":"plouf"}
+        self.init_kwargs = {
+            'monoview': {
+                self.monoview_classifier_name:
+                    {
+                        self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}
+            },
+            "multiview": {
+                self.multiview_classifier_name: {
+                    self.multiview_classifier_arg_name: self.multiview_classifier_arg_value}
+            }
+        }
+        self.benchmark["multiview"] = ["fake_multiview_classifier"]
+        self.benchmark["monoview"] = {}
+        arguments = exec_classif.init_argument_dictionaries(self.benchmark,
+                                                            self.views_dictionnary,
+                                                            self.nb_class,
+                                                            self.init_kwargs)
+        expected_output = [{
+            "classifier_name": self.multiview_classifier_name+"_plif_plouf",
+            "view_indices": [0,1],
+            "view_names": ["test_view_0", "test_view"],
+            "nb_class": self.nb_class,
+            "labels_names":None,
+            self.multiview_classifier_name + "_plif_plouf": {
+                self.multiview_classifier_arg_name:
+                    {"fake_value_2": "plif", "plaf": "plouf"}},
+        },
+            {
+                "classifier_name": self.multiview_classifier_name+"_pluf_plouf",
+                "view_indices": [0, 1],
+                "view_names": ["test_view_0", "test_view"],
+                "nb_class": self.nb_class,
+                "labels_names": None,
+                self.multiview_classifier_name+"_pluf_plouf": {
+                    self.multiview_classifier_arg_name:
+                        {"fake_value_2":"pluf", "plaf":"plouf"}},
+            }
+        ]
+        self.assertEqual(arguments["multiview"][0], expected_output[0])


 def fakeBenchmarkExec(core_index=-1, a=7, args=1):
     return [core_index, a]


 def fakeBenchmarkExec_mutlicore(nb_cores=-1, a=6, args=1):
     return [nb_cores, a]


-def fakeBenchmarkExec_monocore(DATASET=1, a=4, args=1):
+def fakeBenchmarkExec_monocore(dataset_var=1, a=4, args=1):
     return [a]
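Note: the fake* helpers above let exec_benchmark be unit-tested without training anything: the driver receives its worker functions as keyword arguments and the tests inject stubs. A minimal sketch of that injection pattern (run_benchmark and its arguments are invented stand-ins):

    # Invented stand-in mirroring exec_benchmark's injection-based design.
    def run_benchmark(jobs, exec_one=None, get_results=None):
        results = [exec_one(job) for job in jobs]
        return get_results(results)

    def fake_exec_one(job):         # stub worker: no classifier is trained
        return job * 2

    def fake_get_results(results):  # stub aggregator
        return sum(results)

    assert run_benchmark([1, 2, 3],
                         exec_one=fake_exec_one,
                         get_results=fake_get_results) == 12
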
def fakeBenchmarkExec(core_index=-1, a=7, args=1):
    return [core_index, a]

def fakeBenchmarkExec_mutlicore(nb_cores=-1, a=6, args=1):
    return [nb_cores, a]

-def fakeBenchmarkExec_monocore(DATASET=1, a=4, args=1):
+def fakeBenchmarkExec_monocore(dataset_var=1, a=4, args=1):
     return [a]

@@ -113,6 +242,7 @@ class Test_execBenchmark(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.Dataset = h5py.File(
             "multiview_platform/tests/tmp_tests/test_file.hdf5", "w")
@@ -140,10 +270,17 @@ class Test_execBenchmark(unittest.TestCase):
         res = exec_classif.exec_benchmark(2, 1, 2, cls.argument_dictionaries,
                                           [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset,
+                                          exec_one_benchmark=fakeBenchmarkExec,
+                                          exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore,
+                                          exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore,
+                                          get_results=fakegetResults,
                                           delete=fakeDelete)
         cls.assertEqual(res, 3)
@@ -155,10 +292,17 @@ class Test_execBenchmark(unittest.TestCase):
         res = exec_classif.exec_benchmark(2, 2, 2, cls.argument_dictionaries,
                                           [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset,
+                                          exec_one_benchmark=fakeBenchmarkExec,
+                                          exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore,
+                                          exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore,
+                                          get_results=fakegetResults,
                                           delete=fakeDelete)
         cls.assertEqual(res, 3)
@@ -166,10 +310,17 @@ class Test_execBenchmark(unittest.TestCase):
         res = exec_classif.exec_benchmark(2, 1, 1, cls.argument_dictionaries,
                                           [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, 10, cls.Dataset,
+                                          exec_one_benchmark=fakeBenchmarkExec,
+                                          exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore,
+                                          exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore,
+                                          get_results=fakegetResults,
                                           delete=fakeDelete)
         cls.assertEqual(res, 3)
@@ -182,15 +333,26 @@ class Test_execBenchmark(unittest.TestCase):
         os.rmdir(path)

def fakeExecMono(directory, name, labels_names, classification_indices, k_folds,
                 coreIndex, type, pathF, random_state, labels,
                 hyper_param_search="try", metrics="try", nIter=1, **arguments):
    return ["Mono", arguments]

def fakeExecMulti(directory, coreIndex, name, classification_indices, k_folds,
                  type, pathF, labels_dictionary, random_state, labels,
                  hyper_param_search="", metrics=None, nIter=1, **arguments):
    return ["Multi", arguments]
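These tests never train a model: `exec_benchmark` and `exec_one_benchmark` receive their executors as keyword arguments, so the fakes above let the tests verify orchestration and result collection in isolation. The injection pattern reduced to its core (hypothetical names, not the platform's API):

def run_benchmark(tasks, exec_monoview=None, exec_multiview=None):
    # Production code passes the real executors; tests inject fakes.
    results = []
    for kind, arguments in tasks:
        runner = exec_monoview if kind == "monoview" else exec_multiview
        results.append(runner(**arguments))
    return results

def fake_mono(**arguments):
    return ["Mono", arguments]

def fake_multi(**arguments):
    return ["Multi", arguments]

print(run_benchmark([("monoview", {"try": 0}), ("multiview", {"try3": 5})],
                    exec_monoview=fake_mono, exec_multiview=fake_multi))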
@@ -216,6 +378,7 @@ class Test_execOneBenchmark(unittest.TestCase):
     @classmethod
     def setUp(cls):
+        rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.args = {
             "Base": {"name": "chicken_is_heaven", "type": "type",
@@ -224,6 +387,7 @@
     def test_simple(cls):
         flag, results = exec_classif.exec_one_benchmark(core_index=10,
+                    labels_dictionary={
+                        0: "a",
+                        1: "b"},
+                    directory="multiview_platform/tests/tmp_tests/",
+                    classification_indices=(
+                        [1, 2, 3, 4],
+                        [0, 5, 6, 7, 8]),
+                    args=cls.args,
+                    k_folds=FakeKfold(),
+                    random_state="try",
+                    hyper_param_search="try",
+                    metrics="try",
+                    argument_dictionaries={
                         "Monoview": [
                             {"try": 0},
                             {"try2": 100}],
                         "multiview": [
                             {"try3": 5},
                             {"try4": 10}]},
                     benchmark="try",
                     views="try",
                     views_indices="try",
                     flag=None,
                     labels=np.array(
                         [0, 1, 2, 1, 2, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]),
                     exec_monoview_multicore=fakeExecMono,
                     exec_multiview_multicore=fakeExecMulti,
                     init_multiview_arguments=fakeInitMulti)
         cls.assertEqual(flag, None)
         cls.assertEqual(results,
@@ -282,6 +471,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.args = {
             "Base": {"name": "chicken_is_heaven", "type": "type",
@@ -290,7 +480,11 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
     def test_simple(cls):
         flag, results = exec_classif.exec_one_benchmark_multicore(
-            nbCores=2,
+            nb_cores=2,
             labels_dictionary={0: "a", 1: "b"},
             directory="multiview_platform/tests/tmp_tests/",
             classification_indices=([1, 2, 3, 4], [0, 10, 20, 30, 40]),
@@ -337,6 +531,55 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
         os.rmdir(path)

+class Test_set_element(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.dictionary = {"a":
+                              {"b": {
+                                  "c": {
+                                      "d": {
+                                          "e": 1,
+                                          "f": [1]
+                                      }
+                                  }
+                              }}}
+        cls.elements = {"a.b.c.d.e": 1, "a.b.c.d.f": [1]}
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_simple(self):
+        simplified_dict = {}
+        for path, value in self.elements.items():
+            simplified_dict = exec_classif.set_element(simplified_dict, path, value)
+        self.assertEqual(simplified_dict, self.dictionary)
+
+
+class Test_get_path_dict(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.dictionary = {"a":
+                              {"b": {
+                                  "c": {
+                                      "d": {
+                                          "e": 1,
+                                          "f": [1]
+                                      }
+                                  }
+                              }}}
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_simple(self):
+        path_dict = exec_classif.get_path_dict(self.dictionary)
+        self.assertEqual(path_dict, {"a.b.c.d.e": 1, "a.b.c.d.f": [1]})

# # class Test_analyzeMulticlass(unittest.TestCase):
#
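`Test_set_element` and `Test_get_path_dict` specify a round trip between a nested configuration dictionary and a flat one keyed by dotted paths. A self-contained sketch that satisfies both expectations (the actual implementations in `exec_classif` may differ):

def set_element(dictionary, path, value):
    """Insert ``value`` at the dotted ``path``, creating nested dicts."""
    keys = path.split(".")
    node = dictionary
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value
    return dictionary

def get_path_dict(dictionary, prefix=""):
    """Flatten a nested dict into {dotted_path: leaf_value}."""
    flat = {}
    for key, value in dictionary.items():
        path = key if not prefix else prefix + "." + key
        if isinstance(value, dict):
            flat.update(get_path_dict(value, path))
        else:
            flat[path] = value
    return flat

nested = {}
for path, value in {"a.b.c.d.e": 1, "a.b.c.d.f": [1]}.items():
    nested = set_element(nested, path, value)
assert get_path_dict(nested) == {"a.b.c.d.e": 1, "a.b.c.d.f": [1]}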
diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
index 8bfc9fa4a93261f41c2ac9a1f82b748c67bc2760..941cae735b5d742a50ab85dd906e38f093123563 100644
--- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
+++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
@@ -5,6 +5,8 @@ import h5py
 import numpy as np
 from sklearn.model_selection import StratifiedKFold

+from ..utils import rm_tmp
+
 from ...mono_multi_view_classifiers.monoview import exec_classif_mono_view
 from ...mono_multi_view_classifiers.monoview_classifiers import decision_tree

@@ -13,6 +15,7 @@ class Test_initConstants(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         os.mkdir("multiview_platform/tests/temp_tests")
         cls.datasetFile = h5py.File(
             "multiview_platform/tests/temp_tests/test.hdf5", "w")
@@ -65,6 +68,7 @@ class Test_initTrainTest(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         cls.random_state = np.random.RandomState(42)
         cls.X = cls.random_state.randint(0, 500, (10, 5))
         cls.Y = cls.random_state.randint(0, 2, 10)
@@ -73,8 +77,13 @@ class Test_initTrainTest(unittest.TestCase):
         np.array([1, 3, 5, 7, 9])]

     def test_simple(cls):
-        X_train, y_train, X_test, y_test, X_test_multiclass = exec_classif_mono_view.initTrainTest(
-            cls.X, cls.Y, cls.classification_indices)
+        X_train, y_train, X_test, y_test, X_test_multiclass = exec_classif_mono_view.init_train_test(
+            cls.X, cls.Y, cls.classification_indices)
         np.testing.assert_array_equal(X_train, np.array(
             [np.array([102, 435, 348, 270, 106]),
              np.array([466, 214, 330, 458, 87]),
@@ -95,6 +104,7 @@ class Test_getHPs(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.classifierModule = decision_tree
         cls.hyper_param_search = "randomized_search"
diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DifficultyMeasure/__init__.py b/multiview_platform/tests/test_multi_view/test_multiview_utils.py
similarity index 100%
rename from multiview_platform/tests/test_multiview_classifiers/Test_DifficultyMeasure/__init__.py
rename to multiview_platform/tests/test_multi_view/test_multiview_utils.py
diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py
deleted file mode 100644
index 4dd7cdef6269e5c6daf9dd64a41a8159e320701e..0000000000000000000000000000000000000000
--- a/multiview_platform/tests/test_multiview_classifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import unittest
-
-import numpy as np
-
-from ....mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion import \
-    difficulty_fusion
-
-
-class Test_difficulty(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.classifiersDecisions = np.array([
-            [np.random.randint(0, 2, (2, 5)),
-             np.array([[0, 0, 1, 0, 1], [0, 1, 0, 1, 0]]),
-             np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)),
-             np.random.randint(0, 2, (2, 5))],
-            [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)),
-             np.random.randint(0, 2, (2, 5)),
-             np.array([[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]]),
-             np.random.randint(0, 2, (2, 5))],
-            [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)),
-             np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)),
-             np.array([[0, 1, 1, 1, 1], [0, 1, 0, 1, 0]])],
-        ])
-        cls.combination = [1, 3, 4]
-        cls.foldsGroudTruth = np.array([[1, 1, 0, 0, 1], [0, 1, 0, 1, 0]])
-        cls.foldsLen = ""
-
-    def test_simple(cls):
-        difficulty_measure = difficulty_fusion.difficulty(
-            cls.classifiersDecisions,
-            cls.combination,
-            cls.foldsGroudTruth,
-            cls.foldsLen)
-        cls.assertAlmostEqual(difficulty_measure, 0.29861111111)
diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/__init__.py
b/multiview_platform/tests/test_multiview_classifiers/Test_DoubleFaultFusion/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_EntropyFusion/__init__.py b/multiview_platform/tests/test_multiview_classifiers/Test_EntropyFusion/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_EntropyFusion/test_EntropyFusionModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_EntropyFusion/test_EntropyFusionModule.py deleted file mode 100644 index 605c6ab82d27e83116ba5d584204063e1475d894..0000000000000000000000000000000000000000 --- a/multiview_platform/tests/test_multiview_classifiers/Test_EntropyFusion/test_EntropyFusionModule.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest - -import numpy as np - -from ....mono_multi_view_classifiers.multiview_classifiers.entropy_fusion import \ - entropy_fusion - - -class Test_entropy(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.classifiersDecisions = np.array([ - [np.random.randint(0, 2, (2, 5)), - [[0, 0, 1, 0, 1], [0, 1, 0, 1, 0]], - np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), - np.random.randint(0, 2, (2, 5))], - [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), - np.random.randint(0, 2, (2, 5)), - [[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]], - np.random.randint(0, 2, (2, 5))], - [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), - np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), - [[0, 1, 1, 1, 1], [0, 1, 0, 1, 0]]], - ]) - cls.combination = [1, 3, 4] - cls.foldsGroudTruth = np.array([[1, 1, 0, 0, 1], [0, 1, 0, 1, 0]]) - cls.foldsLen = "" - - def test_simple(cls): - entropy_score = entropy_fusion.entropy(cls.classifiersDecisions, - cls.combination, - cls.foldsGroudTruth, - cls.foldsLen) - cls.assertEqual(entropy_score, 0.15, 'Wrong values for entropy measure') diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py b/multiview_platform/tests/test_multiview_classifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py index be4f271c5b9bdee74da924b037f1a58b827405fb..65e22eb8f7dff86aec92af8d1c7adc9e21838d49 100644 --- a/multiview_platform/tests/test_multiview_classifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py +++ b/multiview_platform/tests/test_multiview_classifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py @@ -2,7 +2,7 @@ # # import numpy as np # -# from ....mono_multi_view_classifiers.multiview_classifiers.entropy_fusion import EntropyFusionModule +# from ....mono_multi_view_classifiers.multiview_classifiers.entropy_fusion_old import EntropyFusionModule # # class Test_entropy(unittest.TestCase): # diff --git a/multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/__init__.py b/multiview_platform/tests/test_multiview_classifiers/test_additions/__init__.py similarity index 100% rename from multiview_platform/tests/test_multiview_classifiers/Test_DisagreeFusion/__init__.py rename to multiview_platform/tests/test_multiview_classifiers/test_additions/__init__.py diff --git a/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..6c22d615d12d8640b5f96c35dfb67999381fb138 --- /dev/null +++ b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py @@ -0,0 +1,69 @@ +import unittest +import numpy as np + +from ....mono_multi_view_classifiers.multiview_classifiers.additions import diversity_utils + + +class FakeDataset(): + + def __init__(self, views, labels): + self.nb_views = views.shape[0] + self.dataset_length = views.shape[2] + self.views = views + self.labels = labels + + def get_v(self, view_index, example_indices): + return self.views[view_index, example_indices] + + def get_nb_class(self, example_indices): + return np.unique(self.labels[example_indices]) + +class FakeDivCoupleClf(diversity_utils.CoupleDiversityFusion): + + def __init__(self, rs, classifier_names=None, + classifiers_config=None, monoview_estimators=None): + super(FakeDivCoupleClf, self).__init__(random_state=rs, + classifier_names=classifier_names, + classifiers_configs=classifiers_config, + monoview_estimators=monoview_estimators) + self.rs = rs + + def diversity_score(self, a, b, c): + return self.rs.randint(0,100) + + +class FakeDivGlobalClf(diversity_utils.GlobalDiversityFusion): + + def __init__(self, rs, classifier_names=None, + classifiers_config=None, monoview_estimators=None): + super(FakeDivGlobalClf, self).__init__(random_state=rs, + classifier_names=classifier_names, + classifiers_configs=classifiers_config, + monoview_estimators=monoview_estimators) + self.rs = rs + + def diversity_score(self, a, b, c): + return self.rs.randint(0,100) + +class Test_DiversityFusion(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.classifier_names = ["cb_boost", "decision_tree"] + cls.classifiers_config = {"cb_boost":{"n_stumps":1, "n_iterations":5}} + cls.random_state = np.random.RandomState(42) + cls.y = cls.random_state.randint(0,2,6) + cls.X = FakeDataset(cls.random_state.randint(0,100,(2,5,6)), cls.y) + cls.train_indices = [0,1,2,4] + cls.views_indices = [0,1] + + def test_simple_couple(self): + clf = FakeDivCoupleClf(self.random_state, classifier_names=self.classifier_names, + classifiers_config=self.classifiers_config) + clf.fit(self.X, self.y, self.train_indices, self.views_indices) + + def test_simple_global(self): + clf = FakeDivGlobalClf(self.random_state, + classifier_names=self.classifier_names, + classifiers_config=self.classifiers_config) + clf.fit(self.X, self.y, self.train_indices, self.views_indices) \ No newline at end of file diff --git a/multiview_platform/tests/test_multiview_classifiers/test_difficulty_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_difficulty_fusion.py new file mode 100644 index 0000000000000000000000000000000000000000..4efc60546dcf049f8f9ce6613a713262f1ee42e5 --- /dev/null +++ b/multiview_platform/tests/test_multiview_classifiers/test_difficulty_fusion.py @@ -0,0 +1,23 @@ +import unittest + +import numpy as np + +from ...mono_multi_view_classifiers.multiview_classifiers import difficulty_fusion + + +class Test_difficulty_fusion(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.random_state=np.random.RandomState(42) + cls.classifiers_decisions = cls.random_state.randint(0, 2, size=(5, 3, 5)) + cls.combination = [1, 3, 4] + cls.y = np.array([1, 1, 0, 0, 1]) + cls.difficulty_fusion_clf = difficulty_fusion.DifficultyFusion() + + def test_simple(cls): + difficulty_measure = cls.difficulty_fusion_clf.diversity_measure( + cls.classifiers_decisions, + cls.combination, + cls.y) + 
cls.assertAlmostEqual(difficulty_measure, 0.22453703703703706)
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_disagree_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_disagree_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ae051fa653f7e2ad132087682cc3fef4996f5f6
--- /dev/null
+++ b/multiview_platform/tests/test_multiview_classifiers/test_disagree_fusion.py
@@ -0,0 +1,23 @@
+import unittest
+
+import numpy as np
+
+from ...mono_multi_view_classifiers.multiview_classifiers import disagree_fusion
+
+
+class Test_disagree(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.monoview_decision_1 = np.array([0, 0, 1, 1])
+        cls.monoview_decision_2 = np.array([0, 1, 0, 1])
+        cls.ground_truth = None
+        cls.clf = disagree_fusion.DisagreeFusion()
+
+    def test_simple(cls):
+        disagreement = cls.clf.diversity_measure(cls.monoview_decision_1,
+                                                 cls.monoview_decision_2,
+                                                 cls.ground_truth)
+        np.testing.assert_array_equal(disagreement,
+                                      np.array([False, True, True, False]))
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_double_fault_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_double_fault_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a0b7f6f29edeb3ee196e7f9cb60309fec96f2b4
--- /dev/null
+++ b/multiview_platform/tests/test_multiview_classifiers/test_double_fault_fusion.py
@@ -0,0 +1,22 @@
+import unittest
+
+import numpy as np
+
+from ...mono_multi_view_classifiers.multiview_classifiers import double_fault_fusion
+
+
+class Test_double_fault(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.monoview_decision_1 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
+        cls.monoview_decision_2 = np.array([0, 0, 1, 1, 0, 0, 1, 1])
+        cls.ground_truth = np.array([0, 1, 0, 1, 0, 1, 0, 1])
+        cls.clf = double_fault_fusion.DoubleFaultFusion()
+
+    def test_simple(cls):
+        double_fault = cls.clf.diversity_measure(cls.monoview_decision_1,
+                                                 cls.monoview_decision_2,
+                                                 cls.ground_truth)
+        np.testing.assert_array_equal(double_fault,
+                                      np.array([False, True, False, False, False, False, True, False]))
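The two pairwise tests above encode the textbook definitions: disagreement is true wherever the two decision vectors differ (an XOR), and double fault is true wherever both classifiers are wrong on the same example. As NumPy one-liners consistent with the expected arrays (a sketch, not the classes' actual methods):

import numpy as np

def disagreement(decisions_1, decisions_2, _ground_truth=None):
    # True wherever the two classifiers disagree.
    return np.logical_xor(decisions_1, decisions_2)

def double_fault(decisions_1, decisions_2, ground_truth):
    # True wherever both classifiers are wrong on the same example.
    return np.logical_and(decisions_1 != ground_truth,
                          decisions_2 != ground_truth)

d1, d2 = np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])
print(disagreement(d1, d2))  # [False  True  True False]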
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_entropy_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_entropy_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..765a0b793c0deab1687d540d565f7f17002b9f17
--- /dev/null
+++ b/multiview_platform/tests/test_multiview_classifiers/test_entropy_fusion.py
@@ -0,0 +1,23 @@
+import unittest
+
+import numpy as np
+
+from ...mono_multi_view_classifiers.multiview_classifiers import entropy_fusion
+
+
+class Test_entropy_fusion(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.classifiers_decisions = cls.random_state.randint(0, 2, size=(5, 3, 5))
+        cls.combination = [1, 3, 4]
+        cls.y = np.array([1, 1, 0, 0, 1])
+        cls.clf = entropy_fusion.EntropyFusion()
+
+    def test_simple(cls):
+        entropy = cls.clf.diversity_measure(
+            cls.classifiers_decisions,
+            cls.combination,
+            cls.y)
+        cls.assertAlmostEqual(entropy, 0.2)
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
index d78acf8020081205e42e36d79cc936d8511be72c..3fb36b314d67184ed43c801ab8e8e355d9ff24d3 100644
--- a/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
+++ b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
@@ -4,6 +4,8 @@ import numpy as np
 import h5py
 import os

+from ..utils import rm_tmp
+
 from multiview_platform.mono_multi_view_classifiers.multiview_classifiers import \
     weighted_linear_early_fusion

@@ -11,6 +13,7 @@ class Test_WeightedLinearEarlyFusion(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         cls.random_state = np.random.RandomState(42)
         cls.view_weights = [0.5, 0.5]
         os.mkdir("multiview_platform/tests/tmp_tests")
@@ -32,7 +35,7 @@ class Test_WeightedLinearEarlyFusion(unittest.TestCase):
         cls.monoview_classifier_config = {"max_depth": 1, "criterion": "gini", "splitter": "best"}
         cls.classifier = weighted_linear_early_fusion.WeightedLinearEarlyFusion(
             random_state=cls.random_state, view_weights=cls.view_weights,
-            monoview_classifier=cls.monoview_classifier_name,
+            monoview_classifier_name=cls.monoview_classifier_name,
             monoview_classifier_config=cls.monoview_classifier_config)

     @classmethod
diff --git a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
index 65be8762d2b063a0a2cf0c42940bd01eda84ddae..0cc3af536993c202f60b7fad6c2cc92309618781 100644
--- a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
+++ b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
@@ -22,7 +22,7 @@ class Test_copyhdf5Dataset(unittest.TestCase):
         cls.dataset.attrs["test_arg"] = "Am I copied"

     def test_simple_copy(cls):
-        get_multiview_db.copyhdf5Dataset(cls.dataset_file, cls.dataset_file,
+        get_multiview_db.copyhdf5_dataset(cls.dataset_file, cls.dataset_file,
                                          "test", "test_copy_1", np.arange(10))
         np.testing.assert_array_equal(cls.dataset_file.get("test").value,
                                       cls.dataset_file.get("test_copy_1").value)
@@ -31,7 +31,7 @@
     def test_copy_only_some_indices(cls):
         usedIndices = cls.random_state.choice(10, 6, replace=False)
-        get_multiview_db.copyhdf5Dataset(cls.dataset_file, cls.dataset_file,
+        get_multiview_db.copyhdf5_dataset(cls.dataset_file, cls.dataset_file,
                                          "test", "test_copy", usedIndices)
         np.testing.assert_array_equal(
             cls.dataset_file.get("test").value[usedIndices, :],
@@ -68,7 +68,7 @@ class Test_filterViews(unittest.TestCase):
         cls.temp_dataset_file = h5py.File(
             "multiview_platform/tests/temp_tests/test_copy_temp.hdf5", "w")
         cls.dataset_file.copy("Metadata", cls.temp_dataset_file)
-        get_multiview_db.filterViews(cls.dataset_file, cls.temp_dataset_file,
+        get_multiview_db.filter_views(cls.dataset_file, cls.temp_dataset_file,
                                      cls.views, np.arange(10))
         cls.assertEqual(cls.dataset_file.get("View1").attrs["name"],
                         cls.temp_dataset_file.get("View0").attrs["name"])
@@ -82,7 +82,7 @@
class Test_filterViews(unittest.TestCase): "multiview_platform/tests/temp_tests/test_copy_temp.hdf5", "w") cls.dataset_file.copy("Metadata", cls.temp_dataset_file) usedIndices = cls.random_state.choice(10, 6, replace=False) - get_multiview_db.filterViews(cls.dataset_file, cls.temp_dataset_file, + get_multiview_db.filter_views(cls.dataset_file, cls.temp_dataset_file, cls.views, usedIndices) np.testing.assert_array_equal( cls.dataset_file.get("View1").value[usedIndices, :], @@ -112,7 +112,7 @@ class Test_filterLabels(unittest.TestCase): def test_simple(cls): newLabels, \ newLabelsNames, \ - usedIndices = get_multiview_db.filterLabels(cls.labelsSet, + usedIndices = get_multiview_db.filter_labels(cls.labelsSet, cls.askedLabelsNamesSet, cls.fullLabels, cls.availableLabelsNames, @@ -127,7 +127,7 @@ class Test_filterLabels(unittest.TestCase): cls.availableLabelsNames = ["test_label_0", "test_label_1"] newLabels, \ newLabelsNames, \ - usedIndices = get_multiview_db.filterLabels(cls.labelsSet, + usedIndices = get_multiview_db.filter_labels(cls.labelsSet, cls.askedLabelsNamesSet, cls.fullLabels, cls.availableLabelsNames, @@ -141,7 +141,7 @@ class Test_filterLabels(unittest.TestCase): "test_label_2", "test_label_3", "chicken_is_heaven"} with cls.assertRaises(get_multiview_db.DatasetError) as catcher: - get_multiview_db.filterLabels(cls.labelsSet, + get_multiview_db.filter_labels(cls.labelsSet, cls.askedLabelsNamesSet, cls.fullLabels, cls.availableLabelsNames, @@ -155,7 +155,7 @@ class Test_filterLabels(unittest.TestCase): "test_label_3"] newLabels, \ newLabelsNames, \ - usedIndices = get_multiview_db.filterLabels(cls.labelsSet, + usedIndices = get_multiview_db.filter_labels(cls.labelsSet, cls.askedLabelsNamesSet, cls.fullLabels, cls.availableLabelsNames, @@ -179,7 +179,7 @@ class Test_selectAskedLabels(unittest.TestCase): def test_simple(cls): newLabels, \ newLabelsNames, \ - usedIndices = get_multiview_db.selectAskedLabels(cls.askedLabelsNamesSet, + usedIndices = get_multiview_db.select_asked_labels(cls.askedLabelsNamesSet, cls.availableLabelsNames, cls.askedLabelsNames, cls.fullLabels) @@ -194,7 +194,7 @@ class Test_selectAskedLabels(unittest.TestCase): "test_label_3"] newLabels, \ newLabelsNames, \ - usedIndices = get_multiview_db.selectAskedLabels(cls.askedLabelsNamesSet, + usedIndices = get_multiview_db.select_asked_labels(cls.askedLabelsNamesSet, cls.availableLabelsNames, cls.askedLabelsNames, cls.fullLabels) @@ -206,7 +206,7 @@ class Test_selectAskedLabels(unittest.TestCase): cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", "chicken_is_heaven"} with cls.assertRaises(get_multiview_db.DatasetError) as catcher: - get_multiview_db.selectAskedLabels(cls.askedLabelsNamesSet, + get_multiview_db.select_asked_labels(cls.askedLabelsNamesSet, cls.availableLabelsNames, cls.askedLabelsNames, cls.fullLabels) @@ -224,7 +224,7 @@ class Test_getAllLabels(unittest.TestCase): "test_label_2", "test_label_3"] def test_simple(cls): - newLabels, newLabelsNames, usedIndices = get_multiview_db.getAllLabels( + newLabels, newLabelsNames, usedIndices = get_multiview_db.get_all_labels( cls.fullLabels, cls.availableLabelsNames) cls.assertEqual(cls.availableLabelsNames, newLabelsNames) np.testing.assert_array_equal(usedIndices, np.arange(10)) @@ -241,7 +241,7 @@ class Test_fillLabelNames(unittest.TestCase): cls.availableLabelsNames = ["test_label_" + str(_) for _ in range(40)] def test_simple(cls): - askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( + askedLabelsNames, askedLabelsNamesSet = 
get_multiview_db.fill_label_names( cls.NB_CLASS, cls.askedLabelsNames, cls.random_state, @@ -251,7 +251,7 @@ class Test_fillLabelNames(unittest.TestCase): def test_missing_labels_names(cls): cls.NB_CLASS = 39 - askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( + askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fill_label_names( cls.NB_CLASS, cls.askedLabelsNames, cls.random_state, @@ -279,7 +279,7 @@ class Test_fillLabelNames(unittest.TestCase): cls.NB_CLASS = 2 cls.askedLabelsNames = ["test_label_1", "test_label_3", "test_label_4", "test_label_6"] - askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fillLabelNames( + askedLabelsNames, askedLabelsNamesSet = get_multiview_db.fill_label_names( cls.NB_CLASS, cls.askedLabelsNames, cls.random_state, @@ -298,14 +298,14 @@ class Test_allAskedLabelsAreAvailable(unittest.TestCase): def test_asked_available_labels(cls): cls.assertTrue( - get_multiview_db.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet, + get_multiview_db.all_asked_labels_are_available(cls.askedLabelsNamesSet, cls.availableLabelsNames)) def test_asked_unavailable_label(cls): cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", "chicken_is_heaven"} cls.assertFalse( - get_multiview_db.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet, + get_multiview_db.all_asked_labels_are_available(cls.askedLabelsNamesSet, cls.availableLabelsNames)) @@ -316,18 +316,18 @@ class Test_getClasses(unittest.TestCase): cls.random_state = np.random.RandomState(42) def test_multiclass(cls): - labelsSet = get_multiview_db.getClasses( + labelsSet = get_multiview_db.get_classes( cls.random_state.randint(0, 5, 30)) cls.assertEqual(labelsSet, {0, 1, 2, 3, 4}) def test_biclass(cls): - labelsSet = get_multiview_db.getClasses( + labelsSet = get_multiview_db.get_classes( cls.random_state.randint(0, 2, 30)) cls.assertEqual(labelsSet, {0, 1}) def test_one_class(cls): with cls.assertRaises(get_multiview_db.DatasetError) as catcher: - get_multiview_db.getClasses(np.zeros(30, dtype=int)) + get_multiview_db.get_classes(np.zeros(30, dtype=int)) exception = catcher.exception # cls.assertTrue("Dataset must have at least two different labels" in exception) @@ -363,7 +363,7 @@ class Test_getClassicDBhdf5(unittest.TestCase): cls.dataset.attrs["name"] = "test_view_" + str(i) def test_simple(cls): - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBhdf5( + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( cls.views, cls.pathF, cls.nameDB, cls.NB_CLASS, cls.askedLabelsNames, cls.random_state) @@ -381,7 +381,7 @@ class Test_getClassicDBhdf5(unittest.TestCase): askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] NB_CLASS = 4 - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBhdf5( + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( cls.views, cls.pathF, cls.nameDB, NB_CLASS, askedLabelsNames, cls.random_state) @@ -398,7 +398,7 @@ class Test_getClassicDBhdf5(unittest.TestCase): def test_all_views_asked(cls): views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBhdf5(views, + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5(views, cls.pathF, cls.nameDB, cls.NB_CLASS, @@ -423,7 +423,7 @@ class Test_getClassicDBhdf5(unittest.TestCase): "test_label_3"] NB_CLASS = 4 views = ["test_view_0", "test_view_1", 
"test_view_2", "test_view_3"] - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBhdf5(views, + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5(views, cls.pathF, cls.nameDB, NB_CLASS, @@ -481,7 +481,7 @@ class Test_getClassicDBcsv(unittest.TestCase): cls.datas.append(data) def test_simple(cls): - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBcsv( + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_csv( cls.views, cls.pathF, cls.nameDB, cls.NB_CLASS, cls.askedLabelsNames, cls.random_state, delimiter=",") @@ -497,7 +497,7 @@ class Test_getClassicDBcsv(unittest.TestCase): def test_all_views_asked(cls): views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBcsv(views, + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_csv(views, cls.pathF, cls.nameDB, cls.NB_CLASS, @@ -522,7 +522,7 @@ class Test_getClassicDBcsv(unittest.TestCase): askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] NB_CLASS = 4 - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBcsv( + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_csv( cls.views, cls.pathF, cls.nameDB, NB_CLASS, askedLabelsNames, cls.random_state, delimiter=",") @@ -541,7 +541,7 @@ class Test_getClassicDBcsv(unittest.TestCase): "test_label_3"] NB_CLASS = 4 views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary, dataset_name = get_multiview_db.getClassicDBcsv(views, + dataset_file, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_csv(views, cls.pathF, cls.nameDB, NB_CLASS, diff --git a/multiview_platform/tests/test_utils/test_configuration.py b/multiview_platform/tests/test_utils/test_configuration.py index c1e8c3b47125380c120e8516c6880f115b6f6bc4..289af382ed4695e55d0c1528459cc905d1af7909 100644 --- a/multiview_platform/tests/test_utils/test_configuration.py +++ b/multiview_platform/tests/test_utils/test_configuration.py @@ -3,11 +3,14 @@ import unittest import yaml import numpy as np +from ..utils import rm_tmp + from multiview_platform.mono_multi_view_classifiers.utils import configuration class Test_get_the_args(unittest.TestCase): def setUp(self): + rm_tmp() self.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.yml" os.mkdir("multiview_platform/tests/tmp_tests") data = {"Base":{"first_arg": 10, "second_arg":[12.5, 1e-06]}, "Classification":{"third_arg":True}} @@ -35,37 +38,37 @@ class Test_get_the_args(unittest.TestCase): self.assertEqual(config_dict["Base"]["second_arg"], [12.5, 1e-06]) self.assertEqual(config_dict["Classification"]["third_arg"], True) -class Test_format_the_args(unittest.TestCase): - - def test_bool(self): - value = configuration.format_raw_arg("bool ; yes") - self.assertEqual(value, True) - - def test_int(self): - value = configuration.format_raw_arg("int ; 1") - self.assertEqual(value, 1) - - def test_float(self): - value = configuration.format_raw_arg("float ; 1.5") - self.assertEqual(value, 1.5) - - def test_string(self): - value = configuration.format_raw_arg("str ; chicken_is_heaven") - self.assertEqual(value, "chicken_is_heaven") - - def test_list_bool(self): - value = configuration.format_raw_arg("list_bool ; yes no yes yes") - self.assertEqual(value, [True, False, True, True]) - - def test_list_int(self): - 
value = configuration.format_raw_arg("list_int ; 1 2 3 4") - self.assertEqual(value, [1,2,3,4]) - - def test_list_float(self): - value = configuration.format_raw_arg("list_float ; 1.5 1.6 1.7") - self.assertEqual(value, [1.5, 1.6, 1.7]) - - def test_list_string(self): - value = configuration.format_raw_arg("list_str ; list string") - self.assertEqual(value, ["list", "string"]) +# class Test_format_the_args(unittest.TestCase): +# +# def test_bool(self): +# value = configuration.format_raw_arg("bool ; yes") +# self.assertEqual(value, True) +# +# def test_int(self): +# value = configuration.format_raw_arg("int ; 1") +# self.assertEqual(value, 1) +# +# def test_float(self): +# value = configuration.format_raw_arg("float ; 1.5") +# self.assertEqual(value, 1.5) +# +# def test_string(self): +# value = configuration.format_raw_arg("str ; chicken_is_heaven") +# self.assertEqual(value, "chicken_is_heaven") +# +# def test_list_bool(self): +# value = configuration.format_raw_arg("list_bool ; yes no yes yes") +# self.assertEqual(value, [True, False, True, True]) +# +# def test_list_int(self): +# value = configuration.format_raw_arg("list_int ; 1 2 3 4") +# self.assertEqual(value, [1,2,3,4]) +# +# def test_list_float(self): +# value = configuration.format_raw_arg("list_float ; 1.5 1.6 1.7") +# self.assertEqual(value, [1.5, 1.6, 1.7]) +# +# def test_list_string(self): +# value = configuration.format_raw_arg("list_str ; list string") +# self.assertEqual(value, ["list", "string"]) diff --git a/multiview_platform/tests/test_utils/test_execution.py b/multiview_platform/tests/test_utils/test_execution.py index ac84d5f7b39f1b83926ccc2117874f8ee8df3c22..2c937a23d7d88b6903a5d84c1539692f15f6c8ac 100644 --- a/multiview_platform/tests/test_utils/test_execution.py +++ b/multiview_platform/tests/test_utils/test_execution.py @@ -3,6 +3,8 @@ import unittest import numpy as np +from ..utils import rm_tmp + from ...mono_multi_view_classifiers.utils import execution @@ -12,7 +14,7 @@ class Test_parseTheArgs(unittest.TestCase): self.args = [] def test_empty_args(self): - args = execution.parseTheArgs([]) + args = execution.parse_the_args([]) # print args @@ -24,16 +26,27 @@ class Test_initStatsIterRandomStates(unittest.TestCase): cls.statsIter = 1 def test_one_statiter(cls): +<<<<<<< HEAD cls.state = cls.random_state.get_state()[1] statsIterRandomStates = execution.initStatsIterRandomStates( cls.statsIter, cls.random_state) +======= + cls.state = cls.randomState.get_state()[1] + statsIterRandomStates = execution.init_stats_iter_random_states( + cls.statsIter, cls.randomState) +>>>>>>> 7b3e918b4fb2938657cae3093d95b1bd6fc461d4 np.testing.assert_array_equal(statsIterRandomStates[0].get_state()[1], cls.state) def test_multiple_iter(cls): cls.statsIter = 3 +<<<<<<< HEAD statsIterRandomStates = execution.initStatsIterRandomStates( cls.statsIter, cls.random_state) +======= + statsIterRandomStates = execution.init_stats_iter_random_states( + cls.statsIter, cls.randomState) +>>>>>>> 7b3e918b4fb2938657cae3093d95b1bd6fc461d4 cls.assertAlmostEqual(len(statsIterRandomStates), 3) cls.assertNotEqual(statsIterRandomStates[0].randint(5000), statsIterRandomStates[1].randint(5000)) @@ -51,37 +64,39 @@ class Test_getDatabaseFunction(unittest.TestCase): cls.type = ".csv" def test_simple(cls): - getDB = execution.getDatabaseFunction(cls.name, cls.type) + getDB = execution.get_database_function(cls.name, cls.type) from ...mono_multi_view_classifiers.utils.get_multiview_db import \ - getClassicDBcsv - cls.assertEqual(getDB, getClassicDBcsv) + 
@@ -51,37 +64,39 @@ class Test_getDatabaseFunction(unittest.TestCase):
         cls.type = ".csv"

     def test_simple(cls):
-        getDB = execution.getDatabaseFunction(cls.name, cls.type)
+        getDB = execution.get_database_function(cls.name, cls.type)
         from ...mono_multi_view_classifiers.utils.get_multiview_db import \
-            getClassicDBcsv
-        cls.assertEqual(getDB, getClassicDBcsv)
+            get_classic_db_csv
+        cls.assertEqual(getDB, get_classic_db_csv)

     def test_hdf5(cls):
         cls.type = ".hdf5"
-        getDB = execution.getDatabaseFunction(cls.name, cls.type)
+        getDB = execution.get_database_function(cls.name, cls.type)
         from ...mono_multi_view_classifiers.utils.get_multiview_db import \
-            getClassicDBhdf5
-        cls.assertEqual(getDB, getClassicDBhdf5)
+            get_classic_db_hdf5
+        cls.assertEqual(getDB, get_classic_db_hdf5)

     def test_plausible_hdf5(cls):
-        cls.name = "Plausible"
+        cls.name = "plausible"
         cls.type = ".hdf5"
-        getDB = execution.getDatabaseFunction(cls.name, cls.type)
+        getDB = execution.get_database_function(cls.name, cls.type)
         from ...mono_multi_view_classifiers.utils.get_multiview_db import \
-            getPlausibleDBhdf5
-        cls.assertEqual(getDB, getPlausibleDBhdf5)
+            get_plausible_db_hdf5
+        cls.assertEqual(getDB, get_plausible_db_hdf5)


 class Test_initRandomState(unittest.TestCase):

     def setUp(self):
-        os.mkdir("multiview_platform/tests/temp_tests/")
+        rm_tmp()
+        os.mkdir("multiview_platform/tests/tmp_tests/")

     def tearDown(self):
-        os.rmdir("multiview_platform/tests/temp_tests/")
+        os.rmdir("multiview_platform/tests/tmp_tests/")

     def test_random_state_42(self):
         randomState_42 = np.random.RandomState(42)
-        random_state = execution.initRandomState("42",
-                                                 "multiview_platform/tests/temp_tests/")
-        os.remove("multiview_platform/tests/temp_tests/random_state.pickle")
+        randomState = execution.init_random_state("42",
+                                                  "multiview_platform/tests/tmp_tests/")
+        os.remove("multiview_platform/tests/tmp_tests/randomState.pickle")
+        np.testing.assert_array_equal(randomState.beta(1, 100, 100),
+                                      randomState_42.beta(1, 100, 100))

     def test_random_state_pickle(self):
-        randomState_to_pickle = execution.initRandomState(None,
-                                                          "multiview_platform/tests/temp_tests/")
-        pickled_randomState = execution.initRandomState(
-            "multiview_platform/tests/temp_tests/random_state.pickle",
-            "multiview_platform/tests/temp_tests/")
-        os.remove("multiview_platform/tests/temp_tests/random_state.pickle")
+        randomState_to_pickle = execution.init_random_state(None,
+                                                            "multiview_platform/tests/tmp_tests/")
+        pickled_randomState = execution.init_random_state(
+            "multiview_platform/tests/tmp_tests/randomState.pickle",
+            "multiview_platform/tests/tmp_tests/")
+        os.remove("multiview_platform/tests/tmp_tests/randomState.pickle")
         np.testing.assert_array_equal(randomState_to_pickle.beta(1, 100, 100),
                                       pickled_randomState.beta(1, 100, 100))

@@ -144,7 +174,7 @@ class Test_genSplits(unittest.TestCase):
         self.splitRatio = 0.2

     def test_simple(self):
-        splits = execution.genSplits(self.labels, self.splitRatio,
+        splits = execution.gen_splits(self.labels, self.splitRatio,
                                      self.statsIterRandomStates)
         self.assertEqual(len(splits), 3)
         self.assertEqual(len(splits[1]), 2)
@@ -159,7 +189,7 @@ class Test_genSplits(unittest.TestCase):
         self.assertGreater(len(np.where(self.labels[splits[1][1]] == 2)[0]), 0)

     def test_genSplits_no_iter(self):
-        splits = execution.genSplits(self.labels, self.splitRatio,
+        splits = execution.gen_splits(self.labels, self.splitRatio,
                                      self.statsIterRandomStates)
         self.assertEqual(len(splits), 3)
         self.assertEqual(len(splits[0]), 2)
@@ -194,7 +224,7 @@ class Test_genDirecortiesNames(unittest.TestCase):
         cls.stats_iter = 5

     def test_simple_ovo(cls):
-        directories = execution.genDirecortiesNames(cls.directory,
+        directories = execution.gen_direcorties_names(cls.directory,
                                                     cls.stats_iter)
         cls.assertEqual(len(directories), 5)
         cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/")
@@ -202,7 +232,7 @@ class Test_genDirecortiesNames(unittest.TestCase):

     def test_ovo_no_iter(cls):
         cls.stats_iter = 1
-        directories = execution.genDirecortiesNames(cls.directory,
+        directories = execution.gen_direcorties_names(cls.directory,
                                                     cls.stats_iter)
         cls.assertEqual(len(directories), 1)
         cls.assertEqual(directories[0], "../chicken_is_heaven/")
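The `Test_initRandomState` assertions depend on a NumPy `RandomState` surviving a pickle round trip bit-for-bit, which is what `init_random_state` writes to and reads back from `randomState.pickle`. The core behavior, stripped of the platform wrapper:

import pickle
import numpy as np

state = np.random.RandomState(42)
blob = pickle.dumps(state)      # what init_random_state writes to disk
restored = pickle.loads(blob)   # what a later run reads back

# Both generators now produce identical streams.
np.testing.assert_array_equal(state.beta(1, 100, 100),
                              restored.beta(1, 100, 100))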
diff --git a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
index 0024a1427a85b07adbbd4f4ebee038fcf75cc28d..6b207372e6bf3fee23b5c1cf005b427d77ab0044 100644
--- a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
+++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
@@ -5,6 +5,8 @@ import h5py
 import numpy as np
 from sklearn.model_selection import StratifiedKFold

+from ..utils import rm_tmp
+
 from ...mono_multi_view_classifiers.utils import hyper_parameter_search
 from ...mono_multi_view_classifiers.multiview_classifiers import weighted_linear_early_fusion

@@ -12,6 +14,7 @@ class Test_randomized_search(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        rm_tmp()
         cls.random_state = np.random.RandomState(42)
         cls.view_weights = [0.5, 0.5]
         os.mkdir("multiview_platform/tests/tmp_tests")
diff --git a/multiview_platform/tests/test_utils/test_multiclass.py b/multiview_platform/tests/test_utils/test_multiclass.py
index 4dd535b9d5770f56c25b868883febd450c8b36a5..3a9578867ec438e1748a59a8a70cd93224af7b40 100644
--- a/multiview_platform/tests/test_utils/test_multiclass.py
+++ b/multiview_platform/tests/test_utils/test_multiclass.py
@@ -21,8 +21,13 @@ class Test_genMulticlassLabels(unittest.TestCase):
         cls.testIndices[1]]]

     def test_one_versus_one(cls):
-        multiclassLabels, labelsIndices, oldIndicesMulticlass = multiclass.genMulticlassLabels(
-            cls.labels, "oneVersusOne", cls.classification_indices)
+        multiclassLabels, labelsIndices, oldIndicesMulticlass = multiclass.gen_multiclass_labels(
+            cls.labels, "oneVersusOne", cls.classification_indices)
         cls.assertEqual(len(multiclassLabels), 10)
         cls.assertEqual(labelsIndices,
                         [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
diff --git a/multiview_platform/tests/utils.py b/multiview_platform/tests/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5766b68885035b216a141caa8e273c0df74583ae
--- /dev/null
+++ b/multiview_platform/tests/utils.py
@@ -0,0 +1,9 @@
+import os
+
+def rm_tmp():
+    try:
+        for file_name in os.listdir("multiview_platform/tests/tmp_tests"):
+            os.remove(os.path.join("multiview_platform/tests/tmp_tests", file_name))
+        os.rmdir("multiview_platform/tests/tmp_tests")
+    except OSError:
+        pass
diff --git a/requirements.txt b/requirements.txt
index e165233e7213a44182001cf71dd874be8b6479f7..88d2db8eb7463e5f2218ec0818bcf742ae5e056c 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,4 @@ pandas==0.23.3
 m2r==0.2.1
 docutils==0.12
 pyyaml==3.12
+tkinter==8.6
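The `rm_tmp` helper added above deletes files one by one and relies on catching `OSError` to cover a missing directory; `shutil.rmtree` expresses the same intent more compactly and also handles nested directories. An equivalent sketch, not the committed helper:

import shutil

def rm_tmp(path="multiview_platform/tests/tmp_tests"):
    # ignore_errors covers the "directory does not exist" case the
    # try/except in the committed version guards against, and it also
    # removes any subdirectories left behind by a failed test.
    shutil.rmtree(path, ignore_errors=True)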