diff --git a/config_files/config_test.yml b/config_files/config_test.yml
index 7ed807b690443cfbf8cc59ae5e744784d2df5042..6f5c1407ee3977856c4c0ff97022877651d00cdc 100644
--- a/config_files/config_test.yml
+++ b/config_files/config_test.yml
@@ -14,212 +14,213 @@ Base :
   add_noise: False
   noise_std: 0.0
   res_dir: "../results/"
+  track_tracebacks: False

 # All the classification-related configuration options
 Classification:
   multiclass_method: "oneVersusOne"
-  split: 0.8
+  split: 0.49
   nb_folds: 2
-  nb_class: 2
+  nb_class: 3
   classes:
-  type: ["monoview",]
-  algos_monoview: ["gradient_boosting", ]
-  algos_multiview: ["weighted_linear_early_fusion",]
+  type: ["multiview",]
+  algos_monoview: ["all"]
+  algos_multiview: ["svm_jumbo_fusion",]
   stats_iter: 2
   metrics: ["accuracy_score", "f1_score"]
   metric_princ: "f1_score"
-  hps_type: "randomized_search-equiv"
+  hps_type: "randomized_search"
   hps_iter: 1

-#####################################
-# The Monoview Classifier arguments #
-#####################################
-
-random_forest:
-  n_estimators: [25]
-  max_depth: [3]
-  criterion: ["entropy"]
-
-svm_linear:
-  C: [1]
-
-svm_rbf:
-  C: [1]
-
-svm_poly:
-  C: [1]
-  degree: [2]
-
-adaboost:
-  n_estimators: [50]
-  base_estimator: ["DecisionTreeClassifier"]
-
-adaboost_pregen:
-  n_estimators: [50]
-  base_estimator: ["DecisionTreeClassifier"]
-  n_stumps: [1]
-
-adaboost_graalpy:
-  n_iterations: [50]
-  n_stumps: [1]
-
-decision_tree:
-  max_depth: [2]
-  criterion: ["gini"]
-  splitter: ["best"]
-
-decision_tree_pregen:
-  max_depth: [10]
-  criterion: ["gini"]
-  splitter: ["best"]
-  n_stumps: [1]
-
-sgd:
-  loss: ["hinge"]
-  penalty: [l2]
-  alpha: [0.0001]
-
-knn:
-  n_neighbors: [5]
-  weights: ["uniform"]
-  algorithm: ["auto"]
-
-scm:
-  model_type: ["conjunction"]
-  max_rules: [10]
-  p: [0.1]
-
-scm_pregen:
-  model_type: ["conjunction"]
-  max_rules: [10]
-  p: [0.1]
-  n_stumps: [1]
-
-cq_boost:
-  mu: [0.01]
-  epsilon: [1e-06]
-  n_max_iterations: [5]
-  n_stumps: [1]
-
-cg_desc:
-  n_max_iterations: [10]
-  n_stumps: [1]
-
-cb_boost:
-  n_max_iterations: [10]
-  n_stumps: [1]
-
-lasso:
-  alpha: [1]
-  max_iter: [2]
-
-gradient_boosting:
-  n_estimators: [2]
-
-
 ######################################
-# The Multiview Classifier arguments #
+## The Monoview Classifier arguments #
 ######################################
-
-weighted_linear_early_fusion:
-  view_weights: [null]
-  monoview_classifier_name: ["decision_tree"]
-  monoview_classifier_config:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-entropy_fusion:
-  classifiers_names: [["decision_tree"]]
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-disagree_fusion:
-  classifiers_names: [["decision_tree"]]
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-
-double_fault_fusion:
-  classifiers_names: [["decision_tree"]]
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-difficulty_fusion:
-  classifiers_names: [["decision_tree"]]
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-scm_late_fusion:
-  classifiers_names: [["decision_tree"]]
-  p: 0.1
-  max_rules: 10
-  model_type: 'conjunction'
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
-      criterion: ["gini"]
-      splitter: ["best"]
-
-majority_voting_fusion:
-  classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]]
-  classifier_configs:
-    decision_tree:
-      max_depth: [1]
- 
criterion: ["gini"] - splitter: ["best"] - -bayesian_inference_fusion: - classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -weighted_linear_late_fusion: - classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -mumbo: - base_estimator: [null] - n_estimators: [10] - best_view_mode: ["edge"] - -lp_norm_mkl: - lmbda: [0.1] - n_loops: [50] - precision: [0.0001] - kernel: ["rbf"] - kernel_params: - gamma: [0.1] - -mvml: - reg_params: [[0,1]] - nystrom_param: [1] - learn_A: [1] - learn_w: [0] - n_loops: [6] - kernel_types: ["rbf_kernel"] - kernel_configs: - gamma: [0.1] +# +#random_forest: +# n_estimators: [25] +# max_depth: [3] +# criterion: ["entropy"] +# +#svm_linear: +# C: [1] +# +#svm_rbf: +# C: [1] +# +#svm_poly: +# C: [1] +# degree: [2] +# +#adaboost: +# n_estimators: [50] +# base_estimator: ["DecisionTreeClassifier"] +# +#adaboost_pregen: +# n_estimators: [50] +# base_estimator: ["DecisionTreeClassifier"] +# n_stumps: [1] +# +#adaboost_graalpy: +# n_iterations: [50] +# n_stumps: [1] +# +#decision_tree: +# max_depth: [2] +# criterion: ["gini"] +# splitter: ["best"] +# +#decision_tree_pregen: +# max_depth: [10] +# criterion: ["gini"] +# splitter: ["best"] +# n_stumps: [1] +# +#sgd: +# loss: ["hinge"] +# penalty: [l2] +# alpha: [0.0001] +# +#knn: +# n_neighbors: [5] +# weights: ["uniform"] +# algorithm: ["auto"] +# +#scm: +# model_type: ["conjunction"] +# max_rules: [10] +# p: [0.1] +# +#scm_pregen: +# model_type: ["conjunction"] +# max_rules: [10] +# p: [0.1] +# n_stumps: [1] +# +#cq_boost: +# mu: [0.01] +# epsilon: [1e-06] +# n_max_iterations: [5] +# n_stumps: [1] +# +#cg_desc: +# n_max_iterations: [10] +# n_stumps: [1] +# +#cb_boost: +# n_max_iterations: [10] +# n_stumps: [1] +# +#lasso: +# alpha: [1] +# max_iter: [2] +# +#gradient_boosting: +# n_estimators: [2] +# +# +####################################### +## The Multiview Classifier arguments # +####################################### +# +#weighted_linear_early_fusion: +# view_weights: [null] +# monoview_classifier_name: ["decision_tree"] +# monoview_classifier_config: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#entropy_fusion: +# classifiers_names: [["decision_tree"]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#disagree_fusion: +# classifiers_names: [["decision_tree"]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +# +#double_fault_fusion: +# classifiers_names: [["decision_tree"]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#difficulty_fusion: +# classifiers_names: [["decision_tree"]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#scm_late_fusion: +# classifiers_names: [["decision_tree"]] +# p: 0.1 +# max_rules: 10 +# model_type: 'conjunction' +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#majority_voting_fusion: +# classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#bayesian_inference_fusion: +# 
classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#weighted_linear_late_fusion: +# classifiers_names: [["decision_tree", "decision_tree", "decision_tree", ]] +# classifier_configs: +# decision_tree: +# max_depth: [1] +# criterion: ["gini"] +# splitter: ["best"] +# +#mumbo: +# base_estimator: [null] +# n_estimators: [10] +# best_view_mode: ["edge"] +# +#lp_norm_mkl: +# lmbda: [0.1] +# n_loops: [50] +# precision: [0.0001] +# kernel: ["rbf"] +# kernel_params: +# gamma: [0.1] +# +#mvml: +# reg_params: [[0,1]] +# nystrom_param: [1] +# learn_A: [1] +# learn_w: [0] +# n_loops: [6] +# kernel_types: ["rbf_kernel"] +# kernel_configs: +# gamma: [0.1] diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 91edf3ddaf5e9e19f6b144fe386979763dea5e65..2c19d36709043e2f8b45bb0a86c1b8a083afd3ed 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -106,7 +106,6 @@ def init_multiview_exps(classifier_names, views_dictionary, nb_class, kwargs_ini views_dictionary=views_dictionary, framework="multiview") else: - print(classifier_name) arguments = get_path_dict(kwargs_init[classifier_name]) multiview_arguments += [gen_single_multiview_arg_dictionary(classifier_name, arguments, @@ -653,7 +652,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, hyper_param_search=None, metrics=None, argument_dictionaries=None, benchmark=None, views=None, views_indices=None, - flag=None, labels=None,): + flag=None, labels=None, track_tracebacks=False): results_monoview, labels_names = benchmark_init(directory, classification_indices, labels, labels_dictionary, k_folds, dataset_var) @@ -671,7 +670,10 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, hyper_param_search=hyper_param_search, metrics=metrics, n_iter=args["Classification"]["hps_iter"], **arguments)] except: - traceback_outputs[arguments["classifier_name"]+"-"+arguments["view_name"]] = traceback.format_exc() + if track_tracebacks: + traceback_outputs[arguments["classifier_name"]+"-"+arguments["view_name"]] = traceback.format_exc() + else: + raise logging.debug("Done:\t monoview benchmark") @@ -696,7 +698,10 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, hyper_param_search=hyper_param_search, metrics=metrics, n_iter=args["Classification"]["hps_iter"], **arguments)] except: - traceback_outputs[arguments["classifier_name"]] = traceback.format_exc() + if track_tracebacks: + traceback_outputs[arguments["classifier_name"]] = traceback.format_exc() + else: + raise logging.debug("Done:\t multiview benchmark") return [flag, results_monoview + results_multiview, traceback_outputs] @@ -704,11 +709,10 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, def exec_benchmark(nb_cores, stats_iter, benchmark_arguments_dictionaries, - directory, metrics, dataset_var, - # exec_one_benchmark=exec_one_benchmark, - # exec_one_benchmark_multicore=exec_one_benchmark_multicore, + directory, metrics, dataset_var, track_tracebacks, exec_one_benchmark_mono_core=exec_one_benchmark_mono_core, - get_results=get_results, delete=delete_HDF5): + get_results=get_results, delete=delete_HDF5, + analyze_iterations=analyze_iterations): r"""Used to execute the needed benchmark(s) on 
multicore or mono-core functions. Parameters @@ -768,7 +772,9 @@ def exec_benchmark(nb_cores, stats_iter, # benchmark_arguments_dictionaries[0])] # else: for arguments in benchmark_arguments_dictionaries: - benchmark_results = exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments) + benchmark_results = exec_one_benchmark_mono_core(dataset_var=dataset_var, + track_tracebacks=track_tracebacks, + **arguments) analyze_iterations([benchmark_results], benchmark_arguments_dictionaries, stats_iter, metrics, example_ids=dataset_var.example_ids, labels=dataset_var.get_labels()) results += [benchmark_results] logging.debug("Done:\t Executing all the needed biclass benchmarks") @@ -892,7 +898,8 @@ def exec_classif(arguments): views, views_indices,) results_mean_stds = exec_benchmark( nb_cores, stats_iter, - benchmark_argument_dictionaries, directory, metrics, dataset_var) + benchmark_argument_dictionaries, directory, metrics, dataset_var, + args["Base"]["track_tracebacks"]) noise_results.append([noise_std, results_mean_stds]) plot_results_noise(directory, noise_results, metrics[0][0], dataset_name) diff --git a/multiview_platform/mono_multi_view_classifiers/metrics/f1_score.py b/multiview_platform/mono_multi_view_classifiers/metrics/f1_score.py index 3c2029ece644073933463673d9fcc7ea84380904..cacce06e5790e6190fd9eaa31a52f3d92ddc0456 100644 --- a/multiview_platform/mono_multi_view_classifiers/metrics/f1_score.py +++ b/multiview_platform/mono_multi_view_classifiers/metrics/f1_score.py @@ -33,7 +33,7 @@ def score(y_true, y_pred, multiclass=True, **kwargs): if multiclass: average = "micro" else: - average = "binary" + average = "micro" score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) @@ -79,7 +79,7 @@ def get_config(**kwargs): try: average = kwargs["3"] except Exception: - average = "binary" + average = "micro" config_string = "F1 score using " + str( sample_weight) + " as sample_weights, " + str( labels) + " as labels, " + str( diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py index 8a6ba10001d3f63587072c706dc3167effb7f97f..77f81473ca74eff604146f77fee7f2561c08fc3d 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py @@ -42,7 +42,7 @@ def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, classifierConfigString += "\t- Got configuration using randomized search with " + str( nIter) + " iterations \n" classifierConfigString += "\n\n" - classifierInterpretString = classifier.get_interpret(output_file_name, y_test) + classifierInterpretString = classifier.get_interpretation(output_file_name, y_test) return classifierConfigString, classifierInterpretString diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 2d3b436aa3d35d6cc4a2f03240c4a32aef12acf4..9785229897c6e71c0f9ab1485e34f92b7cfa36ff 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -16,7 +16,7 @@ from . import monoview_utils from .analyze_result import execute # Import own modules from .. 
import monoview_classifiers -from ..utils.dataset import extract_subset, Dataset +from ..utils.dataset import extract_subset, HDF5Dataset from ..utils import hyper_parameter_search from ..utils.multiclass import get_mc_estim @@ -34,7 +34,7 @@ def exec_monoview_multicore(directory, name, labels_names, classification_indice hyper_param_search="randomized_search", metrics=[["accuracy_score", None]], n_iter=30, **args): - dataset_var = Dataset(hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")) + dataset_var = HDF5Dataset(hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")) neededViewIndex = args["view_index"] X = dataset_var.get_v(neededViewIndex) Y = labels @@ -93,6 +93,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classification_indices, classifier = get_mc_estim(getattr(classifier_module, classifier_class_name) (random_state, **cl_kwargs), + Y, random_state) classifier.fit(X_train, y_train) # NB_CORES=nbCores, @@ -102,7 +103,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classification_indices, y_train_pred = classifier.predict(X_train) y_test_pred = classifier.predict(X_test) - #Filling the full prediction in the right order + # Filling the full prediction in the right order full_pred = np.zeros(Y.shape, dtype=int) - 100 for trainIndex, index in enumerate(classification_indices[0]): full_pred[index] = y_train_pred[trainIndex] diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 04683dadf177da8dc79b3d8136d5942538b7f278..8e7381c4ccd741a90028c8ec05210ffe819591b8 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -9,7 +9,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.model_selection import RandomizedSearchCV from .. import metrics -from ..utils import hyper_parameter_search +from ..utils.base import BaseClassifier # Author-Info __author__ = "Nikolas Huelsmann, Baptiste Bauvin" @@ -115,35 +115,7 @@ class CustomUniform: return unif -class BaseMonoviewClassifier(BaseEstimator, ):#ClassifierMixin): - - def genBestParams(self, detector): - return dict( - (param_name, detector.best_params_[param_name]) for param_name in - self.param_names) - - def genParamsFromDetector(self, detector): - if self.classed_params: - classed_dict = dict((classed_param, get_names( - detector.cv_results_["param_" + classed_param])) - for classed_param in self.classed_params) - if self.param_names: - return [(param_name, - np.array(detector.cv_results_["param_" + param_name])) - if param_name not in self.classed_params else ( - param_name, classed_dict[param_name]) - for param_name in self.param_names] - else: - return [()] - - def gen_distribs(self): - return dict((param_name, distrib) for param_name, distrib in - zip(self.param_names, self.distribs)) - - def params_to_string(self): - return ", ".join( - [param_name + " : " + self.to_str(param_name) for param_name in - self.param_names]) +class BaseMonoviewClassifier(BaseClassifier):#ClassifierMixin): def get_config(self): if self.param_names: @@ -151,16 +123,6 @@ class BaseMonoviewClassifier(BaseEstimator, ):#ClassifierMixin): else: return "\n\t\t- " + self.__class__.__name__ + "with no config." 
-    def to_str(self, param_name):
-        if param_name in self.weird_strings:
-            if self.weird_strings[param_name] == "class_name":
-                return self.get_params()[param_name].__class__.__name__
-            else:
-                return self.weird_strings[param_name](
-                    self.get_params()[param_name])
-        else:
-            return str(self.get_params()[param_name])
-
     def get_feature_importance(self, directory, nb_considered_feats=50):
         """Used to generate a graph and a pickle dictionary representing feature importances"""
         featureImportances = self.feature_importances_
@@ -191,44 +153,9 @@ class BaseMonoviewClassifier(BaseEstimator, ):#ClassifierMixin):
                                 featureImportance) + "\n"
         return interpretString

-    @abstractmethod
-    def fit(self, X, y):
-        pass
-
-    @abstractmethod
-    def predict(self, X):
-        pass
-
     def get_name_for_fusion(self):
         return self.__class__.__name__[:4]

-    def get_interpret(self, directory, y_test):
-        return ""
-
-    def accepts_multi_class(self, random_state, n_samples=10, dim=2,
-                            n_classes=3):
-        if int(n_samples / n_classes) < 1:
-            raise ValueError(
-                "n_samples ({}) / n_classe ({}) must be over 1".format(
-                    n_samples,
-                    n_classes))
-        fake_mc_X = random_state.random_integers(low=0, high=100,
-                                                 size=(n_samples, dim))
-        fake_mc_y = [class_index
-                     for _ in range(int(n_samples / n_classes))
-                     for class_index in range(n_classes)]
-        fake_mc_y += [0 for _ in range(n_samples % n_classes)]
-        try:
-            self.fit(fake_mc_X, fake_mc_y)
-            self.predict(fake_mc_X)
-            return True
-        except ValueError:
-            return False
-
-
-def get_names(classed_list):
-    return np.array([object_.__class__.__name__ for object_ in classed_list])
-

 def percent(x, pos):
     """Used to print percentage of importance on the y axis"""
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
index 367e8a8c18886d53e572fc6e32b8eb8f12065119..9717adbfe4b14400100275c22f61196f32338dd6 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py
@@ -131,12 +131,13 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         pred = super(Adaboost, self).predict(X)
         end = time.time()
         self.pred_time = end - begin
+        # TODO : incorrect check
         if X.shape != self.train_shape:
             self.step_predictions = np.array(
                 [step_pred for step_pred in self.staged_predict(X)])
         return pred

-    def get_interpret(self, directory, y_test):
+    def get_interpretation(self, directory, y_test, multi_class=False):
         interpretString = ""
         interpretString += self.get_feature_importance(directory)
         interpretString += "\n\n Estimator error | Estimator weight\n"
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py
index 378acf85a4905629cc25dacf9bf7b97b58f4e6be..dd01894083a6abca1709527b45dbdea444fb8496 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/decision_tree.py
@@ -31,7 +31,7 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
     #     """Used to know if the classifier can return label probabilities"""
     #     return True

-    def get_interpret(self, directory, y_test):
+    def get_interpretation(self, directory, y_test):
         interpretString = ""
         interpretString += self.get_feature_importance(directory)
         return interpretString
diff --git 
a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py index 01fe8bdc97e3b4c52325147115674601e638e0b7..a714b2afdf76c968c12447b553afb3e30ff17482 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py @@ -72,7 +72,7 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): # """Used to know if the classifier can return label probabilities""" # return False - def get_interpret(self, directory, y_test, multi_class=False): + def get_interpretation(self, directory, y_test, multi_class=False): interpretString = "" if multi_class: return interpretString diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py index 8a9ad08f5d72e1ea0b41f27a6ebe8104e94fa1c7..1d2076b13f0b0a7bfe1e659291bd46acd4055c07 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/knn.py @@ -46,7 +46,7 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier): # """Used to know if the classifier can return label probabilities""" # return True - def get_interpret(self, directory, y_test): + def get_interpretation(self, directory, y_test): interpretString = "" return interpretString diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py index a166d934819fcb10e6fc198d5276d53e26735e46..14a20f3f5f25be817631317b3c1ffbc4c9c13c5b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/lasso.py @@ -82,7 +82,7 @@ class Lasso(LassoSK, BaseMonoviewClassifier): # """ # return False - def get_interpret(self, directory, y_test): + def get_interpretation(self, directory, y_test): """ return the interpreted string diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py index 3fc9721004f4b75d8d6d7290f03b7c62dce68d0d..b35d02363fb4e347307cba975a937c5a25faa400 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/random_forest.py @@ -73,7 +73,7 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): # """ # return True - def get_interpret(self, directory, y_test): + def get_interpretation(self, directory, y_test): """ Parameters diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py index 78ff0ead71834086886a760ed528d3ba2184dc0a..18d0cbd646a43289c16c9959a686650f5370bd83 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/sgd.py @@ -65,7 +65,7 @@ class SGD(SGDClassifier, BaseMonoviewClassifier): # # return True - def get_interpret(self, directory, y_test): + def get_interpretation(self, directory, y_test): """ Parameters diff --git 
a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py
index cd3a157b8ca98c74e9305fa1583e0fbc41faed5f..232130320e24f6bd23798e96a3244c91e8ff8d81 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py
@@ -36,7 +36,7 @@ class SVMLinear(SVCClassifier, BaseMonoviewClassifier):
         self.param_names = ["C", "random_state"]
         self.distribs = [CustomUniform(loc=0, state=1), [random_state]]

-    def get_interpret(self, directory, y_test):
+    def get_interpretation(self, directory, y_test):
         interpret_string = ""
         # self.feature_importances_ = (self.coef_/np.sum(self.coef_)).reshape((self.coef_.shape[1],))
         return interpret_string
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py
index 80e6ef39522a7ea4cc045f764ba34fff5ab648c0..c0040cf7081fdbdfa3c0c9d640fe8e4e3b5e43f3 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py
@@ -84,22 +84,23 @@ def getMetricsScores(metrics, trainLabels, testLabels,
     return metricsScores

-def execute(classifier, trainLabels,
-            testLabels, DATASET,
+def execute(classifier, pred_train_labels,
+            pred_test_labels, DATASET,
             classificationKWARGS, classificationIndices,
             labels_dictionary, views, nbCores, times,
             name, KFolds,
             hyper_param_search, nIter, metric_list,
-            views_indices, random_state, labels, classifierModule):
+            views_indices, random_state, labels, classifierModule,
+            directory):
     """

    Parameters
    ----------
    classifier : classifier used

-    trainLabels : labels of train
+    pred_train_labels : labels of train

-    testLabels : labels of test
+    pred_test_labels : labels of test

    DATASET :

@@ -138,19 +139,18 @@
    return tuple of (stringAnalysis, imagesAnalysis, metricsScore)
    """
     classifier_name = classifier.short_name
-    learningIndices, validationIndices, testIndicesMulticlass = classificationIndices
-
+    learning_indices, validation_indices = classificationIndices
     metricModule = getattr(metrics, metric_list[0][0])
     if metric_list[0][1] is not None:
         metricKWARGS = dict((index, metricConfig) for index, metricConfig
                             in enumerate(metric_list[0][1]))
     else:
         metricKWARGS = {}
-    scoreOnTrain = metricModule.score(labels[learningIndices],
-                                      labels[learningIndices],
+    scoreOnTrain = metricModule.score(labels[learning_indices],
+                                      pred_train_labels,
                                       **metricKWARGS)
-    scoreOnTest = metricModule.score(labels[validationIndices],
-                                     testLabels, **metricKWARGS)
+    scoreOnTest = metricModule.score(labels[validation_indices],
+                                     pred_test_labels, **metricKWARGS)
     stringAnalysis = "\t\tResult for multiview classification with " + classifier_name + \
                      "\n\n" + metric_list[0][0] + " :\n\t-On Train : " + str(
        scoreOnTrain) + "\n\t-On Test : " + str(
        scoreOnTest) + \
                      "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \
                      ', '.join(
        labels_dictionary.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(
        KFolds.n_splits) + \
                      " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.get_config()

-    metricsScores = getMetricsScores(metric_list, trainLabels, testLabels,
-                                     validationIndices, learningIndices, labels)
+    metricsScores = getMetricsScores(metric_list, pred_train_labels, pred_test_labels,
+                                     validation_indices, learning_indices, labels)
     stringAnalysis += printMetricScore(metricsScores, metric_list)
-    stringAnalysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation()
+    stringAnalysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation(directory, labels)
     imagesAnalysis = {}
     return stringAnalysis, imagesAnalysis, metricsScores
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
index 2e4f5bc850784cba8c6b3b79d8fcd6ec1cb57e5f..d849025027aff2fd13ea8ca9665d76a997d3cbce 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
@@ -11,6 +11,7 @@ from .multiview_utils import MultiviewResult
 from . import analyze_results
 from .. import multiview_classifiers
 from ..utils import hyper_parameter_search
+from ..utils.multiclass import get_mc_estim

 # Author-Info
 __author__ = "Baptiste Bauvin"
@@ -62,8 +63,9 @@ def init_constants(kwargs, classification_indices, metrics,
     for view_index, view_name in zip(views_indices, views):
         logging.info("Info:\t Shape of " + str(view_name) + " :" + str(
             dataset_var.get_shape()))
+    labels = dataset_var.get_labels()
     return classifier_name, t_start, views_indices,\
-           classifier_config, views, learning_rate
+           classifier_config, views, learning_rate, labels

 def save_results(classifier, labels_dictionary, string_analysis, views, classifier_module,
@@ -241,7 +243,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds
         views_indices, \
         classifier_config, \
         views, \
-        learning_rate = init_constants(kwargs, classification_indices, metrics, name,
+        learning_rate,\
+        labels = init_constants(kwargs, classification_indices, metrics, name,
                                        nb_cores, k_folds, dataset_var)
     logging.debug("Done:\t Initialize constants")
@@ -268,26 +271,23 @@
             directory, nb_cores=nb_cores, views_indices=views_indices,
             searching_tool=hyper_param_search, n_iter=n_iter,
             classifier_config=classifier_config)
-
-    classifier = getattr(classifier_module, classifier_name)(random_state=random_state,
-                                                             **classifier_config)
+    classifier = get_mc_estim(getattr(classifier_module, classifier_name)(random_state=random_state,
+                                                                          **classifier_config),
+                              dataset_var.get_labels(), random_state, multiview=True,)
     logging.debug("Done:\t Optimizing hyperparameters")
-
     logging.debug("Start:\t Fitting classifier")
     classifier.fit(dataset_var, dataset_var.get_labels(), train_indices=learning_indices,
                    view_indices=views_indices)
     logging.debug("Done:\t Fitting classifier")

     logging.debug("Start:\t Predicting")
-    train_labels = classifier.predict(dataset_var, example_indices=learning_indices,
+    pred_train_labels = classifier.predict(dataset_var, example_indices=learning_indices,
                                       view_indices=views_indices)
-    test_labels = classifier.predict(dataset_var, example_indices=validation_indices,
+    pred_test_labels = classifier.predict(dataset_var, example_indices=validation_indices,
                                      view_indices=views_indices)
     full_labels = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
-    for train_index, index in enumerate(learning_indices):
-        full_labels[index] = train_labels[train_index]
-    for test_index, index in enumerate(validation_indices):
-        full_labels[index] = test_labels[test_index]
+    full_labels[learning_indices] = pred_train_labels
+    full_labels[validation_indices] = pred_test_labels
     logging.info("Done:\t Predicting")

     classification_time = time.time() - t_start
@@ -298,13 +298,13 @@ 
def exec_multiview(directory, dataset_var, name, classification_indices, k_folds logging.info("Start:\t Result Analysis for " + cl_type) times = (extraction_time, classification_time) string_analysis, images_analysis, metrics_scores = analyze_results.execute( - classifier, train_labels, - test_labels, dataset_var, + classifier, pred_train_labels, + pred_test_labels, dataset_var, classifier_config, classification_indices, labels_dictionary, views, nb_cores, times, name, k_folds, hyper_param_search, n_iter, metrics, - views_indices, random_state, labels, classifier_module) + views_indices, random_state, labels, classifier_module, directory) logging.info("Done:\t Result Analysis for " + cl_type) logging.debug("Start:\t Saving preds") diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index da79c6cc36e20197074498683c6ced4bbdcc606c..9648943e24f980a5067e768ca344feb67552dc38 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -1,28 +1,12 @@ from sklearn.base import BaseEstimator, ClassifierMixin +from abc import abstractmethod import numpy as np -from .. import multiview_classifiers -from .. import monoview_classifiers - +from ..utils.base import BaseClassifier +from ..utils.dataset import RAMDataset +from .. import monoview_classifiers -class MultiviewResult(object): - def __init__(self, classifier_name, classifier_config, - metrics_scores, full_labels): - self.classifier_name = classifier_name - self.classifier_config = classifier_config - self.metrics_scores = metrics_scores - self.full_labels_pred = full_labels - - def get_classifier_name(self): - try: - multiview_classifier_module = getattr(multiview_classifiers, - self.classifier_name) - multiview_classifier = getattr(multiview_classifier_module, - multiview_classifier_module.classifier_class_name)(42) - return multiview_classifier.short_name - except: - return self.classifier_name class FakeEstimator(): @@ -30,11 +14,7 @@ class FakeEstimator(): return np.zeros(example_indices.shape[0]) -def get_names(classed_list): - return np.array([object_.__class__.__name__ for object_ in classed_list]) - - -class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): +class BaseMultiviewClassifier(BaseClassifier): """ BaseMultiviewClassifier base of Multiview classifiers @@ -51,49 +31,13 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): self.short_name = self.__module__.split(".")[-1] self.weird_strings = {} - def gen_best_params(self, detector): - """ - return best parameters of detector - Parameters - ---------- - detector : - - Returns - ------- - best param : dictionary with param name as key and best parameters - value - """ - return dict((param_name, detector.best_params_[param_name]) - for param_name in self.param_names) - - def genParamsFromDetector(self, detector): - if self.classed_params: - classed_dict = dict((classed_param, get_names( - detector.cv_results_["param_" + classed_param])) - for classed_param in self.classed_params) - if self.param_names: - return [(param_name, - np.array(detector.cv_results_["param_" + param_name])) - if param_name not in self.classed_params else ( - param_name, classed_dict[param_name]) - for param_name in self.param_names] - else: - return [()] - - def genDistribs(self): - return dict((param_name, distrib) for param_name, distrib in - zip(self.param_names, 
self.distribs)) + @abstractmethod + def fit(self, X, y, train_indices=None, view_indices=None): + pass - def params_to_string(self): - return ", ".join( - [param_name + " : " + self.to_str(param_name) for param_name in - self.param_names]) - - def getConfig(self): - if self.param_names: - return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() - else: - return "\n\t\t- " + self.__class__.__name__ + "with no config." + @abstractmethod + def predict(self, X, example_indices=None, view_indices=None): + pass def to_str(self, param_name): if param_name in self.weird_strings: @@ -109,17 +53,36 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): else: return str(self.get_params()[param_name]) - def get_interpretation(self): - return "No detailed interpretation function" - - -def get_examples_views_indices(dataset, examples_indices, view_indices, ): - """This function is used to get all the examples indices and view indices if needed""" - if view_indices is None: - view_indices = np.arange(dataset.nb_view) - if examples_indices is None: - examples_indices = range(dataset.get_nb_examples()) - return examples_indices, view_indices + def accepts_multi_class(self, random_state, n_samples=10, dim=2, + n_classes=3, n_views=2): + if int(n_samples / n_classes) < 1: + raise ValueError( + "n_samples ({}) / n_classe ({}) must be over 1".format( + n_samples, + n_classes)) + fake_mc_X = RAMDataset(views= [random_state.random_integers(low=0, high=100, + size=(n_samples, dim)) + for i in range(n_views)], + labels=[class_index + for _ in range(int(n_samples / n_classes)) + for class_index in range(n_classes)], + are_sparse=False, + name="mc_dset", + labels_names=[str(class_index) for class_index in range(n_classes)], + view_names=["V0", "V1"], + ) + + fake_mc_y = [class_index + for _ in range(int(n_samples / n_classes)) + for class_index in range(n_classes)] + fake_mc_y += [0 for _ in range(n_samples % n_classes)] + fake_mc_y = np.asarray(fake_mc_y) + try: + self.fit(fake_mc_X, fake_mc_y) + self.predict(fake_mc_X) + return True + except ValueError: + return False class ConfigGenerator(): @@ -166,3 +129,22 @@ def get_monoview_classifier(classifier_name): classifier_module = getattr(monoview_classifiers, classifier_name) classifier_class = getattr(classifier_module, classifier_module.classifier_class_name) return classifier_class + +from .. 
import multiview_classifiers +class MultiviewResult(object): + def __init__(self, classifier_name, classifier_config, + metrics_scores, full_labels): + self.classifier_name = classifier_name + self.classifier_config = classifier_config + self.metrics_scores = metrics_scores + self.full_labels_pred = full_labels + + def get_classifier_name(self): + try: + multiview_classifier_module = getattr(multiview_classifiers, + self.classifier_name) + multiview_classifier = getattr(multiview_classifier_module, + multiview_classifier_module.classifier_class_name)(42) + return multiview_classifier.short_name + except: + return self.classifier_name \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py index 680412c17568cd2b924971a6bf8a62c4972ead78..4013f030aeb4b6f6d0434c50252d4a6e4ff76423 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py @@ -6,9 +6,10 @@ import os import numpy as np from ...multiview.multiview_utils import ConfigGenerator, \ - get_examples_views_indices, get_available_monoview_classifiers, \ + get_available_monoview_classifiers, \ BaseMultiviewClassifier from .fusion_utils import BaseFusionClassifier +from ...utils.dataset import get_examples_views_indices class DiversityFusionClassifier(BaseMultiviewClassifier, @@ -24,21 +25,23 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, self.classifier_names = classifier_names self.param_names = ["classifier_configs"] self.distribs = [ConfigGenerator(get_available_monoview_classifiers())] - self.estimator_pool = monoview_estimators + self.monoview_estimators = monoview_estimators self.classifier_configs = classifier_configs def fit(self, X, y, train_indices=None, view_indices=None): train_indices, view_indices = get_examples_views_indices(X, train_indices, view_indices) - if self.estimator_pool is None: - self.estimator_pool = [] + if np.unique(y[train_indices]).shape[0] > 2: + raise ValueError("Multiclass not supported, classes used : {}".format(np.unique(y[train_indices]))) + if self.monoview_estimators is None: + self.monoview_estimators = [] for classifier_idx, classifier_name in enumerate(self.classifier_names): - self.estimator_pool.append([]) + self.monoview_estimators.append([]) for idx, view_idx in enumerate(view_indices): estimator = self.init_monoview_estimator(classifier_name, self.classifier_configs) estimator.fit(X.get_v(view_idx, train_indices), y[train_indices]) - self.estimator_pool[classifier_idx].append(estimator) + self.monoview_estimators[classifier_idx].append(estimator) else: pass #TODO self.choose_combination(X, y, train_indices, view_indices) @@ -66,10 +69,10 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, return predicted_labels def get_classifiers_decisions(self, X, view_indices, examples_indices): - classifiers_decisions = np.zeros((len(self.estimator_pool), + classifiers_decisions = np.zeros((len(self.monoview_estimators), len(view_indices), len(examples_indices))) - for estimator_idx, estimator in enumerate(self.estimator_pool): + for estimator_idx, estimator in enumerate(self.monoview_estimators): for idx, view_index in enumerate(view_indices): classifiers_decisions[estimator_idx, idx, :] = estimator[ idx].predict(X.get_v(view_index, 
examples_indices)) @@ -104,7 +107,7 @@ class GlobalDiversityFusionClassifier(DiversityFusionClassifier): y[examples_indices]) best_combi_index = np.argmax(div_measure) best_combination = combis[best_combi_index] - self.monoview_estimators = [self.estimator_pool[classifier_index][view_index] + self.monoview_estimators = [self.monoview_estimators[classifier_index][view_index] for view_index, classifier_index in enumerate(best_combination)] @@ -136,7 +139,7 @@ class CoupleDiversityFusionClassifier(DiversityFusionClassifier): div_measure[combinations_index] = np.mean(couple_diversities) best_combi_index = np.argmax(div_measure) best_combination = combis[best_combi_index] - self.monoview_estimators = [self.estimator_pool[classifier_index][view_index] + self.monoview_estimators = [self.monoview_estimators[classifier_index][view_index] for view_index, classifier_index in enumerate(best_combination)] diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py index 7c269dfc033dd2ccb9a34420cf76b9319703e1b1..201ae23eca50f72cdd23b49d3f650f5b4341bb7b 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py @@ -9,7 +9,10 @@ class BaseFusionClassifier(): def init_monoview_estimator(self, classifier_name, classifier_config, classifier_index=None,): if classifier_index is not None: - classifier_configs = classifier_config[classifier_name] + if classifier_config is not None: + classifier_configs = classifier_config[classifier_name] + else: + classifier_configs = None else: classifier_configs = classifier_config if classifier_configs is not None and classifier_name in classifier_configs: diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/jumbo_fusion_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/jumbo_fusion_utils.py index b20804bd5d7be25238c872ec1e80d760e1b4a2d9..9cb907df4ef3be3b4a2a38c70369e3e2d45caba4 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/jumbo_fusion_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/jumbo_fusion_utils.py @@ -1,17 +1,18 @@ import numpy as np -from ...multiview.multiview_utils import get_examples_views_indices from .late_fusion_utils import LateFusionClassifier from ...monoview.monoview_utils import CustomRandint +from ...utils.dataset import get_examples_views_indices class BaseJumboFusion(LateFusionClassifier): def __init__(self, random_state, classifiers_names=None, classifier_configs=None, - nb_cores=1, weights=None, nb_monoview_per_view=1): + nb_cores=1, weights=None, nb_monoview_per_view=1, rs=None): super(BaseJumboFusion, self).__init__(random_state, classifiers_names=classifiers_names, classifier_configs=classifier_configs, - nb_cores=nb_cores, weights=weights) + nb_cores=nb_cores, weights=weights, + rs=rs) self.param_names += ["nb_monoview_per_view", ] self.distribs += [CustomRandint(1,10)] self.nb_monoview_per_view = nb_monoview_per_view diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py index 
f54fa286e9f25fe9a513cc5f885e56139904342e..26a3cab6e9a9e113e8350dc916753d562193b64e 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py @@ -3,8 +3,9 @@ import warnings from scipy.stats import uniform -from ...multiview.multiview_utils import BaseMultiviewClassifier, get_available_monoview_classifiers, get_monoview_classifier, get_examples_views_indices, ConfigGenerator +from ...multiview.multiview_utils import BaseMultiviewClassifier, get_available_monoview_classifiers, get_monoview_classifier, ConfigGenerator from .fusion_utils import BaseFusionClassifier +from ...utils.dataset import get_examples_views_indices class ClassifierDistribution: @@ -13,7 +14,9 @@ class ClassifierDistribution: self.random_state = np.random.RandomState(seed) self.available_classifiers = available_classifiers - def draw(self, nb_view): + def draw(self, nb_view, rs=None): + if rs is not None: + self.random_state.seed(rs) return self.random_state.choice(self.available_classifiers, size=nb_view, replace=True) @@ -34,7 +37,9 @@ class ConfigDistribution: self.random_state = np.random.RandomState(seed) self.config_generator = ConfigGenerator(available_classifiers) - def draw(self, nb_view): + def draw(self, nb_view, rs=None): + if rs is not None: + self.random_state.seed(rs) config_samples = [self.config_generator.rvs(self.random_state) for _ in range(nb_view)] return config_samples @@ -74,23 +79,27 @@ class WeightsGenerator: class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier): def __init__(self, random_state=None, classifiers_names=None, - classifier_configs=None, nb_cores=1, weights=None): + classifier_configs=None, nb_cores=1, weights=None, + rs=None): super(LateFusionClassifier, self).__init__(random_state) self.classifiers_names = classifiers_names self.classifier_configs = classifier_configs self.nb_cores = nb_cores self.weights = weights - self.param_names = ["classifiers_names", "classifier_configs", "weights"] + self.rs=rs + self.param_names = ["classifiers_names", "classifier_configs", "weights", "rs"] self.distribs =[ClassifierCombinator(need_probas=self.need_probas), MultipleConfigGenerator(), - WeightsGenerator()] + WeightsGenerator(), + np.arange(1000)] def fit(self, X, y, train_indices=None, view_indices=None): - self.init_params(X.nb_view) - train_indices, view_indices = get_examples_views_indices(X, train_indices, view_indices) + self.init_params(len(view_indices)) + if np.unique(y[train_indices]).shape[0] > 2: + raise ValueError("Multiclass not supported") self.monoview_estimators = [monoview_estimator.fit(X.get_v(view_index, train_indices), y[train_indices]) for view_index, monoview_estimator @@ -121,14 +130,16 @@ class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier): else: nb_clfs = nb_view if isinstance(self.classifiers_names, ClassifierDistribution): - self.classifiers_names = self.classifiers_names.draw(nb_clfs) + self.classifiers_names = self.classifiers_names.draw(nb_clfs, self.rs) elif self.classifiers_names is None: self.classifiers_names = ["decision_tree" for _ in range(nb_clfs)] if isinstance(self.classifier_configs, ConfigDistribution): - self.classifier_configs = self.classifier_configs.draw(nb_clfs) + self.classifier_configs = self.classifier_configs.draw(nb_clfs, self.rs) elif isinstance(self.classifier_configs, dict): self.classifier_configs = [{classifier_name: 
self.classifier_configs[classifier_name]} for classifier_name in self.classifiers_names] + elif self.classifier_configs is None: + self.classifier_configs = [None for _ in range(nb_clfs)] # def verif_clf_views(self, classifier_names, nb_view): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py index 438f740ceb2d1187f753fa4dceae4eca6ee63a98..e1c05b262616355b703d96748158a4b163cde788 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py @@ -2,23 +2,25 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import \ LateFusionClassifier -from ..multiview.multiview_utils import get_examples_views_indices +from ..utils.dataset import get_examples_views_indices classifier_class_name = "BayesianInferenceClassifier" class BayesianInferenceClassifier(LateFusionClassifier): def __init__(self, random_state, classifiers_names=None, - classifier_configs=None, nb_view=None, nb_cores=1, weights=None): + classifier_configs=None, nb_cores=1, weights=None, + rs=None): self.need_probas=True super(BayesianInferenceClassifier, self).__init__(random_state=random_state, classifiers_names=classifiers_names, classifier_configs=classifier_configs, nb_cores=nb_cores, - weights=weights) + weights=weights, + rs=rs) def predict(self, X, example_indices=None, view_indices=None): - example_indices, views_indices = get_examples_views_indices(X, + example_indices, view_indices = get_examples_views_indices(X, example_indices, view_indices) @@ -26,7 +28,7 @@ class BayesianInferenceClassifier(LateFusionClassifier): self.weights = self.weights / sum(self.weights) view_scores = [] - for index, view_index in enumerate(views_indices): + for index, view_index in enumerate(view_indices): view_scores.append(np.power( self.monoview_estimators[index].predict_proba(X.get_v(view_index, example_indices)), diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py index c0e7ad4aee001081fb1d1c29a1c0f641cbabc428..932fde9e40cf80dd0be995147eb439d390c0a0b8 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py @@ -1,8 +1,7 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import LateFusionClassifier -from ..multiview.multiview_utils import get_examples_views_indices - +from ..utils.dataset import get_examples_views_indices classifier_class_name = "MajorityVoting" @@ -11,13 +10,14 @@ class VotingIndecision(Exception): class MajorityVoting(LateFusionClassifier): def __init__(self, random_state, classifiers_names=None, - classifier_configs=None, weights=None, nb_cores=1): + classifier_configs=None, weights=None, nb_cores=1, rs=None): self.need_probas=False super(MajorityVoting, self).__init__(random_state=random_state, classifiers_names=classifiers_names, classifier_configs=classifier_configs, nb_cores=nb_cores, - weights=weights) + weights=weights, + rs=rs) def predict(self, X, example_indices=None, view_indices=None): examples_indices, views_indices = get_examples_views_indices(X, diff 
--git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/svm_jumbo_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/svm_jumbo_fusion.py index 3c0b9c95db211e836a4b54c6bc0d1e1c6f6adfca..bcdf1ef8da034d6ea4c94f0d838fa389e944a866 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/svm_jumbo_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/svm_jumbo_fusion.py @@ -8,17 +8,26 @@ classifier_class_name = "SVMJumboFusion" class SVMJumboFusion(BaseJumboFusion): def __init__(self, random_state=None, classifiers_names=None, - classifier_configs=None, nb_cores=1, weights=None, nb_monoview_per_view=1, C=1.0, kernel="rbf", degree=2): + classifier_configs=None, nb_cores=1, weights=None, + nb_monoview_per_view=1, C=1.0, kernel="rbf", degree=2, rs=None): self.need_probas=False super(SVMJumboFusion, self).__init__(random_state, classifiers_names=classifiers_names, classifier_configs=classifier_configs, - nb_cores=nb_cores, weights=weights, nb_monoview_per_view=nb_monoview_per_view) + nb_cores=nb_cores, weights=weights, + nb_monoview_per_view=nb_monoview_per_view, + rs=rs) self.param_names += ["C", "kernel", "degree"] self.distribs += [CustomUniform(), ["rbf", "poly", "linear"], CustomRandint(2, 5)] self.aggregation_estimator = SVC(C=C, kernel=kernel, degree=degree) + self.C = C + self.kernel = kernel + self.degree = degree def set_params(self, C=1.0, kernel="rbf", degree=1, **params): super(SVMJumboFusion, self).set_params(**params) + self.C = C + self.degree = degree + self.kernel = kernel self.aggregation_estimator.set_params(C=C, kernel=kernel, degree=degree) return self diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index 170c1010a23cd92f2fbd42ae43e03c79fdc2fef9..27e94a742f87b293600ab35e82c05f48a20a23ae 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -3,11 +3,11 @@ import inspect # from ..utils.dataset import get_v -from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import BaseMultiviewClassifier -from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import get_examples_views_indices -from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import ConfigGenerator -from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import get_available_monoview_classifiers -from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.fusion_utils import BaseFusionClassifier +from ..utils.dataset import get_examples_views_indices +from ..multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from .additions.fusion_utils import BaseFusionClassifier +from ..utils.multiclass import get_mc_estim, MultiClassWrapper from multiview_platform.mono_multi_view_classifiers import monoview_classifiers @@ -63,13 +63,20 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): return self def get_params(self, deep=True): - return {"random_state":self.random_state, - "view_weights":self.view_weights, - "monoview_classifier_name":self.monoview_classifier_name, - 
"monoview_classifier_config":self.monoview_classifier_config} + return {"random_state": self.random_state, + "view_weights": self.view_weights, + "monoview_classifier_name": self.monoview_classifier_name, + "monoview_classifier_config": self.monoview_classifier_config} def fit(self, X, y, train_indices=None, view_indices=None): - train_indices, X = self.transform_data_to_monoview(X, train_indices, view_indices) + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + if np.unique(y[train_indices]).shape[0] > 2 and \ + not(isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + y[train_indices], + self.random_state, + multiview=False) self.monoview_classifier.fit(X, y[train_indices]) return self diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py index 00a91ebbccd63e6d77a29edf49372b3d69f4f6f8..91b69b4322dbc667beabc26ba7e4a6e742e1668f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py @@ -1,19 +1,19 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import LateFusionClassifier -from ..multiview.multiview_utils import get_examples_views_indices +from ..utils.dataset import get_examples_views_indices classifier_class_name = "WeightedLinearLateFusion" class WeightedLinearLateFusion(LateFusionClassifier): def __init__(self, random_state, classifiers_names=None, - classifier_configs=None, weights=None, nb_cores=1): + classifier_configs=None, weights=None, nb_cores=1, rs=None): self.need_probas=True super(WeightedLinearLateFusion, self).__init__(random_state=random_state, classifiers_names=classifiers_names, classifier_configs=classifier_configs, - nb_cores=nb_cores,weights=weights) + nb_cores=nb_cores,weights=weights, rs=rs) def predict(self, X, example_indices=None, view_indices=None): example_indices, views_indices = get_examples_views_indices(X, example_indices, view_indices) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/base.py b/multiview_platform/mono_multi_view_classifiers/utils/base.py index 74e8e593f712b402a4589a0a246dc95b507f2095..862ddcfcd98ffde4f0534d32633b67ef4c8444be 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/base.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/base.py @@ -7,12 +7,23 @@ import matplotlib.pyplot as plt class BaseClassifier(BaseEstimator, ): - def genBestParams(self, detector): + def gen_best_params(self, detector): + """ + return best parameters of detector + Parameters + ---------- + detector : + + Returns + ------- + best param : dictionary with param name as key and best parameters + value + """ return dict( (param_name, detector.best_params_[param_name]) for param_name in self.param_names) - def genParamsFromDetector(self, detector): + def gen_params_from_detector(self, detector): if self.classed_params: classed_dict = dict((classed_param, get_names( detector.cv_results_["param_" + classed_param])) @@ -35,7 +46,7 @@ class BaseClassifier(BaseEstimator, ): [param_name + " : " + self.to_str(param_name) for param_name in self.param_names]) - def getConfig(self): + def get_config(self): if self.param_names: return "\n\t\t- " + self.__class__.__name__ + "with 
" + self.params_to_string() else: @@ -51,40 +62,7 @@ class BaseClassifier(BaseEstimator, ): else: return str(self.get_params()[param_name]) - def get_feature_importance(self, directory, nb_considered_feats=50): - """Used to generate a graph and a pickle dictionary representing feature importances""" - featureImportances = self.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][ - :nb_considered_feats] - featureIndicesSorted = sortedArgs[:nb_considered_feats] - fig, ax = plt.subplots() - x = np.arange(len(featureIndicesSorted)) - formatter = FuncFormatter(percent) - ax.yaxis.set_major_formatter(formatter) - plt.bar(x, featureImportancesSorted) - plt.title("Importance depending on feature") - fig.savefig(directory + "feature_importances.png", transparent=True) - plt.close() - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in - enumerate(featureImportances) - if featureImportance != 0) - with open(directory + 'feature_importances.pickle', 'wb') as handle: - pickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, - featureImportancesSorted): - if featureImportance > 0: - interpretString += "- Feature index : " + str(featureIndex) + \ - ", feature importance : " + str( - featureImportance) + "\n" - return interpretString - - def get_name_for_fusion(self): - return self.__class__.__name__[:4] - - def getInterpret(self, directory, y_test): + def get_interpretation(self, directory, y_test, multi_class=False): return "" def accepts_multi_class(self, random_state, n_samples=10, dim=2, @@ -94,23 +72,22 @@ class BaseClassifier(BaseEstimator, ): "n_samples ({}) / n_classe ({}) must be over 1".format( n_samples, n_classes)) - fake_mc_X = random_state.random_integers(low=0, high=100, - size=(n_samples, dim)) - fake_mc_y = [class_index - for _ in range(int(n_samples / n_classes)) - for class_index in range(n_classes)] - fake_mc_y += [0 for _ in range(n_samples % n_classes)] - try: - self.fit(fake_mc_X, fake_mc_y) - self.predict(fake_mc_X) - return True - except ValueError: - return False + if hasattr(self, "accepts_mutli_class"): + return self.accepts_multi_class + else: + fake_mc_X = random_state.random_integers(low=0, high=100, + size=(n_samples, dim)) + fake_mc_y = [class_index + for _ in range(int(n_samples / n_classes)) + for class_index in range(n_classes)] + fake_mc_y += [0 for _ in range(n_samples % n_classes)] + try: + self.fit(fake_mc_X, fake_mc_y) + self.predict(fake_mc_X) + return True + except ValueError: + return False def get_names(classed_list): return np.array([object_.__class__.__name__ for object_ in classed_list]) - -def percent(x, pos): - """Used to print percentage of importance on the y axis""" - return '%1.1f %%' % (x * 100) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py index 60062bf2c6e4c32f498c066ec308bc4e0305f0c6..3b34cdab2e5985a78b0768b6d45cb322f0dd6bb2 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py @@ -3,6 +3,7 @@ import os import select import sys import errno +from abc import abstractmethod import h5py import numpy as np @@ -10,8 +11,229 @@ from scipy import sparse # from . 
 
 
 def get_names(classed_list):
     return np.array([object_.__class__.__name__ for object_ in classed_list])
-
-
-def percent(x, pos):
-    """Used to print percentage of importance on the y axis"""
-    return '%1.1f %%' % (x * 100)
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
index 60062bf2c6e4c32f498c066ec308bc4e0305f0c6..3b34cdab2e5985a78b0768b6d45cb322f0dd6bb2 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
@@ -3,6 +3,7 @@ import os
 import select
 import sys
 import errno
+from abc import abstractmethod
 
 import h5py
 import numpy as np
@@ -10,8 +11,229 @@ from scipy import sparse
 # from . import get_multiview_db as DB
-
 class Dataset():
+
+    @abstractmethod
+    def get_nb_examples(self):
+        pass
+
+    @abstractmethod
+    def get_v(self, view_index, example_indices=None):
+        pass
+
+    @abstractmethod
+    def get_label_names(self, example_indices=None):
+        pass
+
+    @abstractmethod
+    def get_labels(self, example_indices=None):
+        pass
+
+    @abstractmethod
+    def filter(self, labels, label_names, example_indices, view_names, path=None):
+        pass
+
+    def init_example_indces(self, example_indices=None):
+        """If no example indices are provided, selects all the examples."""
+        if example_indices is None:
+            return range(self.get_nb_examples())
+        else:
+            return example_indices
+
+    def get_shape(self, view_index=0, example_indices=None):
+        """Gets the shape of the needed view"""
+        return self.get_v(view_index, example_indices=example_indices).shape
+
+    def to_numpy_array(self, example_indices=None, view_indices=None):
+        """
+        Concatenates the needed views in one big numpy array, while saving
+        the limits of each view in a list, to be able to retrieve them later.
+
+        Parameters
+        ----------
+        example_indices : array like,
+            The indices of the examples to extract from the dataset
+
+        view_indices : array like,
+            The indices of the view to concatenate in the numpy array
+
+        Returns
+        -------
+        concat_views : numpy array,
+            The numpy array containing all the needed views.
+
+        view_limits : list of int
+            The limits of each slice used to extract the views.
+
+        """
+        view_limits = [0]
+        for view_index in view_indices:
+            view_data = self.get_v(view_index, example_indices=example_indices)
+            nb_features = view_data.shape[1]
+            view_limits.append(view_limits[-1]+nb_features)
+        concat_views = np.concatenate([self.get_v(view_index,
+                                                  example_indices=example_indices)
+                                       for view_index in view_indices], axis=1)
+        return concat_views, view_limits
+
+    def select_labels(self, selected_label_names):
+        selected_labels = [self.get_label_names().index(label_name.decode())
+                           if isinstance(label_name, bytes)
+                           else self.get_label_names().index(label_name)
+                           for label_name in selected_label_names]
+        selected_indices = np.array([index
+                                     for index, label in enumerate(self.get_labels())
+                                     if label in selected_labels])
+        labels = np.array([selected_labels.index(self.get_labels()[idx])
+                           for idx in selected_indices])
+        return labels, selected_label_names, selected_indices
+
+
+    def select_views_and_labels(self, nb_labels=None,
+                                selected_label_names=None, random_state=None,
+                                view_names = None, path_for_new="../data/"):
+        if view_names is None and selected_label_names is None and nb_labels is None:
+            pass
+        else:
+            selected_label_names = self.check_selected_label_names(nb_labels,
+                                                                   selected_label_names,
+                                                                   random_state)
+            labels, label_names, example_indices = self.select_labels(selected_label_names)
+            self.filter(labels, label_names, example_indices, view_names, path_for_new)
+        labels_dictionary = dict(
+            (labelIndex, labelName) for labelIndex, labelName in
+            enumerate(self.get_label_names()))
+        return labels_dictionary
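+    # Illustrative behaviour of check_selected_label_names below
+    # (hypothetical values): with nb_labels=3 and
+    # selected_label_names=["yes"], two more names are drawn at random from
+    # the remaining label names; with five names given and nb_labels=3, a
+    # random subset of three names is kept.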
+
+    def check_selected_label_names(self, nb_labels=None,
+                                   selected_label_names=None,
+                                   random_state=np.random.RandomState(42)):
+        if selected_label_names is None or nb_labels is None or len(selected_label_names) < nb_labels:
+            if selected_label_names is None:
+                nb_labels_to_add = nb_labels
+                selected_label_names = []
+            elif nb_labels is not None:
+                nb_labels_to_add = nb_labels - len(selected_label_names)
+            else:
+                nb_labels_to_add=0
+            labels_names_to_choose = [available_label_name
+                                      for available_label_name
+                                      in self.get_label_names()
+                                      if available_label_name
+                                      not in selected_label_names]
+            added_labels_names = random_state.choice(labels_names_to_choose,
+                                                     nb_labels_to_add,
+                                                     replace=False)
+            selected_label_names = list(selected_label_names) + list(
+                added_labels_names)
+        elif len(selected_label_names) > nb_labels:
+            selected_label_names = list(
+                random_state.choice(selected_label_names, nb_labels,
+                                    replace=False))
+
+        return selected_label_names
+
+
+class RAMDataset(Dataset):
+
+    def __init__(self, views=None, labels=None, are_sparse=False,
+                 view_names=None, labels_names=None, example_ids=None, name=None):
+        self.saved_on_disk = False
+        self.views = views
+        self.labels = np.asarray(labels)
+        if isinstance(are_sparse, bool):
+            self.are_sparse = [are_sparse for _ in range(len(views))]
+        else:
+            self.are_sparse = are_sparse
+        self.view_names = view_names
+        self.labels_names = labels_names
+        self.example_ids = example_ids
+        self.view_dict = dict((view_name, view_ind)
+                              for view_name, view_ind
+                              in zip(view_names, range(len(views))))
+        self.name=name
+        self.nb_view = len(self.views)
+        self.is_temp = False
+
+    def get_view_name(self, view_idx):
+        return self.view_names[view_idx]
+
+    def init_attrs(self):
+        """
+        Used to init the two attributes that are modified when the dataset
+        changes.
+
+        Returns
+        -------
+
+        """
+        self.nb_view = len(self.views)
+        # Maps view names to view indices, consistently with __init__ and
+        # filter (the original index-to-name mapping was inverted).
+        self.view_dict = dict((self.view_names[view_ind], view_ind)
+                              for view_ind in range(self.nb_view))
+
+    def get_nb_examples(self):
+        return self.views[0].shape[0]
+
+    def get_label_names(self, example_indices=None, decode=True):
+        selected_labels = self.get_labels(example_indices)
+        # The two branches were identical; mirroring HDF5Dataset, the decoded
+        # version returns plain strings and the raw version returns bytes.
+        if decode:
+            return [label_name
+                    for label, label_name in enumerate(self.labels_names)
+                    if label in selected_labels]
+        else:
+            return [label_name.encode("utf-8")
+                    for label, label_name in enumerate(self.labels_names)
+                    if label in selected_labels]
+
+    def get_labels(self, example_indices=None):
+        example_indices = self.init_example_indces(example_indices)
+        return self.labels[example_indices]
+
+    def get_v(self, view_index, example_indices=None):
+        example_indices = self.init_example_indces(example_indices)
+        if type(example_indices) is int:
+            return self.views[view_index][example_indices, :]
+        else:
+            example_indices = np.array(example_indices)
+            sorted_indices = np.argsort(example_indices)
+            example_indices = example_indices[sorted_indices]
+            if not self.are_sparse[view_index]:
+                return self.views[view_index][example_indices, :]
+            else:
+                # TODO Sparse support
+                pass
+
+    def get_nb_class(self, example_indices=None):
+        """Gets the number of class of the dataset"""
+        example_indices = self.init_example_indces(example_indices)
+        return len(np.unique(self.labels[example_indices]))
+
+    def filter(self, labels, label_names, example_indices, view_names, path=None):
+        if self.example_ids is not None:
+            self.example_ids = self.example_ids[example_indices]
+        self.labels = self.labels[example_indices]
+        self.labels_names = self.labels_names[np.unique(self.labels)]
+        self.labels = np.array([np.where(label == np.unique(self.labels))[0]
+                                for label in self.labels])
+        self.view_names = view_names
+        new_views = []
+        for new_view_ind, view_name in enumerate(self.view_names):
+            new_views.append(self.views[self.view_dict[view_name]][example_indices, :])
+        self.views = new_views
+        self.view_dict = dict((view_name, view_ind)
+                              for view_ind, view_name
+                              in enumerate(self.view_names))
+        self.nb_view=len(self.views)
+
+    def get_view_dict(self):
+        return self.view_dict
+
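+    # Illustrative sketch (hypothetical data): a RAMDataset over two views
+    # of five examples,
+    #
+    #     views = [np.random.rand(5, 3), np.random.rand(5, 4)]
+    #     dataset = RAMDataset(views=views, labels=[0, 1, 0, 1, 0],
+    #                          view_names=["v0", "v1"],
+    #                          labels_names=["yes", "no"], name="demo")
+    #
+    # then dataset.to_numpy_array(view_indices=[0, 1]) returns a (5, 7)
+    # array together with the view limits [0, 3, 7].
+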
+    def get_name(self):
+        return self.name
+
+
+class HDF5Dataset(Dataset):
     """
     Class of Dataset
@@ -67,7 +289,7 @@ class Dataset():
     def __init__(self, views=None, labels=None, are_sparse=False,
                  file_name="dataset.hdf5", view_names=None, path="",
                  hdf5_file=None, labels_names=None, is_temp=False,
-                 example_ids=None):
+                 example_ids=None,):
         self.is_temp = False
         if hdf5_file is not None:
             self.dataset=hdf5_file
@@ -113,6 +335,7 @@ class Dataset():
             self.example_ids = ["ID_"+str(i) for i in range(labels.shape[0])]
 
+
     def rm(self):
         """
         Method used to delete the dataset file on the disk if the dataset is
@@ -143,6 +366,7 @@ class Dataset():
         """
         return self.dataset["View"+str(view_idx)].attrs["name"]
 
+
     def init_attrs(self):
         """
         Used to init the two attributes that are modified when self.dataset
@@ -160,7 +384,8 @@ class Dataset():
                                 else "ID_"+example_id.decode()
                                 for example_id in self.dataset["Metadata"]["example_ids"]]
         else:
-            self.example_ids = [str(i) for i in range(self.dataset["Labels"].shape[0])]
+            self.example_ids = [str(i) for i in range(self.dataset["Labels"].shape[0])]
+
 
     def get_nb_examples(self):
         """
@@ -197,7 +422,6 @@ class Dataset():
         -------
 
         """
-        example_indices = self.init_example_indces(example_indices)
         selected_labels = self.get_labels(example_indices)
         if decode:
             return [label_name.decode("utf-8")
@@ -208,12 +432,6 @@ class Dataset():
                     for label, label_name in
                     enumerate(self.dataset["Labels"].attrs["names"])
                     if label in selected_labels]
 
-    def init_example_indces(self, example_indices=None):
-        """If no example indices are provided, selects all the examples."""
-        if example_indices is None:
-            return range(self.get_nb_examples())
-        else:
-            return example_indices
 
     def get_v(self, view_index, example_indices=None):
         """
@@ -286,7 +504,7 @@ class Dataset():
         self.is_temp = True
         self.init_attrs()
 
-    def filter(self, labels, label_names, example_indices, view_names, path):
+    def filter(self, labels, label_names, example_indices, view_names, path=None):
         dataset_file_path = os.path.join(path,self.get_name()+"_temp_filter.hdf5")
         new_dataset_file = h5py.File(dataset_file_path,"w")
         self.dataset.copy("Metadata", new_dataset_file)
@@ -353,98 +571,13 @@ class Dataset():
 
     # The following methods are hdf5 free
 
-    def to_numpy_array(self, example_indices=None, view_indices=None):
-        """
-        To concanteant the needed views in one big numpy array while saving the
-        limits of each view in a list, to be bale to retrieve them later.
-
-        Parameters
-        ----------
-        example_indices : array like,
-            The indices of the examples to extract from the dataset
-
-        view_indices : array like,
-            The indices of the view to concatenate in the numpy array
-
-        Returns
-        -------
-        concat_views : numpy array,
-            The numpy array containing all the needed views.
-
-        view_limits : list of int
-            The limits of each slice used to extract the views.
- - """ - view_limits = [0] - for view_index in view_indices: - view_data = self.get_v(view_index, example_indices=example_indices) - nb_features = view_data.shape[1] - view_limits.append(view_limits[-1]+nb_features) - concat_views = np.concatenate([self.get_v(view_index, - example_indices=example_indices) - for view_index in view_indices], axis=1) - return concat_views, view_limits - - - def select_views_and_labels(self, nb_labels=None, - selected_label_names=None, random_state=None, - view_names = None, path_for_new="../data/"): - if view_names is None and selected_label_names is None and nb_labels is None: - pass - else: - selected_label_names = self.check_selected_label_names(nb_labels, - selected_label_names, - random_state) - labels, label_names, example_indices = self.select_labels(selected_label_names) - self.filter(labels, label_names, example_indices, view_names, path_for_new) - labels_dictionary = dict( - (labelIndex, labelName) for labelIndex, labelName in - enumerate(self.get_label_names())) - return labels_dictionary def get_name(self): """Ony works if there are not multiple dots in the files name""" return self.dataset.filename.split('/')[-1].split('.')[0] - def select_labels(self, selected_label_names): - selected_labels = [self.get_label_names().index(label_name.decode()) - if isinstance(label_name, bytes) - else self.get_label_names().index(label_name) - for label_name in selected_label_names] - selected_indices = np.array([index - for index, label in enumerate(self.get_labels()) - if label in selected_labels]) - labels = np.array([selected_labels.index(self.get_labels()[idx]) - for idx in selected_indices]) - return labels, selected_label_names, selected_indices - def check_selected_label_names(self, nb_labels=None, - selected_label_names=None, - random_state=np.random.RandomState(42)): - if selected_label_names is None or nb_labels is None or len(selected_label_names) < nb_labels: - if selected_label_names is None: - nb_labels_to_add = nb_labels - selected_label_names = [] - elif nb_labels is not None: - nb_labels_to_add = nb_labels - len(selected_label_names) - else: - nb_labels_to_add=0 - labels_names_to_choose = [available_label_name - for available_label_name - in self.get_label_names() - if available_label_name - not in selected_label_names] - added_labels_names = random_state.choice(labels_names_to_choose, - nb_labels_to_add, - replace=False) - selected_label_names = list(selected_label_names) + list( - added_labels_names) - elif len(selected_label_names) > nb_labels: - selected_label_names = list( - random_state.choice(selected_label_names, nb_labels, - replace=False)) - - return selected_label_names def is_just_number(string): @@ -571,3 +704,12 @@ def input_(timeout=15): return sys.stdin.readline().strip() else: return "y" + + +def get_examples_views_indices(dataset, examples_indices, view_indices, ): + """This function is used to get all the examples indices and view indices if needed""" + if view_indices is None: + view_indices = np.arange(dataset.nb_view) + if examples_indices is None: + examples_indices = np.arange(dataset.get_nb_examples()) + return examples_indices, view_indices \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 5d9c960286ae64f21e5fc9c00f3ac7f4c7d43677..7d200eb31875a1eb174b5311c39e8eb0aac3015c 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ 
b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -5,7 +5,7 @@ import logging import h5py import numpy as np -from ..utils.dataset import Dataset +from ..utils.dataset import RAMDataset, HDF5Dataset # Author-Info __author__ = "Baptiste Bauvin" @@ -77,10 +77,10 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, - dataset = Dataset(views=views, labels=labels, + dataset = RAMDataset(views=views, labels=labels, labels_names=label_names, view_names=view_names, - are_sparse=are_sparse, file_name="plausible.hdf5", - path=path, example_ids=example_ids) + are_sparse=are_sparse, example_ids=example_ids, + name='plausible') labels_dictionary = {0: "No", 1: "Yes"} return dataset, labels_dictionary, "plausible" elif nb_class >= 3: @@ -115,11 +115,11 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, views.append(view_data) view_names.append("ViewNumber" + str(view_index)) are_sparse.append(False) - dataset = Dataset(views=views, labels=labels, + dataset = RAMDataset(views=views, labels=labels, labels_names=label_names, view_names=view_names, are_sparse=are_sparse, - file_name="plausible.hdf5", - path=path, example_ids=example_ids) + name="plausible", + example_ids=example_ids) labels_dictionary = {0: "No", 1: "Yes", 2: "Maybe"} return dataset, labels_dictionary, "plausible" @@ -134,14 +134,14 @@ def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names, """Used to load a hdf5 database""" if full: dataset_file = h5py.File(os.path.join(path_f, name_DB + ".hdf5"), "r") - dataset = Dataset(hdf5_file=dataset_file) + dataset = HDF5Dataset(hdf5_file=dataset_file) dataset_name = name_DB labels_dictionary = dict((label_index, label_name) for label_index, label_name in enumerate(dataset.get_label_names())) else: dataset_file = h5py.File(os.path.join(path_f, name_DB + ".hdf5"), "r") - dataset = Dataset(hdf5_file=dataset_file) + dataset = HDF5Dataset(hdf5_file=dataset_file) labels_dictionary = dataset.select_views_and_labels(nb_labels=nb_class, selected_label_names=asked_labels_names, view_names=views, random_state=random_state, diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 295e272ce3896d9e1b31520007951726f6b90497..df22a183ae27d89762e2cd48829c6dd77e654bfc 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -80,7 +80,7 @@ def compute_possible_combinations(params_dict): n_possibs[value_index] = len(value) elif isinstance(value, CustomRandint): n_possibs[value_index] = value.get_nb_possibilities() - return n_possibs + return np.prod(n_possibs) def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None): @@ -113,7 +113,8 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie estimator = getattr(classifier_module, classifier_name)(random_state=random_state, **classifier_kwargs) params_dict = estimator.gen_distribs() - estimator = get_mc_estim(estimator, random_state) + estimator = get_mc_estim(estimator, y, random_state, + multiview=(framework=="multiview")) if params_dict: metric_module = getattr(metrics, metric[0]) if metric[1] is not None: @@ -121,13 +122,14 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie enumerate(metric[1])) else: metric_kargs = {} + + scorer = 
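+        # Illustrative note: with e.g. two hyper-parameters taking 3 and 4
+        # candidate values, compute_possible_combinations returns 12, so
+        # n_iter is capped below and the search never draws more candidate
+        # sets than actually exist.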
         nb_possible_combinations = compute_possible_combinations(params_dict)
-        min_list = np.array(
-            [min(nb_possible_combination, n_iter) for nb_possible_combination in
-             nb_possible_combinations])
+        n_iter_real= min(n_iter, nb_possible_combinations)
+
         random_search = MultiviewCompatibleRandomizedSearchCV(estimator,
-                                                              n_iter=int(np.sum(min_list)),
+                                                              n_iter=int(n_iter_real),
                                                               param_distributions=params_dict,
                                                               refit=True,
                                                               n_jobs=nb_cores, scoring=scorer,
@@ -206,7 +208,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
                                           self.available_indices[test_indices],
                                           view_indices=self.view_indices)
                 test_score = self.scoring._score_func(y[self.available_indices[test_indices]],
-                                                      test_prediction)
+                                                      test_prediction,
+                                                      **self.scoring._kwargs)
                 test_scores[fold_idx] = test_score
             for param_name, param in candidate_param.items():
                 self.cv_results_["param_"+param_name].append(param)
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py
index e7b377883e737d9a0f2172449edda5f474cc797a..7988d0cd350bc8e6b7b4e67208fa6016c3cd4202 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/multiclass.py
@@ -1,136 +1,379 @@
 import itertools
 
 from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
 import numpy as np
+from abc import abstractmethod
+
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.base import clone, is_classifier, is_regressor
+import array
+import scipy.sparse as sp
+from sklearn.multiclass import _ovr_decision_function
+
 from .base import BaseClassifier
+from .dataset import get_examples_views_indices
 
-def gen_multiclass_labels(labels, multiclass_method, splits):
-    r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset
-    to biclass algorithms.
+# def gen_multiclass_labels(labels, multiclass_method, splits):
+#     r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset
+#     to biclass algorithms.
+#
+#     First, the function checks whether the dataset is really multiclass.
+#
+#     Then, it generates all the possible couples of different labels in order to perform one versus one classification.
+#
+#     For each combination, it selects the examples in the training sets (for each statistical iteration) that have their
+#     label in the combination and does the same for the testing set. It also saves the multiclass testing set in order to
+#     use multiclass metrics on the decisions.
+#
+#     Lastly, it creates a new array of biclass labels (0/1) for the biclass classifications used in oneVersusOne
+#
+#     Parameters
+#     ----------
+#     labels : numpy.ndarray
+#         Name of the database.
+#     multiclass_method : string
+#         The name of the multiclass method used (oneVersusOne, oneVersusAll, ...).
+#     splits : list of lists of numpy.ndarray
+#         For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and
+#         the ones of the testing set.
+#
+#     Returns
+#     -------
+#     multiclass_labels : list of lists of numpy.ndarray
+#         For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the
+#         indices for the biclass training set, the ones for the biclass testing set and the ones for the
+#         multiclass testing set.
+#
+#     labels_indices : list of lists of numpy.ndarray
+#         Each original couple of different labels.
+#
+#     indices_multiclass : list of lists of numpy.ndarray
+#         For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination.
+#     """
+#     if multiclass_method == "oneVersusOne":
+#         nb_labels = len(set(list(labels)))
+#         if nb_labels == 2:
+#             splits = [[trainIndices for trainIndices, _ in splits],
+#                       [testIndices for _, testIndices in splits],
+#                       [[] for _ in splits]]
+#             return [labels], [(0, 1)], [splits]
+#         else:
+#             combinations = itertools.combinations(np.arange(nb_labels), 2)
+#             multiclass_labels = []
+#             labels_indices = []
+#             indices_multiclass = []
+#             for combination in combinations:
+#                 labels_indices.append(combination)
+#                 old_indices = [example_index
+#                                for example_index, example_label in
+#                                enumerate(labels)
+#                                if example_label in combination]
+#                 train_indices = [np.array([old_index for old_index in old_indices if
+#                                            old_index in iterIndices[0]])
+#                                  for iterIndices in splits]
+#                 test_indices = [np.array([old_index for old_index in old_indices if
+#                                           old_index in iterindices[1]])
+#                                 for iterindices in splits]
+#                 test_indices_multiclass = [np.array(iterindices[1]) for
+#                                            iterindices in splits]
+#                 indices_multiclass.append(
+#                     [train_indices, test_indices, test_indices_multiclass])
+#                 new_labels = np.zeros(len(labels), dtype=int) - 100
+#                 for labelIndex, label in enumerate(labels):
+#                     if label == combination[0]:
+#                         new_labels[labelIndex] = 1
+#                     elif label == combination[1]:
+#                         new_labels[labelIndex] = 0
+#                     else:
+#                         pass
+#                 multiclass_labels.append(new_labels)
+#
+#     elif multiclass_method == "oneVersusRest":
+#         # TODO : Implement one versus rest if probas are not a problem anymore
+#         pass
+#     return multiclass_labels, labels_indices, indices_multiclass
-    First, the function checks whether the dataset is really multiclass.
 
+# def gen_multiclass_monoview_decision(monoview_result, classification_indices):
+#     learning_indices, validation_indices, test_indices_multiclass = classification_indices
+#     multiclass_monoview_decisions = monoview_result.full_labels_pred
+#     multiclass_monoview_decisions[
+#         test_indices_multiclass] = monoview_result.y_test_multiclass_pred
+#     return multiclass_monoview_decisions
+#
+#
+# def is_biclass(multiclass_preds):
+#     if multiclass_preds[0] is []:
+#         return True
+#     else:
+#         return False
-    Then, it generates all the possible couples of different labels in order to perform one versus one classification.
-    For each combination, it selects the examples in the training sets (for each statistical iteration) that have their
-    label in the combination and does the same for the testing set. It also saves the multiclass testing set in order to
-    use multiclass metrics on the decisions.
 
+def get_mc_estim(estimator, y, random_state, multiview=False):
+    r"""Used to get a multiclass-compatible estimator if the one passed as parameter does not natively support multiclass.
+    If predict_proba is available in the asked estimator, a One Versus Rest wrapper is returned,
+    else, a One Versus One wrapper is returned.
-    Lastly, it creates a new array of biclass labels (0/1) for the biclass classifications used in oneVersusOne
+    To be able to deal with multiview algorithms, multiview wrappers are implemented separately.
 
     Parameters
     ----------
-    labels : numpy.ndarray
-        Name of the database.
-    multiclass_method : string
-        The name of the multiclass method used (oneVersusOne, oneVersusAll, ...).
-    splits : list of lists of numpy.ndarray
-        For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and
-        the ones of the testing set.
+    estimator : sklearn-like estimator
+        The asked estimator
+    y : numpy.array
+        The labels of the problem
+    random_state : numpy.random.RandomState object
+        The random state, used to generate a fake multiclass problem
+    multiview : bool
+        If True, multiview-compatible wrappers are returned.
 
     Returns
     -------
-    multiclass_labels : list of lists of numpy.ndarray
-        For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the
-        indices for the biclass training set, the ones for the biclass testing set and the ones for the
-        multiclass testing set.
-
-    labels_indices : list of lists of numpy.ndarray
-        Each original couple of different labels.
-
-    indices_multiclass : list of lists of numpy.ndarray
-        For each combination, contains a biclass labels numpy.ndarray with the 0/1 labels of combination.
+    estimator : sklearn-like estimator
+        Either the asked estimator, or a multiclass-compatible wrapper over
+        the asked estimator
     """
-    if multiclass_method == "oneVersusOne":
-        nb_labels = len(set(list(labels)))
-        if nb_labels == 2:
-            splits = [[trainIndices for trainIndices, _ in splits],
-                      [testIndices for _, testIndices in splits],
-                      [[] for _ in splits]]
-            return [labels], [(0, 1)], [splits]
-        else:
-            combinations = itertools.combinations(np.arange(nb_labels), 2)
-            multiclass_labels = []
-            labels_indices = []
-            indices_multiclass = []
-            for combination in combinations:
-                labels_indices.append(combination)
-                old_indices = [example_index
-                               for example_index, example_label in
-                               enumerate(labels)
-                               if example_label in combination]
-                train_indices = [np.array([old_index for old_index in old_indices if
-                                           old_index in iterIndices[0]])
-                                 for iterIndices in splits]
-                test_indices = [np.array([old_index for old_index in old_indices if
-                                          old_index in iterindices[1]])
-                                for iterindices in splits]
-                test_indices_multiclass = [np.array(iterindices[1]) for
-                                           iterindices in splits]
-                indices_multiclass.append(
-                    [train_indices, test_indices, test_indices_multiclass])
-                new_labels = np.zeros(len(labels), dtype=int) - 100
-                for labelIndex, label in enumerate(labels):
-                    if label == combination[0]:
-                        new_labels[labelIndex] = 1
-                    elif label == combination[1]:
-                        new_labels[labelIndex] = 0
-                    else:
-                        pass
-                multiclass_labels.append(new_labels)
-
-    elif multiclass_method == "oneVersusRest":
-        # TODO : Implement one versus rest if probas are not a problem anymore
-        pass
-    return multiclass_labels, labels_indices, indices_multiclass
-
-
-def gen_multiclass_monoview_decision(monoview_result, classification_indices):
-    learning_indices, validation_indices, test_indices_multiclass = classification_indices
-    multiclass_monoview_decisions = monoview_result.full_labels_pred
-    multiclass_monoview_decisions[
-        test_indices_multiclass] = monoview_result.y_test_multiclass_pred
-    return multiclass_monoview_decisions
-
-
-def is_biclass(multiclass_preds):
-    if multiclass_preds[0] is []:
-        return True
-    else:
-        return False
-
-
-def get_mc_estim(estimator, random_state):
-    # print(estimator.accepts_multi_class(random_state))
-    if not estimator.accepts_multi_class(random_state):
-        if hasattr(estimator, "predict_proba"):
-            estimator = OVRWrapper(estimator)
-            print(estimator.get_params().keys())
-        else:
-            estimator = OneVsOneClassifier(estimator)
+    if np.unique(y).shape[0]>2:
+        if not clone(estimator).accepts_multi_class(random_state):
+            if hasattr(estimator, "predict_proba"):
+                if multiview:
+                    estimator = MultiviewOVRWrapper(estimator)
+                else:
+                    estimator = OVRWrapper(estimator)
+            else:
+                if multiview:
+                    estimator = MultiviewOVOWrapper(estimator)
+                else:
+                    estimator = OVOWrapper(estimator)
     return estimator
 
-class MCWrapper():
+
+class MultiClassWrapper:
+
+    # TODO : Has an effect on the init of the sub-classes.
+    # @abstractmethod
+    # def __init__(self, estimator, **params):
+    #     self.estimator = estimator
 
     def set_params(self, **params):
+        r"""
+        This method is useful in order for the OV_Wrappers to be transparent
+        in terms of parameters:
+        if we removed it, the parameters would have to be specified as
+        ``estimator__param``, which is not relevant for the platform.
+
+        """
         self.estimator.set_params(**params)
         return self
 
     def get_config(self):
         return self.estimator.get_config()
 
-    def get_interpret(self, output_file_name, y_test):
-        return self.estimator.get_interpret(output_file_name, y_test,
+    def get_interpretation(self, output_file_name=None, y_test=None):
+        return self.estimator.get_interpretation(output_file_name, y_test,
                                              multi_class=True)
 
-#
-#
-class OVRWrapper(MCWrapper, OneVsOneClassifier):
+class MonoviewWrapper(MultiClassWrapper):
     pass
 
-class OVOWrapper(MCWrapper, BaseClassifier):
+
+class OVRWrapper(MonoviewWrapper, OneVsRestClassifier):
+    pass
+
+class OVOWrapper(MonoviewWrapper, OneVsOneClassifier):
     pass
+
+
+# The following code is a multiview adaptation of scikit-learn's multiclass
+# package.
+
+def _multiview_fit_binary(estimator, X, y, train_indices,
+                          view_indices, classes=None,):
+    # TODO : sklearn-like input verifications
+    estimator = clone(estimator)
+    estimator.fit(X, y, train_indices=train_indices,
+                  view_indices=view_indices)
+    return estimator
+
+
+def _multiview_predict_binary(estimator, X, example_indices, view_indices):
+    if is_regressor(estimator):
+        return estimator.predict(X, example_indices=example_indices,
+                                 view_indices=view_indices)
+    try:
+        score = np.ravel(estimator.decision_function(X))
+    except (AttributeError, NotImplementedError):
+        # probabilities of the positive class
+        score = estimator.predict_proba(X, example_indices=example_indices,
+                                        view_indices=view_indices)[:, 1]
+    return score
+
+
+class MultiviewWrapper(MultiClassWrapper):
+
+    def __init__(self, estimator=None, **args):
+        super(MultiviewWrapper, self).__init__(estimator=estimator, **args)
+        self.short_name = estimator.short_name
+
+
+class MultiviewOVRWrapper(MultiviewWrapper, OneVsRestClassifier):
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        self.label_binarizer_ = LabelBinarizer(sparse_output=True)
+        Y = self.label_binarizer_.fit_transform(y)
+        Y = Y.tocsc()
+        self.classes_ = self.label_binarizer_.classes_
+        columns = (col.toarray().ravel() for col in Y.T)
+        # In cases where individual estimators are very fast to train,
+        # setting n_jobs > 1 can result in slower performance due to the
+        # overhead of spawning threads. See joblib issue #112.
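+        # One binary problem is fitted per class: column i of the binarized
+        # label matrix is 1 for the examples of class i and 0 elsewhere, and
+        # a clone of the multiview estimator is fitted on each column.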
+        self.estimators_ = [_multiview_fit_binary(
+            self.estimator, X, column, classes=[
+                "not %s" % self.label_binarizer_.classes_[i],
+                self.label_binarizer_.classes_[i]], train_indices=train_indices,
+            view_indices=view_indices)
+            for i, column in
+            enumerate(columns)]
+
+        return self
+
+    def predict(self, X, example_indices=None, view_indices=None):
+        example_indices, view_indices = get_examples_views_indices(X,
+                                                                   example_indices,
+                                                                   view_indices)
+        n_samples = len(example_indices)
+        if self.label_binarizer_.y_type_ == "multiclass":
+            maxima = np.empty(n_samples, dtype=float)
+            maxima.fill(-np.inf)
+            argmaxima = np.zeros(n_samples, dtype=int)
+            for i, e in enumerate(self.estimators_):
+                pred = _multiview_predict_binary(e, X, example_indices,
+                                                 view_indices)
+                np.maximum(maxima, pred, out=maxima)
+                argmaxima[maxima == pred] = i
+            return self.classes_[argmaxima]
+        else:
+            if (hasattr(self.estimators_[0], "decision_function") and
+                    is_classifier(self.estimators_[0])):
+                thresh = 0
+            else:
+                thresh = .5
+            indices = array.array('i')
+            indptr = array.array('i', [0])
+            for e in self.estimators_:
+                indices.extend(
+                    np.where(_multiview_predict_binary(e, X,
+                                                       example_indices,
+                                                       view_indices) > thresh)[0])
+                indptr.append(len(indices))
+            data = np.ones(len(indices), dtype=int)
+            indicator = sp.csc_matrix((data, indices, indptr),
+                                      shape=(n_samples, len(self.estimators_)))
+            return self.label_binarizer_.inverse_transform(indicator)
+
+
+def _multiview_fit_ovo_binary(estimator, X, y, i, j, train_indices,
+                              view_indices):
+    cond = np.logical_or(y == i, y == j)
+    # y = y[cond]
+    y_binary = np.empty(y.shape, np.int)
+    y_binary[y == i] = 0
+    y_binary[y == j] = 1
+    indcond = np.arange(X.get_nb_examples())[cond]
+    train_indices = np.intersect1d(train_indices, indcond)
+    return _multiview_fit_binary(estimator,
+                                 X,
+                                 y_binary, train_indices, view_indices,
+                                 classes=[i, j]), train_indices
+
+
+class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier):
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        """Fit underlying estimators.
+
+        Parameters
+        ----------
+        X : (sparse) array-like of shape (n_samples, n_features)
+            Data.
+
+        y : array-like of shape (n_samples,)
+            Multi-class targets.
+
+        Returns
+        -------
+        self
+        """
+        # X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
+        # check_classification_targets(y)
+        train_indices, view_indices = get_examples_views_indices(X,
+                                                                 train_indices,
+                                                                 view_indices)
+        self.classes_ = np.unique(y)
+        if len(self.classes_) == 1:
+            raise ValueError("OneVsOneClassifier can not be fit when only one"
+                             " class is present.")
+        n_classes = self.classes_.shape[0]
+        estimators_indices = list(zip(*([_multiview_fit_ovo_binary(
+            self.estimator, X, y, self.classes_[i], self.classes_[j],
+            train_indices,
+            view_indices)
+            for i in range(n_classes) for j in range(i + 1, n_classes)
+        ])))
+
+        self.estimators_ = estimators_indices[0]
+        self.pairwise_indices_ = (
+            estimators_indices[1] if self._pairwise else None)
+
+        return self
+
+    def predict(self, X, example_indices=None, view_indices=None):
+        """Estimate the best class label for each sample in X.
+
+        This is implemented as ``argmax(decision_function(X), axis=1)`` which
+        will return the label of the class with most votes by estimators
+        predicting the outcome of a decision for each possible class pair.
+
+        Parameters
+        ----------
+        X : (sparse) array-like of shape (n_samples, n_features)
+            Data.
+
+        Returns
+        -------
+        y : numpy array of shape [n_samples]
+            Predicted multi-class targets.
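+
+        Notes
+        -----
+        Every pairwise estimator votes with a constant confidence of one,
+        and the class gathering the most votes is predicted.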
+ """ + example_indices, view_indices = get_examples_views_indices(X, + example_indices, + view_indices) + Y = self.multiview_decision_function(X, example_indices=example_indices, + view_indices=view_indices) + if self.n_classes_ == 2: + return self.classes_[(Y > 0).astype(np.int)] + return self.classes_[Y.argmax(axis=1)] + + def multiview_decision_function(self, X, example_indices, view_indices): + # check_is_fitted(self) + + indices = self.pairwise_indices_ + if indices is None: + Xs = [X] * len(self.estimators_) + else: + # TODO Gram matrix compatibility + Xs = [X[:, idx] for idx in indices] + predictions = np.vstack([est.predict(Xi, example_indices=example_indices, + view_indices=view_indices) + for est, Xi in zip(self.estimators_, Xs)]).T + confidences = np.ones(predictions.shape) + # confidences = np.vstack([_predict_binary(est, Xi) + # for est, Xi in zip(self.estimators_, Xs)]).T + Y = _ovr_decision_function(predictions, + confidences, len(self.classes_)) + if self.n_classes_ == 2: + return Y[:, 1] + return Y + + diff --git a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py index e2cdfdf132364ec0da54d98591d202b116dda339..2a68282325538c70dc90ea51b2a4f3e3ab1ba52c 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/transformations.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/transformations.py @@ -37,7 +37,6 @@ def unsign_labels(labels): """ if len(labels.shape)==2: labels = labels.reshape((labels.shape[0], )) - print(labels) if -1 in labels: return np.array([label if label != -1 else 0 for label in labels]) else: diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py index 62d3b1cd55721f3819f5474a4becb056c42fc7b9..5b5726e2f867401c03b47b4f57286ae4a8324ae2 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_ExecClassif.py @@ -212,20 +212,21 @@ def fakeBenchmarkExec_mutlicore(nb_cores=-1, a=6, args=1): return [nb_cores, a] -def fakeBenchmarkExec_monocore(dataset_var=1, a=4, args=1): +def fakeBenchmarkExec_monocore(dataset_var=1, a=4, args=1, track_tracebacks=False): return [a] -def fakegetResults(results, stats_iter, nb_multiclass, - benchmark_arguments_dictionaries, multi_class_labels, metrics, - classification_indices, directories, directory, - labels_dictionary, nb_examples, nb_labels, example_ids): +def fakegetResults(results, stats_iter, + benchmark_arguments_dictionaries, metrics, directory, + example_ids, labels): return 3 def fakeDelete(a, b, c): return 9 +def fake_analyze(a, b, c, d, example_ids=None, labels=None): + pass class Test_execBenchmark(unittest.TestCase): @@ -241,27 +242,37 @@ class Test_execBenchmark(unittest.TestCase): "Classification":{"hps_iter": 1}} def test_simple(cls): - res = exec_classif.exec_benchmark(1, 2, 3, cls.argument_dictionaries, - [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, - 10, cls.Dataset, - # exec_one_benchmark=fakeBenchmarkExec, - # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, - exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, - get_results=fakegetResults, - delete=fakeDelete) + res = exec_classif.exec_benchmark(nb_cores=1, + stats_iter=2, + benchmark_arguments_dictionaries=cls.argument_dictionaries, + directory="", + metrics=[[[1, 2], [3, 4, 5]]], + dataset_var=cls.Dataset, + track_tracebacks=6, + # exec_one_benchmark=fakeBenchmarkExec, + # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + 
exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + get_results=fakegetResults, + delete=fakeDelete, + analyze_iterations=fake_analyze) cls.assertEqual(res, 3) def test_multiclass_no_iter(cls): cls.argument_dictionaries = [{"a": 10, "args": cls.args}, {"a": 4, "args": cls.args}] - res = exec_classif.exec_benchmark(2, 1, 2, cls.argument_dictionaries, - [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, - 10, cls.Dataset, + res = exec_classif.exec_benchmark(nb_cores=1, + stats_iter=1, + benchmark_arguments_dictionaries=cls.argument_dictionaries, + directory="", + metrics=[[[1, 2], [3, 4, 5]]], + dataset_var=cls.Dataset, + track_tracebacks=6, # exec_one_benchmark=fakeBenchmarkExec, # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, get_results=fakegetResults, - delete=fakeDelete) + delete=fakeDelete, + analyze_iterations=fake_analyze) cls.assertEqual(res, 3) def test_multiclass_and_iter(cls): @@ -269,25 +280,35 @@ class Test_execBenchmark(unittest.TestCase): {"a": 4, "args": cls.args}, {"a": 55, "args": cls.args}, {"a": 24, "args": cls.args}] - res = exec_classif.exec_benchmark(2, 2, 2, cls.argument_dictionaries, - [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, - 10, cls.Dataset, + res = exec_classif.exec_benchmark(nb_cores=1, + stats_iter=2, + benchmark_arguments_dictionaries=cls.argument_dictionaries, + directory="", + metrics=[[[1, 2], [3, 4, 5]]], + dataset_var=cls.Dataset, + track_tracebacks=6, # exec_one_benchmark=fakeBenchmarkExec, # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, get_results=fakegetResults, - delete=fakeDelete) + delete=fakeDelete, + analyze_iterations=fake_analyze) cls.assertEqual(res, 3) def test_no_iter_biclass_multicore(cls): - res = exec_classif.exec_benchmark(2, 1, 1, cls.argument_dictionaries, - [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, - 10, cls.Dataset, + res = exec_classif.exec_benchmark(nb_cores=1, + stats_iter=1, + benchmark_arguments_dictionaries=cls.argument_dictionaries, + directory="", + metrics=[[[1, 2], [3, 4, 5]]], + dataset_var=cls.Dataset, + track_tracebacks=6, # exec_one_benchmark=fakeBenchmarkExec, # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, get_results=fakegetResults, - delete=fakeDelete) + delete=fakeDelete, + analyze_iterations=fake_analyze) cls.assertEqual(res, 3) @classmethod diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_ResultAnalysis.py index 26b32efb6c965aafd7c9d17673efb75ac3bf81b1..18ad42de3f9a4ae8b2e1f8b4c27239564edc9a18 100644 --- a/multiview_platform/tests/test_ResultAnalysis.py +++ b/multiview_platform/tests/test_ResultAnalysis.py @@ -1,11 +1,11 @@ import unittest import numpy as np import pandas as pd -import time +import os -from ..mono_multi_view_classifiers import result_analysis -from ..mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult -from ..mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult +from multiview_platform.mono_multi_view_classifiers import result_analysis +from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult +from multiview_platform.mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult class Test_get_arguments(unittest.TestCase): @@ -29,7 +29,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "0", {"accuracy_score":[0.9, 0.95], "f1_score":[0.91, 0.96]} - , 
"", "", "", "", "", "")] + , "", "", "", "", "",)] metrics_scores = result_analysis.get_metrics_scores_biclass(metrics, results) self.assertIsInstance(metrics_scores, dict) @@ -56,7 +56,6 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.91, 0.96]}, full_labels_pred="", classifier_config="", - y_test_multiclass_pred="", test_folds_preds="", classifier="", n_features=""), @@ -67,7 +66,6 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.81, 0.86]}, full_labels_pred="", classifier_config="", - y_test_multiclass_pred="", test_folds_preds="", classifier="", n_features="") @@ -95,7 +93,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): def test_mutiview_result(self): metrics = [["accuracy_score"], ["f1_score"]] results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], - "f1_score": [0.71, 0.76]}, "", ""), + "f1_score": [0.71, 0.76]}, "", ), MonoviewResult(view_index=0, classifier_name="dt", view_name="1", @@ -103,7 +101,6 @@ class Test_get_metrics_scores_biclass(unittest.TestCase): "f1_score": [0.81, 0.86]}, full_labels_pred="", classifier_config="", - y_test_multiclass_pred="", test_folds_preds="", classifier="", n_features="") @@ -135,14 +132,14 @@ class Test_get_example_errors_biclass(unittest.TestCase): results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], "f1_score": [0.71, 0.76]}, np.array([0,0,0,0,1,1,1,1,1]), - ""), + ), MonoviewResult(0, "dt", "1", {"accuracy_score": [0.8, 0.85], "f1_score": [0.81, 0.86]} , np.array([0,0,1,1,0,0,1,1,0]), "", "", - "", "", "") + "", "",) ] example_errors = result_analysis.get_example_errors_biclass(ground_truth, results) @@ -171,8 +168,7 @@ class Test_init_plot(unittest.TestCase): directory, database_name, labels_names) - self.assertEqual(file_name, "dir/"+time.strftime( - "%Y_%m_%d-%H_%M_%S")+"-db-lb1_vs_lb2-acc") + self.assertEqual(file_name, os.path.join("dir", "db-lb1_vs_lb2-acc")) np.testing.assert_array_equal(train, data[0,:]) np.testing.assert_array_equal(test, data[1, :]) np.testing.assert_array_equal(classifier_names, np.array(["dt-1", "mv"])) diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index f540dab64ee913278efb36e95473566d7566a887..9d423f14a5152b64ff77c08bf19ed99ba94234c6 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -76,10 +76,10 @@ class Test_initTrainTest(unittest.TestCase): cls.Y = cls.random_state.randint(0, 2, 10) cls.classification_indices = [np.array([0, 2, 4, 6, 8]), np.array([1, 3, 5, 7, 9]), - np.array([1, 3, 5, 7, 9])] + ] def test_simple(cls): - X_train, y_train, X_test, y_test, X_test_multiclass = exec_classif_mono_view.init_train_test( + X_train, y_train, X_test, y_test = exec_classif_mono_view.init_train_test( cls.X, cls.Y, cls.classification_indices) np.testing.assert_array_equal(X_train, np.array( @@ -112,7 +112,7 @@ class Test_getHPs(unittest.TestCase): cls.X = cls.random_state.randint(0,10,size=(10,5)) cls.y = cls.random_state.randint(0,2,size=10) cls.output_file_name = tmp_path - cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state) + cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state, shuffle=True) cls.nb_cores = 1 cls.metrics = [["accuracy_score", None]] cls.kwargs = {"decision_tree" : {"max_depth": 1, diff --git a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py 
b/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py index bdbbcf262d3e1c309fe24f2cf90cb53f940fd37c..498bf4674ba8a0ad590f6cedc7c4ac00a154a11a 100644 --- a/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py +++ b/multiview_platform/tests/test_mono_view/test_MonoviewUtils.py @@ -2,7 +2,7 @@ import unittest import numpy as np from sklearn.model_selection import StratifiedKFold -from sklearn.tree.tree import DecisionTreeClassifier +from sklearn.tree import DecisionTreeClassifier from multiview_platform.mono_multi_view_classifiers.monoview import monoview_utils @@ -14,7 +14,8 @@ class Test_genTestFoldsPreds(unittest.TestCase): cls.random_state = np.random.RandomState(42) cls.X_train = cls.random_state.random_sample((31, 10)) cls.y_train = np.ones(31, dtype=int) - cls.KFolds = StratifiedKFold(n_splits=3, random_state=cls.random_state) + cls.KFolds = StratifiedKFold(n_splits=3, random_state=cls.random_state, + shuffle=True) cls.estimator = DecisionTreeClassifier(max_depth=1) @@ -29,5 +30,5 @@ class Test_genTestFoldsPreds(unittest.TestCase): cls.estimator) cls.assertEqual(testFoldsPreds.shape, (3, 10)) np.testing.assert_array_equal(testFoldsPreds[0], np.array( - [1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) + [ 1, 1, 1, 1, -1, -1, 1, -1, 1, 1])) diff --git a/multiview_platform/tests/test_utils/test_dataset.py b/multiview_platform/tests/test_utils/test_dataset.py index 6125243c08f1d6d82098f632fa28966b3a9564af..1a41f0d990a7a42ce142149a03b8086dca28c6f7 100644 --- a/multiview_platform/tests/test_utils/test_dataset.py +++ b/multiview_platform/tests/test_utils/test_dataset.py @@ -68,7 +68,7 @@ class Test_Dataset(unittest.TestCase): meta_data_grp.attrs["nbView"] = len(self.views) meta_data_grp.attrs["nbClass"] = len(np.unique(self.labels)) meta_data_grp.attrs["datasetLength"] = len(self.labels) - dataset_object = dataset.Dataset(hdf5_file=dataset_file_filter) + dataset_object = dataset.HDF5Dataset(hdf5_file=dataset_file_filter) dataset_object.filter(np.array([0, 1, 0]), ["0", "1"], [1, 2, 3], ["ViewN0"], tmp_path) self.assertEqual(dataset_object.nb_view, 1) @@ -78,10 +78,10 @@ class Test_Dataset(unittest.TestCase): os.remove(os.path.join(tmp_path, "test_filter.hdf5")) def test_for_hdf5_file(self): - dataset_object = dataset.Dataset(hdf5_file=self.dataset_file) + dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) def test_from_scratch(self): - dataset_object = dataset.Dataset(views=self.views, + dataset_object = dataset.HDF5Dataset(views=self.views, labels=self.labels, are_sparse=self.are_sparse, file_name="from_scratch"+self.file_name, @@ -96,27 +96,27 @@ class Test_Dataset(unittest.TestCase): np.testing.assert_array_equal(view, self.views[0]) def test_init_example_indices(self): - example_indices = dataset.Dataset(hdf5_file=self.dataset_file).init_example_indces() + example_indices = dataset.HDF5Dataset(hdf5_file=self.dataset_file).init_example_indces() self.assertEqual(example_indices, range(self.nb_examples)) - example_indices = dataset.Dataset(hdf5_file=self.dataset_file).init_example_indces([0,1,2]) + example_indices = dataset.HDF5Dataset(hdf5_file=self.dataset_file).init_example_indces([0,1,2]) self.assertEqual(example_indices, [0,1,2]) def test_get_v(self): - view = dataset.Dataset(hdf5_file=self.dataset_file).get_v(0) + view = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_v(0) np.testing.assert_array_equal(view, self.views[0]) - view = dataset.Dataset(hdf5_file=self.dataset_file).get_v(1, [0,1,2]) + view = 
dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_v(1, [0,1,2])
         np.testing.assert_array_equal(view, self.views[1][[0,1,2,], :])
 
     def test_get_nb_class(self):
-        nb_class = dataset.Dataset(hdf5_file=self.dataset_file).get_nb_class()
+        nb_class = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_nb_class()
         self.assertEqual(nb_class, self.nb_class)
-        nb_class = dataset.Dataset(hdf5_file=self.dataset_file).get_nb_class([0])
+        nb_class = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_nb_class([0])
         self.assertEqual(nb_class, 1)
 
     def test_get_view_dict(self):
-        dataset_object = dataset.Dataset(views=self.views,
+        dataset_object = dataset.HDF5Dataset(views=self.views,
                                          labels=self.labels,
                                          are_sparse=self.are_sparse,
                                          file_name="from_scratch" + self.file_name,
@@ -128,7 +128,7 @@ class Test_Dataset(unittest.TestCase):
                                           "ViewN2": 2,})
 
     def test_get_label_names(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         raw_label_names = dataset_object.get_label_names(decode=False)
         decoded_label_names = dataset_object.get_label_names()
         restricted_label_names = dataset_object.get_label_names(example_indices=[3,4])
@@ -137,19 +137,19 @@ class Test_Dataset(unittest.TestCase):
         self.assertEqual(restricted_label_names, ['2'])
 
     def test_get_nb_exmaples(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         nb_examples = dataset_object.get_nb_examples()
         self.assertEqual(nb_examples, self.nb_examples)
 
     def test_get_labels(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         labels = dataset_object.get_labels()
         np.testing.assert_array_equal(labels, self.labels)
         labels = dataset_object.get_labels([1,2,0])
         np.testing.assert_array_equal(labels, self.labels[[1,2,0]])
 
     def test_copy_view(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         new_dataset = h5py.File(os.path.join(tmp_path, "test_copy.hdf5"), "w")
         dataset_object.copy_view(target_dataset=new_dataset,
                                  source_view_name="ViewN0",
@@ -161,17 +161,17 @@ class Test_Dataset(unittest.TestCase):
         os.remove(os.path.join(tmp_path, "test_copy.hdf5"))
 
     def test_get_name(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         self.assertEqual("test", dataset_object.get_name())
 
     def test_select_labels(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         labels, label_names, indices = dataset_object.select_labels(["0", "2"])
         np.testing.assert_array_equal(np.unique(labels), np.array([0,1]))
         self.assertEqual(label_names, ["0","2"])
 
     def test_check_selected_label_names(self):
-        dataset_object = dataset.Dataset(hdf5_file=self.dataset_file)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file)
         names = dataset_object.check_selected_label_names(nb_labels=2, random_state=self.rs)
         self.assertEqual(names, ["1", "0"])
         names = dataset_object.check_selected_label_names(selected_label_names=['0', '2'],
@@ -198,7 +198,7 @@ class Test_Dataset(unittest.TestCase):
         meta_data_grp.attrs["nbView"] = len(self.views)
         meta_data_grp.attrs["nbClass"] = len(np.unique(self.labels))
         meta_data_grp.attrs["datasetLength"] = len(self.labels)
-        dataset_object = dataset.Dataset(hdf5_file=dataset_file_select)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=dataset_file_select)
         names = dataset_object.select_views_and_labels(nb_labels=2, view_names=["ViewN0"], random_state=self.rs, path_for_new=tmp_path)
         self.assertEqual(names, {0: '2', 1: '1'})
         self.assertEqual(dataset_object.nb_view, 1)
@@ -229,7 +229,7 @@ class Test_Dataset(unittest.TestCase):
         meta_data_grp.attrs["nbView"] = len(self.views)
         meta_data_grp.attrs["nbClass"] = len(np.unique(self.labels))
         meta_data_grp.attrs["datasetLength"] = len(self.labels)
-        dataset_object = dataset.Dataset(hdf5_file=dataset_file_select)
+        dataset_object = dataset.HDF5Dataset(hdf5_file=dataset_file_select)
         dataset_object.add_gaussian_noise(self.rs, tmp_path)
         dataset_object.dataset.close()
         os.remove(os.path.join(tmp_path, "test_noise_noised.hdf5"))
diff --git a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
index 03a9655bbc10e0c8001a479897fe084db48f95a5..2257f14a4842ddea064e131bbb4f236b2f1a1d22 100644
--- a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
+++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
@@ -7,7 +7,7 @@ from sklearn.model_selection import StratifiedKFold
 
 from multiview_platform.tests.utils import rm_tmp, tmp_path
 
-from multiview_platform.mono_multi_view_classifiers.utils.dataset import Dataset
+from multiview_platform.mono_multi_view_classifiers.utils.dataset import HDF5Dataset
 from multiview_platform.mono_multi_view_classifiers.utils import hyper_parameter_search
 from multiview_platform.mono_multi_view_classifiers.multiview_classifiers import weighted_linear_early_fusion
@@ -43,9 +43,10 @@ class Test_randomized_search(unittest.TestCase):
         cls.monoview_classifier_config = {"max_depth": 1,
                                           "criterion": "gini",
                                           "splitter": "best"}
-        cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state)
+        cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state,
+                                      shuffle=True)
         cls.learning_indices = np.array([1,2,3,4, 5,6,7,8,9])
-        cls.dataset = Dataset(hdf5_file=cls.dataset_file)
+        cls.dataset = HDF5Dataset(hdf5_file=cls.dataset_file)
 
     @classmethod
     def tearDownClass(cls):
diff --git a/multiview_platform/tests/test_utils/test_multiclass.py b/multiview_platform/tests/test_utils/test_multiclass.py
index f4ffbad929828fd04161f792366a5df1c4d6813d..62de20d75af77607867e219a235af6814d2a0547 100644
--- a/multiview_platform/tests/test_utils/test_multiclass.py
+++ b/multiview_platform/tests/test_utils/test_multiclass.py
@@ -1,48 +1,164 @@
 import unittest
 
 import numpy as np
+from sklearn.base import BaseEstimator
 
-import multiview_platform.mono_multi_view_classifiers.utils.multiclass as mm
+from multiview_platform.mono_multi_view_classifiers.utils.multiclass import get_mc_estim, \
+OVRWrapper, OVOWrapper, MultiviewOVOWrapper, MultiviewOVRWrapper
 
+class FakeMCEstim(BaseEstimator):
 
-class Test_genMulticlassLabels(unittest.TestCase):
+    def __init__(self):
+        self.short_name="short_name"
+
+    def accepts_multi_class(self, random_state):
+        return False
+
+class FakeEstimNative(FakeMCEstim):
+
+    def accepts_multi_class(self, random_state):
+        return True
+
+
+class FakeNonProbaEstim(FakeMCEstim):
+    pass
+
+
+class FakeProbaEstim(FakeMCEstim):
+
+    def predict_proba(self):
+        pass
+
+
+class Test_get_mc_estim(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
         cls.random_state = np.random.RandomState(42)
-        cls.labels = cls.random_state.randint(0, 5, 50)
-        cls.testIndices = [
-            cls.random_state.choice(np.arange(50), size=10, replace=False),
-            cls.random_state.choice(np.arange(50), size=10, replace=False)]
-        cls.classification_indices = [
-            [np.array([_ for _ in range(50) if _ not in cls.testIndices[0]]),
-             cls.testIndices[0]],
-            [np.array([_ for _ in range(50) if _ not in cls.testIndices[1]]),
-             cls.testIndices[1]]]
-
-    def test_one_versus_one(cls):
-        multiclassLabels, labelsIndices, oldIndicesMulticlass = mm.gen_multiclass_labels(
-            cls.labels, "oneVersusOne", cls.classification_indices)
-        cls.assertEqual(len(multiclassLabels), 10)
-        cls.assertEqual(labelsIndices,
-                        [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
-                         (2, 3), (2, 4), (3, 4)])
-        np.testing.assert_array_equal(oldIndicesMulticlass[0][0][0],
-                                      np.array(
-                                          [5, 13, 15, 18, 20, 24, 27, 39, 41,
-                                           43, 44, 45, 46, 48]))
-        np.testing.assert_array_equal(multiclassLabels[0],
-                                      np.array([-100, -100, -100, -100, -100, 0,
-                                                -100, -100, -100, -100, -100,
-                                                -100,
-                                                -100, 0, -100, 0, -100, -100, 1,
-                                                -100, 0, -100, -100, 1, 1, -100,
-                                                -100,
-                                                0, -100, -100, -100, -100, -100,
-                                                1, -100, -100, -100, -100, 1, 0,
-                                                -100,
-                                                1, -100, 0, 0, 1, 0, -100, 0,
-                                                -100]))
+        cls.y = cls.random_state.randint(0, 3, 10)
+
+    def test_biclass(self):
+        y = self.random_state.randint(0,2,10)
+        estimator="Test"
+        returned_estimator = get_mc_estim(estimator, y, self.random_state,)
+        self.assertEqual(returned_estimator, estimator)
+
+    def test_multiclass_native(self):
+        estimator = FakeEstimNative()
+        returned_estimator = get_mc_estim(estimator, self.y, self.random_state)
+        self.assertIsInstance(returned_estimator, FakeEstimNative)
+
+    def test_multiclass_ovo(self):
+        estimator = FakeNonProbaEstim()
+        returned_estimator = get_mc_estim(estimator, self.y, self.random_state)
+        self.assertIsInstance(returned_estimator, OVOWrapper)
+
+    def test_multiclass_ovr(self):
+        estimator = FakeProbaEstim()
+        returned_estimator = get_mc_estim(estimator, self.y, self.random_state)
+        self.assertIsInstance(returned_estimator, OVRWrapper)
+
+    def test_multiclass_ovo_multiview(self):
+        estimator = FakeNonProbaEstim()
+        returned_estimator = get_mc_estim(estimator, self.y, self.random_state,
+                                          multiview=True)
+        self.assertIsInstance(returned_estimator, MultiviewOVOWrapper)
+
+    def test_multiclass_ovr_multiview(self):
+        estimator = FakeProbaEstim()
+        returned_estimator = get_mc_estim(estimator, self.y, self.random_state,
+                                          multiview=True)
+        self.assertIsInstance(returned_estimator, MultiviewOVRWrapper)
+
+class FakeMVClassifier(BaseEstimator):
+
+    def __init__(self, short_name="None"):
+        self.short_name = short_name
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        self.n_classes = np.unique(y[train_indices]).shape[0]
+        self.views_indices = view_indices
+
+    def predict(self, X, example_indices=None, view_indices=None):
+        self.example_indices = example_indices
+        self.views_indices = view_indices
+        return np.zeros((example_indices.shape[0]))
+
+class FakeMVClassifierProb(FakeMVClassifier):
+
+    def predict_proba(self, X, example_indices=None, view_indices=None):
+        self.example_indices = example_indices
+        self.views_indices = view_indices
+        return np.zeros((example_indices.shape[0], 2))
+
+class Test_MultiviewOVRWrapper_fit(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.X = "dataset"
+        cls.n_classes=3
+        cls.y = cls.random_state.randint(0,cls.n_classes,50)
+        cls.train_indices = np.arange(25)
+        cls.example_indices = np.arange(25)+25
+        cls.view_indices="None"
+        cls.wrapper = MultiviewOVRWrapper(FakeMVClassifierProb(), )
+
+    def test_fit(self):
+        fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices,
+                                  view_indices=self.view_indices)
+        for estimator in fitted.estimators_:
+            self.assertEqual(estimator.n_classes,2)
+            self.assertEqual(estimator.views_indices, "None")
+
+    def test_predict(self):
+        fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices,
+                                  view_indices=self.view_indices)
+        pred = fitted.predict(self.X, example_indices=self.example_indices,
+                              view_indices=self.view_indices)
+        for estimator in fitted.estimators_:
+            np.testing.assert_array_equal(estimator.example_indices,
+                                          self.example_indices)
+
+
+class FakeDset:
+
+    def __init__(self, n_examples):
+        self.n_examples = n_examples
+
+    def get_nb_examples(self):
+        return self.n_examples
+
+class Test_MultiviewOVOWrapper_fit(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.n_examples=50
+        cls.X = FakeDset(n_examples=cls.n_examples)
+        cls.n_classes=3
+        cls.y = cls.random_state.randint(0,cls.n_classes,cls.n_examples)
+        cls.train_indices = np.arange(int(cls.n_examples/2))
+        cls.example_indices = np.arange(int(cls.n_examples/2))+int(cls.n_examples/2)
+        cls.view_indices="None"
+        cls.wrapper = MultiviewOVOWrapper(FakeMVClassifier(), )
+
+    def test_fit(self):
+        fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices,
+                                  view_indices=self.view_indices)
+        for estimator in fitted.estimators_:
+            self.assertEqual(estimator.n_classes,2)
+            self.assertEqual(estimator.views_indices, "None")
+
+    def test_predict(self):
+        fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices,
+                                  view_indices=self.view_indices)
+        pred = fitted.predict(self.X, example_indices=self.example_indices,
+                              view_indices=self.view_indices)
+        for estimator in fitted.estimators_:
+            np.testing.assert_array_equal(estimator.example_indices,
+                                          self.example_indices)
+
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/multiview_platform/tests/utils.py b/multiview_platform/tests/utils.py
index 1b39abf11e3810f31c827a931b2836fda5d86620..c27c31bfd0705b2de867bc754dacb2a34fafb542 100644
--- a/multiview_platform/tests/utils.py
+++ b/multiview_platform/tests/utils.py
@@ -2,11 +2,12 @@ import os
 
 import numpy as np
 import h5py
 
-from ..mono_multi_view_classifiers.utils.dataset import Dataset
+from ..mono_multi_view_classifiers.utils.dataset import HDF5Dataset
 
 tmp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp_tests/")
-test_dataset = Dataset(hdf5_file=h5py.File(os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_database.hdf5"), "r"))
+# TODO Convert to ram dataset
+test_dataset = HDF5Dataset(hdf5_file=h5py.File(os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_database.hdf5"), "r"))
 
 def rm_tmp():
     try:
diff --git a/requirements.txt b/requirements.txt
index 940b0782298fe39c2d636716e19a5a95128938db..04142c4740a18794da4ae5f9994a8c31a9fcefe6 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ kiwisolver==1.1.0
 numpy==1.16.4
 pyparsing==2.4.0
 python-dateutil==2.8.0
-scikit-learn==0.19.0
+scikit-learn>=0.19.0
 scipy==1.3.0
 six==1.12.0
 pandas==0.23.3