diff --git a/config_files/config_test.yml b/config_files/config_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a4d59df8fc4edc1066c69fb6d7d99b427f66f15 --- /dev/null +++ b/config_files/config_test.yml @@ -0,0 +1,205 @@ +# The base configuration of the benchmark +Base : + log: true + name: ["Plausible"] + label: "_" + type: ".hdf5" + views: + pathf: "../data/" + nice: 0 + random_state: 42 + nb_cores: 1 + full: False + debug: True + add_noise: False + noise_std: 0.0 + res_dir: "../results/" + +# All the classification-related configuration options +Classification: + multiclass_method: "oneVersusOne" + split: 0.8 + nb_folds: 2 + nb_class: 2 + classes: + type: ["multiview"] + algos_monoview: ["all"] + algos_multiview: ["mumbo"] + stats_iter: 2 + metrics: ["accuracy_score", "f1_score"] + metric_princ: "f1_score" + hps_type: "randomized_search" + hps_iter: 2 + + +##################################### +# The Monoview Classifier arguments # +##################################### + +random_forest: + n_estimators: [25] + max_depth: [3] + criterion: ["entropy"] + +svm_linear: + C: [1] + +svm_rbf: + C: [1] + +svm_poly: + C: [1] + degree: [2] + +adaboost: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + +adaboost_pregen: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + n_stumps: [1] + +adaboost_graalpy: + n_iterations: [50] + n_stumps: [1] + +decision_tree: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + +decision_tree_pregen: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + n_stumps: [1] + +sgd: + loss: ["hinge"] + penalty: [l2] + alpha: [0.0001] + +knn: + n_neighbors: [5] + weights: ["uniform"] + algorithm: ["auto"] + +scm: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + +scm_pregen: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + n_stumps: [1] + +cq_boost: + mu: [0.01] + epsilon: [1e-06] + n_max_iterations: [5] + n_stumps: [1] + +cg_desc: + 
n_max_iterations: [10] + n_stumps: [1] + +cb_boost: + n_max_iterations: [10] + n_stumps: [1] + +lasso: + alpha: [1] + max_iter: [2] + +gradient_boosting: + n_estimators: [2] + + +###################################### +# The Multiview Classifier arguments # +###################################### + +weighted_linear_early_fusion: + view_weights: [None] + monoview_classifier_name: ["decision_tree"] + monoview_classifier_config: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +entropy_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +disagree_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + + +double_fault_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +difficulty_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +scm_late_fusion: + classifier_names: [["decision_tree"]] + p: 0.1 + max_rules: 10 + model_type: 'conjunction' + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +majority_voting_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +bayesian_inference_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +weighted_linear_late_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +mumbo: + 
base_estimator: [None] + n_estimators: [10] + best_view_mode: ["edge"] \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 2adbba0dadfc8f75544c2ececd36dcdac488a1e1..aa342b5433e51a1be444df2f7914ef22e6e47fd2 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -72,10 +72,13 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): benchmark["monoview"] = monoview_algos if "multiview" in cl_type: - benchmark["multiview"] = [name for _, name, isPackage in - pkgutil.iter_modules([ - "./mono_multi_view_classifiers/multiview_classifiers"]) - if not isPackage] + if multiview_algos==["all"]: + benchmark["multiview"] = [name for _, name, isPackage in + pkgutil.iter_modules([ + "./mono_multi_view_classifiers/multiview_classifiers"]) + if not isPackage] + else: + benchmark["multiview"] = multiview_algos return benchmark diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 7cca55ea9613cc0b33d16a6524c17789d9f31673..d1ccd57fd4596f269fc4454ebb2b6b8c790763f1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -272,7 +272,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds searching_tool=hyper_param_search, n_iter=n_iter, classifier_config=classifier_config) - classifier = getattr(classifier_module, classifier_name)(random_state, + classifier = getattr(classifier_module, classifier_name)(random_state=random_state, **classifier_config) logging.debug("Done:\t Optimizing hyperparameters") diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py 
from sklearn.tree import DecisionTreeClassifier

from multimodalboost.mumbo import MumboClassifier
from ..multiview.multiview_utils import BaseMultiviewClassifier, \
    get_examples_views_indices
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "Mumbo"


class Mumbo(BaseMultiviewClassifier, MumboClassifier):
    """
    Adapter exposing ``multimodalboost``'s ``MumboClassifier`` through the
    platform's ``BaseMultiviewClassifier`` interface.

    The platform hands over a dataset object; this class flattens the
    requested views into a single numpy array (plus the column limits of
    each view) before delegating to the underlying boosting classifier.

    Parameters
    ----------
    base_estimator : object or None
        Forwarded unchanged to ``MumboClassifier``.

    n_estimators : int
        Forwarded unchanged to ``MumboClassifier``.

    random_state : object or None
        Given to both parent initialisers.

    best_view_mode : str
        Forwarded unchanged to ``MumboClassifier``; the search space
        declared below lists "edge" and "error".
    """

    def __init__(self, base_estimator=None, n_estimators=50,
                 random_state=None, best_view_mode="edge"):
        # Two explicit initialisation steps: the next class in the MRO after
        # Mumbo receives only the random state ...
        super().__init__(random_state)
        # ... and the class following BaseMultiviewClassifier in the MRO
        # receives the boosting hyper-parameters.
        boosting_kwargs = dict(base_estimator=base_estimator,
                               n_estimators=n_estimators,
                               random_state=random_state,
                               best_view_mode=best_view_mode)
        super(BaseMultiviewClassifier, self).__init__(**boosting_kwargs)

        # Hyper-parameter names and, position by position, their candidate
        # values or sampling distributions.
        # NOTE(review): presumably consumed by the randomized search through
        # the base class - confirm.
        self.param_names = ["base_estimator", "n_estimators", "random_state",
                            "best_view_mode"]
        stump = DecisionTreeClassifier(max_depth=1)
        self.distribs = [[stump],
                         CustomRandint(5, 200),
                         [random_state],
                         ["edge", "error"]]

    def fit(self, X, y, train_indices=None, view_indices=None):
        """
        Fit the classifier on the selected examples and views of ``X``.

        Parameters
        ----------
        X : dataset object
            Must provide ``to_numpy_array(example_indices, view_indices)``.

        y : array like
            Labels; indexed with the resolved ``train_indices``.

        train_indices : array like or None
            Examples used for training, resolved by
            ``get_examples_views_indices``.

        view_indices : array like or None
            Views used for training, resolved by
            ``get_examples_views_indices``.

        Returns
        -------
        Whatever the parent ``fit`` returns.
        """
        train_indices, view_indices = get_examples_views_indices(
            X, train_indices, view_indices)
        stacked_views, limits = X.to_numpy_array(
            example_indices=train_indices, view_indices=view_indices)
        train_labels = y[train_indices]
        return super().fit(stacked_views, train_labels, limits)

    def predict(self, X, example_indices=None, view_indices=None):
        """
        Predict labels for the selected examples and views of ``X``.

        Parameters
        ----------
        X : dataset object
            Must provide ``to_numpy_array(example_indices, view_indices)``.

        example_indices : array like or None
            Examples to predict, resolved by ``get_examples_views_indices``.

        view_indices : array like or None
            Views to use, resolved by ``get_examples_views_indices``.

        Returns
        -------
        Whatever the parent ``predict`` returns.
        """
        example_indices, view_indices = get_examples_views_indices(
            X, example_indices, view_indices)
        # The view limits are not passed to the parent predict.
        stacked_views, _ = X.to_numpy_array(
            example_indices=example_indices, view_indices=view_indices)
        return super().predict(stacked_views)
def to_numpy_array(self, example_indices=None, view_indices=None):
    """
    Concatenate the needed views in one big numpy array while saving the
    limits of each view in a list, to be able to retrieve them later.

    Each view is fetched exactly once through ``self.get_v``; the same
    arrays are used both to compute the limits and to build the
    concatenated array.

    Parameters
    ----------
    example_indices : array like,
        The indices of the examples to extract from the dataset. Passed
        unchanged to ``self.get_v``.

    view_indices : iterable of int,
        The indices of the views to concatenate in the numpy array. It is
        iterated directly, so the ``None`` default is NOT resolved here:
        callers must pass the actual view indices.

    Returns
    -------
    concat_views : numpy array,
        The numpy array containing all the needed views, concatenated
        along axis 1 in the order given by ``view_indices``.

    view_limits : list of int
        The limits of each slice used to extract the views: view number
        ``i`` (in ``view_indices`` order) occupies the columns
        ``view_limits[i]:view_limits[i + 1]`` of ``concat_views``.

    """
    # Fetch every view a single time; the previous implementation called
    # get_v twice per view (once for its width, once for concatenation).
    views = [self.get_v(view_index, example_indices=example_indices)
             for view_index in view_indices]
    view_limits = [0]
    for view_data in views:
        # Running sum of feature counts gives the right edge of each view.
        view_limits.append(view_limits[-1] + view_data.shape[1])
    concat_views = np.concatenate(views, axis=1)
    return concat_views, view_limits
randomized_search(X, y, framework, random_state, output_file_name, classifier_module, classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): - estimator = getattr(classifier_module, classifier_name)(random_state, + estimator = getattr(classifier_module, classifier_name)(random_state=random_state, **classifier_kwargs) params_dict = estimator.genDistribs() if params_dict: