Commit 01670fc0 authored by Baptiste Bauvin

Merge branch 'private_algos' into develop

parents b4c56498 d3930ac2
# The base configuration of the benchmark
Base:
  log: true
  name: ["Plausible"]
  label: "_"
  type: ".hdf5"
  views:
  pathf: "../data/"
  nice: 0
  random_state: 42
  nb_cores: 1
  full: False
  debug: True
  add_noise: False
  noise_std: 0.0
  res_dir: "../results/"

# All the classification-related configuration options
Classification:
  multiclass_method: "oneVersusOne"
  split: 0.8
  nb_folds: 2
  nb_class: 2
  classes:
  type: ["multiview"]
  algos_monoview: ["all"]
  algos_multiview: ["mumbo"]
  stats_iter: 2
  metrics: ["accuracy_score", "f1_score"]
  metric_princ: "f1_score"
  hps_type: "randomized_search"
  hps_iter: 2

#####################################
# The Monoview Classifier arguments #
#####################################
random_forest:
  n_estimators: [25]
  max_depth: [3]
  criterion: ["entropy"]

svm_linear:
  C: [1]

svm_rbf:
  C: [1]

svm_poly:
  C: [1]
  degree: [2]

adaboost:
  n_estimators: [50]
  base_estimator: ["DecisionTreeClassifier"]

adaboost_pregen:
  n_estimators: [50]
  base_estimator: ["DecisionTreeClassifier"]
  n_stumps: [1]

adaboost_graalpy:
  n_iterations: [50]
  n_stumps: [1]

decision_tree:
  max_depth: [10]
  criterion: ["gini"]
  splitter: ["best"]

decision_tree_pregen:
  max_depth: [10]
  criterion: ["gini"]
  splitter: ["best"]
  n_stumps: [1]

sgd:
  loss: ["hinge"]
  penalty: ["l2"]
  alpha: [0.0001]

knn:
  n_neighbors: [5]
  weights: ["uniform"]
  algorithm: ["auto"]

scm:
  model_type: ["conjunction"]
  max_rules: [10]
  p: [0.1]

scm_pregen:
  model_type: ["conjunction"]
  max_rules: [10]
  p: [0.1]
  n_stumps: [1]

cq_boost:
  mu: [0.01]
  epsilon: [1e-06]
  n_max_iterations: [5]
  n_stumps: [1]

cg_desc:
  n_max_iterations: [10]
  n_stumps: [1]

cb_boost:
  n_max_iterations: [10]
  n_stumps: [1]

lasso:
  alpha: [1]
  max_iter: [2]

gradient_boosting:
  n_estimators: [2]

######################################
# The Multiview Classifier arguments #
######################################
weighted_linear_early_fusion:
  view_weights: [None]
  monoview_classifier_name: ["decision_tree"]
  monoview_classifier_config:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

entropy_fusion:
  classifier_names: [["decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

disagree_fusion:
  classifier_names: [["decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

double_fault_fusion:
  classifier_names: [["decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

difficulty_fusion:
  classifier_names: [["decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

scm_late_fusion:
  classifier_names: [["decision_tree"]]
  p: 0.1
  max_rules: 10
  model_type: 'conjunction'
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

majority_voting_fusion:
  classifier_names: [["decision_tree", "decision_tree", "decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

bayesian_inference_fusion:
  classifier_names: [["decision_tree", "decision_tree", "decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

weighted_linear_late_fusion:
  classifier_names: [["decision_tree", "decision_tree", "decision_tree"]]
  classifier_configs:
    decision_tree:
      max_depth: [1]
      criterion: ["gini"]
      splitter: ["best"]

mumbo:
  base_estimator: [None]
  n_estimators: [10]
  best_view_mode: ["edge"]
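This file is plain YAML, so the whole benchmark setup can be read back as nested dicts. A minimal sketch of loading it, assuming PyYAML is installed and using a hypothetical path to the file above:

import yaml  # PyYAML, assumed available

# Hypothetical location of the config shown above
with open("config_files/config.yml") as stream:
    config = yaml.safe_load(stream)

base = config["Base"]                       # benchmark-wide options
classification = config["Classification"]   # split, folds, algorithms, metrics
print(base["random_state"])                 # 42
print(classification["algos_multiview"])    # ['mumbo']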
@@ -72,10 +72,13 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
         benchmark["monoview"] = monoview_algos
     if "multiview" in cl_type:
-        benchmark["multiview"] = [name for _, name, isPackage in
-                                  pkgutil.iter_modules([
-                                      "./mono_multi_view_classifiers/multiview_classifiers"])
-                                  if not isPackage]
+        if multiview_algos == ["all"]:
+            benchmark["multiview"] = [name for _, name, isPackage in
+                                      pkgutil.iter_modules([
+                                          "./mono_multi_view_classifiers/multiview_classifiers"])
+                                      if not isPackage]
+        else:
+            benchmark["multiview"] = multiview_algos
     return benchmark
...
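For context, the "all" branch discovers algorithms by scanning a package directory with pkgutil. A standalone sketch of that discovery step (the path is the one used above; any directory of modules works the same way):

import pkgutil

# List every non-package module in the directory; each module name is
# treated as an available multiview algorithm.
available = [name for _, name, is_pkg in
             pkgutil.iter_modules(
                 ["./mono_multi_view_classifiers/multiview_classifiers"])
             if not is_pkg]
print(available)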
@@ -272,7 +272,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds
                                          searching_tool=hyper_param_search, n_iter=n_iter,
                                          classifier_config=classifier_config)
-    classifier = getattr(classifier_module, classifier_name)(random_state,
+    classifier = getattr(classifier_module, classifier_name)(random_state=random_state,
                                                              **classifier_config)
     logging.debug("Done:\t Optimizing hyperparameters")
...
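Passing random_state by keyword matters because constructors such as Mumbo's below take base_estimator as their first parameter, so a positional random_state would silently bind to the wrong argument. A minimal hypothetical illustration:

class Clf:
    def __init__(self, base_estimator=None, n_estimators=50, random_state=None):
        self.base_estimator = base_estimator
        self.random_state = random_state

clf = Clf(42)                # 42 silently becomes base_estimator
print(clf.base_estimator)    # 42
print(clf.random_state)      # None
clf = Clf(random_state=42)   # the keyword binds it correctly
print(clf.random_state)      # 42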
from sklearn.tree import DecisionTreeClassifier
from multimodalboost.mumbo import MumboClassifier

from ..multiview.multiview_utils import BaseMultiviewClassifier, \
    get_examples_views_indices
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "Mumbo"


class Mumbo(BaseMultiviewClassifier, MumboClassifier):

    def __init__(self, base_estimator=None,
                 n_estimators=50,
                 random_state=None,
                 best_view_mode="edge"):
        # Initialize the platform-side base class first, then the underlying
        # MumboClassifier (the class after BaseMultiviewClassifier in the MRO).
        super().__init__(random_state)
        super(BaseMultiviewClassifier, self).__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            random_state=random_state,
            best_view_mode=best_view_mode)
        # Hyperparameter names and their distributions for randomized search.
        self.param_names = ["base_estimator", "n_estimators", "random_state",
                            "best_view_mode"]
        self.distribs = [[DecisionTreeClassifier(max_depth=1)],
                         CustomRandint(5, 200), [random_state],
                         ["edge", "error"]]

    def fit(self, X, y, train_indices=None, view_indices=None):
        # Resolve default indices, flatten the selected views into a single
        # numpy array, and fit the underlying MumboClassifier on it.
        train_indices, view_indices = get_examples_views_indices(X,
                                                                 train_indices,
                                                                 view_indices)
        numpy_X, view_limits = X.to_numpy_array(example_indices=train_indices,
                                                view_indices=view_indices)
        return super(Mumbo, self).fit(numpy_X, y[train_indices],
                                      view_limits)

    def predict(self, X, example_indices=None, view_indices=None):
        # Same flattening as in fit, then delegate prediction.
        example_indices, view_indices = get_examples_views_indices(
            X, example_indices, view_indices)
        numpy_X, view_limits = X.to_numpy_array(example_indices=example_indices,
                                                view_indices=view_indices)
        return super(Mumbo, self).predict(numpy_X)
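The two-step super() call above is a cooperative multiple-inheritance idiom: the bare super() initialises the next class in the MRO (BaseMultiviewClassifier), while super(BaseMultiviewClassifier, self) skips past it to MumboClassifier. A self-contained sketch of the same pattern, using hypothetical classes:

class Wrapper:
    def __init__(self, random_state):
        self.random_state = random_state

class Engine:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators

class Combined(Wrapper, Engine):
    # MRO: Combined -> Wrapper -> Engine -> object
    def __init__(self, n_estimators=10, random_state=None):
        super().__init__(random_state)                             # Wrapper.__init__
        super(Wrapper, self).__init__(n_estimators=n_estimators)   # Engine.__init__

c = Combined()
print(c.random_state, c.n_estimators)  # None 10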
@@ -295,6 +295,39 @@ class Dataset():
     # The following methods are hdf5 free

+    def to_numpy_array(self, example_indices=None, view_indices=None):
+        """
+        Concatenates the needed views into one big numpy array, while saving
+        the limits of each view in a list so they can be retrieved later.
+
+        Parameters
+        ----------
+        example_indices : array-like
+            The indices of the examples to extract from the dataset.
+        view_indices : array-like
+            The indices of the views to concatenate in the numpy array.
+
+        Returns
+        -------
+        concat_views : numpy array
+            The numpy array containing all the needed views.
+        view_limits : list of int
+            The limits of each slice used to extract the views.
+        """
+        view_limits = [0]
+        for view_index in view_indices:
+            view_data = self.get_v(view_index, example_indices=example_indices)
+            nb_features = view_data.shape[1]
+            view_limits.append(view_limits[-1] + nb_features)
+        concat_views = np.concatenate([self.get_v(view_index,
+                                                  example_indices=example_indices)
+                                       for view_index in view_indices], axis=1)
+        return concat_views, view_limits
+
     def select_views_and_labels(self, nb_labels=None,
                                 selected_label_names=None, random_state=None,
                                 view_names=None, path_for_new="../data/"):
...
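What view_limits encodes: view k of the concatenated array occupies columns view_limits[k] through view_limits[k+1]. A toy sketch with made-up shapes, independent of the Dataset class:

import numpy as np

views = [np.ones((4, 3)), 2 * np.ones((4, 5))]  # two views: 3 and 5 features
view_limits = [0]
for view in views:
    view_limits.append(view_limits[-1] + view.shape[1])  # -> [0, 3, 8]
concat_views = np.concatenate(views, axis=1)             # shape (4, 8)

# Recover the second view from the concatenated array.
second = concat_views[:, view_limits[1]:view_limits[2]]
assert second.shape == (4, 5)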
@@ -93,7 +93,7 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None)
 def randomized_search(X, y, framework, random_state, output_file_name, classifier_module,
                       classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None],
                       n_iter=30, classifier_kwargs=None, learning_indices=None, view_indices=None):
-    estimator = getattr(classifier_module, classifier_name)(random_state,
+    estimator = getattr(classifier_module, classifier_name)(random_state=random_state,
                                                             **classifier_kwargs)
     params_dict = estimator.genDistribs()
     if params_dict:
...
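randomized_search then draws n_iter settings from the distributions the estimator exposes. The project has its own sampling loop, but the idea is the same as scikit-learn's ParameterSampler; a sketch with illustrative parameter names:

from scipy.stats import randint
from sklearn.model_selection import ParameterSampler

# Per-parameter spaces: either an rvs-style distribution or a list of values.
params_dict = {"n_estimators": randint(5, 200),
               "best_view_mode": ["edge", "error"]}
for setting in ParameterSampler(params_dict, n_iter=3, random_state=42):
    print(setting)  # e.g. {'n_estimators': 107, 'best_view_mode': 'edge'}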