Added algorithms

20b78ad2 · Baptiste Bauvin · 427049b8 · 20b78ad2 · 20b78ad2 · 20b78ad2
Commit 20b78ad2 authored 4 years ago by Baptiste Bauvin
--- a/config_files/config_cuisine.yml
+++ b/config_files/config_cuisine.yml
 # The base configuration of the benchmark
 log: True
-name: ["demo"]
+name: ["test_boules"]
 label: "_1_3"
 file_type: ".hdf5"
 views:
-pathf: "/home/baptiste/Documents/Datasets/Generated/"
+pathf: "/home/baptiste/Documents/Clouded/short_projects/latent_space_study/"
 nice: 0
 random_state: 42
 nb_cores: 1
@@ -17,23 +17,74 @@ track_tracebacks: False

 # All the classification-related configuration options
 multiclass_method: "oneVersusOne"
-split: 0.75
+split: 0.10
 nb_folds: 5
-nb_class: 2
-classes: ['label_1', 'label_3']
+nb_class: 4
+classes:
 type: ["multiview","monoview"]
-algos_monoview: ["cb_boost",]
-algos_multiview: ["multiview_cbound_boosting"]
+algos_monoview: ["cb_boost", "decision_tree", 'random_forest']
+algos_multiview: ["mv_cb_boost", "weighted_linear_late_fusion","weighted_linear_early_fusion","mumbo" ]
 stats_iter: 5
 metrics:
  accuracy_score: {}
  f1_score:
-    average: 'binary'
+    average: 'micro'
 metric_princ: "accuracy_score"
-hps_type: "None"
-hps_args: {}
+hps_type: "Random"
+hps_args:
+  n_iter: 10
+  equivalent_draws: True

 cb_boost:
-  n_stumps: 10
-multiview_cbound_boosting:
-  n_stumps: 10
\ No newline at end of file
+  n_stumps: 30
+  n_max_iterations: 20
+  estimators_generator: "Trees"
+  max_depth: 1
+decision_tree:
+  max_depth: 2
+mumbo:
+  base_estimator: decision_tree
+  base_estimator__max_depth: 1
+  n_estimators: 80
+
+mv_cb_boost:
+  n_max_iterations: 80
+  n_stumps: 30
+  estimators_generator: "Trees"
+  max_depth: 1
+
+pb_mv_boost:
+  num_iterations: 20
+  decision_tree_depth: 1
+weighted_linear_early_fusion:
+  monoview_classifier_name: "cb_boost"
+  monoview_classifier_config:
+    cb_boost:
+      n_stumps: 30
+      n_max_iterations: 20
+      estimators_generator: "Trees"
+      max_depth: 1
+weighted_linear_late_fusion:
+  classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"]
+  classifier_configs:
+    - cb_boost:
+        n_stumps: 30
+        n_max_iterations: 20
+        estimators_generator: "Trees"
+        max_depth: 1
+    - cb_boost:
+        n_stumps: 30
+        n_max_iterations: 20
+        estimators_generator: "Trees"
+        max_depth: 1
+    - cb_boost:
+        n_stumps: 30
+        n_max_iterations: 20
+        estimators_generator: "Trees"
+        max_depth: 1
+    - cb_boost:
+        n_stumps: 30
+        n_max_iterations: 20
+        estimators_generator: "Trees"
+        max_depth: 1
+
--- a/summit/__init__.py
+++ b/summit/__init__.py


 __version__ = "0.0.0.0"
+__url__ = "https://gitlab.lis-lab.fr/baptiste.bauvin/summit"

 from . import multiview_platform, execute
--- a/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py
+++ b/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py
@@ -129,6 +129,7 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
            np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1])))
                                for i in
                                range(self.estimator_errors_.shape[0])])
+        return self

    # def canProbas(self):
    #     """

--- a/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py
+++ b/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py
@@ -140,6 +140,18 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
        self.feature_importances_ /= np.sum(self.feature_importances_)
        return self

+    def predict_proba(self, X):
+        """Return a two-column score matrix built from the weighted vote.
+
+        For every sample the margin is the sum, over voters, of the voter
+        output multiplied by its weight in ``self.weights_``. Column 0 holds
+        ``(1 - margin) / 2`` and column 1 holds ``(1 + margin) / 2``.
+
+        Parameters
+        ----------
+        X : array-like or scipy sparse matrix
+            Samples to score. Sparse input is converted to a dense array
+            (a warning is logged).
+
+        Returns
+        -------
+        numpy.ndarray
+            One row per sample and two columns, including for empty input.
+
+        Notes
+        -----
+        NOTE(review): the rows are valid probabilities only if every margin
+        lies in [-1, 1] -- presumably guaranteed by how ``weights_`` is
+        normalised during ``fit``; confirm.
+        """
+        check_is_fitted(self, 'weights_')
+        if scipy.sparse.issparse(X):
+            logging.warning('Converting sparse matrix to dense matrix.')
+            X = np.array(X.todense())
+
+        classification_matrix = self._binary_classification_matrix(X)
+        margins = np.sum(classification_matrix * self.weights_, axis=1)
+        # Vectorised equivalent of building one [neg, pos] pair per sample;
+        # it also keeps the two-column shape when there are no samples.
+        return np.column_stack(((1 - margins) / 2, (1 + margins) / 2))
+
    def predict(self, X):
        start = time.time()
        check_is_fitted(self, 'weights_')

--- a/summit/multiview_platform/monoview_classifiers/bagging.py
+++ b/summit/multiview_platform/monoview_classifiers/bagging.py
@@ -52,6 +52,7 @@ class Bagging(BaggingClassifier, BaseMonoviewClassifier,):
        end = time.time()
        self.train_time = end - begin
        self.train_shape = X.shape
+        return self


    def predict(self, X):

--- a/summit/multiview_platform/monoview_classifiers/bagging_pregen.py
+++ b/summit/multiview_platform/monoview_classifiers/bagging_pregen.py
@@ -64,6 +64,7 @@ class BaggingPregen(BaggingClassifier, BaseMonoviewClassifier,
        end = time.time()
        self.train_time = end - begin
        self.train_shape = pregen_X.shape
+        return self




--- a/summit/multiview_platform/monoview_classifiers/gradient_boosting_pregen.py
+++ b/summit/multiview_platform/monoview_classifiers/gradient_boosting_pregen.py
@@ -63,6 +63,7 @@ class GradientBoostingPregen(GradientBoostingClassifier, BaseMonoviewClassifier,
        # self.base_predictions = np.array(
        #     [change_label_to_zero(estim.predict(pregen_X)) for estim in
        #      self.estimators_])
+        return self




--- a/summit/multiview_platform/monoview_classifiers/random_forest_pregen.py
+++ b/summit/multiview_platform/monoview_classifiers/random_forest_pregen.py
@@ -62,7 +62,7 @@ class RandomForestPregen(RandomForestClassifier, BaseMonoviewClassifier,
        self.base_predictions = np.array(
            [change_label_to_zero(estim.predict(pregen_X)) for estim in
             self.estimators_])
-
+        return self


    def predict(self, X):

--- a/summit/multiview_platform/monoview_classifiers/scm.py
+++ b/summit/multiview_platform/monoview_classifiers/scm.py
@@ -74,6 +74,7 @@ class SCM(scm, BaseMonoviewClassifier):
        for rule, importance in zip(self.model_.rules, rules_importances):
            self.feature_importances_[rule.feature_idx] += importance
        self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self

    # def canProbas(self):
    #     """

--- a/summit/multiview_platform/monoview_classifiers/scm_bagging.py
+++ b/summit/multiview_platform/monoview_classifiers/scm_bagging.py
@@ -69,7 +69,7 @@ class ScmBagging(ScmBaggingClassifier, BaseMonoviewClassifier):
    """

    def __init__(self,
-                 n_estimators=10,
+                 n_estimators=50,
                 max_samples=1.0,
                 max_features=1.0,
                 max_rules=10,

--- a/summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py
+++ b/summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py
-from .scm_bagging import ScmBagging
-from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
+from scm_bagging.scm_bagging_classifier import ScmBaggingClassifier
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint
+
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype

 classifier_class_name = "ScmBaggingMinCq"

-class ScmBaggingMinCq(ScmBagging):
+import numpy as np
+from six import iteritems
+
+MAX_INT = np.iinfo(np.int32).max
+
+
+class ScmBaggingMinCq(ScmBaggingClassifier, BaseMonoviewClassifier):
+    """A Bagging classifier. for SetCoveringMachineClassifier()
+    The base estimators are built on subsets of both samples
+    and features.
+    Parameters
+    ----------
+    n_estimators : int, default=10
+        The number of base estimators in the ensemble.
+    max_samples : int or float, default=1.0
+        The number of samples to draw from X to train each base estimator with
+        replacement.
+        - If int, then draw `max_samples` samples.
+        - If float, then draw `max_samples * X.shape[0]` samples.
+    max_features : int or float, default=1.0
+        The number of features to draw from X to train each base estimator (
+        without replacement.
+        - If int, then draw `max_features` features.
+        - If float, then draw `max_features * X.shape[1]` features.
+    p_options : list of float with len =< n_estimators, default=[1.0]
+        The estimators will be fitted with values of p found in p_options
+        let k be k = n_estimators/len(p_options),
+        the k first estimators will have p=p_options[0],
+        the next k estimators will have p=p_options[1] and so on...
+    random_state : int or RandomState, default=None
+        Controls the random resampling of the original dataset
+        (sample wise and feature wise).
+        If the base estimator accepts a `random_state` attribute, a different
+        seed is generated for each instance in the ensemble.
+        Pass an int for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    Attributes
+    ----------
+    n_features_ : int
+        The number of features when :meth:`fit` is performed.
+    estimators_ : list of estimators
+        The collection of fitted base estimators.
+    estim_features : list of arrays
+        The subset of drawn features for each base estimator.
+
+    Examples
+    --------
+    >>> @TODO
+
+    References
+    ----------
+    .. [1] L. Breiman, "Pasting small votes for classification in large
+           databases and on-line", Machine Learning, 36(1), 85-103, 1999.
+    .. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
+           Learning and Knowledge Discovery in Databases, 346-361, 2012.
+    """
+
    def __init__(self,
-                 n_estimators=10,
+                 n_estimators=50,
                 max_samples=1.0,
                 max_features=1.0,
                 max_rules=10,
@@ -14,7 +78,9 @@ class ScmBaggingMinCq(ScmBagging):
                 min_cq_combination=True,
                 min_cq_mu=10e-3,
                 random_state=None):
-        ScmBagging.__init__(self, n_estimators=n_estimators,
+        if isinstance(p_options, float):
+            p_options = [p_options]
+        ScmBaggingClassifier.__init__(self, n_estimators=n_estimators,
                 max_samples=max_samples,
                 max_features=max_features,
                 max_rules=max_rules,
@@ -23,5 +89,22 @@ class ScmBaggingMinCq(ScmBagging):
                 min_cq_combination=min_cq_combination,
                 min_cq_mu=min_cq_mu,
                 random_state=random_state)
-        self.param_names.append("min_cq_mu")
-        self.distribs.append(CustomRandint(1,7, multiplier='e-'))
\ No newline at end of file
+        self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
+        self.classed_params = []
+        self.distribs = [CustomRandint(low=1, high=300), CustomRandint(low=1, high=20),
+                         CustomUniform(), CustomUniform(), ["conjunction", "disjunction"], CustomUniform(), [random_state]]
+        self.weird_strings = {}
+
+    def set_params(self, p_options=None, **kwargs):
+        """Set estimator parameters as plain attributes and return ``self``.
+
+        Parameters
+        ----------
+        p_options : list or scalar, optional
+            Values of ``p`` to use. A non-list value is wrapped in a
+            one-element list. When omitted, ``[0.316]`` is used.
+        **kwargs
+            Any other parameter; each is stored with ``setattr``.
+
+        Returns
+        -------
+        self
+
+        Notes
+        -----
+        NOTE(review): ``p_options`` is always (re)assigned, so a call that
+        only passes other parameters resets it to ``[0.316]`` -- confirm this
+        is intended.
+        """
+        # A fresh list is built on every call: the previous mutable default
+        # was shared by every instance that relied on it.
+        if p_options is None:
+            p_options = [0.316]
+        elif not isinstance(p_options, list):
+            p_options = [p_options]
+        kwargs["p_options"] = p_options
+        for parameter, value in kwargs.items():
+            setattr(self, parameter, value)
+        return self
+
+    def get_interpretation(self, directory, base_file_name, y_test,
+                           multi_class=False):
+        """Refresh the feature importances and return their interpretation.
+
+        ``y_test`` and ``multi_class`` are accepted but not used here.
+        """
+        # Importances are recomputed before being reported.
+        self.features_importance()
+        return self.get_feature_importance(directory, base_file_name)
--- a/summit/multiview_platform/monoview_classifiers/scm_mazid.py
+++ b/summit/multiview_platform/monoview_classifiers/scm_mazid.py
@@ -34,6 +34,7 @@ class DecisionStumpSCMNew(BaseMonoviewClassifier):
        print(self.model_type)
        self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state)
        self.clf.fit(X=X, y=y)
+        return self

    def predict(self, X):
        return self.clf.predict(X)

--- a/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py
+++ b/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py
--- a/summit/multiview_platform/multiview_classifiers/multiview_cbound_boosting.py
+++ b/summit/multiview_platform/multiview_classifiers/multiview_cbound_boosting.py
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoost"
+
+class MVCBBoost(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` with a preset list of defaults.
+
+    The constructor adds no logic of its own: every named argument and any
+    extra keyword argument is handed to the parent constructor unchanged.
+
+    NOTE(review): this wrapper forwards ``n_estimators`` and
+    ``base_estimator``, whereas the other ``mv_cb_boost_*`` wrappers and the
+    ``mv_cb_boost`` section of the YAML configuration use
+    ``n_max_iterations`` and ``estimators_generator`` -- confirm which names
+    the parent constructor expects.
+    """
+
+    def __init__(
+        self,
+        n_estimators=100,
+        random_state=42,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=10,
+        c_bound_sol=True,
+        base_estimator="Trees",
+        max_depth=1,
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based_dec",
+        weight_update="multiplicative",
+        full_combination=False,
+        min_cq_pred=False,
+        min_cq_mu=10e-3,
+        sig_mult=15,
+        sig_offset=5,
+        use_previous_voters=False,
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_estimators=n_estimators,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            max_depth=max_depth,
+            base_estimator=base_estimator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            use_previous_voters=use_previous_voters,
+            full_combination=full_combination,
+            min_cq_pred=min_cq_pred,
+            min_cq_mu=min_cq_mu,
+            sig_mult=sig_mult,
+            sig_offset=sig_offset,
+            **kwargs
+        )
+        # self.param_names+=["weight_update", "weight_strategy"]
+        # self.distribs+=[["multiplicative", "additive", "replacement"],["c_bound_based_broken", "c_bound_based", "c_bound_based_dec", "sigmoid"]]
\ No newline at end of file
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost_base.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost_base.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoostBroken"
+
+class MVCBBoostBroken(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` preset using ``"c_bound_based"``.
+
+    All arguments, including extra keyword arguments, are forwarded to the
+    parent constructor unchanged.
+
+    NOTE(review): this module is ``mv_cb_boost_base.py`` but the class is
+    named ``MVCBBoostBroken``, the same name as the class defined in
+    ``mv_cb_boost_broken.py`` -- looks like a copy-paste leftover; confirm.
+    """
+
+    def __init__(
+        self,
+        n_max_iterations=100,
+        random_state=None,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=1,
+        c_bound_sol=True,
+        estimators_generator="Stumps",
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based",
+        weight_update="multiplicative",
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_max_iterations=n_max_iterations,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            estimators_generator=estimators_generator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            **kwargs
+        )
\ No newline at end of file
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost_broken.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost_broken.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoostBroken"
+
+class MVCBBoostBroken(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` preset using ``"c_bound_based_broken"``.
+
+    All arguments, including extra keyword arguments, are forwarded to the
+    parent constructor unchanged.
+    """
+
+    def __init__(
+        self,
+        n_max_iterations=10,
+        random_state=None,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=1,
+        c_bound_sol=True,
+        estimators_generator="Stumps",
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based_broken",
+        weight_update="multiplicative",
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_max_iterations=n_max_iterations,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            estimators_generator=estimators_generator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            **kwargs
+        )
\ No newline at end of file
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost_full.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost_full.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoostFull"
+
+class MVCBBoostFull(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` preset with ``full_combination=True``.
+
+    All arguments, including extra keyword arguments, are forwarded to the
+    parent constructor unchanged.
+    """
+
+    def __init__(
+        self,
+        n_max_iterations=10,
+        random_state=None,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=1,
+        c_bound_sol=True,
+        estimators_generator="Stumps",
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based_dec",
+        weight_update="multiplicative",
+        full_combination=True,
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_max_iterations=n_max_iterations,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            estimators_generator=estimators_generator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            full_combination=full_combination,
+            **kwargs
+        )
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost_mincq.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost_mincq.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoostMinCQ"
+
+class MVCBBoostMinCQ(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` preset with ``min_cq_pred=True``.
+
+    All arguments, including extra keyword arguments, are forwarded to the
+    parent constructor unchanged.
+    """
+
+    def __init__(
+        self,
+        n_max_iterations=10,
+        random_state=None,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=1,
+        c_bound_sol=True,
+        estimators_generator="Stumps",
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based_dec",
+        weight_update="multiplicative",
+        full_combination=False,
+        min_cq_pred=True,
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_max_iterations=n_max_iterations,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            estimators_generator=estimators_generator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            full_combination=full_combination,
+            min_cq_pred=min_cq_pred,
+            **kwargs
+        )
--- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost_sig.py
+++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost_sig.py
+from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
+
+classifier_class_name = "MVCBBoostSig"
+
+class MVCBBoostSig(MultiviewCBoundBoostingAdapt):
+    """``MultiviewCBoundBoostingAdapt`` preset with ``use_previous_voters=True``.
+
+    All arguments, including extra keyword arguments, are forwarded to the
+    parent constructor unchanged.
+    """
+
+    def __init__(
+        self,
+        n_max_iterations=100,
+        random_state=None,
+        self_complemented=True,
+        twice_the_same=False,
+        random_start=False,
+        n_stumps=1,
+        c_bound_sol=True,
+        estimators_generator="Stumps",
+        mincq_tracking=False,
+        weight_add=3,
+        weight_strategy="c_bound_based_broken",
+        weight_update="multiplicative",
+        use_previous_voters=True,
+        **kwargs
+    ):
+        MultiviewCBoundBoostingAdapt.__init__(
+            self,
+            n_max_iterations=n_max_iterations,
+            random_state=random_state,
+            self_complemented=self_complemented,
+            twice_the_same=twice_the_same,
+            random_start=random_start,
+            n_stumps=n_stumps,
+            c_bound_sol=c_bound_sol,
+            estimators_generator=estimators_generator,
+            mincq_tracking=mincq_tracking,
+            weight_add=weight_add,
+            weight_strategy=weight_strategy,
+            weight_update=weight_update,
+            use_previous_voters=use_previous_voters,
+            **kwargs
+        )
\ No newline at end of file