Stashed changes'

d6c51ec2 · Baptiste Bauvin · c603d6b3 · d6c51ec2 · d6c51ec2 · d6c51ec2
Commit d6c51ec2 authored Feb 22, 2023 by Baptiste Bauvin
--- a/summit/multiview_platform/exec_classif.py
+++ b/summit/multiview_platform/exec_classif.py
@@ -51,7 +51,6 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos):
        Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
    """
    benchmark = {"monoview": {}, "multiview": {}}
    if "monoview" in cl_type:
        if monoview_algos == ['all']:  # pragma: no cover
            benchmark["monoview"] = [name for _, name, isPackage in
@@ -306,8 +305,9 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
        For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
-    logging.info("Start:\t Initializing monoview classifiers arguments")
+    logging.info("Start:\t Initializing {} classifiers arguments".format(framework))
    kwargs = {}
    for classifiers_name in classifiers_names:
        try:
            if framework == "monoview":
@@ -316,13 +316,13 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
                getattr(multiview_classifiers, classifiers_name)
        except AttributeError:
            raise AttributeError(
-                classifiers_name + " is not implemented in monoview_classifiers, "
+                classifiers_name + " is not implemented in {}_classifiers, "
-                                   "please specify the name of the file in monoview_classifiers")
+                                   "please specify the name of the file in monoview_classifiers".format(framework))
        if classifiers_name in args:
            kwargs[classifiers_name] = args[classifiers_name]
        else:
            kwargs[classifiers_name] = {}
-    logging.info("Done:\t Initializing monoview classifiers arguments")
+    logging.info("Done:\t Initializing {} classifiers arguments".format(framework))
    return kwargs

--- a/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py
+++ b/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from sklearn.svm import SVC
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+classifier_class_name = "ImbalanceBaggingSVMRBF"
+class ImbalanceBaggingSVMRBF(BaseMonoviewClassifier, BalancedBaggingClassifier):
+    """Monoview wrapper around ``BalancedBaggingClassifier`` with an SVC base estimator.
+    The hyper-parameter search space exposed through ``param_names`` /
+    ``distribs`` covers ``n_estimators`` and ``sampling_strategy``.
+    """
+    # NOTE(review): the default ``SVC()`` below is created once, when the
+    # function is defined, and is therefore shared by every instance that
+    # relies on the default.
+    def __init__(self, random_state=None, base_estimator=SVC(),
+                 n_estimators=10, sampling_strategy="auto",
+                 replacement=False, base_estimator_config=None, **kwargs):
+        """Build the bagging ensemble and declare its search space.
+        Parameters
+        ----------
+        random_state : forwarded to the parent constructor.
+        base_estimator : estimator resolved through ``get_base_estimator``
+            together with ``base_estimator_config`` and ``**kwargs``.
+        n_estimators, sampling_strategy, replacement : forwarded to the
+            parent constructor.
+        base_estimator_config : configuration handed to
+            ``get_base_estimator`` and stored on the instance.
+        """
+        base_estimator = self.get_base_estimator(base_estimator, base_estimator_config, **kwargs)
+        super(ImbalanceBaggingSVMRBF, self).__init__(random_state=random_state, base_estimator=base_estimator,
+                                         n_estimators=n_estimators,
+                                         sampling_strategy=sampling_strategy,
+                                         replacement=replacement)
+        self.param_names = ["n_estimators",  "sampling_strategy",]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"]]
+        self.weird_strings = {}
+        self.base_estimator_config = base_estimator_config
+    def fit(self, X, y):
+        """Fit the ensemble and aggregate the feature importances.
+        The importances of every fitted classifier that exposes
+        ``feature_importances_`` are summed and normalised to sum to one.
+        When no classifier exposes them (or they sum to zero) the vector is
+        left at zero instead of being turned into NaN by a 0/0 division.
+        Returns
+        -------
+        self
+        """
+        BalancedBaggingClassifier.fit(self, X, y)
+        importances = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            classifier = estim['classifier']
+            if hasattr(classifier, 'feature_importances_'):
+                importances += classifier.feature_importances_
+        total = np.sum(importances)
+        # Normalise only when there is something to normalise.
+        if total != 0:
+            importances /= total
+        self.feature_importances_ = importances
+        return self
--- a/summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py
+++ b/summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py
+from randomscm.randomscm import RandomScmClassifier
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype
+classifier_class_name = "ScmBaggingMinCq"
+import numpy as np
+from six import iteritems
+MAX_INT = np.iinfo(np.int32).max
+class ScmBaggingMinCq(RandomScmClassifier, BaseMonoviewClassifier):
+    """A bagging classifier for SetCoveringMachineClassifier().
+    The base estimators are built on subsets of both samples
+    and features.
+    Parameters
+    ----------
+    n_estimators : int, default=50
+        The number of base estimators in the ensemble.
+    max_samples : int or float, default=1.0
+        The number of samples to draw from X to train each base estimator with
+        replacement.
+        - If int, then draw `max_samples` samples.
+        - If float, then draw `max_samples * X.shape[0]` samples.
+    max_features : int or float, default=1.0
+        The number of features to draw from X to train each base estimator
+        (without replacement).
+        - If int, then draw `max_features` features.
+        - If float, then draw `max_features * X.shape[1]` features.
+    max_rules : int, default=10
+        Forwarded unchanged to ``RandomScmClassifier``.
+    p_options : float or list of float with len <= n_estimators, default=[0.316]
+        The estimators will be fitted with values of p found in p_options
+        let k be k = n_estimators/len(p_options),
+        the k first estimators will have p=p_options[0],
+        the next k estimators will have p=p_options[1] and so on...
+        A scalar is wrapped into a one-element list.
+    model_type : str, default="conjunction"
+        Forwarded unchanged to ``RandomScmClassifier``; the search space
+        declared below uses "conjunction" and "disjunction".
+    min_cq_combination : bool, default=True
+        Forwarded unchanged to ``RandomScmClassifier``.
+    min_cq_mu : float, default=10e-3
+        Forwarded unchanged to ``RandomScmClassifier``.
+    random_state : int or RandomState, default=None
+        Controls the random resampling of the original dataset
+        (sample wise and feature wise).
+        If the base estimator accepts a `random_state` attribute, a different
+        seed is generated for each instance in the ensemble.
+        Pass an int for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`.
+    Attributes
+    ----------
+    n_features_ : int
+        The number of features when :meth:`fit` is performed.
+    estimators_ : list of estimators
+        The collection of fitted base estimators.
+    estim_features : list of arrays
+        The subset of drawn features for each base estimator.
+    Examples
+    --------
+    TODO: add a usage example.
+    References
+    ----------
+    .. [1] L. Breiman, "Pasting small votes for classification in large
+           databases and on-line", Machine Learning, 36(1), 85-103, 1999.
+    .. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
+           Learning and Knowledge Discovery in Databases, 346-361, 2012.
+    """
+    # NOTE(review): the default ``p_options`` list is a single object shared
+    # by every call relying on the default; it is passed on as-is (not
+    # copied) so the parent receives exactly the object given by the caller.
+    def __init__(self,
+                 n_estimators=50,
+                 max_samples=1.0,
+                 max_features=1.0,
+                 max_rules=10,
+                 p_options=[0.316],
+                 model_type="conjunction",
+                 min_cq_combination=True,
+                 min_cq_mu=10e-3,
+                 random_state=None):
+        """Initialise the parent classifier and declare the search space."""
+        # Accept a bare number (float or int) as a one-element list of p values.
+        if isinstance(p_options, (int, float)):
+            p_options = [p_options]
+        RandomScmClassifier.__init__(self, n_estimators=n_estimators,
+                 max_samples=max_samples,
+                 max_features=max_features,
+                 max_rules=max_rules,
+                 p_options=p_options,
+                 model_type=model_type,
+                 min_cq_combination=min_cq_combination,
+                 min_cq_mu=min_cq_mu,
+                 random_state=random_state)
+        self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
+        self.classed_params = []
+        self.distribs = [CustomRandint(low=1, high=300), CustomRandint(low=1, high=20),
+                         CustomUniform(), CustomUniform(), ["conjunction", "disjunction"], CustomUniform(), [random_state]]
+        self.weird_strings = {}
+    def set_params(self, p_options=None, **kwargs):
+        """Set the given parameters as attributes and return ``self``.
+        ``p_options`` is only modified when it is explicitly provided; a
+        value that is not a list is wrapped into a one-element list. Calling
+        ``set_params`` for other parameters leaves the current ``p_options``
+        untouched instead of resetting it to the default.
+        """
+        if p_options is not None:
+            if not isinstance(p_options, list):
+                p_options = [p_options]
+            kwargs["p_options"] = p_options
+        for parameter, value in iteritems(kwargs):
+            setattr(self, parameter, value)
+        return self
+    def get_interpretation(self, directory, base_file_name, y_test,
+                           multi_class=False):
+        """Return the feature-importance interpretation string.
+        Calls ``features_importance()`` and then returns the result of
+        ``get_feature_importance(directory, base_file_name)``.
+        ``y_test`` and ``multi_class`` are accepted but not used here.
+        """
+        self.features_importance()
+        interpret_string = self.get_feature_importance(directory, base_file_name)
+        return interpret_string
--- a/summit/multiview_platform/monoview_classifiers/svm_rbf.py
+++ b/summit/multiview_platform/monoview_classifiers/svm_rbf.py
 from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \
    SVCClassifier
+from sklearn.preprocessing import RobustScaler
 from ..monoview.monoview_utils import BaseMonoviewClassifier
 from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform
@@ -17,11 +19,27 @@ class SVMRBF(SVCClassifier, BaseMonoviewClassifier):
    Here, it is the RBF kernel version
    """
-    def __init__(self, random_state=None, C=1.0, **kwargs):
+    def __init__(self, random_state=None, C=1.0, scaler=None ,gamma="scale",**kwargs):
        SVCClassifier.__init__(self,
                               C=C,
                               kernel='rbf',
+                               gamma=gamma,
                               random_state=random_state
                               )
-        self.param_names = ["C", "random_state"]
+        self.scaler=scaler
-        self.distribs = [CustomUniform(loc=0, state=1), [random_state]]
+        self.param_names = ["C", "gamma", "scaler", "random_state"]
+        self.distribs = [CustomUniform(loc=0, state=1),
+                         CustomUniform(loc=0, state=1),
+                         [None, RobustScaler()],
+                         [random_state]]
+    def fit(self, X, y, sample_weight=None):
+        """Fit the SVC, first fitting and applying ``self.scaler`` to ``X`` when one is set."""
+        scaled = X if self.scaler is None else self.scaler.fit_transform(X)
+        return SVCClassifier.fit(self, scaled, y, sample_weight=sample_weight)
+    def predict(self, X):
+        """Predict with the SVC, applying the already fitted ``self.scaler`` to ``X`` when one is set."""
+        scaled = X if self.scaler is None else self.scaler.transform(X)
+        return SVCClassifier.predict(self, scaled)
--- a/summit/multiview_platform/utils/dataset.py
+++ b/summit/multiview_platform/utils/dataset.py
@@ -503,6 +503,7 @@ class HDF5Dataset(Dataset):
            seleted labels' names
        """
        selected_labels = self.get_labels(sample_indices)
        if type(self.dataset["Labels"].attrs["names"][0]) == bytes:
            return [label_name.decode("utf-8")
                    for label, label_name in