diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py index 11697f4b998b3548093484c6af638cbb4d7ed94d..089f0c9478b9057f40ae89f5bc47504368dbcc6f 100644 --- a/summit/multiview_platform/exec_classif.py +++ b/summit/multiview_platform/exec_classif.py @@ -51,7 +51,6 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos): Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. """ benchmark = {"monoview": {}, "multiview": {}} - if "monoview" in cl_type: if monoview_algos == ['all']: # pragma: no cover benchmark["monoview"] = [name for _, name, isPackage in @@ -306,8 +305,9 @@ def init_kwargs(args, classifiers_names, framework="monoview"): For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`""" - logging.info("Start:\t Initializing monoview classifiers arguments") + logging.info("Start:\t Initializing {} classifiers arguments".format(framework)) kwargs = {} + for classifiers_name in classifiers_names: try: if framework == "monoview": @@ -316,13 +316,13 @@ def init_kwargs(args, classifiers_names, framework="monoview"): getattr(multiview_classifiers, classifiers_name) except AttributeError: raise AttributeError( - classifiers_name + " is not implemented in monoview_classifiers, " - "please specify the name of the file in monoview_classifiers") + classifiers_name + " is not implemented in {}_classifiers, " + "please specify the name of the file in monoview_classifiers".format(framework)) if classifiers_name in args: kwargs[classifiers_name] = args[classifiers_name] else: kwargs[classifiers_name] = {} - logging.info("Done:\t Initializing monoview classifiers arguments") + logging.info("Done:\t Initializing {} classifiers arguments".format(framework)) return kwargs diff --git a/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py b/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py new file mode 100644 index 
# ---- summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py (new file in this patch) ----
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from sklearn.svm import SVC

from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform

classifier_class_name = "ImbalanceBaggingSVMRBF"


class ImbalanceBaggingSVMRBF(BaseMonoviewClassifier, BalancedBaggingClassifier):
    """Balanced bagging ensemble exposed as a monoview classifier.

    Parameters
    ----------
    random_state : int, RandomState or None, default=None
        Forwarded to ``BalancedBaggingClassifier``.
    base_estimator : estimator, default=SVC()
        Resolved through ``get_base_estimator`` together with
        ``base_estimator_config`` and ``**kwargs`` before being handed to
        ``BalancedBaggingClassifier``.
    n_estimators : int, default=10
        Forwarded to ``BalancedBaggingClassifier``.
    sampling_strategy : default="auto"
        Forwarded to ``BalancedBaggingClassifier``.
    replacement : bool, default=False
        Forwarded to ``BalancedBaggingClassifier``.
    base_estimator_config : default=None
        Passed to ``get_base_estimator`` and stored on the instance.
    **kwargs
        Passed to ``get_base_estimator``.

    Attributes
    ----------
    param_names : list of str
        Names of the hyper-parameters exposed to the search.
    distribs : list
        Search space for each entry of ``param_names``, in the same order.
    feature_importances_ : ndarray of shape (n_features,)
        Set by :meth:`fit`.
    """

    # NOTE(review): the ``SVC()`` default is evaluated once at definition
    # time, so that instance is shared by every call relying on the default.
    # Presumably ``get_base_estimator`` returns a fresh estimator - confirm.
    def __init__(self, random_state=None, base_estimator=SVC(),
                 n_estimators=10, sampling_strategy="auto",
                 replacement=False, base_estimator_config=None, **kwargs):
        base_estimator = self.get_base_estimator(base_estimator,
                                                 base_estimator_config,
                                                 **kwargs)
        super(ImbalanceBaggingSVMRBF, self).__init__(
            random_state=random_state,
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            sampling_strategy=sampling_strategy,
            replacement=replacement)

        self.param_names = ["n_estimators", "sampling_strategy"]
        self.distribs = [CustomRandint(low=1, high=50),
                         ["auto"]]
        self.weird_strings = {}
        self.base_estimator_config = base_estimator_config

    def fit(self, X, y):
        """Fit the ensemble and aggregate the members' feature importances.

        The ``feature_importances_`` of the ``'classifier'`` entry of every
        fitted member that exposes the attribute are summed, then normalised
        so they add up to 1.

        When no member exposes ``feature_importances_`` the sum is zero. The
        vector is then left as all zeros instead of being divided by zero,
        which used to fill it with NaN.

        Parameters
        ----------
        X : array-like with a ``shape`` attribute, (n_samples, n_features)
        y : array-like
            Training labels.

        Returns
        -------
        self
        """
        BalancedBaggingClassifier.fit(self, X, y)
        importances = np.zeros(X.shape[1])
        for estim in self.estimators_:
            classifier = estim['classifier']
            if hasattr(classifier, 'feature_importances_'):
                importances += classifier.feature_importances_
        total = np.sum(importances)
        # Guard the normalisation: 0 / 0 would turn every entry into NaN.
        if total != 0:
            importances /= total
        self.feature_importances_ = importances
        return self


# ---- next in this patch: summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py (new file) ----
# ---- summit/multiview_platform/monoview_classifiers/scm_bagging_mincq.py (new file in this patch) ----
import numbers

import numpy as np
from randomscm.randomscm import RandomScmClassifier
from six import iteritems

from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype

classifier_class_name = "ScmBaggingMinCq"

MAX_INT = np.iinfo(np.int32).max

# Marks "argument not supplied" in ``ScmBaggingMinCq.set_params`` so that
# omitting ``p_options`` can be told apart from passing a value for it.
_UNSET = object()


class ScmBaggingMinCq(RandomScmClassifier, BaseMonoviewClassifier):
    """A Bagging classifier for SetCoveringMachineClassifier().

    The base estimators are built on subsets of both samples
    and features.

    Parameters
    ----------
    n_estimators : int, default=50
        The number of base estimators in the ensemble.
    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator
        with replacement.
        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.
    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator
        (without replacement).
        - If int, then draw `max_features` features.
        - If float, then draw `max_features * X.shape[1]` features.
    max_rules : int, default=10
        Forwarded unchanged to ``RandomScmClassifier``.
    p_options : float or list of float with len <= n_estimators, default=None
        ``None`` stands for ``[0.316]``; a single number is wrapped in a
        list. The estimators will be fitted with values of p found in
        p_options. Let k be k = n_estimators/len(p_options):
        the k first estimators will have p=p_options[0],
        the next k estimators will have p=p_options[1] and so on...
    model_type : str, default="conjunction"
        Forwarded unchanged to ``RandomScmClassifier``. The search space
        declared in ``distribs`` is ``["conjunction", "disjunction"]``.
    min_cq_combination : bool, default=True
        Forwarded unchanged to ``RandomScmClassifier``.
    min_cq_mu : float, default=10e-3
        Forwarded unchanged to ``RandomScmClassifier``.
        NOTE(review): ``10e-3`` equals ``0.01``; confirm that ``1e-3`` was
        not the intended value.
    random_state : int or RandomState, default=None
        Controls the random resampling of the original dataset
        (sample wise and feature wise).
        If the base estimator accepts a `random_state` attribute, a different
        seed is generated for each instance in the ensemble.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    n_features_ : int
        The number of features when :meth:`fit` is performed.
    estimators_ : list of estimators
        The collection of fitted base estimators.
    estim_features : list of arrays
        The subset of drawn features for each base estimator.
    param_names : list of str
        Names of the hyper-parameters exposed to the search.
    distribs : list
        Search space for each entry of ``param_names``, in the same order.

    References
    ----------
    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.
    .. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.
    """

    def __init__(self,
                 n_estimators=50,
                 max_samples=1.0,
                 max_features=1.0,
                 max_rules=10,
                 p_options=None,
                 model_type="conjunction",
                 min_cq_combination=True,
                 min_cq_mu=10e-3,
                 random_state=None):
        if p_options is None:
            # A fresh list per instance; a list literal in the signature
            # would be one object shared by every instance.
            p_options = [0.316]
        elif isinstance(p_options, numbers.Real):
            # Accept a bare number (float, int or numpy scalar) as shorthand
            # for a one-element list.
            p_options = [p_options]
        RandomScmClassifier.__init__(self, n_estimators=n_estimators,
                                     max_samples=max_samples,
                                     max_features=max_features,
                                     max_rules=max_rules,
                                     p_options=p_options,
                                     model_type=model_type,
                                     min_cq_combination=min_cq_combination,
                                     min_cq_mu=min_cq_mu,
                                     random_state=random_state)
        self.param_names = ["n_estimators", "max_rules", "max_samples",
                            "max_features", "model_type", "p_options",
                            "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomRandint(low=1, high=20),
                         CustomUniform(),
                         CustomUniform(),
                         ["conjunction", "disjunction"],
                         CustomUniform(),
                         [random_state]]
        self.weird_strings = {}

    def set_params(self, p_options=_UNSET, **kwargs):
        """Set attributes from keyword arguments and return ``self``.

        ``p_options`` is changed only when it is passed; omitting it leaves
        the current value untouched. A value that is not a list is wrapped
        in a one-element list. Every other keyword is assigned with
        ``setattr``.
        """
        if p_options is not _UNSET:
            if not isinstance(p_options, list):
                p_options = [p_options]
            kwargs["p_options"] = p_options
        for parameter, value in iteritems(kwargs):
            setattr(self, parameter, value)
        return self

    def get_interpretation(self, directory, base_file_name, y_test,
                           multi_class=False):
        """Return the feature-importance interpretation string.

        Calls ``features_importance()`` and then returns the result of
        ``get_feature_importance(directory, base_file_name)``. ``y_test``
        and ``multi_class`` are accepted but not used here.
        """
        self.features_importance()
        interpret_string = self.get_feature_importance(directory,
                                                       base_file_name)
        return interpret_string


# ---- summit/multiview_platform/monoview_classifiers/svm_rbf.py (modified by this patch; state after the patch) ----
# Module lines that fall outside the patch hunks (between the imports and the
# class statement) are not shown in the patch and are not reproduced here.
from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \
    SVCClassifier
from sklearn.preprocessing import RobustScaler

from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform


class SVMRBF(SVCClassifier, BaseMonoviewClassifier):
    """SVC-based monoview classifier with an optional input scaler.

    Here, it is the RBF kernel version.

    Parameters
    ----------
    random_state : default=None
        Forwarded to ``SVCClassifier``.
    C : float, default=1.0
        Forwarded to ``SVCClassifier``.
    scaler : object or None, default=None
        When not ``None``, it is fitted on the training data in :meth:`fit`
        and applied to the input of :meth:`predict`.
    gamma : default="scale"
        Forwarded to ``SVCClassifier``.
    **kwargs
        Accepted and ignored.
    """

    def __init__(self, random_state=None, C=1.0, scaler=None, gamma="scale",
                 **kwargs):
        SVCClassifier.__init__(self,
                               C=C,
                               kernel='rbf',
                               gamma=gamma,
                               random_state=random_state
                               )
        self.scaler = scaler
        self.param_names = ["C", "gamma", "scaler", "random_state"]
        self.distribs = [CustomUniform(loc=0, state=1),
                         CustomUniform(loc=0, state=1),
                         [None, RobustScaler()],
                         [random_state]]

    def fit(self, X, y, sample_weight=None):
        """Fit the scaler (if any) on ``X``, then fit the SVC on the result."""
        if self.scaler is not None:
            X = self.scaler.fit_transform(X)
        return SVCClassifier.fit(self, X, y, sample_weight=sample_weight)

    # NOTE(review): only ``fit`` and ``predict`` apply the scaler. Any other
    # inference method inherited from ``SVCClassifier`` would see unscaled
    # input - confirm whether such methods are used.
    def predict(self, X):
        """Apply the fitted scaler (if any) to ``X``, then predict."""
        if self.scaler is not None:
            X = self.scaler.transform(X)
        return SVCClassifier.predict(self, X)


# ---- next in this patch: summit/multiview_platform/utils/dataset.py, hunk @@ -503,6 +503,7 @@ inside class HDF5Dataset(Dataset) ----
seleted labels' names """ selected_labels = self.get_labels(sample_indices) + if type(self.dataset["Labels"].attrs["names"][0]) == bytes: return [label_name.decode("utf-8") for label, label_name in