Skip to content
Snippets Groups Projects
Commit d6c51ec2 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Stashed changes'

parent c603d6b3
No related branches found
No related tags found
No related merge requests found
Pipeline #11485 failed
......@@ -51,7 +51,6 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos):
Dictionary summarizing which mono- and multiview algorithms will be used in the benchmark.
"""
benchmark = {"monoview": {}, "multiview": {}}
if "monoview" in cl_type:
if monoview_algos == ['all']: # pragma: no cover
benchmark["monoview"] = [name for _, name, isPackage in
......@@ -306,8 +305,9 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
logging.info("Start:\t Initializing monoview classifiers arguments")
logging.info("Start:\t Initializing {} classifiers arguments".format(framework))
kwargs = {}
for classifiers_name in classifiers_names:
try:
if framework == "monoview":
......@@ -316,13 +316,13 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
getattr(multiview_classifiers, classifiers_name)
except AttributeError:
raise AttributeError(
classifiers_name + " is not implemented in monoview_classifiers, "
"please specify the name of the file in monoview_classifiers")
classifiers_name + " is not implemented in {}_classifiers, "
"please specify the name of the file in monoview_classifiers".format(framework))
if classifiers_name in args:
kwargs[classifiers_name] = args[classifiers_name]
else:
kwargs[classifiers_name] = {}
logging.info("Done:\t Initializing monoview classifiers arguments")
logging.info("Done:\t Initializing {} classifiers arguments".format(framework))
return kwargs
......
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from sklearn.svm import SVC
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
classifier_class_name = "ImbalanceBaggingSVMRBF"
class ImbalanceBaggingSVMRBF(BaseMonoviewClassifier, BalancedBaggingClassifier):
    """Monoview wrapper around ``BalancedBaggingClassifier`` with an SVC base.

    The constructor resolves the base estimator through
    ``get_base_estimator`` and forwards the bagging options to the parent
    constructor. It also declares the hyper-parameter search space through
    ``param_names`` and ``distribs``.

    Parameters
    ----------
    random_state : int, RandomState or None, default=None
        Forwarded to the parent constructor.
    base_estimator : estimator, default=SVC()
        Estimator handed to ``get_base_estimator``. The default instance is
        created once, when the class is defined.
        NOTE(review): presumably the ensemble clones it before fitting --
        confirm it is never mutated in place.
    n_estimators : int, default=10
        Forwarded to the parent constructor.
    sampling_strategy : str, default="auto"
        Forwarded to the parent constructor.
    replacement : bool, default=False
        Forwarded to the parent constructor.
    base_estimator_config : dict or None, default=None
        Configuration handed to ``get_base_estimator`` and stored on the
        instance.
    **kwargs
        Extra keyword arguments handed to ``get_base_estimator``.

    Attributes
    ----------
    feature_importances_ : ndarray of shape (X.shape[1],)
        Set by :meth:`fit`. Sum of the base classifiers' importances,
        normalised to sum to one, or all zeros when no base classifier
        exposes ``feature_importances_``.
    """

    def __init__(self, random_state=None, base_estimator=SVC(),
                 n_estimators=10, sampling_strategy="auto",
                 replacement=False, base_estimator_config=None, **kwargs):
        base_estimator = self.get_base_estimator(base_estimator,
                                                 base_estimator_config,
                                                 **kwargs)
        super(ImbalanceBaggingSVMRBF, self).__init__(
            random_state=random_state,
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            sampling_strategy=sampling_strategy,
            replacement=replacement)
        # Hyper-parameter search space: names and matching distributions.
        self.param_names = ["n_estimators", "sampling_strategy", ]
        self.distribs = [CustomRandint(low=1, high=50),
                         ["auto"]]
        self.weird_strings = {}
        self.base_estimator_config = base_estimator_config

    def fit(self, X, y):
        """Fit the ensemble and aggregate the feature importances.

        Parameters
        ----------
        X : array-like exposing ``shape[1]``
            Training samples.
        y : array-like
            Training labels.

        Returns
        -------
        self
        """
        BalancedBaggingClassifier.fit(self, X, y)
        importances = np.zeros(X.shape[1])
        for estim in self.estimators_:
            # assumes each fitted member is indexable by the step name
            # 'classifier' -- TODO confirm against the imblearn version used
            classifier = estim['classifier']
            if hasattr(classifier, 'feature_importances_'):
                importances += classifier.feature_importances_
        total = np.sum(importances)
        # Base classifiers without importances (e.g. SVC) leave the sum at
        # zero; dividing then would fill the vector with NaN, so keep zeros.
        if total != 0:
            importances /= total
        self.feature_importances_ = importances
        return self
from randomscm.randomscm import RandomScmClassifier
from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "ScmBaggingMinCq"
import numpy as np
from six import iteritems
MAX_INT = np.iinfo(np.int32).max
class ScmBaggingMinCq(RandomScmClassifier, BaseMonoviewClassifier):
    """A Bagging classifier for SetCoveringMachineClassifier().

    The base estimators are built on subsets of both samples
    and features.

    Parameters
    ----------
    n_estimators : int, default=50
        The number of base estimators in the ensemble.
    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator
        (with replacement).

        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.
    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator
        (without replacement).

        - If int, then draw `max_features` features.
        - If float, then draw `max_features * X.shape[1]` features.
    max_rules : int, default=10
        Forwarded to ``RandomScmClassifier``.
        NOTE(review): presumably the maximum number of rules of each base
        SCM -- confirm against the randomscm documentation.
    p_options : float or list of float with len <= n_estimators, default=[0.316]
        The estimators will be fitted with values of p found in p_options.
        Let k = n_estimators / len(p_options);
        the k first estimators will have p=p_options[0],
        the next k estimators will have p=p_options[1] and so on...
        A single float is wrapped into a one-element list.
    model_type : str, default="conjunction"
        Forwarded to ``RandomScmClassifier``. The hyper-parameter search
        samples it from ``["conjunction", "disjunction"]``.
    min_cq_combination : bool, default=True
        Forwarded to ``RandomScmClassifier``.
        NOTE(review): presumably toggles a MinCq-based combination of the
        base estimators -- confirm.
    min_cq_mu : float, default=10e-3
        Forwarded to ``RandomScmClassifier`` (note that ``10e-3 == 0.01``).
    random_state : int or RandomState, default=None
        Controls the random resampling of the original dataset
        (sample wise and feature wise).
        If the base estimator accepts a `random_state` attribute, a different
        seed is generated for each instance in the ensemble.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    n_features_ : int
        The number of features when :meth:`fit` is performed.
    estimators_ : list of estimators
        The collection of fitted base estimators.
    estim_features : list of arrays
        The subset of drawn features for each base estimator.

    Examples
    --------
    TODO: add a usage example.

    References
    ----------
    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.
    .. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.
    """

    def __init__(self,
                 n_estimators=50,
                 max_samples=1.0,
                 max_features=1.0,
                 max_rules=10,
                 p_options=[0.316],
                 model_type="conjunction",
                 min_cq_combination=True,
                 min_cq_mu=10e-3,
                 random_state=None):
        # A bare float is accepted as shorthand for a one-element list.
        if isinstance(p_options, float):
            p_options = [p_options]
        RandomScmClassifier.__init__(self, n_estimators=n_estimators,
                                     max_samples=max_samples,
                                     max_features=max_features,
                                     max_rules=max_rules,
                                     p_options=p_options,
                                     model_type=model_type,
                                     min_cq_combination=min_cq_combination,
                                     min_cq_mu=min_cq_mu,
                                     random_state=random_state)
        # Hyper-parameter search space: names and matching distributions.
        self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300), CustomRandint(low=1, high=20),
                         CustomUniform(), CustomUniform(), ["conjunction", "disjunction"], CustomUniform(), [random_state]]
        self.weird_strings = {}

    def set_params(self, p_options=None, **kwargs):
        """Set the given parameters on this estimator.

        Only the parameters actually passed are modified. ``p_options`` used
        to default to a shared ``[0.316]`` list that was written back on
        every call, so a partial update such as
        ``set_params(n_estimators=5)`` silently reset ``p_options``.

        Parameters
        ----------
        p_options : float, list of float or None, default=None
            New value for ``p_options``; a non-list value is wrapped into a
            one-element list. ``None`` leaves the current value untouched.
        **kwargs
            Other attributes to set, as ``name=value`` pairs.

        Returns
        -------
        self
        """
        if p_options is not None:
            if not isinstance(p_options, list):
                p_options = [p_options]
            kwargs["p_options"] = p_options
        for parameter, value in kwargs.items():
            setattr(self, parameter, value)
        return self

    def get_interpretation(self, directory, base_file_name, y_test,
                           multi_class=False):
        """Return the interpretation string built from feature importances.

        Calls ``features_importance()`` and then returns the result of
        ``get_feature_importance(directory, base_file_name)``. ``y_test``
        and ``multi_class`` are accepted but not used here.
        """
        self.features_importance()
        interpret_string = self.get_feature_importance(directory, base_file_name)
        return interpret_string
from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \
SVCClassifier
from sklearn.preprocessing import RobustScaler
from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform
......@@ -17,11 +19,27 @@ class SVMRBF(SVCClassifier, BaseMonoviewClassifier):
Here, it is the RBF kernel version
"""
def __init__(self, random_state=None, C=1.0, **kwargs):
def __init__(self, random_state=None, C=1.0, scaler=None ,gamma="scale",**kwargs):
SVCClassifier.__init__(self,
C=C,
kernel='rbf',
gamma=gamma,
random_state=random_state
)
self.param_names = ["C", "random_state"]
self.distribs = [CustomUniform(loc=0, state=1), [random_state]]
self.scaler=scaler
self.param_names = ["C", "gamma", "scaler", "random_state"]
self.distribs = [CustomUniform(loc=0, state=1),
CustomUniform(loc=0, state=1),
[None, RobustScaler()],
[random_state]]
def fit(self, X, y, sample_weight=None):
    """Fit the SVC on ``X``, first fitting and applying the scaler if set.

    When ``self.scaler`` is not ``None`` it is fitted on ``X`` and the
    transformed data is what ``SVCClassifier.fit`` receives; otherwise
    ``X`` is passed through unchanged.
    """
    scaler = self.scaler
    features = X if scaler is None else scaler.fit_transform(X)
    return SVCClassifier.fit(self, features, y, sample_weight=sample_weight)
def predict(self, X):
    """Predict labels for ``X``, applying the scaler first if one is set.

    When ``self.scaler`` is not ``None`` its ``transform`` is applied to
    ``X`` before delegating to ``SVCClassifier.predict``.
    """
    scaler = self.scaler
    features = X if scaler is None else scaler.transform(X)
    return SVCClassifier.predict(self, features)
......@@ -503,6 +503,7 @@ class HDF5Dataset(Dataset):
selected labels' names
"""
selected_labels = self.get_labels(sample_indices)
if type(self.dataset["Labels"].attrs["names"][0]) == bytes:
return [label_name.decode("utf-8")
for label, label_name in
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment