Skip to content
Snippets Groups Projects
Commit 20b78ad2 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added algorithms

parent 427049b8
No related branches found
No related tags found
No related merge requests found
Showing
with 813 additions and 640 deletions
# The base configuration of the benchmark
log: True
name: ["demo"]
name: ["test_boules"]
label: "_1_3"
file_type: ".hdf5"
views:
pathf: "/home/baptiste/Documents/Datasets/Generated/"
pathf: "/home/baptiste/Documents/Clouded/short_projects/latent_space_study/"
nice: 0
random_state: 42
nb_cores: 1
......@@ -17,23 +17,74 @@ track_tracebacks: False
# All the classification-related configuration options
multiclass_method: "oneVersusOne"
split: 0.75
split: 0.10
nb_folds: 5
nb_class: 2
classes: ['label_1', 'label_3']
nb_class: 4
classes:
type: ["multiview","monoview"]
algos_monoview: ["cb_boost",]
algos_multiview: ["multiview_cbound_boosting"]
algos_monoview: ["cb_boost", "decision_tree", 'random_forest']
algos_multiview: ["mv_cb_boost", "weighted_linear_late_fusion","weighted_linear_early_fusion","mumbo" ]
stats_iter: 5
metrics:
accuracy_score: {}
f1_score:
average: 'binary'
average: 'micro'
metric_princ: "accuracy_score"
hps_type: "None"
hps_args: {}
hps_type: "Random"
hps_args:
n_iter: 10
equivalent_draws: True
cb_boost:
n_stumps: 10
multiview_cbound_boosting:
n_stumps: 10
\ No newline at end of file
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
decision_tree:
max_depth: 2
mumbo:
base_estimator: decision_tree
base_estimator__max_depth: 1
n_estimators: 80
mv_cb_boost:
n_max_iterations: 80
n_stumps: 30
estimators_generator: "Trees"
max_depth: 1
pb_mv_boost:
num_iterations: 20
decision_tree_depth: 1
weighted_linear_early_fusion:
monoview_classifier_name: "cb_boost"
monoview_classifier_config:
cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
weighted_linear_late_fusion:
classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"]
classifier_configs:
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
__version__ = "0.0.0.0"
__url__ = "https://gitlab.lis-lab.fr/baptiste.bauvin/summit"
from . import multiview_platform, execute
......@@ -129,6 +129,7 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1])))
for i in
range(self.estimator_errors_.shape[0])])
return self
# def canProbas(self):
# """
......
......@@ -140,6 +140,18 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
def predict_proba(self, X):
    """Return two-column scores derived from the weighted vote margin.

    Parameters
    ----------
    X : array-like or scipy sparse matrix
        Samples to score. Sparse input is converted to a dense array
        (a warning is logged) before being handed to
        ``_binary_classification_matrix``.

    Returns
    -------
    proba : numpy.ndarray with two columns
        One row per entry of the margin vector. Column 0 holds
        ``(1 - margin) / 2`` and column 1 holds ``(1 + margin) / 2``,
        where ``margin`` is the sum over voters of
        ``classification_matrix * weights_`` for that row.
    """
    check_is_fitted(self, 'weights_')
    if scipy.sparse.issparse(X):
        logging.warning('Converting sparse matrix to dense matrix.')
        X = X.toarray()
    classification_matrix = self._binary_classification_matrix(X)
    margins = np.sum(classification_matrix * self.weights_, axis=1)
    # NOTE(review): the two columns are valid probabilities only if every
    # margin lies in [-1, 1] -- confirm that weights_ are normalised.
    # Vectorised: one array operation instead of a Python loop per sample.
    return np.column_stack(((1 - margins) / 2, (1 + margins) / 2))
def predict(self, X):
start = time.time()
check_is_fitted(self, 'weights_')
......
......@@ -52,6 +52,7 @@ class Bagging(BaggingClassifier, BaseMonoviewClassifier,):
end = time.time()
self.train_time = end - begin
self.train_shape = X.shape
return self
def predict(self, X):
......
......@@ -64,6 +64,7 @@ class BaggingPregen(BaggingClassifier, BaseMonoviewClassifier,
end = time.time()
self.train_time = end - begin
self.train_shape = pregen_X.shape
return self
......
......@@ -63,6 +63,7 @@ class GradientBoostingPregen(GradientBoostingClassifier, BaseMonoviewClassifier,
# self.base_predictions = np.array(
# [change_label_to_zero(estim.predict(pregen_X)) for estim in
# self.estimators_])
return self
......
......@@ -62,7 +62,7 @@ class RandomForestPregen(RandomForestClassifier, BaseMonoviewClassifier,
self.base_predictions = np.array(
[change_label_to_zero(estim.predict(pregen_X)) for estim in
self.estimators_])
return self
def predict(self, X):
......
......@@ -74,6 +74,7 @@ class SCM(scm, BaseMonoviewClassifier):
for rule, importance in zip(self.model_.rules, rules_importances):
self.feature_importances_[rule.feature_idx] += importance
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
# def canProbas(self):
# """
......
......@@ -69,7 +69,7 @@ class ScmBagging(ScmBaggingClassifier, BaseMonoviewClassifier):
"""
def __init__(self,
n_estimators=10,
n_estimators=50,
max_samples=1.0,
max_features=1.0,
max_rules=10,
......
from .scm_bagging import ScmBagging
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
from scm_bagging.scm_bagging_classifier import ScmBaggingClassifier
from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "ScmBaggingMinCq"
class ScmBaggingMinCq(ScmBagging):
import numpy as np
from six import iteritems
MAX_INT = np.iinfo(np.int32).max
class ScmBaggingMinCq(ScmBaggingClassifier, BaseMonoviewClassifier):
"""A Bagging classifier. for SetCoveringMachineClassifier()
The base estimators are built on subsets of both samples
and features.
Parameters
----------
n_estimators : int, default=10
The number of base estimators in the ensemble.
max_samples : int or float, default=1.0
The number of samples to draw from X to train each base estimator with
replacement.
- If int, then draw `max_samples` samples.
- If float, then draw `max_samples * X.shape[0]` samples.
max_features : int or float, default=1.0
The number of features to draw from X to train each base estimator (
without replacement.
- If int, then draw `max_features` features.
- If float, then draw `max_features * X.shape[1]` features.
p_options : list of float with len =< n_estimators, default=[1.0]
The estimators will be fitted with values of p found in p_options
let k be k = n_estimators/len(p_options),
the k first estimators will have p=p_options[0],
the next k estimators will have p=p_options[1] and so on...
random_state : int or RandomState, default=None
Controls the random resampling of the original dataset
(sample wise and feature wise).
If the base estimator accepts a `random_state` attribute, a different
seed is generated for each instance in the ensemble.
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.
Attributes
----------
n_features_ : int
The number of features when :meth:`fit` is performed.
estimators_ : list of estimators
The collection of fitted base estimators.
estim_features : list of arrays
The subset of drawn features for each base estimator.
Examples
--------
>>> @TODO
References
----------
.. [1] L. Breiman, "Pasting small votes for classification in large
databases and on-line", Machine Learning, 36(1), 85-103, 1999.
.. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
Learning and Knowledge Discovery in Databases, 346-361, 2012.
"""
def __init__(self,
n_estimators=10,
n_estimators=50,
max_samples=1.0,
max_features=1.0,
max_rules=10,
......@@ -14,7 +78,9 @@ class ScmBaggingMinCq(ScmBagging):
min_cq_combination=True,
min_cq_mu=10e-3,
random_state=None):
ScmBagging.__init__(self, n_estimators=n_estimators,
if isinstance(p_options, float):
p_options = [p_options]
ScmBaggingClassifier.__init__(self, n_estimators=n_estimators,
max_samples=max_samples,
max_features=max_features,
max_rules=max_rules,
......@@ -23,5 +89,22 @@ class ScmBaggingMinCq(ScmBagging):
min_cq_combination=min_cq_combination,
min_cq_mu=min_cq_mu,
random_state=random_state)
self.param_names.append("min_cq_mu")
self.distribs.append(CustomRandint(1,7, multiplier='e-'))
\ No newline at end of file
self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=300), CustomRandint(low=1, high=20),
CustomUniform(), CustomUniform(), ["conjunction", "disjunction"], CustomUniform(), [random_state]]
self.weird_strings = {}
def set_params(self, p_options=None, **kwargs):
    """Set parameters as attributes, always storing ``p_options`` as a list.

    Parameters
    ----------
    p_options : float or list of float, default=None
        Value(s) stored in ``self.p_options``. A non-list value is wrapped
        in a one-element list. When omitted (``None``), a fresh
        ``[0.316]`` list is used, so ``p_options`` is reset on every call
        that does not pass it.
    **kwargs
        Any other parameter; each is set as an attribute of the same name.

    Returns
    -------
    self
        The instance, to allow call chaining.
    """
    if p_options is None:
        # Build the default per call: a list literal in the signature is a
        # single object that would end up shared by every instance.
        p_options = [0.316]
    elif not isinstance(p_options, list):
        p_options = [p_options]
    kwargs["p_options"] = p_options
    for parameter, value in kwargs.items():
        setattr(self, parameter, value)
    return self
def get_interpretation(self, directory, base_file_name, y_test,
                       multi_class=False):
    """Call ``features_importance()`` and return the feature-importance report.

    The returned value is whatever
    ``get_feature_importance(directory, base_file_name)`` produces.
    ``y_test`` and ``multi_class`` are accepted but not used in this method.
    """
    self.features_importance()
    return self.get_feature_importance(directory, base_file_name)
......@@ -34,6 +34,7 @@ class DecisionStumpSCMNew(BaseMonoviewClassifier):
print(self.model_type)
self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state)
self.clf.fit(X=X, y=y)
return self
def predict(self, X):
return self.clf.predict(X)
......
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoost"
class MVCBBoost(MultiviewCBoundBoostingAdapt):
    """``MultiviewCBoundBoostingAdapt`` with this module's default settings.

    The constructor only fixes default values: every named argument and any
    extra keyword argument is forwarded unchanged to
    ``MultiviewCBoundBoostingAdapt.__init__``.
    """

    def __init__(
        self,
        n_estimators=100,
        random_state=42,
        self_complemented=True,
        twice_the_same=False,
        random_start=False,
        n_stumps=10,
        c_bound_sol=True,
        base_estimator="Trees",
        max_depth=1,
        mincq_tracking=False,
        weight_add=3,
        weight_strategy="c_bound_based_dec",
        weight_update="multiplicative",
        full_combination=False,
        min_cq_pred=False,
        min_cq_mu=10e-3,
        sig_mult=15,
        sig_offset=5,
        use_previous_voters=False,
        **kwargs
    ):
        MultiviewCBoundBoostingAdapt.__init__(
            self,
            n_estimators=n_estimators,
            random_state=random_state,
            self_complemented=self_complemented,
            twice_the_same=twice_the_same,
            random_start=random_start,
            n_stumps=n_stumps,
            c_bound_sol=c_bound_sol,
            max_depth=max_depth,
            base_estimator=base_estimator,
            mincq_tracking=mincq_tracking,
            weight_add=weight_add,
            weight_strategy=weight_strategy,
            weight_update=weight_update,
            use_previous_voters=use_previous_voters,
            full_combination=full_combination,
            min_cq_pred=min_cq_pred,
            min_cq_mu=min_cq_mu,
            sig_mult=sig_mult,
            sig_offset=sig_offset,
            **kwargs
        )
        # Disabled hyper-parameter search entries, kept for reference:
        # self.param_names+=["weight_update", "weight_strategy"]
        # self.distribs+=[["multiplicative", "additive", "replacement"],["c_bound_based_broken", "c_bound_based", "c_bound_based_dec", "sigmoid"]]
\ No newline at end of file
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoostBroken"
class MVCBBoostBroken(MultiviewCBoundBoostingAdapt):
    """``MultiviewCBoundBoostingAdapt`` defaulting to the ``"c_bound_based"`` weight strategy.

    The constructor only fixes default values: every named argument and any
    extra keyword argument is forwarded unchanged to
    ``MultiviewCBoundBoostingAdapt.__init__``.
    """

    def __init__(
        self,
        n_max_iterations=100,
        random_state=None,
        self_complemented=True,
        twice_the_same=False,
        random_start=False,
        n_stumps=1,
        c_bound_sol=True,
        estimators_generator="Stumps",
        mincq_tracking=False,
        weight_add=3,
        weight_strategy="c_bound_based",
        weight_update="multiplicative",
        **kwargs
    ):
        MultiviewCBoundBoostingAdapt.__init__(
            self,
            n_max_iterations=n_max_iterations,
            random_state=random_state,
            self_complemented=self_complemented,
            twice_the_same=twice_the_same,
            random_start=random_start,
            n_stumps=n_stumps,
            c_bound_sol=c_bound_sol,
            estimators_generator=estimators_generator,
            mincq_tracking=mincq_tracking,
            weight_add=weight_add,
            weight_strategy=weight_strategy,
            weight_update=weight_update,
            **kwargs
        )
\ No newline at end of file
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoostBroken"
class MVCBBoostBroken(MultiviewCBoundBoostingAdapt):
    """``MultiviewCBoundBoostingAdapt`` defaulting to the ``"c_bound_based_broken"`` weight strategy.

    The constructor only fixes default values: every named argument and any
    extra keyword argument is forwarded unchanged to
    ``MultiviewCBoundBoostingAdapt.__init__``.
    """

    def __init__(
        self,
        n_max_iterations=10,
        random_state=None,
        self_complemented=True,
        twice_the_same=False,
        random_start=False,
        n_stumps=1,
        c_bound_sol=True,
        estimators_generator="Stumps",
        mincq_tracking=False,
        weight_add=3,
        weight_strategy="c_bound_based_broken",
        weight_update="multiplicative",
        **kwargs
    ):
        MultiviewCBoundBoostingAdapt.__init__(
            self,
            n_max_iterations=n_max_iterations,
            random_state=random_state,
            self_complemented=self_complemented,
            twice_the_same=twice_the_same,
            random_start=random_start,
            n_stumps=n_stumps,
            c_bound_sol=c_bound_sol,
            estimators_generator=estimators_generator,
            mincq_tracking=mincq_tracking,
            weight_add=weight_add,
            weight_strategy=weight_strategy,
            weight_update=weight_update,
            **kwargs
        )
\ No newline at end of file
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoostFull"
class MVCBBoostFull(MultiviewCBoundBoostingAdapt):
    """``MultiviewCBoundBoostingAdapt`` with ``full_combination`` enabled by default.

    The constructor only fixes default values: every named argument and any
    extra keyword argument is forwarded unchanged to
    ``MultiviewCBoundBoostingAdapt.__init__``.
    """

    def __init__(
        self,
        n_max_iterations=10,
        random_state=None,
        self_complemented=True,
        twice_the_same=False,
        random_start=False,
        n_stumps=1,
        c_bound_sol=True,
        estimators_generator="Stumps",
        mincq_tracking=False,
        weight_add=3,
        weight_strategy="c_bound_based_dec",
        weight_update="multiplicative",
        full_combination=True,
        **kwargs
    ):
        MultiviewCBoundBoostingAdapt.__init__(
            self,
            n_max_iterations=n_max_iterations,
            random_state=random_state,
            self_complemented=self_complemented,
            twice_the_same=twice_the_same,
            random_start=random_start,
            n_stumps=n_stumps,
            c_bound_sol=c_bound_sol,
            estimators_generator=estimators_generator,
            mincq_tracking=mincq_tracking,
            weight_add=weight_add,
            weight_strategy=weight_strategy,
            weight_update=weight_update,
            full_combination=full_combination,
            **kwargs
        )
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoostMinCQ"
class MVCBBoostMinCQ(MultiviewCBoundBoostingAdapt):
def __init__(self, n_max_iterations=10, random_state=None,
self_complemented=True, twice_the_same=False,
random_start=False, n_stumps=1, c_bound_sol=True,
estimators_generator="Stumps", mincq_tracking=False,
weight_add=3, weight_strategy="c_bound_based_dec",
weight_update="multiplicative", full_combination=False, min_cq_pred=True, **kwargs):
MultiviewCBoundBoostingAdapt.__init__(self, n_max_iterations=n_max_iterations, random_state=random_state,
self_complemented=self_complemented, twice_the_same=twice_the_same,
random_start=random_start, n_stumps=n_stumps, c_bound_sol=c_bound_sol,
estimators_generator=estimators_generator, mincq_tracking=mincq_tracking,
weight_add=weight_add, weight_strategy=weight_strategy,
weight_update=weight_update, full_combination=full_combination, min_cq_pred=min_cq_pred, **kwargs)
from .additions.mv_cb_boost_adapt import MultiviewCBoundBoostingAdapt
classifier_class_name = "MVCBBoostSig"
class MVCBBoostSig(MultiviewCBoundBoostingAdapt):
    """``MultiviewCBoundBoostingAdapt`` with ``use_previous_voters`` enabled by default.

    The constructor only fixes default values: every named argument and any
    extra keyword argument is forwarded unchanged to
    ``MultiviewCBoundBoostingAdapt.__init__``.
    """

    def __init__(
        self,
        n_max_iterations=100,
        random_state=None,
        self_complemented=True,
        twice_the_same=False,
        random_start=False,
        n_stumps=1,
        c_bound_sol=True,
        estimators_generator="Stumps",
        mincq_tracking=False,
        weight_add=3,
        weight_strategy="c_bound_based_broken",
        weight_update="multiplicative",
        use_previous_voters=True,
        **kwargs
    ):
        MultiviewCBoundBoostingAdapt.__init__(
            self,
            n_max_iterations=n_max_iterations,
            random_state=random_state,
            self_complemented=self_complemented,
            twice_the_same=twice_the_same,
            random_start=random_start,
            n_stumps=n_stumps,
            c_bound_sol=c_bound_sol,
            estimators_generator=estimators_generator,
            mincq_tracking=mincq_tracking,
            weight_add=weight_add,
            weight_strategy=weight_strategy,
            weight_update=weight_update,
            use_previous_voters=use_previous_voters,
            **kwargs
        )
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment