Skip to content
Snippets Groups Projects
Commit e5fbb2f9 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added some clfs"

parent d0e487aa
No related branches found
No related tags found
No related merge requests found
Pipeline #11472 failed
...@@ -18,21 +18,22 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier): ...@@ -18,21 +18,22 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
""" """
def __init__(self, random_state=None, max_depth=None, def __init__(self, random_state=None, max_depth=None,
criterion='gini', splitter='best', **kwargs): criterion='gini', splitter='best', class_weight='balanced',
**kwargs):
DecisionTreeClassifier.__init__(self, DecisionTreeClassifier.__init__(self,
max_depth=max_depth, max_depth=max_depth,
criterion=criterion, criterion=criterion,
class_weight="balanced",
splitter=splitter, splitter=splitter,
class_weight=class_weight,
random_state=random_state random_state=random_state
) )
self.param_names = ["max_depth", "criterion", "splitter", self.param_names = ["max_depth", "criterion", "splitter", "class_weight",
'random_state'] 'random_state']
self.classed_params = [] self.classed_params = []
self.distribs = [CustomRandint(low=1, high=300), self.distribs = [CustomRandint(low=1, high=300),
["gini", "entropy"], ["gini", "entropy"],
["best", "random"], [random_state]] ["best", "random"],["balanced", None] ,[random_state]]
self.weird_strings = {} self.weird_strings = {}
def get_interpretation(self, directory, base_file_name, y_test, feature_ids, def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
......
import os
import time
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from .. import metrics
from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph
from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.decision_tree import DecisionTree
from ..utils.base import base_boosting_estimators
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "REPboost"
class RepDT(DecisionTree):
    """Decision tree that emulates sample weights by repeating samples.

    Instead of forwarding ``sample_weight`` to the underlying tree, ``fit``
    converts the weights into integer repetition counts and trains on a
    dataset in which every sample appears that many times.
    """

    def fit(self, X, y, sample_weight=None, check_input=True):
        """Fit the tree, turning sample weights into sample repetitions.

        Parameters
        ----------
        X : 2D array-like
            Training samples, one row per sample.
        y : 1D array-like
            Labels, one per row of ``X``.
        sample_weight : 1D array-like or None
            Per-sample weights. When given, they are converted to repetition
            counts with a precision of 5 decimal places.
        check_input : bool
            Accepted for signature compatibility; it is not forwarded to
            ``DecisionTree.fit``.

        Returns
        -------
        self
        """
        if sample_weight is not None:
            new_X, new_y = self.fake_repetitions(X, y, sample_weight,
                                                 precision=5)
        else:
            new_X, new_y = X, y
        DecisionTree.fit(self, new_X, new_y)
        return self

    def fake_repetitions(self, X, y, sample_weight, precision=3):
        """Build a dataset in which each sample is repeated per its weight.

        Weights are scaled by ``10 ** precision`` and rounded to the nearest
        integer; a count of zero is raised to one so that no sample
        disappears; the counts are then divided by their greatest common
        divisor to keep the repeated dataset as small as possible.

        Parameters
        ----------
        X : 2D array-like
            Samples, one row per sample.
        y : 1D array-like
            Labels, one per row of ``X``.
        sample_weight : 1D array-like
            Non-negative weights, one per sample.
        precision : int
            Number of decimal places of the weights that are kept.

        Returns
        -------
        (new_X, new_y) : tuple of numpy.ndarray
            Repeated samples and labels, with the dtypes of ``X`` and ``y``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        scaled = np.asarray(sample_weight, dtype=float) * 10 ** precision
        # Round to nearest: a bare integer cast truncates, so a weight such
        # as 0.29 (0.29 * 100 == 28.999999999999996) would lose a repetition.
        repetitions = np.rint(scaled).astype(np.int64)
        # Keep at least one copy of every sample.
        repetitions[repetitions == 0] = 1
        if repetitions.size:
            # Integer division keeps the counts exact.
            repetitions //= np.gcd.reduce(repetitions)
        # np.repeat preserves the dtypes of X and y (labels stay integers).
        new_X = np.repeat(X, repetitions, axis=0)
        new_y = np.repeat(y, repetitions, axis=0)
        return new_X, new_y
class REPboost(AdaBoostClassifier, BaseMonoviewClassifier):
    """
    This class is an adaptation of scikit-learn's `AdaBoostClassifier <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html#sklearn.ensemble.AdaBoostClassifier>`_

    It boosts ``RepDT`` base estimators (by default a depth-1 tree) with the
    "SAMME" algorithm, and records training/prediction times and staged
    metrics as attributes.
    """

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=RepDT(max_depth=1),
                 base_estimator_config=None, **kwargs):
        """Configure the boosted ensemble and its hyper-parameter search space.

        Parameters
        ----------
        random_state : forwarded to ``AdaBoostClassifier.__init__``.
        n_estimators : int
            Number of boosting iterations, forwarded to
            ``AdaBoostClassifier.__init__``.
        base_estimator : estimator
            Weak learner, forwarded to ``AdaBoostClassifier.__init__``.
        base_estimator_config : stored on the instance, otherwise unused here.
        **kwargs : accepted and ignored.
        """
        # base_estimator = BaseMonoviewClassifier.get_base_estimator(self,
        #                                                            base_estimator,
        #                                                            base_estimator_config)
        AdaBoostClassifier.__init__(self,
                                    random_state=random_state,
                                    n_estimators=n_estimators,
                                    base_estimator=base_estimator,
                                    algorithm="SAMME"
                                    )
        self.param_names = ["n_estimators",]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=100),]
        self.weird_strings = {}
        self.plotted_metric = metrics.zero_one_loss
        self.plotted_metric_name = "zero_one_loss"
        self.base_estimator_config = base_estimator_config
        self.step_predictions = None

    def fit(self, X, y, sample_weight=None):
        """Fit the ensemble and record training diagnostics.

        Sets ``train_time``, ``train_shape``, ``base_predictions`` (the
        prediction of every fitted base estimator on ``X``) and ``metrics``
        (``plotted_metric`` score of each staged prediction on ``X``).

        Parameters
        ----------
        X : array with a ``shape`` attribute
            Training samples.
        y : array-like
            Training labels.
        sample_weight : array-like or None
            Initial sample weights, forwarded to ``AdaBoostClassifier.fit``.

        Returns
        -------
        self
        """
        begin = time.time()
        # Forward the caller's weights instead of silently discarding them.
        AdaBoostClassifier.fit(self, X, y, sample_weight=sample_weight)
        end = time.time()
        self.train_time = end - begin
        self.train_shape = X.shape
        self.base_predictions = np.array(
            [estim.predict(X) for estim in self.estimators_])
        self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in
                                 self.staged_predict(X)])
        return self

    def predict(self, X):
        """Predict labels for ``X`` and record prediction diagnostics.

        Sets ``pred_time`` and ``step_predictions`` (the staged predictions
        on ``X``, one row per boosting step).

        Returns
        -------
        The predictions of ``AdaBoostClassifier.predict``.
        """
        begin = time.time()
        pred = AdaBoostClassifier.predict(self, X)
        end = time.time()
        self.pred_time = end - begin
        self.step_predictions = np.array(
            [step_pred for step_pred in self.staged_predict(X)])
        return pred

    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                           multi_class=False):  # pragma: no cover
        """Return the interpretation string for this classifier.

        The detailed report (feature importances, estimator errors/weights,
        metric graphs and CSV dumps) is currently disabled, so an empty
        string is returned and none of the arguments are used.
        """
        interpretString = ""
        # interpretString += self.get_feature_importance(directory,
        #                                                base_file_name,
        #                                                feature_ids)
        # interpretString += "\n\n Estimator error | Estimator weight\n"
        # interpretString += "\n".join(
        #     [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
        #      error, weight in
        #      zip(self.estimator_errors_, self.estimator_weights_)])
        # step_test_metrics = np.array(
        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
        #      self.step_predictions])
        # get_accuracy_graph(step_test_metrics, "Adaboost",
        #                    os.path.join(directory,
        #                                 base_file_name + "test_metrics.png"),
        #                    self.plotted_metric_name, set="test")
        # np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
        #            step_test_metrics,
        #            delimiter=',')
        # np.savetxt(
        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
        #     self.metrics, delimiter=',')
        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
        #            np.array([self.train_time, self.pred_time]), delimiter=',')
        return interpretString
...@@ -61,7 +61,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier): ...@@ -61,7 +61,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
"relevance", "relevance",
"distance", "distance",
"train_weighting", "b", "pred_train", "normalizer", "train_weighting", "b", "pred_train", "normalizer",
"normalize_dists", "a", "class_weight"] "normalize_dists", "a", "class_weight", "base_estimator"]
self.distribs = [CustomRandint(low=1, high=70), self.distribs = [CustomRandint(low=1, high=70),
[ExpRelevance()], [ExpRelevance()],
[EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()], [EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()],
...@@ -70,7 +70,8 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier): ...@@ -70,7 +70,8 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
[True, False], [True, False],
[RobustScaler()], [RobustScaler()],
[True], CustomRandint(0, 10, 'e-'), [True], CustomRandint(0, 10, 'e-'),
["balanced", None]] ["balanced", None],
]
self.classed_params = [] self.classed_params = []
self.weird_strings = {} self.weird_strings = {}
......
...@@ -63,9 +63,14 @@ class SCM(scm, BaseMonoviewClassifier): ...@@ -63,9 +63,14 @@ class SCM(scm, BaseMonoviewClassifier):
self.classed_params = [] self.classed_params = []
self.weird_strings = {} self.weird_strings = {}
def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): def fit(self, X, y, tiebreaker=None, iteration_callback=None, sample_weight=None, **fit_params):
self.n_features = X.shape[1] if sample_weight is not None:
scm.fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params) new_X, new_y = self.fake_repetitions(X, y, sample_weight, precision=5)
else:
new_X = X
new_y = y
self.n_features = new_X.shape[1]
scm.fit(self, new_X, new_y, tiebreaker=None, iteration_callback=None, **fit_params)
self.feature_importances_ = np.zeros(self.n_features) self.feature_importances_ = np.zeros(self.n_features)
# sum the rules importances : # sum the rules importances :
# rules_importances = estim.get_rules_importances() #activate it when pyscm will implement importance # rules_importances = estim.get_rules_importances() #activate it when pyscm will implement importance
...@@ -76,6 +81,25 @@ class SCM(scm, BaseMonoviewClassifier): ...@@ -76,6 +81,25 @@ class SCM(scm, BaseMonoviewClassifier):
self.feature_importances_ /= np.sum(self.feature_importances_) self.feature_importances_ /= np.sum(self.feature_importances_)
return self return self
def fake_repetitions(self, X, y, sample_weight, precision=3):
    """Build a dataset in which each sample is repeated per its weight.

    Weights are scaled by ``10 ** precision`` and rounded to the nearest
    integer; a count of zero is raised to one so that no sample disappears;
    the counts are then divided by their greatest common divisor to keep the
    repeated dataset as small as possible.

    Parameters
    ----------
    X : 2D array-like
        Samples, one row per sample.
    y : 1D array-like
        Labels, one per row of ``X``.
    sample_weight : 1D array-like
        Non-negative weights, one per sample.
    precision : int
        Number of decimal places of the weights that are kept.

    Returns
    -------
    (new_X, new_y) : tuple of numpy.ndarray
        Repeated samples and labels, with the dtypes of ``X`` and ``y``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    scaled = np.asarray(sample_weight, dtype=float) * 10 ** precision
    # Round to nearest: a bare integer cast truncates, so a weight such as
    # 0.29 (0.29 * 100 == 28.999999999999996) would lose a repetition.
    repetitions = np.rint(scaled).astype(np.int64)
    # Keep at least one copy of every sample.
    repetitions[repetitions == 0] = 1
    if repetitions.size:
        # Integer division keeps the counts exact.
        repetitions //= np.gcd.reduce(repetitions)
    # np.repeat preserves the dtypes of X and y (labels stay integers).
    new_X = np.repeat(X, repetitions, axis=0)
    new_y = np.repeat(y, repetitions, axis=0)
    return new_X, new_y
# def canProbas(self): # def canProbas(self):
# """ # """
# Used to know if the classifier can return label probabilities # Used to know if the classifier can return label probabilities
......
import os
import time
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from .. import metrics
from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph
from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint, CustomUniform
from ..monoview_classifiers.scm import SCM
from ..utils.base import base_boosting_estimators
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "SCMboost"
class SCMboost(AdaBoostClassifier, BaseMonoviewClassifier):
    """
    This class is an adaptation of scikit-learn's `AdaBoostClassifier <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html#sklearn.ensemble.AdaBoostClassifier>`_

    It boosts ``SCM`` base estimators (by default a single-rule disjunction)
    with the "SAMME" algorithm, and records training/prediction times and
    staged metrics as attributes.
    """

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=SCM(p=0.49, max_rules=1, model_type="disjunction"),
                 base_estimator_config=None, **kwargs):
        """Configure the boosted ensemble and its hyper-parameter search space.

        Parameters
        ----------
        random_state : forwarded to ``AdaBoostClassifier.__init__``.
        n_estimators : int
            Number of boosting iterations, forwarded to
            ``AdaBoostClassifier.__init__``.
        base_estimator : estimator
            Weak learner, forwarded to ``AdaBoostClassifier.__init__``.
        base_estimator_config : stored on the instance, otherwise unused here.
        **kwargs : only ``base_estimator__p`` is read; when present it
            overrides the ``p`` attribute of the base estimator.
        """
        if "base_estimator__p" in kwargs:
            # The default base_estimator is a single object created when the
            # function is defined; mutating it in place would leak this ``p``
            # into every later SCMboost built with the default. Work on a
            # private copy instead.
            import copy
            base_estimator = copy.deepcopy(base_estimator)
            base_estimator.p = kwargs["base_estimator__p"]
        AdaBoostClassifier.__init__(self,
                                    random_state=random_state,
                                    n_estimators=n_estimators,
                                    base_estimator=base_estimator,
                                    algorithm="SAMME",)
        self.param_names = ["n_estimators", "base_estimator__p"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=100), CustomUniform(loc=0, state=1)]
        self.weird_strings = {}
        self.plotted_metric = metrics.zero_one_loss
        self.plotted_metric_name = "zero_one_loss"
        self.base_estimator_config = base_estimator_config
        self.step_predictions = None

    def fit(self, X, y, sample_weight=None):
        """Fit the ensemble and record training diagnostics.

        Sets ``train_time``, ``train_shape``, ``base_predictions`` (the
        prediction of every fitted base estimator on ``X``) and ``metrics``
        (``plotted_metric`` score of each staged prediction on ``X``).

        Parameters
        ----------
        X : array with a ``shape`` attribute
            Training samples.
        y : array-like
            Training labels.
        sample_weight : array-like or None
            Initial sample weights, forwarded to ``AdaBoostClassifier.fit``.

        Returns
        -------
        self
        """
        begin = time.time()
        # Forward the caller's weights instead of silently discarding them.
        AdaBoostClassifier.fit(self, X, y, sample_weight=sample_weight)
        end = time.time()
        self.train_time = end - begin
        self.train_shape = X.shape
        self.base_predictions = np.array(
            [estim.predict(X) for estim in self.estimators_])
        self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in
                                 self.staged_predict(X)])
        return self

    def predict(self, X):
        """Predict labels for ``X`` and record prediction diagnostics.

        Sets ``pred_time`` and ``step_predictions`` (the staged predictions
        on ``X``, one row per boosting step).

        Returns
        -------
        The predictions of ``AdaBoostClassifier.predict``.
        """
        begin = time.time()
        pred = AdaBoostClassifier.predict(self, X)
        end = time.time()
        self.pred_time = end - begin
        self.step_predictions = np.array(
            [step_pred for step_pred in self.staged_predict(X)])
        return pred

    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                           multi_class=False):  # pragma: no cover
        """Return the interpretation string for this classifier.

        The detailed report (feature importances, estimator errors/weights,
        metric graphs and CSV dumps) is currently disabled, so an empty
        string is returned and none of the arguments are used.
        """
        interpretString = ""
        # interpretString += self.get_feature_importance(directory,
        #                                                base_file_name,
        #                                                feature_ids)
        # interpretString += "\n\n Estimator error | Estimator weight\n"
        # interpretString += "\n".join(
        #     [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
        #      error, weight in
        #      zip(self.estimator_errors_, self.estimator_weights_)])
        # step_test_metrics = np.array(
        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
        #      self.step_predictions])
        # get_accuracy_graph(step_test_metrics, "Adaboost",
        #                    os.path.join(directory,
        #                                 base_file_name + "test_metrics.png"),
        #                    self.plotted_metric_name, set="test")
        # np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
        #            step_test_metrics,
        #            delimiter=',')
        # np.savetxt(
        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
        #     self.metrics, delimiter=',')
        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
        #            np.array([self.train_time, self.pred_time]), delimiter=',')
        return interpretString
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
classifier_class_name = "EarlyFusionSVMRBF"
class EarlyFusionSVMRBF(BaseEarlyFusion):
    """Early-fusion multiview classifier built on the "knn" monoview classifier.

    NOTE(review): the class name refers to an RBF SVM, yet the constructor
    selects ``monoview_classifier="knn"`` and exposes nearest-neighbour
    hyper-parameters -- confirm which of the two is intended.
    """

    def __init__(self, random_state=None, n_neighbors=5,
                 weights='uniform', algorithm='auto', p=2, **kwargs):
        """Forward the settings to ``BaseEarlyFusion`` and declare the
        hyper-parameter search space.

        Every named argument, together with ``**kwargs``, is passed through
        to ``BaseEarlyFusion.__init__``; ``random_state`` is additionally
        stored on the instance.
        """
        BaseEarlyFusion.__init__(
            self,
            random_state=random_state,
            monoview_classifier="knn",
            n_neighbors=n_neighbors,
            weights=weights,
            algorithm=algorithm,
            p=p,
            **kwargs)

        # One candidate distribution per entry of ``param_names``, same order.
        neighbor_counts = CustomRandint(low=1, high=10)
        weighting_schemes = ["uniform", "distance"]
        search_algorithms = ["auto", "ball_tree", "kd_tree", "brute"]
        minkowski_powers = [1, 2]

        self.param_names = [
            "n_neighbors",
            "weights",
            "algorithm",
            "p",
            "random_state",
        ]
        self.classed_params = []
        self.distribs = [
            neighbor_counts,
            weighting_schemes,
            search_algorithms,
            minkowski_powers,
            [random_state],
        ]
        self.weird_strings = {}
        self.random_state = random_state
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment