Skip to content
Snippets Groups Projects
Commit e5fbb2f9 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added some clfs

parent d0e487aa
Branches
Tags
No related merge requests found
Pipeline #11472 failed
......@@ -18,21 +18,22 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
"""
def __init__(self, random_state=None, max_depth=None,
criterion='gini', splitter='best', **kwargs):
criterion='gini', splitter='best', class_weight='balanced',
**kwargs):
DecisionTreeClassifier.__init__(self,
max_depth=max_depth,
criterion=criterion,
class_weight="balanced",
splitter=splitter,
class_weight=class_weight,
random_state=random_state
)
self.param_names = ["max_depth", "criterion", "splitter",
self.param_names = ["max_depth", "criterion", "splitter", "class_weight",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=300),
["gini", "entropy"],
["best", "random"], [random_state]]
["best", "random"],["balanced", None] ,[random_state]]
self.weird_strings = {}
def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
......
import os
import time
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from .. import metrics
from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph
from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.decision_tree import DecisionTree
from ..utils.base import base_boosting_estimators
# Author-Info
# Module-level metadata for this classifier module.
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
# Holds the name of the classifier class defined in this module ("REPboost");
# presumably read by the platform to locate that class - TODO confirm.
classifier_class_name = "REPboost"
class RepDT(DecisionTree):
    """Decision tree that turns sample weights into repeated samples.

    Instead of forwarding ``sample_weight`` to the underlying tree, ``fit``
    builds a training set in which every sample is duplicated a number of
    times proportional to its weight, then fits the tree on that set.
    """

    def fit(self, X, y, sample_weight=None, check_input=True):
        """Fit the tree, emulating ``sample_weight`` with repeated samples.

        Parameters
        ----------
        X : array-like, one row per sample
        y : array-like, one label per sample
        sample_weight : array-like or None
            When given, it is converted to repetition counts with a
            precision of 5 decimals; when ``None`` the data is used as is.
        check_input : bool
            Accepted for signature compatibility; it is not forwarded.

        Returns
        -------
        self
        """
        if sample_weight is not None:
            new_X, new_y = self.fake_repetitions(X, y, sample_weight,
                                                 precision=5)
        else:
            new_X = X
            new_y = y
        DecisionTree.fit(self, new_X, new_y)
        return self

    def fake_repetitions(self, X, y, sample_weight, precision=3):
        """Duplicate each sample proportionally to its weight.

        Weights are scaled by ``10 ** precision`` and rounded to the nearest
        integer to obtain repetition counts. A count of zero is raised to one
        so that no sample disappears, and the counts are divided by their
        greatest common divisor to keep the output as small as possible.

        Parameters
        ----------
        X : array-like, one row per sample
        y : array-like, one label per sample
        sample_weight : array-like, one weight per sample
        precision : int
            Number of decimals of the weights that are taken into account.

        Returns
        -------
        (new_X, new_y) : tuple of numpy arrays
            The repeated samples and labels, in the original order and with
            the dtypes of ``X`` and ``y`` preserved.

        Raises
        ------
        ValueError
            Raised by ``np.repeat`` if a weight is negative.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Round to nearest instead of truncating: a float product such as
        # 569.9999999999999 must count as 570 repetitions, not 569.
        scaled = np.asarray(sample_weight, dtype=float) * 10 ** precision
        sample_repetitions = np.rint(scaled).astype(np.int64)
        # Every sample is kept at least once.
        sample_repetitions[sample_repetitions == 0] = 1
        gcd = np.gcd.reduce(sample_repetitions)
        if gcd > 1:
            # Exact integer division: gcd divides every count.
            sample_repetitions //= gcd
        # np.repeat keeps the input dtypes, so labels are not coerced to
        # float and non-numeric labels are supported.
        new_X = np.repeat(X, sample_repetitions, axis=0)
        new_y = np.repeat(y, sample_repetitions, axis=0)
        return new_X, new_y
class REPboost(AdaBoostClassifier, BaseMonoviewClassifier):
    """
    This class is an adaptation of scikit-learn's `AdaBoostClassifier <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html#sklearn.ensemble.AdaBoostClassifier>`_

    The boosting uses the "SAMME" algorithm and, by default, a depth-1
    ``RepDT`` as base estimator. Besides the fitted model, the instance
    records training/prediction times and per-iteration predictions.
    """

    # NOTE: the default ``base_estimator`` is a single RepDT instance created
    # when the class is defined and shared by every REPboost built without an
    # explicit estimator; it is never mutated in this class.
    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=RepDT(max_depth=1),
                 base_estimator_config=None, **kwargs):
        """Store the boosting configuration and the hyper-parameter search space.

        Parameters
        ----------
        random_state : forwarded to ``AdaBoostClassifier``
        n_estimators : int, forwarded to ``AdaBoostClassifier``
        base_estimator : estimator forwarded to ``AdaBoostClassifier``
        base_estimator_config : stored as-is on the instance
        **kwargs : accepted and ignored
        """
        # base_estimator = BaseMonoviewClassifier.get_base_estimator(self,
        #                                                    base_estimator,
        #                                                    base_estimator_config)
        AdaBoostClassifier.__init__(self,
                                    random_state=random_state,
                                    n_estimators=n_estimators,
                                    base_estimator=base_estimator,
                                    algorithm="SAMME"
                                    )
        # Hyper-parameter search description: names and matching distributions.
        self.param_names = ["n_estimators",]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=100),]
        self.weird_strings = {}
        # Metric evaluated at each boosting stage in ``fit``.
        self.plotted_metric = metrics.zero_one_loss
        self.plotted_metric_name = "zero_one_loss"
        self.base_estimator_config = base_estimator_config
        self.step_predictions = None

    def fit(self, X, y, sample_weight=None):
        """Fit the boosted ensemble and record training diagnostics.

        Parameters
        ----------
        X : training samples (must expose ``shape``)
        y : training labels
        sample_weight : array-like or None
            Initial sample weights, forwarded to ``AdaBoostClassifier.fit``.

        Returns
        -------
        self
        """
        begin = time.time()
        # Forward the weights: they were previously accepted but dropped.
        AdaBoostClassifier.fit(self, X, y, sample_weight=sample_weight)
        end = time.time()
        self.train_time = end - begin
        self.train_shape = X.shape
        # Predictions of every individual base estimator on the train set.
        self.base_predictions = np.array(
            [estim.predict(X) for estim in self.estimators_])
        # Value of the plotted metric after each boosting iteration.
        self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in
                                 self.staged_predict(X)])
        return self

    def predict(self, X):
        """Predict labels for ``X`` and record timing and staged predictions.

        Returns
        -------
        The predictions of ``AdaBoostClassifier.predict``.
        """
        begin = time.time()
        pred = AdaBoostClassifier.predict(self, X)
        end = time.time()
        self.pred_time = end - begin
        # Predictions of the ensemble after each boosting iteration.
        self.step_predictions = np.array(
            [step_pred for step_pred in self.staged_predict(X)])
        return pred

    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                           multi_class=False):  # pragma: no cover
        """Return the interpretation string of the classifier.

        The detailed report is currently disabled (kept below as commented
        code), so an empty string is returned and nothing is written.
        """
        interpretString = ""
        # interpretString += self.get_feature_importance(directory,
        #                                                base_file_name,
        #                                                feature_ids)
        # interpretString += "\n\n Estimator error | Estimator weight\n"
        # interpretString += "\n".join(
        #     [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
        #      error, weight in
        #      zip(self.estimator_errors_, self.estimator_weights_)])
        # step_test_metrics = np.array(
        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
        #      self.step_predictions])
        # get_accuracy_graph(step_test_metrics, "Adaboost",
        #                    os.path.join(directory,
        #                                 base_file_name + "test_metrics.png"),
        #                    self.plotted_metric_name, set="test")
        # np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
        #            step_test_metrics,
        #            delimiter=',')
        # np.savetxt(
        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
        #     self.metrics, delimiter=',')
        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
        #            np.array([self.train_time, self.pred_time]), delimiter=',')
        return interpretString
......@@ -61,7 +61,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
"relevance",
"distance",
"train_weighting", "b", "pred_train", "normalizer",
"normalize_dists", "a", "class_weight"]
"normalize_dists", "a", "class_weight", "base_estimator"]
self.distribs = [CustomRandint(low=1, high=70),
[ExpRelevance()],
[EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()],
......@@ -70,7 +70,8 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
[True, False],
[RobustScaler()],
[True], CustomRandint(0, 10, 'e-'),
["balanced", None]]
["balanced", None],
]
self.classed_params = []
self.weird_strings = {}
......
......@@ -63,9 +63,14 @@ class SCM(scm, BaseMonoviewClassifier):
self.classed_params = []
self.weird_strings = {}
def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params):
self.n_features = X.shape[1]
scm.fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params)
def fit(self, X, y, tiebreaker=None, iteration_callback=None, sample_weight=None, **fit_params):
if sample_weight is not None:
new_X, new_y = self.fake_repetitions(X, y, sample_weight, precision=5)
else:
new_X = X
new_y = y
self.n_features = new_X.shape[1]
scm.fit(self, new_X, new_y, tiebreaker=None, iteration_callback=None, **fit_params)
self.feature_importances_ = np.zeros(self.n_features)
# sum the rules importances :
# rules_importances = estim.get_rules_importances() #activate it when pyscm will implement importance
......@@ -76,6 +81,25 @@ class SCM(scm, BaseMonoviewClassifier):
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
def fake_repetitions(self, X, y, sample_weight, precision=3):
    """Duplicate each sample proportionally to its weight.

    Weights are scaled by ``10 ** precision`` and rounded to the nearest
    integer to obtain repetition counts. A count of zero is raised to one so
    that no sample disappears, and the counts are divided by their greatest
    common divisor to keep the output as small as possible.

    Parameters
    ----------
    X : array-like, one row per sample
    y : array-like, one label per sample
    sample_weight : array-like, one weight per sample
    precision : int
        Number of decimals of the weights that are taken into account.

    Returns
    -------
    (new_X, new_y) : tuple of numpy arrays
        The repeated samples and labels, in the original order and with the
        dtypes of ``X`` and ``y`` preserved.

    Raises
    ------
    ValueError
        Raised by ``np.repeat`` if a weight is negative.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # Round to nearest instead of truncating: a float product such as
    # 569.9999999999999 must count as 570 repetitions, not 569.
    scaled = np.asarray(sample_weight, dtype=float) * 10 ** precision
    sample_repetitions = np.rint(scaled).astype(np.int64)
    # Every sample is kept at least once.
    sample_repetitions[sample_repetitions == 0] = 1
    gcd = np.gcd.reduce(sample_repetitions)
    if gcd > 1:
        # Exact integer division: gcd divides every count.
        sample_repetitions //= gcd
    # np.repeat keeps the input dtypes, so labels are not coerced to float
    # and non-numeric labels are supported.
    new_X = np.repeat(X, sample_repetitions, axis=0)
    new_y = np.repeat(y, sample_repetitions, axis=0)
    return new_X, new_y
# def canProbas(self):
# """
# Used to know if the classifier can return label probabilities
......
import os
import time
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from .. import metrics
from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph
from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint, CustomUniform
from ..monoview_classifiers.scm import SCM
from ..utils.base import base_boosting_estimators
# Author-Info
# Module-level metadata for this classifier module.
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
# Holds the name of the classifier class defined in this module ("SCMboost");
# presumably read by the platform to locate that class - TODO confirm.
classifier_class_name = "SCMboost"
class SCMboost(AdaBoostClassifier, BaseMonoviewClassifier):
    """
    This class is an adaptation of scikit-learn's `AdaBoostClassifier <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html#sklearn.ensemble.AdaBoostClassifier>`_

    The boosting uses the "SAMME" algorithm and, by default, a single-rule
    disjunction ``SCM`` as base estimator. Besides the fitted model, the
    instance records training/prediction times and per-iteration predictions.
    """

    # NOTE: the default ``base_estimator`` is a single SCM instance created
    # when the class is defined and shared by every SCMboost built without an
    # explicit estimator, so it must never be modified in place.
    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=SCM(p=0.49, max_rules=1, model_type="disjunction"),
                 base_estimator_config=None, **kwargs):
        """Store the boosting configuration and the hyper-parameter search space.

        Parameters
        ----------
        random_state : forwarded to ``AdaBoostClassifier``
        n_estimators : int, forwarded to ``AdaBoostClassifier``
        base_estimator : estimator forwarded to ``AdaBoostClassifier``
        base_estimator_config : stored as-is on the instance
        **kwargs : only ``base_estimator__p`` is read; when present it
            overrides the ``p`` attribute of the base estimator.
        """
        if "base_estimator__p" in kwargs:
            import copy
            # Work on a private copy so that neither the shared default
            # instance nor an estimator owned by the caller is mutated.
            base_estimator = copy.deepcopy(base_estimator)
            base_estimator.p = kwargs["base_estimator__p"]
        AdaBoostClassifier.__init__(self,
                                    random_state=random_state,
                                    n_estimators=n_estimators,
                                    base_estimator=base_estimator,
                                    algorithm="SAMME",)
        # Hyper-parameter search description: names and matching distributions.
        self.param_names = ["n_estimators", "base_estimator__p"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=100), CustomUniform(loc=0, state=1)]
        self.weird_strings = {}
        # Metric evaluated at each boosting stage in ``fit``.
        self.plotted_metric = metrics.zero_one_loss
        self.plotted_metric_name = "zero_one_loss"
        self.base_estimator_config = base_estimator_config
        self.step_predictions = None

    def fit(self, X, y, sample_weight=None):
        """Fit the boosted ensemble and record training diagnostics.

        Parameters
        ----------
        X : training samples (must expose ``shape``)
        y : training labels
        sample_weight : array-like or None
            Initial sample weights, forwarded to ``AdaBoostClassifier.fit``.

        Returns
        -------
        self
        """
        begin = time.time()
        # Forward the weights: they were previously accepted but dropped.
        AdaBoostClassifier.fit(self, X, y, sample_weight=sample_weight)
        end = time.time()
        self.train_time = end - begin
        self.train_shape = X.shape
        # Predictions of every individual base estimator on the train set.
        self.base_predictions = np.array(
            [estim.predict(X) for estim in self.estimators_])
        # Value of the plotted metric after each boosting iteration.
        self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in
                                 self.staged_predict(X)])
        return self

    def predict(self, X):
        """Predict labels for ``X`` and record timing and staged predictions.

        Returns
        -------
        The predictions of ``AdaBoostClassifier.predict``.
        """
        begin = time.time()
        pred = AdaBoostClassifier.predict(self, X)
        end = time.time()
        self.pred_time = end - begin
        # Predictions of the ensemble after each boosting iteration.
        self.step_predictions = np.array(
            [step_pred for step_pred in self.staged_predict(X)])
        return pred

    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                           multi_class=False):  # pragma: no cover
        """Return the interpretation string of the classifier.

        The detailed report is currently disabled (kept below as commented
        code), so an empty string is returned and nothing is written.
        """
        interpretString = ""
        # interpretString += self.get_feature_importance(directory,
        #                                                base_file_name,
        #                                                feature_ids)
        # interpretString += "\n\n Estimator error | Estimator weight\n"
        # interpretString += "\n".join(
        #     [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
        #      error, weight in
        #      zip(self.estimator_errors_, self.estimator_weights_)])
        # step_test_metrics = np.array(
        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
        #      self.step_predictions])
        # get_accuracy_graph(step_test_metrics, "Adaboost",
        #                    os.path.join(directory,
        #                                 base_file_name + "test_metrics.png"),
        #                    self.plotted_metric_name, set="test")
        # np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
        #            step_test_metrics,
        #            delimiter=',')
        # np.savetxt(
        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
        #     self.metrics, delimiter=',')
        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
        #            np.array([self.train_time, self.pred_time]), delimiter=',')
        return interpretString
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
# Holds the name of the classifier class defined in this module
# ("EarlyFusionSVMRBF"); presumably read by the platform to locate that
# class - TODO confirm.
classifier_class_name = "EarlyFusionSVMRBF"
class EarlyFusionSVMRBF(BaseEarlyFusion):
    """Early-fusion classifier built on the ``"knn"`` monoview classifier.

    NOTE(review): the class name mentions SVM-RBF, yet the constructor
    selects ``monoview_classifier="knn"`` and exposes nearest-neighbour
    hyper-parameters - confirm which of the two is intended.
    """

    def __init__(self, random_state=None, n_neighbors=5,
                 weights='uniform', algorithm='auto', p=2, **kwargs):
        """Configure the early fusion and describe its search space.

        Every argument, including the extra keyword arguments, is forwarded
        to ``BaseEarlyFusion.__init__`` together with the ``"knn"`` monoview
        classifier name.
        """
        BaseEarlyFusion.__init__(
            self,
            random_state=random_state,
            monoview_classifier="knn",
            n_neighbors=n_neighbors,
            weights=weights,
            algorithm=algorithm,
            p=p,
            **kwargs)

        # Searchable hyper-parameters, in the same order as their
        # distributions listed below.
        self.param_names = [
            "n_neighbors",
            "weights",
            "algorithm",
            "p",
            "random_state",
        ]
        self.classed_params = []

        neighbors_dist = CustomRandint(low=1, high=10)
        weight_choices = ["uniform", "distance"]
        algorithm_choices = ["auto", "ball_tree", "kd_tree", "brute"]
        p_choices = [1, 2]
        self.distribs = [
            neighbors_dist,
            weight_choices,
            algorithm_choices,
            p_choices,
            [random_state],
        ]

        self.weird_strings = {}
        self.random_state = random_state
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment