Commit ceb2362a authored by Baptiste Bauvin

Test SCMpregen graalpy

parent e4a7febd
@@ -92,6 +92,40 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin):
         return probas

+    def predict_proba_t(self, X):
+        """Compute probabilities of possible outcomes for samples in X.
+
+        Parameters
+        ----------
+        X : array-like, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        avg : array-like, shape = [n_samples, n_classes]
+            Weighted average probability for each class per sample.
+        """
+        try:
+            print('plouf')
+            print(X)
+            print("plaf")
+        except:
+            X = np.ones(X.shape)
+        check_is_fitted(self, 'classes_')
+
+        X = np.asarray(X)
+        probas = np.zeros((X.shape[0], 2))
+        positive_class = np.argwhere(X[:, self.attribute_index] > self.threshold)
+        negative_class = np.setdiff1d(range(X.shape[0]), positive_class)
+        probas[positive_class, 1] = 1.0
+        probas[negative_class, 0] = 1.0
+
+        if self.direction == -1:
+            probas = 1 - probas
+
+        return probas
+
     def reverse_decision(self):
         self.direction *= -1
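For context: the added predict_proba_t behaves like a hard-output predict_proba. Each stump thresholds a single attribute and emits 0/1 class probabilities, with the two columns swapped when direction == -1; the try/except around the print calls looks like a GraalPy I/O smoke test (consistent with the commit message) and silently replaces X with ones if printing fails. A self-contained sketch of the decision rule itself, with hypothetical names mirroring the attributes used above:

import numpy as np

def stump_proba(X, attribute_index, threshold, direction=1):
    # Samples above the threshold on the chosen attribute get class 1,
    # the rest get class 0; direction == -1 swaps the two columns.
    X = np.asarray(X)
    probas = np.zeros((X.shape[0], 2))
    above = X[:, attribute_index] > threshold
    probas[above, 1] = 1.0
    probas[~above, 0] = 1.0
    if direction == -1:
        probas = 1 - probas
    return probas

# Example: threshold feature 0 at 0.5
print(stump_proba(np.array([[0.2], [0.8]]), attribute_index=0, threshold=0.5))
# -> [[1. 0.]
#     [0. 1.]]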
@@ -170,7 +204,7 @@ class StumpsClassifiersGenerator(ClassifiersGenerator):
         Whether or not a binary complement voter must be generated for each voter. Defaults to False.
     """
-    def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=True):
+    def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=False):
         super(StumpsClassifiersGenerator, self).__init__(self_complemented)
         self.n_stumps_per_attribute = n_stumps_per_attribute
         self.check_diff = check_diff
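check_diff now defaults to False, and self_complemented controls whether each stump also gets its binary complement voter. The diff does not show how thresholds are placed; a common scheme, assumed here purely for illustration, spaces n_stumps_per_attribute thresholds evenly inside each feature's observed range:

import numpy as np

def stump_thresholds(X, n_stumps_per_attribute):
    # Assumed scheme (not shown in this diff): evenly spaced thresholds
    # strictly inside each feature's range, one set per attribute.
    mins, maxs = X.min(axis=0), X.max(axis=0)
    return [np.linspace(lo, hi, n_stumps_per_attribute + 2)[1:-1]
            for lo, hi in zip(mins, maxs)]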
@@ -14,7 +14,7 @@ from ... import Metrics
 class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
-    def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=None):
+    def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=100, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=None):
         super(ColumnGenerationClassifier, self).__init__()
         self.epsilon = epsilon
         self.n_max_iterations = n_max_iterations
@@ -53,6 +53,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
         self.initialize()
         self.train_metrics = []
         self.gammas = []
+        self.list_weights = []
         self.bounds = []
         self.previous_votes = []
         # w = [0.5,0.5]
@@ -79,6 +80,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
             w, alpha = self._restricted_master_problem(previous_w=w, previous_alpha=alpha)
             cbound = self.compute_empiric_cbound(w, y_kernel_matrix)
             self.c_bounds.append(cbound)
+            self.list_weights.append(w)
             self.update_values(h_values, worst_h_index, alpha, w)
@@ -126,7 +128,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
         self.step_decisions = np.zeros(classification_matrix.shape)
         self.step_prod = np.zeros(classification_matrix.shape)
         for weight_index in range(self.weights_.shape[0]-1):
-            margins = np.sum(classification_matrix[:, :weight_index+1] * self.weights_[:weight_index+1], axis=1)
+            margins = np.sum(classification_matrix[:, :weight_index+1] * self.list_weights[weight_index], axis=1)
             signs_array = np.array([int(x) for x in sign(margins)])
             signs_array[signs_array == -1] = 0
             self.step_decisions[:, weight_index] = signs_array
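The replacement above fixes the intermediate votes: column generation re-solves the whole weight vector at every iteration, so the vote after iteration t should use the weights recorded at that iteration (list_weights[t], appended in the previous hunk), not a prefix of the final weights_. A minimal sketch of the corrected computation, assuming classification_matrix holds each voter's signed output:

import numpy as np

def step_margins(classification_matrix, list_weights, t):
    # Vote after iteration t: only the first t + 1 voters exist, weighted by
    # the solution the restricted master problem returned at that iteration.
    w_t = np.asarray(list_weights[t])            # shape: (t + 1,)
    return classification_matrix[:, :t + 1].dot(w_t)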
@@ -52,7 +52,6 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost):
         if self.estimators_generator is None:
             self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented)

         # Step 1: We fit the classifiers generator and get its classification matrix.
         self.estimators_generator.fit(X, y_neg)
         # hint: This is equivalent to construct a new X
@@ -22,7 +22,7 @@ from sklearn.preprocessing import LabelEncoder
 from ..Monoview.Additions.BoostUtils import ConvexProgram, StumpsClassifiersGenerator
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform, change_label_to_zero, change_label_to_minus
 from ..Metrics import zero_one_loss

 # logger = logging.getLogger('MinCq')
@@ -71,7 +71,7 @@ class MinCqClassifier(VotingClassifier):
         # Validations
         assert 0 < self.mu <= 1, "MinCqClassifier: mu parameter must be in (0, 1]"
         assert xor(bool(self.estimators_generator), bool(self.estimators)), "MinCqClassifier: exactly one of estimator_generator or estimators must be used."
-        X, y = check_X_y(X, y)
+        X, y = check_X_y(X, change_label_to_minus(y))

         # Fit the estimators using VotingClassifier's fit method. This will also fit a LabelEncoder that can be
         # used to "normalize" labels (0, 1, 2, ...). In the case of binary classification, the two classes will be 0 and 1.
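change_label_to_minus and change_label_to_zero are imported in the hunk above; from their use here and in predict below, they presumably map between {0, 1} and {-1, +1} label encodings. A minimal sketch under that assumption (the actual implementations live in MonoviewUtils and are not shown in this diff):

import numpy as np

def change_label_to_minus(y):
    # Assumed behaviour: 0 -> -1, 1 -> +1.
    return np.where(np.asarray(y) == 0, -1, 1)

def change_label_to_zero(y):
    # Assumed behaviour: -1 -> 0, +1 -> +1.
    return np.where(np.asarray(y) == -1, 0, 1)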
@@ -98,36 +98,17 @@ class MinCqClassifier(VotingClassifier):
             self.estimators = [('ds{}'.format(i), estimator) for i, estimator in enumerate(self.estimators_generator.estimators_)]

         super().fit(X, y)

         # We clean the estimators attribute (as we do not want it to be cloned later)
         # self.estimators_ = []
         # logger.info("Training started...")
         # logger.info("Training dataset shape: {}".format(str(np.shape(X))))
         # logger.info("Number of voters: {}".format(len(self.estimators_)))

         # Preparation and resolution of the quadratic program
         # logger.info("Preparing and solving QP...")
         self.weights = self._solve(X, y)
+        if self.clean_me:
+            self.estimators = []
+        # print(self.weights.shape)
+        # print(np.unique(self.weights)[0:10])
+        # import pdb;pdb.set_trace()
+        self.train_cbound = 1 - (1.0/X.shape[0])*(np.sum(np.multiply(change_label_to_minus(y), np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)))**2)/(np.sum(np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)**2))
         return self

-    # def evaluate_metrics(self, X, y, metrics_list=None, functions_list=None):
-    #     if metrics_list is None:
-    #         metrics_list = [zero_one_loss]
-    #
-    #     if functions_list is None:
-    #         functions_list = []
-    #     else:
-    #         raise NotImplementedError
-    #
-    #     # Predict, evaluate metrics.
-    #     predictions = self.predict(X)
-    #     metrics_results = {metric.__name__: metric(y, predictions) for metric in metrics_list}
-    #
-    #     metrics_dataframe = ResultsDataFrame([metrics_results])
-    #     return metrics_dataframe

     def _binary_classification_matrix(self, X):
         probas = self.transform(X)
         predicted_labels = np.argmax(probas, axis=2)
@@ -149,7 +130,7 @@ class MinCqClassifier(VotingClassifier):
         pred = super().predict(X)
         if self.clean_me:
             self.estimators = []
-        return pred
+        return change_label_to_zero(pred)

     def _solve(self, X, y):
         y = self.le_.transform(y)
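The self.train_cbound one-liner added in the fit hunk above is the empirical C-bound: with y_i in {-1, +1} and m_i the weighted average vote on sample i, it evaluates 1 - ((1/n) * sum_i y_i m_i)^2 / ((1/n) * sum_i m_i^2), which is algebraically what the dense expression computes. An equivalent, more readable sketch (the helper name is hypothetical):

import numpy as np

def empirical_cbound(y_minus, classification_matrix, weights):
    # y_minus: labels in {-1, +1}; classification_matrix: (n_samples, n_voters).
    margins = np.average(classification_matrix, axis=1, weights=weights)
    first_moment = np.mean(y_minus * margins)    # E[y * m]
    second_moment = np.mean(margins ** 2)        # E[m^2]
    return 1 - first_moment ** 2 / second_moment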
@@ -294,6 +275,7 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier):
         # Keep learning information for further use.
         self.learner_info_ = {}
+        print(np.unique(weights))

         # We count the number of non-zero weights, including the implicit voters.
         # TODO: Verify how we define non-zero weights here, could be if the weight is near 1/2n.
@@ -303,35 +285,8 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier):
         # Conversion of the weights of the n first voters to weights on the implicit 2n voters.
         # See Section 7.1 of [2] for an explanation.
-        return np.array([2 * q - 1.0 / len(self.estimators_) for q in weights])
-
-    # def evaluate_metrics(self, X, y, metrics_list=None, functions_list=None):
-    #     if metrics_list is None:
-    #         metrics_list = [zero_one_loss]
-    #
-    #     if functions_list is None:
-    #         functions_list = []
-    #
-    #     # Transductive setting: we only predict the X for labeled y
-    #     if isinstance(y, np.ma.MaskedArray):
-    #         labeled = np.where(np.logical_not(y.mask))[0]
-    #         X = np.array(X[labeled])
-    #         y = np.array(y[labeled])
-    #
-    #     # Predict, evaluate metrics.
-    #     predictions = self.predict(X)
-    #     metrics_results = {metric.__name__: metric(y, predictions) for metric in metrics_list}
-    #
-    #     # TODO: Repair in the case of non-{-1, 1} labels.
-    #     assert set(y) == {-1, 1}
-    #     classification_matrix = self._binary_classification_matrix(X)
-    #
-    #     for function in functions_list:
-    #         metrics_results[function.__name__] = function(classification_matrix, y, self.weights)
-    #
-    #     metrics_dataframe = ResultsDataFrame([metrics_results])
-    #     return metrics_dataframe
+        # return np.array([2 * q - 1.0 / len(self.estimators_) for q in weights])
+        return np.array(weights)

 def build_laplacian(X, n_neighbors=None):
     clf = SpectralEmbedding(n_neighbors=n_neighbors)
@@ -373,7 +328,7 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier):
         return {"random_state": self.random_state, "mu": self.mu, "n_stumps_per_attribute": self.n_stumps_per_attribute}

     def getInterpret(self, directory, y_test):
-        interpret_string = ""
+        interpret_string = "Cbound on train :" + str(self.train_cbound)
         # interpret_string += "Train C_bound value : "+str(self.cbound_train)
         # y_rework = np.copy(y_test)
         # y_rework[np.where(y_rework==0)] = -1
from sklearn.externals.six import iteritems
from pyscm.scm import SetCoveringMachineClassifier as scm
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero
from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, BaseBoost

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


class SCMPregen(scm, BaseMonoviewClassifier, BaseBoost):

    def __init__(self, random_state=None, model_type="conjunction",
                 max_rules=10, p=0.1, n_stumps=10, self_complemented=True, **kwargs):
        super(SCMPregen, self).__init__(
            random_state=random_state,
            model_type=model_type,
            max_rules=max_rules,
            p=p
        )
        self.param_names = ["model_type", "max_rules", "p", "n_stumps", "random_state"]
        self.distribs = [["conjunction", "disjunction"],
                         CustomRandint(low=1, high=15),
                         CustomUniform(loc=0, state=1), [n_stumps], [random_state]]
        self.classed_params = []
        self.weird_strings = {}
        self.self_complemented = self_complemented
        self.n_stumps = n_stumps
        self.estimators_generator = None

    def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params):
        pregen_X, pregen_y = self.pregen_voters(X, y)
        super(SCMPregen, self).fit(pregen_X, pregen_y)
        return self

    def predict(self, X):
        print('poul')
        pregen_X, _ = self.pregen_voters(X)
        print('from')
        pred = super(SCMPregen, self).predict(pregen_X)
        return pred

    def pregen_voters(self, X, y=None):
        if y is not None:
            if self.estimators_generator is None:
                self.estimators_generator = StumpsClassifiersGenerator(
                    n_stumps_per_attribute=self.n_stumps,
                    self_complemented=self.self_complemented)
            self.estimators_generator.fit(X, y)
        else:
            neg_y = None
        classification_matrix = self._binary_classification_matrix_t(X)
        return classification_matrix, y

    def _collect_probas_t(self, X):
        print('jb')
        for est in self.estimators_generator.estimators_:
            print(type(est))
            print(est.predict_proba_t(X))
        print('ha')
        return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_])

    def _binary_classification_matrix_t(self, X):
        probas = self._collect_probas_t(X)
        predicted_labels = np.argmax(probas, axis=2)
        predicted_labels[predicted_labels == 0] = -1
        values = np.max(probas, axis=2)
        return (predicted_labels * values).T

    def canProbas(self):
        """Used to know if the classifier can return label probabilities."""
        return True

    def getInterpret(self, directory, y_test):
        interpretString = "Model used : " + str(self.model_)
        return interpretString


def formatCmdArgs(args):
    """Used to format kwargs for the parsed args."""
    kwargsDict = {"model_type": args.SCP_model_type,
                  "p": args.SCP_p,
                  "max_rules": args.SCP_max_rules,
                  "n_stumps": args.SCP_stumps}
    return kwargsDict


def paramsToSet(nIter, randomState):
    paramsSet = []
    for _ in range(nIter):
        paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]),
                          "max_rules": randomState.randint(1, 15),
                          "p": randomState.random_sample()})
    return paramsSet
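The pregeneration idea in this new file: fit decision stumps on the training data, re-encode every sample as the vector of signed stump confidences (the argmax label mapped to {-1, +1}, scaled by the winning probability), and let pyscm's SetCoveringMachineClassifier build its rules over those columns instead of the raw features; predict re-encodes test samples with the same stumps. A hedged usage sketch, with arbitrary shapes and hyperparameter values:

import numpy as np

# Illustrative only: data, shapes and hyperparameters are arbitrary.
rng = np.random.RandomState(42)
X_train, y_train = rng.rand(50, 4), rng.randint(0, 2, 50)

clf = SCMPregen(random_state=42, model_type="conjunction",
                max_rules=5, p=0.5, n_stumps=3)
clf.fit(X_train, y_train)             # stumps are fit here, then SCM runs on their outputs
preds = clf.predict(rng.rand(10, 4))  # the same stumps re-encode new samples first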
@@ -178,6 +178,20 @@ def parseTheArgs(arguments):
     groupSCM.add_argument('--SCM_model_type', metavar='STRING', action='store',
                           help='Model type (conjunction or disjunction) for SCM', default="conjunction")

+    groupSCMPregen = parser.add_argument_group('SCMPregen arguments')
+    groupSCMPregen.add_argument('--SCP_max_rules', metavar='INT', type=int,
+                                action='store',
+                                help='Max number of rules for SCMPregen', default=1)
+    groupSCMPregen.add_argument('--SCP_p', metavar='FLOAT', type=float,
+                                action='store',
+                                help='Trade-off parameter p for SCMPregen', default=1.0)
+    groupSCMPregen.add_argument('--SCP_model_type', metavar='STRING', action='store',
+                                help='Model type (conjunction or disjunction) for SCMPregen',
+                                default="conjunction")
+    groupSCMPregen.add_argument('--SCP_stumps', metavar='INT', type=int,
+                                action='store',
+                                help='Number of stumps per attribute', default=1)
+
     groupCQBoost = parser.add_argument_group('CQBoost arguments')
     groupCQBoost.add_argument('--CQB_mu', metavar='FLOAT', type=float, action='store',
                               help='Set the mu parameter for CQBoost', default=0.001)
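Assuming parseTheArgs forwards its argument list to argparse's parse_args (its signature suggests it does), the new flags can be exercised directly; the values below are illustrative:

# Illustrative only: parseTheArgs is the function extended above.
args = parseTheArgs(['--SCP_max_rules', '5',
                     '--SCP_p', '0.5',
                     '--SCP_model_type', 'disjunction',
                     '--SCP_stumps', '3'])
print(args.SCP_max_rules, args.SCP_p, args.SCP_model_type, args.SCP_stumps)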