diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index 2002b3e81d36a421a5973aa981a515ca81a32112..6789ca1745c43636ad93ad07f2dacce38dc609b7 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -92,6 +92,35 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin):
 
         return probas
 
+    def predict_proba_t(self, X):
+        """Compute one-hot class probabilities for the samples in X.
+
+        Parameters
+        ----------
+        X : array-like, shape = [n_samples, n_features]
+            Samples to classify.
+
+        Returns
+        -------
+        probas : ndarray, shape = [n_samples, 2]
+            Column 1 is 1.0 where the tested attribute exceeds the threshold,
+            column 0 is 1.0 elsewhere; the columns are swapped when the stump
+            direction is -1.
+
+        """
+        check_is_fitted(self, 'classes_')
+        X = np.asarray(X)
+        probas = np.zeros((X.shape[0], 2))
+        positive_class = np.argwhere(X[:, self.attribute_index] > self.threshold)
+        negative_class = np.setdiff1d(range(X.shape[0]), positive_class)
+        probas[positive_class, 1] = 1.0
+        probas[negative_class, 0] = 1.0
+
+        if self.direction == -1:
+            probas = 1 - probas
+
+        return probas
+
     def reverse_decision(self):
         self.direction *= -1
 
@@ -170,7 +199,7 @@ class StumpsClassifiersGenerator(ClassifiersGenerator):
         Whether or not a binary complement voter must be generated for each voter. Defaults to False.
 
""" - def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=True): + def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=False): super(StumpsClassifiersGenerator, self).__init__(self_complemented) self.n_stumps_per_attribute = n_stumps_per_attribute self.check_diff = check_diff diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py index 716ebf8c10a7523b21f9bf96a7fa25c959240b54..182c2e169ec3ce65ff59d2c6056de8fa9182f13a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py @@ -14,7 +14,7 @@ from ... import Metrics class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=None): + def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=100, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=None): super(ColumnGenerationClassifier, self).__init__() self.epsilon = epsilon self.n_max_iterations = n_max_iterations @@ -53,6 +53,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.initialize() self.train_metrics = [] self.gammas = [] + self.list_weights= [] self.bounds = [] self.previous_votes = [] # w = [0.5,0.5] @@ -79,6 +80,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): w, alpha = self._restricted_master_problem(previous_w=w, previous_alpha=alpha) cbound = self.compute_empiric_cbound(w, y_kernel_matrix) self.c_bounds.append(cbound) + self.list_weights.append(w) self.update_values(h_values, worst_h_index, alpha, w) @@ -126,7 +128,7 @@ class 
ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.step_decisions = np.zeros(classification_matrix.shape) self.step_prod = np.zeros(classification_matrix.shape) for weight_index in range(self.weights_.shape[0]-1): - margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + margins = np.sum(classification_matrix[:, :weight_index+1]* self.list_weights[weight_index], axis=1) signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 self.step_decisions[:, weight_index] = signs_array diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py index 04395dfec96f3069193fb96adf658ae858514116..bbad3086e4e115da5db25ecfa39c0a6f2951929b 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py @@ -52,7 +52,6 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): if self.estimators_generator is None: self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) - # Step 1: We fit the classifiers generator and get its classification matrix. 
self.estimators_generator.fit(X, y_neg) # hint: This is equivalent to construct a new X diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py index 805ac493441c825b7d7afaecc335f24bdd63b537..8ef03c82debc074464520d16fa11502d2a9348ac 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py @@ -22,7 +22,7 @@ from sklearn.preprocessing import LabelEncoder from ..Monoview.Additions.BoostUtils import ConvexProgram, StumpsClassifiersGenerator -from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform +from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform, change_label_to_zero, change_label_to_minus from ..Metrics import zero_one_loss # logger = logging.getLogger('MinCq') @@ -71,7 +71,7 @@ class MinCqClassifier(VotingClassifier): # Validations assert 0 < self.mu <= 1, "MinCqClassifier: mu parameter must be in (0, 1]" assert xor(bool(self.estimators_generator), bool(self.estimators)), "MinCqClassifier: exactly one of estimator_generator or estimators must be used." - X, y = check_X_y(X, y) + X, y = check_X_y(X, change_label_to_minus(y)) # Fit the estimators using VotingClassifier's fit method. This will also fit a LabelEncoder that can be # used to "normalize" labels (0, 1, 2, ...). In the case of binary classification, the two classes will be 0 and 1. 
@@ -98,36 +98,17 @@ class MinCqClassifier(VotingClassifier): self.estimators = [('ds{}'.format(i), estimator) for i, estimator in enumerate(self.estimators_generator.estimators_)] super().fit(X, y) - # We clean the estimators attribute (as we do not want it to be cloned later) - # self.estimators_ = [] - - # logger.info("Training started...") - # logger.info("Training dataset shape: {}".format(str(np.shape(X)))) - # logger.info("Number of voters: {}".format(len(self.estimators_))) - # Preparation and resolution of the quadratic program # logger.info("Preparing and solving QP...") self.weights = self._solve(X, y) if self.clean_me: self.estimators = [] + # print(self.weights.shape) + # print(np.unique(self.weights)[0:10]) + # import pdb;pdb.set_trace() + self.train_cbound = 1 - (1.0/X.shape[0])*(np.sum(np.multiply(change_label_to_minus(y), np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)))**2)/(np.sum(np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)**2)) return self - # def evaluate_metrics(self, X, y, metrics_list=None, functions_list=None): - # if metrics_list is None: - # metrics_list = [zero_one_loss] - # - # if functions_list is None: - # functions_list = [] - # else: - # raise NotImplementedError - # - # # Predict, evaluate metrics. 
- # predictions = self.predict(X) - # metrics_results = {metric.__name__: metric(y, predictions) for metric in metrics_list} - # - # metrics_dataframe = ResultsDataFrame([metrics_results]) - # return metrics_dataframe - def _binary_classification_matrix(self, X): probas = self.transform(X) predicted_labels = np.argmax(probas, axis=2) @@ -149,7 +130,7 @@ class MinCqClassifier(VotingClassifier): pred = super().predict(X) if self.clean_me: self.estimators = [] - return pred + return change_label_to_zero(pred) def _solve(self, X, y): y = self.le_.transform(y) @@ -294,6 +275,7 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): # Keep learning information for further use. self.learner_info_ = {} + print(np.unique(weights)) # We count the number of non-zero weights, including the implicit voters. # TODO: Verify how we define non-zero weights here, could be if the weight is near 1/2n. @@ -303,35 +285,8 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): # Conversion of the weights of the n first voters to weights on the implicit 2n voters. # See Section 7.1 of [2] for an explanation. - return np.array([2 * q - 1.0 / len(self.estimators_) for q in weights]) - - # def evaluate_metrics(self, X, y, metrics_list=None, functions_list=None): - # if metrics_list is None: - # metrics_list = [zero_one_loss] - # - # if functions_list is None: - # functions_list = [] - # - # # Transductive setting: we only predict the X for labeled y - # if isinstance(y, np.ma.MaskedArray): - # labeled = np.where(np.logical_not(y.mask))[0] - # X = np.array(X[labeled]) - # y = np.array(y[labeled]) - # - # # Predict, evaluate metrics. - # predictions = self.predict(X) - # metrics_results = {metric.__name__: metric(y, predictions) for metric in metrics_list} - # - # # TODO: Repair in the case of non-{-1, 1} labels. 
- # assert set(y) == {-1, 1} - # classification_matrix = self._binary_classification_matrix(X) - # - # for function in functions_list: - # metrics_results[function.__name__] = function(classification_matrix, y, self.weights) - # - # metrics_dataframe = ResultsDataFrame([metrics_results]) - # return metrics_dataframe - + # return np.array([2 * q - 1.0 / len(self.estimators_) for q in weights]) + return np.array(weights) def build_laplacian(X, n_neighbors=None): clf = SpectralEmbedding(n_neighbors=n_neighbors) @@ -373,7 +328,7 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): return {"random_state":self.random_state, "mu":self.mu, "n_stumps_per_attribute":self.n_stumps_per_attribute} def getInterpret(self, directory, y_test): - interpret_string = "" + interpret_string = "Cbound on train :"+str(self.train_cbound) # interpret_string += "Train C_bound value : "+str(self.cbound_train) # y_rework = np.copy(y_test) # y_rework[np.where(y_rework==0)] = -1 diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py new file mode 100644 index 0000000000000000000000000000000000000000..216853a6fd4a80603ce16b40829bd3cecdb113ff --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py @@ -0,0 +1,98 @@ +from sklearn.externals.six import iteritems +from pyscm.scm import SetCoveringMachineClassifier as scm +from sklearn.base import BaseEstimator, ClassifierMixin +import numpy as np + +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero +from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, BaseBoost +# Author-Info +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + +class SCMPregen(scm, BaseMonoviewClassifier, BaseBoost): + + def __init__(self, random_state=None, 
model_type="conjunction", + max_rules=10, p=0.1, n_stumps=10,self_complemented=True, **kwargs): + super(SCMPregen, self).__init__( + random_state=random_state, + model_type=model_type, + max_rules=max_rules, + p=p + ) + self.param_names = ["model_type", "max_rules", "p", "n_stumps", "random_state"] + self.distribs = [["conjunction", "disjunction"], + CustomRandint(low=1, high=15), + CustomUniform(loc=0, state=1), [n_stumps], [random_state]] + self.classed_params = [] + self.weird_strings = {} + self.self_complemented = self_complemented + self.n_stumps = n_stumps + self.estimators_generator = None + + def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): + pregen_X, pregen_y = self.pregen_voters(X, y) + super(SCMPregen, self).fit(pregen_X, pregen_y) + return self + + def predict(self, X): + print('poul') + pregen_X, _ = self.pregen_voters(X) + print('from') + pred = super(SCMPregen, self).predict(pregen_X) + return pred + + def pregen_voters(self, X, y=None): + if y is not None: + if self.estimators_generator is None: + self.estimators_generator = StumpsClassifiersGenerator( + n_stumps_per_attribute=self.n_stumps, + self_complemented=self.self_complemented) + self.estimators_generator.fit(X, y) + else: + neg_y=None + classification_matrix = self._binary_classification_matrix_t(X) + return classification_matrix, y + + def _collect_probas_t(self, X): + print('jb') + for est in self.estimators_generator.estimators_: + print(type(est)) + print(est.predict_proba_t(X)) + print('ha') + return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) + + def _binary_classification_matrix_t(self, X): + probas = self._collect_probas_t(X) + predicted_labels = np.argmax(probas, axis=2) + predicted_labels[predicted_labels == 0] = -1 + values = np.max(probas, axis=2) + return (predicted_labels * values).T + + + def canProbas(self): + """Used to know if the classifier can return label probabilities""" + return True + + def 
getInterpret(self, directory, y_test): + interpretString = "Model used : " + str(self.model_) + return interpretString + + +def formatCmdArgs(args): + """Used to format kwargs for the parsed args""" + kwargsDict = {"model_type": args.SCP_model_type, + "p": args.SCP_p, + "max_rules": args.SCP_max_rules, + "n_stumps": args.SCP_stumps} + return kwargsDict + + +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) + return paramsSet + + diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index 2fc233f2a1e8725c13d431b6d45ba851a48a2f58..2bfdbe1a32563b37b8b730a435000b31b6bc979e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -178,6 +178,20 @@ def parseTheArgs(arguments): groupSCM.add_argument('--SCM_model_type', metavar='STRING', action='store', help='Max number of rules for SCM', default="conjunction") + groupSCMPregen = parser.add_argument_group('SCMPregen arguments') + groupSCMPregen.add_argument('--SCP_max_rules', metavar='INT', type=int, + action='store', + help='Max number of rules for SCM', default=1) + groupSCMPregen.add_argument('--SCP_p', metavar='FLOAT', type=float, + action='store', + help='Max number of rules for SCM', default=1.0) + groupSCMPregen.add_argument('--SCP_model_type', metavar='STRING', action='store', + help='Max number of rules for SCM', + default="conjunction") + groupSCMPregen.add_argument('--SCP_stumps', metavar='INT', type=int, + action='store', + help='Number of stumps per attribute', default=1) + groupCQBoost = parser.add_argument_group('CQBoost arguments') groupCQBoost.add_argument('--CQB_mu', metavar='FLOAT', type=float, action='store', help='Set the mu parameter 
for CQBoost', default=0.001)