diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index d343211690bd9e42da860d951c6a2921370b7b51..5c7c042bb0b65e98a1a2688900faddafdb0711a2 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -677,8 +677,6 @@ def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracie ax.set_ylim(bottom=0.0,top=1.0) ax.set_title(name+" during "+set+" for "+classifier_name) x = np.arange(len(plotted_data)) - if name == "zero_one_loss": - print(plotted_data) scat = ax.scatter(x, np.array(plotted_data), marker=".") if bounds: if boosting_bound: diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index 74ab90e3abd23dab1224a3c297d273968ab00a1c..eee18a47f533b5089f9f1b92c8bf5c1abbd675fa 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -21,7 +21,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): c_bound_choice=True, random_start=True, n_stumps_per_attribute=None, use_r=True, c_bound_sol=True, plotted_metric=Metrics.zero_one_loss, save_train_data=True, - test_graph=True): + test_graph=True, mincq_tracking=True): super(ColumnGenerationClassifierQar, self).__init__() r""" @@ -72,15 +72,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): "twice_the_same", "c_bound_choice", "random_start", "n_stumps", "use_r", "c_bound_sol"] + self.mincq_tracking = mincq_tracking def set_params(self, **params): - # self.self_complemented = params["self_complemented"] - # self.twice_the_same = params["twice_the_same"] - # self.c_bound_choice = params["c_bound_choice"] - # self.random_start = params["random_start"] self.n_max_iterations = params["n_max_iterations"] - # self.n_stumps = params["n_stumps_per_attribute"] - # self.use_r = params["use_r"] return self def fit(self, X, y): @@ -108,13 +103,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): # Print dynamically the step and the error of the current classifier self.it = k - # print( - # "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound, - # self.bounds[-1] > self.train_metrics[-1], - # k + 2, - # self.n_max_iterations, - # self.voter_perfs[-1]), - # end="\r") + print( + "Resp. 
bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound, + self.bounds[-1] > self.train_metrics[-1], + k + 2, + self.n_max_iterations, + self.voter_perfs[-1]), + end="\r") sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y) @@ -171,6 +166,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def step_predict(self, classification_matrix): if classification_matrix.shape != self.train_shape: self.step_decisions = np.zeros(classification_matrix.shape) + self.mincq_step_decisions = np.zeros(classification_matrix.shape) self.step_prod = np.zeros(classification_matrix.shape) for weight_index in range(self.weights_.shape[0]-1): margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) @@ -178,6 +174,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): signs_array[signs_array == -1] = 0 self.step_decisions[:, weight_index] = signs_array self.step_prod[:, weight_index] = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + if self.mincq_tracking: + if weight_index ==0: + self.mincq_step_decisions[:,weight_index] = signs_array + else: + mincq_margins = np.sum(self.mincq_learners[weight_index-1].majority_vote._weights*classification_matrix[:,:weight_index+1], axis=1) + mincq_signs_array = np.array([int(x) for x in sign(mincq_margins)]) + mincq_signs_array[mincq_signs_array == -1] = 0 + self.mincq_step_decisions[:, weight_index] = mincq_signs_array + # self.mincq_step_cbounds = self.mincq_learners[weight_index-1].majority_vote.cbound_value() def update_info_containers(self, y, voter_perf, k): """Is used at each iteration to compute and store all the needed quantities for later analysis""" @@ -201,6 +206,17 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.train_metrics.append(train_metric) self.bounds.append(bound) + if self.mincq_tracking: + from ...MonoviewClassifiers.MinCQ import MinCqLearner + mincq = MinCqLearner(10e-3, "stumps", n_stumps_per_attribute=1) + training_set = self.classification_matrix[:, self.chosen_columns_] + mincq.fit(training_set, y) + mincq_pred = mincq.predict(training_set) + self.mincq_learners.append(mincq) + self.mincq_train_metrics.append(self.plotted_metric.score(y, mincq_pred)) + self.mincq_weights.append(mincq.majority_vote._weights) + self.mincq_c_bounds.append(mincq.majority_vote.cbound_value(training_set, y.reshape((y.shape[0],)))) + def compute_voter_weight(self, voter_perf, sol): """used to compute the voter's weight according to the specified method (edge or error) """ @@ -301,6 +317,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.train_metrics.append(train_metric) self.bounds.append(bound) + if self.mincq_tracking: + self.mincq_train_metrics.append(train_metric) @@ -347,6 +365,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.selected_margins = [] self.tau = [] self.norm=[] + self.mincq_train_metrics = [] + self.mincq_c_bounds = [] + self.mincq_weights = [] + self.mincq_learners = [] + self.mincq_step_decisions = [] + def _compute_epsilon(self, y): """Updating the error variable, the old fashioned way uses the whole majority vote to update the error""" @@ -410,18 +434,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): C1s = 2 * (self.A0 * self.B2 - self.A2s * self.B0) C0s = self.A0 * self.B1s - self.A1s * self.B0 - sols = (-C1s + np.sqrt(C1s 
* C1s - 4 * C2s * C0s)) / (2 * C2s) - sols[np.where(C2s == 0)[0]] = C0s[np.where(C2s == 0)[0]] / C1s[np.where(C2s == 0)[0]] + sols = np.zeros(C0s.shape)-3 + # sols[np.where(C2s == 0)[0]] = C0s[np.where(C2s == 0)[0]] / C1s[np.where(C2s == 0)[0]] + sols[np.where(C2s != 0)[0]] = (-C1s[np.where(C2s != 0)[0]] + np.sqrt(C1s[np.where(C2s != 0)[0]] * C1s[np.where(C2s != 0)[0]] - 4 * C2s[np.where(C2s != 0)[0]] * C0s[np.where(C2s != 0)[0]])) / (2 * C2s[np.where(C2s != 0)[0]]) masked_c_bounds = self.make_masked_c_bounds(sols, bad_margins) - best_hyp_index = np.argmin(masked_c_bounds) - - self.c_bounds.append(masked_c_bounds[best_hyp_index]) - self.margins.append(math.sqrt(self.A2s[best_hyp_index]/m)) - self.disagreements.append(0.5*self.B1s[best_hyp_index]/m) - + if masked_c_bounds.mask.all(): + return "No more pertinent voters", 0 + else: + best_hyp_index = np.argmin(masked_c_bounds) - return sols[best_hyp_index], best_hyp_index + self.c_bounds.append(masked_c_bounds[best_hyp_index]) + self.margins.append(math.sqrt(self.A2s[best_hyp_index]/m)) + self.disagreements.append(0.5*self.B1s[best_hyp_index]/m) + return sols[best_hyp_index], best_hyp_index def make_masked_c_bounds(self, sols, bad_margins): c_bounds = self.compute_c_bounds(sols) @@ -435,6 +461,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): masked_c_bounds[sols < 0] = ma.masked # Masking nan c_bounds masked_c_bounds[np.isnan(c_bounds)] = ma.masked + if not self.twice_the_same: + masked_c_bounds[self.chosen_columns_] = ma.masked return masked_c_bounds def compute_c_bounds(self, sols): @@ -468,9 +496,27 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): for step_index in range(self.step_decisions.shape[1]-1): step_metrics.append(self.plotted_metric.score(y_test, self.step_decisions[:, step_index])) step_metrics = np.array(step_metrics) - np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',') + np.savetxt(directory + "step_test_metrics.csv", step_metrics, + delimiter=',') get_accuracy_graph(step_metrics, self.__class__.__name__, - directory + 'step_test_metrics.png', self.plotted_metric, set="test") + directory + 'step_test_metrics.png', + self.plotted_metric, set="test") + + if self.mincq_tracking: + step_mincq_test_metrics = [] + for step_index in range(self.step_decisions.shape[1] - 1): + step_mincq_test_metrics.append(self.plotted_metric.score(y_test, + self.mincq_step_decisions[:, + step_index])) + # step_mincq_test_metrics = np.array(step_mincq_test_metrics) + np.savetxt(directory + "mincq_step_test_metrics.csv", + step_mincq_test_metrics, + delimiter=',') + get_accuracy_graph(step_metrics, self.__class__.__name__, + directory + 'step_test_metrics_comparaison.png', + self.plotted_metric, step_mincq_test_metrics, + "MinCQ metric", set="test") + step_cbounds = [] for step_index in range(self.step_prod.shape[1]): num = np.sum(y_test*self.step_prod[:, step_index])**2 @@ -494,6 +540,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): directory+'vote_weights.png', "weights", zero_to_one=False) get_accuracy_graph(self.c_bounds, self.__class__.__name__, directory + 'c_bounds.png', "C-Bounds") + if self.mincq_tracking: + get_accuracy_graph(self.c_bounds, self.__class__.__name__, + directory + 'c_bounds_comparaison.png', "1-var mins", self.mincq_c_bounds, "MinCQ min", zero_to_one=False) + get_accuracy_graph(self.train_metrics, self.__class__.__name__, + directory + 'train_metrics_comparaison.png', self.plotted_metric, + 
self.mincq_train_metrics, "MinCQ metrics") get_accuracy_graph(self.previous_margins, self.__class__.__name__, directory + 'margins.png', "Margins", zero_to_one=False) get_accuracy_graph(self.selected_margins, self.__class__.__name__, @@ -520,6 +572,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): delimiter=',') np.savetxt(directory + "disagreements.csv", self.norm, delimiter=',') + if self.mincq_tracking: + np.savetxt(directory + "mincq_cbounds.csv", self.mincq_c_bounds, + delimiter=',') + np.savetxt(directory + "mincq_train_metrics.csv", self.mincq_train_metrics, + delimiter=',') args_dict = dict( (arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 59260507b66a5e3d99ad152db30415bd6b8967e6..f67aac063f2b69dd869871a968e16ff5777260e3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -27,9 +27,8 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) nb_possible_combinations = compute_possible_combinations(params_dict) - if nIter > nb_possible_combinations: - nIter = nb_possible_combinations - randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True, + min_list = np.array([min(nb_possible_combination, nIter) for nb_possible_combination in nb_possible_combinations]) + randomSearch = RandomizedSearchCV(estimator, n_iter=np.sum(min_list), param_distributions=params_dict, refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) detector = randomSearch.fit(X_train, y_train) @@ -54,7 +53,7 @@ def compute_possible_combinations(params_dict): n_possibs[value_index] = len(value) elif isinstance(value, CustomRandint): n_possibs[value_index] = value.get_nb_possibilities() - return np.prod(n_possibs) + return n_possibs def genTestFoldsPreds(X_train, y_train, KFolds, estimator): diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 71d6a839af8dde81632845139aef709998c76388..805275332bfc6b38beafd1e8292cbab04098bd5e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -63,7 +63,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): step_test_metrics = np.array([self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) get_accuracy_graph(step_test_metrics, "Adaboost", directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds)) + get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds), bound_name="boosting bound") np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',') diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py 
b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py index 8a520a0c487a52215e058d9ecad22ca463c5849f..6b0f045ec0ef98a30ad4c43aa8d36c73e1283f67 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py @@ -18,7 +18,7 @@ class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): ) self.param_names = ["n_max_iterations"] - self.distribs = [CustomRandint(low=1, high=500)] + self.distribs = [CustomRandint(low=2, high=1000)] self.classed_params = [] self.weird_strings = {} diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py index ed2271d355c2932fceb68af70b91315153a3995e..351698c2faac20f5e910063657e07d92c9b95be3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py @@ -18,7 +18,7 @@ class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier): ) self.param_names = ["n_max_iterations"] - self.distribs = [CustomRandint(low=1, high=500)] + self.distribs = [CustomRandint(low=2, high=1000)] self.classed_params = [] self.weird_strings = {} diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py new file mode 100644 index 0000000000000000000000000000000000000000..ed018d5d73b1a247c264ff6ed311989b5c67decf --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py @@ -0,0 +1,559 @@ +from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier +from ..Monoview.Additions.BoostUtils import getInterpretBase +from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar + +#### Algorithm code #### + +#-*- coding:utf-8 -*- +""" MinCq learning algorithm + +Related papers: +[1] From PAC-Bayes Bounds to Quadratic Programs for Majority Votes (Laviolette et al., 2011) +[2] Risk Bounds for the Majority Vote: From a PAC-Bayesian Analysis to a Learning Algorithm (Germain et al., 2014) + +http://graal.ift.ulaval.ca/majorityvote/ +""" +__author__ = 'Jean-Francis Roy' + +import logging +import numpy as np +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, polynomial_kernel +# from qp import QP +from ..Monoview.Additions.BoostUtils import ConvexProgram as QP +# from majority_vote import MajorityVote +# from voter import StumpsVotersGenerator, KernelVotersGenerator + +class MinCqLearner(BaseEstimator, ClassifierMixin): + """ + MinCq algorithm learner. See [1, 2] + + Parameters + ---------- + mu : float + The fixed value of the first moment of the margin. + + voters_type : string, optional (default='kernel') + Specifies the type of voters. + It must be one of 'kernel', 'stumps' or 'manual'. If 'manual' is specified, the voters have to be manually set + using the "voters" parameter of the fit function. + + n_stumps_per_attribute : int, optional (default=10) + Specifies the amount of decision stumps per attribute. + It is only significant with 'stumps' voters_type. + + kernel : string, optional (default='rbf') + Specifies the kernel type to be used in the algorithm. + It must be one of 'linear', 'poly', 'rbf'. + + degree : int, optional (default=3) + Degree of the polynomial kernel function ('poly'). 
+        Ignored by all other kernels.
+
+    gamma : float, optional (default=0.0)
+        Kernel coefficient for 'rbf' and 'poly'.
+        If gamma is 0.0 then 1/n_features will be used instead.
+    """
+    def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', degree=3, gamma=0.0):
+        assert mu > 0 and mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]"
+        self.mu = mu
+        self.voters_type = voters_type
+        self.n_stumps_per_attribute = n_stumps_per_attribute
+        self.kernel = kernel
+        self.degree = degree
+        self.gamma = gamma
+        self.log = False
+
+        self.majority_vote = None
+        self.qp = None
+
+    def fit(self, X, y, voters=None):
+        """ Learn majority vote weights using MinCq.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Training data
+
+        y : ndarray, shape=(n_samples,), optional
+            Training labels
+
+        voters : shape=(n_voters,), optional
+            A priori generated voters
+        """
+        # Preparation of the majority vote, using a voter generator that depends on class attributes
+
+        assert self.voters_type in ['stumps', 'kernel', 'manual'], "MinCqLearner: voters_type must be 'stumps', 'kernel' or 'manual'"
+
+        if self.voters_type == 'manual':
+            if voters is None:
+                logging.error("voters_type is 'manual', but no voters have been set.")
+                return self
+
+        else:
+            voters_generator = None
+
+            if self.voters_type == 'stumps':
+                assert self.n_stumps_per_attribute >= 1, 'MinCqLearner: n_stumps_per_attribute must be positive'
+                voters_generator = StumpsVotersGenerator(self.n_stumps_per_attribute)
+
+            elif self.voters_type == 'kernel':
+                assert self.kernel in ['linear', 'poly', 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'"
+
+                gamma = self.gamma
+                if gamma == 0.0:
+                    gamma = 1.0 / np.shape(X)[1]
+
+                if self.kernel == 'linear':
+                    voters_generator = KernelVotersGenerator(linear_kernel)
+                elif self.kernel == 'poly':
+                    voters_generator = KernelVotersGenerator(polynomial_kernel, degree=self.degree, gamma=gamma)
+                elif self.kernel == 'rbf':
+                    voters_generator = KernelVotersGenerator(rbf_kernel, gamma=gamma)
+
+            voters = voters_generator.generate(X, y)
+
+        if self.log:
+            logging.info("MinCq training started...")
+            logging.info("Training dataset shape: {}".format(str(np.shape(X))))
+            logging.info("Number of voters: {}".format(len(voters)))
+
+        self.majority_vote = MajorityVote(voters)
+        n_base_voters = len(self.majority_vote.weights)
+
+        # Preparation and resolution of the quadratic program
+
+        if self.log:
+            logging.info("Preparing QP...")
+        self._prepare_qp(X, y)
+
+        try:
+            if self.log:
+                logging.info("Solving QP...")
+            solver_weights = self.qp.solve()
+
+            # Conversion of the weights of the first n voters to weights on the implicit 2n voters.
+            # See Section 7.1 of [2] for an explanation.
+            self.majority_vote.weights = np.array([2 * q - 1.0 / n_base_voters for q in solver_weights])
+            if self.log:
+                logging.info("First moment of the margin on the training set: {:.4f}".format(np.mean(y * self.majority_vote.margin(X))))
+
+        except Exception as e:
+            logging.error("{}: Error while solving the quadratic program: {}.".format(str(self), str(e)))
+            self.majority_vote = None
+
+        return self
+
+    def predict(self, X):
+        """ Using previously learned majority vote weights, predict the labels of new data points.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Samples to predict
+
+        Returns
+        -------
+        predictions : ndarray, shape=(n_samples,)
+            The predicted labels
+        """
+        if self.log:
+            logging.info("Predicting...")
+        if self.majority_vote is None:
+            logging.error("{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid labels".format(str(self)))
+            return np.zeros((len(X),))
+
+        return self.majority_vote.vote(X)
+
+    def predict_proba(self, X):
+        """ Using previously learned majority vote weights, predict the labels of new data points with a confidence
+        level. The confidence level is the margin of the majority vote.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Samples to predict
+
+        Returns
+        -------
+        probabilities : ndarray, shape=(n_samples, 2)
+            The estimated probability of each class for each sample
+        """
+        probabilities = np.zeros((np.shape(X)[0], 2))
+
+        # The margin is between -1 and 1; we rescale it to be between 0 and 1.
+        margins = self.majority_vote.margin(X)
+        margins += 1
+        margins /= 2
+
+        # Then, the confidence for class +1 is set to the margin, and the confidence for class -1 is set to 1 - margin.
+        probabilities[:, 1] = margins
+        probabilities[:, 0] = 1 - margins
+        return probabilities
+
+    def _prepare_qp(self, X, y):
+        """ Prepare MinCq's quadratic program. See Program 1 of [2] for more details on its content.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Training data
+
+        y : ndarray, shape=(n_samples,)
+            Training labels
+        """
+
+        self.qp = QP()
+
+        n_features = len(self.majority_vote.voters)
+        n_examples = len(X)
+        classification_matrix = self.majority_vote.classification_matrix(X)
+
+        # Objective function.
+        self.qp.quadratic_func = 2.0 / n_examples * classification_matrix.T.dot(classification_matrix)
+        self.qp.linear_func = np.matrix(np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1))).T
+
+        # First moment of the margin fixed to mu.
+        a_matrix = 2.0 / n_examples * y.T.dot(classification_matrix)
+        self.qp.add_equality_constraints(a_matrix, self.mu + 1.0/2 * np.mean(a_matrix))
+
+        # Lower and upper bounds on the variables
+        self.qp.add_lower_bound(0.0)
+        self.qp.add_upper_bound(1.0 / n_features)
+
+
+class MajorityVote(object):
+    """ A Majority Vote of real-valued functions.
+
+    Parameters
+    ----------
+    voters : ndarray of Voter instances
+        The voters of the majority vote. Each voter must take an example as an input, and output a real value in [-1,1].
+
+    weights : ndarray, optional (default: uniform distribution)
+        The weights associated to each voter.
+    """
+    def __init__(self, voters, weights=None):
+        self._voters = np.array(voters)
+
+        if weights is not None:
+            assert(len(voters) == len(weights))
+            self._weights = np.array(weights)
+        else:
+            self._weights = np.array([1.0 / len(voters)] * len(voters))
+
+    def vote(self, X):
+        """ Returns the vote of the Majority Vote on a list of samples.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Input data to classify.
+
+        Returns
+        -------
+        votes : ndarray, shape=(n_samples,), where each value is either -1 or 1
+            The vote of the majority vote for each sample.
+        """
+        margins = self.margin(X)
+        return np.array([int(x) for x in np.sign(margins)])
+
+    def margin(self, X):
+        """ Returns the margin of the Majority Vote on a list of samples.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Input data on which to calculate the margin.
+
+        Returns
+        -------
+        margins : ndarray, shape=(n_samples,)
+            The real-valued margin of the majority vote for each sample.
+        """
+        classification_matrix = self.classification_matrix(X)
+        return np.squeeze(np.asarray(np.dot(classification_matrix, self.weights)))
+
+    def classification_matrix(self, X):
+        """ Returns the classification matrix of the majority vote.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Input data to classify
+
+        Returns
+        -------
+        classification_matrix : ndarray, shape=(n_samples, n_voters)
+            A matrix that contains the value output by each voter, for each sample.
+
+        """
+        return np.matrix([v.vote(X) for v in self._voters]).T
+
+    @property
+    def weights(self):
+        return self._weights
+
+    @weights.setter
+    def weights(self, weights):
+        self._weights = np.array(weights)
+
+    @property
+    def voters(self):
+        return self._voters
+
+    @voters.setter
+    def voters(self, voters):
+        self._voters = np.array(voters)
+
+    def cbound_value(self, X, y):
+        """ Returns the value of the C-bound, evaluated on given examples.
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Input data
+        y : ndarray, shape=(n_samples, )
+            Input labels, where each label is either -1 or 1.
+        """
+        assert np.all(np.in1d(y, [-1, 1])), 'cbound_value: labels should be either -1 or 1'
+
+        classification_matrix = self.classification_matrix(X)
+        first_moment = float(1.0/len(y) * classification_matrix.dot(self.weights).dot(y))
+        second_moment = float(1.0/len(y) * self.weights.T.dot(classification_matrix.T.dot(classification_matrix)).dot(self.weights))
+
+        return 1 - (first_moment ** 2 / second_moment)
+
+#-*- coding:utf-8 -*-
+__author__ = "Jean-Francis Roy"
+
+import numpy as np
+
+
+class Voter(object):
+    """ Base class for a voter (function X -> [-1, 1]), where X is an array of samples
+    """
+    def __init__(self):
+        pass
+
+    def vote(self, X):
+        """ Returns the output of the voter, on a sample list X
+
+        Parameters
+        ----------
+        X : ndarray, shape=(n_samples, n_features)
+            Input data to classify
+
+        Returns
+        -------
+        votes : ndarray, shape=(n_samples,)
+            The result of the voter function, for each sample
+        """
+        raise NotImplementedError("Voter.vote: Not implemented.")
+
+
+class BinaryKernelVoter(Voter):
+    """ A Binary Kernel Voter, which outputs the value of a kernel function whose first example is fixed a priori.
+    The sign of the output depends on the label (-1 or 1) of the sample on which the kernel voter is based
+
+    Parameters
+    ----------
+    x : ndarray, shape=(n_features,)
+        The base sample's description vector
+
+    y : int, -1 or 1
+        The label of the base sample. Determines if the voter thinks "negative" or "positive"
+
+    kernel_function : function
+        The kernel function takes two samples and returns a similarity value.
If the kernel has parameters, they should + be set using kwargs parameter + + kwargs : keyword arguments (optional) + Additional parameters for the kernel function + """ + + def __init__(self, x, y, kernel_function, **kwargs): + assert(y in {-1, 1}) + super(BinaryKernelVoter, self).__init__() + self._x = x + self._y = y + self._kernel_function = kernel_function + self._kernel_kwargs = kwargs + + def vote(self, X): + base_point_array = np.array([self._x]) + votes = self._y * self._kernel_function(base_point_array, X, **self._kernel_kwargs) + votes = np.squeeze(np.asarray(votes)) + + return votes + + +class DecisionStumpVoter(Voter): + """ + Generic Attribute Threshold Binary Classifier + + Parameters + ---------- + attribute_index : int + The attribute to consider for the classification + + threshold : float + The threshold value for classification rule + + direction : int (-1 or 1) + Used to reverse classification decision + """ + def __init__(self, attribute_index, threshold, direction=1): + super(DecisionStumpVoter, self).__init__() + self.attribute_index = attribute_index + self.threshold = threshold + self.direction = direction + + def vote(self, points): + return [((point[self.attribute_index] > self.threshold) * 2 - 1) * self.direction for point in points] + + + +class VotersGenerator(object): + """ Base class to create a set of voters using training samples + """ + + def generate(self, X, y=None, self_complemented=False): + """ Generates the voters using samples. + + Parameters + ---------- + X : ndarray, shape=(n_samples, n_features) + Input data on which to base the voters + + y : ndarray, shape=(n_samples,), optional + Input labels, usually determines the decision polarity of each voter + + self_complemented : bool + Determines if complement voters should be generated or not + + Returns + ------- + voters : ndarray + An array of voters + """ + raise NotImplementedError("VotersGenerator.generate: not implemented") + + +class StumpsVotersGenerator(VotersGenerator): + """ Decision Stumps Voters generator. + + Parameters + ---------- + n_stumps_per_attribute : int, (default=10) + Determines how many decision stumps will be created for each attribute. + """ + def __init__(self, n_stumps_per_attribute=10): + self._n_stumps_per_attribute = n_stumps_per_attribute + + def _find_extremums(self, X, i): + mini = np.Infinity + maxi = -np.Infinity + for x in X: + if x[i] < mini: + mini = x[i] + if x[i] > maxi: + maxi = x[i] + return mini, maxi + + def generate(self, X, y=None, self_complemented=False, only_complements=False): + voters = [] + if len(X) != 0: + for i in range(len(X[0])): + t = self._find_extremums(X, i) + inter = (t[1] - t[0]) / (self._n_stumps_per_attribute + 1) + + if inter != 0: + # If inter is zero, the attribute is useless as it has a constant value. We do not add stumps for + # this attribute. + for x in range(self._n_stumps_per_attribute): + + if not only_complements: + voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), 1)) + + if self_complemented or only_complements: + voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), -1)) + + return np.array(voters) + + +class KernelVotersGenerator(VotersGenerator): + """ Utility function to create binary kernel voters for each (x, y) sample. + + Parameters + ---------- + kernel_function : function + The kernel function takes two samples and returns a similarity value. 
If the kernel has parameters, they should
+        be set using the kwargs parameter
+
+    kwargs : keyword arguments (optional)
+        Additional parameters for the kernel function
+    """
+
+    def __init__(self, kernel_function, **kwargs):
+        self._kernel_function = kernel_function
+        self._kernel_kwargs = kwargs
+
+    def generate(self, X, y=None, self_complemented=False, only_complements=False):
+        if y is None:
+            y = np.array([1] * len(X))
+
+        voters = []
+
+        for point, label in zip(X, y):
+            if not only_complements:
+                voters.append(BinaryKernelVoter(point, label, self._kernel_function, **self._kernel_kwargs))
+
+            if self_complemented or only_complements:
+                voters.append(BinaryKernelVoter(point, -1 * label, self._kernel_function, **self._kernel_kwargs))
+
+        return np.array(voters)
+
+class MinCQ(MinCqLearner, BaseMonoviewClassifier):
+
+    def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, **kwargs):
+        super(MinCQ, self).__init__(
+            mu=mu,
+            voters_type='stumps',
+            n_stumps_per_attribute=1
+        )
+        # MinCqLearner does not accept these arguments, so keep them on the estimator itself.
+        self.random_state = random_state
+        self.epsilon = epsilon
+        self.param_names = ["mu"]
+        self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"),
+                         ]
+        self.classed_params = []
+        self.weird_strings = {}
+        if "nbCores" not in kwargs:
+            self.nbCores = 1
+        else:
+            self.nbCores = kwargs["nbCores"]
+
+    def canProbas(self):
+        """Used to know if the classifier can return label probabilities"""
+        return True
+
+    def getInterpret(self, directory, y_test):
+        return getInterpretBase(directory, y_test, "MinCq", self.majority_vote.weights)
+
+    def get_name_for_fusion(self):
+        return "QBN2"
+
+
+def formatCmdArgs(args):
+    """Used to format kwargs for the parsed args"""
+    kwargsDict = {}
+    return kwargsDict
+
+
+def paramsToSet(nIter, randomState):
+    """Used for weighted linear early fusion to generate random search sets"""
+    paramsSet = []
+    for _ in range(nIter):
+        paramsSet.append({})
+    return paramsSet
\ No newline at end of file
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py
deleted file mode 100644
index f5bf8416163edf7adb66b766bc0562e81b7e2af5..0000000000000000000000000000000000000000
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier
-from ..Monoview.Additions.BoostUtils import getInterpretBase
-from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
-
-
-class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
-
-    def __init__(self, random_state=None, **kwargs):
-        super(QarBoostNC2, self).__init__(n_max_iterations=300,
-                                          random_state=random_state,
-                                          self_complemented=True,
-                                          twice_the_same=True,
-                                          c_bound_choice=True,
-                                          random_start=False,
-                                          n_stumps_per_attribute=1,
-                                          use_r=True,
-                                          c_bound_sol=False
-                                          )
-        self.param_names = []
-        self.distribs = []
-        self.classed_params = []
-        self.weird_strings = {}
-
-    def canProbas(self):
-        """Used to know if the classifier can return label probabilities"""
-        return True
-
-    def getInterpret(self, directory, y_test):
-        return self.getInterpretQar(directory, y_test)
-
-    def get_name_for_fusion(self):
-        return "QBN2"
-
-
-def formatCmdArgs(args):
-    """Used to format kwargs for the parsed args"""
-    kwargsDict = {}
-    return kwargsDict
-
-
-def paramsToSet(nIter, randomState):
-    """Used for weighted linear early fusion to generate random search sets"""
-    paramsSet = []
- for _ in range(nIter): - paramsSet.append({}) - return paramsSet \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index 46af685ca3e90519caabbbf2af75d19733318e9a..95ffef91ccce1fa0d447f2e4fff9c56d1f91b6e5 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -61,7 +61,7 @@ def parseTheArgs(arguments): help='Determine the split ratio between learning and validation sets', type=float, default=0.2) groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation', - type=int, default=2) + type=int, default=5) groupClass.add_argument('--CL_nbClass', metavar='INT', action='store', help='Number of classes, -1 for all', type=int, default=2) groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
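
Note on the C-bound used throughout the MinCQ additions above: MajorityVote.cbound_value computes 1 - (first moment of the margin)^2 / (second moment of the margin). Below is a minimal standalone sketch of that computation, assuming a plain ndarray vote matrix H of shape (n_samples, n_voters), weights q, and labels y in {-1, +1}; all names here are illustrative, not from the patch.

    import numpy as np

    def cbound(H, q, y):
        votes = H.dot(q)                     # real-valued majority vote F(x)
        first_moment = np.mean(y * votes)    # E[y * F(x)]
        second_moment = np.mean(votes ** 2)  # E[F(x)^2]
        return 1.0 - first_moment ** 2 / second_moment

    H = np.array([[1., -1., 1.], [1., 1., -1.], [-1., 1., 1.], [1., 1., 1.]])
    q = np.full(3, 1.0 / 3.0)
    y = np.array([1., 1., -1., 1.])
    print(cbound(H, q, y))  # 1 - (1/3)^2 / (1/3) = 2/3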
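
Note on the solution computation changed in QarBoostUtils.py (hunk @@ -410,18 +434,20 @@): the patch now solves the quadratic C2*s^2 + C1*s + C0 = 0 only where C2s != 0, leaving a sentinel value of -3 elsewhere; nan values from negative discriminants are masked later in make_masked_c_bounds. A sketch of the same guarded solve using a boolean mask instead of the repeated np.where calls (the function name is illustrative):

    import numpy as np

    def guarded_quadratic_roots(C2s, C1s, C0s):
        sols = np.full(C0s.shape, -3.0)  # sentinel where the quadratic degenerates
        quad = C2s != 0
        disc = C1s[quad] ** 2 - 4 * C2s[quad] * C0s[quad]
        # A negative discriminant yields nan, masked downstream as in the patch.
        sols[quad] = (-C1s[quad] + np.sqrt(disc)) / (2 * C2s[quad])
        return sols

    print(guarded_quadratic_roots(np.array([1., 0.]), np.array([-3., 2.]), np.array([2., 1.])))
    # [ 2. -3.] : the "+sqrt" root of s^2 - 3s + 2, and the sentinel for the degenerate entry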
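
Note on the randomizedSearch change in MonoviewUtils.py: compute_possible_combinations now returns one possibility count per hyper-parameter instead of their product, and the RandomizedSearchCV budget becomes the sum of the per-parameter minima. A toy sketch of that capping logic (names and values are illustrative):

    import numpy as np

    def capped_n_iter(nb_possible_combinations, n_iter):
        # Each parameter contributes at most n_iter draws, and never more
        # than the number of values it can actually take.
        return int(np.sum([min(n, n_iter) for n in nb_possible_combinations]))

    print(capped_n_iter([3, 1000, 2], 50))  # 3 + 50 + 2 = 55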