diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py deleted file mode 100644 index 6242e9354ee09862968b542f2669f3583719765a..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost_graalpy.py +++ /dev/null @@ -1,278 +0,0 @@ -import logging - -import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import check_is_fitted - -from ..metrics import zero_one_loss -from ..monoview.additions.BoostUtils import StumpsClassifiersGenerator, \ - BaseBoost -from ..monoview.monoview_utils import CustomRandint, \ - BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero - -classifier_class_name = "AdaboostGraalpy" - -class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): - """Scikit-Learn compatible AdaBoost classifier. Original code by Pascal Germain, adapted by Jean-Francis Roy. - - - Parameters - ---------- - - n_iterations : int, optional - The number of iterations of the algorithm. Defaults to 200. - - iterations_to_collect_as_hyperparameters : list - Iteration numbers to collect while learning, that will be converted as hyperparameter values at evaluation time. - Defaults to None. - classifiers_generator : Transformer, optional - A transformer to convert input samples in voters' outputs. Default: Decision stumps transformer, with 10 stumps - per attributes. - callback_function : function, optional - A function to call at each iteration that is supplied learning information. Defaults to None. - - n_stumps : int ( default : 10) - - self_complemented : boolean (default : True - - Attributes - ---------- - n_iterations : int, optional - The number of iterations of the algorithm. Defaults to 200. - iterations_to_collect_as_hyperparameters : list - Iteration numbers to collect while learning, that will be converted as hyperparameter values at evaluation time. - Defaults to None. - classifiers_generator : Transformer, optional - A transformer to convert input samples in voters' outputs. Default: Decision stumps transformer, with 10 stumps - per attributes. - callback_function : function, optional - A function to call at each iteration that is supplied learning information. Defaults to None. - - """ - - def __init__(self, n_iterations=200, - iterations_to_collect_as_hyperparameters=True, - classifiers_generator=None, callback_function=None, - n_stumps=10, self_complemented=True): - - self.n_iterations = n_iterations - self.n_stumps = n_stumps - self.iterations_to_collect_as_hyperparameters = iterations_to_collect_as_hyperparameters - self.estimators_generator = classifiers_generator - self.callback_function = callback_function - self.self_complemented = self_complemented - - def fit(self, X, y): - """Fits the algorithm on training data. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - The input data. - y : ndarray of shape (n_samples, ) - The input labels. - - Returns - ------- - self - - """ - y_neg = change_label_to_minus(y) - - if self.estimators_generator is None: - self.estimators_generator = StumpsClassifiersGenerator( - n_stumps_per_attribute=self.n_stumps, - self_complemented=self.self_complemented) - - # Step 1: We fit the classifiers generator and get its classification matrix. 
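# To make Step 1 concrete, a toy sketch (hypothetical values, not from the
# original module; assumes numpy imported as np, as above): the binary
# classification matrix is an (n_samples, n_voters) array of +/-1 votes, so
# boosting can treat each voter's output as a new binary feature of X.
#
#     H = np.array([[ 1, -1,  1],
#                   [ 1,  1, -1],
#                   [-1, -1, -1]])      # 3 samples, 3 stump voters
#     w = np.array([0.5, 0.3, 0.2])     # convex weights over the voters
#     np.sign(H.dot(w))                 # weighted-majority prediction, in {-1, 1}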
- self.estimators_generator.fit(X, y_neg) - # hint: This is equivalent to construct a new X - classification_matrix = self._binary_classification_matrix(X) - - n_samples, n_voters = classification_matrix.shape - # logging.debug("n_voters = {}".format(n_voters)) - - # Step 2: We initialize the weights on the samples and the weak classifiers. - sample_weights = np.ones(n_samples) / n_samples - alpha_weights = np.zeros(n_voters) - self.losses = [] - - # Step 3: We loop for each iteration. - self.collected_weight_vectors_ = [] - for t in range(self.n_iterations): - - # Step 4: We find the classifier that maximizes the success, - # weighted by the sample weights. - classifier_successes = np.dot(classification_matrix.T, - sample_weights * y_neg) - - best_voter_index = np.argmax(classifier_successes) - success = classifier_successes[best_voter_index] - - if success >= 1.0: - logging.info("AdaBoost stopped : perfect classifier found!") - self.weights_ = np.zeros(n_voters) - self.weights_[best_voter_index] = 1.0 - return self - - # Step 5: We calculate the alpha_t parameter and update the alpha weights. - alpha = 0.5 * np.log((1.0 + success) / (1.0 - success)) - alpha_weights[best_voter_index] += alpha - - # logging.debug("{} : {}".format(t, str(alpha))) - - # Step 6: We update the sample weights. - sample_weights *= np.exp( - -1 * alpha * y_neg * classification_matrix[:, best_voter_index]) - - normalization_constant = sample_weights.sum() - sample_weights = sample_weights / normalization_constant - - # We collect iteration information for later evaluation. - if self.iterations_to_collect_as_hyperparameters: - weights = alpha_weights / np.sum(alpha_weights) - self.collected_weight_vectors_.append(weights.copy()) - - loss = zero_one_loss.score(y_neg, np.sign(np.sum( - np.multiply(classification_matrix, - alpha_weights / np.sum(alpha_weights)), axis=1))) - self.losses.append(loss) - - if self.callback_function is not None: - self.callback_function(t, alpha_weights, normalization_constant, - self.estimators_generator, self.weights_) - - self.weights_ = alpha_weights / np.sum(alpha_weights) - self.losses = np.array(self.losses) - self.learner_info_ = { - 'n_nonzero_weights': np.sum(self.weights_ > 1e-12)} - - return self - - def predict(self, X): - """Predict inputs using the fit classifier. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - The data to classify. - - Returns - ------- - predictions : ndarray of shape (n_samples, ) - The estimated labels. - - """ - check_is_fitted(self, 'weights_') - classification_matrix = self._binary_classification_matrix(X) - - if self.iterations_to_collect_as_hyperparameters: - self.test_preds = [] - for weight_vector in self.collected_weight_vectors_: - preds = np.sum(np.multiply(classification_matrix, - weight_vector), axis=1) - self.test_preds.append(change_label_to_zero(np.sign(preds))) - self.test_preds = np.array(self.test_preds) - margins = np.squeeze( - np.asarray(np.dot(classification_matrix, self.weights_))) - return change_label_to_zero( - np.array([int(x) for x in np.sign(margins)])) - - -class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier): - """AdaboostGraalpy - - Parameters - ---------- - random_state : int seed, RandomState instance, or None (default=None) - The seed of the pseudo random number generator to use when - shuffling the data. 
- - n_iterations : in number of iterations (default : 200) - - n_stumps : int (default 1) - - kwargs : others arguments - - - Attributes - ---------- - param_names : - - distribs : - - weird_strings : - - n_stumps : - - nbCores : - - """ - def __init__(self, random_state=None, n_iterations=200, n_stumps=1, - **kwargs): - - super(AdaboostGraalpy, self).__init__( - n_iterations=n_iterations, - n_stumps=n_stumps - ) - self.param_names = ["n_iterations", "n_stumps", "random_state"] - self.distribs = [CustomRandint(low=1, high=500), [n_stumps], - [random_state]] - self.classed_params = [] - self.weird_strings = {} - self.n_stumps = n_stumps - if "nbCores" not in kwargs: - self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - # def canProbas(self): - # """ - # Used to know if the classifier can return label probabilities - # - # Returns - # ------- - # True in any case - # """ - # return True - - def getInterpret(self, directory, y_test): - """ - - Parameters - ---------- - directory : - - y_test : - - Returns - ------- - retur string of interpret - """ - np.savetxt(directory + "train_metrics.csv", self.losses, delimiter=',') - np.savetxt(directory + "y_test_step.csv", self.test_preds, - delimiter=',') - step_metrics = [] - for step_index in range(self.test_preds.shape[0] - 1): - step_metrics.append(zero_one_loss.score(y_test, - self.test_preds[step_index, - :])) - step_metrics = np.array(step_metrics) - np.savetxt(directory + "step_test_metrics.csv", step_metrics, - delimiter=',') - return "" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"n_iterations": args.AdG_n_iter, -# "n_stumps": args.AdG_stumps, } -# return kwargsDict - - -def paramsToSet(nIter, random_state): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"n_iterations": random_state.randint(1, 500), }) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py deleted file mode 100644 index fc9b44ed7d608d61b084d1915a6ee6084dbea05a..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/cq_boost.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np - -from ..monoview.additions.BoostUtils import getInterpretBase -from ..monoview.additions.CQBoostUtils import ColumnGenerationClassifier -from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ - BaseMonoviewClassifier - -classifier_class_name = "CQBoost" - -class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, - n_max_iterations=None, estimators_generator="Stumps", - max_depth=1, **kwargs): - super(CQBoost, self).__init__( - random_state=random_state, - mu=mu, - epsilon=epsilon, - estimators_generator=estimators_generator, - n_max_iterations=n_max_iterations, - max_depth=max_depth - ) - self.param_names = ["mu", "epsilon", "n_stumps", "random_state", - "n_max_iterations", "estimators_generator", - "max_depth"] - self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-"), - [n_stumps], [random_state], [n_max_iterations], - ["Stumps", "Trees"], CustomRandint(low=1, high=5)] - self.classed_params = [] - self.weird_strings = {} - self.n_stumps = n_stumps - if "nbCores" not in kwargs: - 
self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - # def canProbas(self): - # """Used to know if the classifier can return label probabilities""" - # return False - - def getInterpret(self, directory, y_test): - np.savetxt(directory + "train_metrics.csv", self.train_metrics, - delimiter=',') - np.savetxt(directory + "c_bounds.csv", self.c_bounds, - delimiter=',') - np.savetxt(directory + "y_test_step.csv", self.step_decisions, - delimiter=',') - step_metrics = [] - for step_index in range(self.step_decisions.shape[1] - 1): - step_metrics.append(self.plotted_metric.score(y_test, - self.step_decisions[:, - step_index])) - step_metrics = np.array(step_metrics) - np.savetxt(directory + "step_test_metrics.csv", step_metrics, - delimiter=',') - return getInterpretBase(self, directory, "CQBoost", self.weights_, - y_test) - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.CQB_mu, -# "epsilon": args.CQB_epsilon, -# "n_stumps": args.CQB_stumps, -# "n_max_iterations": args.CQB_n_iter} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), - "epsilon": 10 ** -randomState.randint(1, 15)}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py deleted file mode 100644 index ec0bd7e7c56b46720afd2e759cec7a65957d6acd..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq.py +++ /dev/null @@ -1,652 +0,0 @@ -from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier - -#### Algorithm code #### - -# -*- coding:utf-8 -*- -""" MinCq learning algorithm - -Related papers: -[1] From PAC-Bayes Bounds to Quadratic Programs for Majority Votes (Laviolette et al., 2011) -[2] Risk Bounds for the Majority Vote: From a PAC-Bayesian Analysis to a Learning Algorithm (Germain et al., 2014) - -http://graal.ift.ulaval.ca/majorityvote/ -""" -__author__ = 'Jean-Francis Roy' -import time -import logging -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, \ - polynomial_kernel -# from qp import QP -from ..monoview.additions.BoostUtils import ConvexProgram as QP - - -classifier_class_name = "MinCQ" - -# from majority_vote import MajorityVote -# from voter import StumpsVotersGenerator, KernelVotersGenerator - -class MinCqLearner(BaseEstimator, ClassifierMixin): - """ - MinCq algorithm learner. See [1, 2] - - Parameters - ---------- - mu : float - The fixed value of the first moment of the margin. - - voters_type : string, optional (default='kernel') - Specifies the type of voters. - It must be one of 'kernel', 'stumps' or 'manual'. If 'manual' is specified, the voters have to be manually set - using the "voters" parameter of the fit function. - - n_stumps_per_attribute : int, optional (default=10) - Specifies the amount of decision stumps per attribute. - It is only significant with 'stumps' voters_type. - - kernel : string, optional (default='rbf') - Specifies the kernel type to be used in the algorithm. - It must be one of 'linear', 'poly', 'rbf'. - - degree : int, optional (default=3) - Degree of the polynomial kernel function ('poly'). - Ignored by all other kernels. 
- - gamma : float, optional (default=0.0) - Kernel coefficient for 'rbf' and 'poly'. - If gamma is 0.0 then 1/n_features will be used instead. - """ - - def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', - degree=3, gamma=0.0, self_complemented=True): - assert 0 < mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]" - self.mu = mu - self.voters_type = voters_type - self.n_stumps_per_attribute = n_stumps_per_attribute - self.kernel = kernel - self.degree = degree - self.gamma = gamma - self.log = False - self.self_complemented = self_complemented - - self.majority_vote = None - self.qp = None - - def fit(self, X, y, voters=None): - """ Learn a majority vote weights using MinCq. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Training data - - y_reworked : ndarray, shape=(n_samples,), optional - Training labels - - voters : shape=(n_voters,), optional - A priori generated voters - """ - # Preparation of the majority vote, using a voter generator that depends on class attributes - if (np.unique(y) != [-1, 1]).any(): - y_reworked = np.copy(y) - y_reworked[np.where(y_reworked == 0)] = -1 - else: - y_reworked = y - - assert self.voters_type in ['stumps', 'kernel', - 'manual'], "MinCqLearner: voters_type must be 'stumps', 'kernel' or 'manual'" - - if self.voters_type == 'manual': - if voters is None: - logging.error( - "Manually set voters is True, but no voters have been set.") - return self - - else: - voters_generator = None - - if self.voters_type == 'stumps': - assert self.n_stumps_per_attribute >= 1, 'MinCqLearner: n_stumps_per_attribute must be positive' - voters_generator = StumpsVotersGenerator( - self.n_stumps_per_attribute) - - elif self.voters_type == 'kernel': - assert self.kernel in ['linear', 'poly', - 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'" - - gamma = self.gamma - if gamma == 0.0: - gamma = 1.0 / np.shape(X)[1] - - if self.kernel == 'linear': - voters_generator = KernelVotersGenerator(linear_kernel) - elif self.kernel == 'poly': - voters_generator = KernelVotersGenerator(polynomial_kernel, - degree=self.degree, - gamma=gamma) - elif self.kernel == 'rbf': - voters_generator = KernelVotersGenerator(rbf_kernel, - gamma=gamma) - - voters = voters_generator.generate(X, y_reworked, - self_complemented=self.self_complemented) - - if self.log: - logging.info("MinCq training started...") - logging.info("Training dataset shape: {}".format(str(np.shape(X)))) - logging.info("Number of voters: {}".format(len(voters))) - self.majority_vote = MajorityVote(voters) - n_base_voters = len(self.majority_vote.weights) - - # Preparation and resolution of the quadratic program - - if self.log: - logging.info("Preparing QP...") - self._prepare_qp(X, y_reworked) - beg = time.time() - try: - if self.log: - logging.info("Solving QP...") - solver_weights = self.qp.solve() - - # Conversion of the weights of the n first voters to weights on the implicit 2n voters. - # See Section 7.1 of [2] for an explanation. 
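# In short (numeric sketch with hypothetical values): the QP puts mass q_i on
# voter i and (1/n - q_i) on its implicit complement, so the signed weight of
# base voter i is q_i - (1/n - q_i) = 2*q_i - 1/n. E.g. with n = 4 base voters:
#
#     q = np.array([0.25, 0.20, 0.15, 0.10])   # QP solution, each q_i in [0, 1/n]
#     2 * q - 1.0 / 4                           # -> [0.25, 0.15, 0.05, -0.05]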
- self.majority_vote.weights = np.array( - [2 * q - 1.0 / n_base_voters for q in solver_weights]) - if self.log: - logging.info( - "First moment of the margin on the training set: {:.4f}".format( - np.mean(y_reworked * self.majority_vote.margin(X)))) - - except Exception as e: - logging.error( - "{}: Error while solving the quadratic program: {}.".format( - str(self), str(e))) - self.majority_vote = None - self.cbound_train = self.majority_vote.cbound_value(X, y_reworked) - end=time.time() - self.train_time=end-beg - return self - - def predict(self, X, save_data=True): - """ Using previously learned majority vote weights, predict the labels of new data points. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Samples to predict - - Returns - ------- - predictions : ndarray, shape=(n_samples,) - The predicted labels - """ - if self.log: - logging.info("Predicting...") - if self.majority_vote is None: - logging.error( - "{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid labels".format( - str(self))) - return np.zeros((len(X),)) - if save_data: - self.x_test = X - - vote = self.majority_vote.vote(X) - vote[np.where(vote == -1)] = 0 - return vote - - def predict_proba(self, X): - """ Using previously learned majority vote weights, predict the labels of new data points with a confidence - level. The confidence level is the margin of the majority vote. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Samples to predict - - Returns - ------- - predictions : ndarray, shape=(n_samples,) - The predicted labels - """ - probabilities = np.zeros((np.shape(X)[0], 2)) - - # The margin is between -1 and 1, we rescale it to be between 0 and 1. - margins = self.majority_vote.margin(X) - margins += 1 - margins /= 2 - - # Then, the conficence for class +1 is set to the margin, and confidence for class -1 is set to 1 - margin. - probabilities[:, 1] = margins - probabilities[:, 0] = 1 - margins - return probabilities - - def _prepare_qp(self, X, y): - """ Prepare MinCq's quadratic program. See Program 1 of [2] for more details on its content. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Training data - - y : ndarray, shape=(n_samples,) - Training labels - """ - - self.qp = QP() - - n_features = len(self.majority_vote.voters) - n_examples = len(X) - classification_matrix = self.majority_vote.classification_matrix(X) - - # Objective function. - self.qp.quadratic_func = 2.0 / n_examples * classification_matrix.T.dot( - classification_matrix) - self.qp.linear_func = np.matrix( - np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1))).T - - # First moment of the margin fixed to mu. - a_matrix = 2.0 / n_examples * y.T.dot(classification_matrix) - self.qp.add_equality_constraints(a_matrix, - self.mu + 1.0 / 2 * np.mean(a_matrix)) - - # Lower and upper bounds on the variables - self.qp.add_lower_bound(0.0) - self.qp.add_upper_bound(1.0 / n_features) - - -class MajorityVote(object): - """ A Majority Vote of real-valued functions. - - Parameters - ---------- - voters : ndarray of Voter instances - The voters of the majority vote. Each voter must take an example as an input, and output a real value in [-1,1]. - - weights : ndarray, optional (default: uniform distribution) - The weights associated to each voter. 
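    Examples
    --------
    A hand-checkable toy usage (illustrative sketch; ``DecisionStumpVoter`` is
    defined later in this module, here thresholding feature 0 at 0.5):

    >>> import numpy as np
    >>> X = np.array([[0.2], [0.8]])
    >>> voters = [DecisionStumpVoter(0, 0.5, 1), DecisionStumpVoter(0, 0.5, -1)]
    >>> mv = MajorityVote(voters, weights=[0.75, 0.25])
    >>> mv.margin(X)  # doctest: +SKIP
    array([-0.5,  0.5])
    >>> mv.vote(X)  # doctest: +SKIP
    array([-1,  1])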
- """ - - def __init__(self, voters, weights=None): - self._voters = np.array(voters) - - if weights is not None: - assert (len(voters) == len(weights)) - self._weights = np.array(weights) - else: - self._weights = np.array([1.0 / len(voters)] * len(voters)) - - def vote(self, X): - """ Returns the vote of the Majority Vote on a list of samples. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Input data to classify. - - Returns - ------- - votes : ndarray, shape=(n_samples,), where each value is either -1 or 1 - The vote of the majority vote for each sample. - """ - margins = self.margin(X) - return np.array([int(x) for x in np.sign(margins)]) - - def margin(self, X): - """ Returns the margin of the Majority Vote on a list of samples. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Input data on which to calculate the margin. - - Returns - ------- - margins : ndarray, shape=(n_samples,), where each value is either -1 or 1 - The margin of the majority vote for each sample. - """ - classification_matrix = self.classification_matrix(X) - return np.squeeze( - np.asarray(np.dot(classification_matrix, self.weights))) - - def classification_matrix(self, X): - """ Returns the classification matrix of the majority vote. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Input data to classify - - Returns - ------- - classification_matrix : ndrray, shape=(n_samples, n_voters) - A matrix that contains the value output by each voter, for each sample. - - """ - return np.matrix([v.vote(X) for v in self._voters]).T - - @property - def weights(self): - return self._weights - - @weights.setter - def weights(self, weights): - self._weights = np.array(weights) - - @property - def voters(self): - return self._voters - - @voters.setter - def voters(self, voters): - self._voters = np.array(voters) - - def cbound_value(self, X, y): - """ Returns the value of the C-bound, evaluated on given examples. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_feature) - Input data - y : ndarray, shape=(n_samples, ) - Input labels, where each label is either -1 or 1. - """ - assert np.all(np.in1d(y, [-1, - 1])), 'cbound_value: labels should be either -1 or 1' - - classification_matrix = self.classification_matrix(X) - first_moment = float( - 1.0 / len(y) * classification_matrix.dot(self.weights).dot(y)) - second_moment = float(1.0 / len(y) * self.weights.T.dot( - classification_matrix.T.dot(classification_matrix)).dot( - self.weights)) - - return 1 - (first_moment ** 2 / second_moment) - - -# -*- coding:utf-8 -*- -__author__ = "Jean-Francis Roy" - -import numpy as np - - -class Voter(object): - """ Base class for a voter (function X -> [-1, 1]), where X is an array of samples - """ - - def __init__(self): - pass - - def vote(self, X): - """ Returns the output of the voter, on a sample list X - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Input data to classify - - Returns - ------- - votes : ndarray, shape=(n_samples,) - The result the the voter function, for each sample - """ - raise NotImplementedError("Voter.vote: Not implemented.") - - -class BinaryKernelVoter(Voter): - """ A Binary Kernel Voter, which outputs the value of a kernel function whose first example is fixed a priori. 
- The sign of the output depends on the label (-1 or 1) of the sample on which the kernel voter is based - - Parameters - ---------- - x : ndarray, shape=(n_features,) - The base sample's description vector - - y : int, -1 or 1 - The label of the base sample. Determines if the voter thinks "negative" or "positive" - - kernel_function : function - The kernel function takes two samples and returns a similarity value. If the kernel has parameters, they should - be set using kwargs parameter - - kwargs : keyword arguments (optional) - Additional parameters for the kernel function - """ - - def __init__(self, x, y, kernel_function, **kwargs): - assert (y in {-1, 1}) - super(BinaryKernelVoter, self).__init__() - self._x = x - self._y = y - self._kernel_function = kernel_function - self._kernel_kwargs = kwargs - - def vote(self, X): - base_point_array = np.array([self._x]) - votes = self._y * self._kernel_function(base_point_array, X, - **self._kernel_kwargs) - votes = np.squeeze(np.asarray(votes)) - - return votes - - -class DecisionStumpVoter(Voter): - """ - Generic Attribute Threshold Binary Classifier - - Parameters - ---------- - attribute_index : int - The attribute to consider for the classification - - threshold : float - The threshold value for classification rule - - direction : int (-1 or 1) - Used to reverse classification decision - - Attributes - ---------- - - attribute_index : - threshold : - direction : - """ - - def __init__(self, attribute_index, threshold, direction=1): - super(DecisionStumpVoter, self).__init__() - self.attribute_index = attribute_index - self.threshold = threshold - self.direction = direction - - def vote(self, points): - return [((point[ - self.attribute_index] > self.threshold) * 2 - 1) * self.direction - for point in points] - - -class VotersGenerator(object): - """ Base class to create a set of voters using training samples - """ - - def generate(self, X, y=None, self_complemented=False): - """ Generates the voters using samples. - - Parameters - ---------- - X : ndarray, shape=(n_samples, n_features) - Input data on which to base the voters - - y : ndarray, shape=(n_samples,), optional - Input labels, usually determines the decision polarity of each voter - - self_complemented : bool - Determines if complement voters should be generated or not - - Returns - ------- - voters : ndarray - An array of voters - """ - raise NotImplementedError("VotersGenerator.generate: not implemented") - - -class StumpsVotersGenerator(VotersGenerator): - """ Decision Stumps Voters generator. - - Parameters - ---------- - n_stumps_per_attribute : int, (default=10) - Determines how many decision stumps will be created for each attribute. - """ - - def __init__(self, n_stumps_per_attribute=10): - self._n_stumps_per_attribute = n_stumps_per_attribute - - def _find_extremums(self, X, i): - mini = np.Infinity - maxi = -np.Infinity - for x in X: - if x[i] < mini: - mini = x[i] - if x[i] > maxi: - maxi = x[i] - return mini, maxi - - def generate(self, X, y=None, self_complemented=False, - only_complements=False): - """ - - Parameters - ---------- - X - y - self_complemented - only_complements - - Returns - ------- - - """ - voters = [] - if len(X) != 0: - for i in range(len(X[0])): - t = self._find_extremums(X, i) - inter = (t[1] - t[0]) / (self._n_stumps_per_attribute + 1) - - if inter != 0: - # If inter is zero, the attribute is useless as it has a constant value. We do not add stumps for - # this attribute. 
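# Worked example (toy numbers, not from the original module): for an attribute
# observed on [0.0, 1.0] with n_stumps_per_attribute = 4, inter = (1.0 - 0.0) / 5
# = 0.2, and the loop below places thresholds at t[0] + inter * (x + 1) for
# x = 0..3, i.e. at 0.2, 0.4, 0.6 and 0.8 -- evenly spaced cut points strictly
# inside the attribute's observed range, never at its extremes.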
- for x in range(self._n_stumps_per_attribute): - - if not only_complements: - voters.append( - DecisionStumpVoter(i, t[0] + inter * (x + 1), - 1)) - - if self_complemented or only_complements: - voters.append( - DecisionStumpVoter(i, t[0] + inter * (x + 1), - -1)) - - return np.array(voters) - - -class KernelVotersGenerator(VotersGenerator): - """ Utility function to create binary kernel voters for each (x, y) sample. - - Parameters - ---------- - kernel_function : function - The kernel function takes two samples and returns a similarity value. If the kernel has parameters, they should - be set using kwargs parameter - - kwargs : keyword arguments (optional) - Additional parameters for the kernel function - """ - - def __init__(self, kernel_function, **kwargs): - self._kernel_function = kernel_function - self._kernel_kwargs = kwargs - - def generate(self, X, y=None, self_complemented=False, - only_complements=False): - if y is None: - y = np.array([1] * len(X)) - - voters = [] - - for point, label in zip(X, y): - if not only_complements: - voters.append( - BinaryKernelVoter(point, label, self._kernel_function, - **self._kernel_kwargs)) - - if self_complemented or only_complements: - voters.append( - BinaryKernelVoter(point, -1 * label, self._kernel_function, - **self._kernel_kwargs)) - - return np.array(voters) - - -class MinCQ(MinCqLearner, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, self_complemented=True, - n_stumps_per_attribute=10, **kwargs): - super(MinCQ, self).__init__(mu=mu, - voters_type='stumps', - n_stumps_per_attribute=n_stumps_per_attribute, - self_complemented=self_complemented - ) - self.param_names = ["mu", "n_stumps_per_attribute", "random_state"] - self.distribs = [CustomUniform(loc=0.5, state=2.0, multiplier="e-"), - [n_stumps_per_attribute], [random_state]] - self.random_state = random_state - self.classed_params = [] - self.weird_strings = {} - if "nbCores" not in kwargs: - self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - # def canProbas(self): - # """Used to know if the classifier can return label probabilities""" - # return True - - def set_params(self, **params): - self.mu = params["mu"] - self.random_state = params["random_state"] - self.n_stumps_per_attribute = params["n_stumps_per_attribute"] - return self - - def get_params(self, deep=True): - return {"random_state": self.random_state, "mu": self.mu, - "n_stumps_per_attribute": self.n_stumps_per_attribute} - - def getInterpret(self, directory, y_test): - interpret_string = "Train C_bound value : " + str(self.cbound_train) - y_rework = np.copy(y_test) - y_rework[np.where(y_rework == 0)] = -1 - interpret_string += "\n Test c_bound value : " + str( - self.majority_vote.cbound_value(self.x_test, y_rework)) - np.savetxt(directory+"times.csv", np.array([self.train_time, 0])) - return interpret_string - - def get_name_for_fusion(self): - return "MCQ" - -# -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.MCQ_mu, -# "n_stumps_per_attribute": args.MCQ_stumps} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py deleted file mode 100644 index 
8355dffc1a47dda9290a6cd57bbede64890d3454..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py +++ /dev/null @@ -1,141 +0,0 @@ -import numpy as np - -from ..monoview.additions.BoostUtils import StumpsClassifiersGenerator -from ..monoview.additions.MinCQUtils import RegularizedBinaryMinCqClassifier -from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomUniform - - -classifier_class_name = "MinCQGraalpy" - -class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): - """ - MinCQGraalpy extend of ``RegularizedBinaryMinCqClassifier `` - - Parameters - ---------- - random_state : int seed, RandomState instance, or None (default=None) - The seed of the pseudo random number generator to use when - shuffling the data. - - mu : float, (default: 0.01) - - self_complemented : bool (default : True) - - n_stumps_per_attribute : (default: =1 - - kwargs : others arguments - - - Attributes - ---------- - param_names - - distribs - - n_stumps_per_attribute - - classed_params - - weird_strings - - nbCores : number of cores - - """ - def __init__(self, random_state=None, mu=0.01, self_complemented=True, - n_stumps_per_attribute=1, **kwargs): - super(MinCQGraalpy, self).__init__(mu=mu, - estimators_generator=StumpsClassifiersGenerator( - n_stumps_per_attribute=n_stumps_per_attribute, - self_complemented=self_complemented), - ) - self.param_names = ["mu", "n_stumps_per_attribute", "random_state"] - self.distribs = [CustomUniform(loc=0.05, state=2.0, multiplier="e-"), - [n_stumps_per_attribute], [random_state]] - self.n_stumps_per_attribute = n_stumps_per_attribute - self.classed_params = [] - self.weird_strings = {} - self.random_state = random_state - if "nbCores" not in kwargs: - self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - # def canProbas(self): - # """ - # Used to know if the classifier can return label probabilities - # Returns - # ------- - # False - # """ - # return False - - def set_params(self, **params): - """ - set parameter 'self.mu', 'self.random_state - 'self.n_stumps_per_attribute - - Parameters - ---------- - params - - Returns - ------- - self : object - Returns self. 
- """ - self.mu = params["mu"] - self.random_state = params["random_state"] - self.n_stumps_per_attribute = params["n_stumps_per_attribute"] - return self - - def get_params(self, deep=True): - """ - - Parameters - ---------- - deep : bool (default : true) not used - - Returns - ------- - dictianary with "random_state", "mu", "n_stumps_per_attribute" - """ - return {"random_state": self.random_state, "mu": self.mu, - "n_stumps_per_attribute": self.n_stumps_per_attribute} - - def getInterpret(self, directory, y_test): - """ - - Parameters - ---------- - directory - y_test - - Returns - ------- - string of interpret_string - """ - interpret_string = "Cbound on train :" + str(self.train_cbound) - np.savetxt(directory + "times.csv", np.array([self.train_time, 0])) - # interpret_string += "Train C_bound value : "+str(self.cbound_train) - # y_rework = np.copy(y_test) - # y_rework[np.where(y_rework==0)] = -1 - # interpret_string += "\n Test c_bound value : "+str(self.majority_vote.cbound_value(self.x_test, y_rework)) - return interpret_string - - def get_name_for_fusion(self): - return "MCG" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.MCG_mu, -# "n_stumps_per_attribute": args.MCG_stumps} -# return kwargsDict - - -def paramsToSet(nIter, random_state): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py deleted file mode 100644 index ac7a409d82e0b0698f1af913b4c1f2f41b9114d6..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy_tree.py +++ /dev/null @@ -1,152 +0,0 @@ -import numpy as np - -from ..monoview.additions.BoostUtils import TreeClassifiersGenerator -from ..monoview.additions.MinCQUtils import RegularizedBinaryMinCqClassifier -from ..monoview.monoview_utils import BaseMonoviewClassifier, CustomUniform - -classifier_class_name = "MinCQGraalpyTree" - -class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, - BaseMonoviewClassifier): - """ - - Parameters - ---------- - random_state : - - mu : (default : 0.01) - - self_complemented : ( default : True) - - n_stumps_per_attribute : int ( default : 1) - max_depth : - - kwargs : others parameters - - - Attributes - ---------- - param_name : - - distribs : - - classed_params : - - n_stumps_per_attribute : int - - weird_strings : - - max_depth : - - random_state : - - nbCores : - """ - def __init__(self, random_state=None, mu=0.01, self_complemented=True, - n_stumps_per_attribute=1, max_depth=2, **kwargs): - - super(MinCQGraalpyTree, self).__init__(mu=mu, - estimators_generator=TreeClassifiersGenerator( - n_trees=n_stumps_per_attribute, - max_depth=max_depth, - self_complemented=self_complemented), - ) - self.param_names = ["mu", "n_stumps_per_attribute", "random_state", - "max_depth"] - self.distribs = [CustomUniform(loc=0.05, state=2.0, multiplier="e-"), - [n_stumps_per_attribute], [random_state], [max_depth]] - self.n_stumps_per_attribute = n_stumps_per_attribute - self.classed_params = [] - self.weird_strings = {} - self.max_depth = max_depth - self.random_state = random_state - if "nbCores" not in kwargs: - self.nbCores = 1 - else: - self.nbCores = kwargs["nbCores"] - - # def canProbas(self): - # """ - # 
Used to know if the classifier can return label probabilities - # - # Returns - # ------- - # True - # """ - # return True - - def set_params(self, **params): - """ - set parameter in the input dictionary - - Parameters - ---------- - params : dict parameter to set - - Returns - ------- - self : object - Returns self. - """ - self.mu = params["mu"] - self.random_state = params["random_state"] - self.n_stumps_per_attribute = params["n_stumps_per_attribute"] - self.max_depth = params["max_depth"] - return self - - def get_params(self, deep=True): - """ - get parameter - - Parameters - ---------- - deep : (boolean (default : True) not used - - Returns - ------- - dictionary of parameter as key and its values - """ - return {"random_state": self.random_state, "mu": self.mu, - "n_stumps_per_attribute": self.n_stumps_per_attribute, - "max_depth": self.max_depth} - - def getInterpret(self, directory, y_test): - """ - - Parameters - ---------- - directory : - - y_test : - - - Returns - ------- - string for interpretation interpret_string - """ - interpret_string = "Cbound on train :" + str(self.train_cbound) - np.savetxt(directory + "times.csv", np.array([self.train_time, 0])) - # interpret_string += "Train C_bound value : "+str(self.cbound_train) - # y_rework = np.copy(y_test) - # y_rework[np.where(y_rework==0)] = -1 - # interpret_string += "\n Test c_bound value : "+str(self.majority_vote.cbound_value(self.x_test, y_rework)) - return interpret_string - - def get_name_for_fusion(self): - return "MCG" - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"mu": args.MCGT_mu, -# "n_stumps_per_attribute": args.MCGT_trees, -# "max_depth": args.MCGT_max_depth} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py deleted file mode 100644 index eb829fb97321b974951aa0802661050f3af59c54..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py +++ /dev/null @@ -1,125 +0,0 @@ -from pyscm.scm import SetCoveringMachineClassifier as scm - -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -# class DecisionStumpSCMNew(scm, BaseEstimator, ClassifierMixin): -# """docstring for SCM -# A hands on class of SCM using decision stump, built with sklearn format in order to use sklearn function on SCM like -# CV, gridsearch, and so on ...""" -# -# def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42): -# super(DecisionStumpSCMNew, self).__init__(model_type=model_type, max_rules=max_rules, p=p, random_state=random_state) -# # self.model_type = model_type -# # self.p = p -# # self.max_rules = max_rules -# # self.random_state = random_state -# # self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state) -# -# # def fit(self, X, y): -# # print(self.clf.model_type) -# # self.clf.fit(X=X, y=y) -# # -# # def predict(self, X): -# # return self.clf.predict(X) -# # -# # def set_params(self, **params): -# # for key, value in iteritems(params): -# # if key 
== 'p': -# # self.p = value -# # if key == 'model_type': -# # self.model_type = value -# # if key == 'max_rules': -# # self.max_rules = value -# -# # def get_stats(self): -# # return {"Binary_attributes": self.clf.model_.rules} - - -classifier_class_name = "SCM" - -class SCM(scm, BaseMonoviewClassifier): - """ - SCM Classifier - Parameters - ---------- - random_state (default : None) - model_type : string (default: "conjunction") - max_rules : int number maximum of rules (default : 10) - p : float value(default : 0.1 ) - - kwarg : others arguments - - Attributes - ---------- - param_names - - distribs - - classed_params - - weird_strings - - """ - - def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, **kwargs): - """ - - Parameters - ---------- - random_state - model_type - max_rules - p - kwargs - """ - super(SCM, self).__init__( - random_state=random_state, - model_type=model_type, - max_rules=max_rules, - p=p - ) - self.param_names = ["model_type", "max_rules", "p", "random_state"] - self.distribs = [["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [random_state]] - self.classed_params = [] - self.weird_strings = {} - - # def canProbas(self): - # """ - # Used to know if the classifier can return label probabilities - # - # Returns - # ------- - # return False in any case - # """ - # return False - - def getInterpret(self, directory, y_test): - interpretString = "Model used : " + str(self.model_) - return interpretString - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCM_model_type, -# "p": args.SCM_p, -# "max_rules": args.SCM_max_rules} -# return kwargsDict - - -def paramsToSet(nIter, random_state): - paramsSet = [] - for _ in range(nIter): - paramsSet.append( - {"model_type": random_state.choice(["conjunction", "disjunction"]), - "max_rules": random_state.randint(1, 15), - "p": random_state.random_sample()}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py deleted file mode 100644 index 4b7ea990f2f5fd0b3d09acc14952e98770509fd7..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm_pregen.py +++ /dev/null @@ -1,203 +0,0 @@ -import os - -import numpy as np -from pyscm.scm import SetCoveringMachineClassifier as scm - -from ..monoview.additions.PregenUtils import PregenClassifier -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - -classifier_class_name = "SCMPregen" - -class SCMPregen(BaseMonoviewClassifier, PregenClassifier, scm): - """ - - Parameters - ---------- - random_state : int seed, RandomState instance, or None (default=None) - The seed of the pseudo random number generator to use when - shuffling the data. 
- - model_type - max_rules - p - n_stumps - self_complemented - estimators_generator - max_depth - kwargs - - Attributes - ---------- - param_names - - distribs - classed_params - weird_strings - self_complemented - n_stumps - estimators_generator - max_depth - """ - def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, n_stumps=10, self_complemented=True, - estimators_generator="Stumps", max_depth=1, **kwargs): - super(SCMPregen, self).__init__( - random_state=random_state, - model_type=model_type, - max_rules=max_rules, - p=p - ) - self.param_names = ["model_type", "max_rules", "p", "n_stumps", - "random_state", "estimators_generator", "max_depth"] - self.distribs = [["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [n_stumps], - [random_state], ["Stumps", "Tree"], - CustomRandint(low=1, high=5)] - self.classed_params = [] - self.weird_strings = {} - self.self_complemented = self_complemented - self.n_stumps = n_stumps - self.estimators_generator = estimators_generator - self.max_depth = max_depth - - def get_params(self, deep=True): - """ - - Parameters - ---------- - deep : boolean (default : True), not used - - Returns - ------- - parameters dictionary - """ - params = super(SCMPregen, self).get_params(deep) - params["estimators_generator"] = self.estimators_generator - params["max_depth"] = self.max_depth - params["n_stumps"] = self.n_stumps - return params - - def fit(self, X, y, tiebreaker=None, iteration_callback=None, - **fit_params): - """ - fit function - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - For kernel="precomputed", the expected shape of X is - (n_samples_test, n_samples_train). - - y : array-like, shape (n_samples,) - Target values (class labels in classification) - - tiebreaker - - iteration_callback : (default : None) - - fit_params : other parameters - - Returns - ------- - self : object - Returns self. - """ - pregen_X, _ = self.pregen_voters(X, y) - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - super(SCMPregen, self).fit(place_holder, y, tiebreaker=tiebreaker, - iteration_callback=iteration_callback, - **fit_params) - return self - - def predict(self, X): - """ - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Training vectors, where n_samples is the number of samples - and n_features is the number of features. - For kernel="precomputed", the expected shape of X is - (n_samples, n_samples). 
- - Returns - ------- - y_pred : array, shape (n_samples,) - """ - pregen_X, _ = self.pregen_voters(X) - list_files = os.listdir(".") - a = int(self.random_state.randint(0, 10000)) - if "pregen_x" + str(a) + ".csv" in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - while file_name in list_files: - a = int(np.random.randint(0, 10000)) - file_name = "pregen_x" + str(a) + ".csv" - else: - file_name = "pregen_x" + str(a) + ".csv" - np.savetxt(file_name, pregen_X, delimiter=',') - place_holder = np.genfromtxt(file_name, delimiter=',') - os.remove(file_name) - return self.classes_[self.model_.predict(place_holder)] - - # def canProbas(self): - # """ - # Used to know if the classifier can return label probabilities - # Returns - # ------- - # False in any case - # """ - # - # return False - - def getInterpret(self, directory, y_test): - """ - - Parameters - ---------- - directory - y_test - - Returns - ------- - interpret_string string of interpretation - """ - interpret_string = "Model used : " + str(self.model_) - return interpret_string - - -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCP_model_type, -# "p": args.SCP_p, -# "max_rules": args.SCP_max_rules, -# "n_stumps": args.SCP_stumps} -# return kwargsDict - - -def paramsToSet(nIter, randomState): - paramsSet = [] - for _ in range(nIter): - paramsSet.append( - {"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) - return paramsSet diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/__init__.py deleted file mode 100644 index dc8665a06cb54657c49364482cfdcdbc046ca244..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . 
import fat_late_fusion, analyze_results \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py deleted file mode 100644 index 6e58780dc111ceec257df0ee15b489adf174077e..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/analyze_results.py +++ /dev/null @@ -1,21 +0,0 @@ -from ...multiview import analyze_results - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def execute(classifier, trainLabels, - testLabels, DATASET, - classificationKWARGS, classification_indices, - labels_dictionary, views, nbCores, times, - name, KFolds, - hyper_param_search, nIter, metrics, - views_indices, randomState, labels, classifierModule): - return analyze_results.execute(classifier, trainLabels, - testLabels, DATASET, - classificationKWARGS, classification_indices, - labels_dictionary, views, nbCores, times, - name, KFolds, - hyper_param_search, nIter, metrics, - views_indices, randomState, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py deleted file mode 100644 index b93e79a4fc5713eb9adc9e363be949eac89e35f6..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py +++ /dev/null @@ -1,82 +0,0 @@ -import numpy as np - -from ...utils.multiclass import isBiclass, genMulticlassMonoviewDecision - - -def genName(config): - return "fat_late_fusion" - - -def getBenchmark(benchmark, args=None): - benchmark["multiview"]["fat_late_fusion"] = ["take_everything"] - return benchmark - - -def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices): - argumentsList = [] - multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult in resultsMonoview] - if isBiclass(multiclass_preds): - monoviewDecisions = np.array([monoviewResult.full_labels_pred for monoviewResult in resultsMonoview]) - else: - monoviewDecisions = np.array([genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in resultsMonoview]) - if len(args.FLF_weights) == 0: - weights = [1.0 for _ in range(monoviewDecisions.shape[0])] - else: - weights = args.FLF_weights - arguments = {"CL_type": "fat_late_fusion", - "views": views, - "NB_VIEW": len(resultsMonoview), - "views_indices": range(len(resultsMonoview)), - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "FatLateFusionKWARGS": { - "monoviewDecisions": monoviewDecisions, - "weights": weights - } - } - argumentsList.append(arguments) - return argumentsList - - -def genParamsSets(classificationKWARGS, randomState, nIter=1): - """Used to generate parameters sets for the random hyper parameters optimization function""" - nbMonoviewClassifiers = len(classificationKWARGS["monoviewDecisions"]) - weights = [randomState.random_sample(nbMonoviewClassifiers) for _ in range(nIter)] - normalizedWeights = [[weightVector/np.sum(weightVector)] for weightVector in weights] - return normalizedWeights - - -class FatLateFusionClass: - - def 
__init__(self, randomState, NB_CORES=1, **kwargs): - if kwargs["weights"] == []: - self.weights = [1.0/len(kwargs["monoviewDecisions"]) for _ in range(len(kwargs["monoviewDecisions"]))] - else: - self.weights = np.array(kwargs["weights"])/np.sum(np.array(kwargs["weights"])) - self.monoviewDecisions = kwargs["monoviewDecisions"] - - def setParams(self, paramsSet): - self.weights = paramsSet[0] - - def fit_hdf5(self, DATASET, labels, trainIndices=None, views_indices=None, metric=["f1_score", None]): - pass - - def predict_hdf5(self, DATASET, usedIndices=None, views_indices=None): - if usedIndices is None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - votes = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbClass"]), dtype=float) - for usedIndex, exampleIndex in enumerate(usedIndices): - for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions): - votes[usedIndex, monoviewDecision[exampleIndex]] += self.weights[monoviewDecisionIndex] - predictedLabels = np.argmax(votes, axis=1) - return predictedLabels - - def predict_probas_hdf5(self, DATASET, usedIndices=None): - pass - - def getConfigString(self, classificationKWARGS): - return "weights : "+", ".join(map(str, list(self.weights))) - - def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = '' - return stringAnalysis diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/__init__.py deleted file mode 100644 index fce28aa3a7727ea6998ab5f0f2e2b61f31ada922..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . 
import fat_scm_late_fusion, analyze_results \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py deleted file mode 100644 index d5fcd8a976689cd4aeac84bdbc9a9a03c3b95224..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/analyze_results.py +++ /dev/null @@ -1,21 +0,0 @@ -from ...multiview import analyze_results - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def execute(classifier, trainLabels, - testLabels, DATASET, - classificationKWARGS, classification_indices, - labels_dictionary, views, nbCores, times, - name, KFolds, - hyper_param_search, nIter, metrics, - views_indices, random_state, labels, classifierModule): - return analyze_results.execute(classifier, trainLabels, - testLabels, DATASET, - classificationKWARGS, classification_indices, - labels_dictionary, views, nbCores, times, - name, KFolds, - hyper_param_search, nIter, metrics, - views_indices, random_state, labels, classifierModule) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py deleted file mode 100644 index 34d3e982fed33d263447ce8a6e745b426f9b4768..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_scm_late_fusion/fat_scm_late_fusion.py +++ /dev/null @@ -1,132 +0,0 @@ -import numpy as np -from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.externals.six import iteritems - - -from ...utils.multiclass import isBiclass, genMulticlassMonoviewDecision - -def genName(config): - return "fat_scm_late_fusion" - - -def getBenchmark(benchmark, args=None): - benchmark["multiview"]["fat_scm_late_fusion"] = ["take_everything"] - return benchmark - - - -def getArgs(args, benchmark, views, views_indices, random_state, directory, resultsMonoview, classificationIndices): - argumentsList = [] - multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult in resultsMonoview] - if isBiclass(multiclass_preds): - monoviewDecisions = np.array([monoviewResult.full_labels_pred for monoviewResult in resultsMonoview]) - else: - monoviewDecisions = np.array([genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in resultsMonoview]) - monoviewDecisions = np.transpose(monoviewDecisions) - #monoviewDecisions = np.transpose(np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview])) - arguments = {"CL_type": "fat_scm_late_fusion", - "views": ["all"], - "NB_VIEW": len(resultsMonoview), - "views_indices": range(len(resultsMonoview)), - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "FatSCMLateFusionKWARGS": { - "monoviewDecisions": monoviewDecisions, - "p": args.FSCMLF_p, - "max_attributes": args.FSCMLF_max_attributes, - "model": args.FSCMLF_model, - } - } - argumentsList.append(arguments) - return argumentsList - - -def genParamsSets(classificationKWARGS, random_state, nIter=1): - """Used to generate parameters sets for the random hyper parameters optimization 
function""" - paramsSets = [] - for _ in range(nIter): - max_attributes = random_state.randint(1, 20) - p = random_state.random_sample() - model = random_state.choice(["conjunction", "disjunction"]) - paramsSets.append([p, max_attributes, model]) - - return paramsSets - - -class FatSCMLateFusionClass: - - def __init__(self, random_state, NB_CORES=1, **kwargs): - if kwargs["p"]: - self.p = kwargs["p"] - else: - self.p = 0.5 - if kwargs["max_attributes"]: - self.max_attributes = kwargs["max_attributes"] - else: - self.max_attributes = 5 - if kwargs["model"]: - self.model = kwargs["model"] - else: - self.model = "conjunction" - self.monoviewDecisions = kwargs["monoviewDecisions"] - self.random_state = random_state - - def setParams(self, paramsSet): - self.p = paramsSet[0] - self.max_attributes = paramsSet[1] - self.model = paramsSet[2] - - def fit_hdf5(self, DATASET, labels, trainIndices=None, views_indices=None, metric=["f1_score", None]): - features = self.monoviewDecisions[trainIndices] - self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model, - random_state=self.random_state) - self.SCMClassifier.fit(features, labels[trainIndices].astype(int)) - - def predict_hdf5(self, DATASET, usedIndices=None, views_indices=None): - if usedIndices is None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - predictedLabels = self.SCMClassifier.predict(self.monoviewDecisions[usedIndices]) - return predictedLabels - - def predict_probas_hdf5(self, DATASET, usedIndices=None): - pass - - def getConfigString(self, classificationKWARGS): - return "p : "+str(self.p)+", max_aributes : "+str(self.max_attributes)+", model : "+self.model - - def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = 'Rules used : ' + str(self.SCMClassifier.clf.model_) - return stringAnalysis - - -class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): - """docstring for SCM - A hands on class of SCM using decision stump, built with sklearn format in order to use sklearn function on SCM like - CV, gridsearch, and so on ...""" - - def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42): - super(DecisionStumpSCMNew, self).__init__() - self.model_type = model_type - self.p = p - self.max_rules = max_rules - self.random_state = random_state - - def fit(self, X, y): - self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state) - self.clf.fit(X=X, y=y) - - def predict(self, X): - return self.clf.predict(X) - - def set_params(self, **params): - for key, value in iteritems(params): - if key == 'p': - self.p = value - if key == 'model_type': - self.model_type = value - if key == 'max_rules': - self.max_rules = value - - def get_stats(self): - return {"Binary_attributes": self.clf.model_.rules} diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py deleted file mode 100644 index 508d2a94d6c78d86cea917e2ae9164fcec4a8d49..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py +++ /dev/null @@ -1,41 +0,0 @@ -from sklearn.tree import DecisionTreeClassifier - - -from multimodalboost.mumbo import MumboClassifier -from ..multiview.multiview_utils import BaseMultiviewClassifier, \ - get_examples_views_indices -from ..utils.hyper_parameter_search import CustomRandint - -classifier_class_name = "Mumbo" - 
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py
deleted file mode 100644
index 508d2a94d6c78d86cea917e2ae9164fcec4a8d49..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/mumbo.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from sklearn.tree import DecisionTreeClassifier
-
-
-from multimodalboost.mumbo import MumboClassifier
-from ..multiview.multiview_utils import BaseMultiviewClassifier, \
-    get_examples_views_indices
-from ..utils.hyper_parameter_search import CustomRandint
-
-classifier_class_name = "Mumbo"
-
-
-class Mumbo(BaseMultiviewClassifier, MumboClassifier):
-
-    def __init__(self, base_estimator=None,
-                 n_estimators=50,
-                 random_state=None,
-                 best_view_mode="edge"):
-        super().__init__(random_state)
-        super(BaseMultiviewClassifier, self).__init__(base_estimator=base_estimator,
-                                                      n_estimators=n_estimators,
-                                                      random_state=random_state,
-                                                      best_view_mode=best_view_mode)
-        self.param_names = ["base_estimator", "n_estimators", "random_state", "best_view_mode"]
-        self.distribs = [[DecisionTreeClassifier(max_depth=1)],
-                         CustomRandint(5, 200), [random_state], ["edge", "error"]]
-
-    def fit(self, X, y, train_indices=None, view_indices=None):
-        train_indices, view_indices = get_examples_views_indices(X,
-                                                                 train_indices,
-                                                                 view_indices)
-        numpy_X, view_limits = X.to_numpy_array(example_indices=train_indices,
-                                                view_indices=view_indices)
-        return super(Mumbo, self).fit(numpy_X, y[train_indices],
-                                      view_limits)
-
-    def predict(self, X, example_indices=None, view_indices=None):
-        example_indices, view_indices = get_examples_views_indices(X,
-                                                                   example_indices,
-                                                                   view_indices)
-        numpy_X, view_limits = X.to_numpy_array(example_indices=example_indices,
-                                                view_indices=view_indices)
-        return super(Mumbo, self).predict(numpy_X)
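The wrapper above flattens the platform's multiview dataset into one numpy array plus view boundaries before delegating to MumboClassifier. A sketch of the underlying call, assuming multimodalboost is installed and mirroring the constructor and fit(X, y, view_limits) usage shown in the wrapper; the toy data and seed are invented:

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier
    from multimodalboost.mumbo import MumboClassifier

    # Hypothetical toy problem: 6 examples, two views of 2 features each,
    # concatenated column-wise; view_limits marks the column boundaries.
    rng = np.random.RandomState(0)
    numpy_X = rng.rand(6, 4)
    y = np.array([0, 0, 0, 1, 1, 1])
    view_limits = np.array([0, 2, 4])  # view 0 = columns 0:2, view 1 = columns 2:4

    clf = MumboClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                          n_estimators=10, random_state=0, best_view_mode="edge")
    clf.fit(numpy_X, y, view_limits)
    print(clf.predict(numpy_X))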
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/__init__.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/__init__.py
deleted file mode 100644
index d6773304b2c117c67cdf8399b4840a4e54f76f03..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import analyze_results, pseudo_cq_fusion
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py
deleted file mode 100644
index 3823e68753d996524dd83c3475fb0fac8ee435e8..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/analyze_results.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from ...multiview import analyze_results
-
-# Author-Info
-__author__ = "Baptiste Bauvin"
-__status__ = "Prototype"  # Production, Development, Prototype
-
-
-def execute(classifier, trainLabels,
-            testLabels, DATASET,
-            classificationKWARGS, classificationIndices,
-            labels_dictionary, views, nbCores, times,
-            name, KFolds,
-            hyper_param_search, nIter, metrics,
-            views_indices, randomState, labels, classifierModule):
-    return analyze_results.execute(classifier, trainLabels,
-                                   testLabels, DATASET,
-                                   classificationKWARGS, classificationIndices,
-                                   labels_dictionary, views, nbCores, times,
-                                   name, KFolds,
-                                   hyper_param_search, nIter, metrics,
-                                   views_indices, randomState, labels, classifierModule)
\ No newline at end of file
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py
deleted file mode 100644
index bfd219d329c368594f6eab0a466c7eb5a4d3d358..0000000000000000000000000000000000000000
--- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/pseudo_cq_fusion/pseudo_cq_fusion.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions import \
-    diversity_utils
-from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion_old import difficulty
-from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion_old import doubleFault
-
-
-def genName(config):
-    return "pseudo_cq_fusion"
-
-
-def getBenchmark(benchmark, args=None):
-    benchmark["multiview"]["pseudo_cq_fusion"] = ["take_everything"]
-    return benchmark
-
-
-def pseudoCQ(difficulty, doubleFault):
-    return difficulty / float(doubleFault)
-
-
-def getArgs(args, benchmark, views, views_indices, randomState, directory, resultsMonoview, classificationIndices):
-    return diversity_utils.getArgs(args, benchmark, views,
-                                   views_indices, randomState, directory,
-                                   resultsMonoview, classificationIndices,
-                                   [doubleFault, difficulty], "pseudo_cq_fusion")
-
-
-def genParamsSets(classificationKWARGS, randomState, nIter=1):
-    return diversity_utils.genParamsSets(classificationKWARGS, randomState, nIter=nIter)
-
-
-class PseudoCQFusionClass(diversity_utils.DiversityFusionClass):
-
-    def __init__(self, randomState, NB_CORES=1, **kwargs):
-        diversity_utils.DiversityFusionClass.__init__(self, randomState, NB_CORES=1, **kwargs)
-
-    def getSpecificAnalysis(self, classificationKWARGS):
-        stringAnalysis = "Classifiers used for each view : " + ', '.join(self.classifiers_names) + \
-                         ', with a pseudo CQ of ' + str(self.div_measure)
-        return stringAnalysis
\ No newline at end of file
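pseudoCQ is simply the ratio of the two diversity measures that diversity_utils computes for the classifier ensemble; a tiny worked example with invented measure values:

    def pseudoCQ(difficulty, doubleFault):
        return difficulty / float(doubleFault)

    # e.g. difficulty = 0.30, double-fault = 0.12 (invented values)
    print(pseudoCQ(0.30, 0.12))  # -> 2.5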
model_type="conjunction", weights=None): - self.need_probas=False - super(SCMLateFusionClassifier, self).__init__(random_state=random_state, - classifier_names=classifier_names, - classifier_configs=classifier_configs, - nb_cores=nb_cores - ) - self.scm_classifier = None - self.p = p - self.max_rules = max_rules - self.order = order - self.model_type = model_type - self.param_names+=["model_type", "max_rules", "p", "order"] - self.distribs+=[["conjunction", "disjunction"], - CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [1,2,3]] - - def fit(self, X, y, train_indices=None, view_indices=None): - super(SCMLateFusionClassifier, self).fit(X, y, - train_indices=train_indices, - view_indices=view_indices) - self.scm_fusion_fit(X, y, train_indices=train_indices, view_indices=view_indices) - return self - - def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) - monoview_decisions = np.zeros((len(example_indices), X.nb_view), - dtype=int) - for index, view_index in enumerate(view_indices): - monoview_decision = self.monoview_estimators[index].predict( - X.get_v(view_index, example_indices)) - monoview_decisions[:, index] = monoview_decision - features = self.generate_interactions(monoview_decisions) - predicted_labels = self.scm_classifier.predict(features) - return predicted_labels - - def scm_fusion_fit(self, X, y, train_indices=None, view_indices=None): - train_indices, view_indices = get_examples_views_indices(X, train_indices, view_indices) - - self.scm_classifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_rules, model_type=self.model_type, - random_state=self.random_state) - monoview_decisions = np.zeros((len(train_indices), X.nb_view), dtype=int) - for index, view_index in enumerate(view_indices): - monoview_decisions[:, index] = self.monoview_estimators[index].predict( - X.get_v(view_index, train_indices)) - features = self.generate_interactions(monoview_decisions) - features = np.array([np.array([feat for feat in feature]) - for feature in features]) - self.scm_classifier.fit(features, y[train_indices].astype(int)) - - def generate_interactions(self, monoview_decisions): - if self.order is None: - self.order = monoview_decisions.shape[1] - if self.order == 1: - return monoview_decisions - else: - genrated_intercations = [monoview_decisions[:, i] - for i in range(monoview_decisions.shape[1])] - for order_index in range(self.order - 1): - combins = itertools.combinations(range(monoview_decisions.shape[1]), - order_index + 2) - for combin in combins: - generated_decision = monoview_decisions[:, combin[0]] - for index in range(len(combin) - 1): - if self.model_type == "disjunction": - generated_decision = np.logical_and(generated_decision, - monoview_decisions[:, combin[index + 1]]) - else: - generated_decision = np.logical_or(generated_decision, - monoview_decisions[:, combin[index + 1]]) - genrated_intercations.append(generated_decision) - return np.transpose(np.array(genrated_intercations)) -