diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py
index 3850082516eec4f44d0064c498411b34093db2b8..d885d596051a0116505011cb59878ec006b08c8d 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py
@@ -28,5 +28,5 @@ def getConfig(**kwargs):
         sample_weight = kwargs["0"]
     except:
         sample_weight = None
-    configString = "Jaccard similarity score using " + str(sample_weight) + " as sample_weights (higher is better)"
+    configString = "Jaccard_similarity score using " + str(sample_weight) + " as sample_weights (higher is better)"
     return configString
diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py
index 4f2cc4dbf0d4f33b27e7072bed9053d22be533d0..c6bbfe2d43060c32d7abd34e73f11b89f606b922 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py
@@ -49,6 +49,6 @@ def getConfig(**kwargs):
         average = kwargs["3"]
     except:
         average = "micro"
-    configString = "ROC AUC score using " + str(
+    configString = "ROC_AUC score using " + str(
         sample_weight) + " as sample_weights, " + average + " as average (higher is better)"
     return configString
diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py
index cf632fed02e9ed794a9ba83988907a508197978b..c946499b5cb1e6dc7c2c5037cc6fea0e7384794f 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py
@@ -28,5 +28,5 @@ def getConfig(**kwargs):
         sample_weight = kwargs["0"]
     except:
         sample_weight = None
-    configString = "Zero one loss using " + str(sample_weight) + " as sample_weights (lower is better)"
+    configString = "Zero_one loss using " + str(sample_weight) + " as sample_weights (lower is better)"
     return configString
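These three renames make the first two words of each getConfig() string a self-contained metric label ("Jaccard_similarity score", "ROC_AUC score", "Zero_one loss"): the reworked get_accuracy_graph() below builds its plot legend from exactly those two words. For reference, a minimal sketch of the metric-module interface these files follow; the string-indexed kwargs convention is taken from the getConfig bodies above, and the sklearn import is an assumption:

from sklearn.metrics import zero_one_loss as sklearn_metric

def score(y_true, y_pred, **kwargs):
    # The platform passes positional options as string-indexed kwargs ("0", "1", ...).
    sample_weight = kwargs.get("0", None)
    return sklearn_metric(y_true, y_pred, sample_weight=sample_weight)

def getConfig(**kwargs):
    sample_weight = kwargs.get("0", None)
    return "Zero_one loss using " + str(sample_weight) + " as sample_weights (lower is better)"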
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index 5cde3c0e5dc60fbf61cc3e2e091da51e25d633eb..f5191941440f620e4f3665bf996f0d6378e1b4c0 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -145,6 +145,18 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin):
         check_is_fitted(self, 'estimators_')
         return np.array([voter.predict(X) for voter in self.estimators_]).T
+# class TreesClassifiersGenerator(ClassifiersGenerator):
+#     """A generator to widen the voter's pool of our boosting algorithms.
+#     """
+#
+#     def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=True, max_depth=3):
+#         super(TreesClassifiersGenerator, self).__init__(self_complemented)
+#         self.n_stumps_per_attribute = n_stumps_per_attribute
+#         self.check_diff = check_diff
+#         self.max_depth = max_depth
+#
+#     def fit(self, X, y=None):
+
 
 class StumpsClassifiersGenerator(ClassifiersGenerator):
     """Decision Stump Voters transformer.
@@ -656,15 +668,22 @@ class ConvexProgram(object):
         signs[array == 0] = -1
         return signs
 
-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies"):
-    f, ax = plt.subplots(nrows=1, ncols=1)
-    ax.set_title(name+" during train for "+classifier_name)
-    x = np.arange(len(train_accuracies))
-    scat = ax.scatter(x, np.array(train_accuracies), )
-    ax.legend((scat,), (name,))
-    plt.tight_layout()
-    f.savefig(file_name)
-    plt.close()
+
+def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None):
+    if not isinstance(name, str):
+        name = " ".join(name.getConfig().strip().split(" ")[:2])
+    f, ax = plt.subplots(nrows=1, ncols=1)
+    ax.set_title(name + " during train for " + classifier_name)
+    x = np.arange(len(train_accuracies))
+    scat = ax.scatter(x, np.array(train_accuracies))
+    if bounds:
+        scat2 = ax.scatter(x, np.array(bounds))
+        ax.legend((scat, scat2), (name, "Bounds"))
+    else:
+        ax.legend((scat,), (name,))
+    plt.tight_layout()
+    f.savefig(file_name)
+    plt.close()
 
 
 class BaseBoost(object):
@@ -725,5 +744,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
                                                      separator=',', suppress_small=True)
     np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
     np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
-    get_accuracy_graph(classifier.train_accuracies, classifier_name, directory + 'accuracies.png')
+    get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds)
     return interpretString
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
index 9f9f5ba7ad5881246c92a023976ffa5d9ff0f0e2..c7ebf500968c67104e7f1e4594a26352c6286e46 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
@@ -7,8 +7,10 @@ from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.metrics import accuracy_score
 import numpy as np
 import time
+import math
 
 from .BoostUtils import StumpsClassifiersGenerator, ConvexProgram, sign, BaseBoost
+from ... import Metrics
 
 
 class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
@@ -20,8 +22,10 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
         self.dual_constraint_rhs = dual_constraint_rhs
         self.mu = mu
         self.train_time = 0
+        self.plotted_metric = Metrics.accuracy_score
 
     def fit(self, X, y):
+        start = time.time()
         if scipy.sparse.issparse(X):
             X = np.array(X.todense())
 
@@ -45,7 +49,9 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
         # Initialization
         alpha = self._initialize_alphas(m)
         self.initialize()
-        self.train_accuracies = []
+        self.train_metrics = []
+        self.gammas = []
+        self.bounds = []
         self.previous_votes = []
         # w = [0.5,0.5]
         w= None
@@ -74,14 +80,17 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
 
             margins = self.get_margins(w)
             signs_array = np.array([int(x) for x in sign(margins)])
-            self.train_accuracies.append(accuracy_score(y, signs_array))
+            self.train_metrics.append(self.plotted_metric.score(y, signs_array))
+            self.gammas.append(accuracy_score(y, signs_array))
+            self.bounds.append(math.exp(-2 * np.sum(np.square(np.array(self.gammas)))))
 
         self.nb_opposed_voters = self.check_opposed_voters()
         self.compute_weights_(w)
         # self.weights_ = w
         self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
+        end = time.time()
 
-
+        self.train_time = end - start
         y[y == -1] = 0
         return self
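The gammas/bounds bookkeeping added to fit() tracks the classical AdaBoost-style guarantee: if round t has edge gamma_t = accuracy_t - 1/2, the training error after T rounds is at most exp(-2 * sum_t gamma_t**2). A self-contained numeric check of the expression used above; the edge values are made up, and note that the code appends raw accuracies, not accuracy - 1/2, to self.gammas:

import math
import numpy as np

gammas = np.array([0.10, 0.12, 0.08, 0.15])  # hypothetical per-round edges

# Same expression as the bounds line in fit() above:
bound = math.exp(-2 * np.sum(np.square(gammas)))
print(bound)  # ~0.90, and it shrinks exponentially as rounds accumulate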
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index de24ffb4d393f5cdd6b9fe2b9e346e3d03c611a2..47567b95cdb2bd9c94e62e046790c586332efa4b 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -11,13 +11,14 @@
 import time
 import matplotlib.pyplot as plt
 
 from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, getInterpretBase, get_accuracy_graph
+from ... import Metrics
 
 class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def __init__(self, n_max_iterations=350, estimators_generator=None, dual_constraint_rhs=0,
                  random_state=42, self_complemented=True, twice_the_same=False, old_fashioned=False,
                  previous_vote_weighted=True, c_bound_choice = True, random_start = True,
-                 two_wieghts_problem=False, divided_ponderation=True):
+                 two_wieghts_problem=False, divided_ponderation=True, n_stumps_per_attribute=None):
         super(ColumnGenerationClassifierQar, self).__init__()
         self.n_max_iterations = n_max_iterations
         self.estimators_generator = estimators_generator
@@ -35,6 +36,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.random_start = random_start
         self.two_wieghts_problem = two_wieghts_problem
         self.divided_ponderation = divided_ponderation
+        self.plotted_metric = Metrics.zero_one_loss
+        if n_stumps_per_attribute:
+            self.n_stumps = n_stumps_per_attribute
+
+        self.printed_args_name_list = ["n_max_iterations", "self_complemented", "twice_the_same", "old_fashioned",
+                                       "previous_vote_weighted", "c_bound_choice", "random_start",
+                                       "two_wieghts_problem", "divided_ponderation", "n_stumps"]
 
     def set_params(self, **params):
         self.self_complemented = params["self_complemented"]
@@ -78,7 +86,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.c_bounds = []
         self.epsilons = []
         self.example_weights_ = [self.example_weights]
-        self.train_accuracies = []
+        self.train_metrics = []
+        self.gammas = []
+        self.bounds = []
         self.previous_votes = []
         self.previous_margins = [np.multiply(y,y)]
 
@@ -94,7 +104,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             if self.random_start:
                 first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y))
             else:
-                first_voter_index, plif = self._find_best_weighted_margin(y_kernel_matrix)
+                first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
             self.chosen_columns_.append(first_voter_index)
             self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
 
@@ -120,11 +130,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                 self._update_example_weights(y)
                 self.example_weights_.append(self.example_weights)
                 self.previous_margins.append(np.multiply(y, self.previous_vote))
-                self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
+                self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
+                self.gammas.append(accuracy_score(y, np.sign(self.previous_vote)) - 0.5)
+                self.bounds.append(math.exp(-2 * self.gammas[-1] ** 2))
                 continue
 
-            if epsilon > 0.5:
-                import pdb;pdb.set_trace()
+
+            # Print dynamically the step and the error of the current classifier
             print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
+
             # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
             # C-bound with the previous vote or the one with the best weighted margin
@@ -152,8 +165,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             if self.divided_ponderation:
                 self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon)
             else:
-                # self.q = math.log((1 - epsilon) / epsilon)
-                self.q = math.log((1 + epsilon) / (1 - epsilon))
+                self.q = math.log((1 - epsilon) / epsilon)
+                # self.q = math.log((1 + epsilon) / (1 - epsilon))
             self.weights_.append(self.q)
 
             # Update the distribution on the examples.
@@ -165,7 +178,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                                            np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
             self.previous_votes.append(self.previous_vote)
             self.previous_margins.append(np.multiply(y, self.previous_vote))
-            self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
+            self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
+            self.bounds.append(np.prod(np.sqrt(1 - 4 * np.square(0.5 - np.array(self.epsilons)))))
 
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
@@ -469,7 +483,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             import shutil
             shutil.rmtree(path+"/gif_images")
         get_accuracy_graph(self.epsilons, self.__class__.__name__, directory + 'epsilons.png', "Errors")
-        return getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
+        interpretString = getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
+
+        args_dict = dict((arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list)
+        interpretString += "\n \n With arguments : \n"+u'\u2022 '+ ("\n"+u'\u2022 ').join(['%s: \t%s' % (key, value)
+                                                                                          for (key, value) in args_dict.items()])
+
+        return interpretString
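QarBoostUtils uses the tighter product form of the same guarantee: with per-round weighted errors epsilon_t, the training error is at most prod_t 2*sqrt(epsilon_t*(1 - epsilon_t)), rewritten above as prod_t sqrt(1 - 4*(0.5 - epsilon_t)**2); since 1 - x <= exp(-x), the exponential form used in CQBoostUtils always upper-bounds it. A sketch with made-up errors:

import numpy as np

epsilons = np.array([0.40, 0.38, 0.42, 0.35])  # hypothetical weighted errors
gammas = 0.5 - epsilons

tight = np.prod(np.sqrt(1 - 4 * np.square(gammas)))  # product form (QarBoostUtils)
loose = np.exp(-2 * np.sum(np.square(gammas)))       # exponential form (CQBoostUtils)
assert tight <= loose  # the product form is never worse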
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
index 43979158c6416a81b71423d2b5639a78be4de36d..19352ef3d0d4d55d8ec122543fd5dcc398643a58 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
@@ -1,9 +1,11 @@
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn.tree import DecisionTreeClassifier
+import numpy as np
 from sklearn.metrics import accuracy_score
 
 from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
 from ..Monoview.Additions.BoostUtils import get_accuracy_graph
+from .. import Metrics
 
 # Author-Info
 __author__ = "Baptiste Bauvin"
@@ -24,9 +26,14 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         self.classed_params = ["base_estimator"]
         self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)]]
         self.weird_strings = {"base_estimator": "class_name"}
+        self.plotted_metric = Metrics.zero_one_loss
+        self.plotted_metric_name = "zero_one_loss"
 
     def fit(self, X, y, sample_weight=None):
         super(Adaboost, self).fit(X, y, sample_weight=sample_weight)
+        self.base_predictions = np.array([estim.predict(X) for estim in self.estimators_])
+        self.metrics = np.array([self.plotted_metric.score(y, pred) for pred in self.staged_predict(X)])
+        self.bounds = np.array([np.prod(np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) for i in range(self.estimator_errors_.shape[0])])
 
     def canProbas(self):
         """Used to know if the classifier can return label probabilities"""
@@ -37,6 +44,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         interpretString += self.getFeatureImportance(directory)
         interpretString += "\n\n Estimator error | Estimator weight\n"
         interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)])
+        get_accuracy_graph(self.metrics, "Adaboost", directory + "metrics.png", self.plotted_metric_name, bounds=list(self.bounds))
         return interpretString
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
index 4d4c64e314e096917de4857a37f9e76f14dd8ae1..98e27afd3294050e2bea48ba5f7a76f43b1216bb 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
@@ -16,7 +16,8 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
             c_bound_choice=True,
             random_start=True,
             two_wieghts_problem=False,
-            divided_ponderation=False
+            divided_ponderation=False,
+            n_stumps_per_attribute=1
             )
         self.param_names = []
         self.distribs = []
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
index 256403e3a215f578077bfb39158395bd9fb76adf..dbafa810e61f52c47c0ae9e2c2e592b770e00289 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
@@ -8,13 +8,15 @@ class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
     def __init__(self, random_state=None, **kwargs):
         super(QarBoostNC3, self).__init__(
             random_state=random_state,
-            self_complemented=True,
+            self_complemented=False,
             twice_the_same=False,
             old_fashioned=False,
             previous_vote_weighted=False,
             c_bound_choice=True,
             random_start=True,
-            two_wieghts_problem=False
+            two_wieghts_problem=False,
+            divided_ponderation=False,
+            n_stumps_per_attribute=10
             )
         self.param_names = []
         self.distribs = []
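The staged tracking added to Adaboost.fit above relies on two public sklearn APIs: staged_predict(), which yields the partial ensemble's predictions after each boosting round, and estimator_errors_, the per-round weighted errors. A self-contained sketch of the same computation on synthetic data (names are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import zero_one_loss

X, y = make_classification(n_samples=200, random_state=42)
clf = AdaBoostClassifier(n_estimators=50, random_state=42).fit(X, y)

# Zero-one loss of the partial ensemble after each round.
metrics = np.array([zero_one_loss(y, pred) for pred in clf.staged_predict(X)])

# Matching per-round theoretical bound, as in Adaboost.fit above.
errors = clf.estimator_errors_
bounds = np.array([np.prod(np.sqrt(1 - 4 * np.square(0.5 - errors[:i + 1])))
                   for i in range(errors.shape[0])])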
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
index ccbc52bce4eeadcf6c2c963516cbee460eb68dcd..e72688f6c8f7ec97363792631d283e14a2cd42e9 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
@@ -9,8 +9,14 @@ class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
         super(QarBoostv2, self).__init__(
             random_state=random_state,
             self_complemented=True,
-            twice_the_same=True,
-            previous_vote_weighted=True
+            twice_the_same=False,
+            old_fashioned=False,
+            previous_vote_weighted=False,
+            c_bound_choice=True,
+            random_start=True,
+            two_wieghts_problem=False,
+            divided_ponderation=False,
+            n_stumps_per_attribute=10
             )
         self.param_names = []
         self.distribs = []
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py
index dd650fb7b7ece95a78a58058e67d0ead33e0ace7..7b63ea7597fb8117f901281aaba0415468b18c7e 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py
@@ -1,43 +1,22 @@
-import numpy as np
-
 from ..Monoview.MonoviewUtils import BaseMonoviewClassifier
 from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 
 
-# class ColumnGenerationClassifierQar3(ColumnGenerationClassifierQar):
-#     def __init__(self, n_max_iterations=None, estimators_generator=None,
-#                  dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None,
-#                  random_state=42, self_complemented=True, twice_the_same=False):
-#         super(ColumnGenerationClassifierQar3, self).__init__(n_max_iterations=n_max_iterations,
-#                                                              estimators_generator=estimators_generator,
-#                                                              dual_constraint_rhs=dual_constraint_rhs,
-#                                                              save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each,
-#                                                              random_state=random_state,
-#                                                              self_complemented=self_complemented,
-#                                                              twice_the_same=twice_the_same)
-#
-#     def _compute_epsilon(self,):
-#         """Updating the \epsilon varaible"""
-#         ones_matrix = np.zeros(self.new_voter.shape)
-#         ones_matrix[self.new_voter < 0] = 1
-#         epsilon = (1.0/self.n_total_examples)*np.sum(self.example_weights*ones_matrix, axis=0)
-#         return epsilon
-#
-#     def _update_example_weights(self, y):
-#         new_weights = self.example_weights*np.exp(-self.q*y.reshape((self.n_total_examples, 1))*self.new_voter)
-#         self.example_weights = new_weights/np.sum(new_weights)
-
-
-
 class QarBoostv3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, **kwargs):
         super(QarBoostv3, self).__init__(
             random_state=random_state,
             self_complemented=True,
-            twice_the_same=True,
-            previous_vote_weighted=False
+            twice_the_same=False,
+            old_fashioned=False,
+            previous_vote_weighted=False,
+            c_bound_choice=True,
+            random_start=True,
+            two_wieghts_problem=False,
+            divided_ponderation=False,
+            n_stumps_per_attribute=1
             )
         self.param_names = []
         self.distribs = []
@@ -65,382 +44,3 @@ def paramsToSet(nIter, randomState):
     for _ in range(nIter):
         paramsSet.append({})
     return paramsSet
-
-
-
-
-    # def __init__(self, epsilon=1e-06, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=42):
-    #     super(ColumnGenerationClassifierQar3, self).__init__()
-    #     self.epsilon = epsilon
-    #     self.n_max_iterations = n_max_iterations
-    #     self.estimators_generator = estimators_generator
-    #     self.dual_constraint_rhs = dual_constraint_rhs
-    #     self.save_iteration_as_hyperparameter_each = save_iteration_as_hyperparameter_each
-    #     self.random_state = random_state
-    #
-    # def fit(self, X, y):
-    #     if scipy.sparse.issparse(X):
-    #         logging.info('Converting to dense matrix.')
-    #         X = np.array(X.todense())
-    #
-    #     if self.estimators_generator is None:
-    #         self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=True)
-    #
-    #     y[y == 0] = -1
-    #
-    #     self.estimators_generator.fit(X, y)
-    #     self.classification_matrix = self._binary_classification_matrix(X)
-    #
-    #
-    #     self.weights_ = []
-    #     self.infos_per_iteration_ = defaultdict(list)
-    #
-    #     m, n = self.classification_matrix.shape
-    #     y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix)
-    #
-    #     # Initialization
-    #
-    #     self.collected_weight_vectors_ = {}
-    #     self.collected_dual_constraint_violations_ = {}
-    #
-    #     self.example_weights = self._initialize_alphas(m).reshape((m,1))
-    #
-    #     self.chosen_columns_ = []
-    #     self.fobidden_columns = []
-    #     self.edge_scores = []
-    #     self.epsilons = []
-    #     self.example_weights_ = [self.example_weights]
-    #     self.train_accuracies = []
-    #     self.previous_votes = []
-    #
-    #     self.n_total_hypotheses_ = n
-    #     self.n_total_examples = m
-    #
-    #     for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
-    #         # To choose the first voter, we select the one that has the best margin.
-    #         if k == 0:
-    #             first_voter_index = self._find_best_margin(y_kernel_matrix)
-    #             self.chosen_columns_.append(first_voter_index)
-    #
-    #             self.previous_vote = self.classification_matrix[:, first_voter_index].reshape((m,1))
-    #             self.previous_votes.append(self.previous_vote)
-    #             self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
-    #
-    #             epsilon = self._compute_epsilon()
-    #             self.epsilons.append(epsilon)
-    #             self.q = math.log((1-epsilon)/epsilon)
-    #             self.weights_.append(self.q)
-    #
-    #             self._update_example_weights(y)
-    #             self.example_weights_.append(self.example_weights)
-    #             self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
-    #             continue
-    #
-    #         # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
-    #         # C-bound with the previous vote
-    #         sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y)
-    #         if type(sol) == str:
-    #             self.break_cause = " no more hypothesis were able to improve the boosted vote."
-    #             break
-    #
-    #         # Append the weak hypothesis.
-    #         self.chosen_columns_.append(new_voter_index)
-    #         # self.weighted_sum = np.matmul(np.concatenate((self.previous_vote, self.classification_matrix[:, new_voter_index].reshape((m,1))), axis=1),
-    #         #                               sol).reshape((m,1))
-    #         self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m,1))
-    #
-    #         # Generate the new weight for the new voter
-    #         epsilon = self._compute_epsilon()
-    #         self.epsilons.append(epsilon)
-    #         if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
-    #             self.chosen_columns_.pop()
-    #             self.break_cause = " epsilon was too small."
-    #             break
-    #         self.q = math.log((1 - epsilon) / epsilon)
-    #         self.weights_.append(self.q)
-    #
-    #         # Update the distribution on the examples.
-    #         self._update_example_weights(y)
-    #         self.example_weights_.append(self.example_weights)
-    #
-    #         # Update the "previous vote" to prepare for the next iteration
-    #         self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
-    #                                        np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
-    #         self.previous_votes.append(self.previous_vote)
-    #         self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
-    #
-    #     self.nb_opposed_voters = self.check_opposed_voters()
-    #     self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
-    #     self.weights_ = np.array(self.weights_)
-    #
-    #     self.weights_/=np.sum(self.weights_)
-    #     y[y == -1] = 0
-    #
-    #     return self
-    #
-    # def predict(self, X):
-    #     start = time.time()
-    #     check_is_fitted(self, 'weights_')
-    #     if scipy.sparse.issparse(X):
-    #         logging.warning('Converting sparse matrix to dense matrix.')
-    #         X = np.array(X.todense())
-    #     classification_matrix = self._binary_classification_matrix(X)
-    #     margins = np.squeeze(np.asarray(np.matmul(classification_matrix, self.weights_)))
-    #     signs_array = np.array([int(x) for x in sign(margins)])
-    #     signs_array[signs_array == -1 ] = 0
-    #     end = time.time()
-    #     self.predict_time = end-start
-    #     return signs_array
-
-    #
-    # def _find_best_margin(self, y_kernel_matrix):
-    #     """Used only on the first iteration to select the voter with the largest margin"""
-    #     pseudo_h_values = ma.array(np.sum(y_kernel_matrix, axis=0), fill_value=-np.inf)
-    #     pseudo_h_values[self.fobidden_columns] = ma.masked
-    #     worst_h_index = ma.argmax(pseudo_h_values)
-    #     return worst_h_index
-    #
-    # def _find_new_voter(self, y_kernel_matrix, y):
-    #     """Here, we solve the two_voters_mincq_problem for each potential new voter,
-    #     and select the one that has the smallest minimum"""
-    #     c_borns = []
-    #     possible_sols = []
-    #     indices = []
-    #     for hypothese_index, hypothese in enumerate(y_kernel_matrix.transpose()):
-    #         causes = []
-    #         if hypothese_index not in self.chosen_columns_:
-    #             w = self._solve_two_weights_min_c(hypothese, y)
-    #             if w[0] != "break":
-    #                 c_borns.append(self._cbound(w[0]))
-    #                 possible_sols.append(w)
-    #                 indices.append(hypothese_index)
-    #             else:
-    #                 causes.append(w[1])
-    #     if c_borns:
-    #         min_c_born_index = ma.argmin(c_borns)
-    #         selected_sol = possible_sols[min_c_born_index]
-    #         selected_voter_index = indices[min_c_born_index]
-    #         return selected_sol, selected_voter_index
-    #     else:
-    #         return "break", "smthng"
-
-    #
-    # def _solve_two_weights_min_c(self, next_column, y):
-    #     """Here we solve the min C-bound problem for two voters and return the best 2-weights array"""
-    #     m = next_column.shape[0]
-    #     zero_diag = np.ones((m, m)) - np.identity(m)
-    #
-    #     weighted_previous_sum = np.multiply(np.multiply(y.reshape((m, 1)), self.previous_vote.reshape((m, 1))), self.example_weights.reshape((m,1)))
-    #     weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
-    #
-    #     mat_prev = np.repeat(weighted_previous_sum, m, axis=1) * zero_diag
-    #     mat_next = np.repeat(weighted_next_column, m, axis=1) * zero_diag
-    #
-    #     self.B2 = np.sum((weighted_previous_sum - weighted_next_column) ** 2)
-    #     self.B1 = np.sum(2 * weighted_next_column * (weighted_previous_sum - 2 * weighted_next_column * weighted_next_column))
-    #     self.B0 = np.sum(weighted_next_column * weighted_next_column)
-    #
-    #     self.A2 = self.B2 + np.sum((mat_prev - mat_next) * np.transpose(mat_prev - mat_next))
-    #     self.A1 = self.B1 + np.sum(mat_prev * np.transpose(mat_next) - mat_next * np.transpose(mat_prev) - 2 * mat_next * np.transpose(mat_next))
-    #     self.A0 = self.B0 + np.sum(mat_next * np.transpose(mat_next))
-    #     C2 = (self.A1 * self.B2 - self.A2 * self.B1)
-    #     C1 = 2 * (self.A0 * self.B2 - self.A2 * self.B0)
-    #     C0 = self.A0 * self.B1 - self.A1 * self.B0
-    #
-    #     if C2 == 0:
-    #         if C1 == 0:
-    #             return np.array([0.5, 0.5])
-    #         elif abs(C1) > 0:
-    #             return np.array([0., 1.])
-    #         else:
-    #             return ['break', "the derivate was constant."]
-    #     elif C2 == 0:
-    #         return ["break", "the derivate was affine."]
-    #     try:
-    #         sols = np.roots(np.array([C2, C1, C0]))
-    #     except:
-    #         return ["break", "nan"]
-    #
-    #     is_acceptable, sol = self._analyze_solutions(sols)
-    #     if is_acceptable:
-    #         return np.array([sol, 1-sol])
-    #     else:
-    #         return ["break", sol]
-    #
-    # def _analyze_solutions(self, sols):
-    #     """"We just check that the solution found by np.roots is acceptable under our constraints
-    #     (real, a minimum and between 0 and 1)"""
-    #     for sol_index, sol in enumerate(sols):
-    #         if isinstance(sol, complex):
-    #             sols[sol_index] = -1
-    #     if sols.shape[0] == 1:
-    #         if self._cbound(sols[0]) < self._cbound(sols[0] + 1):
-    #             best_sol = sols[0]
-    #         else:
-    #             return False, " the only solution was a maximum."
-    #     elif sols.shape[0] == 2:
-    #         best_sol = self._best_sol(sols)
-    #     else:
-    #         return False, " no solution were found."
-    #
-    #     if 0 < best_sol < 1:
-    #         return True, self._best_sol(sols)
-    #
-    #     elif best_sol <= 0:
-    #         return False, " the minimum was below 0."
-    #     else:
-    #         return False, " the minimum was over 1."
-    #
-    # def _cbound(self, sol):
-    #     """Computing the objective function"""
-    #     return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0)
-    #
-    # def _best_sol(self, sols):
-    #     values = np.array([self._cbound(sol) for sol in sols])
-    #     return sols[np.argmin(values)]
-
-
-# class QarBoostClassifier3(ColumnGenerationClassifierQar3):
-#     def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None, random_state=42):
-#         super(QarBoostClassifier3, self).__init__(epsilon, n_max_iterations, estimators_generator, dual_constraint_rhs=0,
-#                                                   save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, random_state=random_state)
-#         self.mu = mu
-#         self.train_time = 0
-#
-#     def _initialize_alphas(self, n_examples):
-#         return 1.0 / n_examples * np.ones((n_examples,))
-#
-#
-# class QarBoostv3(QarBoostClassifier3):
-#
-#     def __init__(self, random_state, **kwargs):
-#         super(QarBoostv3, self).__init__(
-#             mu=kwargs['mu'],
-#             epsilon=kwargs['epsilon'],
-#             n_max_iterations= kwargs['n_max_iterations'],
-#             random_state = random_state)
-#
-#     def canProbas(self):
-#         """Used to know if the classifier can return label probabilities"""
-#         return False
-#
-#     def paramsToSrt(self, nIter=1):
-#         """Used for weighted linear early fusion to generate random search sets"""
-#         paramsSet = []
-#         for _ in range(nIter):
-#             paramsSet.append({"mu": 0.001,
-#                               "epsilon": 1e-08,
-#                               "n_max_iterations": None})
-#         return paramsSet
-#
-#     def getKWARGS(self, args):
-#         """Used to format kwargs for the parsed args"""
-#         kwargsDict = {}
-#         kwargsDict['mu'] = 0.001
-#         kwargsDict['epsilon'] = 1e-08
-#         kwargsDict['n_max_iterations'] = None
-#         return kwargsDict
-#
-#     def genPipeline(self):
-#         return Pipeline([('classifier', QarBoostClassifier3())])
-#
-#     def genParamsDict(self, randomState):
-#         return {"classifier__mu": [0.001],
-#                 "classifier__epsilon": [1e-08],
-#                 "classifier__n_max_iterations": [None]}
-#
-#     def genBestParams(self, detector):
-#         return {"mu": detector.best_params_["classifier__mu"],
-#                 "epsilon": detector.best_params_["classifier__epsilon"],
-#                 "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]}
-#
-#     def genParamsFromDetector(self, detector):
-#         nIter = len(detector.cv_results_['param_classifier__mu'])
-#         return [("mu", np.array([0.001 for _ in range(nIter)])),
-#                 ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])),
-#                 ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))]
-#
-#     def getConfig(self, config):
-#         if type(config) is not dict:  # Used in late fusion when config is a classifier
-#             return "\n\t\t- QarBoost with mu : " + str(config.mu) + ", epsilon : " + str(
-#                 config.epsilon + ", n_max_iterations : " + str(config.n_max_iterations))
-#         else:
-#             return "\n\t\t- QarBoost with mu : " + str(config["mu"]) + ", epsilon : " + str(
-#                 config["epsilon"] + ", n_max_iterations : " + str(config["n_max_iterations"]))
-#
-#
-#     def getInterpret(self, classifier, directory):
-#         interpretString = ""
-#         return interpretString
-#
-#
-# def canProbas():
-#     return False
-#
-#
-# def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
-#     start =time.time()
-#     """Used to fit the monoview classifier with the args stored in kwargs"""
-#     classifier = QarBoostClassifier3(mu=kwargs['mu'],
-#                                      epsilon=kwargs['epsilon'],
-#                                      n_max_iterations=kwargs["n_max_iterations"],
-#                                      random_state=randomState)
-#     classifier.fit(DATASET, CLASS_LABELS)
-#     end = time.time()
-#     classifier.train_time = end-start
-#     return classifier
-#
-#
-# def paramsToSet(nIter, randomState):
-#     """Used for weighted linear early fusion to generate random search sets"""
-#     paramsSet = []
-#     for _ in range(nIter):
-#         paramsSet.append({"mu": randomState.uniform(1e-02, 10**(-0.5)),
-#                           "epsilon": 10**-randomState.randint(1, 15),
-#                           "n_max_iterations": None})
-#     return paramsSet
-#
-#
-# def getKWARGS(args):
-#     """Used to format kwargs for the parsed args"""
-#     kwargsDict = {}
-#     kwargsDict['mu'] = args.QarB3_mu
-#     kwargsDict['epsilon'] = args.QarB3_epsilon
-#     kwargsDict['n_max_iterations'] = None
-#     return kwargsDict
-#
-#
-# def genPipeline():
-#     return Pipeline([('classifier', QarBoostClassifier3())])
-#
-#
-# def genParamsDict(randomState):
-#     return {"classifier__mu": CustomUniform(loc=.5, state=2, multiplier='e-'),
-#             "classifier__epsilon": CustomRandint(low=1, high=15, multiplier='e-'),
-#             "classifier__n_max_iterations": [None],
-#             "classifier__random_state": [randomState]}
-#
-#
-# def genBestParams(detector):
-#     return {"mu": detector.best_params_["classifier__mu"],
-#             "epsilon": detector.best_params_["classifier__epsilon"],
-#             "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]}
-#
-#
-# def genParamsFromDetector(detector):
-#     nIter = len(detector.cv_results_['param_classifier__mu'])
-#     return [("mu", np.array(detector.cv_results_['param_classifier__mu'])),
-#             ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])),
-#             ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))]
-#
-#
-# def getConfig(config):
-#     if type(config) is not dict:  # Used in late fusion when config is a classifier
-#         return "\n\t\t- QarBoost with mu : " + str(config.mu) + ", epsilon : " + str(
-#             config.epsilon) + ", n_max_iterations : " + str(config.n_max_iterations)
-#     else:
-#         return "\n\t\t- QarBoost with mu : " + str(config["mu"]) + ", epsilon : " + str(
-#             config["epsilon"]) + ", n_max_iterations : " + str(config["n_max_iterations"])
-#
-#
-# def getInterpret(classifier, directory):
-#     return getInterpretBase(classifier, directory, "QarBoostv3", classifier.weights_, classifier.break_cause)