diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index 7402dfc63ee74466c3b01d854b5a17fb1084fa24..5fdb396fd98608cc0464e661b3f188cc441b02f9 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -87,14 +87,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.epsilons = [] self.example_weights_ = [self.example_weights] self.train_metrics = [] - self.gammas = [] self.bounds = [] self.previous_votes = [] self.previous_margins = [np.multiply(y,y)] self.n_total_hypotheses_ = n self.n_total_examples = m - self.n_max_iterations = n + self.n_max_iterations = 100 self.break_cause = " the maximum number of iterations was attained." for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)): @@ -105,54 +104,32 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y)) else: first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix) + self.chosen_columns_.append(first_voter_index) self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1)) self.previous_vote = self.new_voter self.weighted_sum = self.new_voter - # We update the weights of the examples according to the error of the fisrt voter - epsilon = self._compute_epsilon(y) - if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf: - self.break_cause = " epsilon was too small" - self.weights_ = [1.0] - self.train_accuracies = [1.0] - break - self.epsilons.append(epsilon) - if self.divided_ponderation: - self.q = (1 / (self.n_max_iterations - k)) * math.log((1 - epsilon) / epsilon) - else: - # self.q = math.log((1 - epsilon) / epsilon) - self.q = math.log((1 + epsilon) / (1-epsilon)) - self.weights_.append(self.q) - - # Update the boosting variables - self._update_example_weights(y) - self.example_weights_.append(self.example_weights) - self.previous_margins.append(np.multiply(y, self.previous_vote)) - self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote))) - self.gammas.append(accuracy_score(y, np.sign(self.previous_vote))-0.5) - self.bounds.append(math.exp(-2*self.gammas[-1]**2)) - continue - - # Print dynamicly the step and the error of the current classifier - print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r") - - # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum - # C-bound with the previous vote or the one with the best weighted margin - if self.c_bound_choice: - sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y) else: - new_voter_index, sol = self._find_best_weighted_margin(y_kernel_matrix) + # Print dynamically the step and the error of the current classifier + print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r") - # If the new voter selector could not find one, break the loop - if type(sol) == str: - self.break_cause = new_voter_index # " no more hypothesis were able to improve the boosted vote." - break + # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum + # C-bound with the previous vote or the one with the best weighted margin + if self.c_bound_choice: + sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y) + else: + new_voter_index, sol = self._find_best_weighted_margin(y_kernel_matrix) - # Append the weak hypothesis. - self.chosen_columns_.append(new_voter_index) - self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1)) + # If the new voter selector could not find one, break the loop + if type(sol) == str: + self.break_cause = new_voter_index # + break + + # Append the weak hypothesis. + self.chosen_columns_.append(new_voter_index) + self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1)) # Generate the new weight for the new voter epsilon = self._compute_epsilon(y) @@ -165,20 +142,25 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon) else: self.q = math.log((1 - epsilon) / epsilon) - # self.q = math.log((1 + epsilon) / (1 - epsilon)) self.weights_.append(self.q) # Update the distribution on the examples. self._update_example_weights(y) self.example_weights_.append(self.example_weights) - # Update the "previous vote" to prepare for the next iteration - self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_], - np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1)) - self.previous_votes.append(self.previous_vote) + if k != 0: + # Update the "previous vote" to prepare for the next iteration + self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_], + np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1)) + self.previous_votes.append(self.previous_vote) self.previous_margins.append(np.multiply(y, self.previous_vote)) self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote))) - self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons))))) + # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons))))) + r = self._compute_r(y) + if k!=0: + self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2)) + else: + self.bounds.append(math.sqrt(1 - r ** 2)) self.nb_opposed_voters = self.check_opposed_voters() self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_] @@ -214,6 +196,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0) return epsilon + def _compute_r(self, y): + ones_matrix = np.ones(y.shape) + ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = -1 # can np.divide if needed + r = np.average(ones_matrix, weights=self.example_weights, axis=0) + return r + def _update_example_weights(self, y): """Old fashioned exaple weights update uses the whole majority vote, the other way uses only the last voter.""" if self.old_fashioned: @@ -314,7 +302,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1))) self.B2 = np.sum(weighted_next_column ** 2) - self.B1 = np.sum(2 * weighted_next_column * weighted_next_column) + self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum) self.B0 = np.sum(weighted_previous_sum ** 2) M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py index 98e27afd3294050e2bea48ba5f7a76f43b1216bb..34dc7bb0ff662f5badadbd7367d6f6c939a4818f 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py @@ -9,15 +9,15 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): super(QarBoostNC, self).__init__( random_state=random_state, - self_complemented=False, + self_complemented=True, twice_the_same=False, old_fashioned=False, previous_vote_weighted=False, - c_bound_choice=True, + c_bound_choice=False, random_start=True, - two_wieghts_problem=False, + two_wieghts_problem=True, divided_ponderation=False, - n_stumps_per_attribute=1 + n_stumps_per_attribute=100 ) self.param_names = [] self.distribs = []