Commit a87e1266 authored by Baptiste Bauvin

Corrected B1 and modified bound computing for clarity

parent 1c2c00c8
@@ -87,14 +87,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.epsilons = []
         self.example_weights_ = [self.example_weights]
         self.train_metrics = []
-        self.gammas = []
         self.bounds = []
         self.previous_votes = []
         self.previous_margins = [np.multiply(y,y)]
         self.n_total_hypotheses_ = n
         self.n_total_examples = m
-        self.n_max_iterations = n
+        self.n_max_iterations = 100
         self.break_cause = " the maximum number of iterations was attained."
         for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
@@ -105,37 +104,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                     first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y))
                 else:
                     first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
                 self.chosen_columns_.append(first_voter_index)
                 self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
                 self.previous_vote = self.new_voter
                 self.weighted_sum = self.new_voter
-                # We update the weights of the examples according to the error of the fisrt voter
-                epsilon = self._compute_epsilon(y)
-                if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
-                    self.break_cause = " epsilon was too small"
-                    self.weights_ = [1.0]
-                    self.train_accuracies = [1.0]
-                    break
-                self.epsilons.append(epsilon)
-                if self.divided_ponderation:
-                    self.q = (1 / (self.n_max_iterations - k)) * math.log((1 - epsilon) / epsilon)
-                else:
-                    # self.q = math.log((1 - epsilon) / epsilon)
-                    self.q = math.log((1 + epsilon) / (1-epsilon))
-                self.weights_.append(self.q)
-                # Update the boosting variables
-                self._update_example_weights(y)
-                self.example_weights_.append(self.example_weights)
-                self.previous_margins.append(np.multiply(y, self.previous_vote))
-                self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
-                self.gammas.append(accuracy_score(y, np.sign(self.previous_vote))-0.5)
-                self.bounds.append(math.exp(-2*self.gammas[-1]**2))
-                continue
-            # Print dynamicly the step and the error of the current classifier
+            # Print dynamically the step and the error of the current classifier
             print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
             # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
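Note: the deleted block above special-cased the first iteration; with it gone, k == 0 falls through to the shared epsilon/weight/bound updates guarded by `if k != 0` in a later hunk. For reference, the voter weight rule kept here is the AdaBoost-style q = log((1 - epsilon) / epsilon), which is why epsilon = 0 had to be trapped before taking the log. A minimal standalone sketch, not part of the diff, with illustrative names:

    import math

    def voter_weight(epsilon, eps_min=1e-12):
        """AdaBoost-style weight for a voter with weighted error epsilon."""
        # log((1 - eps) / eps) diverges as eps -> 0, which is why the fit
        # loop above breaks out early when epsilon reaches zero.
        if epsilon <= eps_min or epsilon >= 1 - eps_min:
            raise ValueError("epsilon must lie strictly inside (0, 1)")
        return math.log((1 - epsilon) / epsilon)

    for eps in (0.1, 0.25, 0.4, 0.49):
        print(eps, voter_weight(eps))  # weight shrinks toward 0 as eps -> 0.5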
@@ -147,7 +124,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             # If the new voter selector could not find one, break the loop
             if type(sol) == str:
-                self.break_cause = new_voter_index # " no more hypothesis were able to improve the boosted vote."
+                self.break_cause = new_voter_index #
                 break
             # Append the weak hypothesis.
@@ -165,20 +142,25 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                 self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon)
             else:
                 self.q = math.log((1 - epsilon) / epsilon)
                 # self.q = math.log((1 + epsilon) / (1 - epsilon))
             self.weights_.append(self.q)
             # Update the distribution on the examples.
             self._update_example_weights(y)
             self.example_weights_.append(self.example_weights)
             if k != 0:
                 # Update the "previous vote" to prepare for the next iteration
                 self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
                                                np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
                 self.previous_votes.append(self.previous_vote)
             self.previous_margins.append(np.multiply(y, self.previous_vote))
             self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
-            self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
+            # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
+            r = self._compute_r(y)
+            if k!=0:
+                self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
+            else:
+                self.bounds.append(math.sqrt(1 - r ** 2))
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
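Note: the replaced product bound and its recursive successor agree. With votes in {-1, +1}, r = 1 - 2*epsilon, so 1 - 4*(0.5 - epsilon)^2 = 1 - r^2, and multiplying the previous bound by sqrt(1 - r^2) rebuilds the same product at O(1) cost per iteration. A standalone check with synthetic epsilons (illustrative values, not from the repository):

    import math
    import numpy as np

    rng = np.random.default_rng(0)
    epsilons = rng.uniform(0.05, 0.45, size=10)  # illustrative weighted errors

    # Old form: recompute the whole product at every step.
    old_bounds = [np.prod(np.sqrt(1 - 4 * np.square(0.5 - epsilons[:t + 1])))
                  for t in range(len(epsilons))]

    # New form: running product over r_t = 1 - 2 * eps_t, as in the commit.
    new_bounds = []
    for eps in epsilons:
        r = 1 - 2 * eps  # weighted margin when votes are in {-1, +1}
        factor = math.sqrt(1 - r ** 2)
        new_bounds.append(factor if not new_bounds else new_bounds[-1] * factor)

    assert np.allclose(old_bounds, new_bounds)  # identical bound, cheaper update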
@@ -214,6 +196,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0)
         return epsilon
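Note: only the tail of `_compute_epsilon` is visible here. By analogy with `_compute_r` below, `ones_matrix` presumably holds 1 on misclassified examples and 0 elsewhere, making epsilon the weighted error of the newest voter. A standalone sketch of that reading, with illustrative names and data:

    import numpy as np

    def weighted_error(y, votes, weights):
        """Weighted error: the fraction of weight on examples the voter gets wrong."""
        wrong = np.multiply(y, votes) < 0      # margin < 0 means a mistake
        indicator = np.where(wrong, 1.0, 0.0)  # 1 on errors, 0 on correct
        return np.average(indicator, weights=weights)

    y = np.array([1, -1, 1, 1])
    votes = np.array([1, 1, -1, 1])
    w = np.full(4, 0.25)
    print(weighted_error(y, votes, w))  # 0.5: half the weight is misclassified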
+    def _compute_r(self, y):
+        ones_matrix = np.ones(y.shape)
+        ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = -1  # can np.divide if needed
+        r = np.average(ones_matrix, weights=self.example_weights, axis=0)
+        return r
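Note: `_compute_r` is `_compute_epsilon` with a {-1, +1} indicator in place of {0, 1}. The weighted average of sign(y * h) equals (1 - epsilon) - epsilon = 1 - 2*epsilon, which ties the r used in the recursive bound above back to the stored epsilons.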
     def _update_example_weights(self, y):
         """Old fashioned example weights update uses the whole majority vote, the other way uses only the last voter."""
         if self.old_fashioned:
@@ -314,7 +302,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
         self.B2 = np.sum(weighted_next_column ** 2)
-        self.B1 = np.sum(2 * weighted_next_column * weighted_next_column)
+        self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum)
         self.B0 = np.sum(weighted_previous_sum ** 2)
         M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag))
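Note: the B1 fix named in the commit title reads as the cross term of a squared norm. Assuming, from the names alone (the rest of the method is not shown), that B0, B1, B2 are the coefficients of ||weighted_previous_sum + q * weighted_next_column||^2 as a quadratic in q, B1 must be 2 * sum(next * previous); the old line simply duplicated B2. A numeric check of that expansion with synthetic vectors:

    import numpy as np

    rng = np.random.default_rng(1)
    m = 6
    s = rng.normal(size=(m, 1))  # stands in for weighted_previous_sum
    c = rng.normal(size=(m, 1))  # stands in for weighted_next_column
    q = 0.7

    B2 = np.sum(c ** 2)
    B1 = np.sum(2 * c * s)       # the corrected cross term
    B0 = np.sum(s ** 2)

    # ||s + q*c||^2 expands to B0 + B1*q + B2*q^2 only with the corrected B1.
    lhs = np.sum((s + q * c) ** 2)
    rhs = B0 + B1 * q + B2 * q ** 2
    assert np.isclose(lhs, rhs)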
@@ -9,15 +9,15 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
     def __init__(self, random_state=None, **kwargs):
         super(QarBoostNC, self).__init__(
             random_state=random_state,
-            self_complemented=False,
+            self_complemented=True,
             twice_the_same=False,
             old_fashioned=False,
             previous_vote_weighted=False,
-            c_bound_choice=True,
+            c_bound_choice=False,
             random_start=True,
-            two_wieghts_problem=False,
+            two_wieghts_problem=True,
             divided_ponderation=False,
-            n_stumps_per_attribute=1
+            n_stumps_per_attribute=100
             )
         self.param_names = []
         self.distribs = []
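Note: judging by the parameter names alone (their definitions lie outside this diff), these default flips move QarBoostNC off C-bound-driven voter selection and onto the two-weights formulation over self-complemented hypotheses, and grow the pool from 1 to 100 stumps per attribute. The empty `param_names` and `distribs` presumably leave nothing for hyper-parameter search to tune.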