Added format_X_y

9d0667f2 · Baptiste Bauvin · 258f6c74 · 9d0667f2
Commit 9d0667f2 authored 6 years ago by Baptiste Bauvin
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -55,33 +55,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):

        m,n,y_kernel_matrix = self.init_hypotheses(formatted_X, formatted_y)

-        self.example_weights = self._initialize_alphas(m).reshape((m,1))
-
-
-        self.previous_margins.append(np.multiply(formatted_y, formatted_y))
-        self.example_weights_.append(self.example_weights)
        self.n_total_hypotheses_ = n
        self.n_total_examples = m
-        self.break_cause = " the maximum number of iterations was attained."
-
-        for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):

-            # To choose the first voter, we select the one that has the best margin or a random one..
-            if k == 0:
-                if self.random_start:
-                    first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, formatted_y))
-                else:
-                    first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
+        self.init_boosting(m, formatted_y, y_kernel_matrix)
+        self.break_cause = " the maximum number of iterations was attained."

-                self.chosen_columns_.append(first_voter_index)
-                self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
+        for k in range(min(n-1, self.n_max_iterations-1 if self.n_max_iterations is not None else np.inf)):

-                self.previous_vote = self.new_voter
-                self.weighted_sum = self.new_voter
-
-            else:
            # Print dynamically the step and the error of the current classifier
-                print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
+            print("{}/{}, eps :{}".format(k+2, self.n_max_iterations, self.epsilons[-1]), end="\r")

            # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
            # C-bound with the previous vote or the one with the best weighted margin
@@ -99,6 +82,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
            self.chosen_columns_.append(new_voter_index)
            self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1))

+
            # Generate the new weight for the new voter
            epsilon = self._compute_epsilon(formatted_y)
            self.epsilons.append(epsilon)
@@ -120,19 +104,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
            self._update_example_weights(formatted_y)
            self.example_weights_.append(self.example_weights)

-            if k != 0:
            # Update the "previous vote" to prepare for the next iteration
            self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
-                                               np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
+                                           np.array(self.weights_).reshape((k + 2, 1))).reshape((m, 1))
            self.previous_votes.append(self.previous_vote)
+
            self.previous_margins.append(np.multiply(formatted_y, self.previous_vote))
            self.train_metrics.append(self.plotted_metric.score(formatted_y, np.sign(self.previous_vote)))
            # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))

-            if k!=0:
            self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
-            else:
-                self.bounds.append(math.sqrt(1 - r ** 2))

        self.nb_opposed_voters = self.check_opposed_voters()
        self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
@@ -159,6 +140,46 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        self.predict_time = end - start
        return signs_array

+    def init_boosting(self, m, y, y_kernel_matrix):
+        # Run the first boosting step: pick the first voter, compute its
+        # weight and seed the per-iteration history lists on `self`.
+        #
+        # Parameters:
+        #   m               -- number of rows used to reshape the example
+        #                      weights and the voter column into (m, 1).
+        #   y               -- labels; multiplied element-wise with the vote
+        #                      and passed to the epsilon / r / weight-update
+        #                      helpers (presumably shaped (m, 1) with values
+        #                      in {-1, +1} -- TODO confirm against caller).
+        #   y_kernel_matrix -- matrix handed to the voter-selection helpers.
+        #
+        # Returns nothing; all results are stored on `self`.
+        self.example_weights = self._initialize_alphas(m).reshape((m, 1))
+
+        # Initial margin entry is y * y element-wise, recorded before any
+        # voter has been chosen, together with the initial example weights.
+        self.previous_margins.append(np.multiply(y, y))
+        self.example_weights_.append(self.example_weights)
+        # First voter: a random one among the candidates returned by
+        # get_possible(), or the one with the best weighted margin.
+        if self.random_start:
+            first_voter_index = self.random_state.choice(
+                self.get_possible(y_kernel_matrix, y))
+        else:
+            first_voter_index, _ = self._find_best_weighted_margin(
+                y_kernel_matrix)
+
+        self.chosen_columns_.append(first_voter_index)
+        self.new_voter = self.classification_matrix[:,
+                         first_voter_index].reshape((m, 1))
+
+        # The first "previous vote" is the raw (unweighted) first voter.
+        # NOTE(review): the inline code this method replaces also set
+        # self.weighted_sum = self.new_voter here -- confirm that no helper
+        # still reads self.weighted_sum.
+        self.previous_vote = self.new_voter
+
+        epsilon = self._compute_epsilon(y)
+        self.epsilons.append(epsilon)
+
+        r = self._compute_r(y)
+
+        # Weight of the first voter, derived either from r or from epsilon.
+        # NOTE(review): there is no guard for r == 1 / r == -1 or for
+        # epsilon == 0 / epsilon == 1, which make the ratio or the log
+        # undefined -- confirm the helpers cannot return these values.
+        if self.use_r:
+            self.q = 0.5 * math.log((1 + r) / (1 - r))
+        else:
+            self.q = math.log((1 - epsilon) / epsilon)
+        self.weights_.append(self.q)
+
+        # Update the distribution on the examples.
+        self._update_example_weights(y)
+        self.example_weights_.append(self.example_weights)
+
+        # Record the margins and the training metric of the one-voter vote.
+        self.previous_margins.append(
+            np.multiply(y, self.previous_vote))
+        self.train_metrics.append(
+            self.plotted_metric.score(y, np.sign(self.previous_vote)))
+
+        # First entry of the bounds history; the boosting loop multiplies
+        # each following entry onto the previous one.
+        self.bounds.append(math.sqrt(1 - r ** 2))
+
    def format_X_y(self, X, y):
        if scipy.sparse.issparse(X):
            logging.info('Converting to dense matrix.')