From 9d0667f2cd62be3e76bdb3bbb7ba6744c836ee43 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Sat, 1 Dec 2018 13:47:12 -0500
Subject: [PATCH] Added format_X_y

---
 .../Monoview/Additions/QarBoostUtils.py       | 105 +++++++++++-------
 1 file changed, 63 insertions(+), 42 deletions(-)

diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index bfe3cded..eeb92c92 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -55,49 +55,33 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
         m,n,y_kernel_matrix = self.init_hypotheses(formatted_X, formatted_y)
 
-        self.example_weights = self._initialize_alphas(m).reshape((m,1))
-
-
-        self.previous_margins.append(np.multiply(formatted_y, formatted_y))
-        self.example_weights_.append(self.example_weights)
         self.n_total_hypotheses_ = n
         self.n_total_examples = m
-        self.break_cause = " the maximum number of iterations was attained."
 
-        for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
-
-            # To choose the first voter, we select the one that has the best margin or a random one..
-            if k == 0:
-                if self.random_start:
-                    first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, formatted_y))
-                else:
-                    first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
+        self.init_boosting(m, formatted_y, y_kernel_matrix)
+        self.break_cause = " the maximum number of iterations was attained."
 
-                self.chosen_columns_.append(first_voter_index)
-                self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
+        for k in range(min(n-1, self.n_max_iterations-1 if self.n_max_iterations is not None else np.inf)):
 
-                self.previous_vote = self.new_voter
-                self.weighted_sum = self.new_voter
+            # Print dynamically the step and the error of the current classifier
+            print("{}/{}, eps :{}".format(k+2, self.n_max_iterations, self.epsilons[-1]), end="\r")
 
+            # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
+            # C-bound with the previous vote or the one with the best weighted margin
+            if self.c_bound_choice:
+                sol, new_voter_index = self._find_new_voter(y_kernel_matrix, formatted_y)
             else:
-                # Print dynamically the step and the error of the current classifier
-                print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
+                new_voter_index, sol = self._find_best_weighted_margin(y_kernel_matrix)
 
-                # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
-                # C-bound with the previous vote or the one with the best weighted margin
-                if self.c_bound_choice:
-                    sol, new_voter_index = self._find_new_voter(y_kernel_matrix, formatted_y)
-                else:
-                    new_voter_index, sol = self._find_best_weighted_margin(y_kernel_matrix)
+            # If the new voter selector could not find one, break the loop
+            if type(sol) == str:
+                self.break_cause = new_voter_index  #
+                break
 
-                # If the new voter selector could not find one, break the loop
-                if type(sol) == str:
-                    self.break_cause = new_voter_index  #
-                    break
+            # Append the weak hypothesis.
+            self.chosen_columns_.append(new_voter_index)
+            self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1))
 
-                # Append the weak hypothesis.
-                self.chosen_columns_.append(new_voter_index)
-                self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1))
 
             # Generate the new weight for the new voter
             epsilon = self._compute_epsilon(formatted_y)
@@ -120,19 +104,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             self._update_example_weights(formatted_y)
             self.example_weights_.append(self.example_weights)
 
-            if k != 0:
-                # Update the "previous vote" to prepare for the next iteration
-                self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
-                                               np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
-                self.previous_votes.append(self.previous_vote)
+            # Update the "previous vote" to prepare for the next iteration
+            self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
+                                           np.array(self.weights_).reshape((k + 2, 1))).reshape((m, 1))
+            self.previous_votes.append(self.previous_vote)
+
             self.previous_margins.append(np.multiply(formatted_y, self.previous_vote))
             self.train_metrics.append(self.plotted_metric.score(formatted_y, np.sign(self.previous_vote)))
             # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
 
-            if k!=0:
-                self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
-            else:
-                self.bounds.append(math.sqrt(1 - r ** 2))
+            self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
 
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
@@ -159,6 +140,46 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.predict_time = end - start
         return signs_array
 
+    def init_boosting(self, m, y, y_kernel_matrix):  # set up the first voter and the first entries of the per-iteration histories
+        self.example_weights = self._initialize_alphas(m).reshape((m, 1))
+        # Seed the margin history with the element-wise product of y with itself.
+        self.previous_margins.append(np.multiply(y, y))
+        self.example_weights_.append(self.example_weights)
+        if self.random_start:  # draw the first voter from the candidates given by get_possible
+            first_voter_index = self.random_state.choice(
+                self.get_possible(y_kernel_matrix, y))
+        else:  # use the index from _find_best_weighted_margin; its second return value is discarded
+            first_voter_index, _ = self._find_best_weighted_margin(
+                y_kernel_matrix)
+
+        self.chosen_columns_.append(first_voter_index)
+        self.new_voter = self.classification_matrix[:,
+                         first_voter_index].reshape((m, 1))
+        # Only one voter exists so far, so it is also the current vote.
+        self.previous_vote = self.new_voter
+
+        epsilon = self._compute_epsilon(y)
+        self.epsilons.append(epsilon)
+        # Weight of the first voter: derived from r when use_r is set, from epsilon otherwise.
+        r = self._compute_r(y)
+        # NOTE(review): the formulas below are undefined for r == +/-1 or epsilon in {0, 1} - confirm this cannot occur.
+        if self.use_r:
+            self.q = 0.5 * math.log((1 + r) / (1 - r))
+        else:
+            self.q = math.log((1 - epsilon) / epsilon)
+        self.weights_.append(self.q)
+
+        # Update the distribution on the examples.
+        self._update_example_weights(y)
+        self.example_weights_.append(self.example_weights)
+        # Record the margin and the training metric of the one-voter vote.
+        self.previous_margins.append(
+            np.multiply(y, self.previous_vote))
+        self.train_metrics.append(
+            self.plotted_metric.score(y, np.sign(self.previous_vote)))
+        # First entry of the bound history, computed from r alone.
+        self.bounds.append(math.sqrt(1 - r ** 2))
+
     def format_X_y(self, X, y):
         if scipy.sparse.issparse(X):
             logging.info('Converting to dense matrix.')
-- 
GitLab