Reformatted code

aed9da3b · Baptiste Bauvin · 77fc6fe0 · aed9da3b
Commit aed9da3b authored Dec 4, 2018 by Baptiste Bauvin
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -9,7 +9,8 @@ from sklearn.base import BaseEstimator, ClassifierMixin
 import time
 import matplotlib.pyplot as plt

-from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, getInterpretBase, get_accuracy_graph
+from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, \
+    getInterpretBase, get_accuracy_graph
 from ... import Metrics


@@ -59,7 +60,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        if n_stumps_per_attribute:
            self.n_stumps = n_stumps_per_attribute
        self.use_r = use_r
-        self.printed_args_name_list = ["n_max_iterations", "self_complemented", "twice_the_same",
+        self.printed_args_name_list = ["n_max_iterations", "self_complemented",
+                                       "twice_the_same",
                                       "c_bound_choice", "random_start",
                                       "n_stumps", "use_r"]

@@ -87,12 +89,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        self.init_boosting(m, formatted_y, y_kernel_matrix)
        self.break_cause = " the maximum number of iterations was attained."

-        for k in range(min(n-1, self.n_max_iterations-1 if self.n_max_iterations is not None else np.inf)):
+        for k in range(min(n - 1,
+                           self.n_max_iterations - 1 if self.n_max_iterations is not None else np.inf)):

            # Print dynamically the step and the error of the current classifier
-            print("Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound, k+2, self.n_max_iterations, self.voter_perfs[-1]), end="\r")
-
-            sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y)
+            print(
+                "Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound,
+                                                          k + 2,
+                                                          self.n_max_iterations,
+                                                          self.voter_perfs[-1]),
+                end="\r")
+
+            sol, new_voter_index = self.choose_new_voter(y_kernel_matrix,
+                                                         formatted_y)

            if type(sol) == str:
                self.break_cause = new_voter_index  #
@@ -108,9 +117,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):

            self.update_info_containers(formatted_y, voter_perf, k)

-
        self.nb_opposed_voters = self.check_opposed_voters()
-        self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
+        self.estimators_generator.estimators_ = \
+        self.estimators_generator.estimators_[self.chosen_columns_]

        self.weights_ = np.array(self.weights_)
        self.weights_ /= np.sum(self.weights_)
@@ -129,7 +138,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
            logging.warning('Converting sparse matrix to dense matrix.')
            X = np.array(X.todense())
        classification_matrix = self._binary_classification_matrix(X)
-        margins = np.squeeze(np.asarray(np.matmul(classification_matrix, self.weights_)))
+        margins = np.squeeze(
+            np.asarray(np.matmul(classification_matrix, self.weights_)))
        signs_array = np.array([int(x) for x in sign(margins)])
        signs_array[signs_array == -1] = 0
        end = time.time()
@@ -141,7 +151,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        self.example_weights_.append(self.example_weights)
        self.previous_vote = np.matmul(
            self.classification_matrix[:, self.chosen_columns_],
-            np.array(self.weights_).reshape((k + 2, 1))).reshape((self.n_total_examples, 1))
+            np.array(self.weights_).reshape((k + 2, 1))).reshape(
+            (self.n_total_examples, 1))
        self.previous_votes.append(self.previous_vote)

        self.previous_margins.append(
@@ -150,7 +161,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        if self.use_r:
            bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2)
        else:
-            bound = np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.voter_perfs))))
+            bound = np.prod(
+                np.sqrt(1 - 4 * np.square(0.5 - np.array(self.voter_perfs))))

        if train_metric > bound:
            self.respected_bound = False
@@ -193,7 +205,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                y_kernel_matrix)
        return sol, new_voter_index

-
    def init_boosting(self, m, y, y_kernel_matrix):
        """This initialization corresponds to the first round of boosting, with equal weights for each example and the voter chosen by its margin."""
        self.example_weights = self._initialize_alphas(m).reshape((m, 1))
@@ -220,7 +231,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
            epsilon = self._compute_epsilon(y)
            self.voter_perfs.append(epsilon)

-
        if self.use_r:
            self.q = 0.5 * math.log((1 + r) / (1 - r))
        else:
@@ -260,7 +270,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
    def init_hypotheses(self, X, y):
        """Initialization for the hypotheses used to build the boosted vote"""
        if self.estimators_generator is None:
-            self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps,
+            self.estimators_generator = StumpsClassifiersGenerator(
+                n_stumps_per_attribute=self.n_stumps,
                self_complemented=self.self_complemented)
        self.estimators_generator.fit(X, y)
        self.classification_matrix = self._binary_classification_matrix(X)
@@ -286,28 +297,38 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
    def _compute_epsilon(self, y):
        """Updating the error variable, the old fashioned way uses the whole majority vote to update the error"""
        ones_matrix = np.zeros(y.shape)
-        ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = 1  # can np.divide if needed
+        ones_matrix[np.multiply(y, self.new_voter.reshape(
+            y.shape)) < 0] = 1  # can np.divide if needed
        epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0)
        return epsilon

    def _compute_r(self, y):
        ones_matrix = np.ones(y.shape)
-        ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = -1  # can np.divide if needed
+        ones_matrix[np.multiply(y, self.new_voter.reshape(
+            y.shape)) < 0] = -1  # can np.divide if needed
        r = np.average(ones_matrix, weights=self.example_weights, axis=0)
        return r

    def update_example_weights(self, y):
        """Old fashioned example weights update uses the whole majority vote, the other way uses only the last voter."""
-        new_weights = self.example_weights.reshape((self.n_total_examples, 1))*np.exp(-self.q*np.multiply(y,self.new_voter))
+        new_weights = self.example_weights.reshape(
+            (self.n_total_examples, 1)) * np.exp(
+            -self.q * np.multiply(y, self.new_voter))
        self.example_weights = new_weights / np.sum(new_weights)

-    def _find_best_weighted_margin(self, y_kernel_matrix, upper_bound=1.0, lower_bound=0.0):
+    def _find_best_weighted_margin(self, y_kernel_matrix, upper_bound=1.0,
+                                   lower_bound=0.0):
        """Finds the new voter by choosing the one that has the best weighted margin between 0.5 and 0.55
        to avoid too good voters that will get all the vote weights"""
-        weighted_kernel_matrix = np.multiply(y_kernel_matrix, self.example_weights.reshape((self.n_total_examples, 1)))
-        pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0), fill_value=-np.inf)
+        weighted_kernel_matrix = np.multiply(y_kernel_matrix,
+                                             self.example_weights.reshape(
+                                                 (self.n_total_examples, 1)))
+        pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0),
+                                   fill_value=-np.inf)
        pseudo_h_values[self.chosen_columns_] = ma.masked
-        acceptable_indices = np.where(np.logical_and(np.greater(upper_bound, pseudo_h_values), np.greater(pseudo_h_values, lower_bound)))[0]
+        acceptable_indices = np.where(
+            np.logical_and(np.greater(upper_bound, pseudo_h_values),
+                           np.greater(pseudo_h_values, lower_bound)))[0]
        if acceptable_indices.size > 0:
            worst_h_index = self.random_state.choice(acceptable_indices)
            return worst_h_index, [0]
@@ -336,8 +357,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        possible_sols = []
        indices = []
        causes = []
-        for hypothese_index, hypothese in enumerate(y_kernel_matrix.transpose()):
-            if (hypothese_index not in self.chosen_columns_ or self.twice_the_same) \
+        for hypothese_index, hypothese in enumerate(
+                y_kernel_matrix.transpose()):
+            if (
+                    hypothese_index not in self.chosen_columns_ or self.twice_the_same) \
                    and set(self.chosen_columns_) != {hypothese_index} \
                    and self._is_not_too_wrong(hypothese, y):
                w = self._solve_one_weight_min_c(hypothese, y)
@@ -363,18 +386,26 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        No precalc because longer ; see the "derivee" latex document for more precision"""
        m = next_column.shape[0]
        zero_diag = np.ones((m, m)) - np.identity(m)
-        weighted_previous_sum = np.multiply(y, self.previous_vote.reshape((m, 1)))
-        weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
+        weighted_previous_sum = np.multiply(y,
+                                            self.previous_vote.reshape((m, 1)))
+        weighted_next_column = np.multiply(next_column.reshape((m, 1)),
+                                           self.example_weights.reshape((m, 1)))

        self.B2 = np.sum(weighted_next_column ** 2)
        self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum)
        self.B0 = np.sum(weighted_previous_sum ** 2)

-        M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag))
-        M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_next_column)) +
-                                np.matmul(weighted_next_column, np.transpose(weighted_previous_sum))
+        M2 = np.sum(np.multiply(
+            np.matmul(weighted_next_column, np.transpose(weighted_next_column)),
+            zero_diag))
+        M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum,
+                                          np.transpose(weighted_next_column)) +
+                                np.matmul(weighted_next_column,
+                                          np.transpose(weighted_previous_sum))
                                , zero_diag))
-        M0 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_previous_sum)), zero_diag))
+        M0 = np.sum(np.multiply(np.matmul(weighted_previous_sum,
+                                          np.transpose(weighted_previous_sum)),
+                                zero_diag))

        self.A2 = self.B2 + M2
        self.A1 = self.B1 + M1
@@ -387,7 +418,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
            if C1 == 0:
                return ['break', "the derivate was constant"]
            else:
-                is_acceptable, sol = self._analyze_solutions_one_weight(np.array(float(C0)/C1).reshape((1,1)))
+                is_acceptable, sol = self._analyze_solutions_one_weight(
+                    np.array(float(C0) / C1).reshape((1, 1)))
                if is_acceptable:
                    return np.array([sol])
        try:
@@ -421,7 +453,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):

    def _cbound(self, sol):
        """Computing the objective function"""
-        return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0)/self.n_total_examples
+        return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / (
+                    self.B2 * sol ** 2 + self.B1 * sol + self.B0) / self.n_total_examples

    def _best_sol(self, sols):
        """Return the best min in the two possible sols"""
@@ -441,8 +474,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        except:
            raise
        filenames = []
-        max_weight = max([np.max(examples_weights) for examples_weights in self.example_weights_])
-        min_weight = min([np.max(examples_weights) for examples_weights in self.example_weights_])
+        max_weight = max([np.max(examples_weights) for examples_weights in
+                          self.example_weights_])
+        min_weight = min([np.max(examples_weights) for examples_weights in
+                          self.example_weights_])
        for iterIndex, examples_weights in enumerate(self.example_weights_):
            r = np.array(examples_weights)
            theta = np.arange(self.n_total_examples)
@@ -464,18 +499,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
        imageio.mimsave(path + '/weights.gif', images, duration=1. / 2)
        import shutil
        shutil.rmtree(path + "/gif_images")
-        get_accuracy_graph(self.voter_perfs, self.__class__.__name__, directory + 'voter_perfs.png', "Errors")
-        interpretString = getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
-
-        args_dict = dict((arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list)
-        interpretString += "\n \n With arguments : \n"+u'\u2022 '+ ("\n"+u'\u2022 ').join(['%s: \t%s' % (key, value)
-                                                                                           for (key, value) in args_dict.items()])
+        get_accuracy_graph(self.voter_perfs, self.__class__.__name__,
+                           directory + 'voter_perfs.png', "Errors")
+        interpretString = getInterpretBase(self, directory, "QarBoost",
+                                           self.weights_, self.break_cause)
+
+        args_dict = dict(
+            (arg_name, str(self.__dict__[arg_name])) for arg_name in
+            self.printed_args_name_list)
+        interpretString += "\n \n With arguments : \n" + u'\u2022 ' + (
+                    "\n" + u'\u2022 ').join(['%s: \t%s' % (key, value)
+                                             for (key, value) in
+                                             args_dict.items()])
        if not self.respected_bound:
            interpretString += "\n\n The bound was not respected"

        return interpretString
-
-
-
-
-