Commit 6380eef5 authored by Baptiste Bauvin

Corrected some errors and added n_stumps option

parent b961031f
Showing with 112 additions and 439 deletions
@@ -28,5 +28,5 @@ def getConfig(**kwargs):
sample_weight = kwargs["0"]
except:
sample_weight = None
configString = "Jaccard similarity score using " + str(sample_weight) + " as sample_weights (higher is better)"
configString = "Jaccard_similarity score using " + str(sample_weight) + " as sample_weights (higher is better)"
return configString
@@ -49,6 +49,6 @@ def getConfig(**kwargs):
average = kwargs["3"]
except:
average = "micro"
configString = "ROC AUC score using " + str(
configString = "ROC_AUC score using " + str(
sample_weight) + " as sample_weights, " + average + " as average (higher is better)"
return configString
@@ -28,5 +28,5 @@ def getConfig(**kwargs):
sample_weight = kwargs["0"]
except:
sample_weight = None
configString = "Zero one loss using " + str(sample_weight) + " as sample_weights (lower is better)"
configString = "Zero_one loss using " + str(sample_weight) + " as sample_weights (lower is better)"
return configString
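
For reference, each metric module touched above follows the same convention: a module-level score function wrapping the corresponding sklearn metric, and a getConfig that describes the settings used. A minimal sketch of that interface, assuming a hypothetical zero-one-loss module (the sklearn alias and the structure are illustrative, not the project's actual file):

from sklearn.metrics import zero_one_loss as sk_zero_one_loss

def score(y_true, y_pred, **kwargs):
    # Positional options arrive as string keys ("0", "1", ...), as in the
    # getConfig functions above.
    try:
        sample_weight = kwargs["0"]
    except KeyError:
        sample_weight = None
    return sk_zero_one_loss(y_true, y_pred, sample_weight=sample_weight)

def getConfig(**kwargs):
    try:
        sample_weight = kwargs["0"]
    except KeyError:
        sample_weight = None
    return ("Zero_one loss using " + str(sample_weight)
            + " as sample_weights (lower is better)")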
@@ -145,6 +145,18 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin):
check_is_fitted(self, 'estimators_')
return np.array([voter.predict(X) for voter in self.estimators_]).T
# class TreesClassifiersGenerator(ClassifiersGenerator):
# """A generator to widen the voter's pool of our boosting algorithms.
# """
#
# def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=True, max_depth=3):
# super(TreesClassifiersGenerator, self).__init__(self_complemented)
# self.n_stumps_per_attribute = n_stumps_per_attribute
# self.check_diff = check_diff
# self.max_depth = max_depth
#
# def fit(self, X, y=None):
class StumpsClassifiersGenerator(ClassifiersGenerator):
"""Decision Stump Voters transformer.
@@ -656,7 +668,21 @@ class ConvexProgram(object):
signs[array == 0] = -1
return signs
def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies"):
def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None):
if type(name) is not str:
name = " ".join(name.getConfig().strip().split(" ")[:2])
if bounds:
f, ax = plt.subplots(nrows=1, ncols=1)
ax.set_title(name+" during train for "+classifier_name)
x = np.arange(len(train_accuracies))
scat = ax.scatter(x, np.array(train_accuracies), )
scat2 = ax.scatter(x, np.array(bounds), )
ax.legend((scat,scat2), (name,"Bounds"))
plt.tight_layout()
f.savefig(file_name)
plt.close()
else:
f, ax = plt.subplots(nrows=1, ncols=1)
ax.set_title(name+" during train for "+classifier_name)
x = np.arange(len(train_accuracies))
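
The new bounds argument lets get_accuracy_graph overlay a theoretical bound curve on the training-metric curve, and when name is a metric module rather than a string the plot title is derived from its getConfig(). A hedged usage sketch with made-up values (the import path is illustrative):

from BoostUtils import get_accuracy_graph  # illustrative import path

train_metrics = [0.45, 0.38, 0.30, 0.26, 0.23]  # e.g. zero-one loss per iteration
bounds = [0.95, 0.88, 0.80, 0.74, 0.69]         # matching theoretical bounds
get_accuracy_graph(train_metrics, "QarBoost", "metrics.png",
                   name="Zero_one loss", bounds=bounds)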
@@ -725,5 +751,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
separator=',', suppress_small=True)
np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
get_accuracy_graph(classifier.train_accuracies, classifier_name, directory + 'accuracies.png')
get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds)
return interpretString
@@ -7,8 +7,10 @@ from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score
import numpy as np
import time
import math
from .BoostUtils import StumpsClassifiersGenerator, ConvexProgram, sign, BaseBoost
from ... import Metrics
class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
@@ -20,8 +22,10 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
self.dual_constraint_rhs = dual_constraint_rhs
self.mu = mu
self.train_time = 0
self.plotted_metric = Metrics.accuracy_score
def fit(self, X, y):
start = time.time()
if scipy.sparse.issparse(X):
X = np.array(X.todense())
@@ -45,7 +49,9 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
# Initialization
alpha = self._initialize_alphas(m)
self.initialize()
self.train_accuracies = []
self.train_metrics = []
self.gammas = []
self.bounds = []
self.previous_votes = []
# w = [0.5,0.5]
w= None
@@ -74,14 +80,17 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
margins = self.get_margins(w)
signs_array = np.array([int(x) for x in sign(margins)])
self.train_accuracies.append(accuracy_score(y, signs_array))
self.train_metrics.append(self.plotted_metric.score(y, signs_array))
self.gammas.append(accuracy_score(y, signs_array) - 0.5)  # edge over random guessing, as in QarBoostUtils
self.bounds.append(math.exp(-2 * np.sum(np.square(np.array(self.gammas)))))
self.nb_opposed_voters = self.check_opposed_voters()
self.compute_weights_(w)
# self.weights_ = w
self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
end = time.time()
self.train_time = end-start
y[y == -1] = 0
return self
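
The bounds list added to this fit tracks the classic exponential bound on the training error of a boosted majority vote: exp(-2 * sum_t gamma_t^2), where gamma_t is the edge of the vote at step t (its accuracy minus 1/2). A self-contained sketch of the quantity being appended:

import math
import numpy as np

def exp_form_bound(gammas):
    # Training error of the majority vote is at most
    # exp(-2 * sum_t gamma_t^2), with gamma_t = accuracy_t - 1/2.
    return math.exp(-2 * np.sum(np.square(np.array(gammas))))

# Five iterations, each vote ten points better than chance:
print(exp_form_bound([0.1] * 5))  # exp(-0.1), roughly 0.905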
......
@@ -11,13 +11,14 @@ import time
import matplotlib.pyplot as plt
from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, getInterpretBase, get_accuracy_graph
from ... import Metrics
class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def __init__(self, n_max_iterations=350, estimators_generator=None, dual_constraint_rhs=0,
random_state=42, self_complemented=True, twice_the_same=False, old_fashioned=False,
previous_vote_weighted=True, c_bound_choice = True, random_start = True,
two_wieghts_problem=False, divided_ponderation=True):
two_wieghts_problem=False, divided_ponderation=True, n_stumps_per_attribute=None):
super(ColumnGenerationClassifierQar, self).__init__()
self.n_max_iterations = n_max_iterations
self.estimators_generator = estimators_generator
@@ -35,6 +36,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.random_start = random_start
self.two_wieghts_problem = two_wieghts_problem
self.divided_ponderation = divided_ponderation
self.plotted_metric = Metrics.zero_one_loss
if n_stumps_per_attribute:
self.n_stumps = n_stumps_per_attribute
self.printed_args_name_list = ["n_max_iterations", "self_complemented", "twice_the_same", "old_fashioned",
"previous_vote_weighted", "c_bound_choice", "random_start",
"two_wieghts_problem", "divided_ponderation", "n_stumps"]
def set_params(self, **params):
self.self_complemented = params["self_complemented"]
@@ -78,7 +86,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.c_bounds = []
self.epsilons = []
self.example_weights_ = [self.example_weights]
self.train_accuracies = []
self.train_metrics = []
self.gammas = []
self.bounds = []
self.previous_votes = []
self.previous_margins = [np.multiply(y,y)]
@@ -94,7 +104,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if self.random_start:
first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y))
else:
first_voter_index, plif = self._find_best_weighted_margin(y_kernel_matrix)
first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
self.chosen_columns_.append(first_voter_index)
self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
@@ -120,11 +130,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self._update_example_weights(y)
self.example_weights_.append(self.example_weights)
self.previous_margins.append(np.multiply(y, self.previous_vote))
self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
self.gammas.append(accuracy_score(y, np.sign(self.previous_vote))-0.5)
self.bounds.append(math.exp(-2*self.gammas[-1]**2))
continue
if epsilon > 0.5:
import pdb;pdb.set_trace()
# Dynamically print the step and the error of the current classifier
print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
# Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
# C-bound with the previous vote or the one with the best weighted margin
@@ -152,8 +165,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if self.divided_ponderation:
self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon)
else:
# self.q = math.log((1 - epsilon) / epsilon)
self.q = math.log((1 + epsilon) / (1 - epsilon))
self.q = math.log((1 - epsilon) / epsilon)
# self.q = math.log((1 + epsilon) / (1 - epsilon))
self.weights_.append(self.q)
# Update the distribution on the examples.
@@ -165,7 +178,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
self.previous_votes.append(self.previous_vote)
self.previous_margins.append(np.multiply(y, self.previous_vote))
self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
self.nb_opposed_voters = self.check_opposed_voters()
self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
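
Two things happen in these hunks: the active voter weight switches to the AdaBoost-style q = log((1 - epsilon) / epsilon), and the final bound uses the product form prod_t sqrt(1 - 4 * gamma_t^2) with gamma_t = 1/2 - epsilon_t, epsilon_t being the weighted error of the t-th voter. Since sqrt(1 - 4 * gamma^2) <= exp(-2 * gamma^2), this product is never looser than the exponential bound used above. A small sketch:

import numpy as np

def product_form_bound(epsilons):
    # prod_t sqrt(1 - 4 * gamma_t^2), gamma_t = 1/2 - epsilon_t; the
    # Freund-Schapire bound on the training error of the boosted vote.
    gammas = 0.5 - np.array(epsilons)
    return np.prod(np.sqrt(1.0 - 4.0 * np.square(gammas)))

print(product_form_bound([0.4, 0.35, 0.3]))  # about 0.86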
@@ -469,7 +483,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
import shutil
shutil.rmtree(path+"/gif_images")
get_accuracy_graph(self.epsilons, self.__class__.__name__, directory + 'epsilons.png', "Errors")
return getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
interpretString = getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
args_dict = dict((arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list)
interpretString += "\n \n With arguments : \n"+u'\u2022 '+ ("\n"+u'\u2022 ').join(['%s: \t%s' % (key, value)
for (key, value) in args_dict.items()])
return interpretString
......
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.metrics import accuracy_score
from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
from ..Monoview.Additions.BoostUtils import get_accuracy_graph
from .. import Metrics
# Author-Info
__author__ = "Baptiste Bauvin"
@@ -24,9 +27,14 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)]]
self.weird_strings = {"base_estimator": "class_name"}
self.plotted_metric = Metrics.zero_one_loss
self.plotted_metric_name = "zero_one_loss"
def fit(self, X, y, sample_weight=None):
super(Adaboost, self).fit(X, y, sample_weight=sample_weight)
self.base_predictions = np.array([estim.predict(X) for estim in self.estimators_])
self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)])
self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in range(self.estimator_errors_.shape[0])])
def canProbas(self):
"""Used to know if the classifier can return label probabilities"""
@@ -37,6 +45,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
interpretString += self.getFeatureImportance(directory)
interpretString += "\n\n Estimator error | Estimator weight\n"
interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)])
get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds))
return interpretString
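
With these additions the wrapper records, after fitting, the plotted metric at every boosting stage (via staged_predict) together with the matching product-form bound built from estimator_errors_, and getInterpret saves both to a graph. A hedged usage sketch on random data; it assumes Adaboost is importable from the module above and forwards n_estimators to AdaBoostClassifier:

import numpy as np

rng = np.random.RandomState(42)
X = rng.rand(100, 5)
y = rng.randint(0, 2, 100)

clf = Adaboost(n_estimators=50)  # assumed constructor pass-through
clf.fit(X, y)
print(clf.metrics[:5])  # zero-one loss after each of the first five stages
print(clf.bounds[:5])   # matching theoretical bounds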
......
@@ -16,7 +16,8 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False,
divided_ponderation=False
divided_ponderation=False,
n_stumps_per_attribute=1
)
self.param_names = []
self.distribs = []
......
@@ -8,13 +8,15 @@ class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostNC3, self).__init__(
random_state=random_state,
self_complemented=True,
self_complemented=False,
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=10
)
self.param_names = []
self.distribs = []
......
@@ -9,8 +9,14 @@ class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
super(QarBoostv2, self).__init__(
random_state=random_state,
self_complemented=True,
twice_the_same=True,
previous_vote_weighted=True
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=10
)
self.param_names = []
self.distribs = []
......
import numpy as np
from ..Monoview.MonoviewUtils import BaseMonoviewClassifier
from ..Monoview.Additions.BoostUtils import getInterpretBase
from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
# class ColumnGenerationClassifierQar3(ColumnGenerationClassifierQar):
# def __init__(self, n_max_iterations=None, estimators_generator=None,
# dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None,
# random_state=42, self_complemented=True, twice_the_same=False):
# super(ColumnGenerationClassifierQar3, self).__init__(n_max_iterations=n_max_iterations,
# estimators_generator=estimators_generator,
# dual_constraint_rhs=dual_constraint_rhs,
# save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each,
# random_state=random_state,
# self_complemented=self_complemented,
# twice_the_same=twice_the_same)
#
# def _compute_epsilon(self,):
# """Updating the \epsilon varaible"""
# ones_matrix = np.zeros(self.new_voter.shape)
# ones_matrix[self.new_voter < 0] = 1
# epsilon = (1.0/self.n_total_examples)*np.sum(self.example_weights*ones_matrix, axis=0)
# return epsilon
#
# def _update_example_weights(self, y):
# new_weights = self.example_weights*np.exp(-self.q*y.reshape((self.n_total_examples, 1))*self.new_voter)
# self.example_weights = new_weights/np.sum(new_weights)
class QarBoostv3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostv3, self).__init__(
random_state=random_state,
self_complemented=True,
twice_the_same=True,
previous_vote_weighted=False
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=1
)
self.param_names = []
self.distribs = []
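
The QarBoost variants above differ only in the flag combination they freeze before delegating to ColumnGenerationClassifierQar. A minimal, hypothetical usage sketch on random data (assuming the parent's fit and predict accept a plain numpy dataset with 0/1 labels):

import numpy as np

rng = np.random.RandomState(42)
X_train = rng.rand(60, 4)
y_train = rng.randint(0, 2, 60)

clf = QarBoostv3(random_state=rng)  # every other flag is fixed by the subclass
clf.fit(X_train, y_train)
print(clf.predict(X_train)[:10])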
@@ -65,382 +44,3 @@ def paramsToSet(nIter, randomState):
for _ in range(nIter):
paramsSet.append({})
return paramsSet
# def __init__(self, epsilon=1e-06, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=42):
# super(ColumnGenerationClassifierQar3, self).__init__()
# self.epsilon = epsilon
# self.n_max_iterations = n_max_iterations
# self.estimators_generator = estimators_generator
# self.dual_constraint_rhs = dual_constraint_rhs
# self.save_iteration_as_hyperparameter_each = save_iteration_as_hyperparameter_each
# self.random_state = random_state
#
# def fit(self, X, y):
# if scipy.sparse.issparse(X):
# logging.info('Converting to dense matrix.')
# X = np.array(X.todense())
#
# if self.estimators_generator is None:
# self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=True)
#
# y[y == 0] = -1
#
# self.estimators_generator.fit(X, y)
# self.classification_matrix = self._binary_classification_matrix(X)
#
#
# self.weights_ = []
# self.infos_per_iteration_ = defaultdict(list)
#
# m, n = self.classification_matrix.shape
# y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix)
#
# # Initialization
#
# self.collected_weight_vectors_ = {}
# self.collected_dual_constraint_violations_ = {}
#
# self.example_weights = self._initialize_alphas(m).reshape((m,1))
#
# self.chosen_columns_ = []
# self.fobidden_columns = []
# self.edge_scores = []
# self.epsilons = []
# self.example_weights_ = [self.example_weights]
# self.train_accuracies = []
# self.previous_votes = []
#
# self.n_total_hypotheses_ = n
# self.n_total_examples = m
#
# for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
# # To choose the first voter, we select the one that has the best margin.
# if k == 0:
# first_voter_index = self._find_best_margin(y_kernel_matrix)
# self.chosen_columns_.append(first_voter_index)
#
# self.previous_vote = self.classification_matrix[:, first_voter_index].reshape((m,1))
# self.previous_votes.append(self.previous_vote)
# self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
#
# epsilon = self._compute_epsilon()
# self.epsilons.append(epsilon)
# self.q = math.log((1-epsilon)/epsilon)
# self.weights_.append(self.q)
#
# self._update_example_weights(y)
# self.example_weights_.append(self.example_weights)
# self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
# continue
#
# # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
# # C-bound with the previous vote
# sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y)
# if type(sol) == str:
# self.break_cause = " no more hypothesis were able to improve the boosted vote."
# break
#
# # Append the weak hypothesis.
# self.chosen_columns_.append(new_voter_index)
# # self.weighted_sum = np.matmul(np.concatenate((self.previous_vote, self.classification_matrix[:, new_voter_index].reshape((m,1))), axis=1),
# # sol).reshape((m,1))
# self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m,1))
#
# # Generate the new weight for the new voter
# epsilon = self._compute_epsilon()
# self.epsilons.append(epsilon)
# if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
# self.chosen_columns_.pop()
# self.break_cause = " epsilon was too small."
# break
# self.q = math.log((1 - epsilon) / epsilon)
# self.weights_.append(self.q)
#
# # Update the distribution on the examples.
# self._update_example_weights(y)
# self.example_weights_.append(self.example_weights)
#
# # Update the "previous vote" to prepare for the next iteration
# self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
# np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
# self.previous_votes.append(self.previous_vote)
# self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote)))
#
# self.nb_opposed_voters = self.check_opposed_voters()
# self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
# self.weights_ = np.array(self.weights_)
#
# self.weights_/=np.sum(self.weights_)
# y[y == -1] = 0
#
# return self
#
# def predict(self, X):
# start = time.time()
# check_is_fitted(self, 'weights_')
# if scipy.sparse.issparse(X):
# logging.warning('Converting sparse matrix to dense matrix.')
# X = np.array(X.todense())
# classification_matrix = self._binary_classification_matrix(X)
# margins = np.squeeze(np.asarray(np.matmul(classification_matrix, self.weights_)))
# signs_array = np.array([int(x) for x in sign(margins)])
# signs_array[signs_array == -1 ] = 0
# end = time.time()
# self.predict_time = end-start
# return signs_array
#
# def _find_best_margin(self, y_kernel_matrix):
# """Used only on the first iteration to select the voter with the largest margin"""
# pseudo_h_values = ma.array(np.sum(y_kernel_matrix, axis=0), fill_value=-np.inf)
# pseudo_h_values[self.fobidden_columns] = ma.masked
# worst_h_index = ma.argmax(pseudo_h_values)
# return worst_h_index
#
# def _find_new_voter(self, y_kernel_matrix, y):
# """Here, we solve the two_voters_mincq_problem for each potential new voter,
# and select the one that has the smallest minimum"""
# c_borns = []
# possible_sols = []
# indices = []
# for hypothese_index, hypothese in enumerate(y_kernel_matrix.transpose()):
# causes = []
# if hypothese_index not in self.chosen_columns_:
# w = self._solve_two_weights_min_c(hypothese, y)
# if w[0] != "break":
# c_borns.append(self._cbound(w[0]))
# possible_sols.append(w)
# indices.append(hypothese_index)
# else:
# causes.append(w[1])
# if c_borns:
# min_c_born_index = ma.argmin(c_borns)
# selected_sol = possible_sols[min_c_born_index]
# selected_voter_index = indices[min_c_born_index]
# return selected_sol, selected_voter_index
# else:
# return "break", "smthng"
#
# def _solve_two_weights_min_c(self, next_column, y):
# """Here we solve the min C-bound problem for two voters and return the best 2-weights array"""
# m = next_column.shape[0]
# zero_diag = np.ones((m, m)) - np.identity(m)
#
# weighted_previous_sum = np.multiply(np.multiply(y.reshape((m, 1)), self.previous_vote.reshape((m, 1))), self.example_weights.reshape((m,1)))
# weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
#
# mat_prev = np.repeat(weighted_previous_sum, m, axis=1) * zero_diag
# mat_next = np.repeat(weighted_next_column, m, axis=1) * zero_diag
#
# self.B2 = np.sum((weighted_previous_sum - weighted_next_column) ** 2)
# self.B1 = np.sum(2 * weighted_next_column * (weighted_previous_sum - 2 * weighted_next_column * weighted_next_column))
# self.B0 = np.sum(weighted_next_column * weighted_next_column)
#
# self.A2 = self.B2 + np.sum((mat_prev - mat_next) * np.transpose(mat_prev - mat_next))
# self.A1 = self.B1 + np.sum(mat_prev * np.transpose(mat_next) - mat_next * np.transpose(mat_prev) - 2 * mat_next * np.transpose(mat_next))
# self.A0 = self.B0 + np.sum(mat_next * np.transpose(mat_next))
# C2 = (self.A1 * self.B2 - self.A2 * self.B1)
# C1 = 2 * (self.A0 * self.B2 - self.A2 * self.B0)
# C0 = self.A0 * self.B1 - self.A1 * self.B0
#
# if C2 == 0:
# if C1 == 0:
# return np.array([0.5, 0.5])
# elif abs(C1) > 0:
# return np.array([0., 1.])
# else:
# return ['break', "the derivative was constant."]
# elif C2 == 0:
# return ["break", "the derivative was affine."]
# try:
# sols = np.roots(np.array([C2, C1, C0]))
# except:
# return ["break", "nan"]
#
# is_acceptable, sol = self._analyze_solutions(sols)
# if is_acceptable:
# return np.array([sol, 1-sol])
# else:
# return ["break", sol]
#
# def _analyze_solutions(self, sols):
# """"We just check that the solution found by np.roots is acceptable under our constraints
# (real, a minimum and between 0 and 1)"""
# for sol_index, sol in enumerate(sols):
# if isinstance(sol, complex):
# sols[sol_index] = -1
# if sols.shape[0] == 1:
# if self._cbound(sols[0]) < self._cbound(sols[0] + 1):
# best_sol = sols[0]
# else:
# return False, " the only solution was a maximum."
# elif sols.shape[0] == 2:
# best_sol = self._best_sol(sols)
# else:
# return False, " no solution were found."
#
# if 0 < best_sol < 1:
# return True, self._best_sol(sols)
#
# elif best_sol <= 0:
# return False, " the minimum was below 0."
# else:
# return False, " the minimum was over 1."
#
# def _cbound(self, sol):
# """Computing the objective function"""
# return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0)
#
# def _best_sol(self, sols):
# values = np.array([self._cbound(sol) for sol in sols])
# return sols[np.argmin(values)]
# class QarBoostClassifier3(ColumnGenerationClassifierQar3):
# def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None, random_state=42):
# super(QarBoostClassifier3, self).__init__(epsilon, n_max_iterations, estimators_generator, dual_constraint_rhs=0,
# save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, random_state=random_state)
# self.mu = mu
# self.train_time = 0
#
# def _initialize_alphas(self, n_examples):
# return 1.0 / n_examples * np.ones((n_examples,))
#
#
# class QarBoostv3(QarBoostClassifier3):
#
# def __init__(self, random_state, **kwargs):
# super(QarBoostv3, self).__init__(
# mu=kwargs['mu'],
# epsilon=kwargs['epsilon'],
# n_max_iterations= kwargs['n_max_iterations'],
# random_state = random_state)
#
# def canProbas(self):
# """Used to know if the classifier can return label probabilities"""
# return False
#
# def paramsToSet(self, nIter=1):
# """Used for weighted linear early fusion to generate random search sets"""
# paramsSet = []
# for _ in range(nIter):
# paramsSet.append({"mu": 0.001,
# "epsilon": 1e-08,
# "n_max_iterations": None})
# return paramsSet
#
# def getKWARGS(self, args):
# """Used to format kwargs for the parsed args"""
# kwargsDict = {}
# kwargsDict['mu'] = 0.001
# kwargsDict['epsilon'] = 1e-08
# kwargsDict['n_max_iterations'] = None
# return kwargsDict
#
# def genPipeline(self):
# return Pipeline([('classifier', QarBoostClassifier3())])
#
# def genParamsDict(self, randomState):
# return {"classifier__mu": [0.001],
# "classifier__epsilon": [1e-08],
# "classifier__n_max_iterations": [None]}
#
# def genBestParams(self, detector):
# return {"mu": detector.best_params_["classifier__mu"],
# "epsilon": detector.best_params_["classifier__epsilon"],
# "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]}
#
# def genParamsFromDetector(self, detector):
# nIter = len(detector.cv_results_['param_classifier__mu'])
# return [("mu", np.array([0.001 for _ in range(nIter)])),
# ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])),
# ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))]
#
# def getConfig(self, config):
# if type(config) is not dict: # Used in late fusion when config is a classifier
# return "\n\t\t- QarBoost with mu : " + str(config.mu) + ", epsilon : " + str(
# config.epsilon + ", n_max_iterations : " + str(config.n_max_iterations))
# else:
# return "\n\t\t- QarBoost with mu : " + str(config["mu"]) + ", epsilon : " + str(
# config["epsilon"] + ", n_max_iterations : " + str(config["n_max_iterations"]))
#
#
# def getInterpret(self, classifier, directory):
# interpretString = ""
# return interpretString
#
#
# def canProbas():
# return False
#
#
# def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
# start =time.time()
# """Used to fit the monoview classifier with the args stored in kwargs"""
# classifier = QarBoostClassifier3(mu=kwargs['mu'],
# epsilon=kwargs['epsilon'],
# n_max_iterations=kwargs["n_max_iterations"],
# random_state=randomState)
# classifier.fit(DATASET, CLASS_LABELS)
# end = time.time()
# classifier.train_time = end-start
# return classifier
#
#
# def paramsToSet(nIter, randomState):
# """Used for weighted linear early fusion to generate random search sets"""
# paramsSet = []
# for _ in range(nIter):
# paramsSet.append({"mu": randomState.uniform(1e-02, 10**(-0.5)),
# "epsilon": 10**-randomState.randint(1, 15),
# "n_max_iterations": None})
# return paramsSet
#
#
# def getKWARGS(args):
# """Used to format kwargs for the parsed args"""
# kwargsDict = {}
# kwargsDict['mu'] = args.QarB3_mu
# kwargsDict['epsilon'] = args.QarB3_epsilon
# kwargsDict['n_max_iterations'] = None
# return kwargsDict
#
#
# def genPipeline():
# return Pipeline([('classifier', QarBoostClassifier3())])
#
#
# def genParamsDict(randomState):
# return {"classifier__mu": CustomUniform(loc=.5, state=2, multiplier='e-'),
# "classifier__epsilon": CustomRandint(low=1, high=15, multiplier='e-'),
# "classifier__n_max_iterations": [None],
# "classifier__random_state":[randomState]}
#
#
# def genBestParams(detector):
# return {"mu": detector.best_params_["classifier__mu"],
# "epsilon": detector.best_params_["classifier__epsilon"],
# "n_max_iterations": detector.best_params_["classifier__n_max_iterations"]}
#
#
# def genParamsFromDetector(detector):
# nIter = len(detector.cv_results_['param_classifier__mu'])
# return [("mu", np.array(detector.cv_results_['param_classifier__mu'])),
# ("epsilon", np.array(detector.cv_results_['param_classifier__epsilon'])),
# ("n_max_iterations", np.array(detector.cv_results_['param_classifier__n_max_iterations']))]
#
#
# def getConfig(config):
# if type(config) is not dict: # Used in late fusion when config is a classifier
# return "\n\t\t- QarBoost with mu : " + str(config.mu) + ", epsilon : " + str(
# config.epsilon) + ", n_max_iterations : " + str(config.n_max_iterations)
# else:
# return "\n\t\t- QarBoost with mu : " + str(config["mu"]) + ", epsilon : " + str(
# config["epsilon"]) + ", n_max_iterations : " + str(config["n_max_iterations"])
#
#
# def getInterpret(classifier, directory):
# return getInterpretBase(classifier, directory, "QarBoostv3", classifier.weights_, classifier.break_cause)