From ac9f952c56e9c6d4518ed9e52dea88be38bf3b07 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Mon, 25 Feb 2019 15:16:53 -0500
Subject: [PATCH] Added grad desc modified saved vars

---
 .../Monoview/Additions/BoostUtils.py          | 27 +++++------
 .../Monoview/Additions/QarBoostUtils.py       | 39 ++++++++++-----
 .../MonoviewClassifiers/CGDesc.py             | 48 +++++++++++++++++++
 .../MonoviewClassifiers/CGreed.py             |  2 +-
 .../utils/execution.py                        | 12 ++++-
 5 files changed, 99 insertions(+), 29 deletions(-)
 create mode 100644 multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py

diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index e7242ee6..d3432116 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -669,15 +669,18 @@ class ConvexProgram(object):
         return signs
 
 
-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train"):
+def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train", zero_to_one=True):
     if type(name) is not str:
         name = " ".join(name.getConfig().strip().split(" ")[:2])
-    if bounds:
-        f, ax = plt.subplots(nrows=1, ncols=1)
+    f, ax = plt.subplots(nrows=1, ncols=1)
+    if zero_to_one:
         ax.set_ylim(bottom=0.0,top=1.0)
-        ax.set_title(name+" during "+set+" for "+classifier_name)
-        x = np.arange(len(train_accuracies))
-        scat = ax.scatter(x, np.array(train_accuracies), marker=".")
+    ax.set_title(name+" during "+set+" for "+classifier_name)
+    x = np.arange(len(plotted_data))
+    if name == "zero_one_loss":
+        print(plotted_data)
+    scat = ax.scatter(x, np.array(plotted_data), marker=".")
+    if bounds:
         if boosting_bound:
             scat2 = ax.scatter(x, boosting_bound, marker=".")
             scat3 = ax.scatter(x, np.array(bounds), marker=".", )
@@ -687,18 +690,10 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
             ax.legend((scat, scat2), (name, bound_name))
         # plt.tight_layout()
-        f.savefig(file_name)
-        plt.close()
     else:
-        f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_ylim(bottom=0.0, top=1.0)
-        ax.set_title(name + " during "+set+" for " + classifier_name)
-        x = np.arange(len(train_accuracies))
-        scat = ax.scatter(x, np.array(train_accuracies), marker=".", )
         ax.legend((scat,), (name,))
-        plt.tight_layout()
-        f.savefig(file_name)
-        plt.close()
+    f.savefig(file_name)
+    plt.close()
 
 
 class BaseBoost(object):
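The BoostUtils change merges the duplicated bounds / no-bounds branches of get_accuracy_graph into a single figure-setup path, and the new zero_to_one flag decides whether the y-axis is clamped to [0, 1]. A minimal self-contained sketch of the same single-path pattern; plot_series and its arguments are illustrative stand-ins, not the project's exact function:

import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend, as when figures are only saved to disk
import matplotlib.pyplot as plt


def plot_series(plotted_data, file_name, name="Accuracies", bounds=None,
                bound_name=None, zero_to_one=True):
    """Single code path: set up the figure once, then branch only on the legend."""
    fig, ax = plt.subplots(nrows=1, ncols=1)
    if zero_to_one:
        ax.set_ylim(bottom=0.0, top=1.0)  # only clamp quantities that live in [0, 1]
    x = np.arange(len(plotted_data))
    scat = ax.scatter(x, np.array(plotted_data), marker=".")
    if bounds is not None:
        scat2 = ax.scatter(x, np.array(bounds), marker=".")
        ax.legend((scat, scat2), (name, bound_name))
    else:
        ax.legend((scat,), (name,))
    fig.savefig(file_name)
    plt.close(fig)


plot_series([0.5, 0.6, 0.7, 0.72], "toy_accuracies.png")

Quantities that can be negative or exceed 1 (vote weights, margins, norms) would be drawn with zero_to_one=False so the autoscaled axis keeps them visible.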
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index 50eb7948..74ab90e3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -182,11 +182,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def update_info_containers(self, y, voter_perf, k):
         """Is used at each iteration to compute and store all the needed quantities for later analysis"""
         self.example_weights_.append(self.example_weights)
+        self.tau.append(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples))
         self.previous_vote += self.q * self.new_voter
+        self.norm.append(np.linalg.norm(self.previous_vote)**2)
         self.previous_votes.append(self.previous_vote)
         self.previous_margins.append(
-            np.multiply(y, self.previous_vote))
-        self.selected_margins.append(np.sum(np.multiply(y, self.new_voter)))
+            np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples))
+        self.selected_margins.append(np.sum(np.multiply(y, self.new_voter))/float(self.n_total_examples))
         train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote))
         if self.use_r:
             bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2)
@@ -258,6 +260,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                                            first_voter_index].reshape((m, 1)), copy=True)
 
         self.previous_vote = self.new_voter
+        self.norm.append(np.linalg.norm(self.previous_vote) ** 2)
 
         if self.use_r:
             r = self._compute_r(y)
@@ -280,8 +283,11 @@
         self.previous_margins.append(
-            np.multiply(y, self.previous_vote))
+            np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples))
         self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote)))
+        self.tau.append(
+            np.sum(np.multiply(self.previous_vote, self.new_voter)) / float(
+                self.n_total_examples))
         train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote))
 
         if self.use_r:
@@ -339,6 +345,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.previous_margins = []
         self.respected_bound = True
         self.selected_margins = []
+        self.tau = []
+        self.norm = []
 
     def _compute_epsilon(self, y):
         """Updating the error variable, the old fashioned way uses the whole majority vote to update the error"""
@@ -480,21 +488,24 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         """Used to interpret the functionning of the algorithm"""
         if self.step_decisions is not None:
             self.get_step_decision_test_graph(directory, y_test)
-        get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__,
-                           directory + 'voter_perfs.png', "Rs")
+        # get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__,
+        #                    directory + 'voter_perfs.png', "Rs")
         get_accuracy_graph(self.weights_, self.__class__.__name__,
-                           directory+'vote_weights.png', "weights")
+                           directory+'vote_weights.png', "weights", zero_to_one=False)
         get_accuracy_graph(self.c_bounds, self.__class__.__name__,
                            directory + 'c_bounds.png', "C-Bounds")
-        get_accuracy_graph(self.margins, self.__class__.__name__,
-                           directory + 'margins.png', "Squared Margins")
+        get_accuracy_graph(self.previous_margins, self.__class__.__name__,
+                           directory + 'margins.png', "Margins", zero_to_one=False)
        get_accuracy_graph(self.selected_margins, self.__class__.__name__,
                            directory + 'selected_margins.png', "Selected Margins")
-        self.disagreements[0] = 0
-        get_accuracy_graph(self.disagreements, self.__class__.__name__,
-                           directory + 'disagreements.png', "disagreements")
+        self.tau[0] = 0
+        get_accuracy_graph(self.tau, self.__class__.__name__,
+                           directory + 'disagreements.png', "disagreements", zero_to_one=False)
         get_accuracy_graph(self.train_metrics[:-1], self.__class__.__name__,
                            directory + 'c_bounds_train_metrics.png', self.plotted_metric,
                            self.c_bounds, "C-Bound", self.bounds[:-1])
+        get_accuracy_graph(self.norm, self.__class__.__name__,
+                           directory + 'norms.png',
+                           "squared 2-norm", zero_to_one=False)
         interpretString = getInterpretBase(self, directory, self.__class__.__name__,
                                            self.weights_, self.break_cause)
         if self.save_train_data:
@@ -503,6 +514,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             np.savetxt(directory + "raw_weights.csv", self.raw_weights, delimiter=',')
             np.savetxt(directory + "c_bounds.csv", self.c_bounds, delimiter=',')
             np.savetxt(directory + "train_metrics.csv", self.train_metrics, delimiter=',')
+            np.savetxt(directory + "margins.csv", self.previous_margins,
+                       delimiter=',')
+            np.savetxt(directory + "disagreements.csv", self.tau,
+                       delimiter=',')
+            np.savetxt(directory + "norms.csv", self.norm,
+                       delimiter=',')
         args_dict = dict(
             (arg_name, str(self.__dict__[arg_name])) for arg_name in
             self.printed_args_name_list)
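The QarBoostUtils changes normalise the stored margins by the number of training examples and add two per-iteration quantities: tau, the mean agreement between the current majority vote and the newly selected voter, and the squared 2-norm of the vote vector. A rough standalone sketch of those computations, assuming y and the vote vectors are (n, 1) arrays; the function and variable names are illustrative, not the class's API:

import numpy as np


def iteration_stats(y, previous_vote, new_voter):
    """Illustrative versions of the quantities appended at each boosting iteration."""
    n = float(y.shape[0])
    margin = np.sum(np.multiply(y, previous_vote)) / n       # mean margin of the majority vote
    tau = np.sum(np.multiply(previous_vote, new_voter)) / n  # mean agreement with the new voter
    sq_norm = np.linalg.norm(previous_vote) ** 2             # squared 2-norm of the vote vector
    return margin, tau, sq_norm


# toy usage on random data
rng = np.random.RandomState(42)
y = rng.choice([-1, 1], size=(10, 1))
vote = rng.uniform(-1, 1, size=(10, 1))
voter = rng.choice([-1, 1], size=(10, 1))
print(iteration_stats(y, vote, voter))

Dividing by n keeps margins and tau on a scale comparable across datasets, which is why the corresponding plots are drawn with zero_to_one=False rather than on a clamped axis.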
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
new file mode 100644
index 00000000..8a520a0c
--- /dev/null
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
@@ -0,0 +1,48 @@
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
+from ..Monoview.Additions.BoostUtils import getInterpretBase
+from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
+
+
+class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
+
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs):
+        super(CGDesc, self).__init__(n_max_iterations=n_max_iterations,
+                                     random_state=random_state,
+                                     self_complemented=True,
+                                     twice_the_same=True,
+                                     c_bound_choice=True,
+                                     random_start=False,
+                                     n_stumps_per_attribute=n_stumps_per_attribute,
+                                     use_r=True,
+                                     c_bound_sol=True
+                                     )
+
+        self.param_names = ["n_max_iterations"]
+        self.distribs = [CustomRandint(low=1, high=500)]
+        self.classed_params = []
+        self.weird_strings = {}
+
+    def canProbas(self):
+        """Used to know if the classifier can return label probabilities"""
+        return True
+
+    def getInterpret(self, directory, y_test):
+        return self.getInterpretQar(directory, y_test)
+
+    def get_name_for_fusion(self):
+        return "CGr"
+
+
+def formatCmdArgs(args):
+    """Used to format kwargs for the parsed args"""
+    kwargsDict = {"n_stumps_per_attribute": args.CGD_stumps,
+                  "n_max_iterations": args.CGD_n_iter}
+    return kwargsDict
+
+
+def paramsToSet(nIter, randomState):
+    """Used for weighted linear early fusion to generate random search sets"""
+    paramsSet = []
+    for _ in range(nIter):
+        paramsSet.append({})
+    return paramsSet
\ No newline at end of file
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
index b2d7dcad..ed2271d3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
@@ -9,7 +9,7 @@ class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
         super(CGreed, self).__init__(n_max_iterations=n_max_iterations,
                                      random_state=random_state,
                                      self_complemented=True,
-                                     twice_the_same=True,
+                                     twice_the_same=False,
                                      c_bound_choice=True,
                                      random_start=False,
                                      n_stumps_per_attribute=n_stumps_per_attribute,
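CGDesc is wired like the platform's other monoview classifiers: it fixes the boosting options in __init__ (keeping twice_the_same=True, while CGreed now switches it to False) and exposes n_max_iterations as its single random-search hyper-parameter through param_names and distribs. A rough usage sketch, assuming the class follows the usual scikit-learn fit/predict contract it inherits from ColumnGenerationClassifierQar; the toy data and label encoding below are made up:

import numpy as np
# import path mirrors the package layout in the diff above
from multiview_platform.MonoMultiViewClassifiers.MonoviewClassifiers.CGDesc import CGDesc

rng = np.random.RandomState(42)
X = rng.uniform(size=(50, 5))        # toy monoview data
y = rng.choice([-1, 1], size=50)     # assuming +/-1 labels; the platform may encode them differently

clf = CGDesc(random_state=rng, n_max_iterations=20, n_stumps_per_attribute=2)
clf.fit(X, y)
print(clf.predict(X)[:10])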
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index bd653055..46af685c 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -186,6 +186,16 @@ def parseTheArgs(arguments):
     groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int,
                              action='store',
                              help='Set the n_max_iterations parameter for CGreed',
                              default=100)
+    groupCGDesc = parser.add_argument_group('CGDesc arguments')
+    groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int,
+                             action='store',
+                             help='Set the n_stumps_per_attribute parameter for CGDesc',
+                             default=1)
+    groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int,
+                             action='store',
+                             help='Set the n_max_iterations parameter for CGDesc',
+                             default=100)
+
     groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments')
     groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store',
                                  help='Set the mu parameter for QarBoostv3', default=0.001)
@@ -395,7 +405,7 @@ def initLogFile(name, views, CL_type, log, debug, label):
     if debug:
         resultDirectory = "../Results/" + name + "/debug_started_" + time.strftime("%Y_%m_%d-%H_%M_%S") + "_" + label + "/"
     else:
-        resultDirectory = "../Results/" + name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") +"_" + label + "/"
+        resultDirectory = "../Results/" + name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "_" + label + "/"
     logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(CL_type) + "-" + "_".join(
         views) + "-" + name + "-LOG"
     if os.path.exists(os.path.dirname(resultDirectory)):
-- 
GitLab
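The execution.py addition gives CGDesc its own argparse group, following the file's existing per-classifier pattern; the parsed flags are then turned into constructor kwargs by CGDesc.formatCmdArgs. A condensed, runnable sketch of that flow, with a simplified stand-in parser but the same flag names and defaults as the diff:

import argparse

parser = argparse.ArgumentParser(description="simplified stand-in for parseTheArgs")
groupCGDesc = parser.add_argument_group('CGDesc arguments')
groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int, action='store',
                         help='Set the n_stumps_per_attribute parameter for CGDesc',
                         default=1)
groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int, action='store',
                         help='Set the n_max_iterations parameter for CGDesc',
                         default=100)

args = parser.parse_args(['--CGD_stumps', '2', '--CGD_n_iter', '50'])

# mirrors CGDesc.formatCmdArgs: CLI flags become constructor kwargs
kwargs = {"n_stumps_per_attribute": args.CGD_stumps,
          "n_max_iterations": args.CGD_n_iter}
print(kwargs)  # {'n_stumps_per_attribute': 2, 'n_max_iterations': 50}

Keeping each classifier's flags in a dedicated argument group keeps --help output readable as the number of supported monoview classifiers grows.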