From ac9f952c56e9c6d4518ed9e52dea88be38bf3b07 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Mon, 25 Feb 2019 15:16:53 -0500
Subject: [PATCH] Added grad desc modified saved vars

---
 .../Monoview/Additions/BoostUtils.py          | 27 +++++------
 .../Monoview/Additions/QarBoostUtils.py       | 39 ++++++++++-----
 .../MonoviewClassifiers/CGDesc.py             | 48 +++++++++++++++++++
 .../MonoviewClassifiers/CGreed.py             |  2 +-
 .../utils/execution.py                        | 12 ++++-
 5 files changed, 99 insertions(+), 29 deletions(-)
 create mode 100644 multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py

diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index e7242ee6..d3432116 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -669,15 +669,18 @@ class ConvexProgram(object):
         return signs
 
 
-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train"):
+def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train", zero_to_one=True):
     if type(name) is not str:
         name = " ".join(name.getConfig().strip().split(" ")[:2])
-    if bounds:
-        f, ax = plt.subplots(nrows=1, ncols=1)
+    f, ax = plt.subplots(nrows=1, ncols=1)
+    if zero_to_one:
         ax.set_ylim(bottom=0.0,top=1.0)
-        ax.set_title(name+" during "+set+" for "+classifier_name)
-        x = np.arange(len(train_accuracies))
-        scat = ax.scatter(x, np.array(train_accuracies), marker=".")
+    ax.set_title(name+" during "+set+" for "+classifier_name)
+    x = np.arange(len(plotted_data))
+    if name == "zero_one_loss":
+        print(plotted_data)
+    scat = ax.scatter(x, np.array(plotted_data), marker=".")
+    if bounds:
         if boosting_bound:
             scat2 = ax.scatter(x, boosting_bound, marker=".")
             scat3 = ax.scatter(x, np.array(bounds), marker=".", )
@@ -687,18 +690,10 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
             ax.legend((scat, scat2), (name, bound_name))
         # plt.tight_layout()
-        f.savefig(file_name)
-        plt.close()
     else:
-        f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_ylim(bottom=0.0, top=1.0)
-        ax.set_title(name + " during "+set+" for " + classifier_name)
-        x = np.arange(len(train_accuracies))
-        scat = ax.scatter(x, np.array(train_accuracies), marker=".", )
         ax.legend((scat,), (name,))
-        plt.tight_layout()
-        f.savefig(file_name)
-        plt.close()
+    f.savefig(file_name)
+    plt.close()
 
 
 class BaseBoost(object):
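The BoostUtils change merges the duplicated bounds / no-bounds branches of get_accuracy_graph into a single figure-setup path, and the new zero_to_one flag decides whether the y-axis is clamped to [0, 1]. A minimal self-contained sketch of the same single-path pattern; plot_series and its arguments are illustrative stand-ins, not the project's exact function:

import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend, as when figures are only saved to disk
import matplotlib.pyplot as plt


def plot_series(plotted_data, file_name, name="Accuracies", bounds=None,
                bound_name=None, zero_to_one=True):
    """Single code path: set up the figure once, then branch only on the legend."""
    fig, ax = plt.subplots(nrows=1, ncols=1)
    if zero_to_one:
        ax.set_ylim(bottom=0.0, top=1.0)  # only clamp quantities that live in [0, 1]
    x = np.arange(len(plotted_data))
    scat = ax.scatter(x, np.array(plotted_data), marker=".")
    if bounds is not None:
        scat2 = ax.scatter(x, np.array(bounds), marker=".")
        ax.legend((scat, scat2), (name, bound_name))
    else:
        ax.legend((scat,), (name,))
    fig.savefig(file_name)
    plt.close(fig)


plot_series([0.5, 0.6, 0.7, 0.72], "toy_accuracies.png")

Quantities that can be negative or exceed 1 (vote weights, margins, norms) would be drawn with zero_to_one=False so the autoscaled axis keeps them visible.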
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index 50eb7948..74ab90e3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -182,11 +182,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def update_info_containers(self, y, voter_perf, k):
         """Is used at each iteration to compute and store all the needed quantities for later analysis"""
         self.example_weights_.append(self.example_weights)
+        self.tau.append(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples))
         self.previous_vote += self.q * self.new_voter
+        self.norm.append(np.linalg.norm(self.previous_vote)**2)
         self.previous_votes.append(self.previous_vote)
         self.previous_margins.append(
-            np.multiply(y, self.previous_vote))
-        self.selected_margins.append(np.sum(np.multiply(y, self.new_voter)))
+            np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples))
+        self.selected_margins.append(np.sum(np.multiply(y, self.new_voter))/float(self.n_total_examples))
         train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote))
         if self.use_r:
             bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2)
@@ -258,6 +260,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                                            first_voter_index].reshape((m, 1)), copy=True)
 
         self.previous_vote = self.new_voter
+        self.norm.append(np.linalg.norm(self.previous_vote) ** 2)
 
         if self.use_r:
             r = self._compute_r(y)
@@ -280,8 +283,11 @@
         self.previous_margins.append(
-            np.multiply(y, self.previous_vote))
+            np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples))
         self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote)))
+        self.tau.append(
+            np.sum(np.multiply(self.previous_vote, self.new_voter)) / float(
+                self.n_total_examples))
         train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote))
 
         if self.use_r:
@@ -339,6 +345,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.previous_margins = []
         self.respected_bound = True
         self.selected_margins = []
+        self.tau = []
+        self.norm = []
 
     def _compute_epsilon(self, y):
         """Updating the error variable, the old fashioned way uses the whole majority vote to update the error"""
@@ -480,21 +488,24 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         """Used to interpret the functionning of the algorithm"""
         if self.step_decisions is not None:
             self.get_step_decision_test_graph(directory, y_test)
-        get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__,
-                           directory + 'voter_perfs.png', "Rs")
+        # get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__,
+        #                    directory + 'voter_perfs.png', "Rs")
         get_accuracy_graph(self.weights_, self.__class__.__name__,
-                           directory+'vote_weights.png', "weights")
+                           directory+'vote_weights.png', "weights", zero_to_one=False)
         get_accuracy_graph(self.c_bounds, self.__class__.__name__,
                            directory + 'c_bounds.png', "C-Bounds")
-        get_accuracy_graph(self.margins, self.__class__.__name__,
-                           directory + 'margins.png', "Squared Margins")
+        get_accuracy_graph(self.previous_margins, self.__class__.__name__,
+                           directory + 'margins.png', "Margins", zero_to_one=False)
        get_accuracy_graph(self.selected_margins, self.__class__.__name__,
                            directory + 'selected_margins.png', "Selected Margins")
-        self.disagreements[0] = 0
-        get_accuracy_graph(self.disagreements, self.__class__.__name__,
-                           directory + 'disagreements.png', "disagreements")
+        self.tau[0] = 0
+        get_accuracy_graph(self.tau, self.__class__.__name__,
+                           directory + 'disagreements.png', "disagreements", zero_to_one=False)
         get_accuracy_graph(self.train_metrics[:-1], self.__class__.__name__,
                            directory + 'c_bounds_train_metrics.png', self.plotted_metric,
                            self.c_bounds, "C-Bound", self.bounds[:-1])
+        get_accuracy_graph(self.norm, self.__class__.__name__,
+                           directory + 'norms.png',
+                           "squared 2-norm", zero_to_one=False)
         interpretString = getInterpretBase(self, directory, self.__class__.__name__,
                                            self.weights_, self.break_cause)
         if self.save_train_data:
@@ -503,6 +514,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             np.savetxt(directory + "raw_weights.csv", self.raw_weights, delimiter=',')
             np.savetxt(directory + "c_bounds.csv", self.c_bounds, delimiter=',')
             np.savetxt(directory + "train_metrics.csv", self.train_metrics, delimiter=',')
+            np.savetxt(directory + "margins.csv", self.previous_margins,
+                       delimiter=',')
+            np.savetxt(directory + "disagreements.csv", self.tau,
+                       delimiter=',')
+            np.savetxt(directory + "norms.csv", self.norm,
+                       delimiter=',')
         args_dict = dict(
             (arg_name, str(self.__dict__[arg_name])) for arg_name in
             self.printed_args_name_list)
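The QarBoostUtils changes normalise the stored margins by the number of training examples and add two per-iteration quantities: tau, the mean agreement between the current majority vote and the newly selected voter, and the squared 2-norm of the vote vector. A rough standalone sketch of those computations, assuming y and the vote vectors are (n, 1) arrays; the function and variable names are illustrative, not the class's API:

import numpy as np


def iteration_stats(y, previous_vote, new_voter):
    """Illustrative versions of the quantities appended at each boosting iteration."""
    n = float(y.shape[0])
    margin = np.sum(np.multiply(y, previous_vote)) / n       # mean margin of the majority vote
    tau = np.sum(np.multiply(previous_vote, new_voter)) / n  # mean agreement with the new voter
    sq_norm = np.linalg.norm(previous_vote) ** 2             # squared 2-norm of the vote vector
    return margin, tau, sq_norm


# toy usage on random data
rng = np.random.RandomState(42)
y = rng.choice([-1, 1], size=(10, 1))
vote = rng.uniform(-1, 1, size=(10, 1))
voter = rng.choice([-1, 1], size=(10, 1))
print(iteration_stats(y, vote, voter))

Dividing by n keeps margins and tau on a scale comparable across datasets, which is why the corresponding plots are drawn with zero_to_one=False rather than on a clamped axis.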
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
new file mode 100644
index 00000000..8a520a0c
--- /dev/null
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
@@ -0,0 +1,48 @@
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
+from ..Monoview.Additions.BoostUtils import getInterpretBase
+from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
+
+
+class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
+
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs):
+        super(CGDesc, self).__init__(n_max_iterations=n_max_iterations,
+                                     random_state=random_state,
+                                     self_complemented=True,
+                                     twice_the_same=True,
+                                     c_bound_choice=True,
+                                     random_start=False,
+                                     n_stumps_per_attribute=n_stumps_per_attribute,
+                                     use_r=True,
+                                     c_bound_sol=True
+                                     )
+
+        self.param_names = ["n_max_iterations"]
+        self.distribs = [CustomRandint(low=1, high=500)]
+        self.classed_params = []
+        self.weird_strings = {}
+
+    def canProbas(self):
+        """Used to know if the classifier can return label probabilities"""
+        return True
+
+    def getInterpret(self, directory, y_test):
+        return self.getInterpretQar(directory, y_test)
+
+    def get_name_for_fusion(self):
+        return "CGr"
+
+
+def formatCmdArgs(args):
+    """Used to format kwargs for the parsed args"""
+    kwargsDict = {"n_stumps_per_attribute": args.CGD_stumps,
+                  "n_max_iterations": args.CGD_n_iter}
+    return kwargsDict
+
+
+def paramsToSet(nIter, randomState):
+    """Used for weighted linear early fusion to generate random search sets"""
+    paramsSet = []
+    for _ in range(nIter):
+        paramsSet.append({})
+    return paramsSet
\ No newline at end of file
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
index b2d7dcad..ed2271d3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
@@ -9,7 +9,7 @@ class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
         super(CGreed, self).__init__(n_max_iterations=n_max_iterations,
                                      random_state=random_state,
                                      self_complemented=True,
-                                     twice_the_same=True,
+                                     twice_the_same=False,
                                      c_bound_choice=True,
                                      random_start=False,
                                      n_stumps_per_attribute=n_stumps_per_attribute,
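CGDesc is wired like the platform's other monoview classifiers: it fixes the boosting options in __init__ (keeping twice_the_same=True, while CGreed now switches it to False) and exposes n_max_iterations as its single random-search hyper-parameter through param_names and distribs. A rough usage sketch, assuming the class follows the usual scikit-learn fit/predict contract it inherits from ColumnGenerationClassifierQar; the toy data and label encoding below are made up:

import numpy as np
# import path mirrors the package layout in the diff above
from multiview_platform.MonoMultiViewClassifiers.MonoviewClassifiers.CGDesc import CGDesc

rng = np.random.RandomState(42)
X = rng.uniform(size=(50, 5))        # toy monoview data
y = rng.choice([-1, 1], size=50)     # assuming +/-1 labels; the platform may encode them differently

clf = CGDesc(random_state=rng, n_max_iterations=20, n_stumps_per_attribute=2)
clf.fit(X, y)
print(clf.predict(X)[:10])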
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index bd653055..46af685c 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -186,6 +186,16 @@ def parseTheArgs(arguments):
     groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int,
                              action='store',
                              help='Set the n_max_iterations parameter for CGreed',
                              default=100)
+    groupCGDesc = parser.add_argument_group('CGDesc arguments')
+    groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int,
+                             action='store',
+                             help='Set the n_stumps_per_attribute parameter for CGDesc',
+                             default=1)
+    groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int,
+                             action='store',
+                             help='Set the n_max_iterations parameter for CGDesc',
+                             default=100)
+
     groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments')
     groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store',
                                  help='Set the mu parameter for QarBoostv3', default=0.001)
@@ -395,7 +405,7 @@ def initLogFile(name, views, CL_type, log, debug, label):
     if debug:
         resultDirectory = "../Results/" + name + "/debug_started_" + time.strftime("%Y_%m_%d-%H_%M_%S") + "_" + label + "/"
     else:
-        resultDirectory = "../Results/" + name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") +"_" + label + "/"
+        resultDirectory = "../Results/" + name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "_" + label + "/"
     logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(CL_type) + "-" + "_".join(
         views) + "-" + name + "-LOG"
     if os.path.exists(os.path.dirname(resultDirectory)):
-- 
GitLab
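The execution.py addition gives CGDesc its own argparse group, following the file's existing per-classifier pattern; the parsed flags are then turned into constructor kwargs by CGDesc.formatCmdArgs. A condensed, runnable sketch of that flow, with a simplified stand-in parser but the same flag names and defaults as the diff:

import argparse

parser = argparse.ArgumentParser(description="simplified stand-in for parseTheArgs")
groupCGDesc = parser.add_argument_group('CGDesc arguments')
groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int, action='store',
                         help='Set the n_stumps_per_attribute parameter for CGDesc',
                         default=1)
groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int, action='store',
                         help='Set the n_max_iterations parameter for CGDesc',
                         default=100)

args = parser.parse_args(['--CGD_stumps', '2', '--CGD_n_iter', '50'])

# mirrors CGDesc.formatCmdArgs: CLI flags become constructor kwargs
kwargs = {"n_stumps_per_attribute": args.CGD_stumps,
          "n_max_iterations": args.CGD_n_iter}
print(kwargs)  # {'n_stumps_per_attribute': 2, 'n_max_iterations': 50}

Keeping each classifier's flags in a dedicated argument group keeps --help output readable as the number of supported monoview classifiers grows.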