diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index f5191941440f620e4f3665bf996f0d6378e1b4c0..b1eb25405df720c972276146ebdcb4e4120a6f28 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -669,7 +669,7 @@ class ConvexProgram(object):
         return signs
 
 
-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None):
+def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None):
     if type(name) is not str:
         name = " ".join(name.getConfig().strip().split(" ")[:2])
     if bounds:
@@ -677,8 +677,14 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
         ax.set_title(name+" during train for "+classifier_name)
         x = np.arange(len(train_accuracies))
         scat = ax.scatter(x, np.array(train_accuracies), )
-        scat2 = ax.scatter(x, np.array(bounds), )
-        ax.legend((scat,scat2), (name,"Bounds"))
+        if boosting_bound:
+            scat2 = ax.scatter(x, boosting_bound)
+            scat3 = ax.scatter(x, np.array(bounds), )
+            ax.legend((scat, scat2, scat3), (name, "Boosting bound", bound_name))
+        else:
+            scat2 = ax.scatter(x, np.array(bounds), )
+            ax.legend((scat, scat2),
+                      (name, bound_name))
         plt.tight_layout()
         f.savefig(file_name)
         plt.close()
@@ -751,5 +757,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
                          separator=',', suppress_small=True)
     np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
     np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
-    get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds)
+    get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound")
     return interpretString
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index 65c4abb43c2173d5af4b935965ab4ba6d42a5b34..ee475b02b7fa344638912235a2a82793b0bb8179 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -18,8 +18,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def __init__(self, n_max_iterations=None, estimators_generator=None,
                  random_state=42, self_complemented=True, twice_the_same=False,
                  c_bound_choice=True, random_start=True,
-                 n_stumps_per_attribute=None, use_r=True,
-                 plotted_metric=Metrics.zero_one_loss):
+                 n_stumps_per_attribute=None, use_r=True, c_bound_sol=True,
+                 plotted_metric=Metrics.zero_one_loss, save_train_data=True):
         super(ColumnGenerationClassifierQar, self).__init__()
 
         r"""
@@ -60,10 +60,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         if n_stumps_per_attribute:
             self.n_stumps = n_stumps_per_attribute
         self.use_r = use_r
+        self.c_bound_sol = c_bound_sol
+        self.save_train_data = save_train_data
         self.printed_args_name_list = ["n_max_iterations", "self_complemented",
                                        "twice_the_same", "c_bound_choice",
                                        "random_start",
-                                       "n_stumps", "use_r"]
+                                       "n_stumps", "use_r", "c_bound_sol"]
 
     def set_params(self, **params):
        self.self_complemented = params["self_complemented"]
@@ -94,7 +96,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
                 # Print dynamically the step and the error of the current classifier
                 print(
-                    "Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound,
+                    "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
+                                                                  self.bounds[-1] > self.train_metrics[-1],
                                                               k + 2,
                                                               self.n_max_iterations,
                                                               self.voter_perfs[-1]),
@@ -111,7 +114,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
             voter_perf = self.compute_voter_perf(formatted_y)
 
-            self.compute_voter_weight(voter_perf)
+            self.compute_voter_weight(voter_perf, sol)
 
             self.update_example_weights(formatted_y)
 
@@ -121,6 +124,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.estimators_generator.estimators_ = \
             self.estimators_generator.estimators_[self.chosen_columns_]
 
+        if self.save_train_data:
+            self.X_train = self.classification_matrix[:, self.chosen_columns_]
+            self.y_train = formatted_y
+
         self.weights_ = np.array(self.weights_)
 
         self.weights_ /= np.sum(self.weights_)
@@ -170,12 +177,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.train_metrics.append(train_metric)
         self.bounds.append(bound)
 
-    def compute_voter_weight(self, voter_perf):
+    def compute_voter_weight(self, voter_perf, sol):
         """used to compute the voter's weight according to the specified method (edge or error) """
-        if self.use_r:
-            self.q = 0.5 * math.log((1 + voter_perf) / (1 - voter_perf))
+        if self.c_bound_sol:
+            self.q = sol
         else:
-            self.q = math.log((1 - voter_perf) / voter_perf)
+            if self.use_r:
+                self.q = 0.5 * math.log((1 + voter_perf) / (1 - voter_perf))
+            else:
+                self.q = math.log((1 - voter_perf) / voter_perf)
         self.weights_.append(self.q)
 
     def compute_voter_perf(self, formatted_y):
@@ -230,11 +240,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         else:
             epsilon = self._compute_epsilon(y)
             self.voter_perfs.append(epsilon)
-
-        if self.use_r:
-            self.q = 0.5 * math.log((1 + r) / (1 - r))
+        if self.c_bound_sol:
+            self.q = 1
         else:
-            self.q = math.log((1 - epsilon) / epsilon)
+            if self.use_r:
+                self.q = 0.5 * math.log((1 + r) / (1 - r))
+            else:
+                self.q = math.log((1 - epsilon) / epsilon)
         self.weights_.append(self.q)
 
         # Update the distribution on the examples.
@@ -290,6 +302,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.example_weights_ = []
         self.train_metrics = []
         self.bounds = []
+        self.disagreements = []
+        self.margins = []
         self.previous_votes = []
         self.previous_margins = []
         self.respected_bound = True
@@ -373,11 +387,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         if not causes:
             causes = ["no feature was better than random and acceptable"]
         if c_borns:
-            min_c_born_index = ma.argmin(c_borns)
-            self.c_bounds.append(c_borns[min_c_born_index])
-            selected_sol = possible_sols[min_c_born_index]
-            selected_voter_index = indices[min_c_born_index]
-            return selected_sol, selected_voter_index
+            min_c_bound_index = ma.argmin(c_borns)
+            self.c_bounds.append(c_borns[min_c_bound_index])
+            selected_sol = possible_sols[min_c_bound_index]
+            self.margins.append(self.margin(selected_sol))
+            self.disagreements.append(self.disagreement(selected_sol))
+            selected_voter_index = indices[min_c_bound_index]
+            return selected_sol/(1+selected_sol), selected_voter_index
         else:
             return "break", " and ".join(set(causes))
 
@@ -388,7 +404,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         zero_diag = np.ones((m, m)) - np.identity(m)
 
         weighted_previous_sum = np.multiply(y, self.previous_vote.reshape((m, 1)))
-        weighted_next_column = np.multiply(next_column.reshape((m, 1)),
+        if self.c_bound_sol:
+            weighted_next_column = next_column.reshape((m, 1))
+        else:
+            weighted_next_column = np.multiply(next_column.reshape((m, 1)),
                                            self.example_weights.reshape((m, 1)))
 
         self.B2 = np.sum(weighted_next_column ** 2)
@@ -437,12 +456,17 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         """"We just check that the solution found by np.roots is acceptable
         under our constraints (real, a minimum and over 0)"""
         if sols.shape[0] == 1:
-            if self._cbound(sols[0]) < self._cbound(sols[0] + 1):
+            if self._cbound(sols[0]) < self._cbound(sols[0] + 1) and sols[0] > 0:
                 best_sol = sols[0]
             else:
-                return False, "the only solution was a maximum."
-        elif sols.shape[0] == 2:
+                if sols[0] > 0:
+                    return False, "the only solution was a maximum."
+                else:
+                    return False, "the only solution was negative"
+        elif sols.shape[0] == 2 and sols[0] > 0 and sols[1] > 1:
             best_sol = self._best_sol(sols)
+        elif np.greater(sols, np.zeros(2)).any():
+            return self._analyze_solutions_one_weight(np.array([np.max(sols)]))
         else:
             return False, "no solution were found"
 
@@ -453,8 +477,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
     def _cbound(self, sol):
         """Computing the objective function"""
-        return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / (
-            self.B2 * sol ** 2 + self.B1 * sol + self.B0) / self.n_total_examples
+        return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ((
+            self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples)
+
+    def disagreement(self, sol):
+        return self.B2 * sol ** 2 + self.B1 * sol + self.B0
+
+    def margin(self, sol):
+        return (self.A2 * sol ** 2 + self.A1 * sol + self.A0)/self.n_total_examples
 
     def _best_sol(self, sols):
         """Return the best min in the two possible sols"""
@@ -501,9 +531,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             shutil.rmtree(path + "/gif_images")
         get_accuracy_graph(self.voter_perfs, self.__class__.__name__,
                            directory + 'voter_perfs.png', "Errors")
+        get_accuracy_graph(self.c_bounds, self.__class__.__name__,
+                           directory + 'c_bounds.png', "C-Bounds")
+        get_accuracy_graph(self.margins, self.__class__.__name__,
+                           directory + 'margins.png', "Margins")
+        self.disagreements[0] = 0
+        get_accuracy_graph(self.disagreements, self.__class__.__name__,
+                           directory + 'disagreements.png', "disagreements")
+        get_accuracy_graph(self.train_metrics[1:], self.__class__.__name__,
+                           directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[1:])
         interpretString = getInterpretBase(self, directory, "QarBoost",
                                            self.weights_, self.break_cause)
-
+        if self.save_train_data:
+            np.savetxt(directory+"x_train.csv", self.X_train, delimiter=',')
+            np.savetxt(directory+"y_train.csv", self.y_train, delimiter=',')
         args_dict = dict(
             (arg_name, str(self.__dict__[arg_name])) for arg_name in
             self.printed_args_name_list)
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
index 97197e805170d605992bd0c68e8e0fcfb8acdd9e..6bf35c91ce62e5a77589e1484715f5b125a03f07 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py
@@ -6,14 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, **kwargs):
-        super(QarBoostNC, self).__init__(n_max_iterations=50,
+        super(QarBoostNC, self).__init__(n_max_iterations=500,
                                          random_state=random_state,
                                          self_complemented=True,
                                          twice_the_same=False,
                                          c_bound_choice=True,
                                          random_start=False,
                                          n_stumps_per_attribute=1,
-                                         use_r=True
+                                         use_r=True,
+                                         c_bound_sol=False
                                          )
         self.param_names = []
         self.distribs = []
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py
index 2739affea598beee878ca939fc8d7393172d1047..e27a8a4d045f9c11a19350e00860e3f8d021fb75 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py
@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, **kwargs):
-        super(QarBoostNC2, self).__init__(
+        super(QarBoostNC2, self).__init__(n_max_iterations=500,
                                           random_state=random_state,
                                           self_complemented=True,
-                                          twice_the_same=False,
-                                          old_fashioned=False,
-                                          previous_vote_weighted=False,
+                                          twice_the_same=True,
                                           c_bound_choice=True,
-                                          random_start=True,
-                                          two_wieghts_problem=False,
-                                          divided_ponderation=False,
-                                          n_stumps_per_attribute=10,
-                                          use_r=True
+                                          random_start=False,
+                                          n_stumps_per_attribute=1,
+                                          use_r=True,
+                                          c_bound_sol=False
                                           )
         self.param_names = []
         self.distribs = []
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
index 7cb49df54f584f395117f33368386c7e401887ca..1c44cc3b6414e97269b31d71b125a5d0c82d8d6b 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py
@@ -6,19 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, **kwargs):
-        super(QarBoostNC3, self).__init__(
+        super(QarBoostNC3, self).__init__(n_max_iterations=500,
                                           random_state=random_state,
-                                          self_complemented=False,
+                                          self_complemented=True,
                                           twice_the_same=False,
-                                          old_fashioned=False,
-                                          previous_vote_weighted=False,
                                           c_bound_choice=True,
-                                          random_start=True,
-                                          two_wieghts_problem=False,
-                                          divided_ponderation=True,
+                                          random_start=False,
                                           n_stumps_per_attribute=1,
-                                          use_r=True
-                                          )
+                                          use_r=True,
+                                          c_bound_sol=True)
         self.param_names = []
         self.distribs = []
         self.classed_params = []
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
index 7fb2239a9f8fc71c09d1ac70604adf73c5cc702c..e06e069841c186eaab2cee6ff090cdc42f6b2fa3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py
@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, **kwargs):
-        super(QarBoostv2, self).__init__(
+        super(QarBoostv2, self).__init__(n_max_iterations=500,
                                          random_state=random_state,
-                                         self_complemented=False,
-                                         twice_the_same=False,
-                                         old_fashioned=False,
-                                         previous_vote_weighted=False,
+                                         self_complemented=True,
+                                         twice_the_same=True,
                                          c_bound_choice=True,
                                          random_start=False,
-                                         two_wieghts_problem=False,
-                                         divided_ponderation=False,
                                          n_stumps_per_attribute=1,
-                                         use_r=True
+                                         use_r=True,
+                                         c_bound_sol=True
                                          )
         self.param_names = []
         self.distribs = []
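Note on the c_bound_sol weighting introduced above: instead of the edge/error-based log weights, the new branch takes the minimizer of the quadratic C-bound surrogate along the candidate voter's direction and returns it normalised as selected_sol/(1+selected_sol). The snippet below is a minimal standalone sketch of that selection, not the project's code: the helper names are hypothetical, and it assumes the candidate weights are the real positive roots of the derivative of the _cbound ratio (the diff only shows the root filtering in _analyze_solutions_one_weight, not how the polynomial passed to np.roots is built).

# Hypothetical sketch of the closed-form weight selection used when
# c_bound_sol is enabled. A2, A1, A0 and B2, B1, B0 play the same roles as
# the attributes read by _cbound in the patch; everything else is assumed.
import numpy as np


def cbound_one_weight(sol, A2, A1, A0, B2, B1, B0, n_examples):
    # Same objective as the corrected _cbound:
    # 1 - (A2*s^2 + A1*s + A0) / ((B2*s^2 + B1*s + B0) * n)
    return 1 - (A2 * sol ** 2 + A1 * sol + A0) / (
        (B2 * sol ** 2 + B1 * sol + B0) * n_examples)


def closed_form_weight(A2, A1, A0, B2, B1, B0, n_examples):
    # Setting the derivative of the bound to zero yields a quadratic in the
    # new voter's weight s; keep a real, strictly positive root that is a
    # minimum (checked, as in the patch, by comparing C(s) with C(s + 1)).
    roots = np.roots([A2 * B1 - A1 * B2,
                      2 * (A2 * B0 - A0 * B2),
                      A1 * B0 - A0 * B1])
    roots = np.real(roots[np.isreal(roots)])
    roots = roots[roots > 0]
    if roots.size == 0:
        return None  # analogous to the "negative"/"no solution" break causes
    best = min(roots, key=lambda s: cbound_one_weight(s, A2, A1, A0,
                                                      B2, B1, B0, n_examples))
    if cbound_one_weight(best, A2, A1, A0, B2, B1, B0, n_examples) >= \
            cbound_one_weight(best + 1, A2, A1, A0, B2, B1, B0, n_examples):
        return None  # the stationary point is a maximum
    return best / (1 + best)  # same normalisation as the patched return value

With c_bound_sol enabled the first voter's weight is simply set to 1 (self.q = 1), so this normalised closed-form solution only drives the weights of the later iterations.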