diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index b1eb25405df720c972276146ebdcb4e4120a6f28..503bae21b04262b51e873e6d3a09b0d3029c9dcb 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -676,23 +676,23 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur f, ax = plt.subplots(nrows=1, ncols=1) ax.set_title(name+" during train for "+classifier_name) x = np.arange(len(train_accuracies)) - scat = ax.scatter(x, np.array(train_accuracies), ) + scat = ax.scatter(x, np.array(train_accuracies), marker=".") if boosting_bound: - scat2 = ax.scatter(x, boosting_bound) - scat3 = ax.scatter(x, np.array(bounds), ) + scat2 = ax.scatter(x, boosting_bound, marker=".") + scat3 = ax.scatter(x, np.array(bounds), marker=".", ) ax.legend((scat, scat2, scat3), (name,"Boosting bound", bound_name)) else: - scat2 = ax.scatter(x, np.array(bounds), ) + scat2 = ax.scatter(x, np.array(bounds), marker=".", ) ax.legend((scat, scat2), (name, bound_name)) - plt.tight_layout() + # plt.tight_layout() f.savefig(file_name) plt.close() else: f, ax = plt.subplots(nrows=1, ncols=1) ax.set_title(name+" during train for "+classifier_name) x = np.arange(len(train_accuracies)) - scat = ax.scatter(x, np.array(train_accuracies), ) + scat = ax.scatter(x, np.array(train_accuracies), marker=".", ) ax.legend((scat,), (name,)) plt.tight_layout() f.savefig(file_name) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index ee475b02b7fa344638912235a2a82793b0bb8179..8566f3e3ef737e4b079b3b33414723c211517acb 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -19,7 +19,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): random_state=42, self_complemented=True, twice_the_same=False, c_bound_choice=True, random_start=True, n_stumps_per_attribute=None, use_r=True, c_bound_sol=True, - plotted_metric=Metrics.zero_one_loss, save_train_data=True): + plotted_metric=Metrics.zero_one_loss, save_train_data=True, + test_graph=True): super(ColumnGenerationClassifierQar, self).__init__() r""" @@ -62,6 +63,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.use_r = use_r self.c_bound_sol = c_bound_sol self.save_train_data = save_train_data + self.test_graph = test_graph self.printed_args_name_list = ["n_max_iterations", "self_complemented", "twice_the_same", "c_bound_choice", "random_start", @@ -102,7 +104,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.n_max_iterations, self.voter_perfs[-1]), end="\r") - sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y) @@ -145,6 +146,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): logging.warning('Converting sparse matrix to dense matrix.') X = np.array(X.todense()) classification_matrix = self._binary_classification_matrix(X) + # if self.test_graph: + # self.make_test_graph(classification_matrix) margins = np.squeeze( np.asarray(np.matmul(classification_matrix, self.weights_))) signs_array = np.array([int(x) for x in sign(margins)]) @@ -164,6 +167,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.previous_margins.append( np.multiply(y, self.previous_vote)) + self.selected_margins.append(np.sum(np.multiply(y, self.new_voter))) train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote)) if self.use_r: bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2) @@ -187,6 +191,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): else: self.q = math.log((1 - voter_perf) / voter_perf) self.weights_.append(self.q) + # self.weights_ = [weight/(1.0*sum(self.weights_)) for weight in self.weights_] def compute_voter_perf(self, formatted_y): """Used to computer the performance (error or edge) of the selected voter""" @@ -219,7 +224,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): """THis initialization corressponds to the first round of boosting with equal weights for each examples and the voter chosen by it's margin.""" self.example_weights = self._initialize_alphas(m).reshape((m, 1)) - self.previous_margins.append(np.multiply(y, y)) + # self.previous_margins.append(np.multiply(y, y)) self.example_weights_.append(self.example_weights) if self.random_start: first_voter_index = self.random_state.choice( @@ -255,6 +260,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.previous_margins.append( np.multiply(y, self.previous_vote)) + self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote))) train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote)) if self.use_r: @@ -307,6 +313,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.previous_votes = [] self.previous_margins = [] self.respected_bound = True + self.selected_margins = [] def _compute_epsilon(self, y): """Updating the error variable, the old fashioned way uses the whole majority vote to update the error""" @@ -318,6 +325,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def _compute_r(self, y): ones_matrix = np.ones(y.shape) + ones_matrix[np.multiply(y, self.new_voter.reshape( y.shape)) < 0] = -1 # can np.divide if needed r = np.average(ones_matrix, weights=self.example_weights, axis=0) @@ -340,14 +348,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0), fill_value=-np.inf) pseudo_h_values[self.chosen_columns_] = ma.masked - acceptable_indices = np.where( - np.logical_and(np.greater(upper_bound, pseudo_h_values), - np.greater(pseudo_h_values, lower_bound)))[0] - if acceptable_indices.size > 0: - worst_h_index = self.random_state.choice(acceptable_indices) - return worst_h_index, [0] - else: - return " no margin over random and acceptable", "" + return np.argmax(pseudo_h_values), [0] + # acceptable_indices = np.where( + # np.logical_and(np.greater(upper_bound, pseudo_h_values), + # np.greater(pseudo_h_values, lower_bound)))[0] + # if acceptable_indices.size > 0: + # worst_h_index = self.random_state.choice(acceptable_indices) + # return worst_h_index, [0] + # else: + # return " no margin over random and acceptable", "" def _is_not_too_wrong(self, hypothese, y): """Check if the weighted margin is better than random""" @@ -367,7 +376,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def _find_new_voter(self, y_kernel_matrix, y): """Here, we solve the two_voters_mincq_problem for each potential new voter, and select the one that has the smallest minimum""" - c_borns = [] + c_bounds = [] possible_sols = [] indices = [] causes = [] @@ -379,21 +388,21 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): and self._is_not_too_wrong(hypothese, y): w = self._solve_one_weight_min_c(hypothese, y) if w[0] != "break": - c_borns.append(self._cbound(w[0])) + c_bounds.append(self._cbound(w[0])) possible_sols.append(w) indices.append(hypothese_index) else: causes.append(w[1]) if not causes: causes = ["no feature was better than random and acceptable"] - if c_borns: - min_c_bound_index = ma.argmin(c_borns) - self.c_bounds.append(c_borns[min_c_bound_index]) + if c_bounds: + min_c_bound_index = ma.argmin(c_bounds) + self.c_bounds.append(c_bounds[min_c_bound_index]) selected_sol = possible_sols[min_c_bound_index] self.margins.append(self.margin(selected_sol)) self.disagreements.append(self.disagreement(selected_sol)) selected_voter_index = indices[min_c_bound_index] - return selected_sol/(1+selected_sol), selected_voter_index + return selected_sol, selected_voter_index #selected_sol/(1+selected_sol) else: return "break", " and ".join(set(causes)) @@ -441,10 +450,18 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): np.array(float(C0) / C1).reshape((1, 1))) if is_acceptable: return np.array([sol]) - try: - sols = np.roots(np.array([C2, C1, C0])) - except: - return ["break", "nan"] + if C1*C1 < 4*C2*C0: + return ['break', "no roots"] + if C2 > 0 and C1*C1 == 4*C2*C0: + return ['break', "maximum"] + + if C1*C1-4*C2*C0<=0: + try: + sols = np.roots(np.array([C2, C1, C0])) + except: + return ["break", "nan"] + else: + sols = np.array([(-C1-math.sqrt(C1*C1-4*C2*C0))/(2*C2), (-C1+math.sqrt(C1*C1-4*C2*C0))/(2*C2)]) is_acceptable, sol = self._analyze_solutions_one_weight(sols) if is_acceptable: @@ -481,7 +498,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples) def disagreement(self, sol): - return self.B2 * sol ** 2 + self.B1 * sol + self.B0 + return (self.B2 * sol ** 2 + self.B1 * sol + self.B0)/self.n_total_examples def margin(self, sol): return (self.A2 * sol ** 2 + self.A1 * sol + self.A0)/self.n_total_examples @@ -495,51 +512,70 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): """Initialize the examples wieghts""" return 1.0 / n_examples * np.ones((n_examples,)) + # def make_test_graph(self, classification_matrix): + # signs_arrays = np.array([self.predict_classification_matrix(classification_matrix, index+1) for index in range(self.weights_.shape[0])]).reshape((classification_matrix.shape)) + # np.savetxt("/home/baptiste/signs_arrays.csv", + # signs_arrays.astype(np.int16), delimiter=",") + # + # def predict_classification_matrix(self, classification_matrix, index): + # margins = np.squeeze( + # np.asarray(np.matmul(classification_matrix[:, :index], self.weights_[:index]))) + # signs_array = np.array([int(x) for x in sign(margins)]) + # signs_array[signs_array == -1] = 0 + # return signs_array + def getInterpretQar(self, directory): + self.directory = directory """Used to interpret the functionning of the algorithm""" path = "/".join(directory.split("/")[:-1]) - try: - import os - os.makedirs(path + "/gif_images") - except: - raise - filenames = [] - max_weight = max([np.max(examples_weights) for examples_weights in - self.example_weights_]) - min_weight = min([np.max(examples_weights) for examples_weights in - self.example_weights_]) - for iterIndex, examples_weights in enumerate(self.example_weights_): - r = np.array(examples_weights) - theta = np.arange(self.n_total_examples) - colors = np.sign(self.previous_margins[iterIndex]) - fig = plt.figure(figsize=(5, 5), dpi=80) - ax = fig.add_subplot(111) - c = ax.scatter(theta, r, c=colors, cmap='RdYlGn', alpha=0.75) - ax.set_ylim(min_weight, max_weight) - filename = path + "/gif_images/" + str(iterIndex) + ".png" - filenames.append(filename) - plt.savefig(filename) - plt.close() - - import imageio - images = [] - logging.getLogger("PIL").setLevel(logging.WARNING) - for filename in filenames: - images.append(imageio.imread(filename)) - imageio.mimsave(path + '/weights.gif', images, duration=1. / 2) - import shutil - shutil.rmtree(path + "/gif_images") - get_accuracy_graph(self.voter_perfs, self.__class__.__name__, - directory + 'voter_perfs.png', "Errors") + # try: + # import os + # os.makedirs(path + "/gif_images") + # except: + # raise + # filenames = [] + # max_weight = max([np.max(examples_weights) for examples_weights in + # self.example_weights_]) + # min_weight = min([np.max(examples_weights) for examples_weights in + # self.example_weights_]) + # for iterIndex, examples_weights in enumerate(self.example_weights_): + # r = np.array(examples_weights) + # theta = np.arange(self.n_total_examples) + # colors = np.sign(self.previous_margins[iterIndex]) + # fig = plt.figure(figsize=(5, 5), dpi=80) + # ax = fig.add_subplot(111) + # c = ax.scatter(theta, r, c=colors, cmap='RdYlGn', alpha=0.75) + # ax.set_ylim(min_weight, max_weight) + # filename = path + "/gif_images/" + str(iterIndex) + ".png" + # filenames.append(filename) + # plt.savefig(filename) + # plt.close() + # + # import imageio + # images = [] + # logging.getLogger("PIL").setLevel(logging.WARNING) + # for filename in filenames: + # images.append(imageio.imread(filename)) + # imageio.mimsave(path + '/weights.gif', images, duration=1. / 2) + # import shutil + # shutil.rmtree(path + "/gif_images") + get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__, + directory + 'voter_perfs.png', "Rs") + get_accuracy_graph(self.weights_, self.__class__.__name__, + directory+'vote_weights.png', "weights") get_accuracy_graph(self.c_bounds, self.__class__.__name__, directory + 'c_bounds.png', "C-Bounds") get_accuracy_graph(self.margins, self.__class__.__name__, - directory + 'margins.png', "Margins") + directory + 'margins.png', "Squared Margins") + print(self.selected_margins) + print(len(self.selected_margins)) + get_accuracy_graph(self.selected_margins, self.__class__.__name__, + directory + 'selected_margins.png', "Selected Margins") self.disagreements[0] = 0 get_accuracy_graph(self.disagreements, self.__class__.__name__, directory + 'disagreements.png', "disagreements") - get_accuracy_graph(self.train_metrics[1:], self.__class__.__name__, - directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[1:]) + get_accuracy_graph(self.train_metrics[:-1], self.__class__.__name__, + directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[:-1]) interpretString = getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause) if self.save_train_data: diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index fd0784a91ad017bad6844b79b560212b1d614149..95f89ec7f8b9cb2787e6f2f83833f9b7a13545d7 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -106,7 +106,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Done:\t Getting Results") logging.debug("Start:\t Saving preds") - saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis) + saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis, y_test) logging.info("Done:\t Saving Results") viewIndex = args["viewIndex"] @@ -172,7 +172,7 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, return clKWARGS, testFoldsPreds -def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis): +def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis, y_test): logging.info(stringAnalysis) outputTextFile = open(outputFileName + 'summary.txt', 'w') outputTextFile.write(stringAnalysis) @@ -180,6 +180,8 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",") np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",") np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "test_labels.csv", y_test.astype(np.int16), + delimiter=",") if imagesAnalysis is not None: for imageName in imagesAnalysis: diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py index 6bf35c91ce62e5a77589e1484715f5b125a03f07..2ea8b44664323f7656ab1840774f31a4a5b77a65 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC.py @@ -6,15 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): - super(QarBoostNC, self).__init__(n_max_iterations=500, + super(QarBoostNC, self).__init__(n_max_iterations=300, random_state=random_state, self_complemented=True, - twice_the_same=False, + twice_the_same=True, c_bound_choice=True, random_start=False, n_stumps_per_attribute=1, use_r=True, - c_bound_sol=False + c_bound_sol=True ) self.param_names = [] self.distribs = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py index e27a8a4d045f9c11a19350e00860e3f8d021fb75..9b07ade50a40f220a210fbfa1bc047002dc97c30 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC2.py @@ -6,7 +6,7 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): - super(QarBoostNC2, self).__init__(n_max_iterations=500, + super(QarBoostNC2, self).__init__(n_max_iterations=300, random_state=random_state, self_complemented=True, twice_the_same=True, diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py index 1c44cc3b6414e97269b31d71b125a5d0c82d8d6b..7133f80620bbae532461b5f7b053d3a8b440157a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py @@ -6,7 +6,7 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): - super(QarBoostNC3, self).__init__(n_max_iterations=500, + super(QarBoostNC3, self).__init__(n_max_iterations=300, random_state=random_state, self_complemented=True, twice_the_same=False, @@ -14,7 +14,8 @@ class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): random_start=False, n_stumps_per_attribute=1, use_r=True, - c_bound_sol=True) + c_bound_sol=True + ) self.param_names = [] self.distribs = [] self.classed_params = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py index e06e069841c186eaab2cee6ff090cdc42f6b2fa3..934e07ad329bf0f6e11546caab967e71163eb798 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py @@ -6,15 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): - super(QarBoostv2, self).__init__(n_max_iterations=500, + super(QarBoostv2, self).__init__(n_max_iterations=300, random_state=random_state, self_complemented=True, - twice_the_same=True, + twice_the_same=False, c_bound_choice=True, random_start=False, n_stumps_per_attribute=1, use_r=True, - c_bound_sol=True + c_bound_sol=False ) self.param_names = [] self.distribs = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py b/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py index 686cdc199aa3db43193d109ef51d3eae7d4a280a..f11b060f303bf357ce5e18a467466fd509796f84 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py @@ -40,6 +40,7 @@ def getShape(DATASET, viewIndex): return DATASET.get("View" + str(viewIndex)).attrs["shape"] + def getValue(DATASET): """Used to get the value of a view in the HDF5 dataset even if it sparse""" if not DATASET.attrs["sparse"]: