diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 01c57b052095ad2fe3fca17d0e430f97313f9f62..e76c1234e7415d3ce2ae934e62d71ad4e57598bc 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -666,6 +666,7 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name): f.savefig(file_name) plt.close() + class BaseBoost(object): def __init__(self): @@ -702,6 +703,8 @@ def getInterpretBase(classifier, directory, classifier_name, weights, interpretString += np.array2string(weights[weights_sort], precision=4, separator=',', suppress_small=True) interpretString += "\n \t It generated {} columns by attributes and used {} iterations to converge, and selected {} couple(s) of opposed voters".format(classifier.n_stumps, len(weights_sort), classifier.nb_opposed_voters) + if max(weights) > 0.50: + interpretString += "\n \t The vote is useless in this context : voter n°{} is a dictator of weight > 0.50".format(classifier.chosen_columns_[np.argmax(np.array(weights))]) if len(weights_sort) == classifier.n_max_iterations or len(weights) == classifier.n_total_hypotheses_: if len(weights) == classifier.n_max_iterations: interpretString += ", and used all available iterations, " diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py index 16f1ff150fe42057fbe34b3999f844418fbafbb7..9f9f5ba7ad5881246c92a023976ffa5d9ff0f0e2 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py @@ -192,7 +192,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): # def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None): # super(CqBoostClassifier, self).__init__(epsilon, n_max_iterations, estimators_generator, dual_constraint_rhs=0, # save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each) -# # TODO: Vérifier la valeur de nu (dual_constraint_rhs) à l'initialisation, mais de toute manière ignorée car +# # TODO: Verifier la valeur de nu (dual_constraint_rhs) a l'initialisation, mais de toute maniere ignoree car # # on ne peut pas quitter la boucle principale avec seulement un votant. # self.mu = mu # self.train_time = 0 diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index 8747040f453713588bffbfd7b57d33fa0e488fc2..9038ca61fc065768cdc57b898e6072cc0e0ab899 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -8,26 +8,35 @@ from sklearn.utils.validation import check_is_fitted from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.metrics import accuracy_score import time +import matplotlib.pyplot as plt from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def __init__(self, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, - save_iteration_as_hyperparameter_each=None, random_state=42, - self_complemented=True, twice_the_same=False, old_fashioned=False, previous_vote_weighted=True): + random_state=42, self_complemented=True, twice_the_same=False, old_fashioned=False, + previous_vote_weighted=True, c_bound_choice = True, random_start = True, + two_wieghts_problem=False): super(ColumnGenerationClassifierQar, self).__init__() self.n_max_iterations = n_max_iterations self.estimators_generator = estimators_generator self.dual_constraint_rhs = dual_constraint_rhs - self.save_iteration_as_hyperparameter_each = save_iteration_as_hyperparameter_each - self.random_state = random_state + if type(random_state) is int: + self.random_state = np.random.RandomState(random_state) + else: + self.random_state = random_state self.self_complemented =self_complemented self.twice_the_same = twice_the_same self.train_time = 0 self.old_fashioned = old_fashioned self.previous_vote_weighted = previous_vote_weighted - self.mu = 0.0649091 + self.c_bound_choice = True + self.random_start = True + self.two_wieghts_problem = False + self.c_bound_choice = c_bound_choice + self.random_start = random_start + self.two_wieghts_problem = two_wieghts_problem def fit(self, X, y): start = time.time() @@ -38,13 +47,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): if self.estimators_generator is None: self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) - + # Initialization y[y == 0] = -1 self.estimators_generator.fit(X, y) self.classification_matrix = self._binary_classification_matrix(X) - self.weights_ = [] self.infos_per_iteration_ = defaultdict(list) @@ -52,9 +60,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): y = y.reshape((m,1)) y_kernel_matrix = np.multiply(y, self.classification_matrix) - - # Initialization - self.collected_weight_vectors_ = {} self.collected_dual_constraint_violations_ = {} @@ -63,25 +68,31 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.chosen_columns_ = [] self.fobidden_columns = [] self.edge_scores = [] + self.c_bounds = [] self.epsilons = [] self.example_weights_ = [self.example_weights] self.train_accuracies = [] self.previous_votes = [] + self.previous_margins = [np.multiply(y,y)] self.n_total_hypotheses_ = n self.n_total_examples = m self.break_cause = " the maximum number of iterations was attained." for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)): - # To choose the first voter, we select the one that has the best margin. + # To choose the first voter, we select the one that has the best margin or a random one.. if k == 0: - first_voter_index = self._find_best_margin(y_kernel_matrix) + if self.random_start: + first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y)) + else: + first_voter_index, plif = self._find_best_weighted_margin(y_kernel_matrix) self.chosen_columns_.append(first_voter_index) self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1)) self.previous_vote = self.new_voter self.weighted_sum = self.new_voter + # We update the weights of the examples according to the error of the fisrt voter epsilon = self._compute_epsilon(y) if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf: self.break_cause = " epsilon was too small" @@ -92,16 +103,22 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.q = math.log((1-epsilon)/epsilon) self.weights_.append(self.q) + # Update the boosting variables self._update_example_weights(y) self.example_weights_.append(self.example_weights) + self.previous_margins.append(np.multiply(y, self.previous_vote)) self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote))) continue # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum - # C-bound with the previous vote - # new_voter_index,sol = self._find_best_weighted_margin(y_kernel_matrix) - sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y) + # C-bound with the previous vote or the one with the best weighted margin + + if self.c_bound_choice: + sol, new_voter_index = self._find_new_voter(y_kernel_matrix, y) + else: + new_voter_index,sol = self._find_best_weighted_margin(y_kernel_matrix) + # If the new voter selector could not find one, break the loop if type(sol) == str: self.break_cause = new_voter_index # " no more hypothesis were able to improve the boosted vote." break @@ -109,8 +126,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): # Append the weak hypothesis. self.chosen_columns_.append(new_voter_index) self.new_voter = self.classification_matrix[:, new_voter_index].reshape((m, 1)) - # self.weighted_sum = np.matmul(np.concatenate((self.previous_vote, self.classification_matrix[:, new_voter_index].reshape((m,1))), axis=1), - # sol).reshape((m,1)) # Generate the new weight for the new voter epsilon = self._compute_epsilon(y) @@ -130,18 +145,18 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_], np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1)) self.previous_votes.append(self.previous_vote) + self.previous_margins.append(np.multiply(y, self.previous_vote)) self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote))) self.nb_opposed_voters = self.check_opposed_voters() self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_] self.weights_ = np.array(self.weights_) - self.weights_/=np.sum(self.weights_) + self.weights_/= np.sum(self.weights_) y[y == -1] = 0 y = y.reshape((m,)) end = time.time() self.train_time = end - start - print([epsi for epsi in self.epsilons])# if epsi >0.50]) return self def predict(self, X): @@ -159,23 +174,24 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): return signs_array def _compute_epsilon(self,y): - """Updating the \epsilon varaible""" + """Updating the error variable, the old fashioned way uses the whole majority vote to update the error""" if self.old_fashioned: return self._compute_epsilon_old() ones_matrix = np.zeros(y.shape) - ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = 1 + ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = 1 # can np.divide if needed epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0) return epsilon def _update_example_weights(self, y): + """Old fashioned exaple weights update uses the whole majority vote, the other way uses only the last voter.""" if self.old_fashioned: - self._update_example_weights(y) + self._update_example_weights_old(y) else: - new_weights = self.example_weights*np.exp(-self.q*y*self.new_voter) + new_weights = self.example_weights.reshape((self.n_total_examples, 1))*np.exp(-self.q*y*self.new_voter) self.example_weights = new_weights/np.sum(new_weights) def _compute_epsilon_old(self,): - """Updating the \epsilon varaible computed on the combination of the old vote and the new voter""" + """Updating the error variable computed on the combination of the old vote and the new voter""" ones_matrix = np.zeros(self.weighted_sum.shape) ones_matrix[self.weighted_sum < 0] = 1 epsilon = (1.0/self.n_total_examples)*np.sum(self.example_weights*ones_matrix, axis=0) @@ -194,22 +210,35 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): return worst_h_index def _find_best_weighted_margin(self, y_kernel_matrix): - """Just a try""" - if len(self.chosen_columns_) < 30: - weighted_kernel_matrix = np.multiply(y_kernel_matrix, self.example_weights.reshape((self.n_total_examples, 1))) - pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0), fill_value=-np.inf) - pseudo_h_values[self.chosen_columns_] = ma.masked - worst_h_index = ma.argmax(pseudo_h_values) + """Finds the new voter by choosing the one that has the best weighted margin between 0.5 and 0.55 + to avoid too god voters that will get all the votes weights""" + upper_bound = 0.55 + weighted_kernel_matrix = np.multiply(y_kernel_matrix, self.example_weights.reshape((self.n_total_examples, 1))) + pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0), fill_value=-np.inf) + pseudo_h_values[self.chosen_columns_] = ma.masked + acceptable_indices = np.where(np.logical_and(np.greater(upper_bound, pseudo_h_values), np.greater(pseudo_h_values, 0.5)))[0] + print(acceptable_indices, np.sum(self.example_weights)) + if acceptable_indices.size > 0: + worst_h_index = self.random_state.choice(acceptable_indices) return worst_h_index, [0] else: - return "plif", "plouf" + return " no margin over random and acceptable", "" def _is_not_too_wrong(self, hypothese, y): + """Check if the weighted margin is better than random""" ones_matrix = np.zeros(y.shape) ones_matrix[hypothese.reshape(y.shape) < 0] = 1 epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0) return epsilon < 0.5 + def get_possible(self, y_kernel_matrix, y): + """Get all the indices of the hypothesis that are good enough to be chosen""" + possibleIndices = [] + for hypIndex, hypothese in enumerate(np.transpose(y_kernel_matrix)): + if self._is_not_too_wrong(hypothese, y): + possibleIndices.append(hypIndex) + return np.array(possibleIndices) + def _find_new_voter(self, y_kernel_matrix, y): """Here, we solve the two_voters_mincq_problem for each potential new voter, and select the one that has the smallest minimum""" @@ -218,8 +247,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): indices = [] causes = [] for hypothese_index, hypothese in enumerate(y_kernel_matrix.transpose()): - if (hypothese_index not in self.chosen_columns_ or self.twice_the_same) and set(self.chosen_columns_)!={hypothese_index} and self._is_not_too_wrong(hypothese, y): - w = self._solve_two_weights_min_c(hypothese, y) + if (hypothese_index not in self.chosen_columns_ or self.twice_the_same)\ + and set(self.chosen_columns_)!={hypothese_index} \ + and self._is_not_too_wrong(hypothese, y): + if self.two_wieghts_problem: + w = self._solve_two_weights_min_c(hypothese, y) + else: + w = self._solve_one_weight_min_c(hypothese, y) if w[0] != "break": c_borns.append(self._cbound(w[0])) possible_sols.append(w) @@ -230,12 +264,78 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): causes = ["no feature was better than random and acceptable"] if c_borns: min_c_born_index = ma.argmin(c_borns) + print(c_borns[min_c_born_index]) + self.c_bounds.append(c_borns[min_c_born_index]) selected_sol = possible_sols[min_c_born_index] selected_voter_index = indices[min_c_born_index] return selected_sol, selected_voter_index else: return "break", " and ".join(set(causes)) + def _solve_one_weight_min_c(self, next_column, y): + """Here we solve the min C-bound problem for two voters using one weight only and return the best weight + No precalc because longer ; see the "derivee" latex document for more precision""" + m = next_column.shape[0] + zero_diag = np.ones((m, m)) - np.identity(m) + if self.previous_vote_weighted: + weighted_previous_sum = np.multiply(np.multiply(y, self.previous_vote.reshape((m, 1))), self.example_weights.reshape((m,1))) + else: + weighted_previous_sum = np.multiply(y, self.previous_vote.reshape((m, 1))) + weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1))) + + self.B2 = np.sum(weighted_next_column ** 2) + self.B1 = np.sum(2 * weighted_next_column * weighted_next_column) + self.B0 = np.sum(weighted_previous_sum ** 2) + + M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) + M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_next_column)) + + np.matmul(weighted_next_column, np.transpose(weighted_previous_sum)) + , zero_diag)) + M0 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_previous_sum)), zero_diag)) + + self.A2 = self.B2 + M2 + self.A1 = self.B1 + M1 + self.A0 = self.B0 + M0 + + C2 = (M1 * self.B2 - M2 * self.B1) + C1 = 2 * (M0 * self.B2 - M2 * self.B0) + C0 = M0 * self.B1 - M1 * self.B0 + if C2 == 0: + if C1 == 0: + return ['break', "the derivate was constant"] + else : + is_acceptable, sol = self._analyze_solutions_one_weight(np.array(float(C0)/C1)) + if is_acceptable: + return np.array([sol]) + try: + sols = np.roots(np.array([C2, C1, C0])) + except: + return ["break", "nan"] + + is_acceptable, sol = self._analyze_solutions_one_weight(sols) + if is_acceptable: + return np.array([sol]) + else: + return ["break", sol] + + def _analyze_solutions_one_weight(self, sols): + """"We just check that the solution found by np.roots is acceptable under our constraints + (real, a minimum and over 0)""" + if sols.shape[0] == 1: + if self._cbound(sols[0]) < self._cbound(sols[0] + 1): + best_sol = sols[0] + else: + return False, "the only solution was a maximum." + elif sols.shape[0] == 2: + best_sol = self._best_sol(sols) + else: + return False, "no solution were found" + + if isinstance(best_sol, complex): + return False, "the sol was complex" + else: + return True, best_sol + def _solve_two_weights_min_c(self, next_column, y): """Here we solve the min C-bound problem for two voters and return the best 2-weights array No precalc because longer""" @@ -248,11 +348,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1))) self.B2 = np.sum((weighted_previous_sum - weighted_next_column) ** 2) - self.B1 = np.sum(2 * weighted_next_column * (weighted_previous_sum - 2 * weighted_next_column * weighted_next_column)) + self.B1 = np.sum(2 * weighted_next_column * (weighted_previous_sum - weighted_next_column)) self.B0 = np.sum(weighted_next_column * weighted_next_column) M2 = np.sum(np.multiply(np.matmul((weighted_previous_sum - weighted_next_column), np.transpose(weighted_previous_sum - weighted_next_column)), zero_diag)) - M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_next_column)) - np.matmul(weighted_next_column, np.transpose(weighted_previous_sum)) - 2*np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) + M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_next_column)) + np.matmul(weighted_next_column, np.transpose(weighted_previous_sum)) - 2*np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) M0 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) self.A2 = self.B2 + M2 @@ -276,7 +376,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): sols = np.roots(np.array([C2, C1, C0])) except: return ["break", "nan"] - is_acceptable, sol = self._analyze_solutions(sols) if is_acceptable: return np.array([sol, 1-sol]) @@ -309,30 +408,49 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def _cbound(self, sol): """Computing the objective function""" - return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0) + return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0)/self.n_total_examples def _best_sol(self, sols): + """Return the best min in the two possible sols""" values = np.array([self._cbound(sol) for sol in sols]) return sols[np.argmin(values)] def _initialize_alphas(self, n_examples): + """Initialize the examples wieghts""" return 1.0 / n_examples * np.ones((n_examples,)) + def getInterpretQar(self, directory): + """Used to interpret the functionning of the algorithm""" + path = "/".join(directory.split("/")[:-1]) + try: + import os + os.makedirs(path+"/gif_images") + except: + raise + filenames=[] + max_weight = max([np.max(examples_weights) for examples_weights in self.example_weights_]) + for iterIndex, examples_weights in enumerate(self.example_weights_): + r = np.array(examples_weights) + theta = np.arange(self.n_total_examples) + colors = np.sign(self.previous_margins[iterIndex]) + fig = plt.figure(figsize=(5, 5), dpi=80) + ax = fig.add_subplot(111) + c = ax.scatter(theta, r, c=colors, cmap='RdYlGn', alpha=0.75) + ax.set_ylim(0.0, max_weight) + filename = path+"/gif_images/"+str(iterIndex)+".png" + filenames.append(filename) + plt.savefig(filename) + plt.close() + + import imageio + images = [] + for filename in filenames: + images.append(imageio.imread(filename)) + imageio.mimsave(path+'/weights.gif', images, duration=1. / 2) + import shutil + shutil.rmtree(path+"/gif_images") + -# def to_mat(vect, n_cols): -# if vect.shape[1] == 1: -# return np.array([vect for _ in range(n_cols)]) -# else: -# col_vect = np.reshape(vect, (vect.shape[0], 1)) -# return np.array([col_vect for _ in range(n_cols)]) -# class QarBoostClassifier(ColumnGenerationClassifierQar): -# def __init__(self, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None, random_state=42, self_complemented=True): -# super(QarBoostClassifier, self).__init__(n_max_iterations, estimators_generator, dual_constraint_rhs=0, -# save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, random_state=random_state, self_complemente=self_complemented) -# -# -# def _initialize_alphas(self, n_examples): -# return 1.0 / n_examples * np.ones((n_examples,)) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 77be240546d97401c5a84197915da3fab122b15e..59260507b66a5e3d99ad152db30415bd6b8967e6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -20,13 +20,13 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo estimator = getattr(classifierModule, CL_type)(randomState) params_dict = estimator.genDistribs() if params_dict: - nb_possible_combinations = compute_possible_combinations(params_dict) metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) + nb_possible_combinations = compute_possible_combinations(params_dict) if nIter > nb_possible_combinations: nIter = nb_possible_combinations randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True, diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/__init__.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/__init__.py index 8bbd0830cf0726bd3df3fd0fa4f120876627e912..e94c149514edbf920daebd101e425a0e22c03d02 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/__init__.py @@ -1 +1 @@ -from . import ExecClassifMonoView, MonoviewUtils, analyzeResult +# from . import ExecClassifMonoView, MonoviewUtils, analyzeResult diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 91baeefc31af070593b046156e204d6c38a22d5f..dfca9f84cefb22e8cb24983afc87e45a3f1ebc8e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -16,11 +16,12 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): random_state=random_state, n_estimators=n_estimators, base_estimator=base_estimator, + algorithm="SAMME" ) self.param_names = ["n_estimators", "base_estimator"] self.classed_params = ["base_estimator"] self.distribs = [CustomRandint(low=1, high=500), [None]] - self.weird_strings = {"base_estimator":"class_name"} + self.weird_strings = {"base_estimator": "class_name"} def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -29,8 +30,8 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): def getInterpret(self, directory): interpretString = "" interpretString += self.getFeatureImportance(directory) - interpretString += "\n\n" - interpretString += str(self.estimator_errors_) + interpretString += "\n\n Estimator error | Estimator weight\n" + interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)]) return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py index 516c7bb0c0fdb873c456a5d51fefd906ec1e53de..9274d9dc11ecaff6c5d107c20f305176650626f7 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py @@ -79,7 +79,7 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote))) continue - # ---- On résoud le problème à deux votants analytiquement. + # ---- On resoud le probleme a deux votants analytiquement. w = self._solve_two_weights_min_c(new_voter_margin, example_weights) if w[0] == "break": self.chosen_columns_.pop() diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py index 3037abceb3a0f238df7d410444f490d9cd5fd8b2..e7de1a29a46c6e416b5f591fb9aefc9977108461 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py @@ -19,7 +19,7 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier): ) self.param_names = ["n_neighbors", "weights", "algorithm", "p"] self.classed_params = [] - self.distribs = [CustomRandint(low=1, high=20), ["uniform", "distance"], + self.distribs = [CustomRandint(low=1, high=10), ["uniform", "distance"], ["auto", "ball_tree", "kd_tree", "brute"], [1, 2]] self.weird_strings = {} self.random_state=random_state diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py index 8ca5b6f97b56fdf97ea5f5bad121dda5b577489b..f973f18a62652665d35da83ed66743238d6e7879 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py @@ -7,10 +7,12 @@ class QarBoost(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): super(QarBoost, self).__init__( - random_state=random_state, - ) - self.param_names = [] - self.distribs = [] + random_state=random_state) + + self.param_names = ["self_complemented", "twice_the_same", "old_fashioned", "previous_vote_weighted", + "c_bound_choice", "random_start", "two_wieghts_problem"] + self.distribs = [[True, False], [True, False], [True, False], [True, False], + [True, False], [True, False], [True, False]] self.classed_params = [] self.weird_strings = {} @@ -19,6 +21,7 @@ class QarBoost(ColumnGenerationClassifierQar, BaseMonoviewClassifier): return True def getInterpret(self, directory): + self.getInterpretQar(directory) return getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py index a7ff114e85632089b04fda958a41c2eecb9eb922..b79cc9483d0dff5b215dcc3224d73fd8e103e33c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py @@ -9,7 +9,7 @@ class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): super(QarBoostNC3, self).__init__( random_state=random_state, self_complemented=False, - twice_the_same=True, + twice_the_same=False, previous_vote_weighted=False ) self.param_names = [] @@ -22,6 +22,7 @@ class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): return True def getInterpret(self, directory): + self.getInterpretQar(directory) return getInterpretBase(self, directory, "QarBoostNC3", self.weights_, self.break_cause) def get_name_for_fusion(self): diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py index 5b8b234e5e1a51975d7e17ebb68049244d491112..ccbc52bce4eeadcf6c2c963516cbee460eb68dcd 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py @@ -9,7 +9,8 @@ class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): super(QarBoostv2, self).__init__( random_state=random_state, self_complemented=True, - twice_the_same=True + twice_the_same=True, + previous_vote_weighted=True ) self.param_names = [] self.distribs = []