Skip to content
Snippets Groups Projects
Commit 23159936 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Crue simplification

parent d26c9f30
Branches
Tags
No related merge requests found
......@@ -66,23 +66,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# Print dynamically the step and the error of the current classifier
print("{}/{}, eps :{}".format(k+2, self.n_max_iterations, self.epsilons[-1]), end="\r")
# Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
# C-bound with the previous vote or the one with the best weighted margin
sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y)
# If the new voter selector could not find one, break the loop
if type(sol) == str:
self.break_cause = new_voter_index #
break
# Append the weak hypothesis.
self.append_new_voter(new_voter_index)
# Generate the new weight for the new voter
epsilon, r = self.compute_voter_perf(formatted_y)
if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
self.chosen_columns_.pop()
self.break_cause = " epsilon was too small."
......@@ -90,20 +83,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.compute_voter_weight(r, epsilon)
# Update the distribution on the examples.
self.update_example_weights(formatted_y)
# Update the "previous vote" to prepare for the next iteration
self.update_info_containers(formatted_y, r, k)
self.nb_opposed_voters = self.check_opposed_voters()
self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
self.weights_ = np.array(self.weights_)
self.weights_ = np.array(self.weights_)
self.weights_/= np.sum(self.weights_)
formatted_y[formatted_y == -1] = 0
formatted_y = formatted_y.reshape((m,))
end = time.time()
self.train_time = end - start
return self
......@@ -123,6 +116,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
return signs_array
def update_info_containers(self, y, r, k):
"""Is used at each iteration to compute and store all the needed quantities for later analysis"""
self.example_weights_.append(self.example_weights)
self.previous_vote = np.matmul(
self.classification_matrix[:, self.chosen_columns_],
......@@ -137,6 +131,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
def compute_voter_weight(self, r, epsilon):
"""used to compute the voter's weight according to the specified method (edge or error) """
if self.use_r:
self.q = 0.5 * math.log((1 + r) / (1 - r))
else:
......@@ -144,6 +139,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.weights_.append(self.q)
def compute_voter_perf(self, formatted_y):
"""Used to compute the performance (error or edge) of the selected voter"""
epsilon = self._compute_epsilon(formatted_y)
self.epsilons.append(epsilon)
......@@ -151,11 +147,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
return epsilon, r
def append_new_voter(self, new_voter_index):
    """Add the selected voter to the majority vote.

    Records ``new_voter_index`` in ``self.chosen_columns_`` and stores the
    matching column of ``self.classification_matrix`` in ``self.new_voter``,
    reshaped to ``(self.n_total_examples, 1)``.
    """
    self.chosen_columns_.append(new_voter_index)
    # Pull the voter's column out first, then turn it into a column vector.
    voter_column = self.classification_matrix[:, new_voter_index]
    column_shape = (self.n_total_examples, 1)
    self.new_voter = voter_column.reshape(column_shape)
def choose_new_voter(self, y_kernel_matrix, formatted_y):
"""Used to choose the voter according to the specified criterion (margin or C-bound)"""
if self.c_bound_choice:
sol, new_voter_index = self._find_new_voter(y_kernel_matrix,
formatted_y)
......@@ -166,6 +164,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def init_boosting(self, m, y, y_kernel_matrix):
"""This initialization corresponds to the first round of boosting, with equal weights for each example and the voter chosen by its margin."""
self.example_weights = self._initialize_alphas(m).reshape((m, 1))
self.previous_margins.append(np.multiply(y, y))
......@@ -206,6 +205,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.bounds.append(math.sqrt(1 - r ** 2))
def format_X_y(self, X, y):
"""Formats the data : X -the examples- and y -the labels- to be used properly by the algorithm """
if scipy.sparse.issparse(X):
logging.info('Converting to dense matrix.')
X = np.array(X.todense())
......@@ -215,6 +215,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
return X, y
def init_hypotheses(self, X, y):
"""Initialization for the hypotheses used to build the boosted vote"""
if self.estimators_generator is None:
self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps,
self_complemented=self.self_complemented)
......@@ -226,6 +227,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
return m,n,y_kernel_matrix
def init_info_containers(self):
"""Initialize the containers that will be collected at each iteration for the analysis"""
self.weights_ = []
self.chosen_columns_ = []
self.fobidden_columns = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment