Commit a87e1266 authored by Baptiste Bauvin

Corrected B1 and modified bound computing for clarity

parent 1c2c00c8
@@ -87,14 +87,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.epsilons = []
         self.example_weights_ = [self.example_weights]
         self.train_metrics = []
-        self.gammas = []
         self.bounds = []
         self.previous_votes = []
         self.previous_margins = [np.multiply(y,y)]
         self.n_total_hypotheses_ = n
         self.n_total_examples = m
-        self.n_max_iterations = n
+        self.n_max_iterations = 100
         self.break_cause = " the maximum number of iterations was attained."
         for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
@@ -105,37 +104,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                     first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y))
                 else:
                     first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
                 self.chosen_columns_.append(first_voter_index)
                 self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
                 self.previous_vote = self.new_voter
                 self.weighted_sum = self.new_voter
-                # We update the weights of the examples according to the error of the fisrt voter
-                epsilon = self._compute_epsilon(y)
-                if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
-                    self.break_cause = " epsilon was too small"
-                    self.weights_ = [1.0]
-                    self.train_accuracies = [1.0]
-                    break
-                self.epsilons.append(epsilon)
-                if self.divided_ponderation:
-                    self.q = (1 / (self.n_max_iterations - k)) * math.log((1 - epsilon) / epsilon)
-                else:
-                    # self.q = math.log((1 - epsilon) / epsilon)
-                    self.q = math.log((1 + epsilon) / (1-epsilon))
-                self.weights_.append(self.q)
-                # Update the boosting variables
-                self._update_example_weights(y)
-                self.example_weights_.append(self.example_weights)
-                self.previous_margins.append(np.multiply(y, self.previous_vote))
-                self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
-                self.gammas.append(accuracy_score(y, np.sign(self.previous_vote))-0.5)
-                self.bounds.append(math.exp(-2*self.gammas[-1]**2))
-                continue
-            # Print dynamicly the step and the error of the current classifier
+            # Print dynamically the step and the error of the current classifier
             print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
             # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
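Note: the deleted block above special-cased the first iteration; with it gone, k == 0 falls through to the shared epsilon/weight/bound updates guarded by `if k != 0` in a later hunk. For reference, the voter weight rule kept here is the AdaBoost-style q = log((1 - epsilon) / epsilon), which is why epsilon = 0 had to be trapped before taking the log. A minimal standalone sketch, not part of the diff, with illustrative names:

    import math

    def voter_weight(epsilon, eps_min=1e-12):
        """AdaBoost-style weight for a voter with weighted error epsilon."""
        # log((1 - eps) / eps) diverges as eps -> 0, which is why the fit
        # loop above breaks out early when epsilon reaches zero.
        if epsilon <= eps_min or epsilon >= 1 - eps_min:
            raise ValueError("epsilon must lie strictly inside (0, 1)")
        return math.log((1 - epsilon) / epsilon)

    for eps in (0.1, 0.25, 0.4, 0.49):
        print(eps, voter_weight(eps))  # weight shrinks toward 0 as eps -> 0.5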
@@ -147,7 +124,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             # If the new voter selector could not find one, break the loop
             if type(sol) == str:
-                self.break_cause = new_voter_index # " no more hypothesis were able to improve the boosted vote."
+                self.break_cause = new_voter_index #
                 break
             # Append the weak hypothesis.
@@ -165,20 +142,25 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                 self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon)
             else:
                 self.q = math.log((1 - epsilon) / epsilon)
                 # self.q = math.log((1 + epsilon) / (1 - epsilon))
             self.weights_.append(self.q)
             # Update the distribution on the examples.
             self._update_example_weights(y)
             self.example_weights_.append(self.example_weights)
             if k != 0:
                 # Update the "previous vote" to prepare for the next iteration
                 self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
                                                np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
                 self.previous_votes.append(self.previous_vote)
             self.previous_margins.append(np.multiply(y, self.previous_vote))
             self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
-            self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
+            # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
+            r = self._compute_r(y)
+            if k!=0:
+                self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
+            else:
+                self.bounds.append(math.sqrt(1 - r ** 2))
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
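Note: the replaced product bound and its recursive successor agree. With votes in {-1, +1}, r = 1 - 2*epsilon, so 1 - 4*(0.5 - epsilon)^2 = 1 - r^2, and multiplying the previous bound by sqrt(1 - r^2) rebuilds the same product at O(1) cost per iteration. A standalone check with synthetic epsilons (illustrative values, not from the repository):

    import math
    import numpy as np

    rng = np.random.default_rng(0)
    epsilons = rng.uniform(0.05, 0.45, size=10)  # illustrative weighted errors

    # Old form: recompute the whole product at every step.
    old_bounds = [np.prod(np.sqrt(1 - 4 * np.square(0.5 - epsilons[:t + 1])))
                  for t in range(len(epsilons))]

    # New form: running product over r_t = 1 - 2 * eps_t, as in the commit.
    new_bounds = []
    for eps in epsilons:
        r = 1 - 2 * eps  # weighted margin when votes are in {-1, +1}
        factor = math.sqrt(1 - r ** 2)
        new_bounds.append(factor if not new_bounds else new_bounds[-1] * factor)

    assert np.allclose(old_bounds, new_bounds)  # identical bound, cheaper update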
@@ -214,6 +196,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0)
         return epsilon
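Note: only the tail of `_compute_epsilon` is visible here. By analogy with `_compute_r` below, `ones_matrix` presumably holds 1 on misclassified examples and 0 elsewhere, making epsilon the weighted error of the newest voter. A standalone sketch of that reading, with illustrative names and data:

    import numpy as np

    def weighted_error(y, votes, weights):
        """Weighted error: the fraction of weight on examples the voter gets wrong."""
        wrong = np.multiply(y, votes) < 0      # margin < 0 means a mistake
        indicator = np.where(wrong, 1.0, 0.0)  # 1 on errors, 0 on correct
        return np.average(indicator, weights=weights)

    y = np.array([1, -1, 1, 1])
    votes = np.array([1, 1, -1, 1])
    w = np.full(4, 0.25)
    print(weighted_error(y, votes, w))  # 0.5: half the weight is misclassified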
+    def _compute_r(self, y):
+        ones_matrix = np.ones(y.shape)
+        ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = -1  # can np.divide if needed
+        r = np.average(ones_matrix, weights=self.example_weights, axis=0)
+        return r
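Note: `_compute_r` is `_compute_epsilon` with a {-1, +1} indicator in place of {0, 1}. The weighted average of sign(y * h) equals (1 - epsilon) - epsilon = 1 - 2*epsilon, which ties the r used in the recursive bound above back to the stored epsilons.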
     def _update_example_weights(self, y):
         """Old fashioned example weights update uses the whole majority vote, the other way uses only the last voter."""
         if self.old_fashioned:
@@ -314,7 +302,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
         self.B2 = np.sum(weighted_next_column ** 2)
-        self.B1 = np.sum(2 * weighted_next_column * weighted_next_column)
+        self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum)
         self.B0 = np.sum(weighted_previous_sum ** 2)
         M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag))
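Note: the B1 fix named in the commit title reads as the cross term of a squared norm. Assuming, from the names alone (the rest of the method is not shown), that B0, B1, B2 are the coefficients of ||weighted_previous_sum + q * weighted_next_column||^2 as a quadratic in q, B1 must be 2 * sum(next * previous); the old line simply duplicated B2. A numeric check of that expansion with synthetic vectors:

    import numpy as np

    rng = np.random.default_rng(1)
    m = 6
    s = rng.normal(size=(m, 1))  # stands in for weighted_previous_sum
    c = rng.normal(size=(m, 1))  # stands in for weighted_next_column
    q = 0.7

    B2 = np.sum(c ** 2)
    B1 = np.sum(2 * c * s)       # the corrected cross term
    B0 = np.sum(s ** 2)

    # ||s + q*c||^2 expands to B0 + B1*q + B2*q^2 only with the corrected B1.
    lhs = np.sum((s + q * c) ** 2)
    rhs = B0 + B1 * q + B2 * q ** 2
    assert np.isclose(lhs, rhs)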
@@ -9,15 +9,15 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
     def __init__(self, random_state=None, **kwargs):
         super(QarBoostNC, self).__init__(
             random_state=random_state,
-            self_complemented=False,
+            self_complemented=True,
             twice_the_same=False,
             old_fashioned=False,
             previous_vote_weighted=False,
-            c_bound_choice=True,
+            c_bound_choice=False,
             random_start=True,
-            two_wieghts_problem=False,
+            two_wieghts_problem=True,
             divided_ponderation=False,
-            n_stumps_per_attribute=1
+            n_stumps_per_attribute=100
             )
         self.param_names = []
         self.distribs = []
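Note: judging by the parameter names alone (their definitions lie outside this diff), these default flips move QarBoostNC off C-bound-driven voter selection and onto the two-weights formulation over self-complemented hypotheses, and grow the pool from 1 to 100 stumps per attribute. The empty `param_names` and `distribs` presumably leave nothing for hyper-parameter search to tune.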