Skip to content
Snippets Groups Projects
Commit a87e1266 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Corrected B1 and modified bound computing for clarity

parent 1c2c00c8
No related branches found
No related tags found
No related merge requests found
...@@ -87,14 +87,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -87,14 +87,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.epsilons = [] self.epsilons = []
self.example_weights_ = [self.example_weights] self.example_weights_ = [self.example_weights]
self.train_metrics = [] self.train_metrics = []
self.gammas = []
self.bounds = [] self.bounds = []
self.previous_votes = [] self.previous_votes = []
self.previous_margins = [np.multiply(y,y)] self.previous_margins = [np.multiply(y,y)]
self.n_total_hypotheses_ = n self.n_total_hypotheses_ = n
self.n_total_examples = m self.n_total_examples = m
self.n_max_iterations = n self.n_max_iterations = 100
self.break_cause = " the maximum number of iterations was attained." self.break_cause = " the maximum number of iterations was attained."
for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)): for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)):
...@@ -105,37 +104,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -105,37 +104,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y)) first_voter_index = self.random_state.choice(self.get_possible(y_kernel_matrix, y))
else: else:
first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix) first_voter_index, _ = self._find_best_weighted_margin(y_kernel_matrix)
self.chosen_columns_.append(first_voter_index) self.chosen_columns_.append(first_voter_index)
self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1)) self.new_voter = self.classification_matrix[:, first_voter_index].reshape((m,1))
self.previous_vote = self.new_voter self.previous_vote = self.new_voter
self.weighted_sum = self.new_voter self.weighted_sum = self.new_voter
# We update the weights of the examples according to the error of the first voter
epsilon = self._compute_epsilon(y)
if epsilon == 0. or math.log((1 - epsilon) / epsilon) == math.inf:
self.break_cause = " epsilon was too small"
self.weights_ = [1.0]
self.train_accuracies = [1.0]
break
self.epsilons.append(epsilon)
if self.divided_ponderation:
self.q = (1 / (self.n_max_iterations - k)) * math.log((1 - epsilon) / epsilon)
else: else:
# self.q = math.log((1 - epsilon) / epsilon) # Print dynamically the step and the error of the current classifier
self.q = math.log((1 + epsilon) / (1-epsilon))
self.weights_.append(self.q)
# Update the boosting variables
self._update_example_weights(y)
self.example_weights_.append(self.example_weights)
self.previous_margins.append(np.multiply(y, self.previous_vote))
self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
self.gammas.append(accuracy_score(y, np.sign(self.previous_vote))-0.5)
self.bounds.append(math.exp(-2*self.gammas[-1]**2))
continue
# Print dynamicly the step and the error of the current classifier
print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r") print("{}/{}, eps :{}".format(k, self.n_max_iterations, self.epsilons[-1]), end="\r")
# Find best weak hypothesis given example_weights. Select the one that has the lowest minimum # Find best weak hypothesis given example_weights. Select the one that has the lowest minimum
...@@ -147,7 +124,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -147,7 +124,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# If the new voter selector could not find one, break the loop # If the new voter selector could not find one, break the loop
if type(sol) == str: if type(sol) == str:
self.break_cause = new_voter_index # " no more hypothesis were able to improve the boosted vote." self.break_cause = new_voter_index #
break break
# Append the weak hypothesis. # Append the weak hypothesis.
...@@ -165,20 +142,25 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -165,20 +142,25 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon) self.q = (1/(self.n_max_iterations-k))*math.log((1 - epsilon) / epsilon)
else: else:
self.q = math.log((1 - epsilon) / epsilon) self.q = math.log((1 - epsilon) / epsilon)
# self.q = math.log((1 + epsilon) / (1 - epsilon))
self.weights_.append(self.q) self.weights_.append(self.q)
# Update the distribution on the examples. # Update the distribution on the examples.
self._update_example_weights(y) self._update_example_weights(y)
self.example_weights_.append(self.example_weights) self.example_weights_.append(self.example_weights)
if k != 0:
# Update the "previous vote" to prepare for the next iteration # Update the "previous vote" to prepare for the next iteration
self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_], self.previous_vote = np.matmul(self.classification_matrix[:, self.chosen_columns_],
np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1)) np.array(self.weights_).reshape((k + 1, 1))).reshape((m, 1))
self.previous_votes.append(self.previous_vote) self.previous_votes.append(self.previous_vote)
self.previous_margins.append(np.multiply(y, self.previous_vote)) self.previous_margins.append(np.multiply(y, self.previous_vote))
self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote))) self.train_metrics.append(self.plotted_metric.score(y, np.sign(self.previous_vote)))
self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons))))) # self.bounds.append(np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.epsilons)))))
r = self._compute_r(y)
if k!=0:
self.bounds.append(self.bounds[-1]*math.sqrt(1-r**2))
else:
self.bounds.append(math.sqrt(1 - r ** 2))
self.nb_opposed_voters = self.check_opposed_voters() self.nb_opposed_voters = self.check_opposed_voters()
self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_] self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
...@@ -214,6 +196,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -214,6 +196,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0) epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0)
return epsilon return epsilon
def _compute_r(self, y):
ones_matrix = np.ones(y.shape)
ones_matrix[np.multiply(y, self.new_voter.reshape(y.shape)) < 0] = -1 # can np.divide if needed
r = np.average(ones_matrix, weights=self.example_weights, axis=0)
return r
def _update_example_weights(self, y): def _update_example_weights(self, y):
"""Old fashioned exaple weights update uses the whole majority vote, the other way uses only the last voter.""" """Old fashioned exaple weights update uses the whole majority vote, the other way uses only the last voter."""
if self.old_fashioned: if self.old_fashioned:
...@@ -314,7 +302,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -314,7 +302,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1))) weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
self.B2 = np.sum(weighted_next_column ** 2) self.B2 = np.sum(weighted_next_column ** 2)
self.B1 = np.sum(2 * weighted_next_column * weighted_next_column) self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum)
self.B0 = np.sum(weighted_previous_sum ** 2) self.B0 = np.sum(weighted_previous_sum ** 2)
M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag)) M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag))
......
...@@ -9,15 +9,15 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier): ...@@ -9,15 +9,15 @@ class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs): def __init__(self, random_state=None, **kwargs):
super(QarBoostNC, self).__init__( super(QarBoostNC, self).__init__(
random_state=random_state, random_state=random_state,
self_complemented=False, self_complemented=True,
twice_the_same=False, twice_the_same=False,
old_fashioned=False, old_fashioned=False,
previous_vote_weighted=False, previous_vote_weighted=False,
c_bound_choice=True, c_bound_choice=False,
random_start=True, random_start=True,
two_wieghts_problem=False, two_wieghts_problem=True,
divided_ponderation=False, divided_ponderation=False,
n_stumps_per_attribute=1 n_stumps_per_attribute=100
) )
self.param_names = [] self.param_names = []
self.distribs = [] self.distribs = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment