Skip to content
Snippets Groups Projects
Commit aed9da3b authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Reformatted code

parent 77fc6fe0
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,8 @@ from sklearn.base import BaseEstimator, ClassifierMixin
import time
import matplotlib.pyplot as plt
from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, getInterpretBase, get_accuracy_graph
from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, \
getInterpretBase, get_accuracy_graph
from ... import Metrics
......@@ -59,7 +60,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if n_stumps_per_attribute:
self.n_stumps = n_stumps_per_attribute
self.use_r = use_r
self.printed_args_name_list = ["n_max_iterations", "self_complemented", "twice_the_same",
self.printed_args_name_list = ["n_max_iterations", "self_complemented",
"twice_the_same",
"c_bound_choice", "random_start",
"n_stumps", "use_r"]
......@@ -87,12 +89,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.init_boosting(m, formatted_y, y_kernel_matrix)
self.break_cause = " the maximum number of iterations was attained."
for k in range(min(n-1, self.n_max_iterations-1 if self.n_max_iterations is not None else np.inf)):
for k in range(min(n - 1,
self.n_max_iterations - 1 if self.n_max_iterations is not None else np.inf)):
# Print dynamically the step and the error of the current classifier
print("Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound, k+2, self.n_max_iterations, self.voter_perfs[-1]), end="\r")
sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y)
print(
"Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound,
k + 2,
self.n_max_iterations,
self.voter_perfs[-1]),
end="\r")
sol, new_voter_index = self.choose_new_voter(y_kernel_matrix,
formatted_y)
if type(sol) == str:
self.break_cause = new_voter_index #
......@@ -108,9 +117,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.update_info_containers(formatted_y, voter_perf, k)
self.nb_opposed_voters = self.check_opposed_voters()
self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_]
self.estimators_generator.estimators_ = \
self.estimators_generator.estimators_[self.chosen_columns_]
self.weights_ = np.array(self.weights_)
self.weights_ /= np.sum(self.weights_)
......@@ -129,7 +138,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
logging.warning('Converting sparse matrix to dense matrix.')
X = np.array(X.todense())
classification_matrix = self._binary_classification_matrix(X)
margins = np.squeeze(np.asarray(np.matmul(classification_matrix, self.weights_)))
margins = np.squeeze(
np.asarray(np.matmul(classification_matrix, self.weights_)))
signs_array = np.array([int(x) for x in sign(margins)])
signs_array[signs_array == -1] = 0
end = time.time()
......@@ -141,7 +151,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.example_weights_.append(self.example_weights)
self.previous_vote = np.matmul(
self.classification_matrix[:, self.chosen_columns_],
np.array(self.weights_).reshape((k + 2, 1))).reshape((self.n_total_examples, 1))
np.array(self.weights_).reshape((k + 2, 1))).reshape(
(self.n_total_examples, 1))
self.previous_votes.append(self.previous_vote)
self.previous_margins.append(
......@@ -150,7 +161,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if self.use_r:
bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2)
else:
bound = np.prod(np.sqrt(1-4*np.square(0.5-np.array(self.voter_perfs))))
bound = np.prod(
np.sqrt(1 - 4 * np.square(0.5 - np.array(self.voter_perfs))))
if train_metric > bound:
self.respected_bound = False
......@@ -193,7 +205,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
y_kernel_matrix)
return sol, new_voter_index
def init_boosting(self, m, y, y_kernel_matrix):
"""This initialization corresponds to the first round of boosting, with equal weights for each example and the voter chosen by its margin."""
self.example_weights = self._initialize_alphas(m).reshape((m, 1))
......@@ -220,7 +231,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
epsilon = self._compute_epsilon(y)
self.voter_perfs.append(epsilon)
if self.use_r:
self.q = 0.5 * math.log((1 + r) / (1 - r))
else:
......@@ -260,7 +270,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def init_hypotheses(self, X, y):
"""Initialization for the hypotheses used to build the boosted vote"""
if self.estimators_generator is None:
self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps,
self.estimators_generator = StumpsClassifiersGenerator(
n_stumps_per_attribute=self.n_stumps,
self_complemented=self.self_complemented)
self.estimators_generator.fit(X, y)
self.classification_matrix = self._binary_classification_matrix(X)
......@@ -286,28 +297,38 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def _compute_epsilon(self, y):
    """Return the weighted error of the newest voter.

    An example is counted as an error when the product of its entry in
    ``y`` and the matching entry of ``self.new_voter`` (reshaped to
    ``y.shape``) is strictly negative.

    :param y: label array; ``self.new_voter`` must be reshapable to
        ``y.shape``.
    :return: average of the 0/1 error indicator along axis 0, weighted by
        ``self.example_weights``.
    """
    # 1 where voter and label disagree in sign, 0 everywhere else.
    ones_matrix = np.zeros(y.shape)
    disagreement = np.multiply(y, self.new_voter.reshape(y.shape)) < 0
    ones_matrix[disagreement] = 1  # can np.divide if needed
    epsilon = np.average(ones_matrix, weights=self.example_weights, axis=0)
    return epsilon
def _compute_r(self, y):
    """Return the weighted edge of the newest voter.

    Each example contributes ``+1``, or ``-1`` when the product of its
    entry in ``y`` and the matching entry of ``self.new_voter`` (reshaped
    to ``y.shape``) is strictly negative.

    :param y: label array; ``self.new_voter`` must be reshapable to
        ``y.shape``.
    :return: average of the +1/-1 indicator along axis 0, weighted by
        ``self.example_weights``.
    """
    # Start from +1 everywhere and flip the sign-disagreeing examples to -1.
    ones_matrix = np.ones(y.shape)
    disagreement = np.multiply(y, self.new_voter.reshape(y.shape)) < 0
    ones_matrix[disagreement] = -1  # can np.divide if needed
    r = np.average(ones_matrix, weights=self.example_weights, axis=0)
    return r
def update_example_weights(self, y):
    """Re-weight the examples using only the last chosen voter.

    Each current weight is multiplied by
    ``exp(-self.q * y * self.new_voter)`` and the result is normalised to
    sum to one. The new weights are stored in ``self.example_weights``;
    nothing is returned.

    :param y: label array, multiplied element-wise with
        ``self.new_voter``.
    """
    # NOTE(review): unlike _compute_epsilon/_compute_r, new_voter is not
    # reshaped to y.shape here; presumably both are already
    # (n_total_examples, 1) at this point -- confirm against the caller.
    current_weights = self.example_weights.reshape(
        (self.n_total_examples, 1))
    new_weights = current_weights * np.exp(
        -self.q * np.multiply(y, self.new_voter))
    self.example_weights = new_weights / np.sum(new_weights)
def _find_best_weighted_margin(self, y_kernel_matrix, upper_bound=1.0, lower_bound=0.0):
def _find_best_weighted_margin(self, y_kernel_matrix, upper_bound=1.0,
lower_bound=0.0):
"""Finds the new voter by choosing the one that has the best weighted margin between 0.5 and 0.55
to avoid too good voters that will get all the votes weights"""
weighted_kernel_matrix = np.multiply(y_kernel_matrix, self.example_weights.reshape((self.n_total_examples, 1)))
pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0), fill_value=-np.inf)
weighted_kernel_matrix = np.multiply(y_kernel_matrix,
self.example_weights.reshape(
(self.n_total_examples, 1)))
pseudo_h_values = ma.array(np.sum(weighted_kernel_matrix, axis=0),
fill_value=-np.inf)
pseudo_h_values[self.chosen_columns_] = ma.masked
acceptable_indices = np.where(np.logical_and(np.greater(upper_bound, pseudo_h_values), np.greater(pseudo_h_values, lower_bound)))[0]
acceptable_indices = np.where(
np.logical_and(np.greater(upper_bound, pseudo_h_values),
np.greater(pseudo_h_values, lower_bound)))[0]
if acceptable_indices.size > 0:
worst_h_index = self.random_state.choice(acceptable_indices)
return worst_h_index, [0]
......@@ -336,8 +357,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
possible_sols = []
indices = []
causes = []
for hypothese_index, hypothese in enumerate(y_kernel_matrix.transpose()):
if (hypothese_index not in self.chosen_columns_ or self.twice_the_same) \
for hypothese_index, hypothese in enumerate(
y_kernel_matrix.transpose()):
if (
hypothese_index not in self.chosen_columns_ or self.twice_the_same) \
and set(self.chosen_columns_) != {hypothese_index} \
and self._is_not_too_wrong(hypothese, y):
w = self._solve_one_weight_min_c(hypothese, y)
......@@ -363,18 +386,26 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
No precalc because longer ; see the "derivee" latex document for more precision"""
m = next_column.shape[0]
zero_diag = np.ones((m, m)) - np.identity(m)
weighted_previous_sum = np.multiply(y, self.previous_vote.reshape((m, 1)))
weighted_next_column = np.multiply(next_column.reshape((m,1)), self.example_weights.reshape((m,1)))
weighted_previous_sum = np.multiply(y,
self.previous_vote.reshape((m, 1)))
weighted_next_column = np.multiply(next_column.reshape((m, 1)),
self.example_weights.reshape((m, 1)))
self.B2 = np.sum(weighted_next_column ** 2)
self.B1 = np.sum(2 * weighted_next_column * weighted_previous_sum)
self.B0 = np.sum(weighted_previous_sum ** 2)
M2 = np.sum(np.multiply(np.matmul(weighted_next_column, np.transpose(weighted_next_column)), zero_diag))
M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_next_column)) +
np.matmul(weighted_next_column, np.transpose(weighted_previous_sum))
M2 = np.sum(np.multiply(
np.matmul(weighted_next_column, np.transpose(weighted_next_column)),
zero_diag))
M1 = np.sum(np.multiply(np.matmul(weighted_previous_sum,
np.transpose(weighted_next_column)) +
np.matmul(weighted_next_column,
np.transpose(weighted_previous_sum))
, zero_diag))
M0 = np.sum(np.multiply(np.matmul(weighted_previous_sum, np.transpose(weighted_previous_sum)), zero_diag))
M0 = np.sum(np.multiply(np.matmul(weighted_previous_sum,
np.transpose(weighted_previous_sum)),
zero_diag))
self.A2 = self.B2 + M2
self.A1 = self.B1 + M1
......@@ -387,7 +418,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if C1 == 0:
return ['break', "the derivate was constant"]
else:
is_acceptable, sol = self._analyze_solutions_one_weight(np.array(float(C0)/C1).reshape((1,1)))
is_acceptable, sol = self._analyze_solutions_one_weight(
np.array(float(C0) / C1).reshape((1, 1)))
if is_acceptable:
return np.array([sol])
try:
......@@ -421,7 +453,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def _cbound(self, sol):
    """Compute the objective function for a candidate weight ``sol``.

    The value is ``1 - (A2*sol**2 + A1*sol + A0) / (B2*sol**2 + B1*sol + B0)
    / n_total_examples``, with the ``A*`` and ``B*`` coefficients read from
    the instance.

    :param sol: candidate weight (scalar or array) at which to evaluate.
    :return: the objective value at ``sol``.
    """
    numerator = self.A2 * sol ** 2 + self.A1 * sol + self.A0
    denominator = self.B2 * sol ** 2 + self.B1 * sol + self.B0
    # Evaluated left to right: (numerator / denominator) / n_total_examples.
    return 1 - numerator / denominator / self.n_total_examples
def _best_sol(self, sols):
"""Return the best min in the two possible sols"""
......@@ -441,8 +474,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
except:
raise
filenames = []
max_weight = max([np.max(examples_weights) for examples_weights in self.example_weights_])
min_weight = min([np.max(examples_weights) for examples_weights in self.example_weights_])
max_weight = max([np.max(examples_weights) for examples_weights in
self.example_weights_])
min_weight = min([np.max(examples_weights) for examples_weights in
self.example_weights_])
for iterIndex, examples_weights in enumerate(self.example_weights_):
r = np.array(examples_weights)
theta = np.arange(self.n_total_examples)
......@@ -464,18 +499,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
imageio.mimsave(path + '/weights.gif', images, duration=1. / 2)
import shutil
shutil.rmtree(path + "/gif_images")
get_accuracy_graph(self.voter_perfs, self.__class__.__name__, directory + 'voter_perfs.png', "Errors")
interpretString = getInterpretBase(self, directory, "QarBoost", self.weights_, self.break_cause)
args_dict = dict((arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list)
interpretString += "\n \n With arguments : \n"+u'\u2022 '+ ("\n"+u'\u2022 ').join(['%s: \t%s' % (key, value)
for (key, value) in args_dict.items()])
get_accuracy_graph(self.voter_perfs, self.__class__.__name__,
directory + 'voter_perfs.png', "Errors")
interpretString = getInterpretBase(self, directory, "QarBoost",
self.weights_, self.break_cause)
args_dict = dict(
(arg_name, str(self.__dict__[arg_name])) for arg_name in
self.printed_args_name_list)
interpretString += "\n \n With arguments : \n" + u'\u2022 ' + (
"\n" + u'\u2022 ').join(['%s: \t%s' % (key, value)
for (key, value) in
args_dict.items()])
if not self.respected_bound:
interpretString += "\n\n The bound was not respected"
return interpretString
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment