Skip to content
Snippets Groups Projects
Commit 2b978a80 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added data extraction

parent aed9da3b
Branches
Tags
No related merge requests found
...@@ -669,7 +669,7 @@ class ConvexProgram(object): ...@@ -669,7 +669,7 @@ class ConvexProgram(object):
return signs return signs
def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None): def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None):
if type(name) is not str: if type(name) is not str:
name = " ".join(name.getConfig().strip().split(" ")[:2]) name = " ".join(name.getConfig().strip().split(" ")[:2])
if bounds: if bounds:
...@@ -677,8 +677,14 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur ...@@ -677,8 +677,14 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
ax.set_title(name+" during train for "+classifier_name) ax.set_title(name+" during train for "+classifier_name)
x = np.arange(len(train_accuracies)) x = np.arange(len(train_accuracies))
scat = ax.scatter(x, np.array(train_accuracies), ) scat = ax.scatter(x, np.array(train_accuracies), )
if boosting_bound:
scat2 = ax.scatter(x, boosting_bound)
scat3 = ax.scatter(x, np.array(bounds), )
ax.legend((scat, scat2, scat3), (name,"Boosting bound", bound_name))
else:
scat2 = ax.scatter(x, np.array(bounds), ) scat2 = ax.scatter(x, np.array(bounds), )
ax.legend((scat,scat2), (name,"Bounds")) ax.legend((scat, scat2),
(name, bound_name))
plt.tight_layout() plt.tight_layout()
f.savefig(file_name) f.savefig(file_name)
plt.close() plt.close()
...@@ -751,5 +757,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights, ...@@ -751,5 +757,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
separator=',', suppress_small=True) separator=',', suppress_small=True)
np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',') np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',') np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds) get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound")
return interpretString return interpretString
...@@ -18,8 +18,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -18,8 +18,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def __init__(self, n_max_iterations=None, estimators_generator=None, def __init__(self, n_max_iterations=None, estimators_generator=None,
random_state=42, self_complemented=True, twice_the_same=False, random_state=42, self_complemented=True, twice_the_same=False,
c_bound_choice=True, random_start=True, c_bound_choice=True, random_start=True,
n_stumps_per_attribute=None, use_r=True, n_stumps_per_attribute=None, use_r=True, c_bound_sol=True,
plotted_metric=Metrics.zero_one_loss): plotted_metric=Metrics.zero_one_loss, save_train_data=True):
super(ColumnGenerationClassifierQar, self).__init__() super(ColumnGenerationClassifierQar, self).__init__()
r""" r"""
...@@ -60,10 +60,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -60,10 +60,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if n_stumps_per_attribute: if n_stumps_per_attribute:
self.n_stumps = n_stumps_per_attribute self.n_stumps = n_stumps_per_attribute
self.use_r = use_r self.use_r = use_r
self.c_bound_sol = c_bound_sol
self.save_train_data = save_train_data
self.printed_args_name_list = ["n_max_iterations", "self_complemented", self.printed_args_name_list = ["n_max_iterations", "self_complemented",
"twice_the_same", "twice_the_same",
"c_bound_choice", "random_start", "c_bound_choice", "random_start",
"n_stumps", "use_r"] "n_stumps", "use_r", "c_bound_sol"]
def set_params(self, **params): def set_params(self, **params):
self.self_complemented = params["self_complemented"] self.self_complemented = params["self_complemented"]
...@@ -94,7 +96,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -94,7 +96,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# Print dynamically the step and the error of the current classifier # Print dynamically the step and the error of the current classifier
print( print(
"Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound, "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
self.bounds[-1] > self.train_metrics[-1],
k + 2, k + 2,
self.n_max_iterations, self.n_max_iterations,
self.voter_perfs[-1]), self.voter_perfs[-1]),
...@@ -111,7 +114,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -111,7 +114,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
voter_perf = self.compute_voter_perf(formatted_y) voter_perf = self.compute_voter_perf(formatted_y)
self.compute_voter_weight(voter_perf) self.compute_voter_weight(voter_perf, sol)
self.update_example_weights(formatted_y) self.update_example_weights(formatted_y)
...@@ -121,6 +124,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -121,6 +124,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.estimators_generator.estimators_ = \ self.estimators_generator.estimators_ = \
self.estimators_generator.estimators_[self.chosen_columns_] self.estimators_generator.estimators_[self.chosen_columns_]
if self.save_train_data:
self.X_train = self.classification_matrix[:, self.chosen_columns_]
self.y_train = formatted_y
self.weights_ = np.array(self.weights_) self.weights_ = np.array(self.weights_)
self.weights_ /= np.sum(self.weights_) self.weights_ /= np.sum(self.weights_)
...@@ -170,8 +177,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -170,8 +177,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.train_metrics.append(train_metric) self.train_metrics.append(train_metric)
self.bounds.append(bound) self.bounds.append(bound)
def compute_voter_weight(self, voter_perf): def compute_voter_weight(self, voter_perf, sol):
"""used to compute the voter's weight according to the specified method (edge or error) """ """used to compute the voter's weight according to the specified method (edge or error) """
if self.c_bound_sol:
self.q = sol
else:
if self.use_r: if self.use_r:
self.q = 0.5 * math.log((1 + voter_perf) / (1 - voter_perf)) self.q = 0.5 * math.log((1 + voter_perf) / (1 - voter_perf))
else: else:
...@@ -230,7 +240,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -230,7 +240,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
else: else:
epsilon = self._compute_epsilon(y) epsilon = self._compute_epsilon(y)
self.voter_perfs.append(epsilon) self.voter_perfs.append(epsilon)
if self.c_bound_sol:
self.q = 1
else:
if self.use_r: if self.use_r:
self.q = 0.5 * math.log((1 + r) / (1 - r)) self.q = 0.5 * math.log((1 + r) / (1 - r))
else: else:
...@@ -290,6 +302,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -290,6 +302,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.example_weights_ = [] self.example_weights_ = []
self.train_metrics = [] self.train_metrics = []
self.bounds = [] self.bounds = []
self.disagreements = []
self.margins = []
self.previous_votes = [] self.previous_votes = []
self.previous_margins = [] self.previous_margins = []
self.respected_bound = True self.respected_bound = True
...@@ -373,11 +387,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -373,11 +387,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if not causes: if not causes:
causes = ["no feature was better than random and acceptable"] causes = ["no feature was better than random and acceptable"]
if c_borns: if c_borns:
min_c_born_index = ma.argmin(c_borns) min_c_bound_index = ma.argmin(c_borns)
self.c_bounds.append(c_borns[min_c_born_index]) self.c_bounds.append(c_borns[min_c_bound_index])
selected_sol = possible_sols[min_c_born_index] selected_sol = possible_sols[min_c_bound_index]
selected_voter_index = indices[min_c_born_index] self.margins.append(self.margin(selected_sol))
return selected_sol, selected_voter_index self.disagreements.append(self.disagreement(selected_sol))
selected_voter_index = indices[min_c_bound_index]
return selected_sol/(1+selected_sol), selected_voter_index
else: else:
return "break", " and ".join(set(causes)) return "break", " and ".join(set(causes))
...@@ -388,6 +404,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -388,6 +404,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
zero_diag = np.ones((m, m)) - np.identity(m) zero_diag = np.ones((m, m)) - np.identity(m)
weighted_previous_sum = np.multiply(y, weighted_previous_sum = np.multiply(y,
self.previous_vote.reshape((m, 1))) self.previous_vote.reshape((m, 1)))
if self.c_bound_sol:
weighted_next_column = next_column.reshape((m, 1))
else:
weighted_next_column = np.multiply(next_column.reshape((m, 1)), weighted_next_column = np.multiply(next_column.reshape((m, 1)),
self.example_weights.reshape((m, 1))) self.example_weights.reshape((m, 1)))
...@@ -437,12 +456,17 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -437,12 +456,17 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
""""We just check that the solution found by np.roots is acceptable under our constraints """"We just check that the solution found by np.roots is acceptable under our constraints
(real, a minimum and over 0)""" (real, a minimum and over 0)"""
if sols.shape[0] == 1: if sols.shape[0] == 1:
if self._cbound(sols[0]) < self._cbound(sols[0] + 1): if self._cbound(sols[0]) < self._cbound(sols[0] + 1) and sols[0] > 0:
best_sol = sols[0] best_sol = sols[0]
else: else:
if sols[0] > 0:
return False, "the only solution was a maximum." return False, "the only solution was a maximum."
elif sols.shape[0] == 2: else:
return False, "the only solution was negative"
elif sols.shape[0] == 2 and sols[0] > 0 and sols[1] > 1:
best_sol = self._best_sol(sols) best_sol = self._best_sol(sols)
elif np.greater(sols, np.zeros(2)).any():
return self._analyze_solutions_one_weight(np.array([np.max(sols)]))
else: else:
return False, "no solution were found" return False, "no solution were found"
...@@ -453,8 +477,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -453,8 +477,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def _cbound(self, sol): def _cbound(self, sol):
"""Computing the objective function""" """Computing the objective function"""
return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ( return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ((
self.B2 * sol ** 2 + self.B1 * sol + self.B0) / self.n_total_examples self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples)
def disagreement(self, sol):
    """Evaluate the quadratic ``B2 * sol**2 + B1 * sol + B0`` at ``sol``.

    The coefficients ``B2``, ``B1`` and ``B0`` are read from the instance;
    the same quadratic appears in the denominator of ``_cbound``.

    :param sol: candidate solution at which the quadratic is evaluated.
    :return: the value of the quadratic at ``sol``.
    """
    return self.B2 * sol ** 2 + self.B1 * sol + self.B0
def margin(self, sol):
    """Evaluate ``(A2 * sol**2 + A1 * sol + A0) / n_total_examples``.

    The coefficients ``A2``, ``A1``, ``A0`` and the divisor
    ``n_total_examples`` are read from the instance; the same quadratic
    appears in the numerator of ``_cbound``.

    :param sol: candidate solution at which the quadratic is evaluated.
    :return: the quadratic's value at ``sol`` divided by the example count.
    """
    quadratic = self.A2 * sol ** 2 + self.A1 * sol + self.A0
    return quadratic / self.n_total_examples
def _best_sol(self, sols): def _best_sol(self, sols):
"""Return the best min in the two possible sols""" """Return the best min in the two possible sols"""
...@@ -501,9 +531,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): ...@@ -501,9 +531,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
shutil.rmtree(path + "/gif_images") shutil.rmtree(path + "/gif_images")
get_accuracy_graph(self.voter_perfs, self.__class__.__name__, get_accuracy_graph(self.voter_perfs, self.__class__.__name__,
directory + 'voter_perfs.png', "Errors") directory + 'voter_perfs.png', "Errors")
get_accuracy_graph(self.c_bounds, self.__class__.__name__,
directory + 'c_bounds.png', "C-Bounds")
get_accuracy_graph(self.margins, self.__class__.__name__,
directory + 'margins.png', "Margins")
self.disagreements[0] = 0
get_accuracy_graph(self.disagreements, self.__class__.__name__,
directory + 'disagreements.png', "disagreements")
get_accuracy_graph(self.train_metrics[1:], self.__class__.__name__,
directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[1:])
interpretString = getInterpretBase(self, directory, "QarBoost", interpretString = getInterpretBase(self, directory, "QarBoost",
self.weights_, self.break_cause) self.weights_, self.break_cause)
if self.save_train_data:
np.savetxt(directory+"x_train.csv", self.X_train, delimiter=',')
np.savetxt(directory+"y_train.csv", self.y_train, delimiter=',')
args_dict = dict( args_dict = dict(
(arg_name, str(self.__dict__[arg_name])) for arg_name in (arg_name, str(self.__dict__[arg_name])) for arg_name in
self.printed_args_name_list) self.printed_args_name_list)
......
...@@ -6,14 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar ...@@ -6,14 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier): class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs): def __init__(self, random_state=None, **kwargs):
super(QarBoostNC, self).__init__(n_max_iterations=50, super(QarBoostNC, self).__init__(n_max_iterations=500,
random_state=random_state, random_state=random_state,
self_complemented=True, self_complemented=True,
twice_the_same=False, twice_the_same=False,
c_bound_choice=True, c_bound_choice=True,
random_start=False, random_start=False,
n_stumps_per_attribute=1, n_stumps_per_attribute=1,
use_r=True use_r=True,
c_bound_sol=False
) )
self.param_names = [] self.param_names = []
self.distribs = [] self.distribs = []
......
...@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar ...@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs): def __init__(self, random_state=None, **kwargs):
super(QarBoostNC2, self).__init__( super(QarBoostNC2, self).__init__(n_max_iterations=500,
random_state=random_state, random_state=random_state,
self_complemented=True, self_complemented=True,
twice_the_same=False, twice_the_same=True,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True, c_bound_choice=True,
random_start=True, random_start=False,
two_wieghts_problem=False, n_stumps_per_attribute=1,
divided_ponderation=False, use_r=True,
n_stumps_per_attribute=10, c_bound_sol=False
use_r=True
) )
self.param_names = [] self.param_names = []
self.distribs = [] self.distribs = []
......
...@@ -6,19 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar ...@@ -6,19 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs): def __init__(self, random_state=None, **kwargs):
super(QarBoostNC3, self).__init__( super(QarBoostNC3, self).__init__(n_max_iterations=500,
random_state=random_state, random_state=random_state,
self_complemented=False, self_complemented=True,
twice_the_same=False, twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True, c_bound_choice=True,
random_start=True, random_start=False,
two_wieghts_problem=False,
divided_ponderation=True,
n_stumps_per_attribute=1, n_stumps_per_attribute=1,
use_r=True use_r=True,
) c_bound_sol=True)
self.param_names = [] self.param_names = []
self.distribs = [] self.distribs = []
self.classed_params = [] self.classed_params = []
......
...@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar ...@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs): def __init__(self, random_state=None, **kwargs):
super(QarBoostv2, self).__init__( super(QarBoostv2, self).__init__(n_max_iterations=500,
random_state=random_state, random_state=random_state,
self_complemented=False, self_complemented=True,
twice_the_same=False, twice_the_same=True,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True, c_bound_choice=True,
random_start=False, random_start=False,
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=1, n_stumps_per_attribute=1,
use_r=True use_r=True,
c_bound_sol=True
) )
self.param_names = [] self.param_names = []
self.distribs = [] self.distribs = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment