Skip to content
Snippets Groups Projects
Commit 2b978a80 authored by Baptiste Bauvin
Browse files

Added data extraction

parent aed9da3b
No related branches found
No related tags found
No related merge requests found
......@@ -669,7 +669,7 @@ class ConvexProgram(object):
return signs
def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None):
def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None):
if type(name) is not str:
name = " ".join(name.getConfig().strip().split(" ")[:2])
if bounds:
......@@ -677,8 +677,14 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
ax.set_title(name+" during train for "+classifier_name)
x = np.arange(len(train_accuracies))
scat = ax.scatter(x, np.array(train_accuracies), )
if boosting_bound:
scat2 = ax.scatter(x, boosting_bound)
scat3 = ax.scatter(x, np.array(bounds), )
ax.legend((scat, scat2, scat3), (name,"Boosting bound", bound_name))
else:
scat2 = ax.scatter(x, np.array(bounds), )
ax.legend((scat,scat2), (name,"Bounds"))
ax.legend((scat, scat2),
(name, bound_name))
plt.tight_layout()
f.savefig(file_name)
plt.close()
......@@ -751,5 +757,5 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
separator=',', suppress_small=True)
np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds)
get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound")
return interpretString
......@@ -18,8 +18,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def __init__(self, n_max_iterations=None, estimators_generator=None,
random_state=42, self_complemented=True, twice_the_same=False,
c_bound_choice=True, random_start=True,
n_stumps_per_attribute=None, use_r=True,
plotted_metric=Metrics.zero_one_loss):
n_stumps_per_attribute=None, use_r=True, c_bound_sol=True,
plotted_metric=Metrics.zero_one_loss, save_train_data=True):
super(ColumnGenerationClassifierQar, self).__init__()
r"""
......@@ -60,10 +60,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if n_stumps_per_attribute:
self.n_stumps = n_stumps_per_attribute
self.use_r = use_r
self.c_bound_sol = c_bound_sol
self.save_train_data = save_train_data
self.printed_args_name_list = ["n_max_iterations", "self_complemented",
"twice_the_same",
"c_bound_choice", "random_start",
"n_stumps", "use_r"]
"n_stumps", "use_r", "c_bound_sol"]
def set_params(self, **params):
self.self_complemented = params["self_complemented"]
......@@ -94,7 +96,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
# Print dynamically the step and the error of the current classifier
print(
"Resp. bound : {}, {}/{}, eps :{}".format(self.respected_bound,
"Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
self.bounds[-1] > self.train_metrics[-1],
k + 2,
self.n_max_iterations,
self.voter_perfs[-1]),
......@@ -111,7 +114,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
voter_perf = self.compute_voter_perf(formatted_y)
self.compute_voter_weight(voter_perf)
self.compute_voter_weight(voter_perf, sol)
self.update_example_weights(formatted_y)
......@@ -121,6 +124,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.estimators_generator.estimators_ = \
self.estimators_generator.estimators_[self.chosen_columns_]
if self.save_train_data:
self.X_train = self.classification_matrix[:, self.chosen_columns_]
self.y_train = formatted_y
self.weights_ = np.array(self.weights_)
self.weights_ /= np.sum(self.weights_)
......@@ -170,8 +177,11 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.train_metrics.append(train_metric)
self.bounds.append(bound)
def compute_voter_weight(self, voter_perf):
def compute_voter_weight(self, voter_perf, sol):
"""used to compute the voter's weight according to the specified method (edge or error) """
if self.c_bound_sol:
self.q = sol
else:
if self.use_r:
self.q = 0.5 * math.log((1 + voter_perf) / (1 - voter_perf))
else:
......@@ -230,7 +240,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
else:
epsilon = self._compute_epsilon(y)
self.voter_perfs.append(epsilon)
if self.c_bound_sol:
self.q = 1
else:
if self.use_r:
self.q = 0.5 * math.log((1 + r) / (1 - r))
else:
......@@ -290,6 +302,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self.example_weights_ = []
self.train_metrics = []
self.bounds = []
self.disagreements = []
self.margins = []
self.previous_votes = []
self.previous_margins = []
self.respected_bound = True
......@@ -373,11 +387,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if not causes:
causes = ["no feature was better than random and acceptable"]
if c_borns:
min_c_born_index = ma.argmin(c_borns)
self.c_bounds.append(c_borns[min_c_born_index])
selected_sol = possible_sols[min_c_born_index]
selected_voter_index = indices[min_c_born_index]
return selected_sol, selected_voter_index
min_c_bound_index = ma.argmin(c_borns)
self.c_bounds.append(c_borns[min_c_bound_index])
selected_sol = possible_sols[min_c_bound_index]
self.margins.append(self.margin(selected_sol))
self.disagreements.append(self.disagreement(selected_sol))
selected_voter_index = indices[min_c_bound_index]
return selected_sol/(1+selected_sol), selected_voter_index
else:
return "break", " and ".join(set(causes))
......@@ -388,6 +404,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
zero_diag = np.ones((m, m)) - np.identity(m)
weighted_previous_sum = np.multiply(y,
self.previous_vote.reshape((m, 1)))
if self.c_bound_sol:
weighted_next_column = next_column.reshape((m, 1))
else:
weighted_next_column = np.multiply(next_column.reshape((m, 1)),
self.example_weights.reshape((m, 1)))
......@@ -437,12 +456,17 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
""""We just check that the solution found by np.roots is acceptable under our constraints
(real, a minimum and over 0)"""
if sols.shape[0] == 1:
if self._cbound(sols[0]) < self._cbound(sols[0] + 1):
if self._cbound(sols[0]) < self._cbound(sols[0] + 1) and sols[0] > 0:
best_sol = sols[0]
else:
if sols[0] > 0:
return False, "the only solution was a maximum."
elif sols.shape[0] == 2:
else:
return False, "the only solution was negative"
elif sols.shape[0] == 2 and sols[0] > 0 and sols[1] > 1:
best_sol = self._best_sol(sols)
elif np.greater(sols, np.zeros(2)).any():
return self._analyze_solutions_one_weight(np.array([np.max(sols)]))
else:
return False, "no solution were found"
......@@ -453,8 +477,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def _cbound(self, sol):
"""Computing the objective function"""
return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / (
self.B2 * sol ** 2 + self.B1 * sol + self.B0) / self.n_total_examples
return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ((
self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples)
def disagreement(self, sol):
    """Evaluate the B-coefficient quadratic at ``sol``.

    Returns ``B2 * sol**2 + B1 * sol + B0``, i.e. the same polynomial
    that ``_cbound`` uses as its denominator term.
    """
    quadratic_term = self.B2 * sol ** 2
    linear_term = self.B1 * sol
    return quadratic_term + linear_term + self.B0
def margin(self, sol):
    """Evaluate the A-coefficient quadratic at ``sol``, divided by the example count.

    Returns ``(A2 * sol**2 + A1 * sol + A0) / n_total_examples``, i.e. the
    same polynomial that ``_cbound`` uses as its numerator term.
    """
    quadratic_term = self.A2 * sol ** 2
    linear_term = self.A1 * sol
    numerator = quadratic_term + linear_term + self.A0
    return numerator / self.n_total_examples
def _best_sol(self, sols):
"""Return the best min in the two possible sols"""
......@@ -501,9 +531,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
shutil.rmtree(path + "/gif_images")
get_accuracy_graph(self.voter_perfs, self.__class__.__name__,
directory + 'voter_perfs.png', "Errors")
get_accuracy_graph(self.c_bounds, self.__class__.__name__,
directory + 'c_bounds.png', "C-Bounds")
get_accuracy_graph(self.margins, self.__class__.__name__,
directory + 'margins.png', "Margins")
self.disagreements[0] = 0
get_accuracy_graph(self.disagreements, self.__class__.__name__,
directory + 'disagreements.png', "disagreements")
get_accuracy_graph(self.train_metrics[1:], self.__class__.__name__,
directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[1:])
interpretString = getInterpretBase(self, directory, "QarBoost",
self.weights_, self.break_cause)
if self.save_train_data:
np.savetxt(directory+"x_train.csv", self.X_train, delimiter=',')
np.savetxt(directory+"y_train.csv", self.y_train, delimiter=',')
args_dict = dict(
(arg_name, str(self.__dict__[arg_name])) for arg_name in
self.printed_args_name_list)
......
......@@ -6,14 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostNC, self).__init__(n_max_iterations=50,
super(QarBoostNC, self).__init__(n_max_iterations=500,
random_state=random_state,
self_complemented=True,
twice_the_same=False,
c_bound_choice=True,
random_start=False,
n_stumps_per_attribute=1,
use_r=True
use_r=True,
c_bound_sol=False
)
self.param_names = []
self.distribs = []
......
......@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostNC2, self).__init__(
super(QarBoostNC2, self).__init__(n_max_iterations=500,
random_state=random_state,
self_complemented=True,
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
twice_the_same=True,
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=10,
use_r=True
random_start=False,
n_stumps_per_attribute=1,
use_r=True,
c_bound_sol=False
)
self.param_names = []
self.distribs = []
......
......@@ -6,19 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostNC3, self).__init__(
super(QarBoostNC3, self).__init__(n_max_iterations=500,
random_state=random_state,
self_complemented=False,
self_complemented=True,
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
c_bound_choice=True,
random_start=True,
two_wieghts_problem=False,
divided_ponderation=True,
random_start=False,
n_stumps_per_attribute=1,
use_r=True
)
use_r=True,
c_bound_sol=True)
self.param_names = []
self.distribs = []
self.classed_params = []
......
......@@ -6,18 +6,15 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
def __init__(self, random_state=None, **kwargs):
super(QarBoostv2, self).__init__(
super(QarBoostv2, self).__init__(n_max_iterations=500,
random_state=random_state,
self_complemented=False,
twice_the_same=False,
old_fashioned=False,
previous_vote_weighted=False,
self_complemented=True,
twice_the_same=True,
c_bound_choice=True,
random_start=False,
two_wieghts_problem=False,
divided_ponderation=False,
n_stumps_per_attribute=1,
use_r=True
use_r=True,
c_bound_sol=True
)
self.param_names = []
self.distribs = []
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment