diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py
index 40eb51457cfe0bf85e6d67b37bb944da1073acc6..493e5909995fbbb76208a1e252882c8b3402e8e8 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py
@@ -1,17 +1,37 @@
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import GradientBoostingClassifier
+import time
+import numpy as np
 
 from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
+from .. import Metrics
+from ..Monoview.Additions.BoostUtils import get_accuracy_graph
 
 # Author-Info
 __author__ = "Baptiste Bauvin"
 __status__ = "Prototype"  # Production, Development, Prototype
 
 
+class CustomDecisionTree(DecisionTreeClassifier):
+    """Decision tree whose ``predict`` returns a float column vector.
+
+    NOTE(review): presumably this lets the tree serve as the ``init``
+    estimator of ``GradientBoosting`` below -- confirm against the
+    scikit-learn version in use.
+    """
+
+    def predict(self, X, check_input=True):
+        """Return the parent's predictions as floats, reshaped to one column."""
+        y_pred = super(CustomDecisionTree, self).predict(X, check_input=check_input)
+        return y_pred.reshape((y_pred.shape[0], 1)).astype(float)
+
+
 class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
 
     def __init__(self, random_state=None, loss="exponential", max_depth=1.0,
-                 n_estimators=100, init=DecisionTreeClassifier(max_depth=1), **kwargs):
+                 n_estimators=100,
+                 init=CustomDecisionTree(max_depth=1),
+                 **kwargs):
         super(GradientBoosting, self).__init__(
             loss=loss,
             max_depth=max_depth,
@@ -23,6 +43,48 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
         self.classed_params = []
         self.distribs = [CustomRandint(low=50, high=500),]
         self.weird_strings = {}
+        self.plotted_metric = Metrics.zero_one_loss
+        self.plotted_metric_name = "zero_one_loss"
+        self.step_predictions = None
+
+    def fit(self, X, y, sample_weight=None, monitor=None):
+        """Fit the ensemble, recording timing and per-stage train metrics.
+
+        Sets ``train_time`` (seconds spent in the parent ``fit``),
+        ``train_shape`` (``X.shape``), ``base_predictions`` (predictions on
+        ``X`` of the first estimator of each ``estimators_`` entry) and
+        ``metrics`` (``plotted_metric`` score of each staged prediction).
+
+        Returns ``self``.
+        """
+        begin = time.time()
+        super(GradientBoosting, self).fit(X, y, sample_weight=sample_weight,
+                                          monitor=monitor)
+        end = time.time()
+        self.train_time = end - begin
+        self.train_shape = X.shape
+        self.base_predictions = np.array(
+            [estim[0].predict(X) for estim in self.estimators_])
+        self.metrics = np.array(
+            [self.plotted_metric.score(y, pred)
+             for pred in self.staged_predict(X)])
+        return self
+
+    def predict(self, X):
+        """Predict labels for ``X``, timing the call in ``pred_time``.
+
+        When ``X`` does not have the training set's shape, its staged
+        predictions are also stored in ``step_predictions`` so that
+        ``getInterpret`` can score them later.
+        """
+        begin = time.time()
+        pred = super(GradientBoosting, self).predict(X)
+        end = time.time()
+        self.pred_time = end - begin
+        # Shape comparison is only a heuristic for "this is not the train set".
+        if X.shape != self.train_shape:
+            self.step_predictions = np.array(list(self.staged_predict(X)))
+        return pred
 
     def canProbas(self):
         """Used to know if the classifier can return label probabilities"""
@@ -30,6 +92,30 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
 
     def getInterpret(self, directory, y_test):
+        """Write metric curves and timings under ``directory``.
+
+        File names are appended directly to ``directory``. The test curve is
+        only produced when ``predict`` stored staged predictions. Returns the
+        string produced by ``getFeatureImportance(directory)``.
+        """
         interpretString = ""
+        interpretString += self.getFeatureImportance(directory)
+        # predict() only records staged predictions for data whose shape
+        # differs from the training set, so they may be missing here.
+        if self.step_predictions is not None:
+            step_test_metrics = np.array(
+                [self.plotted_metric.score(y_test, step_pred)
+                 for step_pred in self.step_predictions])
+            get_accuracy_graph(step_test_metrics, "GradientBoosting",
+                               directory + "test_metrics.png",
+                               self.plotted_metric_name, set="test")
+            np.savetxt(directory + "test_metrics.csv", step_test_metrics,
+                       delimiter=',')
+        get_accuracy_graph(self.metrics, "GradientBoosting",
+                           directory + "metrics.png", self.plotted_metric_name)
+        np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
+        np.savetxt(directory + "times.csv",
+                   np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString
 
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index 95ffef91ccce1fa0d447f2e4fff9c56d1f91b6e5..711d19f56341b143672560e96f6d09bfda9c860f 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -186,16 +186,22 @@ def parseTheArgs(arguments):
     groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, action='store',
                              help='Set the n_max_iterations parameter for CGreed', default=100)
 
     groupCGDesc = parser.add_argument_group('CGDesc arguments')
     groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int,
                              action='store',
                              help='Set the n_stumps_per_attribute parameter for CGreed',
                              default=1)
     groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int,
                              action='store',
                              help='Set the n_max_iterations parameter for CGreed',
                              default=100)
 
+    groupGradientBoosting = parser.add_argument_group('Gradient Boosting arguments')
+    groupGradientBoosting.add_argument('--GB_n_est', metavar='INT', type=int,
+                                       action='store',
+                                       help='Set the n_estimators parameter for Gradient Boosting',
+                                       default=1)
+
     groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments')
     groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store',
                                  help='Set the mu parameter for QarBoostv3', default=0.001)