From 7fff8a6f16537f23221b86d067cce7864c2e82c5 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Wed, 3 Apr 2019 16:11:40 -0400 Subject: [PATCH] Pre cluster commit --- .../Monoview/Additions/BoostUtils.py | 1 - .../Monoview/Additions/CGDescUtils.py | 2 - .../Monoview/Additions/CQBoostUtils.py | 1 - .../Monoview/Additions/MinCQUtils.py | 1 - .../Monoview/Additions/PregenUtils.py | 2 +- .../Monoview/ExecClassifMonoView.py | 1 + .../MonoviewClassifiers/CGDesc.py | 2 +- .../MonoviewClassifiers/CQBoost.py | 4 +- .../MonoviewClassifiers/CQBoostTree.py | 1 - .../MonoviewClassifiers/DecisionTreePregen.py | 84 +++++++++++++++++++ .../utils/execution.py | 14 ++++ 11 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 877e262d..3ac3c046 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -782,7 +782,6 @@ class BaseBoost(object): def _collect_probas(self, X, sub_sampled=False): if self.estimators_generator.__class__.__name__ == "TreeClassifiersGenerator": - print("frogom") return np.asarray([clf.predict_proba(X[:,attribute_indices]) for clf, attribute_indices in zip(self.estimators_generator.estimators_, self.estimators_generator.attribute_indices)]) else: return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py index 9b1f8f9a..a3e53c6b 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py +++ 
b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py @@ -154,7 +154,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): end = time.time() self.predict_time = end - start self.step_predict(classification_matrix) - print(np.unique(signs_array)) return signs_array def step_predict(self, classification_matrix): @@ -490,7 +489,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): return 1.0 / n_examples * np.ones((n_examples,)) def get_step_decision_test_graph(self, directory, y_test): - print(np.unique(y_test)) np.savetxt(directory + "y_test_step.csv", self.step_decisions, delimiter=',') step_metrics = [] for step_index in range(self.step_decisions.shape[1]-1): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py index 401bcdca..6560eb1a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py @@ -36,7 +36,6 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): elif self.estimators_generator is "Trees": self.estimators_generator = TreeClassifiersGenerator(max_depth=self.max_depth, n_trees=self.n_stumps, self_complemented=True) - print(self.max_depth, self.n_stumps) self.estimators_generator.fit(X, y) self.classification_matrix = self._binary_classification_matrix(X) self.c_bounds = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py index afa55be5..0e8479a3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py @@ -272,7 +272,6 @@ class 
RegularizedBinaryMinCqClassifier(MinCqClassifier): # Keep learning information for further use. self.learner_info_ = {} - print(np.unique(weights)) # We count the number of non-zero weights, including the implicit voters. # TODO: Verify how we define non-zero weights here, could be if the weight is near 1/2n. diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py index 92a603e8..6011f7c6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py @@ -1,7 +1,7 @@ from ..MonoviewUtils import change_label_to_minus from .BoostUtils import StumpsClassifiersGenerator, BaseBoost, TreeClassifiersGenerator -import numpy as np + class PregenClassifier(BaseBoost): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index c0d43c74..156c187a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -52,6 +52,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol learningRate, \ labelsString, \ outputFileName = initConstants(args, X, classificationIndices, labelsNames, name, directory) + logging.debug("Done:\t Loading data") logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:" diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py index 76ba5dae..ed2d9536 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py @@ -18,7 +18,7 @@ 
class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): estimators_generator="Stumps" ) self.param_names = ["n_max_iterations", "n_stumps", "random_state"] - self.distribs = [CustomRandint(low=2, high=1000), [n_stumps], + self.distribs = [CustomRandint(low=2, high=500), [n_stumps], [random_state]] self.classed_params = [] self.weird_strings = {} diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py index dfed2a18..14d65453 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py @@ -7,13 +7,13 @@ import os class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, n_max_iterations=100, **kwargs): + def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, n_max_iterations=None, **kwargs): super(CQBoost, self).__init__( random_state=random_state, mu=mu, epsilon=epsilon, estimators_generator="Stumps", - n_max_iterations=100 + n_max_iterations=n_max_iterations ) self.param_names = ["mu", "epsilon", "n_stumps", "random_state", "n_max_iterations"] self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py index b7a8dad5..25c2a5ea 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py @@ -8,7 +8,6 @@ import os class CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, max_depth=2, n_max_iterations=100, **kwargs): - print(n_max_iterations) super(CQBoostTree, 
self).__init__(
             random_state=random_state,
             mu=mu,
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py
new file mode 100644
index 00000000..14f554e5
--- /dev/null
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py
@@ -0,0 +1,92 @@
+from sklearn.tree import DecisionTreeClassifier
+import time
+import numpy as np
+
+from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero
+from ..Monoview.Additions.PregenUtils import PregenClassifier
+
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype
+
+
+class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, PregenClassifier):
+    """Decision tree trained on the output of ``pregen_voters`` rather than on the raw view."""
+
+    def __init__(self, random_state=None, max_depth=None,
+                 criterion='gini', splitter='best', n_stumps=1, self_complemented=True, **kwargs):
+        """Store the tree settings, the pregeneration settings and the search-space description."""
+        super(DecisionTreePregen, self).__init__(
+            max_depth=max_depth,
+            criterion=criterion,
+            splitter=splitter,
+            random_state=random_state
+            )
+        # Settings presumably read by PregenClassifier.pregen_voters (defined in PregenUtils).
+        self.estimators_generator = "Stumps"
+        self.n_stumps = n_stumps
+        self.self_complemented = self_complemented
+        # param_names and distribs are parallel lists: one candidate distribution per name.
+        self.param_names = ["max_depth", "criterion", "splitter",'random_state',
+                            'n_stumps']
+        self.classed_params = []
+        self.distribs = [CustomRandint(low=1, high=300),
+                         ["gini", "entropy"],
+                         ["best", "random"], [random_state], [n_stumps]]
+        self.weird_strings = {}
+
+    def fit(self, X, y, sample_weight=None, check_input=True,
+            X_idx_sorted=None):
+        """Pregenerate the voters' outputs for (X, y), fit the tree on them and time the whole call."""
+        begin = time.time()
+        pregen_X, pregen_y = self.pregen_voters(X, y)
+        super(DecisionTreePregen, self).fit(pregen_X, pregen_y,
+                                            sample_weight=sample_weight,
+                                            check_input=check_input,
+                                            X_idx_sorted=X_idx_sorted)
+        end = time.time()
+        self.train_time = end - begin
+        self.train_shape = pregen_X.shape
+        return self
+
+    def predict(self, X, check_input=True):
+        """Predict on the pregenerated representation of X; labels go through change_label_to_zero."""
+        begin = time.time()
+        pregen_X, _ = self.pregen_voters(X)
+        pred = super(DecisionTreePregen, self).predict(pregen_X, check_input=check_input)
+        end = time.time()
+        self.pred_time = end - begin
+        return change_label_to_zero(pred)
+
+    def canProbas(self):
+        """Used to know if the classifier can return label probabilities"""
+        return True
+
+    def getInterpret(self, directory, y_test):
+        """Return what getFeatureImportance(directory) reports and save [train_time, pred_time] to times.csv."""
+        interpretString = ""
+        interpretString += self.getFeatureImportance(directory)
+        np.savetxt(directory + "times.csv",
+                   np.array([self.train_time, self.pred_time]), delimiter=',')
+        return interpretString
+
+
+def formatCmdArgs(args):
+    """Used to format kwargs for the parsed args"""
+    kwargsDict = {"max_depth": args.DTP_depth,
+                  "criterion": args.DTP_criterion,
+                  "splitter": args.DTP_splitter,
+                  "n_stumps":args.DTP_stumps}
+    return kwargsDict
+
+
+def paramsToSet(nIter, randomState):
+    """Draw nIter random dicts of max_depth / criterion / splitter values from randomState."""
+    paramsSet = []
+    for _ in range(nIter):
+        paramsSet.append({"max_depth": randomState.randint(1, 300),
+                          "criterion": randomState.choice(["gini", "entropy"]),
+                          "splitter": randomState.choice(["best", "random"])})
+    return paramsSet
+
+
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index 3e235fae..189de203 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -157,6 +157,20 @@ def parseTheArgs(arguments):
     groupDT.add_argument('--DT_splitter', metavar='STRING', action='store',
                          help='Determine criterion for Decision Trees', default="random")
 
+    groupDTP = parser.add_argument_group('Decision Trees pregen arguments')
+    groupDTP.add_argument('--DTP_depth', metavar='INT', type=int, action='store',
+                          help='Determine max depth for Decision Trees',
+                          default=3)
+    groupDTP.add_argument('--DTP_criterion', metavar='STRING', action='store',
+                          help='Determine criterion for Decision Trees',
+                          default="entropy")
+    groupDTP.add_argument('--DTP_splitter', metavar='STRING', action='store',
+                          help='Determine splitter for Decision Trees',
+                          default="random")
+    groupDTP.add_argument('--DTP_stumps', metavar='INT', type=int, action='store',
+                          help='Determine the number of stumps for Decision Trees pregen',
+                          default=1)
+
     groupSGD = parser.add_argument_group('SGD arguments')
     groupSGD.add_argument('--SGD_alpha', metavar='FLOAT', type=float, action='store',
                           help='Determine alpha for SGDClassifier', default=0.1)
-- 
GitLab