From 40c7ee5a50a991be14a167e06f400c9b3c557fdb Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Thu, 28 Feb 2019 16:52:27 -0500 Subject: [PATCH] Added get_params and removed mincq tracking --- .../Monoview/Additions/BoostUtils.py | 3 -- .../Monoview/Additions/QarBoostUtils.py | 13 +++--- .../Monoview/ExportResults.py | 2 +- .../Monoview/MonoviewUtils.py | 2 +- .../MonoviewClassifiers/CGDesc.py | 3 +- .../MonoviewClassifiers/CGreed.py | 2 +- .../MonoviewClassifiers/CQBoost.py | 3 +- .../MonoviewClassifiers/GradientBoosting.py | 46 +++++++++++++++++++ 8 files changed, 58 insertions(+), 16 deletions(-) create mode 100644 multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 5c7c042b..ca2cf704 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -696,9 +696,6 @@ def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracie class BaseBoost(object): - def __init__(self): - self.n_stumps = 10 - def _collect_probas(self, X): return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index eee18a47..7e51a32f 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -19,9 +19,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def __init__(self, n_max_iterations=None, estimators_generator=None, random_state=42, self_complemented=True, twice_the_same=False, c_bound_choice=True, random_start=True, - n_stumps_per_attribute=None, use_r=True, c_bound_sol=True, + n_stumps_per_attribute=1, use_r=True, c_bound_sol=True, plotted_metric=Metrics.zero_one_loss, save_train_data=True, - test_graph=True, mincq_tracking=True): + test_graph=True, mincq_tracking=False): super(ColumnGenerationClassifierQar, self).__init__() r""" @@ -46,7 +46,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): plotted_metric : Metric module The metric that will be plotted for each iteration of boosting. """ - if type(random_state) is int: self.random_state = np.random.RandomState(random_state) else: @@ -62,8 +61,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.c_bound_choice = c_bound_choice self.random_start = random_start self.plotted_metric = plotted_metric - if n_stumps_per_attribute: - self.n_stumps = n_stumps_per_attribute + self.n_stumps = n_stumps_per_attribute self.use_r = use_r self.c_bound_sol = c_bound_sol self.save_train_data = save_train_data @@ -74,15 +72,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): "n_stumps", "use_r", "c_bound_sol"] self.mincq_tracking = mincq_tracking + def get_params(self, deep=True): + return {"random_state":self.random_state, "n_max_iterations":self.n_max_iterations} + def set_params(self, **params): self.n_max_iterations = params["n_max_iterations"] return self def fit(self, X, y): - start = time.time() - formatted_X, formatted_y = self.format_X_y(X, y) self.init_info_containers() diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py index dedcf5e6..a5e34dda 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py @@ -23,7 +23,7 @@ from sklearn import metrics # For stastics on classification # Author-Info __author__ = "Nikolas Huelsmann" __status__ = "Prototype" # Production, Development, Prototype -__date__ = 2016 - 03 - 25 +__date__ = 2016_03_25 #### Export Features to CSV diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index f67aac06..64ecb3ff 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -28,7 +28,7 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo scorer = metricModule.get_scorer(**metricKWARGS) nb_possible_combinations = compute_possible_combinations(params_dict) min_list = np.array([min(nb_possible_combination, nIter) for nb_possible_combination in nb_possible_combinations]) - randomSearch = RandomizedSearchCV(estimator, n_iter=np.sum(min_list), param_distributions=params_dict, refit=True, + randomSearch = RandomizedSearchCV(estimator, n_iter=int(np.sum(min_list)), param_distributions=params_dict, refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) detector = randomSearch.fit(X_train, y_train) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py index 6b0f045e..6773cc5c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py @@ -16,7 +16,6 @@ class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): use_r=True, c_bound_sol=True ) - self.param_names = ["n_max_iterations"] self.distribs = [CustomRandint(low=2, high=1000)] self.classed_params = [] @@ -30,7 +29,7 @@ class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier): return self.getInterpretQar(directory, y_test) def get_name_for_fusion(self): - return "CGr" + return "CGD" def formatCmdArgs(args): diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py index 351698c2..bb0a6ca0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py @@ -5,7 +5,7 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs): + def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=1, **kwargs): super(CGreed, self).__init__(n_max_iterations=n_max_iterations, random_state=random_state, self_complemented=True, diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py index 99618aff..9556dd1c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py @@ -7,7 +7,7 @@ import os class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, **kwargs): + def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=10, **kwargs): super(CQBoost, self).__init__( random_state=random_state, mu=mu, @@ -18,6 +18,7 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): CustomRandint(low=1, high=15, multiplier="e-")] self.classed_params = [] self.weird_strings = {} + self.n_stumps = n_stumps if "nbCores" not in kwargs: self.nbCores = 1 else: diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py new file mode 100644 index 00000000..40eb5145 --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py @@ -0,0 +1,46 @@ +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import GradientBoostingClassifier + +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier + +# Author-Info +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + + +class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): + + def __init__(self, random_state=None, loss="exponential", max_depth=1.0, + n_estimators=100, init=DecisionTreeClassifier(max_depth=1), **kwargs): + super(GradientBoosting, self).__init__( + loss=loss, + max_depth=max_depth, + n_estimators=n_estimators, + init=init, + random_state=random_state + ) + self.param_names = ["n_estimators",] + self.classed_params = [] + self.distribs = [CustomRandint(low=50, high=500),] + self.weird_strings = {} + + def canProbas(self): + """Used to know if the classifier can return label probabilities""" + return True + + def getInterpret(self, directory, y_test): + interpretString = "" + return interpretString + + +def formatCmdArgs(args): + """Used to format kwargs for the parsed args""" + kwargsDict = {"n_estimators": args.GB_n_est,} + return kwargsDict + + +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append({"n_estimators": randomState.randint(50, 500),}) + return paramsSet \ No newline at end of file -- GitLab