diff --git a/config_files/config_cuisine.yml b/config_files/config_cuisine.yml index 656d2d87e2f506e1687a057dd68a25bc3a198f9c..0bab158fc3457b297cabb806f5d6bf6dad2b2b99 100644 --- a/config_files/config_cuisine.yml +++ b/config_files/config_cuisine.yml @@ -22,7 +22,7 @@ nb_folds: 5 nb_class: 2 classes: type: ["monoview"] -algos_monoview: ["scm_bagging",] +algos_monoview: ["scm_bagging", "scm", "cb_boost", "adaboost"] algos_multiview: ["group_scm"] stats_iter: 2 metrics: @@ -30,5 +30,8 @@ metrics: f1_score: average: 'binary' metric_princ: "f1_score" -hps_type: "Random" -hps_args: {} \ No newline at end of file +hps_type: "None" +hps_args: {} + +cb_boost: + n_stumps: 10 \ No newline at end of file diff --git a/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py b/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py index 88ec05acf5e868817e9919b7e5b39d8574a92ec6..704a58951fb617738b42160019c2b311410ce268 100644 --- a/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py +++ b/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py @@ -57,6 +57,7 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.step_prod = None self.n_max_iterations = n_max_iterations self.estimators_generator = estimators_generator + self.estimators_generator_name = estimators_generator self.self_complemented = self_complemented self.twice_the_same = twice_the_same self.random_start = random_start @@ -72,6 +73,7 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.mincq_tracking = mincq_tracking def fit(self, X, y): + self.n_features = X.shape[1] formatted_X, formatted_y = self.format_X_y(X, y) self.init_info_containers() @@ -131,6 +133,11 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): end = time.time() self.train_time = end - start + self.feature_importances_ = np.zeros(X.shape[1]) + for iter_index, iteration_feature_imporances in enumerate(self.chosen_features): + for feature_index, importance in iteration_feature_imporances: + self.feature_importances_[feature_index] += importance*self.weights_[iter_index] + self.feature_importances_ /= np.sum(self.feature_importances_) return self def predict(self, X): @@ -239,6 +246,13 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): def append_new_voter(self, new_voter_index): """Used to append the voter to the majority vote""" self.chosen_columns_.append(new_voter_index) + if self.estimators_generator_name=="Stumps": + self.chosen_features.append([(int(new_voter_index%(self.n_stumps*self.n_features)/self.n_stumps), 1)]) + elif self.estimators_generator_name == "Trees": + self.chosen_features.append([(self.estimators_generator.attribute_indices[new_voter_index][fake_ind], importance) + for fake_ind, importance + in enumerate(self.estimators_generator.estimators_[new_voter_index].feature_importances_) + if importance>0]) self.new_voter = self.classification_matrix[:, new_voter_index].reshape( (self.n_total_examples, 1)) @@ -309,6 +323,7 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): def init_info_containers(self): """Initialize the containers that will be collected at each iteration for the analysis""" self.weights_ = [] + self.chosen_features = [] self.chosen_columns_ = [] self.fobidden_columns = [] self.c_bounds = [] @@ -468,7 +483,7 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): os.path.join(directory, 'step_test_c_bounds.png'), "C_bound", set="test") - def getInterpretCBBoost(self, directory, y_test=None): + def getInterpretCBBoost(self, directory, base_file_name="", y_test=None): self.directory = directory """Used to interpret the functionning of the algorithm""" if self.step_decisions is not None: @@ -539,7 +554,7 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): interpretString += "\n \n With arguments : \n" + u'\u2022 ' + ( "\n" + u'\u2022 ').join(['%s: \t%s' % (key, value) for (key, value) in - args_dict.items()]) + args_dict.items()])+"\n\n" if not self.respected_bound: interpretString += "\n\n The bound was not respected" diff --git a/summit/multiview_platform/monoview_classifiers/cb_boost.py b/summit/multiview_platform/monoview_classifiers/cb_boost.py index 853a380f0d8bb8b73ad369f1c72140049cedb3e2..2374d68ea605a8b6f8bad0f7409dfef244685bc7 100644 --- a/summit/multiview_platform/monoview_classifiers/cb_boost.py +++ b/summit/multiview_platform/monoview_classifiers/cb_boost.py @@ -76,7 +76,9 @@ class CBBoost(CBBoostClassifier, BaseMonoviewClassifier): ------- """ - return self.getInterpretCBBoost(directory, y_test) + interpret_string = self.getInterpretCBBoost(directory, base_file_name, y_test) + interpret_string += self.get_feature_importance(directory, base_file_name) + return interpret_string def get_name_for_fusion(self): """ diff --git a/summit/multiview_platform/monoview_classifiers/scm.py b/summit/multiview_platform/monoview_classifiers/scm.py index c4b2dd705e020e6a6247365c8b2746dc0bf1681a..56fc0dd031929881a8a9d6bfe9ec9bf45e9aba88 100644 --- a/summit/multiview_platform/monoview_classifiers/scm.py +++ b/summit/multiview_platform/monoview_classifiers/scm.py @@ -1,5 +1,7 @@ from pyscm.scm import SetCoveringMachineClassifier as scm +import numpy as np + from ..monoview.monoview_utils import BaseMonoviewClassifier from ..utils.hyper_parameter_search import CustomRandint, CustomUniform @@ -61,6 +63,18 @@ class SCM(scm, BaseMonoviewClassifier): self.classed_params = [] self.weird_strings = {} + def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): + self.n_features = X.shape[1] + scm.fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params) + self.feature_importances_ = np.zeros(self.n_features) + # sum the rules importances : + # rules_importances = estim.get_rules_importances() #activate it when pyscm will implement importance + rules_importances = np.ones(len( + self.model_.rules)) # delete it when pyscm will implement importance + for rule, importance in zip(self.model_.rules, rules_importances): + self.feature_importances_[rule.feature_idx] += importance + self.feature_importances_ /= np.sum(self.feature_importances_) + # def canProbas(self): # """ # Used to know if the classifier can return label probabilities @@ -71,17 +85,11 @@ class SCM(scm, BaseMonoviewClassifier): # """ # return False - def get_interpretation(self, directory, y_test, multi_class=False): - interpretString = "Model used : " + str(self.model_) - return interpretString - + def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): + interpret_string = self.get_feature_importance(directory, base_file_name) + interpret_string += "Model used : " + str(self.model_) + return interpret_string -# def formatCmdArgs(args): -# """Used to format kwargs for the parsed args""" -# kwargsDict = {"model_type": args.SCM_model_type, -# "p": args.SCM_p, -# "max_rules": args.SCM_max_rules} -# return kwargsDict def paramsToSet(nIter, random_state): diff --git a/summit/multiview_platform/monoview_classifiers/scm_bagging.py b/summit/multiview_platform/monoview_classifiers/scm_bagging.py index d91598cf63956de55e85a6f68deb9e2f07d6059c..90ee6772f4e5d271802a184fb8e219729f15b1f8 100644 --- a/summit/multiview_platform/monoview_classifiers/scm_bagging.py +++ b/summit/multiview_platform/monoview_classifiers/scm_bagging.py @@ -170,6 +170,7 @@ class ScmBaggingClassifier(BaseEnsemble, ClassifierMixin, BaseMonoviewClassifier self.binary_to_labels = {bin_label: str_label for str_label, bin_label in self.labels_to_binary.items()} y = np.array([self.labels_to_binary[l] for l in y]) + self.n_features = X.shape[1] estimators = [] self.estim_features = [] @@ -296,6 +297,10 @@ class ScmBaggingClassifier(BaseEnsemble, ClassifierMixin, BaseMonoviewClassifier print(feature_id_occurences) importances = {k: round(v / feature_id_occurences[k], 3) for k, v in importances.items()} + self.feature_importances_ = np.array([importances[k] + if k in importances else 0 + for k in range(self.n_features)]) + self.feature_importances_ /= np.sum(self.feature_importances_) return importances def get_estimators_indices(self): @@ -313,3 +318,9 @@ class ScmBaggingClassifier(BaseEnsemble, ClassifierMixin, BaseMonoviewClassifier def score(self, X, y): return accuracy_score(y, self.predict(X)) + + def get_interpretation(self, directory, base_file_name, y_test, + multi_class=False): + self.features_importance() + interpret_string = self.get_feature_importance(directory, base_file_name) + return interpret_string