diff --git a/summit/multiview_platform/monoview_classifiers/random_scm.py b/summit/multiview_platform/monoview_classifiers/random_scm.py
index c648c01f40c968cbff0d4bc2a570990cc15b63cd..287cfcabf1ae29a1ea88bbd49b3f2b22be2e4ee1 100644
--- a/summit/multiview_platform/monoview_classifiers/random_scm.py
+++ b/summit/multiview_platform/monoview_classifiers/random_scm.py
@@ -82,7 +82,7 @@ class ScmBagging(RandomScmClassifier, BaseMonoviewClassifier):
                                      max_samples=max_samples,
                                      max_features=max_features,
                                      max_rules=max_rules,
-                                     p_options=p_options,
+                                     p=p_options,
                                      model_type=model_type,
                                      random_state=random_state)
         self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
@@ -94,7 +94,7 @@ class ScmBagging(RandomScmClassifier, BaseMonoviewClassifier):
     def set_params(self, p_options=[0.316], **kwargs):
         if not isinstance(p_options, list):
             p_options = [p_options]
-        kwargs["p_options"] = p_options
+        kwargs["p"] = p_options
         for parameter, value in iteritems(kwargs):
             setattr(self, parameter, value)
         return self
diff --git a/summit/multiview_platform/multiview_classifiers/mucombo.py b/summit/multiview_platform/multiview_classifiers/mucombo.py
index 9fbb698c54621f8456e5ae4a74d348134c854a70..2776055ebd88b74cf3ae6a976cc3d7928597233c 100644
--- a/summit/multiview_platform/multiview_classifiers/mucombo.py
+++ b/summit/multiview_platform/multiview_classifiers/mucombo.py
@@ -7,20 +7,20 @@
 from ..utils.hyper_parameter_search import CustomRandint
 from ..utils.dataset import get_samples_views_indices
 from ..utils.base import base_boosting_estimators
 
-classifier_class_name = "MuCumbo"
+classifier_class_name = "MuCombo"
 
 
 class MuCombo(BaseMultiviewClassifier, MuComboClassifier):
 
-    def __init__(self, estimator=None,
+    def __init__(self, base_estimator=None,
                  n_estimators=50,
                  random_state=None,**kwargs):
         BaseMultiviewClassifier.__init__(self, random_state)
-        estimator = self.set_base_estim_from_dict(estimator, **kwargs)
-        MuComboClassifier.__init__(self, estimator=estimator,
+        base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs)
+        MuComboClassifier.__init__(self, base_estimator=base_estimator,
                                    n_estimators=n_estimators,
                                    random_state=random_state,)
-        self.param_names = ["estimator", "n_estimators", "random_state",]
+        self.param_names = ["base_estimator", "n_estimators", "random_state",]
         self.distribs = [base_boosting_estimators,
                          CustomRandint(5,200), [random_state],]
@@ -43,6 +43,12 @@ class MuCombo(BaseMultiviewClassifier, MuComboClassifier):
                                               view_indices=view_indices)
         return MuComboClassifier.predict(self, numpy_X)
 
-    def get_interpretation(self, directory, base_file_name, labels,
-                           multiclass=False):
+    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
+                           multi_class=False):
         return ""
+
+    def set_base_estim_from_dict(self, dict):
+        key, args = list(dict.items())[0]
+
+        if key == "decision_tree":
+            return DecisionTreeClassifier(**args)
\ No newline at end of file
diff --git a/summit/multiview_platform/multiview_classifiers/mumbo.py b/summit/multiview_platform/multiview_classifiers/mumbo.py
index fcaf64ab5ccdbb29867b19740c986ce4118ae8f6..b3933cbaafccec4f62b70208779d46897549ade9 100644
--- a/summit/multiview_platform/multiview_classifiers/mumbo.py
+++ b/summit/multiview_platform/multiview_classifiers/mumbo.py
@@ -13,21 +13,22 @@
 from .. import monoview_classifiers
 
 classifier_class_name = "Mumbo"
 
+
 class Mumbo(BaseMultiviewClassifier, MumboClassifier):
 
-    def __init__(self, estimator=None,
+    def __init__(self, base_estimator=None,
                  n_estimators=50, random_state=None,
                  best_view_mode="edge", **kwargs):
         BaseMultiviewClassifier.__init__(self, random_state)
-        base_estimator = self.set_base_estim_from_dict(estimator, **kwargs)
-        MumboClassifier.__init__(self, base_estimator=estimator,
-                                 n_estimators=n_estimators,
-                                 random_state=random_state,
-                                 best_view_mode=best_view_mode)
-        self.param_names = ["estimator", "n_estimators", "random_state", "best_view_mode"]
+        base_estimator = self.set_base_estim_from_dict(base_estimator)
+        MumboClassifier.__init__(self, base_estimator=base_estimator,
+                                 n_estimators=n_estimators,
+                                 random_state=random_state,
+                                 best_view_mode=best_view_mode)
+        self.param_names = ["base_estimator", "n_estimators", "random_state", "best_view_mode"]
         self.distribs = [base_boosting_estimators,
-                         CustomRandint(5,200), [random_state], ["edge", "error"]]
+                         CustomRandint(5, 200), [random_state], ["edge", "error"]]
 
     def set_params(self, estimator=None, **params):
         """
@@ -42,23 +43,22 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
             self.base_estimator = self.set_base_estim_from_dict(estimator)
             MumboClassifier.set_params(self, **params)
         else:
-            MumboClassifier.set_params(self, estimator=estimator, **params)
-
+            MumboClassifier.set_params(self, base_estimator=estimator, **params)
 
     def fit(self, X, y, train_indices=None, view_indices=None):
         train_indices, view_indices = get_samples_views_indices(X,
-                                                          train_indices,
-                                                          view_indices)
+                                                                 train_indices,
+                                                                 view_indices)
         self.used_views = view_indices
         self.view_names = [X.get_view_name(view_index)
                            for view_index in view_indices]
         numpy_X, view_limits = X.to_numpy_array(sample_indices=train_indices,
                                                 view_indices=view_indices)
-        self.view_shapes = [view_limits[ind+1]-view_limits[ind]
-                            for ind in range(len(self.used_views)) ]
+        self.view_shapes = [view_limits[ind + 1] - view_limits[ind]
+                            for ind in range(len(self.used_views))]
         return MumboClassifier.fit(self, numpy_X, y[train_indices],
-                                          view_limits)
+                                   view_limits)
 
 
     def predict(self, X, sample_indices=None, view_indices=None):
         sample_indices, view_indices = get_samples_views_indices(X,
@@ -69,10 +69,11 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
                                               view_indices=view_indices)
         return MumboClassifier.predict(self, numpy_X)
 
-    def get_interpretation(self, directory, base_file_name, labels, multiclass=False):
+    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
+                           multi_class=False):
         self.view_importances = np.zeros(len(self.used_views))
         self.feature_importances_ = [np.zeros(view_shape)
-                                      for view_shape in self.view_shapes]
+                                     for view_shape in self.view_shapes]
         for best_view, estimator_weight, estimator in zip(self.best_views_, self.estimator_weights_, self.estimators_):
             self.view_importances[best_view] += estimator_weight
             if hasattr(estimator, "feature_importances_"):
@@ -80,26 +81,32 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
         importances_sum = sum([np.sum(feature_importances)
                                for feature_importances
                                in self.feature_importances_])
-        self.feature_importances_ = [feature_importances/importances_sum
+        self.feature_importances_ = [feature_importances / importances_sum
                                      for feature_importances
                                      in self.feature_importances_]
         for feature_importances, view_name in zip(self.feature_importances_, self.view_names):
             secure_file_path(os.path.join(directory, "feature_importances",
-                                          base_file_name+view_name+"-feature_importances.csv"))
+                                          base_file_name + view_name + "-feature_importances.csv"))
             np.savetxt(os.path.join(directory, "feature_importances",
-                                    base_file_name+view_name+"-feature_importances.csv"),
+                                    base_file_name + view_name + "-feature_importances.csv"),
                        feature_importances, delimiter=',')
 
         self.view_importances /= np.sum(self.view_importances)
-        np.savetxt(os.path.join(directory, base_file_name+"view_importances.csv"), self.view_importances,
+        np.savetxt(os.path.join(directory, base_file_name + "view_importances.csv"), self.view_importances,
                    delimiter=',')
         sorted_view_indices = np.argsort(-self.view_importances)
         interpret_string = "Mumbo used {} iterations to converge.".format(self.best_views_.shape[0])
-        interpret_string+= "\n\nViews importance : \n"
+        interpret_string += "\n\nViews importance : \n"
         for view_index in sorted_view_indices:
-            interpret_string+="- View {} ({}), importance {}\n".format(view_index,
-                                                                       self.view_names[view_index],
-                                                                       self.view_importances[view_index])
-        interpret_string +="\n The boosting process selected views : \n" + ", ".join(map(str, self.best_views_))
-        interpret_string+="\n\n With estimator weights : \n"+ "\n".join(map(str,self.estimator_weights_/np.sum(self.estimator_weights_)))
+            interpret_string += "- View {} ({}), importance {}\n".format(view_index,
+                                                                         self.view_names[view_index],
+                                                                         self.view_importances[view_index])
+        interpret_string += "\n The boosting process selected views : \n" + ", ".join(map(str, self.best_views_))
+        interpret_string += "\n\n With estimator weights : \n" + "\n".join(
+            map(str, self.estimator_weights_ / np.sum(self.estimator_weights_)))
         return interpret_string
+
+    def set_base_estim_from_dict(self, dict):
+        key, args = list(dict.items())[0]
+        if key == "decision_tree":
+            return DecisionTreeClassifier(**args)
diff --git a/summit/multiview_platform/result_analysis/feature_importances.py b/summit/multiview_platform/result_analysis/feature_importances.py
index 36c0eb3514b0fa3db388af10803b60f2f245f011..735455abab7676bd95c9e8d821c7042b028ef6a0 100644
--- a/summit/multiview_platform/result_analysis/feature_importances.py
+++ b/summit/multiview_platform/result_analysis/feature_importances.py
@@ -44,7 +44,7 @@ def get_feature_importances(result, feature_ids=None, view_names=None,):
                                 v_feature_id]
             feature_importances["mv"] = pd.DataFrame(index=feat_ids)
             if hasattr(classifier_result.clf, 'feature_importances_'):
-                feature_importances["mv"][classifier_result.classifier_name] = classifier_result.clf.feature_importances_
+                feature_importances["mv"][classifier_result.classifier_name] = np.concatenate(classifier_result.clf.feature_importances_)
 
     return feature_importances
 
diff --git a/summit/multiview_platform/utils/dataset.py b/summit/multiview_platform/utils/dataset.py
index 98dc7a39ea552e70b5f555fb0728b650aa26fa5b..beaf5829d86231854630d0f935968b7e450e343a 100644
--- a/summit/multiview_platform/utils/dataset.py
+++ b/summit/multiview_platform/utils/dataset.py
@@ -106,9 +106,9 @@ class Dataset():
         return concat_views, view_limits
 
     def select_labels(self, selected_label_names):
-        selected_labels = [self.get_label_names(decode=True).index(label_name.decode())
+        selected_labels = [self.get_label_names().index(label_name.decode())
                            if isinstance(label_name, bytes)
-                           else self.get_label_names(decode=True).index(label_name)
+                           else self.get_label_names().index(label_name)
                            for label_name in selected_label_names]
         selected_indices = np.array([index
                                      for index, label in