diff --git a/summit/multiview_platform/multiview_classifiers/additions/kernel_learning.py b/summit/multiview_platform/multiview_classifiers/additions/kernel_learning.py
new file mode 100644
index 0000000000000000000000000000000000000000..98047643e9d514f848fc2888c18f86e0a4c5f015
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/additions/kernel_learning.py
@@ -0,0 +1,103 @@
+from sklearn.metrics import pairwise
+import numpy as np
+
+from ...multiview.multiview_utils import BaseMultiviewClassifier
+from ...utils.hyper_parameter_search import CustomUniform, CustomRandint
+from ...utils.transformations import sign_labels, unsign_labels
+from ...utils.dataset import get_samples_views_indices
+
+class KernelClassifier(BaseMultiviewClassifier):
+
+    def __init__(self, random_state=None):
+        super().__init__(random_state)
+
+    # def _compute_kernels(self, X, sample_indices, view_indices):
+    #     new_X = {}
+    #     for index, (kernel_function, kernel_config, view_index) in enumerate(
+    #             zip(self.kernel_functions, self.kernel_configs, view_indices)):
+    #         new_X[index] = kernel_function(X.get_v(view_index,
+    #                                                sample_indices),
+    #                                        **kernel_config)
+    #     return new_X
+
+    def format_X(self, X, sample_indices, view_indices):
+        sample_indices, view_indices = get_samples_views_indices(X,
+                                                                 sample_indices,
+                                                                 view_indices)
+        formatted_X = dict((index, X.get_v(view_index, sample_indices=sample_indices))
+                           for index, view_index in enumerate(view_indices))
+
+        return formatted_X, sample_indices
+
+    def extract_labels(self, predicted_labels):
+        signed_labels = np.sign(predicted_labels)
+        return unsign_labels(signed_labels)
+
+    def init_kernels(self, nb_view=2):
+        if isinstance(self.kernel, KernelDistribution):
+            self.kernel = self.kernel.draw(nb_view)
+        elif isinstance(self.kernel, str):
+            self.kernel = [self.kernel
+                           for _ in range(nb_view)]
+        elif isinstance(self.kernel, list):
+            pass
+
+        if isinstance(self.kernel_params, KernelConfigDistribution):
+            self.kernel_params = self.kernel_params.draw(nb_view)
+            self.kernel_params = [kernel_config[kernel_name]
+                                  for kernel_config, kernel_name
+                                  in zip(self.kernel_params,
+                                         self.kernel)]
+
+        elif isinstance(self.kernel_params, dict):
+            self.kernel_params = [self.kernel_params for _ in range(nb_view)]
+        else:
+            pass
+
+
+class KernelConfigGenerator:
+
+    def __init__(self):
+        pass
+
+    def rvs(self, random_state=None):
+        return KernelConfigDistribution(seed=random_state.randint(1))
+
+
+class KernelConfigDistribution:
+
+    def __init__(self, seed=42):
+        self.random_state = np.random.RandomState(seed)
+        self.possible_config = {
+            "additive_chi2": {"gamma": CustomUniform()},
+            "rbf": {"gamma": CustomUniform()},
+            "poly": {"degree": CustomRandint(1, 4), "gamma": CustomUniform()}
+        }
+
+    def draw(self, nb_view):
+        drawn_params = [{} for _ in range(nb_view)]
+        for view_index in range(nb_view):
+            for kernel_name, params_dict in self.possible_config.items():
+                drawn_params[view_index][kernel_name] = {}
+                for param_name, distrib in params_dict.items():
+                    drawn_params[view_index][kernel_name][param_name] = distrib.rvs(self.random_state)
+        return drawn_params
+
+
+class KernelGenerator:
+
+    def __init__(self):
+        pass
+
+    def rvs(self, random_state=None):
+        return KernelDistribution(seed=random_state.randint(1))
+
+
+class KernelDistribution:
+
+    def __init__(self, seed=42):
+        self.random_state = np.random.RandomState(seed)
+        self.available_kernels = ["rbf"]
+
+    def draw(self, nb_view):
+        return list(self.random_state.choice(self.available_kernels, nb_view))
diff --git a/summit/multiview_platform/multiview_classifiers/lp_norm_mkl.py b/summit/multiview_platform/multiview_classifiers/lp_norm_mkl.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ae3ec9390383859de2e0b90bb8c3685349a3087
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/lp_norm_mkl.py
@@ -0,0 +1,36 @@
+
+from multimodal.kernels.lpMKL import MKL
+
+from .additions.kernel_learning import KernelClassifier, KernelConfigGenerator
+from ..utils.hyper_parameter_search import CustomUniform
+
+classifier_class_name = "LPNormMKL"
+
+class LPNormMKL(KernelClassifier, MKL):
+    def __init__(self, random_state=None, lmbda=0.1, nystrom_param=1, n_loops=50,
+                 precision=0.0001, use_approx=True, kernel="rbf",
+                 kernel_params=None):
+        KernelClassifier.__init__(self, random_state)
+        MKL.__init__(self, lmbda, nystrom_param=nystrom_param,
+                     kernel=kernel,
+                     n_loops=n_loops,
+                     precision=precision,
+                     use_approx=use_approx,
+                     kernel_params=kernel_params)
+        self.param_names = ["lmbda", "kernel", "kernel_params"]
+        self.distribs = [CustomUniform(), ['rbf', 'additive_chi2', 'poly'],
+                         KernelConfigGenerator()]
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        formatted_X, train_indices = self.format_X(X, train_indices, view_indices)
+        self.init_kernels(nb_view=len(formatted_X))
+
+        return MKL.fit(self, formatted_X, y[train_indices])
+
+    def predict(self, X, sample_indices=None, view_indices=None):
+        new_X, _ = self.format_X(X, sample_indices, view_indices)
+        return self.extract_labels(MKL.predict(self, new_X))
+
+
+
diff --git a/summit/multiview_platform/multiview_classifiers/mucombo.py b/summit/multiview_platform/multiview_classifiers/mucombo.py
new file mode 100644
index 0000000000000000000000000000000000000000..9fbb698c54621f8456e5ae4a74d348134c854a70
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/mucombo.py
@@ -0,0 +1,48 @@
+from sklearn.tree import DecisionTreeClassifier
+
+
+from multimodal.boosting.combo import MuComboClassifier
+from ..multiview.multiview_utils import BaseMultiviewClassifier
+from ..utils.hyper_parameter_search import CustomRandint
+from ..utils.dataset import get_samples_views_indices
+from ..utils.base import base_boosting_estimators
+
+classifier_class_name = "MuCombo"
+
+
+class MuCombo(BaseMultiviewClassifier, MuComboClassifier):
+
+    def __init__(self, estimator=None,
+                 n_estimators=50,
+                 random_state=None, **kwargs):
+        BaseMultiviewClassifier.__init__(self, random_state)
+        estimator = self.set_base_estim_from_dict(estimator, **kwargs)
+        MuComboClassifier.__init__(self, estimator=estimator,
+                                   n_estimators=n_estimators,
+                                   random_state=random_state)
+        self.param_names = ["estimator", "n_estimators", "random_state"]
+        self.distribs = [base_boosting_estimators,
+                         CustomRandint(5, 200), [random_state]]
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        train_indices, view_indices = get_samples_views_indices(X,
+                                                                train_indices,
+                                                                view_indices)
+        self.used_views = view_indices
+        numpy_X, view_limits = X.to_numpy_array(sample_indices=train_indices,
+                                                view_indices=view_indices)
+        return MuComboClassifier.fit(self, numpy_X, y[train_indices],
+                                     view_limits)
+
+    def predict(self, X, sample_indices=None, view_indices=None):
+        sample_indices, view_indices = get_samples_views_indices(X,
+                                                                 sample_indices,
+                                                                 view_indices)
+        self._check_views(view_indices)
+        numpy_X, view_limits = X.to_numpy_array(sample_indices=sample_indices,
+                                                view_indices=view_indices)
+        return MuComboClassifier.predict(self, numpy_X)
+
+    def get_interpretation(self, directory, base_file_name, labels,
+                           multiclass=False):
+        return ""
diff --git a/summit/multiview_platform/multiview_classifiers/mumbo.py b/summit/multiview_platform/multiview_classifiers/mumbo.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcaf64ab5ccdbb29867b19740c986ce4118ae8f6
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/mumbo.py
@@ -0,0 +1,105 @@
+from sklearn.tree import DecisionTreeClassifier
+import numpy as np
+import os
+
+from multimodal.boosting.mumbo import MumboClassifier
+
+from ..multiview.multiview_utils import BaseMultiviewClassifier
+from ..utils.hyper_parameter_search import CustomRandint
+from ..utils.dataset import get_samples_views_indices
+from ..utils.base import base_boosting_estimators
+from ..utils.organization import secure_file_path
+from .. import monoview_classifiers
+
+classifier_class_name = "Mumbo"
+
+class Mumbo(BaseMultiviewClassifier, MumboClassifier):
+
+    def __init__(self, estimator=None,
+                 n_estimators=50,
+                 random_state=None,
+                 best_view_mode="edge", **kwargs):
+        BaseMultiviewClassifier.__init__(self, random_state)
+        base_estimator = self.set_base_estim_from_dict(estimator, **kwargs)
+        MumboClassifier.__init__(self, base_estimator=base_estimator,
+                                 n_estimators=n_estimators,
+                                 random_state=random_state,
+                                 best_view_mode=best_view_mode)
+        self.param_names = ["estimator", "n_estimators", "random_state", "best_view_mode"]
+        self.distribs = [base_boosting_estimators,
+                         CustomRandint(5, 200), [random_state], ["edge", "error"]]
+
+    def set_params(self, estimator=None, **params):
+        """
+        Sets the base estimator, either directly or from a dict.
+        :param estimator:
+        :param params:
+        :return:
+        """
+        if estimator is None:
+            self.base_estimator = DecisionTreeClassifier()
+        elif isinstance(estimator, dict):
+            self.base_estimator = self.set_base_estim_from_dict(estimator)
+            MumboClassifier.set_params(self, **params)
+        else:
+            MumboClassifier.set_params(self, estimator=estimator, **params)
+
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        train_indices, view_indices = get_samples_views_indices(X,
+                                                                train_indices,
+                                                                view_indices)
+        self.used_views = view_indices
+        self.view_names = [X.get_view_name(view_index)
+                           for view_index in view_indices]
+        numpy_X, view_limits = X.to_numpy_array(sample_indices=train_indices,
+                                                view_indices=view_indices)
+        self.view_shapes = [view_limits[ind + 1] - view_limits[ind]
+                            for ind in range(len(self.used_views))]
+
+        return MumboClassifier.fit(self, numpy_X, y[train_indices],
+                                   view_limits)
+
+    def predict(self, X, sample_indices=None, view_indices=None):
+        sample_indices, view_indices = get_samples_views_indices(X,
+                                                                 sample_indices,
+                                                                 view_indices)
+        self._check_views(view_indices)
+        numpy_X, view_limits = X.to_numpy_array(sample_indices=sample_indices,
+                                                view_indices=view_indices)
+        return MumboClassifier.predict(self, numpy_X)
+
+    def get_interpretation(self, directory, base_file_name, labels, multiclass=False):
+        self.view_importances = np.zeros(len(self.used_views))
+        self.feature_importances_ = [np.zeros(view_shape)
+                                     for view_shape in self.view_shapes]
+        for best_view, estimator_weight, estimator in zip(self.best_views_, self.estimator_weights_, self.estimators_):
+            self.view_importances[best_view] += estimator_weight
+            if hasattr(estimator, "feature_importances_"):
+                self.feature_importances_[best_view] += estimator.feature_importances_
+        importances_sum = sum([np.sum(feature_importances)
+                               for feature_importances
+                               in self.feature_importances_])
+        self.feature_importances_ = [feature_importances / importances_sum
+                                     for feature_importances
+                                     in self.feature_importances_]
+        for feature_importances, view_name in zip(self.feature_importances_, self.view_names):
+            secure_file_path(os.path.join(directory, "feature_importances",
+                                          base_file_name + view_name + "-feature_importances.csv"))
+            np.savetxt(os.path.join(directory, "feature_importances",
+                                    base_file_name + view_name + "-feature_importances.csv"),
+                       feature_importances, delimiter=',')
+        self.view_importances /= np.sum(self.view_importances)
+        np.savetxt(os.path.join(directory, base_file_name + "view_importances.csv"), self.view_importances,
+                   delimiter=',')
+
+        sorted_view_indices = np.argsort(-self.view_importances)
+        interpret_string = "Mumbo used {} iterations to converge.".format(self.best_views_.shape[0])
+        interpret_string += "\n\nView importances: \n"
+        for view_index in sorted_view_indices:
+            interpret_string += "- View {} ({}), importance {}\n".format(view_index,
+                                                                         self.view_names[view_index],
+                                                                         self.view_importances[view_index])
+        interpret_string += "\nThe boosting process selected views: \n" + ", ".join(map(str, self.best_views_))
+        interpret_string += "\n\nWith estimator weights: \n" + "\n".join(map(str, self.estimator_weights_ / np.sum(self.estimator_weights_)))
+        return interpret_string
diff --git a/summit/multiview_platform/multiview_classifiers/mvml.py b/summit/multiview_platform/multiview_classifiers/mvml.py
new file mode 100644
index 0000000000000000000000000000000000000000..458b42c7f342d0bb793c66b36f11b573bf3ee252
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/mvml.py
@@ -0,0 +1,49 @@
+
+from multimodal.kernels.mvml import MVML
+
+from .additions.kernel_learning import KernelClassifier, KernelConfigGenerator
+from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
+
+
+classifier_class_name = "MVMLClassifier"
+
+
+class MVMLClassifier(KernelClassifier, MVML):
+
+    def __init__(self, random_state=None, lmbda=0.1, eta=0.1, nystrom_param=1,
+                 n_loops=50,
+                 precision=0.0001, learn_A=0, kernel="rbf", learn_w=0,
+                 kernel_params=None):
+        KernelClassifier.__init__(self, random_state)
+        MVML.__init__(self, lmbda=lmbda, eta=eta,
+                      nystrom_param=nystrom_param,
+                      kernel=kernel,
+                      n_loops=n_loops,
+                      precision=precision,
+                      learn_A=learn_A,
+                      learn_w=learn_w,
+                      kernel_params=kernel_params)
+        self.param_names = ["lmbda", "eta", "nystrom_param", "learn_A",
+                            "learn_w", "n_loops", "kernel_params", "kernel",
+                            "precision"]
+        self.distribs = [CustomUniform(),
+                         CustomUniform(),
+                         CustomUniform(),
+                         [1, 3, 4],
+                         [0, 1],
+                         CustomRandint(low=5, high=25),
+                         KernelConfigGenerator(),
+                         ['rbf', 'additive_chi2', 'poly'],
+                         CustomRandint(low=3, high=6, multiplier="e-")]
+
+    def fit(self, X, y, train_indices=None, view_indices=None):
+        formatted_X, train_indices = self.format_X(X, train_indices, view_indices)
+        self.init_kernels(nb_view=len(formatted_X))
+        return MVML.fit(self, formatted_X, y[train_indices])
+
+    def predict(self, X, sample_indices=None, view_indices=None):
+        new_X, _ = self.format_X(X, sample_indices, view_indices)
+        return self.extract_labels(MVML.predict(self, new_X))
+
+
+#
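For context only, not part of the patch above: a minimal sketch of how the per-view kernel and kernel-parameter distributions added in kernel_learning.py could be drawn on their own, the way the platform's random hyper-parameter search would use them. It assumes the patch is installed inside the summit package and that CustomUniform/CustomRandint expose the rvs(random_state) method already relied on by KernelConfigDistribution.draw; the variable names and the choice of three views are illustrative.

import numpy as np

# Hypothetical usage sketch (not shipped with the patch).
from summit.multiview_platform.multiview_classifiers.additions.kernel_learning import (
    KernelConfigGenerator, KernelGenerator)

rs = np.random.RandomState(42)

# KernelGenerator.rvs returns a KernelDistribution; draw(nb_view) picks one kernel name per view.
kernels = KernelGenerator().rvs(random_state=rs).draw(3)  # ['rbf', 'rbf', 'rbf'] with the current single-entry list

# KernelConfigGenerator.rvs returns a KernelConfigDistribution; draw(nb_view) samples,
# for each view, a {kernel_name: {param_name: value}} dict covering every supported kernel.
kernel_params = KernelConfigGenerator().rvs(random_state=rs).draw(3)
print(kernels, kernel_params[0]["rbf"])  # the per-view parameters drawn for the 'rbf' kernel

In the classifiers above, these draws are consumed by KernelClassifier.init_kernels, which expands a single kernel name or a drawn distribution into one kernel and one parameter dict per view before delegating to the scikit-multimodallearn estimators.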