diff --git a/config_files/config_test.yml b/config_files/config_test.yml index c2d2d87bfe8cb7dda8052dd6516aba82f45f1c40..2a8482ea1b855c9e53b261eb26c5a944dea5c9bb 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -24,7 +24,7 @@ Classification: classes: type: ["multiview"] algos_monoview: ["all"] - algos_multiview: ["mumbo", "easy_mkl"] + algos_multiview: ["mumbo", "lp_norm_mkl"] stats_iter: 2 metrics: ["accuracy_score", "f1_score"] metric_princ: "f1_score" @@ -207,3 +207,11 @@ mumbo: easy_mkl: degrees: [1] lam: [0.1] + +lp_norm_mkl: + lmbda: [0.1] + max_rounds: [50] + max_diff: [0.0001] + kernel_types: ["rbf_kernel"] + kernel_configs: + gamma: [0.1] diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index aa342b5433e51a1be444df2f7914ef22e6e47fd2..7dae037afa71cd77c1010cd54c1970107068346f 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -679,7 +679,6 @@ def exec_benchmark(nb_cores, stats_iter, nb_multiclass, benchmark_arguments_dictionaries[0])] else: for arguments in benchmark_arguments_dictionaries: - print(arguments) results += [exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index d1ccd57fd4596f269fc4454ebb2b6b8c790763f1..85bf7742c98b56261f1c5faf0e756b5e9bedc7d6 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -265,6 +265,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds logging.debug("Start:\t Optimizing hyperparameters") if hyper_param_search 
# -*- coding: utf-8 -*-

"""Containers for multi-view data.

``Metriclearn_array`` is a masked-array subclass that stores several views
side by side (stacked along the column axis) and remembers how many columns
belong to each view, so that a single view can be sliced back out.

``DataSample`` is a ``dict`` subclass that additionally carries one
``Metriclearn_array`` in its ``data`` property.
"""
import numpy as np
import numpy.ma as ma


class Metriclearn_array(ma.MaskedArray, np.ndarray):
    """Masked 2-D array holding several views stacked column-wise.

    Parameters
    ----------
    data : dict, numpy.ndarray
        * ``dict``: every value is one view (2-D, same number of rows);
          views are stacked in the dict's iteration order.
        * 1-D ``ndarray`` (only when ``view_ind`` is None): every element is
          one view.
        * ``ndarray`` with ``ndim > 1``: the views already stacked
          column-wise; ``view_ind`` gives the column boundaries.
    view_ind : array-like of int, optional
        Strictly increasing column boundaries ``[b0, b1, ..., bk]``; view
        ``i`` spans columns ``b_i:b_{i+1}``.  Only used for stacked ndarray
        input.  When omitted, the columns are split in two halves (or kept
        as one view if there is a single column).

    Attributes
    ----------
    shapes_int : list of int
        Number of columns of each view.
    n_views : int
        Number of views.
    keys : dict keys view or None
        Keys of the input dict, when the input was a dict.
    views_ind : numpy.ndarray or None
        Validated column boundaries (stacked ndarray input only).
    """

    def __new__(cls, data, view_ind=None):
        shapes_int = []
        new_data = np.zeros(())  # fallback payload for unrecognised input
        thekeys = None
        view_ind_self = None
        try:
            n_views = len(data)
        except TypeError:
            n_views = 0

        if isinstance(data, dict):
            n_views = len(data)
            thekeys = data.keys()
            for index, dat_values in enumerate(data.values()):
                new_data = cls._populate_new_data(index, dat_values, new_data)
                # Views are stacked along columns, so the width of a view is
                # shape[1] (shape[0] is the number of rows shared by all views).
                shapes_int.append(dat_values.shape[1])
        elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
            n_views = data.shape[0]
            for index, dat_values in enumerate(data):
                shapes_int.append(dat_values.shape[1])
                new_data = cls._populate_new_data(index, dat_values, new_data)
        elif isinstance(data, np.ndarray) and data.ndim > 1:
            if view_ind is None:
                n_cols = data.shape[1]
                if n_cols > 1:
                    view_ind = np.array([0, n_cols // 2, n_cols])
                else:
                    view_ind = np.array([0, n_cols])
            view_ind, n_views = cls._validate_views_ind(view_ind,
                                                        data.shape[1])
            # Computed after validation so that it is available both for
            # user-supplied and for default boundaries.
            shapes_int = [int(stop - start)
                          for start, stop in zip(view_ind[:-1], view_ind[1:])]
            new_data = data
            view_ind_self = view_ind

        if isinstance(new_data, ma.MaskedArray):
            base = ma.masked_array(new_data.data, mask=new_data.mask)
        else:
            base = ma.masked_array(np.asarray(new_data))
        obj = base.view(cls)

        obj.views_ind = view_ind_self
        obj.shapes_int = shapes_int
        obj.n_views = n_views
        obj.keys = thekeys
        return obj

    @staticmethod
    def _populate_new_data(index, dat_values, new_data):
        """Return ``new_data`` with ``dat_values`` appended as new columns.

        For ``index == 0`` the accumulated value is simply the first view.
        Values that are not numpy arrays are viewed as ``MaskedArray``.
        """
        is_array = isinstance(dat_values, np.ndarray)  # covers MaskedArray
        if index == 0:
            if is_array:
                return dat_values
            first = dat_values.view(ma.MaskedArray)
            first.mask = ma.nomask
            return first
        if is_array:
            return ma.hstack((new_data, dat_values))
        return ma.hstack((new_data, dat_values.view(ma.MaskedArray)))

    def __array_finalize__(self, obj):
        """Copy the multi-view metadata from ``obj`` when one is given."""
        if obj is None:
            return
        super(Metriclearn_array, self).__array_finalize__(obj)
        self.shapes_int = getattr(obj, 'shapes_int', None)
        self.n_views = getattr(obj, 'n_views', None)
        self.keys = getattr(obj, 'keys', None)
        # __new__ stores the boundaries under ``views_ind``; the former
        # ``views_ind_self`` name is kept as an alias.
        self.views_ind = getattr(obj, 'views_ind', None)
        self.views_ind_self = self.views_ind

    def _view_bounds(self, view):
        """Return integer ``(start, stop)`` offsets of ``view``."""
        # np.sum over an empty list is the float 0.0, hence the int() casts.
        start = int(np.sum(np.asarray(self.shapes_int[0: view])))
        stop = int(start + self.shapes_int[view])
        return start, stop

    def get_col(self, view, col):
        """Return ``self.data[start + col, :]`` with ``start`` the offset of ``view``.

        NOTE(review): the offset is applied to the first (row) axis although
        views are stacked along columns -- confirm the intended axis.
        """
        start, _ = self._view_bounds(view)
        return self.data[start + col, :]

    def get_view(self, view):
        """Return the columns of the underlying data that belong to ``view``."""
        start, stop = self._view_bounds(view)
        return self.data[:, start:stop]

    def set_view(self, view, data):
        """Overwrite the columns of ``view`` with ``data``.

        Raises
        ------
        ValueError
            If the shape of ``data`` does not fit the view.
        """
        start, stop = self._view_bounds(view)
        width = stop - start
        fits_view = data.shape == (self.data.shape[0], width)
        # Shape test used by the previous implementation, still accepted.
        legacy_ok = (width == data.shape[0]
                     and data.shape[1] == self.data.shape[1])
        if fits_view or legacy_ok:
            self.data[:, start:stop] = data
        else:
            raise ValueError(
                "shape of data does not match (%d, %d)"
                % (width, self.data.shape[1]))

    def get_raw(self, view, raw):
        """Return ``self.data[start:stop, raw]`` with the offsets of ``view``.

        NOTE(review): like ``get_col`` the offsets are applied to the first
        axis -- confirm the intended axis.
        """
        start, stop = self._view_bounds(view)
        return self.data[start:stop, raw]

    def add_view(self, v, data):
        """Return a new array with ``data`` appended as the last view.

        A numpy array cannot grow in place, so ``self`` is left untouched
        and the extended array is returned.

        Parameters
        ----------
        v : int
            Unused; kept for interface compatibility.
        data : numpy.ndarray
            2-D view with as many rows as this array.

        Raises
        ------
        ValueError
            If ``data`` does not have the same number of rows.
        """
        if len(self.shape) == 0:
            return Metriclearn_array({0: data})
        if data.shape[0] != self.data.shape[0]:
            raise ValueError("New view can't initialazed")
        widths = list(self.shapes_int or []) + [data.shape[1]]
        bounds = np.concatenate(([0], np.cumsum(widths))).astype(int)
        return Metriclearn_array(ma.hstack((self, data)), view_ind=bounds)

    def _todict(self):
        """Return ``{view index: view data}`` for every view."""
        return {view: self.get_view(view) for view in range(self.n_views)}

    @staticmethod
    def _validate_views_ind(views_ind, n_features):
        """Ensure proper format for views_ind and return number of views.

        Returns
        -------
        tuple
            ``(views_ind as ndarray, n_views)``.

        Raises
        ------
        ValueError
            If ``views_ind`` is not a 1-D integer array, is not strictly
            increasing, or leaves ``[0, n_features]``.
        """
        views_ind = np.array(views_ind)
        if not (np.issubdtype(views_ind.dtype, np.integer)
                and views_ind.ndim == 1):
            raise ValueError("The format of views_ind is not "
                             + "supported.")
        if np.any(views_ind[:-1] >= views_ind[1:]):
            raise ValueError("Values in views_ind must be sorted.")
        if views_ind[0] < 0 or views_ind[-1] > n_features:
            raise ValueError("Values in views_ind are not in a correct "
                             + "range for the provided data.")
        return (views_ind, views_ind.shape[0] - 1)


class DataSample(dict):
    """Dictionary that additionally carries a ``Metriclearn_array``.

    Parameters
    ----------
    data : dict or numpy.ndarray, optional
        When given, it is wrapped in a ``Metriclearn_array`` and exposed
        through the ``data`` property.
    **kwargs
        Stored as the dictionary content.
    """

    def __init__(self, data=None, **kwargs):
        # The dictionary that contains the sample
        super(DataSample, self).__init__(kwargs)
        self._data = None
        if data is not None:
            self._data = Metriclearn_array(data)

    @property
    def data(self):
        """Metriclearn_array"""
        return self._data

    @data.setter
    def data(self, data):
        if isinstance(data, (Metriclearn_array, np.ndarray, ma.MaskedArray, np.generic)):
            self._data = data
        else:
            raise TypeError("sample should be a Metriclearn_array.")
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.metrics import pairwise
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y

from ..multiview.multiview_utils import BaseMultiviewClassifier, get_examples_views_indices
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
from .additions.data_sample import Metriclearn_array

classifier_class_name = "LPNormMKL"

### The following code is a welcome contribution by Riikka Huusari
# (riikka.huusari@lis-lab.fr) that we adapted to create the classifier


def _draw_seed(random_state):
    """Draw an integer seed from ``random_state`` (a fresh RandomState if None)."""
    if random_state is None:
        random_state = np.random.RandomState()
    # randint(1) can only return 0; draw from the full non-negative range.
    return random_state.randint(0, 2 ** 31 - 1)


class MKL(BaseEstimator, ClassifierMixin):
    """Lp-norm multiple kernel learning on precomputed per-view kernels.

    Parameters
    ----------
    lmbda : float
        Regularisation added to the diagonal of the combined kernel.
    m_param : float
        The Nystrom-approximated kernels are used when ``m_param < 1``
        (and, during training, ``use_approx`` is true).
    use_approx : bool
        See ``m_param``.
    max_rounds : int
        Maximum number of alternating optimisation rounds.
    max_diff : float
        Convergence threshold on the change of the solution.
    p : int or float
        Norm used for the kernel weights.
    """

    def __init__(self, lmbda, m_param=1.0, use_approx=True, max_rounds=50,
                 max_diff=0.0001, p=2):
        self.lmbda = lmbda
        self.use_approx = use_approx
        self.m_param = m_param

        # Non-optimizable Hyper-params
        self.max_rounds = max_rounds
        self.max_diff = max_diff
        self.p = p

    @staticmethod
    def _view_list(X, views_ind=None):
        """Return the views of ``X`` as a list of 2-D arrays.

        Dict values are used directly, so rectangular (test x train) kernels
        are passed through unchanged.
        """
        if isinstance(X, Metriclearn_array):
            return [X.get_view(v) for v in range(X.n_views)]
        if isinstance(X, dict):
            return [np.asarray(view) for view in X.values()]
        if isinstance(X, np.ndarray):
            wrapped = Metriclearn_array(X, views_ind)
            return [wrapped.get_view(v) for v in range(wrapped.n_views)]
        raise TypeError("X must be a Metriclearn_array, a dict or a "
                        "numpy.ndarray")

    def fit(self, X, y=None, views_ind=None):
        """Learn the kernel weights and the dual coefficients.

        Parameters
        ----------
        X : Metriclearn_array, dict or numpy.ndarray
            Training kernels, one (n x n) kernel per view.
        y : array-like
            Training targets.
        views_ind : array-like of int, optional
            Column boundaries of the views when ``X`` is a stacked ndarray.

        Returns
        -------
        self
        """
        if isinstance(X, Metriclearn_array):
            self.X_ = X
        elif isinstance(X, dict):
            self.X_ = Metriclearn_array(X)
        elif isinstance(X, np.ndarray):
            self.X_ = Metriclearn_array(X, views_ind)
        else:
            raise TypeError("X must be a Metriclearn_array, a dict or a "
                            "numpy.ndarray")
        self.classes_ = unique_labels(y)
        check_X_y(self.X_, y)
        self.y_ = y
        n = self.X_.shape[0]
        self._calc_nystrom(self.X_, n)
        C, weights = self.learn_lpMKL()
        self.C = C
        self.weights = weights
        return self

    def learn_lpMKL(self):
        """Alternate between solving for alpha and updating the weights.

        Returns
        -------
        tuple
            ``(C, weights)``: dual coefficients and per-view kernel weights.

        Raises
        ------
        ValueError
            If the stopping parameters prevent even one optimisation round.
        """
        views = self.X_.n_views
        X = self.X_
        n = self.X_.shape[0]
        weights = np.ones(views) / views

        # The per-view kernels never change inside the loop.
        if self.m_param < 1 and self.use_approx:
            view_kernels = [np.dot(self.U_dict[v], np.transpose(self.U_dict[v]))
                            for v in range(views)]
        else:
            view_kernels = [X.get_view(v) for v in range(views)]

        C = None
        C_old = None
        weights_old = weights
        have_prev_alpha = False
        max_diff = 1
        # No previous gamma difference exists in the first round; +inf keeps
        # the "stuck" test below false instead of reading an unbound name.
        max_diff_gamma_prev = np.inf
        max_diff_gamma = np.inf
        rounds = 0
        stuck = False
        while max_diff > self.max_diff and rounds < self.max_rounds and not stuck:
            # gammas are fixed upon arrival to the loop -> solve for alpha
            combined_kernel = np.zeros((n, n))
            for v in range(views):
                combined_kernel = combined_kernel + weights[v] * view_kernels[v]

            # alpha = (K + lambda*I)^-1 y
            C = np.linalg.solve(combined_kernel + self.lmbda * np.eye(n),
                                self.y_)

            # alpha fixed -> calculate gammas
            weights_old = weights.copy()
            ft2 = np.zeros(views)
            for v in range(views):
                ft2[v] = np.linalg.norm(
                    weights_old[v] * np.dot(view_kernels[v], C)) ** 2

            downstairs = np.sum(ft2 ** (self.p / (self.p + 1.0))) ** (1.0 / self.p)
            weights = (ft2 ** (1 / (self.p + 1))) / downstairs

            # convergence
            if not have_prev_alpha:
                have_prev_alpha = True
                diff_alpha = 1
            else:
                diff_alpha = np.linalg.norm(C_old - C) / np.linalg.norm(C_old)
                max_diff_gamma_prev = max_diff_gamma

            max_diff_gamma = np.max(np.abs(weights - weights_old))

            # if the gamma difference starts to grow we are stuck
            # (condition determined empirically by the original author)
            if max_diff_gamma < 1e-3 and max_diff_gamma_prev < max_diff_gamma:
                stuck = True
            if rounds > 1 and max_diff_gamma - max_diff_gamma_prev > 1e-2:
                stuck = True

            max_diff = np.max([max_diff_gamma, diff_alpha])
            C_old = C.copy()
            rounds = rounds + 1

        if C is None:
            raise ValueError("max_rounds and max_diff must allow at least "
                             "one optimisation round")
        if stuck:
            return C_old, weights_old
        return C, weights

    def predict(self, X, views_ind=None):
        """Return the decision values ``K(X, X_train) . C``.

        ``X`` holds one (n_test x n_train) kernel per view.
        """
        return self.lpMKL_predict(X, self.C, self.weights, views_ind)

    def lpMKL_predict(self, X, C, weights, views_ind=None):
        """Combine the test kernels with ``weights`` and apply ``C``."""
        test_views = self._view_list(X, views_ind)
        m = self.X_.shape[0]
        tt = test_views[0].shape[0]

        kernel = np.zeros((tt, m))
        for v, test_kernel in enumerate(test_views):
            if self.m_param < 1:
                # test kernel approximation
                kernel = kernel + weights[v] * np.dot(
                    np.dot(test_kernel[:, 0:m], self.W_sqrootinv_dict[v]),
                    np.transpose(self.U_dict[v]))
            else:
                kernel = kernel + weights[v] * test_kernel

        return np.dot(kernel, C)

    def _calc_nystrom(self, kernels, n_approx):
        """Compute the Nystrom factors of every view of ``kernels``.

        Fills ``self.U_dict`` and ``self.W_sqrootinv_dict`` (keyed by view).
        """
        self.W_sqrootinv_dict = {}
        self.U_dict = {}
        for v in range(kernels.n_views):
            kernel = kernels.get_view(v)
            E = kernel[:, 0:n_approx]
            W = E[0:n_approx, :]
            Ue, Va, _ = np.linalg.svd(W)
            vak = Va[0:n_approx]
            inVa = np.diag(vak ** (-0.5))
            w_sqrootinv = np.dot(Ue[:, 0:n_approx], inVa)
            self.U_dict[v] = np.dot(E, w_sqrootinv)
            self.W_sqrootinv_dict[v] = w_sqrootinv


class LPNormMKL(BaseMultiviewClassifier, MKL):
    """Platform wrapper: builds one kernel per view and delegates to ``MKL``.

    Parameters
    ----------
    kernel_types : str, list of str or KernelDistribution
        Name(s) of functions of ``sklearn.metrics.pairwise``.
    kernel_configs : dict, list of dict, KernelConfigDistribution or None
        Keyword arguments of the kernel function(s); ``None`` means no
        extra argument.
    Other parameters are forwarded to ``MKL``.
    """

    def __init__(self, random_state=None, lmbda=0.1, m_param=1, max_rounds=50,
                 max_diff=0.0001, use_approx=True, kernel_types="rbf_kernel",
                 kernel_configs=None, p=2, prev_alpha=False):
        super().__init__(random_state)
        super(BaseMultiviewClassifier, self).__init__(lmbda, m_param,
                                                      use_approx, max_rounds,
                                                      max_diff, p)
        self.param_names = ["lmbda", "kernel_types", "kernel_configs"]
        self.distribs = [CustomUniform(), KernelGenerator(),
                         KernelConfigGenerator()]
        self.kernel_types = kernel_types
        self.kernel_configs = kernel_configs

        self.prev_alpha = prev_alpha

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Compute the training kernels of every view and fit ``MKL``."""
        train_indices, view_indices = get_examples_views_indices(X,
                                                                 train_indices,
                                                                 view_indices)
        self.init_kernels(nb_view=len(view_indices))
        # The training examples are kept: prediction needs K(test, train).
        self.train_views_ = [X.get_v(view_index, train_indices)
                             for view_index in view_indices]
        new_X = {}
        for index, (kernel_function, kernel_config, train_view) in enumerate(
                zip(self.kernel_functions, self.kernel_configs,
                    self.train_views_)):
            new_X[index] = kernel_function(train_view, **kernel_config)
        return super(LPNormMKL, self).fit(new_X, y[train_indices])

    def predict(self, X, example_indices=None, view_indices=None):
        """Compute the (test x train) kernels and predict with ``MKL``."""
        example_indices, view_indices = get_examples_views_indices(X,
                                                                   example_indices,
                                                                   view_indices)
        new_X = {}
        for index, (kernel_function, kernel_config, view_index,
                    train_view) in enumerate(
                zip(self.kernel_functions, self.kernel_configs, view_indices,
                    self.train_views_)):
            new_X[index] = kernel_function(X.get_v(view_index,
                                                   example_indices),
                                           train_view,
                                           **kernel_config)
        return super(LPNormMKL, self).predict(new_X)

    def init_kernels(self, nb_view=2, ):
        """Resolve ``kernel_types``/``kernel_configs`` into per-view lists.

        Sets ``self.kernel_functions`` and rewrites ``self.kernel_configs``
        as a list with one keyword dict per view.
        """
        if isinstance(self.kernel_types, KernelDistribution):
            self.kernel_functions = self.kernel_types.draw(nb_view)
        elif isinstance(self.kernel_types, str):
            self.kernel_functions = [getattr(pairwise, self.kernel_types)
                                     for _ in range(nb_view)]
        elif isinstance(self.kernel_types, list):
            self.kernel_functions = [getattr(pairwise, kernel_type)
                                     for kernel_type in self.kernel_types]
        else:
            raise TypeError("kernel_types must be a str, a list of str or a "
                            "KernelDistribution")

        if isinstance(self.kernel_configs, KernelConfigDistribution):
            drawn = self.kernel_configs.draw(nb_view)
            # keep, for every view, only the config of the kernel it uses
            self.kernel_configs = [kernel_config[kernel_function.__name__]
                                   for kernel_config, kernel_function
                                   in zip(drawn, self.kernel_functions)]
        elif isinstance(self.kernel_configs, dict):
            self.kernel_configs = [self.kernel_configs for _ in range(nb_view)]
        elif self.kernel_configs is None:
            self.kernel_configs = [{} for _ in range(nb_view)]


class KernelConfigGenerator:
    """Distribution-like object whose samples are KernelConfigDistribution."""

    def __init__(self):
        pass

    def rvs(self, random_state=None):
        # assumes random_state exposes randint like numpy's RandomState
        return KernelConfigDistribution(seed=_draw_seed(random_state))


class KernelConfigDistribution:
    """Draws, for every view, a parameter dict for each known kernel."""

    def __init__(self, seed=42):
        self.random_state = np.random.RandomState(seed)
        self.possible_config = {
            "polynomial_kernel": {"degree": CustomRandint(low=1, high=7),
                                  "gamma": CustomUniform(),
                                  "coef0": CustomUniform()
                                  },
            "chi2_kernel": {"gamma": CustomUniform()},
            "rbf_kernel": {"gamma": CustomUniform()},
        }

    def draw(self, nb_view):
        """Return ``nb_view`` dicts ``{kernel name: {param name: value}}``."""
        drawn_params = [{} for _ in range(nb_view)]
        for view_params in drawn_params:
            for kernel_name, params_dict in self.possible_config.items():
                view_params[kernel_name] = {
                    param_name: distrib.rvs(self.random_state)
                    for param_name, distrib in params_dict.items()}
        return drawn_params


class KernelGenerator:
    """Distribution-like object whose samples are KernelDistribution."""

    def __init__(self):
        pass

    def rvs(self, random_state=None):
        # assumes random_state exposes randint like numpy's RandomState
        return KernelDistribution(seed=_draw_seed(random_state))


class KernelDistribution:
    """Draws one kernel function per view among the available kernels."""

    def __init__(self, seed=42):
        self.random_state = np.random.RandomState(seed)
        self.available_kernels = [pairwise.polynomial_kernel,
                                  pairwise.chi2_kernel,
                                  pairwise.rbf_kernel,
                                  ]

    def draw(self, nb_view):
        """Return ``nb_view`` kernel functions drawn with replacement."""
        return self.random_state.choice(self.available_kernels, nb_view)