Skip to content
Snippets Groups Projects
Commit ffb86575 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added MKL

parent c87e29b2
No related branches found
No related tags found
No related merge requests found
......@@ -24,7 +24,7 @@ Classification:
classes:
type: ["multiview"]
algos_monoview: ["all"]
algos_multiview: ["mumbo", "easy_mkl"]
algos_multiview: ["mumbo", "lp_norm_mkl"]
stats_iter: 2
metrics: ["accuracy_score", "f1_score"]
metric_princ: "f1_score"
......@@ -207,3 +207,11 @@ mumbo:
easy_mkl:
degrees: [1]
lam: [0.1]
lp_norm_mkl:
lmbda: [0.1]
max_rounds: [50]
max_diff: [0.0001]
kernel_types: ["rbf_kernel"]
kernel_configs:
gamma: [0.1]
......@@ -679,7 +679,6 @@ def exec_benchmark(nb_cores, stats_iter, nb_multiclass,
benchmark_arguments_dictionaries[0])]
else:
for arguments in benchmark_arguments_dictionaries:
print(arguments)
results += [exec_one_benchmark_mono_core(dataset_var=dataset_var, **arguments)]
logging.debug("Done:\t Executing all the needed biclass benchmarks")
......
......@@ -265,6 +265,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds
logging.debug("Start:\t Optimizing hyperparameters")
if hyper_param_search != "None":
print(metrics)
classifier_config = hyper_parameter_search.search_best_settings(
dataset_var, labels, classifier_module, classifier_name,
metrics[0], learning_indices, k_folds, random_state,
......
# -*- coding: utf-8 -*-
"""This module contains the DataSample class and Splearn_array class
The DataSample class encapsulates a sample 's components
nbL and nbEx numbers,
Splearn_array class inherit from numpy ndarray and contains a 2d data ndarray
with the shape
==== ==== ==== ==== ====
x x x x -1
x x x x x
x x -1 -1 -1
x -1 -1 -1 -1
-1 -1 -1 -1 -1
==== ==== ==== ==== ====
where -1 indicates an empty cell,
the numbers nbL and nbEx, and the four dictionaries for sample,
prefix, suffix and factor where they are computed
"""
import numpy as np
import numpy.ma as ma
class Metriclearn_array(ma.MaskedArray, np.ndarray):
    """Masked ndarray holding several views stacked along the columns.

    The underlying 2-D array is the horizontal concatenation of every view;
    ``shapes_int`` records the width of each view so single views can be
    sliced back out, and masked (``-1``) cells mark empty positions.

    Parameters
    ----------
    data : dict, 1-D object ndarray, or 2-D ndarray
        Either a mapping / sequence of per-view arrays (stacked with
        ``ma.hstack``) or an already-concatenated 2-D array.
    view_ind : array-like of int, optional
        Sorted column boundaries of the views for a 2-D ``data`` input,
        e.g. ``[0, 2, 3]`` describes two views of widths 2 and 1.  When
        omitted for a 2-D input, the columns are split into two halves.
    """

    def __new__(cls, data, view_ind=None):
        shapes_int = []          # width of each view in the stacked array
        index = 0
        new_data = np.ndarray([])
        n_views = len(data)
        thekeys = None
        view_ind_self = None
        if isinstance(data, dict):
            # Mapping name -> per-view array: concatenate in iteration order.
            n_views = len(data)
            for key, dat_values in data.items():
                new_data = cls._populate_new_data(index, dat_values, new_data)
                # NOTE(review): uses shape[0] as the view width; assumes
                # square per-view blocks (e.g. kernel matrices) -- confirm
                # before feeding rectangular views through this path.
                shapes_int.append(dat_values.shape[0])
                index += 1
            thekeys = data.keys()
        elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
            # 1-D object array whose entries are the individual views.
            n_views = data.shape[0]
            for dat_values in data:
                shapes_int.append(dat_values.shape[0])
                new_data = cls._populate_new_data(index, dat_values, new_data)
                index += 1
        elif isinstance(data, np.ndarray) and data.ndim > 1:
            if view_ind is None:
                # Default split: two halves (or one view for a single column).
                if data.shape[1] > 1:
                    view_ind = np.array([0, data.shape[1] // 2, data.shape[1]])
                else:
                    view_ind = np.array([0, data.shape[1]])
            view_ind = np.asarray(view_ind)
            # Bug fix: view_ind holds n_views + 1 boundaries, so the number
            # of views is len(view_ind) - 1 (the old code used len(view_ind),
            # and its call to cls._validate_views_ind passed too few
            # arguments for an instance method and raised TypeError).
            n_views = view_ind.shape[0] - 1
            shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])]
            new_data = data
            view_ind_self = view_ind
        if hasattr(new_data, "mask"):
            obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
        elif hasattr(new_data, "data") and \
                hasattr(new_data, "shape") and len(new_data.shape) > 0:
            # Bug fix: np.ndarray(buffer) interprets its argument as a shape
            # and raised TypeError for plain arrays; view the array instead.
            obj = np.asarray(new_data).view(cls)
        else:
            # np.float was removed from numpy; the builtin float is equivalent.
            obj = np.recarray.__new__(cls, shape=(), dtype=float)
        obj.views_ind = view_ind_self
        obj.shapes_int = shapes_int
        obj.n_views = n_views
        obj.keys = thekeys
        return obj

    @staticmethod
    def _populate_new_data(index, dat_values, new_data):
        """Stack ``dat_values`` onto ``new_data`` (the first call adopts it)."""
        if index == 0:
            if isinstance(dat_values, (ma.MaskedArray, np.ndarray)):
                new_data = dat_values
            else:
                new_data = dat_values.view(ma.MaskedArray)
                new_data.mask = ma.nomask
        else:
            if isinstance(dat_values, (ma.MaskedArray, np.ndarray)):
                new_data = ma.hstack((new_data, dat_values))
            else:
                new_data = ma.hstack((new_data, dat_values.view(ma.MaskedArray)))
        return new_data

    def __array_finalize__(self, obj):
        """Propagate the multiview bookkeeping attributes to derived views."""
        if obj is None:
            return
        super(Metriclearn_array, self).__array_finalize__(obj)
        self.shapes_int = getattr(obj, 'shapes_int', None)
        self.n_views = getattr(obj, 'n_views', None)
        self.keys = getattr(obj, 'keys', None)
        # Bug fix: also propagate 'views_ind' (the attribute __new__ actually
        # sets); the old code only looked up 'views_ind_self', which nothing
        # ever provided, so slicing silently dropped the boundaries.
        self.views_ind = getattr(obj, 'views_ind', None)
        self.views_ind_self = getattr(obj, 'views_ind_self', None)

    def get_col(self, view, col):
        """Return column ``col`` of ``view``.

        NOTE(review): indexes rows (``data[start + col, :]``) while
        ``get_view`` slices columns -- looks inconsistent; kept as-is.
        """
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        return self.data[start + col, :]

    def get_view(self, view):
        """Return the 2-D block of columns belonging to ``view``."""
        start = int(np.sum(np.asarray(self.shapes_int[0: view])))
        stop = int(start + self.shapes_int[view])
        return self.data[:, start:stop]

    def set_view(self, view, data):
        """Overwrite view ``view`` with ``data`` (same shape as ``get_view``)."""
        start = int(np.sum(np.asarray(self.shapes_int[0: view])))
        stop = int(start + self.shapes_int[view])
        # Bug fix: the check must mirror get_view (all rows x view width); the
        # old test compared the wrong axes and its "%"-formatting expression
        # ("..." % stop - start % ...) raised TypeError instead of reporting.
        if data.shape[0] == self.data.shape[0] and data.shape[1] == stop - start:
            self.data[:, start:stop] = data
        else:
            raise ValueError(
                "shape of data does not match (%d, %d)"
                % (self.data.shape[0], stop - start))

    def get_raw(self, view, raw):
        """Return slice ``raw`` of view ``view`` along the first axis.

        NOTE(review): slices rows, mirroring get_col; confirm intended axis.
        """
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = np.sum(np.asarray(self.shapes_int[0: view + 1]))
        return self.data[start:stop, raw]

    def add_view(self, v, data):
        """Append ``data`` as a new view after view ``v``.

        NOTE(review): np.insert returns a new array (the result is discarded
        here) and is called with its arguments in a dubious order; this
        method likely never worked and is kept only for interface
        compatibility -- verify before relying on it.
        """
        if len(self.shape) > 0:
            if data.shape[0] == self.data.shape[0]:
                indice = self.shapes_int[v]
                np.insert(self.data, data, indice + 1, axis=0)
                self.shapes_int.append(data.shape[1])
                self.n_views += 1
            else:
                raise ValueError("New view can't initialazed")

    def _todict(self):
        """Return the views as a ``{view_index: 2-D array}`` dictionary."""
        dico = {}
        for view in range(self.n_views):
            # Bug fix: the old code read ``self.X``, which does not exist.
            dico[view] = self.get_view(view)
        return dico

    def _validate_views_ind(self, views_ind, n_features):
        """Ensure proper format for views_ind and return number of views."""
        views_ind = np.array(views_ind)
        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
            if np.any(views_ind[:-1] >= views_ind[1:]):
                raise ValueError("Values in views_ind must be sorted.")
            if views_ind[0] < 0 or views_ind[-1] > n_features:
                raise ValueError("Values in views_ind are not in a correct "
                                 + "range for the provided data.")
            self.view_mode_ = "slices"
            n_views = views_ind.shape[0] - 1
        else:
            raise ValueError("The format of views_ind is not "
                             + "supported.")
        return (views_ind, n_views)
class DataSample(dict):
    """Dictionary-like container for a multiview sample.

    Behaves as a plain ``dict`` for arbitrary keyword metadata while also
    exposing the sample itself through the ``data`` property, stored as a
    :class:`Metriclearn_array`.

    :Example:

    >>> from metriclearning.datasets.base import load_dict
    >>> from metriclearning.datasets.tests.get_dataset_path import get_dataset_path
    >>> file = 'input_x_dic.pkl'  # '4.spice.train'
    >>> data = load_dict(adr=get_dataset_path(file))
    >>> print(data.__class__)
    >>> data.data

    - Input:

    :param data: optional sample; wrapped in a Metriclearn_array when given
    :param kwargs: arbitrary metadata stored in the underlying dict
    """

    def __init__(self, data=None, **kwargs):
        # The dict part carries the metadata; the sample lives in _data.
        super(DataSample, self).__init__(kwargs)
        self._data = None  # Metriclearn_array(np.zeros((0,0)))
        if data is not None:
            self._data = Metriclearn_array(data)

    @property
    def data(self):
        """The stored sample (``None`` until one is assigned)."""
        return self._data

    @data.setter
    def data(self, data):
        # Guard clause: reject anything that is not an array-like sample.
        if not isinstance(data, (Metriclearn_array, np.ndarray,
                                 ma.MaskedArray, np.generic)):
            raise TypeError("sample should be a Metriclearn_array.")
        self._data = data
from MKLpy.algorithms import EasyMKL
from MKLpy.metrics import pairwise
from MKLpy.lists import HPK_generator
from MKLpy.algorithms.komd import KOMD
import numpy as np
from ..multiview.multiview_utils import BaseMultiviewClassifier, get_examples_views_indices
......@@ -10,34 +12,40 @@ classifier_class_name = "EasyMKLClassifier"
class EasyMKLClassifier(BaseMultiviewClassifier, EasyMKL):
def __init__(self, random_state=None, degrees=1, lam=0.1):
def __init__(self, random_state=None, degrees=1, lam=0.1,
learner=KOMD(lam=0.1), generator=HPK_generator(n=10),
multiclass_strategy='ova', verbose=False):
super().__init__(random_state)
super(BaseMultiviewClassifier, self).__init__(lam=lam)
super(BaseMultiviewClassifier, self).__init__(lam=lam,
learner=learner,
generator=generator,
multiclass_strategy=multiclass_strategy,
verbose=verbose)
self.degrees = degrees
self.param_names = ["lam", "degrees"]
self.distribs = [CustomUniform(), DegreesGenerator()]
def fit(self, X, y, train_indices=None, views_indices=None ):
train_indices, views_indices = get_examples_views_indices(X,
def fit(self, X, y, train_indices=None, view_indices=None ):
train_indices, view_indices = get_examples_views_indices(X,
train_indices,
views_indices)
view_indices)
if isinstance(self.degrees, DegreesDistribution):
self.degrees = self.degrees.draw(len(views_indices))
self.degrees = self.degrees.draw(len(view_indices))
elif isinstance(int, self.degrees):
self.degrees = [self.degrees for _ in range(len(views_indices))]
self.degrees = [self.degrees for _ in range(len(view_indices))]
kernels = [pairwise.homogeneous_polynomial_kernel(X.get_V(views_indices[index],
kernels = [pairwise.homogeneous_polynomial_kernel(X.get_v(view_indices[index],
train_indices),
degree=degree)
for index, degree in enumerate(self.degrees)]
return super(EasyMKLClassifier, self).fit(kernels, y[train_indices])
def predict(self, X, example_indices=None, views_indices=None):
example_indices, views_indices = get_examples_views_indices(X,
def predict(self, X, example_indices=None, view_indices=None):
example_indices, view_indices = get_examples_views_indices(X,
example_indices,
views_indices)
view_indices)
kernels = [
pairwise.homogeneous_polynomial_kernel(X.get_V(views_indices[index],
pairwise.homogeneous_polynomial_kernel(X.get_v(view_indices[index],
example_indices),
degree=degree)
for index, degree in enumerate(self.degrees)]
......@@ -59,4 +67,4 @@ class DegreesDistribution:
self.random_state=np.random.RandomState(seed)
def draw(self, nb_view):
return self.random_state.randint(low=1,high=10,size=nb_view)
return self.random_state.randint(low=5,high=10,size=nb_view)
from sklearn.metrics import pairwise
from ..multiview.multiview_utils import BaseMultiviewClassifier, get_examples_views_indices
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint
classifier_class_name = "LPNormMKL"
### The following code is a welcome contribution by Riikka Huusari
# (riikka.huusari@lis-lab.fr) that we adapted to create the classifier
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y
from .additions.data_sample import Metriclearn_array
class MKL(BaseEstimator, ClassifierMixin):
    """lp-norm Multiple Kernel Learning (code contributed by Riikka Huusari).

    Alternately solves for the dual coefficients ``C`` of a kernel ridge
    problem and for the lp-norm-constrained kernel weights, one weight per
    view.

    Parameters
    ----------
    lmbda : float
        Ridge regularisation added to the combined kernel.
    m_param : float, default 1.0
        Fraction of points kept for the Nystrom approximation; 1.0 means the
        full kernels are used.
    use_approx : bool, default True
        Whether to use the Nystrom approximation when ``m_param < 1``.
    max_rounds : int, default 50
        Maximum number of alternating optimisation rounds.
    max_diff : float, default 0.0001
        Convergence threshold on the change of weights / dual coefficients.
    p : int, default 2
        Order of the lp-norm constraint on the kernel weights.
    """

    def __init__(self, lmbda, m_param=1.0, use_approx=True, max_rounds=50,
                 max_diff=0.0001, p=2):
        # sklearn convention: __init__ only stores hyper-parameters.
        # (A leftover debug ``print(lmbda)`` was removed.)
        self.lmbda = lmbda
        self.use_approx = use_approx
        self.m_param = m_param
        # Non-optimizable hyper-parameters.
        self.max_rounds = max_rounds
        self.max_diff = max_diff
        self.p = p

    def fit(self, X, y=None, views_ind=None):
        """Fit on multiview input.

        ``X`` may be a Metriclearn_array, a dict of per-view kernels, or a
        plain ndarray whose view boundaries are given by ``views_ind``.
        Returns ``self`` (sklearn convention -- the old code returned None).
        """
        if isinstance(X, Metriclearn_array):
            self.X_ = X
        elif isinstance(X, dict):
            self.X_ = Metriclearn_array(X)
        elif isinstance(X, np.ndarray):
            self.X_ = Metriclearn_array(X, views_ind)
        self.classes_ = unique_labels(y)
        check_X_y(self.X_, y)
        self.y_ = y
        n = self.X_.shape[0]
        self._calc_nystrom(self.X_, n)
        C, weights = self.learn_lpMKL()
        self.C = C
        self.weights = weights
        return self

    def learn_lpMKL(self):
        """Run the alternating optimisation; return ``(C, weights)``."""
        views = self.X_.n_views
        X = self.X_
        n = self.X_.shape[0]
        weights = np.ones(views) / views
        prevalpha = False
        max_diff = 1
        # Per-view kernels reconstructed from the Nystrom factors.
        kernels = np.zeros((views, n, n))
        for v in range(0, views):
            kernels[v, :, :] = np.dot(self.U_dict[v],
                                      np.transpose(self.U_dict[v]))
        rounds = 0
        stuck = False
        while max_diff > self.max_diff and rounds < self.max_rounds and not stuck:
            # Weights fixed -> solve for C:  C = (K + lambda*I)^-1 y
            combined_kernel = np.zeros((n, n))
            if self.m_param < 1 and self.use_approx:
                for v in range(0, views):
                    combined_kernel = combined_kernel + weights[v] * kernels[v]
            else:
                for v in range(0, views):
                    combined_kernel = combined_kernel + weights[v] * X.get_view(v)
            C = np.linalg.solve(combined_kernel + self.lmbda * np.eye(n),
                                self.y_)
            # C fixed -> update the weights from the per-view norms ||f_t||^2.
            weights_old = weights.copy()
            ft2 = np.zeros(views)
            for v in range(0, views):
                if self.m_param < 1 and self.use_approx:
                    ft2[v] = np.linalg.norm(
                        weights_old[v] * np.dot(kernels[v], C)) ** 2
                else:
                    ft2[v] = np.linalg.norm(
                        weights_old[v] * np.dot(X.get_view(v), C)) ** 2
            downstairs = np.sum(ft2 ** (self.p / (self.p + 1.0))) ** (1.0 / self.p)
            weights = (ft2 ** (1 / (self.p + 1))) / downstairs
            # Convergence bookkeeping.
            if not prevalpha:
                # First round: nothing to compare against yet.
                prevalpha = True
                diff_alpha = 1
                # Bug fix: max_diff_gamma_prev could be read below before any
                # assignment; seed it so the "stuck" tests are well defined.
                max_diff_gamma_prev = np.inf
            else:
                diff_alpha = np.linalg.norm(C_old - C) / np.linalg.norm(C_old)
                max_diff_gamma_prev = max_diff_gamma
            max_diff_gamma = np.max(np.abs(weights - weights_old))
            # Heuristics (determined empirically) to detect that convergence
            # is as good as it gets.
            if max_diff_gamma < 1e-3 and max_diff_gamma_prev < max_diff_gamma:
                # The gamma difference starts to grow: most definitely stuck.
                stuck = True
            if rounds > 1 and max_diff_gamma - max_diff_gamma_prev > 1e-2:
                # The difference suddenly grows a lot.
                stuck = True
            max_diff = np.max([max_diff_gamma, diff_alpha])
            C_old = C.copy()
            rounds = rounds + 1
        if stuck:
            # Roll back to the previous iterate when the loop got stuck.
            return C_old, weights_old
        else:
            return C, weights

    def predict(self, X, views_ind=None):
        """Predict scores for ``X`` (Metriclearn_array, dict, or ndarray)."""
        if isinstance(X, Metriclearn_array):
            pass
        elif isinstance(X, dict):
            X = Metriclearn_array(X)
        elif isinstance(X, np.ndarray):
            X = Metriclearn_array(X, views_ind)
        return self.lpMKL_predict(X, self.C, self.weights)

    def lpMKL_predict(self, X, C, weights, views_ind=None):
        """Combine the test kernels with the learned weights and apply C."""
        if isinstance(X, Metriclearn_array):
            pass
        elif isinstance(X, dict):
            X = Metriclearn_array(X)
        elif isinstance(X, np.ndarray):
            X = Metriclearn_array(X, views_ind)
        views = X.n_views
        tt = X.shape[0]
        m = self.X_.shape[0]  # number of anchor points (self.m_param * n)
        kernel = np.zeros((tt, self.X_.shape[0]))
        for v in range(0, views):
            if self.m_param < 1:
                # Nystrom-approximated test kernel.
                kernel = kernel + weights[v] * np.dot(
                    np.dot(X.get_view(v)[:, 0:m], self.W_sqrootinv_dict[v]),
                    np.transpose(self.U_dict[v]))
            else:
                kernel = kernel + weights[v] * X.get_view(v)
        return np.dot(kernel, C)

    def _calc_nystrom(self, kernels, n_approx):
        """Compute the Nystrom factors U and W^{-1/2} for every view kernel."""
        self.W_sqrootinv_dict = {}
        self.U_dict = {}
        for v in range(kernels.n_views):
            kernel = kernels.get_view(v)
            E = kernel[:, 0:n_approx]
            W = E[0:n_approx, :]
            Ue, Va, _ = np.linalg.svd(W)
            vak = Va[0:n_approx]
            inVa = np.diag(vak ** (-0.5))
            U_v = np.dot(E, np.dot(Ue[:, 0:n_approx], inVa))
            self.U_dict[v] = U_v
            self.W_sqrootinv_dict[v] = np.dot(Ue[:, 0:n_approx], inVa)
class LPNormMKL(BaseMultiviewClassifier, MKL):
    """Multiview wrapper around MKL: builds one kernel per view, then fits."""

    def __init__(self, random_state=None, lmbda=0.1, m_param=1, max_rounds=50,
                 max_diff=0.0001, use_approx=True, kernel_types="rbf_kernel",
                 kernel_configs=None, p=2, prev_alpha=False):
        super().__init__(random_state)
        # Initialise the MKL side of the MRO with the optimisation params.
        super(BaseMultiviewClassifier, self).__init__(lmbda, m_param,
                                                      use_approx, max_rounds,
                                                      max_diff, p)
        self.param_names = ["lmbda", "kernel_types", "kernel_configs"]
        self.distribs = [CustomUniform(), KernelGenerator(),
                         KernelConfigGenerator()]
        self.kernel_types = kernel_types
        self.kernel_configs = kernel_configs
        self.prev_alpha = prev_alpha

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Compute one kernel per view on the training examples and fit MKL."""
        train_indices, view_indices = get_examples_views_indices(X,
                                                                 train_indices,
                                                                 view_indices)
        nb_view, n = len(view_indices), len(train_indices)
        self.init_kernels(nb_view=nb_view)
        new_X = {}
        for index, (kernel_function, kernel_config, view_index) in enumerate(
                zip(self.kernel_functions, self.kernel_configs, view_indices)):
            new_X[index] = kernel_function(X.get_v(view_index, train_indices),
                                           **kernel_config)
        return super(LPNormMKL, self).fit(new_X, y[train_indices])

    def predict(self, X, example_indices=None, view_indices=None):
        """Predict on the requested examples using the per-view kernels."""
        example_indices, view_indices = get_examples_views_indices(X,
                                                                   example_indices,
                                                                   view_indices)
        new_X = {}
        for index, (kernel_function, kernel_config, view_index) in enumerate(
                zip(self.kernel_functions, self.kernel_configs, view_indices)):
            new_X[index] = kernel_function(X.get_v(view_index, example_indices),
                                           **kernel_config)
        return super(LPNormMKL, self).predict(new_X)

    def init_kernels(self, nb_view=2, ):
        """Resolve kernel_types / kernel_configs into per-view callables and
        keyword-argument dicts used by fit/predict."""
        if isinstance(self.kernel_types, KernelDistribution):
            self.kernel_functions = self.kernel_types.draw(nb_view)
        elif isinstance(self.kernel_types, str):
            self.kernel_functions = [getattr(pairwise, self.kernel_types)
                                     for _ in range(nb_view)]
        elif isinstance(self.kernel_types, list):
            self.kernel_functions = [getattr(pairwise, kernel_type)
                                     for kernel_type in self.kernel_types]
        if isinstance(self.kernel_configs, KernelConfigDistribution):
            self.kernel_configs = self.kernel_configs.draw(nb_view)
            self.kernel_configs = [kernel_config[kernel_function.__name__]
                                   for kernel_config, kernel_function
                                   in zip(self.kernel_configs,
                                          self.kernel_functions)]
        elif isinstance(self.kernel_configs, dict):
            self.kernel_configs = [self.kernel_configs for _ in range(nb_view)]
        elif self.kernel_configs is None:
            # Bug fix: leaving kernel_configs as None made fit() fail when
            # zipping over it; default to "no extra kwargs" for every view.
            self.kernel_configs = [{} for _ in range(nb_view)]
        else:
            pass
class KernelConfigGenerator:
    """Scipy-like distribution: ``rvs`` yields a KernelConfigDistribution."""

    def rvs(self, random_state=None):
        """Draw a freshly-seeded KernelConfigDistribution.

        Bug fix: ``random_state.randint(1)`` always returns 0 (the bound is
        exclusive), so every draw was seeded identically; draw the seed over
        the full int32 range instead.
        """
        return KernelConfigDistribution(seed=random_state.randint(2 ** 31 - 1))
class KernelConfigDistribution:
    """Draws, for each view, a hyper-parameter config for every supported
    kernel (polynomial, chi2, rbf)."""

    def __init__(self, seed=42):
        self.random_state = np.random.RandomState(seed)
        # One sub-distribution per tunable parameter of each kernel.
        self.possible_config = {
            "polynomial_kernel": {"degree": CustomRandint(low=1, high=7),
                                  "gamma": CustomUniform(),
                                  "coef0": CustomUniform()
                                  },
            "chi2_kernel": {"gamma": CustomUniform()},
            "rbf_kernel": {"gamma": CustomUniform()},
        }

    def draw(self, nb_view):
        """Return a list (one entry per view) of per-kernel parameter dicts."""
        return [
            {kernel_name: {param_name: distrib.rvs(self.random_state)
                           for param_name, distrib in params_dict.items()}
             for kernel_name, params_dict in self.possible_config.items()}
            for _ in range(nb_view)
        ]
class KernelGenerator:
    """Scipy-like distribution: ``rvs`` yields a KernelDistribution."""

    def rvs(self, random_state=None):
        """Draw a freshly-seeded KernelDistribution.

        Bug fix: ``random_state.randint(1)`` always returns 0 (the bound is
        exclusive), so every draw was seeded identically; draw the seed over
        the full int32 range instead.
        """
        return KernelDistribution(seed=random_state.randint(2 ** 31 - 1))
class KernelDistribution:
    """Picks, uniformly at random, one pairwise-kernel function per view."""

    def __init__(self, seed=42):
        self.random_state = np.random.RandomState(seed)
        # Candidate kernel functions from sklearn.metrics.pairwise.
        self.available_kernels = [
            pairwise.polynomial_kernel,
            pairwise.chi2_kernel,
            pairwise.rbf_kernel,
        ]

    def draw(self, nb_view):
        """Return ``nb_view`` kernel functions sampled with replacement."""
        candidates = self.available_kernels
        return self.random_state.choice(candidates, nb_view)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment