Skip to content
Snippets Groups Projects
Commit 40c3cbbe authored by Dominique Benielli
Browse files

modif estimator MVML

parent ddeba9f2
No related branches found
No related tags found
No related merge requests found
Pipeline #3972 failed
......@@ -106,27 +106,27 @@ mvml.fit(kernel_dict, Y[0:n_tr])
#
pred1 = np.sign(mvml.predict(test_kernel_dict) ) # take sign for classification result
pred1 = mvml.predict(test_kernel_dict)
#
# without approximation
mvml2 = MVML(lmbda=0.1, eta=1, nystrom_param=1, kernel='precomputed') # without approximation
mvml2.fit(kernel_dict, Y[0:n_tr])
pred2 = np.sign(mvml2.predict(test_kernel_dict))
pred2 = mvml2.predict(test_kernel_dict)
#
# use MVML_Cov, don't learn w
mvml3 = MVML(lmbda=0.1, eta=1,learn_A=3, nystrom_param=1, kernel='precomputed')
mvml3.fit(kernel_dict, Y[0:n_tr])
pred3 = np.sign(mvml3.predict(test_kernel_dict))
pred3 = mvml3.predict(test_kernel_dict)
#
# use MVML_I, don't learn w
mvml4 = MVML(lmbda=0.1, eta=1,learn_A=4, nystrom_param=1, kernel='precomputed')
mvml4.fit(kernel_dict, Y[0:n_tr])
pred4 = np.sign(mvml4.predict(test_kernel_dict))
pred4 = mvml4.predict(test_kernel_dict)
#
# use kernel rbf equivalent to case 1
mvml5 = MVML(lmbda=0.1, eta=1, nystrom_param=0.2, kernel='rbf')
mvml5.fit(x_dict, Y[0:n_tr])
pred5 = np.sign(mvml5.predict(test_x_dict))
pred5 = mvml5.predict(test_x_dict)
#
#
# # =========== show results ============
......
......@@ -59,6 +59,7 @@ class UBoosting(metaclass=ABCMeta):
if not isinstance(X_, MultiModalData):
try:
X_ = np.asarray(X)
X_ = MultiModalArray(X_)
except Exception as e:
raise TypeError('Reshape your data')
return X_
......@@ -280,8 +280,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
shapes_int: list of int, number of features for each view
keys : name of key, where data come from a dictionary
:Example:
......@@ -295,8 +293,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
>>> multiviews = MultiModalArray(data)
>>> multiviews.shape
(120, 240)
>>> multiviews.keys
dict_keys([0, 1])
>>> multiviews.shapes_int
[120, 120]
>>> multiviews.n_views
......@@ -313,16 +309,22 @@ class MultiModalArray(np.ndarray, MultiModalData):
thekeys = None
# view_ind_self = None
view_mode = 'slices'
if isinstance(data, dict):
if isinstance(data, dict) and not isinstance(data, sp.dok_matrix):
n_views = len(data)
view_ind = [0]
for key, dat_values in data.items():
dat_values = np.asarray(dat_values)
if dat_values.ndim < 2:
dat_values = dat_values.reshape(1, dat_values.shape[0])
new_data = cls._populate_new_data(index, dat_values, new_data)
shapes_int.append(dat_values.shape[1])
view_ind.append(dat_values.shape[1] + view_ind[index])
index += 1
thekeys = data.keys()
if new_data.ndim < 2 :
raise ValueError('Reshape your data')
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
try:
dat0 = np.array(data[0])
......@@ -359,15 +361,13 @@ class MultiModalArray(np.ndarray, MultiModalData):
else:
try:
new_data = np.asarray(data)
# if new_data.ndim == 1:
# new_data = new_data.reshape(1, new_data.shape[0])
if view_ind is None:
view_ind = np.array([0, new_data.shape[1]])
except Exception as e:
raise ValueError('Reshape your data')
if new_data.ndim < 2 :
raise ValueError('Reshape your data')
if new_data.ndim > 1 and new_data.shape == (1, 1):
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
if view_ind.ndim < 2 and new_data.ndim <2 and view_ind[-1] > new_data.shape[1]:
raise ValueError('Reshape your data')
......@@ -396,7 +396,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
obj.views_ind = view_ind
obj.shapes_int = shapes_int
obj.n_views = n_views
obj.keys = thekeys
# obj.keys = thekeys
return obj
@staticmethod
......@@ -443,7 +443,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
# super(MultiModalArray, self).__array_finalize__(obj)
self.shapes_int = getattr(obj, 'shapes_int', None)
self.n_views = getattr(obj, 'n_views', None)
self.keys = getattr(obj, 'keys', None)
# self.keys = getattr(obj, 'keys', None)
self.views_ind = getattr(obj, 'views_ind', None)
self.view_mode_ = getattr(obj, 'view_mode_', None)
......
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.metrics.pairwise import pairwise_kernels, PAIRWISE_KERNEL_FUNCTIONS
from abc import ABCMeta
from multimodal.datasets.data_sample import DataSample, MultiModalArray
......@@ -21,6 +21,9 @@ class MKernel(metaclass=ABCMeta):
kernel_params : list of dict of corresponding kernels
params KERNEL_PARAMS
"""
def _check_kernel(self):
    """Check that ``self.kernel`` is a key of ``PAIRWISE_KERNEL_FUNCTIONS``.

    Raises
    ------
    ValueError
        If ``self.kernel`` is not one of the keys of
        ``PAIRWISE_KERNEL_FUNCTIONS``.
    """
    # NOTE(review): "precomputed" is presumably not a key of
    # PAIRWISE_KERNEL_FUNCTIONS -- confirm whether it must be accepted here.
    if self.kernel not in PAIRWISE_KERNEL_FUNCTIONS:
        # %r formatting keeps the intended ValueError even when the kernel is
        # not a string; plain concatenation would raise TypeError instead.
        raise ValueError("%r is not an available kernel" % (self.kernel,))
def _get_kernel(self, X, Y=None, v=0):
met =None
......@@ -67,34 +70,38 @@ class MKernel(metaclass=ABCMeta):
K_ dict of kernels
"""
kernel_dict = {}
flag_sparse = False
X_ = None
y = None
if Y is None:
y = Y
if isinstance(X, np.ndarray) and X.ndim == 1:
if isinstance(X, sp.sparse.spmatrix):
raise TypeError("Nonsensical Error: no sparse data are allowed as input")
else:
X_= MultiModalArray(X, views_ind)
for v in range(X.shape[0]):
if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X_, Y, v)
kernel_dict[v] = self._get_kernel(X[v], y)
elif isinstance(X, dict):
X_= MultiModalArray(X)
for v in X.keys():
if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X_, Y, v)
kernel_dict[v] = self._get_kernel(X[v], y)
elif isinstance(X, np.ndarray) and X.ndim > 1:
X_ = MultiModalArray(X, views_ind)
X = X_
if isinstance(X, MultiModalArray):
X_ = X
if not isinstance(X_, MultiModalArray):
try:
X_ = np.asarray(X)
X_ = MultiModalArray(X_)
except Exception as e:
pass
# raise TypeError('Reshape your data')
if isinstance(X_, MultiModalArray):
for v in range(X.n_views):
if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v)
kernel_dict[v] = self._get_kernel(X.get_view(v), y)
X_= X
kernel_dict[v] = self._get_kernel(X_.get_view(v), y)
if not isinstance(X_, MultiModalArray):
raise TypeError("Input format is not reconized")
if sp.sparse.issparse(X):
raise TypeError("Nonsensical Error: no sparse data are allowed as input")
raise TypeError('Reshape your data')
K_ = MultiModalArray(kernel_dict)
return X_, K_
def _calc_nystrom(self, kernels, n_approx):
# calculates the nyström approximation for all the kernels in the given dictionary
self.W_sqrootinv_dict = {}
......
......@@ -8,7 +8,8 @@ from sklearn.utils.multiclass import unique_labels
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.metrics.pairwise import check_pairwise_arrays
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.validation import check_is_fitted
from multimodal.datasets.data_sample import DataSample, MultiModalArray
from multimodal.kernels.mkernel import MKernel
......@@ -30,7 +31,9 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
Parameters
----------
regression_params: array/list of regression parameters, first for basic regularization, second for
lmbda : float regression_params lmbda (default = 0.1) for basic regularization
eta : float regression_params eta (default = 1) for
regularization of A (not necessary if A is not learned)
kernel : list of str (default: "precomputed") if kernel is as input of fit function set kernel to
......@@ -58,6 +61,10 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
Attributes
----------
lmbda : float regression_params lmbda (default = 0.1)
eta : float regression_params eta (default = 1)
regression_params : array/list of regression parameters
kernel : list or str indicate the metrics used for each kernels
......@@ -89,14 +96,16 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
y_ : array-like, shape = (n_samples,)
Target values (class labels).
regression_ : bool, True if the estimator is used for regression (default : False)
"""
# r_cond = 10-30
def __init__(self, lmbda, eta, nystrom_param, kernel="precomputed",
def __init__(self, lmbda=0.1, eta=1, nystrom_param=1.0, kernel="linear",
kernel_params=None,
learn_A=1, learn_w=0, precision=1E-4, n_loops=6):
super(MVML, self).__init__()
# calculate nyström approximation (if used)
self.nystrom_param = nystrom_param
self.lmbda = lmbda
......@@ -110,6 +119,10 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
self.precision = precision
self.warning_message = {}
def _more_tags(self):
    """Return the extra estimator tags declared by this estimator.

    Returns
    -------
    dict
        Mapping with the keys ``'X_types'``, ``'binary_only'`` and
        ``'multilabel'``.
    """
    tags = dict(X_types=["2darray"], binary_only=True, multilabel=False)
    return tags
def fit(self, X, y= None, views_ind=None):
"""
Fit the MVML classifier
......@@ -153,11 +166,23 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
# Check that X and y have correct shape
# Store the classes seen during fit
self.regression_ = False
self.X_, self.K_= self._global_kernel_transform(X, views_ind=views_ind)
check_X_y(self.X_, y)
# if type_of_target(y) not in "binary":
# raise ValueError("target should be binary")
check_classification_targets(y)
self.classes_ = unique_labels(y)
if type_of_target(y) in "binary":
self.classes_, y = np.unique(y, return_inverse=True)
y[y==0] = -1.0
elif type_of_target(y) in "continuous":
y = y.astype(float)
self.regression_ = True
else:
raise ValueError("MVML algorithms is a binary classifier"
" or performs regression with float target")
self.y_ = y
# n = X[0].shape[0]
......@@ -169,14 +194,14 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
self.U_dict = self.K_._todict()
# Return the classifier
self.A, self.g, self.w = self.learn_mvml(learn_A=self.learn_A, learn_w=self.learn_w, n_loops=self.n_loops)
self.A, self.g, self.w = self._learn_mvml(learn_A=self.learn_A, learn_w=self.learn_w, n_loops=self.n_loops)
if self.warning_message:
import logging
logging.warning("warning appears during fit process" + str(self.warning_message))
# print("warning appears during fit process", self.warning_message)
return self
def learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
def _learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
"""
Parameters
......@@ -402,13 +427,22 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
Predicted classes.
"""
check_is_fitted(self, ['X_', 'U_dict', 'K_', 'y_']) # , 'U_dict', 'K_' 'y_'
X , test_kernels = self._global_kernel_transform(X,
views_ind=views_ind,
Y=self.X_)
X, test_kernels = self._global_kernel_transform(X,
views_ind=views_ind,
Y=self.X_)
check_array(X)
return self.predict_mvml(test_kernels, self.g, self.w)
pred = self._predict_mvml(test_kernels, self.g, self.w).squeeze()
if self.regression_:
return pred
else:
pred = np.sign(pred)
pred[pred==-1] = 0
pred = pred.astype(int)
return np.take(self.classes_, pred)
def predict_mvml(self, test_kernels, g, w):
def _predict_mvml(self, test_kernels, g, w):
"""
Parameters
......
......@@ -66,17 +66,6 @@ class TestMuCumboClassifier(unittest.TestCase):
np.testing.assert_equal(beta_class, expected_beta_class)
np.testing.assert_equal(score_function, np.zeros((n_views, 4, n_classes)))
# def test_compute_edge_global(self):
# cost_global = np.array([[-2, 1, 1], [1, 1, -2], [1, -2, 1], [1, 1, -2]],
# dtype=np.float64)
# predicted_classes = np.array([[0, 0, 1, 1], [0, 1, 0, 2], [2, 2, 0, 0]])
# y = np.array([0, 2, 1, 2])
# expected_edge_global = np.array([0.25, 0.25, -0.125])
#
# clf = MuCumboClassifier()
# edge_global = clf._compute_edge_global(cost_global, predicted_classes, y)
# np.testing.assert_equal(edge_global, expected_edge_global)
def test_compute_dist(self):
cost = np.array(
[[[-2, 1, 1], [-1, -1, -2], [1, -2, 1], [1, 1, -2]],
......@@ -258,63 +247,7 @@ class TestMuCumboClassifier(unittest.TestCase):
np.testing.assert_almost_equal(np.sum(betas, axis =1), np.ones(3, dtype=np.float), 9)
self.assertTrue(np.all(betas <= 1) )
self.assertTrue(np.all(betas >= 0) )
#
#
# def test_compute_cost_global():betas.
#
# decimal = 12
# label_score_glob = np.array(
# [[-1, -2, 4], [-8, 1, 4], [2, 8, -4], [2, -1, 4]],
# dtype=np.float64)
# best_pred_classes = np.array([0, 1, 0, 2])
# y = np.array([0, 2, 1, 2])
# alpha = 0.5
# expected_label_score_glob = np.array(
# [[-0.5, -2, 4], [-8, 1.5, 4], [2.5, 8, -4], [2, -1, 4.5]],
# dtype=np.float64)
#
# clf = MuCumboClassifier()
# cost_glob, label_score_glob = clf._compute_cost_global(
# label_score_glob, best_pred_classes, y, alpha)
#
# assert_array_almost_equal(label_score_glob, expected_label_score_glob,
# decimal=decimal)
#
# label_score_glob = np.zeros((4, 3), dtype=np.float64)
# alpha = 0.
# expected_label_score_glob = np.zeros((4, 3), dtype=np.float64)
# expected_cost_glob = np.array(
# [[-2, 1, 1], [1, 1, -2], [1, -2, 1], [1, 1, -2]],
# dtype=np.float64)
#
# cost_glob, label_score_glob = clf._compute_cost_global(
# label_score_glob, best_pred_classes, y, alpha)
#
# assert_array_equal(label_score_glob, expected_label_score_glob)
# assert_array_almost_equal(cost_glob, expected_cost_glob, decimal=decimal)
#
# label_score_glob = np.array(
# [[0, 0, np.log(4)], [np.log(8), 0, 0], [0, 0, 0], [0, 0, 0]],
# dtype=np.float64)
# alpha = np.log(2)
# expected_label_score_glob = np.array(
# [[alpha, 0, np.log(4)],
# [np.log(8), alpha, 0],
# [alpha, 0, 0],
# [0, 0, alpha]],
# dtype=np.float64)
# expected_cost_glob = np.array(
# [[-2.5, 0.5, 2.], [8., 2., -10.], [2., -3., 1.], [0.5, 0.5, -1.]],
# dtype=np.float64)
#
# cost_glob, label_score_glob = clf._compute_cost_global(
# label_score_glob, best_pred_classes, y, alpha)
#
# assert_array_almost_equal(label_score_glob, expected_label_score_glob,
# decimal=decimal)
# assert_array_almost_equal(cost_glob, expected_cost_glob, decimal=decimal)
# def test_compute_beta(self):
def test_indicatrice(self):
clf = MuCumboClassifier()
......@@ -524,17 +457,6 @@ class TestMuCumboClassifier(unittest.TestCase):
clf = MuCumboClassifier()
clf.fit(X, y)
np.testing.assert_equal(clf.predict(X), y)
# assert_array_equal(clf.predict(X), y)
#
# y = [1, 0]
# clf = MuCumboClassifier()
# clf.fit(X, y, views_ind)
# assert_array_equal(clf.predict(X), y)
#
# y = (2, 1)
# clf = MuCumboClassifier()
# clf.fit(X, y, views_ind)
# assert_array_equal(clf.predict(X), y)
#
# # Check that malformed or inconsistent inputs raise exceptions
X = np.zeros((5, 4, 2))
......@@ -1050,8 +972,6 @@ class TestMuCumboClassifier(unittest.TestCase):
# else:
# assert_true(all([type_ == csr_matrix for type_ in types]))
#
# def test():
# cumbo = MuCumboClassifier()
if __name__ == '__main__':
unittest.main()
......
......@@ -38,8 +38,6 @@ class UnitaryTest(unittest.TestCase):
self.assertEqual(a.shape, (120, 240))
self.assertEqual(a.shapes_int, [120, 120])
self.assertEqual(a.n_views, 2)
dict_key = {0: 'a',1: 'b' }
self.assertEqual(a.keys, dict_key.keys())
def test_init_Array(self):
a = MultiModalArray(self.kernel_dict)
......
......@@ -9,7 +9,7 @@ from sklearn.exceptions import NotFittedError
from multimodal.datasets.data_sample import MultiModalArray
from multimodal.kernels.mvml import MVML
from multimodal.tests.datasets.get_dataset_path import get_dataset_path
from sklearn.utils.estimator_checks import check_estimator
class MVMLTest(unittest.TestCase):
......@@ -96,7 +96,7 @@ class MVMLTest(unittest.TestCase):
self.assertEqual(mvml2.A.shape, (240, 240))
self.assertEqual(mvml2.g.shape,(240, 1))
np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
with self.assertRaises(TypeError):
with self.assertRaises(ValueError):
mvml2.fit([1, 2, 3])
def testFitMVMLMetric_learA4(self):
......@@ -111,7 +111,7 @@ class MVMLTest(unittest.TestCase):
self.assertEqual(mvml2.A.shape, (240, 240))
self.assertEqual(mvml2.g.shape,(240, 1))
np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
with self.assertRaises(TypeError):
with self.assertRaises(ValueError):
mvml2.fit([1, 2, 3])
def testFitMVMLMetric_learA3(self):
......@@ -126,7 +126,7 @@ class MVMLTest(unittest.TestCase):
self.assertEqual(mvml2.A.shape, (240, 240))
self.assertEqual(mvml2.g.shape,(240, 1))
np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
with self.assertRaises(TypeError):
with self.assertRaises(ValueError):
mvml2.fit([1, 2, 3])
def testFitMVMLMetric_PredictA2(self):
......@@ -142,7 +142,7 @@ class MVMLTest(unittest.TestCase):
self.assertEqual(mvml2.g.shape,(144, 1))
np.testing.assert_almost_equal(mvml2.w, w_expected, 0)
pred = mvml2.predict(self.test_kernel_dict)
self.assertEqual(pred.shape, (80,1))
self.assertEqual(pred.shape, (80,))
def testFitMVMLMetric_PredictA1(self):
#######################################################
......@@ -157,7 +157,7 @@ class MVMLTest(unittest.TestCase):
self.assertEqual(mvml2.g.shape,(144, 1))
np.testing.assert_almost_equal(mvml2.w, w_expected, 0)
pred = mvml2.predict(self.test_kernel_dict)
self.assertEqual(pred.shape, (80,1))
self.assertEqual(pred.shape, (80,))
def testFitMVMLArray_2d(self):
#######################################################
......@@ -202,17 +202,19 @@ class MVMLTest(unittest.TestCase):
nystrom_param=0.2, learn_A=4)
mvml.fit(self.kernel_dict, y=self.y)
pred =mvml.predict(self.test_kernel_dict)
self.assertEqual(pred.shape, (80,1))
self.assertEqual(pred.shape, (80,))
def testPredictMVML(self):
mvml = MVML(lmbda=0.1, eta=1,
nystrom_param=1.0, learn_A=4)
mvml.fit(self.kernel_dict, y=self.y)
pred = mvml.predict(self.test_kernel_dict)
self.assertEqual(pred.shape, (80,1))
self.assertEqual(pred.shape, (80,))
# print(pred.shape)
def test_classifier(self):
    """Placeholder test: currently does nothing and therefore always passes."""
    pass
    # Disabled call to the estimator check, kept for reference:
    # return check_estimator(MVML)
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment