diff --git a/examples/mvml/plot_mvml_.py b/examples/mvml/plot_mvml_.py
index a449a7c868af0b92677a8703e6ca12e2685d9e35..3120cb00eaddb579972db21be7065746dbff472b 100644
--- a/examples/mvml/plot_mvml_.py
+++ b/examples/mvml/plot_mvml_.py
@@ -106,27 +106,27 @@ mvml.fit(kernel_dict, Y[0:n_tr])
 #
-pred1 = np.sign(mvml.predict(test_kernel_dict) )  # take sign for classification result
+pred1 = mvml.predict(test_kernel_dict)
 #
 # without approximation
 mvml2 = MVML(lmbda=0.1, eta=1, nystrom_param=1, kernel='precomputed')  # without approximation
 mvml2.fit(kernel_dict, Y[0:n_tr])
-pred2 = np.sign(mvml2.predict(test_kernel_dict))
+pred2 = mvml2.predict(test_kernel_dict)
 #
 # use MVML_Cov, don't learn w
 mvml3 = MVML(lmbda=0.1, eta=1,learn_A=3, nystrom_param=1, kernel='precomputed')
 mvml3.fit(kernel_dict, Y[0:n_tr])
-pred3 = np.sign(mvml3.predict(test_kernel_dict))
+pred3 = mvml3.predict(test_kernel_dict)
 #
 # use MVML_I, don't learn w
 mvml4 = MVML(lmbda=0.1, eta=1,learn_A=4, nystrom_param=1, kernel='precomputed')
 mvml4.fit(kernel_dict, Y[0:n_tr])
-pred4 = np.sign(mvml4.predict(test_kernel_dict))
+pred4 = mvml4.predict(test_kernel_dict)
 #
 # use kernel rbf equivalent to case 1
 mvml5 = MVML(lmbda=0.1, eta=1, nystrom_param=0.2, kernel='rbf')
 mvml5.fit(x_dict, Y[0:n_tr])
-pred5 = np.sign(mvml5.predict(test_x_dict))
+pred5 = mvml5.predict(test_x_dict)
 #
 #
 #
 # =========== show results ============
diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py
index 5ef38ddfc95e86acac032fbc67176e03438196c5..706350318258c25d35a6afb5aafcd57811663140 100644
--- a/multimodal/boosting/boost.py
+++ b/multimodal/boosting/boost.py
@@ -59,6 +59,7 @@ class UBoosting(metaclass=ABCMeta):
         if not isinstance(X_, MultiModalData):
             try:
                 X_ = np.asarray(X)
+                X_ = MultiModalArray(X_)
             except Exception as e:
                 raise TypeError('Reshape your data')
         return X_
diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py
index 7543874494cfc893394e1bb6c394a6c0f605fde5..eff5b3dfbe6553e5053cc0ea985752683297eb5f 100644
--- a/multimodal/datasets/data_sample.py
+++ b/multimodal/datasets/data_sample.py
@@ -280,8 +280,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
 
     shapes_int: list of int numbers of feature for each views
 
-    keys : name of key, where data come from a dictionary
-
     :Example:
 
@@ -295,8 +293,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
     >>> multiviews = MultiModalArray(data)
    >>> multiviews.shape
     (120, 240)
-    >>> multiviews.keys
-    dict_keys([0, 1])
     >>> multiviews.shapes_int
     [120, 120]
     >>> multiviews.n_views
@@ -313,16 +309,22 @@ class MultiModalArray(np.ndarray, MultiModalData):
         thekeys = None
         # view_ind_self = None
         view_mode = 'slices'
-        if isinstance(data, dict):
+        if isinstance(data, dict) and not isinstance(data, sp.dok_matrix):
             n_views = len(data)
             view_ind = [0]
             for key, dat_values in data.items():
+                dat_values = np.asarray(dat_values)
+                if dat_values.ndim < 2:
+                    dat_values = dat_values.reshape(1, dat_values.shape[0])
                 new_data = cls._populate_new_data(index, dat_values, new_data)
                 shapes_int.append(dat_values.shape[1])
                 view_ind.append(dat_values.shape[1] + view_ind[index])
                 index += 1
             thekeys = data.keys()
-
+            if new_data.ndim < 2 :
+                raise ValueError('Reshape your data')
+            if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
+                raise ValueError('Reshape your data')
         elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
             try:
                 dat0 = np.array(data[0])
@@ -359,15 +361,13 @@ class MultiModalArray(np.ndarray, MultiModalData):
         else:
             try:
                 new_data = np.asarray(data)
-                # if new_data.ndim == 1:
-                #     new_data = new_data.reshape(1, new_data.shape[0])
                 if view_ind is None:
                     view_ind = np.array([0, new_data.shape[1]])
             except Exception as e:
                 raise ValueError('Reshape your data')
             if new_data.ndim < 2 :
                 raise ValueError('Reshape your data')
-            if new_data.ndim > 1 and new_data.shape == (1, 1):
+            if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
                 raise ValueError('Reshape your data')
             if view_ind.ndim < 2 and new_data.ndim <2 and view_ind[-1] > new_data.shape[1]:
                 raise ValueError('Reshape your data')
@@ -396,7 +396,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
         obj.views_ind = view_ind
         obj.shapes_int = shapes_int
         obj.n_views = n_views
-        obj.keys = thekeys
+        # obj.keys = thekeys
         return obj
 
     @staticmethod
@@ -443,7 +443,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
         # super(MultiModalArray, self).__array_finalize__(obj)
         self.shapes_int = getattr(obj, 'shapes_int', None)
         self.n_views = getattr(obj, 'n_views', None)
-        self.keys = getattr(obj, 'keys', None)
+        # self.keys = getattr(obj, 'keys', None)
         self.views_ind = getattr(obj, 'views_ind', None)
         self.view_mode_ = getattr(obj, 'view_mode_', None)
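To make the dict-handling change in `MultiModalArray.__new__` above concrete, here is a minimal sketch of how a dict of per-view arrays is flattened into one 2-D array. It is not part of the patch; the toy shapes and the `views` variable are purely illustrative.

```python
import numpy as np
from multimodal.datasets.data_sample import MultiModalArray

# two toy views over the same 10 samples, with 3 and 4 features respectively
views = {0: np.random.rand(10, 3), 1: np.random.rand(10, 4)}

X = MultiModalArray(views)     # per-view blocks are concatenated column-wise
print(X.shape)                 # (10, 7)
print(X.shapes_int)            # [3, 4] -- one feature count per view
print(X.get_view(1).shape)     # (10, 4) -- column slice of the second view
```

`get_view` is the accessor the kernel code below iterates over, which is why the removed `keys` attribute is no longer needed.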
diff --git a/multimodal/kernels/mkernel.py b/multimodal/kernels/mkernel.py
index a550dcb111e147eb4da51361016b69992938f3f2..d1c432f56b2db83c4e56e15e9556eb4a27c31baa 100644
--- a/multimodal/kernels/mkernel.py
+++ b/multimodal/kernels/mkernel.py
@@ -1,6 +1,6 @@
 import numpy as np
 import scipy as sp
-from sklearn.metrics.pairwise import pairwise_kernels
+from sklearn.metrics.pairwise import pairwise_kernels, PAIRWISE_KERNEL_FUNCTIONS
 from abc import ABCMeta
 from multimodal.datasets.data_sample import DataSample, MultiModalArray
 
@@ -21,6 +21,9 @@ class MKernel(metaclass=ABCMeta):
     kernel_params : list of dict of corresponding kernels params
         KERNEL_PARAMS
     """
+    def _check_kernel(self):
+        if self.kernel not in PAIRWISE_KERNEL_FUNCTIONS.keys():
+            raise ValueError(self.kernel + " is not an available kernel")
 
     def _get_kernel(self, X, Y=None, v=0):
         met =None
@@ -67,34 +70,38 @@ class MKernel(metaclass=ABCMeta):
         K_ dict of kernels
         """
         kernel_dict = {}
-
+        flag_sparse = False
         X_ = None
         y = None
         if Y is None:
             y = Y
-        if isinstance(X, np.ndarray) and X.ndim == 1:
+        if isinstance(X, sp.sparse.spmatrix):
+            raise TypeError("Nonsensical Error: no sparse data are allowed as input")
+        else:
             X_= MultiModalArray(X, views_ind)
-            for v in range(X.shape[0]):
-                if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X_, Y, v)
-                kernel_dict[v] = self._get_kernel(X[v], y)
-        elif isinstance(X, dict):
-            X_= MultiModalArray(X)
-            for v in X.keys():
-                if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X_, Y, v)
-                kernel_dict[v] = self._get_kernel(X[v], y)
-        elif isinstance(X, np.ndarray) and X.ndim > 1:
-            X_ = MultiModalArray(X, views_ind)
             X = X_
         if isinstance(X, MultiModalArray):
+            X_ = X
+        if not isinstance(X_, MultiModalArray):
+            try:
+                X_ = np.asarray(X)
+                X_ = MultiModalArray(X_)
+            except Exception as e:
+                pass
+                # raise TypeError('Reshape your data')
+        if isinstance(X_, MultiModalArray):
             for v in range(X.n_views):
                 if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v)
-                kernel_dict[v] = self._get_kernel(X.get_view(v), y)
-            X_= X
+                kernel_dict[v] = self._get_kernel(X_.get_view(v), y)
+
         if not isinstance(X_, MultiModalArray):
-            raise TypeError("Input format is not reconized")
+            if sp.sparse.issparse(X):
+                raise TypeError("Nonsensical Error: no sparse data are allowed as input")
+            raise TypeError('Reshape your data')
         K_ = MultiModalArray(kernel_dict)
         return X_, K_
+
     def _calc_nystrom(self, kernels, n_approx):
         # calculates the nyström approximation for all the kernels in the given dictionary
         self.W_sqrootinv_dict = {}
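As a quick, hedged illustration of the input path handled by `_global_kernel_transform` above (this sketch is not part of the patch; the toy data, shapes, and variable names are assumptions), each view's precomputed kernel can be passed to `MVML` in a dict, exactly as the example script and the tests do:

```python
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from multimodal.kernels.mvml import MVML

rng = np.random.RandomState(0)
view0, view1 = rng.rand(60, 5), rng.rand(60, 8)   # two toy views of 60 samples
y = rng.randint(0, 2, 60)                         # binary targets

# one precomputed training kernel per view
kernel_dict = {0: rbf_kernel(view0), 1: rbf_kernel(view1)}

clf = MVML(lmbda=0.1, eta=1, nystrom_param=1.0, kernel='precomputed')
clf.fit(kernel_dict, y)   # the kernels are wrapped into a MultiModalArray internally
```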
diff --git a/multimodal/kernels/mvml.py b/multimodal/kernels/mvml.py
index 7a122af124ac990fdf051bd412da7202e0422d12..e1b22f27c7f3ebfd51892aad234d031198c09d78 100644
--- a/multimodal/kernels/mvml.py
+++ b/multimodal/kernels/mvml.py
@@ -8,7 +8,8 @@ from sklearn.utils.multiclass import unique_labels
 from sklearn.metrics.pairwise import pairwise_kernels
 from sklearn.utils.validation import check_X_y
 from sklearn.utils.validation import check_array
-from sklearn.metrics.pairwise import check_pairwise_arrays
+from sklearn.utils.multiclass import check_classification_targets
+from sklearn.utils.multiclass import type_of_target
 from sklearn.utils.validation import check_is_fitted
 from multimodal.datasets.data_sample import DataSample, MultiModalArray
 from multimodal.kernels.mkernel import MKernel
@@ -30,7 +31,9 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
     Parameters
     ----------
-    regression_params: array/list of regression parameters, first for basic regularization, second for
+    lmbda : float (default = 0.1), regression parameter for basic regularization
+
+    eta : float (default = 1), regression parameter for the
         regularization of A (not necessary if A is not learned)
 
     kernel : list of str (default: "precomputed") if kernel is as input of fit function set kernel to
@@ -58,6 +61,10 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
     Attributes
     ----------
+    lmbda : float (default = 0.1), regression parameter
+
+    eta : float (default = 1), regression parameter
+
     regression_params : array/list of regression parameters
 
     kernel : list or str indicate the metrics used for each kernels
@@ -89,14 +96,16 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
     y_ : array-like, shape = (n_samples,)
         Target values (class labels).
+
+    regression_ : bool, True if the estimator is used for regression (default : False)
     """
     # r_cond = 10-30
-    def __init__(self, lmbda, eta, nystrom_param, kernel="precomputed",
+    def __init__(self, lmbda=0.1, eta=1, nystrom_param=1.0, kernel="linear",
                  kernel_params=None, learn_A=1, learn_w=0, precision=1E-4, n_loops=6):
-
+        super(MVML, self).__init__()
         # calculate nyström approximation (if used)
         self.nystrom_param = nystrom_param
         self.lmbda = lmbda
@@ -110,6 +119,10 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
         self.precision = precision
         self.warning_message = {}
 
+    def _more_tags(self):
+        return {'X_types': ["2darray"], 'binary_only': True,
+                'multilabel' : False}
+
     def fit(self, X, y= None, views_ind=None):
         """
         Fit the MVML classifier
@@ -153,11 +166,23 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
         # Check that X and y have correct shape
 
         # Store the classes seen during fit
-
+        self.regression_ = False
         self.X_, self.K_= self._global_kernel_transform(X, views_ind=views_ind)
 
         check_X_y(self.X_, y)
+        # if type_of_target(y) not in "binary":
+        #     raise ValueError("target should be binary")
+
+        check_classification_targets(y)
-        self.classes_ = unique_labels(y)
+        if type_of_target(y) in "binary":
+            self.classes_, y = np.unique(y, return_inverse=True)
+            y[y==0] = -1.0
+        elif type_of_target(y) in "continuous":
+            y = y.astype(float)
+            self.regression_ = True
+        else:
+            raise ValueError("MVML algorithm is a binary classifier"
+                             " or performs regression with float targets")
         self.y_ = y
 
         # n = X[0].shape[0]
@@ -169,14 +194,14 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
         self.U_dict = self.K_._todict()
 
         # Return the classifier
-        self.A, self.g, self.w = self.learn_mvml(learn_A=self.learn_A, learn_w=self.learn_w, n_loops=self.n_loops)
+        self.A, self.g, self.w = self._learn_mvml(learn_A=self.learn_A, learn_w=self.learn_w, n_loops=self.n_loops)
         if self.warning_message:
             import logging
             logging.warning("warning appears during fit process" + str(self.warning_message))
             # print("warning appears during fit process", self.warning_message)
         return self
 
-    def learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
+    def _learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
         """
 
         Parameters
@@ -402,13 +427,22 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin):
             Predicted classes.
""" check_is_fitted(self, ['X_', 'U_dict', 'K_', 'y_']) # , 'U_dict', 'K_' 'y_' - X , test_kernels = self._global_kernel_transform(X, - views_ind=views_ind, - Y=self.X_) + X, test_kernels = self._global_kernel_transform(X, + views_ind=views_ind, + Y=self.X_) + check_array(X) - return self.predict_mvml(test_kernels, self.g, self.w) + pred = self._predict_mvml(test_kernels, self.g, self.w).squeeze() + if self.regression_: + return pred + else: + pred = np.sign(pred) + pred[pred==-1] = 0 + pred = pred.astype(int) + return np.take(self.classes_, pred) + - def predict_mvml(self, test_kernels, g, w): + def _predict_mvml(self, test_kernels, g, w): """ Parameters diff --git a/multimodal/tests/datasets/input_x_dic2.pkl b/multimodal/tests/datasets/input_x_dic2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multimodal/tests/test_cumbo.py b/multimodal/tests/test_cumbo.py index 7841d32461bc954617c3e1877ecea4b225432a78..49ec85c2d87b110c04d283e94b8a3d180d0790dc 100644 --- a/multimodal/tests/test_cumbo.py +++ b/multimodal/tests/test_cumbo.py @@ -66,17 +66,6 @@ class TestMuCumboClassifier(unittest.TestCase): np.testing.assert_equal(beta_class, expected_beta_class) np.testing.assert_equal(score_function, np.zeros((n_views, 4, n_classes))) - # def test_compute_edge_global(self): - # cost_global = np.array([[-2, 1, 1], [1, 1, -2], [1, -2, 1], [1, 1, -2]], - # dtype=np.float64) - # predicted_classes = np.array([[0, 0, 1, 1], [0, 1, 0, 2], [2, 2, 0, 0]]) - # y = np.array([0, 2, 1, 2]) - # expected_edge_global = np.array([0.25, 0.25, -0.125]) - # - # clf = MuCumboClassifier() - # edge_global = clf._compute_edge_global(cost_global, predicted_classes, y) - # np.testing.assert_equal(edge_global, expected_edge_global) - def test_compute_dist(self): cost = np.array( [[[-2, 1, 1], [-1, -1, -2], [1, -2, 1], [1, 1, -2]], @@ -258,63 +247,7 @@ class TestMuCumboClassifier(unittest.TestCase): np.testing.assert_almost_equal(np.sum(betas, axis =1), np.ones(3, dtype=np.float), 9) self.assertTrue(np.all(betas <= 1) ) self.assertTrue(np.all(betas >= 0) ) - # - # - # def test_compute_cost_global():betas. - # - # decimal = 12 - # label_score_glob = np.array( - # [[-1, -2, 4], [-8, 1, 4], [2, 8, -4], [2, -1, 4]], - # dtype=np.float64) - # best_pred_classes = np.array([0, 1, 0, 2]) - # y = np.array([0, 2, 1, 2]) - # alpha = 0.5 - # expected_label_score_glob = np.array( - # [[-0.5, -2, 4], [-8, 1.5, 4], [2.5, 8, -4], [2, -1, 4.5]], - # dtype=np.float64) - # - # clf = MuCumboClassifier() - # cost_glob, label_score_glob = clf._compute_cost_global( - # label_score_glob, best_pred_classes, y, alpha) - # - # assert_array_almost_equal(label_score_glob, expected_label_score_glob, - # decimal=decimal) - # - # label_score_glob = np.zeros((4, 3), dtype=np.float64) - # alpha = 0. 
-    #     expected_label_score_glob = np.zeros((4, 3), dtype=np.float64)
-    #     expected_cost_glob = np.array(
-    #         [[-2, 1, 1], [1, 1, -2], [1, -2, 1], [1, 1, -2]],
-    #         dtype=np.float64)
-    #
-    #     cost_glob, label_score_glob = clf._compute_cost_global(
-    #         label_score_glob, best_pred_classes, y, alpha)
-    #
-    #     assert_array_equal(label_score_glob, expected_label_score_glob)
-    #     assert_array_almost_equal(cost_glob, expected_cost_glob, decimal=decimal)
-    #
-    #     label_score_glob = np.array(
-    #         [[0, 0, np.log(4)], [np.log(8), 0, 0], [0, 0, 0], [0, 0, 0]],
-    #         dtype=np.float64)
-    #     alpha = np.log(2)
-    #     expected_label_score_glob = np.array(
-    #         [[alpha, 0, np.log(4)],
-    #          [np.log(8), alpha, 0],
-    #          [alpha, 0, 0],
-    #          [0, 0, alpha]],
-    #         dtype=np.float64)
-    #     expected_cost_glob = np.array(
-    #         [[-2.5, 0.5, 2.], [8., 2., -10.], [2., -3., 1.], [0.5, 0.5, -1.]],
-    #         dtype=np.float64)
-    #
-    #     cost_glob, label_score_glob = clf._compute_cost_global(
-    #         label_score_glob, best_pred_classes, y, alpha)
-    #
-    #     assert_array_almost_equal(label_score_glob, expected_label_score_glob,
-    #                               decimal=decimal)
-    #     assert_array_almost_equal(cost_glob, expected_cost_glob, decimal=decimal)
-    # def test_compute_beta(self):
 
     def test_indicatrice(self):
         clf = MuCumboClassifier()
@@ -524,17 +457,6 @@ class TestMuCumboClassifier(unittest.TestCase):
         clf = MuCumboClassifier()
         clf.fit(X, y)
         np.testing.assert_equal(clf.predict(X), y)
-        # assert_array_equal(clf.predict(X), y)
-        #
-        # y = [1, 0]
-        # clf = MuCumboClassifier()
-        # clf.fit(X, y, views_ind)
-        # assert_array_equal(clf.predict(X), y)
-        #
-        # y = (2, 1)
-        # clf = MuCumboClassifier()
-        # clf.fit(X, y, views_ind)
-        # assert_array_equal(clf.predict(X), y)
         #
         # # Check that misformed or inconsistent inputs raise expections
         X = np.zeros((5, 4, 2))
@@ -1050,8 +972,6 @@ class TestMuCumboClassifier(unittest.TestCase):
     # else:
     #     assert_true(all([type_ == csr_matrix for type_ in types]))
     #
-    # def test():
-    #     cumbo = MuCumboClassifier()
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/multimodal/tests/test_data_sample.py b/multimodal/tests/test_data_sample.py
index b21d4afd6e3bdde764ae3f1c495ead0d8a7ac59f..39c1dd3264b77d712a029549d3bbe557a8f88b8f 100644
--- a/multimodal/tests/test_data_sample.py
+++ b/multimodal/tests/test_data_sample.py
@@ -38,8 +38,6 @@ class UnitaryTest(unittest.TestCase):
         self.assertEqual(a.shape, (120, 240))
         self.assertEqual(a.shapes_int, [120, 120])
         self.assertEqual(a.n_views, 2)
-        dict_key = {0: 'a',1: 'b' }
-        self.assertEqual(a.keys, dict_key.keys())
 
     def test_init_Array(self):
         a = MultiModalArray(self.kernel_dict)
diff --git a/multimodal/tests/test_mvml.py b/multimodal/tests/test_mvml.py
index 32624f2be8d258483f06f5654902eb836a8b0f9f..be6541f84c3e683ffdce50bfcff213112c354a55 100644
--- a/multimodal/tests/test_mvml.py
+++ b/multimodal/tests/test_mvml.py
@@ -9,7 +9,7 @@ from sklearn.exceptions import NotFittedError
 from multimodal.datasets.data_sample import MultiModalArray
 from multimodal.kernels.mvml import MVML
 from multimodal.tests.datasets.get_dataset_path import get_dataset_path
-
+from sklearn.utils.estimator_checks import check_estimator
 
 
 class MVMLTest(unittest.TestCase):
@@ -96,7 +96,7 @@ class MVMLTest(unittest.TestCase):
         self.assertEqual(mvml2.A.shape, (240, 240))
         self.assertEqual(mvml2.g.shape,(240, 1))
         np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
-        with self.assertRaises(TypeError):
+        with self.assertRaises(ValueError):
             mvml2.fit([1, 2, 3])
 
     def testFitMVMLMetric_learA4(self):
@@ -111,7 +111,7 @@ class MVMLTest(unittest.TestCase):
         self.assertEqual(mvml2.A.shape, (240, 240))
         self.assertEqual(mvml2.g.shape,(240, 1))
         np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
-        with self.assertRaises(TypeError):
+        with self.assertRaises(ValueError):
             mvml2.fit([1, 2, 3])
 
     def testFitMVMLMetric_learA3(self):
@@ -126,7 +126,7 @@ class MVMLTest(unittest.TestCase):
         self.assertEqual(mvml2.A.shape, (240, 240))
         self.assertEqual(mvml2.g.shape,(240, 1))
         np.testing.assert_almost_equal(mvml2.w, w_expected, 8)
-        with self.assertRaises(TypeError):
+        with self.assertRaises(ValueError):
             mvml2.fit([1, 2, 3])
 
     def testFitMVMLMetric_PredictA2(self):
@@ -142,7 +142,7 @@ class MVMLTest(unittest.TestCase):
         self.assertEqual(mvml2.g.shape,(144, 1))
         np.testing.assert_almost_equal(mvml2.w, w_expected, 0)
         pred = mvml2.predict(self.test_kernel_dict)
-        self.assertEqual(pred.shape, (80,1))
+        self.assertEqual(pred.shape, (80,))
 
     def testFitMVMLMetric_PredictA1(self):
         #######################################################
@@ -157,7 +157,7 @@ class MVMLTest(unittest.TestCase):
         self.assertEqual(mvml2.g.shape,(144, 1))
         np.testing.assert_almost_equal(mvml2.w, w_expected, 0)
         pred = mvml2.predict(self.test_kernel_dict)
-        self.assertEqual(pred.shape, (80,1))
+        self.assertEqual(pred.shape, (80,))
 
     def testFitMVMLArray_2d(self):
         #######################################################
@@ -202,17 +202,19 @@ class MVMLTest(unittest.TestCase):
                     nystrom_param=0.2, learn_A=4)
         mvml.fit(self.kernel_dict, y=self.y)
         pred =mvml.predict(self.test_kernel_dict)
-        self.assertEqual(pred.shape, (80,1))
+        self.assertEqual(pred.shape, (80,))
 
     def testPredictMVML(self):
         mvml = MVML(lmbda=0.1, eta=1, nystrom_param=1.0, learn_A=4)
         mvml.fit(self.kernel_dict, y=self.y)
         pred = mvml.predict(self.test_kernel_dict)
-        self.assertEqual(pred.shape, (80,1))
+        self.assertEqual(pred.shape, (80,))
         # print(pred.shape)
-
+    def test_classifier(self):
+        pass
+        # return check_estimator(MVML)
 
 if __name__ == "__main__":
     # import sys;sys.argv = ['', 'Test.testName']
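For completeness, here is a hedged end-to-end sketch of the estimator interface exercised by the tests above (toy data; the shapes, labels, and variable names are illustrative assumptions, not part of the patch). With this change, `predict` returns a flat array of labels taken from `classes_` instead of raw signed scores.

```python
import numpy as np
from multimodal.kernels.mvml import MVML

rng = np.random.RandomState(42)
x_dict = {0: rng.rand(100, 5), 1: rng.rand(100, 8)}   # two raw-feature views
y = rng.choice([3, 7], size=100)                      # arbitrary binary labels

clf = MVML(lmbda=0.1, eta=1, nystrom_param=0.5, kernel='rbf')
clf.fit(x_dict, y)
pred = clf.predict(x_dict)

print(pred.shape)       # (100,) -- 1-D, no trailing (n, 1) axis
print(np.unique(pred))  # subset of [3 7], mapped back through clf.classes_
```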