diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py index c689eea3ca6eb633cc102f2136e16e45cef4e942..94362ecc216ad608250ed8bba2d2f9e62d913ae4 100644 --- a/multimodal/boosting/boost.py +++ b/multimodal/boosting/boost.py @@ -59,13 +59,6 @@ class UBoosting(metaclass=ABCMeta): def _validate_X_predict(self, X): """Ensure that X is in the proper format.""" - if (self.base_estimator is None or - isinstance(self.base_estimator, - (BaseDecisionTree, BaseForest))): - check_array(X, accept_sparse='csr', dtype=DTYPE) - - else: - check_array(X, accept_sparse=['csr', 'csc']) if X.ndim < 2: X = X[np.newaxis, :] if X.shape[1] != self.n_features_: @@ -73,8 +66,15 @@ class UBoosting(metaclass=ABCMeta): "match the input. Model n_features is %s and " "input n_features is %s " % (self.n_features_, X.shape[1])) else: - mes = "Reshape your data" + mes = "Reshape your data as a 2D-array " raise ValueError(mes) + if (self.base_estimator is None or + isinstance(self.base_estimator, + (BaseDecisionTree, BaseForest))): + check_array(X, accept_sparse='csr', dtype=DTYPE) + + else: + check_array(X, accept_sparse=['csr', 'csc']) if X.ndim > 1: if X.shape[1] != self.n_features_: if X.shape[0] == self.n_features_ and X.shape[1] > 1: @@ -83,10 +83,6 @@ class UBoosting(metaclass=ABCMeta): raise ValueError("Number of features of the model must " "match the input. Model n_features is %s and " "input n_features is %s " % (self.n_features_, X.shape[1])) - - - # - # raise ValueError(mes) return X def _global_X_transform(self, X, views_ind=None): @@ -97,10 +93,10 @@ class UBoosting(metaclass=ABCMeta): X_ = MultiModalSparseArray(X, views_ind) else: X_ = MultiModalArray(X, views_ind) - if not isinstance(X_, MultiModalData): - try: - X_ = np.asarray(X) - X_ = MultiModalArray(X_) - except Exception as e: - raise TypeError('Reshape your data') + # if not isinstance(X_, MultiModalData): + # try: + # X_ = np.asarray(X) + # X_ = MultiModalArray(X_) + # except Exception as e: + # raise TypeError('Reshape your data') return X_ diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py index ac20e3fdd446c52303acf6c86ff2cc0c881dd453..a4b5f9c310c9cb0ce314e682074f17b5da62b3f1 100644 --- a/multimodal/datasets/data_sample.py +++ b/multimodal/datasets/data_sample.py @@ -365,14 +365,14 @@ class MultiModalArray(np.ndarray, MultiModalData): views_ind.append(dat_values.shape[1] + views_ind[index]) index += 1 thekeys = data.keys() - if new_data.ndim < 2 : - raise ValueError('Reshape your data') - if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): + # if new_data.ndim < 2 : + # raise ValueError('Reshape your data') + if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') elif isinstance(data, np.ndarray) and views_ind is None and data.ndim == 1: try: dat0 = np.array(data[0]) - except Exception: + except Exception: # pragma: no cover raise TypeError("input format is not supported") if dat0.ndim < 2: @@ -388,13 +388,13 @@ class MultiModalArray(np.ndarray, MultiModalData): elif (isinstance(data, np.ndarray) ) and data.ndim > 1: try: data = np.asarray(data) - except: + except: # pragma: no cover raise TypeError("input format is not supported") if views_ind is not None: try: views_ind = np.asarray(views_ind) - except : + except : # pragma: no cover raise TypeError("n_views should be list or nparray") elif views_ind is None: if data.shape[1] > 1: @@ -413,7 +413,7 @@ class MultiModalArray(np.ndarray, MultiModalData): elif not isinstance(views_ind, np.ndarray): try: views_ind = np.asarray(views_ind) - except Exception: + except Exception: # pragma: no cover raise TypeError("format of views_ind is not list or nd array") except Exception as e: raise ValueError('Reshape your data') @@ -421,8 +421,8 @@ class MultiModalArray(np.ndarray, MultiModalData): raise ValueError('Reshape your data') if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') - if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]: - raise ValueError('Reshape your data') + # if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]: + # raise ValueError('Reshape your data') # views_ind_self = views_ind # if new_data.shape[1] < 1: @@ -437,7 +437,7 @@ class MultiModalArray(np.ndarray, MultiModalData): # obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls) # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask) - if hasattr(new_data, "mask"): + if hasattr(new_data, "mask"): # pragma: no cover obj = ma.masked_array(new_data.data, new_data.mask).view(cls) elif hasattr(new_data, "data") and \ hasattr(new_data, "shape") and len(new_data.shape) > 0: @@ -462,7 +462,7 @@ class MultiModalArray(np.ndarray, MultiModalData): for dat_values in data: try: dat_values = np.array(dat_values) - except Exception: + except Exception: # pragma: no cover raise TypeError("input format is not supported") new_data = cls._populate_new_data(index, dat_values, new_data) views_ind[index + 1] = dat_values.shape[1] + views_ind[index] @@ -471,7 +471,8 @@ class MultiModalArray(np.ndarray, MultiModalData): return new_data, shapes_int, views_ind @staticmethod - def _populate_new_data(index, dat_values, new_data): + def _populate_new_data(index, dat_values, new_data): # pragma: no cover + """Work in progress : including missing data""" if index == 0: if isinstance(dat_values, ma.MaskedArray) or \ isinstance(dat_values, np.ndarray) or sp.issparse(dat_values): @@ -525,30 +526,32 @@ class MultiModalArray(np.ndarray, MultiModalData): def set_view(self, view, data): start = int(np.sum(np.asarray(self.shapes_int[0: view]))) stop = int(start + self.shapes_int[view]) - if stop-start == data.shape[0] and data.shape[1]== self.data.shape[1]: + if stop-start == data.shape[1] and data.shape[0] == self.data.shape[0]: self[:, start:stop] = data else: raise ValueError( - "shape of data does not match (%d, %d)" %stop-start %self.data.shape[1]) + "shape of data does not match (%d, %d)" %(stop-start ,self.data.shape[0])) - def get_raw(self, view, raw): - start = np.sum(np.asarray(self.shapes_int[0: view])) - stop = np.sum(np.asarray(self.shapes_int[0: view+1])) - return self.data[start:stop, raw] - - def add_view(self, v, data): - if len(self.shape) > 0: - if data.shape[0] == self.data.shape[0]: - indice = self.shapes_int[v] - np.insert(self.data, data, indice+1, axis=0) - self.shapes_int.append(data.shape[1]) - self.n_views +=1 - else: - raise ValueError("New view can't initialazed") - # self.shapes_int= [data.shape[1]] - # self.data.reshape(data.shape[0],) - # np.insert(self.data, data, 0) - # self.n_views = 1 + def get_row(self, view, row): + start = int(np.sum(np.asarray(self.shapes_int[0: view]))) + stop = int(np.sum(np.asarray(self.shapes_int[0: view+1]))) + return self[row, start:stop] + + # def add_view(self, data): + # if len(self.shape) > 0: + # if data.shape[0] == self.data.shape[0]: + # print(self.data.shape, data.shape) + # new_data = np.hstack((self.data, data)) + # self.shapes_int.append(data.shape[1]) + # self.n_views +=1 + # print(new_data.shape) + # + # else: + # raise ValueError("New view can't initialazed") + # # self.shapes_int= [data.shape[1]] + # # self.data.reshape(data.shape[0],) + # # np.insert(self.data, data, 0) + # # self.n_views = 1 def _todict(self): dico = {} diff --git a/multimodal/kernels/mkernel.py b/multimodal/kernels/mkernel.py index 7334e7916866ea13c3b6d88d7321899d83103682..2b8d6d5f4ea94911f1122d0fb1e0c8ae963c8681 100644 --- a/multimodal/kernels/mkernel.py +++ b/multimodal/kernels/mkernel.py @@ -124,22 +124,22 @@ class MKernel(metaclass=ABCMeta): X = X_ if isinstance(X, MultiModalArray): X_ = X - if not isinstance(X_, MultiModalArray): - try: - X_ = np.asarray(X) - X_ = MultiModalArray(X_, views_ind) - except Exception as e: - pass + # if not isinstance(X_, MultiModalArray): + # try: + # X_ = np.asarray(X) + # X_ = MultiModalArray(X_, views_ind) + # except Exception as e: + # pass # raise TypeError('Reshape your data') if isinstance(X_, MultiModalArray): for v in range(X.n_views): if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v) kernel_dict[v] = self._get_kernel(X_.get_view(v), y) - if not isinstance(X_, MultiModalArray): - if sp.sparse.issparse(X): - raise TypeError("Nonsensical Error: no sparse data are allowed as input") - raise TypeError('Reshape your data') + # if not isinstance(X_, MultiModalArray): + # if sp.sparse.issparse(X): + # raise TypeError("Nonsensical Error: no sparse data are allowed as input") + # raise TypeError('Reshape your data') K_ = MultiModalArray(kernel_dict) return X_, K_ diff --git a/multimodal/tests/test_combo.py b/multimodal/tests/test_combo.py index f0588935bcfd2e8f435058ebf5d7ab468a196a4e..f8d085dfa2138313b8060fc51eb81456dc87876f 100644 --- a/multimodal/tests/test_combo.py +++ b/multimodal/tests/test_combo.py @@ -374,6 +374,8 @@ class TestMuComboClassifier(unittest.TestCase): [ 4.49110023, 1., -2. ], [ 8., 2.49110023, 1. ]]]) np.testing.assert_almost_equal(label_score, expected_label_score,6) + + # # label_score = np.array( # [[[-1, -2, 4], [-8, 1, 4], [2, 8, -4], [2, -1, 4]], diff --git a/multimodal/tests/test_data_sample.py b/multimodal/tests/test_data_sample.py index 469ba500685e879a6c2816fe7455466bb5d87ca1..a25977e9c6b1bd9bc38521dd5e739da3ad7a04b1 100644 --- a/multimodal/tests/test_data_sample.py +++ b/multimodal/tests/test_data_sample.py @@ -47,6 +47,7 @@ from multimodal.tests.datasets.get_dataset_path import get_dataset_path from multimodal.datasets.data_sample import MultiModalArray import pickle + class UnitaryTest(unittest.TestCase): @classmethod @@ -85,6 +86,44 @@ class UnitaryTest(unittest.TestCase): array_x = a.data b = MultiModalArray(a) np.testing.assert_equal(b.views_ind, np.array([0, 120, 240])) + view_1 = np.random.randint(1,10,10) + view_2 = np.random.randint(1,10,11) + data = {0 : view_1, + 1 : view_2,} + c = MultiModalArray(data) + np.testing.assert_array_equal(np.asarray(c[0,:]), np.concatenate((view_1, view_2))) + view_1 = np.random.randint(1, 10, 1) + data = {0: view_1, } + self.assertRaises(ValueError, MultiModalArray, data) + view_1 = np.array([0,]) + data = view_1 + d = MultiModalArray(data) + np.testing.assert_array_equal(d.views_ind, np.array([0,1])) + view_1 = [[0]] + data = view_1 + self.assertRaises(ValueError, MultiModalArray, data) + view_1 = [[0,1,2 ],[0,1,2 ]] + data = view_1 + d = MultiModalArray(data, [0,1]) + np.testing.assert_array_equal(d.views_ind, np.array([0, 1])) + view_1 = [[0]] + data = view_1 + self.assertRaises(ValueError, MultiModalArray, data) + + def test_view_functions(self): + view_1 = np.random.randint(1, 10, (5, 10)) + view_2 = np.random.randint(1, 10, (5, 10)) + view_3 = np.random.randint(1, 10, (5, 10)) + data = {0: view_1, + 1: view_2, } + c = MultiModalArray(data) + c.set_view(1, view_3) + np.testing.assert_array_equal(c.get_view(1), view_3) + view_3 = np.random.randint(1, 10, (12, 10)) + c = MultiModalArray(data) + self.assertRaises(ValueError, c.set_view, 1, view_3) + np.testing.assert_array_equal(c.get_row(0, 2), view_1[2, :]) + if __name__ == '__main__': diff --git a/multimodal/tests/test_mumbo.py b/multimodal/tests/test_mumbo.py index 1a8f58c8b1e43e17f21e560c15260219dde3b3da..af9c5c69f222c26ad892c1cbf2e4bfb68104574b 100644 --- a/multimodal/tests/test_mumbo.py +++ b/multimodal/tests/test_mumbo.py @@ -909,6 +909,18 @@ class TestMumboClassifier(unittest.TestCase): else: self.assertTrue(all([issubclass(type_, csr_matrix) for type_ in types])) + def test_validate_X_predict(self): + clf = MumboClassifier() + X = np.random.randint(1, 10, (2, 10)) + y = [1, 0] + clf.fit(X, y) + X_pred = np.random.randint(1, 10, 10) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + X_pred = np.random.randint(1,10,9) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + X_pred = np.random.randint(1, 10, (2, 9)) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/multimodal/tests/test_mvml.py b/multimodal/tests/test_mvml.py index 7b1cb4c3479d5d58486c7bba52204db1f33a9bb6..83c6ea3125032a0574398185fbe3b220da427453 100644 --- a/multimodal/tests/test_mvml.py +++ b/multimodal/tests/test_mvml.py @@ -43,6 +43,7 @@ import pickle import unittest import numpy as np +import scipy as sp from sklearn.exceptions import NotFittedError from multimodal.datasets.data_sample import MultiModalArray @@ -271,6 +272,21 @@ class MVMLTest(unittest.TestCase): pass # return check_estimator(MVML) + def test_check_kernel(self): + clf = MVML() + clf.kernel = "an_unknown_kernel" + self.assertRaises(ValueError, clf._check_kernel) + + def testFitMVMLSparesArray(self): + ####################################################### + # task with nparray 2d + ####################################################### + x_metricl = MultiModalArray(self.kernel_dict) + x_array = np.asarray(x_metricl) + x_array_sparse = sp.sparse.csr_matrix(x_array) + mvml3 = MVML(lmbda=0.1, eta=1, nystrom_param=1.0) + self.assertRaises(TypeError, mvml3.fit, x_array_sparse, self.y, [0, 120, 240]) + if __name__ == "__main__": # import sys;sys.argv = ['', 'Test.testName'] unittest.main()