diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py
index 7b039e6f94f37d16cafe9e4999ed51b7c4bda9bd..5ef38ddfc95e86acac032fbc67176e03438196c5 100644
--- a/multimodal/boosting/boost.py
+++ b/multimodal/boosting/boost.py
@@ -26,14 +26,22 @@ class UBoosting(metaclass=ABCMeta):
         else:
             check_array(X, accept_sparse=['csr', 'csc'])
         if X.ndim < 2:
-            mes = "Reshape your data"
-            raise ValueError(mes)
-        if X.ndim > 1:
+            X = X[np.newaxis, :]
             if X.shape[1] != self.n_features_:
-                mes = "Reshape your data"
                 raise ValueError("Number of features of the model must "
-                                 "match the input. Model n_features is %s and "
-                                 "input n_features is %s " % (self.n_features_, X.shape[1]))
+                                 "match the input. Model n_features is %s and "
+                                 "input n_features is %s " % (self.n_features_, X.shape[1]))
+            else:
+                mes = "Reshape your data"
+                raise ValueError(mes)
+        if X.ndim > 1:
+            if X.shape[1] != self.n_features_:
+                if X.shape[0] == self.n_features_ and X.shape[1] > 1:
+                    raise ValueError("Reshape your data")
+                else:
+                    raise ValueError("Number of features of the model must "
+                                     "match the input. Model n_features is %s and "
+                                     "input n_features is %s " % (self.n_features_, X.shape[1]))
 #
diff --git a/multimodal/datasets/__pycache__/__init__.cpython-36.pyc b/multimodal/datasets/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 78203c1c83371c7711d386af2cea6da83faec7cb..0000000000000000000000000000000000000000
Binary files a/multimodal/datasets/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/datasets/__pycache__/base.cpython-36.pyc b/multimodal/datasets/__pycache__/base.cpython-36.pyc
deleted file mode 100644
index 5d08b5d131a62ac27ac9450f9ba3386c212792bd..0000000000000000000000000000000000000000
Binary files a/multimodal/datasets/__pycache__/base.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/datasets/__pycache__/data_sample.cpython-36.pyc b/multimodal/datasets/__pycache__/data_sample.cpython-36.pyc
deleted file mode 100644
index aad5511d634098208d0e9aa1cb41329d923a21d6..0000000000000000000000000000000000000000
Binary files a/multimodal/datasets/__pycache__/data_sample.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py
index ed3ab6d2e95a17b95a758e84db1d71396a090963..9f6d730d4ac6d7fecd851cef66d5a52b6e2c420e 100644
--- a/multimodal/datasets/data_sample.py
+++ b/multimodal/datasets/data_sample.py
@@ -33,7 +33,6 @@ class MultiModalData(metaclass=ABCMeta):
     def _first_validate_views_ind(views_ind, n_features):
         """Ensure proper format for views_ind and return number of views."""
         views_ind = np.array(views_ind)
-
         if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
             if len(views_ind) > 2 and np.any(views_ind[:-1] >= views_ind[1:]):
                 raise ValueError("Values in views_ind must be sorted.")
@@ -79,7 +78,6 @@ class MultiModalData(metaclass=ABCMeta):
     def _validate_views_ind(self, views_ind, n_features):
         """Ensure proper format for views_ind and return number of views."""
         views_ind = np.array(views_ind)
-
         if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
             if len(views_ind) > 2 and np.any(views_ind[:-1] >= views_ind[1:]):
                 raise ValueError("Values in views_ind must be sorted.")
@@ -228,129 +226,6 @@ class MultiModalSparseArray(sp.csr_matrix, sp.csc_matrix, MultiModalSparseInfo,
         sp.csc_matrix.__init__(self, *arg, **kwargs)
 
 
-# class MultiModalSparseArray(sp.csr_matrix, sp.csc_matrix, MultiModalData):
-#     """
-#     MultiModalArray inherit from numpy ndarray
-#
-#
-#     Parameters
-#     ----------
-#
-#     data : can be
-#          - dictionary of multiview array with shape = (n_samples, n_features)  for multi-view
-#               for each view.
-#            {0: array([[]],
-#             1: array([[]],
-#             ...}
-#          - numpy array like with shape = (n_samples, n_features)  for multi-view
-#               for each view.
-#            [[[...]],
-#             [[...]],
-#             ...]
-#          - {array like} with (n_samples, nviews *  n_features) with 'views_ind' diferent to 'None'
-#            for Multi-view input samples.
-#
-#
-#
-#     views_ind : array-like (default= None ) if None
-#                 [0, n_features//2, n_features]) is constructed (2 views)
-#                 Paramater specifying how to extract the data views from X:
-#
-#         - views_ind is a 1-D array of sorted integers, the entries
-#           indicate the limits of the slices used to extract the views,
-#           where view ``n`` is given by
-#           ``X[:, views_ind[n]:views_ind[n+1]]``.
-#
-#     Attributes
-#     ----------
-#
-#     view_ind : list of views' indice  (may be None)
-#
-#     n_views : int number of views
-#
-#     shapes_int: list of int numbers of feature for each views
-#
-#     keys : name of key, where data come from a dictionary
-#
-#
-#     :Example:
-#
-#     >>> from multimodal.datasets.base import load_dict
-#     >>> from multimodal.tests.datasets.get_dataset_path import get_dataset_path
-#     >>> from multimodal.datasets.data_sample import DataSample
-#     >>> file = 'input_x_dic.pkl'
-#     >>> data = load_dict(get_dataset_path(file))
-#     >>> print(data.__class__)
-#     <class 'dict'>
-#     >>> multiviews = MultiModalArray(data)
-#     >>> multiviews.shape
-#     (120, 240)
-#     >>> multiviews.keys
-#     dict_keys([0, 1])
-#     >>> multiviews.shapes_int
-#     [120, 120]
-#     >>> multiviews.n_views
-#     2
-#
-#
-#     """
-#
-#     def __init__(self, data, view_ind=None, shape=None, dtype=None, copy=False):
-#         """Constructor of Metriclearn_array"""
-#         shapes_int = []
-#         index = 0
-#         new_data = np.ndarray([])
-#         n_views = 1
-#         thekeys = None
-#         # view_ind_self =  None
-#         view_mode = 'slices'
-#         if isinstance(data, tuple) and len(data) == 3:
-#             data_data = data[0]
-#             indices = data[1]
-#             indptr = data[2]
-#             data_shape = shape
-#         else:
-#             if shape is None:
-#                 data_shape = data.shape
-#             if dtype is None:
-#                 dtype = data.dtype
-#             data_data = data.data
-#             data_indices = data.indices
-#             data_indptr = data.indptr
-#         if (sp.issparse(data)) and data.ndim > 1:
-#             if view_ind is not None:
-#                 try:
-#                     view_ind = np.asarray(view_ind)
-#                 except :
-#                     raise TypeError("n_views should be list or nparray")
-#             elif view_ind is None:
-#                 if data.shape[1] > 1:
-#                     view_ind = np.array([0, data.shape[1]//2, data.shape[1]])
-#                 else:
-#                     view_ind = np.array([0, data.shape[1]])
-#
-#             new_data = data
-#             # view_ind_self = view_ind
-#         view_ind, n_views, view_mode = self._first_validate_views_ind(view_ind,
-#                                                                       data_shape[1])
-#         if view_ind.ndim == 1 and view_mode.startswith("slicing"):
-#             shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])]
-#         if isinstance(data, sp.csr_matrix) :
-#             sp.csr_matrix.__init__(self, (data_data, data_indices, data_indptr), shape=data_shape)
-#             #sp.csr_matrix.__init__(self, data)
-#         elif isinstance(data, sp.csc_matrix):
-#             sp.csc_matrix.__init__(self, (data_data, data_indices, data_indptr), shape=data_shape)
-#             #sp.csc_matrix.__init__(self, data)
-#         else:
-#             raise TypeError("This sparse format is not supported")
-#         if self.shape[0] < 1 or self.shape[1] < 1:
-#             raise ValueError("input data shouldbe not empty")
-#         self.view_mode_ = view_mode
-#         self.views_ind = view_ind
-#         self.shapes_int = shapes_int
-#         self.n_views = n_views
-
 
 class MultiModalArray(np.ndarray, MultiModalData):
     """
@@ -420,7 +295,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
     """
 
     def __new__(cls, data, view_ind=None):
-        """Constructor of Metriclearn_array"""
+        """Constructor of MultiModalArray_array"""
         shapes_int = []
         index = 0
         new_data = np.ndarray([])
@@ -430,24 +305,30 @@ class MultiModalArray(np.ndarray, MultiModalData):
         view_mode = 'slices'
         if isinstance(data, dict):
             n_views = len(data)
+            view_ind = [0]
             for key, dat_values in data.items():
                 new_data = cls._populate_new_data(index, dat_values, new_data)
                 shapes_int.append(dat_values.shape[1])
+                view_ind.append(dat_values.shape[1] + view_ind[index])
                 index += 1
             thekeys = data.keys()
-        if isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
+
+        elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1:
+            try:
+                dat0 = np.array(data[0])
+            except Exception:
+                raise TypeError("input format is not supported")
+
+            if dat0.ndim < 2:
+                data = data[np.newaxis, ...]
+                if data.shape[1] > 1:
+                    view_ind = np.array([0, data.shape[1]//2, data.shape[1]])
+                else:
+                    view_ind = np.array([0, data.shape[1]])
+                new_data = data
+            else:
+                new_data, shapes_int, view_ind = cls._for_data(cls, data)
             n_views = data.shape[0]
-            view_ind = np.empty(n_views+1)
-            view_ind[0] = 0
-            for dat_values in data:
-                try:
-                    dat_values = np.array(dat_values)
-                except:
-                    raise TypeError("input format is not supported")
-                shapes_int.append(dat_values.shape[1])
-                view_ind[index+1] = dat_values.shape[1] + view_ind[index]
-                new_data = cls._populate_new_data(index, dat_values, new_data)
-                index += 1
         elif (isinstance(data, np.ndarray) ) and data.ndim > 1:
             try:
                 data = np.asarray(data)
@@ -468,12 +349,16 @@ class MultiModalArray(np.ndarray, MultiModalData):
         else:
             try:
                 new_data = np.asarray(data)
-                if new_data.ndim == 1:
-                    new_data = new_data.reshape(1, new_data.shape[0])
-                view_ind = np.array([0, new_data.shape[1]])
+                # if new_data.ndim == 1:
+                #     new_data = new_data.reshape(1, new_data.shape[0])
+                if view_ind is None:
+                    view_ind = np.array([0, new_data.shape[1]])
             except Exception as e:
                 raise ValueError('Reshape your data')
+        if new_data.ndim < 2 or new_data.shape == (1, 1) or view_ind[-1] > new_data.shape[1]:
+            raise ValueError('Reshape your data')
+
         # view_ind_self =  view_ind
         # if new_data.shape[1] < 1:
         #     msg = ("%d feature\(s\) \\(shape=\%s\) while a minimum of \\d* "
@@ -482,7 +367,7 @@
         #            % (new_data.shape[1], str(new_data.shape)))
         #     raise ValueError(msg)
         view_ind, n_views, view_mode = cls._first_validate_views_ind(view_ind,
                                                                      new_data.shape[1])
-        if view_ind.ndim == 1 and view_mode.startswith("slicing"):
+        if view_ind.ndim == 1 and view_mode.startswith("slices"):
             shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])]
         # obj = ma.MaskedArray.__new(new_data)
         # new_data.view()  a.MaskedArray(new_data, mask=new_data.mask).view(cls)
         # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask)
@@ -501,6 +386,25 @@ class MultiModalArray(np.ndarray, MultiModalData):
         obj.keys = thekeys
         return obj
 
+    @staticmethod
+    def _for_data(cls, data):
+        n_views = data.shape[0]
+        index = 0
+        view_ind = np.empty(n_views + 1, dtype=np.int)
+        view_ind[0] = 0
+        shapes_int = []
+        new_data = np.ndarray([])
+        for dat_values in data:
+            try:
+                dat_values = np.array(dat_values)
+            except Exception:
+                raise TypeError("input format is not supported")
+            new_data = cls._populate_new_data(index, dat_values, new_data)
+            view_ind[index + 1] = dat_values.shape[1] + view_ind[index]
+            shapes_int.append(dat_values.shape[1])
+            index += 1
+        return new_data, shapes_int, view_ind
+
     @staticmethod
     def _populate_new_data(index, dat_values, new_data):
         if index == 0:
diff --git a/multimodal/tests/__pycache__/__init__.cpython-36.pyc b/multimodal/tests/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 1e99420542d93339ad39b73b8ba0b9c48bc020ac..0000000000000000000000000000000000000000
Binary files a/multimodal/tests/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/tests/data/__pycache__/__init__.cpython-36.pyc b/multimodal/tests/data/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index e5ff6f0d7034f50a62323bf1f47e69b0ec338479..0000000000000000000000000000000000000000
Binary files a/multimodal/tests/data/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/tests/data/__pycache__/get_dataset_path.cpython-36.pyc b/multimodal/tests/data/__pycache__/get_dataset_path.cpython-36.pyc
deleted file mode 100644
index 99a01a1c57ae74bcb18ca8377f9c799fc5fb125a..0000000000000000000000000000000000000000
Binary files a/multimodal/tests/data/__pycache__/get_dataset_path.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/tests/datasets/__pycache__/__init__.cpython-36.pyc b/multimodal/tests/datasets/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index a2c8d7aba3ba76053fed1475db7036e08f9fc6bd..0000000000000000000000000000000000000000
Binary files a/multimodal/tests/datasets/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/tests/datasets/__pycache__/get_dataset_path.cpython-36.pyc b/multimodal/tests/datasets/__pycache__/get_dataset_path.cpython-36.pyc
deleted file mode 100644
index 72e442b9eaa3fe3d85f23eac5c5fcf430c5cedf9..0000000000000000000000000000000000000000
Binary files a/multimodal/tests/datasets/__pycache__/get_dataset_path.cpython-36.pyc and /dev/null differ
diff --git a/multimodal/tests/test.py b/multimodal/tests/test.py
deleted file mode 100644
index 9a68d84f26be86e9fa5ac7f7791070d22b63c10f..0000000000000000000000000000000000000000
--- a/multimodal/tests/test.py
+++ /dev/null
@@ -1,224 +0,0 @@
-
-from abc import ABCMeta
-import numpy as np
-import numpy.ma as ma
-import scipy.sparse as sp
-
-from multimodal.boosting.mumbo import MumboClassifier
-
-class MultiModalData(metaclass=ABCMeta):
-
-    @staticmethod
-    def _first_validate_views_ind(views_ind, n_features):
-        """Ensure proper format for views_ind and return number of views."""
-        views_ind = np.array(views_ind)
-        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
-            if np.any(views_ind[:-1] >= views_ind[1:]):
-                raise ValueError("Values in views_ind must be sorted.")
-            if views_ind[0] < 0 or views_ind[-1] > n_features:
-                raise ValueError("Values in views_ind are not in a correct "
-                                 + "range for the provided data.")
-            view_mode_ = "slices"
-            n_views = views_ind.shape[0]-1
-        else:
-            if views_ind.ndim == 1:
-                if not views_ind.dtype == np.object:
-                    raise ValueError("The format of views_ind is not "
-                                     + "supported.")
-                for ind, val in enumerate(views_ind):
-                    views_ind[ind] = np.array(val)
-                    if not np.issubdtype(views_ind[ind].dtype, np.integer):
-                        raise ValueError("Values in views_ind must be "
-                                         + "integers.")
-                    if views_ind[ind].min() < 0 \
-                            or views_ind[ind].max() >= n_features:
-                        raise ValueError("Values in views_ind are not in a "
-                                         + "correct range for the provided "
-                                         + "data.")
-            elif views_ind.ndim == 2:
-                if not np.issubdtype(views_ind.dtype, np.integer):
-                    raise ValueError("Values in views_ind must be integers.")
-                if views_ind.min() < 0 or views_ind.max() >= n_features:
ValueError("Values in views_ind are not in a " - + "correct range for the provided data.") - else: - raise ValueError("The format of views_ind is not supported.") - view_mode_ = "indices" - n_views = views_ind.shape[0] - return (views_ind, n_views, view_mode_) - - def _extract_view(self, ind_view): - """Extract the view for the given index ind_view from the dataset X.""" - if self.view_mode_ == "indices": - return self[:, self.views_ind[ind_view]] - else: - return self[:, self.views_ind[ind_view]:self.views_ind[ind_view+1]] - - def _validate_views_ind(self, views_ind, n_features): - """Ensure proper format for views_ind and return number of views.""" - views_ind = np.array(views_ind) - if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1: - if np.any(views_ind[:-1] >= views_ind[1:]): - raise ValueError("Values in views_ind must be sorted.") - if views_ind[0] < 0 or views_ind[-1] > n_features: - raise ValueError("Values in views_ind are not in a correct " - + "range for the provided data.") - self.view_mode_ = "slices" - n_views = views_ind.shape[0]-1 - else: - if views_ind.ndim == 1: - if not views_ind.dtype == np.object: - raise ValueError("The format of views_ind is not " - + "supported.") - for ind, val in enumerate(views_ind): - views_ind[ind] = np.array(val) - if not np.issubdtype(views_ind[ind].dtype, np.integer): - raise ValueError("Values in views_ind must be " - + "integers.") - if views_ind[ind].min() < 0 \ - or views_ind[ind].max() >= n_features: - raise ValueError("Values in views_ind are not in a " - + "correct range for the provided " - + "data.") - elif views_ind.ndim == 2: - if not np.issubdtype(views_ind.dtype, np.integer): - raise ValueError("Values in views_ind must be integers.") - if views_ind.min() < 0 or views_ind.max() >= n_features: - raise ValueError("Values in views_ind are not in a " - + "correct range for the provided data.") - else: - raise ValueError("The format of views_ind is not supported.") - self.view_mode_ = "indices" - n_views = views_ind.shape[0] - self.views_ind = views_ind - self.n_views = n_views - return (views_ind, n_views) - -class MultiModalSparseInfo(): - - def __init__(self, data, view_ind=None): - """Constructor of Metriclearn_array""" - shapes_int = [] - index = 0 - new_data = np.ndarray([]) - n_views = data.size - thekeys = None - # view_ind_self = None - view_mode = 'slices' - - if (sp.issparse(data)) and data.ndim > 1: - if view_ind is not None: - try: - view_ind = np.asarray(view_ind) - except : - raise TypeError("n_views should be list or nparray") - elif view_ind is None: - if data.shape[1] > 1: - view_ind = np.array([0, data.shape[1]//2, data.shape[1]]) - else: - view_ind = np.array([0, data.shape[1]]) - - new_data = data - # view_ind_self = view_ind - view_ind, n_views, view_mode = self._first_validate_views_ind(view_ind, - data.shape[1]) - if view_ind.ndim == 1 and view_mode.startswith("slicing"): - shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])] - - if data.shape[0] < 1 or data.shape[1] < 1: - raise ValueError("input data shouldbe not empty") - self.view_mode_ = view_mode - self.views_ind = view_ind - self.shapes_int = shapes_int - self.n_views = n_views - - -class MultiModalSparseArray(sp.csr_matrix, sp.csc_matrix, MultiModalSparseInfo, MultiModalData): - """ - MultiModalArray inherit from numpy ndarray - - - Parameters - ---------- - - data : can be - - dictionary of multiview array with shape = (n_samples, n_features) for multi-view - for each view. 
-           {0: array([[]],
-            1: array([[]],
-            ...}
-         - numpy array like with shape = (n_samples, n_features)  for multi-view
-              for each view.
-           [[[...]],
-            [[...]],
-            ...]
-         - {array like} with (n_samples, nviews *  n_features) with 'views_ind' diferent to 'None'
-           for Multi-view input samples.
-
-
-
-    views_ind : array-like (default= None ) if None
-                [0, n_features//2, n_features]) is constructed (2 views)
-                Paramater specifying how to extract the data views from X:
-
-        - views_ind is a 1-D array of sorted integers, the entries
-          indicate the limits of the slices used to extract the views,
-          where view ``n`` is given by
-          ``X[:, views_ind[n]:views_ind[n+1]]``.
-
-    Attributes
-    ----------
-
-    view_ind : list of views' indice  (may be None)
-
-    n_views : int number of views
-
-    shapes_int: list of int numbers of feature for each views
-
-    keys : name of key, where data come from a dictionary
-
-
-    :Example:
-
-    >>> from multimodal.datasets.base import load_dict
-    >>> from multimodal.tests.datasets.get_dataset_path import get_dataset_path
-    >>> from multimodal.datasets.data_sample import DataSample
-    >>> file = 'input_x_dic.pkl'
-    >>> data = load_dict(get_dataset_path(file))
-
-    """
-
-    def __init__(self, *arg, **kwargs ):
-        """Constructor of Metriclearn_array"""
-        if sp.issparse(arg[0]):
-            MultiModalSparseInfo.__init__(self, *arg)
-            if isinstance(arg[0], sp.csr_matrix) :
-                sp.csr_matrix.__init__(self, arg[0])
-            elif isinstance(arg[0], sp.csc_matrix):
-                sp.csc_matrix.__init__(self, arg[0])
-            else:
-                raise TypeError("This sparse format is not supported")
-        else:
-            if isinstance(self,sp.csr_matrix):
-                sp.csr_matrix.__init__(self, *arg, **kwargs)
-            elif isinstance(self, sp.csc_matrix):
-                sp.csc_matrix.__init__(self, *arg, **kwargs)
-
-
-
-
-if __name__ == '__main__':
-    rng = np.random.RandomState(0)
-    X = rng.rand(40, 10)
-    X[X < .8] = 0
-    X_csr = sp.csr_matrix(X)
-    y = (4 * rng.rand(40)).astype(np.int)
-    X_ = MultiModalSparseArray(X_csr)
-    print(X_.shape)
-    print(X_[:,0:1])
-
-    X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]])
-    y = [1, 1, 1, 2, 2, 2]
-    clf = MumboClassifier()
-    clf.fit(X, y)
\ No newline at end of file
diff --git a/multimodal/tests/test_data_sample.py b/multimodal/tests/test_data_sample.py
index 04c6b49e39fc637eddd5e238c323a3802ccf586b..b21d4afd6e3bdde764ae3f1c495ead0d8a7ac59f 100644
--- a/multimodal/tests/test_data_sample.py
+++ b/multimodal/tests/test_data_sample.py
@@ -33,7 +33,7 @@ class UnitaryTest(unittest.TestCase):
         np.testing.assert_almost_equal(a.get_view(0), self.kernel_dict[0], 8)
         np.testing.assert_almost_equal(a.get_view(1), self.kernel_dict[1], 8)
 
-    def test_init_Metriclearn_array(self):
+    def test_init_Multimodal_array(self):
         a = MultiModalArray(self.kernel_dict)
         self.assertEqual(a.shape, (120, 240))
         self.assertEqual(a.shapes_int, [120, 120])
@@ -48,3 +48,5 @@ class UnitaryTest(unittest.TestCase):
         np.testing.assert_equal(b.views_ind, np.array([0, 120, 240]))
 
 
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/multimodal/tests/test_mumbo.py b/multimodal/tests/test_mumbo.py
index 978244fa63a653500330d0430903ceef21cf61dd..f22f5ba72ea6d747c9f80be486830198c3f2ddb4 100644
--- a/multimodal/tests/test_mumbo.py
+++ b/multimodal/tests/test_mumbo.py
@@ -705,8 +705,8 @@ class TestMuCumboClassifier(unittest.TestCase):
 
     def test_classifier(self):
-        X_zero_features = np.empty(0).reshape(3, 0)
-        y = np.array([1, 0, 1])
+        # X_zero_features = np.empty(0).reshape(3, 0)
+        # y = np.array([1, 0, 1])
         # e = MumboClassifier()
         # e.fit(X_zero_features, y)
         # print(e.predict(X_zero_features))
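
Note: the snippet below is a minimal standalone sketch of the input-validation behaviour that the boost.py hunk above introduces, written for illustration only. The helper name validate_predict_shape and the example arrays are hypothetical stand-ins and are not part of the patch or of the multimodal library's public API.

import numpy as np

def validate_predict_shape(X, n_features):
    # Hypothetical mirror of the patched UBoosting check in boost.py.
    X = np.asarray(X)
    if X.ndim < 2:
        # A 1-D sample is first promoted to a single-row 2-D array.
        X = X[np.newaxis, :]
        if X.shape[1] != n_features:
            raise ValueError("Number of features of the model must match the input. "
                             "Model n_features is %s and input n_features is %s "
                             % (n_features, X.shape[1]))
        else:
            raise ValueError("Reshape your data")
    if X.ndim > 1:
        if X.shape[1] != n_features:
            # A transposed-looking input gets the short "Reshape your data" hint,
            # any other column mismatch gets the feature-count message.
            if X.shape[0] == n_features and X.shape[1] > 1:
                raise ValueError("Reshape your data")
            else:
                raise ValueError("Number of features of the model must match the input. "
                                 "Model n_features is %s and input n_features is %s "
                                 % (n_features, X.shape[1]))
    return X

# validate_predict_shape(np.zeros(4), n_features=4)  -> ValueError: Reshape your data
# validate_predict_shape(np.zeros(3), n_features=4)  -> ValueError: Number of features ...
# validate_predict_shape(np.zeros((2, 4)), 4)        -> returned unchanged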