diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 96070201033c21397c009349deeedd08ab6a2e01..2b8e89e8f39c952aa07f380aa39b3f2c653a78a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -40,6 +40,7 @@ pages: script: - export LC_ALL=$(locale -a | grep en_US) - export LANG=$(locale -a | grep en_US) + - python3 setup.py install - python3 setup.py build_sphinx - cp -r build/sphinx/html public artifacts: diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py index f6d7c685f0c4fe6172e69ed858f6c458d2814269..bab7c806721c891bec828f7c25ff7864a734d366 100644 --- a/multimodal/boosting/boost.py +++ b/multimodal/boosting/boost.py @@ -44,12 +44,12 @@ import scipy.sparse as sp from abc import ABCMeta from sklearn.utils import check_array, check_X_y, check_random_state from sklearn.tree import DecisionTreeClassifier -from sklearn.tree.tree import BaseDecisionTree +from sklearn.tree import BaseDecisionTree from sklearn.tree._tree import DTYPE -from sklearn.ensemble.forest import BaseForest from sklearn.base import clone from sklearn.ensemble._base import _set_random_states from sklearn.ensemble import BaseEnsemble +from sklearn.ensemble._forest import BaseForest from multimodal.datasets.data_sample import DataSample from multimodal.datasets.data_sample import MultiModalData, MultiModalArray, MultiModalSparseArray @@ -83,13 +83,6 @@ class UBoosting(metaclass=ABCMeta): def _validate_X_predict(self, X): """Ensure that X is in the proper format.""" - if (self.base_estimator is None or - isinstance(self.base_estimator, - (BaseDecisionTree, BaseForest))): - check_array(X, accept_sparse='csr', dtype=DTYPE) - - else: - check_array(X, accept_sparse=['csr', 'csc']) if X.ndim < 2: X = X[np.newaxis, :] if X.shape[1] != self.n_features_: @@ -97,8 +90,15 @@ class UBoosting(metaclass=ABCMeta): "match the input. Model n_features is %s and " "input n_features is %s " % (self.n_features_, X.shape[1])) else: - mes = "Reshape your data" + mes = "Reshape your data as a 2D-array " raise ValueError(mes) + if (self.base_estimator is None or + isinstance(self.base_estimator, + (BaseDecisionTree, BaseForest))): + check_array(X, accept_sparse='csr', dtype=DTYPE) + + else: + check_array(X, accept_sparse=['csr', 'csc']) if X.ndim > 1: if X.shape[1] != self.n_features_: if X.shape[0] == self.n_features_ and X.shape[1] > 1: @@ -107,10 +107,6 @@ class UBoosting(metaclass=ABCMeta): raise ValueError("Number of features of the model must " "match the input. Model n_features is %s and " "input n_features is %s " % (self.n_features_, X.shape[1])) - - - # - # raise ValueError(mes) return X def _global_X_transform(self, X, views_ind=None): @@ -121,10 +117,10 @@ class UBoosting(metaclass=ABCMeta): X_ = MultiModalSparseArray(X, views_ind) else: X_ = MultiModalArray(X, views_ind) - if not isinstance(X_, MultiModalData): - try: - X_ = np.asarray(X) - X_ = MultiModalArray(X_) - except Exception as e: - raise TypeError('Reshape your data') + # if not isinstance(X_, MultiModalData): + # try: + # X_ = np.asarray(X) + # X_ = MultiModalArray(X_) + # except Exception as e: + # raise TypeError('Reshape your data') return X_ diff --git a/multimodal/boosting/combo.py b/multimodal/boosting/combo.py index 5b67ba8215c3d44b59e69b8d2f03f7caddd6d1b4..c5be4a1bfbda7f9dc43a30123fc3f8535f738ea7 100644 --- a/multimodal/boosting/combo.py +++ b/multimodal/boosting/combo.py @@ -48,11 +48,11 @@ estimator for classification implemented in the ``MuComboClassifier`` class. import numpy as np from sklearn.base import ClassifierMixin from sklearn.ensemble import BaseEnsemble -from sklearn.ensemble.forest import BaseForest +from sklearn.ensemble._forest import BaseForest from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier from sklearn.tree._tree import DTYPE -from sklearn.tree.tree import BaseDecisionTree +from sklearn.tree import BaseDecisionTree from sklearn.utils import check_array, check_X_y, check_random_state from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import check_is_fitted, has_fit_parameter @@ -127,13 +127,13 @@ class MuComboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): >>> views_ind = [0, 2, 4] # view 0: sepal data, view 1: petal data >>> clf = MuComboClassifier(random_state=0) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MuComboClassifier(base_estimator=None, n_estimators=50, random_state=0) + MuComboClassifier(random_state=0) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [0] >>> views_ind = [[0, 2], [1, 3]] # view 0: length data, view 1: width data >>> clf = MuComboClassifier(random_state=0) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MuComboClassifier(base_estimator=None, n_estimators=50, random_state=0) + MuComboClassifier(random_state=0) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [0] @@ -141,13 +141,8 @@ class MuComboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): >>> base_estimator = DecisionTreeClassifier(max_depth=2) >>> clf = MuComboClassifier(base_estimator=base_estimator, random_state=1) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MuComboClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2, - max_features=None, max_leaf_nodes=None, - min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf=1, min_samples_split=2, - min_weight_fraction_leaf=0.0, presort=False, random_state=None, - splitter='best'), - n_estimators=50, random_state=1) + MuComboClassifier(base_estimator=DecisionTreeClassifier(max_depth=2), + random_state=1) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [0] @@ -222,8 +217,9 @@ class MuComboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): dist = np.empty(cost.shape[:2], dtype=cost.dtype, order="C") # NOTE: In Sokol's PhD thesis, the formula for dist is mistakenly given # with a minus sign in section 2.2.2 page 31 - dist[:, :] = cost[:, np.arange(n_samples), y] \ - / np.sum(cost[:, np.arange(n_samples), y], axis=1)[:, np.newaxis] + sum_cost = np.sum(cost[:, np.arange(n_samples), y], axis=1)[:, np.newaxis] + sum_cost[sum_cost==0] = 1 + dist[:, :] = cost[:, np.arange(n_samples), y] / sum_cost return dist def _indicatrice(self, predicted_classes, y_i): @@ -444,6 +440,8 @@ class MuComboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): views_ind_, n_views = self.X_._validate_views_ind(self.X_.views_ind, self.X_.shape[1]) check_X_y(self.X_, y) + if not isinstance(y, np.ndarray): + y = np.asarray(y) check_classification_targets(y) self._validate_estimator() @@ -452,6 +450,7 @@ class MuComboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): self.n_classes_ = len(self.classes_) self.n_views_ = n_views self.n_features_ = self.X_.shape[1] + self.n_features_in_ = self.n_features_ if self.n_classes_ == 1: # This case would lead to division by 0 when computing the cost # matrix so it needs special handling (but it is an obvious case as diff --git a/multimodal/boosting/mumbo.py b/multimodal/boosting/mumbo.py index 690061b420691d17ceca22be85bcef2812d5c248..0ced162f9a3446d04d3b17cdc1d08d4e0c13d8dc 100644 --- a/multimodal/boosting/mumbo.py +++ b/multimodal/boosting/mumbo.py @@ -49,10 +49,10 @@ import numpy as np from sklearn.base import ClassifierMixin, clone from sklearn.ensemble import BaseEnsemble from sklearn.ensemble._base import _set_random_states -from sklearn.ensemble.forest import BaseForest +from sklearn.ensemble._forest import BaseForest from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier -from sklearn.tree.tree import BaseDecisionTree +from sklearn.tree import BaseDecisionTree from sklearn.tree._tree import DTYPE from sklearn.utils import check_array, check_X_y, check_random_state from sklearn.utils.multiclass import check_classification_targets @@ -128,15 +128,13 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): >>> views_ind = [0, 2, 4] # view 0: sepal data, view 1: petal data >>> clf = MumboClassifier(random_state=0) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MumboClassifier(base_estimator=None, best_view_mode='edge', - n_estimators=50, random_state=0) + MumboClassifier(random_state=0) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [1] >>> views_ind = [[0, 2], [1, 3]] # view 0: length data, view 1: width data >>> clf = MumboClassifier(random_state=0) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MumboClassifier(base_estimator=None, best_view_mode='edge', - n_estimators=50, random_state=0) + MumboClassifier(random_state=0) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [1] @@ -144,13 +142,8 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): >>> base_estimator = DecisionTreeClassifier(max_depth=2) >>> clf = MumboClassifier(base_estimator=base_estimator, random_state=0) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MumboClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, - criterion='gini', max_depth=2, max_features=None, - max_leaf_nodes=None, min_impurity_decrease=0.0, - min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, - min_weight_fraction_leaf=0.0, presort=False, random_state=None, - splitter='best'), - best_view_mode='edge', n_estimators=50, random_state=0) + MumboClassifier(base_estimator=DecisionTreeClassifier(max_depth=2), + random_state=0) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [1] @@ -192,7 +185,6 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): """Check the estimator and set the base_estimator_ attribute.""" super(MumboClassifier, self)._validate_estimator( default=DecisionTreeClassifier(max_depth=1)) - if type(self.base_estimator_) is list: for estimator in self.base_estimator_: if not has_fit_parameter(estimator, "sample_weight"): @@ -411,12 +403,15 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): views_ind_, n_views = self.X_._validate_views_ind(self.X_.views_ind, self.X_.shape[1]) check_X_y(self.X_, y, accept_sparse=accept_sparse, dtype=dtype) + if not isinstance(y, np.ndarray): + y = np.asarray(y) check_classification_targets(y) self._validate_estimator() self.classes_, y = np.unique(y, return_inverse=True) self.n_classes_ = len(self.classes_) self.n_features_ = self.X_.shape[1] + self.n_features_in_ = self.n_features_ if self.n_classes_ == 1: # This case would lead to division by 0 when computing the cost # matrix so it needs special handling (but it is an obvious case as diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py index ac20e3fdd446c52303acf6c86ff2cc0c881dd453..a4b5f9c310c9cb0ce314e682074f17b5da62b3f1 100644 --- a/multimodal/datasets/data_sample.py +++ b/multimodal/datasets/data_sample.py @@ -365,14 +365,14 @@ class MultiModalArray(np.ndarray, MultiModalData): views_ind.append(dat_values.shape[1] + views_ind[index]) index += 1 thekeys = data.keys() - if new_data.ndim < 2 : - raise ValueError('Reshape your data') - if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): + # if new_data.ndim < 2 : + # raise ValueError('Reshape your data') + if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') elif isinstance(data, np.ndarray) and views_ind is None and data.ndim == 1: try: dat0 = np.array(data[0]) - except Exception: + except Exception: # pragma: no cover raise TypeError("input format is not supported") if dat0.ndim < 2: @@ -388,13 +388,13 @@ class MultiModalArray(np.ndarray, MultiModalData): elif (isinstance(data, np.ndarray) ) and data.ndim > 1: try: data = np.asarray(data) - except: + except: # pragma: no cover raise TypeError("input format is not supported") if views_ind is not None: try: views_ind = np.asarray(views_ind) - except : + except : # pragma: no cover raise TypeError("n_views should be list or nparray") elif views_ind is None: if data.shape[1] > 1: @@ -413,7 +413,7 @@ class MultiModalArray(np.ndarray, MultiModalData): elif not isinstance(views_ind, np.ndarray): try: views_ind = np.asarray(views_ind) - except Exception: + except Exception: # pragma: no cover raise TypeError("format of views_ind is not list or nd array") except Exception as e: raise ValueError('Reshape your data') @@ -421,8 +421,8 @@ class MultiModalArray(np.ndarray, MultiModalData): raise ValueError('Reshape your data') if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') - if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]: - raise ValueError('Reshape your data') + # if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]: + # raise ValueError('Reshape your data') # views_ind_self = views_ind # if new_data.shape[1] < 1: @@ -437,7 +437,7 @@ class MultiModalArray(np.ndarray, MultiModalData): # obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls) # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask) - if hasattr(new_data, "mask"): + if hasattr(new_data, "mask"): # pragma: no cover obj = ma.masked_array(new_data.data, new_data.mask).view(cls) elif hasattr(new_data, "data") and \ hasattr(new_data, "shape") and len(new_data.shape) > 0: @@ -462,7 +462,7 @@ class MultiModalArray(np.ndarray, MultiModalData): for dat_values in data: try: dat_values = np.array(dat_values) - except Exception: + except Exception: # pragma: no cover raise TypeError("input format is not supported") new_data = cls._populate_new_data(index, dat_values, new_data) views_ind[index + 1] = dat_values.shape[1] + views_ind[index] @@ -471,7 +471,8 @@ class MultiModalArray(np.ndarray, MultiModalData): return new_data, shapes_int, views_ind @staticmethod - def _populate_new_data(index, dat_values, new_data): + def _populate_new_data(index, dat_values, new_data): # pragma: no cover + """Work in progress : including missing data""" if index == 0: if isinstance(dat_values, ma.MaskedArray) or \ isinstance(dat_values, np.ndarray) or sp.issparse(dat_values): @@ -525,30 +526,32 @@ class MultiModalArray(np.ndarray, MultiModalData): def set_view(self, view, data): start = int(np.sum(np.asarray(self.shapes_int[0: view]))) stop = int(start + self.shapes_int[view]) - if stop-start == data.shape[0] and data.shape[1]== self.data.shape[1]: + if stop-start == data.shape[1] and data.shape[0] == self.data.shape[0]: self[:, start:stop] = data else: raise ValueError( - "shape of data does not match (%d, %d)" %stop-start %self.data.shape[1]) + "shape of data does not match (%d, %d)" %(stop-start ,self.data.shape[0])) - def get_raw(self, view, raw): - start = np.sum(np.asarray(self.shapes_int[0: view])) - stop = np.sum(np.asarray(self.shapes_int[0: view+1])) - return self.data[start:stop, raw] - - def add_view(self, v, data): - if len(self.shape) > 0: - if data.shape[0] == self.data.shape[0]: - indice = self.shapes_int[v] - np.insert(self.data, data, indice+1, axis=0) - self.shapes_int.append(data.shape[1]) - self.n_views +=1 - else: - raise ValueError("New view can't initialazed") - # self.shapes_int= [data.shape[1]] - # self.data.reshape(data.shape[0],) - # np.insert(self.data, data, 0) - # self.n_views = 1 + def get_row(self, view, row): + start = int(np.sum(np.asarray(self.shapes_int[0: view]))) + stop = int(np.sum(np.asarray(self.shapes_int[0: view+1]))) + return self[row, start:stop] + + # def add_view(self, data): + # if len(self.shape) > 0: + # if data.shape[0] == self.data.shape[0]: + # print(self.data.shape, data.shape) + # new_data = np.hstack((self.data, data)) + # self.shapes_int.append(data.shape[1]) + # self.n_views +=1 + # print(new_data.shape) + # + # else: + # raise ValueError("New view can't initialazed") + # # self.shapes_int= [data.shape[1]] + # # self.data.reshape(data.shape[0],) + # # np.insert(self.data, data, 0) + # # self.n_views = 1 def _todict(self): dico = {} diff --git a/multimodal/kernels/mkernel.py b/multimodal/kernels/mkernel.py index 7334e7916866ea13c3b6d88d7321899d83103682..2b8d6d5f4ea94911f1122d0fb1e0c8ae963c8681 100644 --- a/multimodal/kernels/mkernel.py +++ b/multimodal/kernels/mkernel.py @@ -124,22 +124,22 @@ class MKernel(metaclass=ABCMeta): X = X_ if isinstance(X, MultiModalArray): X_ = X - if not isinstance(X_, MultiModalArray): - try: - X_ = np.asarray(X) - X_ = MultiModalArray(X_, views_ind) - except Exception as e: - pass + # if not isinstance(X_, MultiModalArray): + # try: + # X_ = np.asarray(X) + # X_ = MultiModalArray(X_, views_ind) + # except Exception as e: + # pass # raise TypeError('Reshape your data') if isinstance(X_, MultiModalArray): for v in range(X.n_views): if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v) kernel_dict[v] = self._get_kernel(X_.get_view(v), y) - if not isinstance(X_, MultiModalArray): - if sp.sparse.issparse(X): - raise TypeError("Nonsensical Error: no sparse data are allowed as input") - raise TypeError('Reshape your data') + # if not isinstance(X_, MultiModalArray): + # if sp.sparse.issparse(X): + # raise TypeError("Nonsensical Error: no sparse data are allowed as input") + # raise TypeError('Reshape your data') K_ = MultiModalArray(kernel_dict) return X_, K_ diff --git a/multimodal/kernels/mvml.py b/multimodal/kernels/mvml.py index e261661636d5851369b896be336f0b9e3be975b7..8444ff3636f3cd0bc10b495f30cfabfe93da6646 100644 --- a/multimodal/kernels/mvml.py +++ b/multimodal/kernels/mvml.py @@ -152,8 +152,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): >>> clf.get_params() {'eta': 1, 'kernel': 'linear', 'kernel_params': None, 'learn_A': 1, 'learn_w': 0, 'lmbda': 0.1, 'n_loops': 6, 'nystrom_param': 1.0, 'precision': 0.0001} >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE - MVML(eta=1, kernel='linear', kernel_params=None, learn_A=1, learn_w=0, - lmbda=0.1, n_loops=6, nystrom_param=1.0, precision=0.0001) + MVML() >>> print(clf.predict([[ 5., 3., 1., 1.]])) 0 @@ -230,9 +229,10 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): # if type_of_target(y) not in "binary": # raise ValueError("target should be binary") - check_classification_targets(y) + if type_of_target(y) in "binary": + check_classification_targets(y) self.classes_, y = np.unique(y, return_inverse=True) y[y==0] = -1.0 self.n_classes = len(self.classes_) @@ -342,7 +342,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): else: # A_inv = self._inverse_precond_LU(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A") # self._inverse_precond_jacobi(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A") A_inv = self._inv_best_precond(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A") - except spli.LinAlgError: + except spli.LinAlgError: # pragma: no cover self.warning_message["LinAlgError"] = self.warning_message.get("LinAlgError", 0) + 1 try: A_inv = spli.pinv(A + 1e-07 * np.eye(views * self.n_approx)) @@ -352,7 +352,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): except ValueError: self.warning_message["ValueError"] = self.warning_message.get("ValueError", 0) + 1 return A_prev, g_prev - except ValueError: + except ValueError: # pragma: no cover self.warning_message["ValueError"] = self.warning_message.get("ValueError", 0) + 1 return A_prev, g_prev, w_prev # print("A_inv ",np.sum(A_inv)) @@ -372,7 +372,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): else: # g = np.dot(self._inverse_precond_LU(A_inv, pos="precond_A_1"), g) g = np.dot(self._inv_best_precond(A_inv, pos="precond_A_1"), g) - except spli.LinAlgError: + except spli.LinAlgError: # pragma: no cover self.warning_message["LinAlgError"] = self.warning_message.get("LinAlgError", 0) + 1 g = spli.solve(A_inv, g) @@ -428,7 +428,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): A_inv = spli.pinv(A) return A_inv - def _inverse_precond_jacobi(self, A, pos="precond_A"): + def _inverse_precond_jacobi(self, A, pos="precond_A"): # pragma: no cover J_1 = np.diag(1.0/np.diag(A)) # J_1 = np.linalg.inv(J) P = np.dot(J_1, A) @@ -438,10 +438,9 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): self.warning_message[pos] = self.warning_message.get(pos, 0) + 1 else: A_inv = self._inverse_precond_LU(A, pos=pos) - return A_inv - def _inverse_precond_LU(self, A, pos="precond_A"): + def _inverse_precond_LU(self, A, pos="precond_A"): # pragma: no cover P, L, U = spli.lu(A) M = spli.inv(np.dot(L, U)) P = np.dot(M, A) @@ -606,17 +605,17 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): try: # minA_inv = np.min(np.absolute(A_prev)) , rcond=self.r_cond*minA_inv A_prev_inv = spli.pinv(A_prev) - except spli.LinAlgError: + except spli.LinAlgError: # pragma: no cover try: A_prev_inv = spli.pinv(A_prev + 1e-6 * np.eye(views * m)) except spli.LinAlgError: return A_prev except ValueError: return A_prev - except ValueError: + except ValueError: # pragma: no cover return A_prev - if np.any(np.isnan(A_prev_inv)): + if np.any(np.isnan(A_prev_inv)): # pragma: no cover # just in case the inverse didn't return a proper solution (happened once or twice) return A_prev @@ -625,9 +624,9 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin): # if there is one small negative eigenvalue this gets rid of it try: val, vec = spli.eigh(A_tmp) - except spli.LinAlgError: + except spli.LinAlgError: # pragma: no cover return A_prev - except ValueError: + except ValueError: # pragma: no cover return A_prev val[val < 0] = 0 diff --git a/multimodal/tests/test_combo.py b/multimodal/tests/test_combo.py index 52c59f09a8998981f40f1e8c3fb30830b926c504..f8d085dfa2138313b8060fc51eb81456dc87876f 100644 --- a/multimodal/tests/test_combo.py +++ b/multimodal/tests/test_combo.py @@ -54,7 +54,7 @@ from scipy.sparse import csc_matrix, csr_matrix, coo_matrix, dok_matrix from scipy.sparse import lil_matrix from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC -from sklearn.ensemble.forest import RandomForestClassifier +from sklearn.ensemble import RandomForestClassifier from sklearn.cluster import KMeans from sklearn.linear_model import Lasso from sklearn.tree import DecisionTreeClassifier @@ -64,6 +64,12 @@ from multimodal.boosting.combo import MuComboClassifier from multimodal.tests.data.get_dataset_path import get_dataset_path from multimodal.datasets.data_sample import MultiModalArray +class NoSampleWeightLasso(Lasso): + + def fit(self, X, y, check_input=True): + return Lasso.fit(self, X, y, sample_weight=None, check_input=True) + + class TestMuComboClassifier(unittest.TestCase): @classmethod @@ -368,6 +374,8 @@ class TestMuComboClassifier(unittest.TestCase): [ 4.49110023, 1., -2. ], [ 8., 2.49110023, 1. ]]]) np.testing.assert_almost_equal(label_score, expected_label_score,6) + + # # label_score = np.array( # [[[-1, -2, 4], [-8, 1, 4], [2, 8, -4], [2, -1, 4]], @@ -836,7 +844,7 @@ class TestMuComboClassifier(unittest.TestCase): # def test_classifier(self): - return check_estimator(MuComboClassifier) + return check_estimator(MuComboClassifier()) # # # def test_iris(): @@ -957,7 +965,8 @@ class TestMuComboClassifier(unittest.TestCase): # # Check that using a base estimator that doesn't support sample_weight # # raises an error. - clf = MuComboClassifier(Lasso()) + clf = MuComboClassifier(NoSampleWeightLasso()) + self.assertRaises(ValueError, clf.fit, self.iris.data, self.iris.target, self.iris.views_ind) # assert_raises(ValueError, clf.fit, iris.data, iris.target, iris.views_ind) # diff --git a/multimodal/tests/test_data_sample.py b/multimodal/tests/test_data_sample.py index 469ba500685e879a6c2816fe7455466bb5d87ca1..a25977e9c6b1bd9bc38521dd5e739da3ad7a04b1 100644 --- a/multimodal/tests/test_data_sample.py +++ b/multimodal/tests/test_data_sample.py @@ -47,6 +47,7 @@ from multimodal.tests.datasets.get_dataset_path import get_dataset_path from multimodal.datasets.data_sample import MultiModalArray import pickle + class UnitaryTest(unittest.TestCase): @classmethod @@ -85,6 +86,44 @@ class UnitaryTest(unittest.TestCase): array_x = a.data b = MultiModalArray(a) np.testing.assert_equal(b.views_ind, np.array([0, 120, 240])) + view_1 = np.random.randint(1,10,10) + view_2 = np.random.randint(1,10,11) + data = {0 : view_1, + 1 : view_2,} + c = MultiModalArray(data) + np.testing.assert_array_equal(np.asarray(c[0,:]), np.concatenate((view_1, view_2))) + view_1 = np.random.randint(1, 10, 1) + data = {0: view_1, } + self.assertRaises(ValueError, MultiModalArray, data) + view_1 = np.array([0,]) + data = view_1 + d = MultiModalArray(data) + np.testing.assert_array_equal(d.views_ind, np.array([0,1])) + view_1 = [[0]] + data = view_1 + self.assertRaises(ValueError, MultiModalArray, data) + view_1 = [[0,1,2 ],[0,1,2 ]] + data = view_1 + d = MultiModalArray(data, [0,1]) + np.testing.assert_array_equal(d.views_ind, np.array([0, 1])) + view_1 = [[0]] + data = view_1 + self.assertRaises(ValueError, MultiModalArray, data) + + def test_view_functions(self): + view_1 = np.random.randint(1, 10, (5, 10)) + view_2 = np.random.randint(1, 10, (5, 10)) + view_3 = np.random.randint(1, 10, (5, 10)) + data = {0: view_1, + 1: view_2, } + c = MultiModalArray(data) + c.set_view(1, view_3) + np.testing.assert_array_equal(c.get_view(1), view_3) + view_3 = np.random.randint(1, 10, (12, 10)) + c = MultiModalArray(data) + self.assertRaises(ValueError, c.set_view, 1, view_3) + np.testing.assert_array_equal(c.get_row(0, 2), view_1[2, :]) + if __name__ == '__main__': diff --git a/multimodal/tests/test_mumbo.py b/multimodal/tests/test_mumbo.py index e0ec2cd1aa7a0df3952ad83b71769e3c2d8f8084..af9c5c69f222c26ad892c1cbf2e4bfb68104574b 100644 --- a/multimodal/tests/test_mumbo.py +++ b/multimodal/tests/test_mumbo.py @@ -57,6 +57,7 @@ from sklearn.tree import DecisionTreeClassifier from sklearn import datasets from multimodal.boosting.mumbo import MumboClassifier +from multimodal.tests.test_combo import NoSampleWeightLasso class TestMumboClassifier(unittest.TestCase): @@ -730,7 +731,7 @@ class TestMumboClassifier(unittest.TestCase): # e = MumboClassifier() # e.fit(X_zero_features, y) # print(e.predict(X_zero_features)) - return check_estimator(MumboClassifier) + return check_estimator(MumboClassifier()) def test_iris(self): # Check consistency on dataset iris. @@ -840,7 +841,7 @@ class TestMumboClassifier(unittest.TestCase): # Check that using a base estimator that doesn't support sample_weight # raises an error. - clf = MumboClassifier(Lasso()) + clf = MumboClassifier(NoSampleWeightLasso()) self.assertRaises(ValueError, clf.fit, self.iris.data, self.iris.target, self.iris.views_ind) @@ -908,6 +909,18 @@ class TestMumboClassifier(unittest.TestCase): else: self.assertTrue(all([issubclass(type_, csr_matrix) for type_ in types])) + def test_validate_X_predict(self): + clf = MumboClassifier() + X = np.random.randint(1, 10, (2, 10)) + y = [1, 0] + clf.fit(X, y) + X_pred = np.random.randint(1, 10, 10) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + X_pred = np.random.randint(1,10,9) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + X_pred = np.random.randint(1, 10, (2, 9)) + self.assertRaises(ValueError, clf._validate_X_predict, X_pred) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/multimodal/tests/test_mvml.py b/multimodal/tests/test_mvml.py index 9f33c3b5959835ed1e441d602e8ecc4202b02c6d..83c6ea3125032a0574398185fbe3b220da427453 100644 --- a/multimodal/tests/test_mvml.py +++ b/multimodal/tests/test_mvml.py @@ -43,6 +43,7 @@ import pickle import unittest import numpy as np +import scipy as sp from sklearn.exceptions import NotFittedError from multimodal.datasets.data_sample import MultiModalArray @@ -95,6 +96,22 @@ class MVMLTest(unittest.TestCase): w_expected = np.array([[0.5],[0.5]]) np.testing.assert_almost_equal(mvml.w, w_expected, 8) + def testFitMVMLRegression(self): + ####################################################### + # task with dict and not precomputed + ####################################################### + y = self.y + y += np.random.uniform(0,1, size=y.shape) + mvml = MVML(lmbda=0.1, eta=1, + kernel=['rbf'], kernel_params=[{'gamma':50}], + nystrom_param=0.2) + views_ind = [120, 240] + mvml.fit(self.kernel_dict, y=y, views_ind=None) + self.assertEqual(mvml.A.shape, (48, 48)) + self.assertEqual(mvml.g.shape,(48, 1)) + w_expected = np.array([[0.5],[0.5]]) + np.testing.assert_almost_equal(mvml.w, w_expected, 8) + def testFitMVMLPrecision(self): ####################################################### # task with dict and not precomputed @@ -255,6 +272,21 @@ class MVMLTest(unittest.TestCase): pass # return check_estimator(MVML) + def test_check_kernel(self): + clf = MVML() + clf.kernel = "an_unknown_kernel" + self.assertRaises(ValueError, clf._check_kernel) + + def testFitMVMLSparesArray(self): + ####################################################### + # task with nparray 2d + ####################################################### + x_metricl = MultiModalArray(self.kernel_dict) + x_array = np.asarray(x_metricl) + x_array_sparse = sp.sparse.csr_matrix(x_array) + mvml3 = MVML(lmbda=0.1, eta=1, nystrom_param=1.0) + self.assertRaises(TypeError, mvml3.fit, x_array_sparse, self.y, [0, 120, 240]) + if __name__ == "__main__": # import sys;sys.argv = ['', 'Test.testName'] unittest.main() diff --git a/setup.cfg b/setup.cfg index 2b8e7f45560df1bf68c6b693c57d29a50f6e0434..f3b90c5a82a8077eedee2012c96b090952131fb2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,3 +14,6 @@ branch = True source = multimodal include = */multimodal/* omit = */tests/* + +[coverage:report] +exclude_lines = pragma: no cover \ No newline at end of file diff --git a/setup.py b/setup.py index fa4dd7e49a88f535d56e3a7241f63a2bf8a25512..0bb17e2c7a939f99098001af1fe6ce404bcdcf90 100644 --- a/setup.py +++ b/setup.py @@ -176,8 +176,8 @@ def setup_package(): keywords = ('machine learning, supervised learning, classification, ' 'ensemble methods, boosting, kernel') packages = find_packages(exclude=['*.tests']) - install_requires = ['scikit-learn>=0.19', 'numpy', 'scipy', 'cvxopt' ] - python_requires = '>=3.5' + install_requires = ['scikit-learn>=0.24', 'numpy', 'scipy', 'cvxopt' ] + python_requires = '>=3.6' extras_require = { 'dev': ['pytest', 'pytest-cov'], 'doc': ['sphinx', 'numpydoc', 'sphinx_gallery', 'matplotlib']}