diff --git a/examples/cumbo/plot_cumbo_2_views_2_classes.py b/examples/cumbo/plot_cumbo_2_views_2_classes.py index c669f6919c71f964891940bdfa3c1276e7342992..77d0588aafa3392199880800c9ad33cba959f094 100644 --- a/examples/cumbo/plot_cumbo_2_views_2_classes.py +++ b/examples/cumbo/plot_cumbo_2_views_2_classes.py @@ -97,10 +97,10 @@ for ind_view in range(2): label='Class {} ({})'.format(ind_class, label)) ax.legend() -print('\nThe second figure displays the classification results for the ' - 'sub-classifiers\non the learning sample data.\n') - -styles = ('.b', '.r') +# print('\nThe second figure displays the classification results for the ' +# 'sub-classifiers\non the learning sample data.\n') +# +# styles = ('.b', '.r') # fig = plt.figure(figsize=(12., 7.)) # fig.suptitle('Classification results on the learning data for the ' # 'sub-classifiers', size=16) diff --git a/examples/mvml/plot_mvml_.py b/examples/mvml/plot_mvml_.py index 3120cb00eaddb579972db21be7065746dbff472b..8989230d812287c5c4fefde4dcc9aecbfd2d8a48 100644 --- a/examples/mvml/plot_mvml_.py +++ b/examples/mvml/plot_mvml_.py @@ -157,7 +157,7 @@ plt.subplot(342) plt.scatter(X1[n_tr:n_tot, 0], X1[n_tr:n_tot, 1], c=Y[n_tr:n_tot]) plt.title("orig. view 2") # -pred1[np.where(pred1[:, 0] != Y[n_tr:n_tot])] = 0 +pred1[np.where(pred1[:] != Y[n_tr:n_tot])] = 0 pred1 = pred1.reshape((pred1.shape[0])) plt.subplot(343) plt.scatter(X0[n_tr:n_tot, 0], X0[n_tr:n_tot, 1], c=pred1) @@ -166,7 +166,7 @@ plt.subplot(344) plt.scatter(X1[n_tr:n_tot, 0], X1[n_tr:n_tot, 1], c=pred1) plt.title("MVML view 2") # -pred2[np.where(pred2[:, 0] != Y[n_tr:n_tot])] = 0 +pred2[np.where(pred2[:] != Y[n_tr:n_tot])] = 0 pred2 = pred2.reshape((pred2.shape[0])) plt.subplot(345) plt.scatter(X0[n_tr:n_tot, 0], X0[n_tr:n_tot, 1], c=pred2) @@ -175,7 +175,7 @@ plt.subplot(346) plt.scatter(X1[n_tr:n_tot, 0], X1[n_tr:n_tot, 1], c=pred2) plt.title("MVMLsparse view 2") # -pred3[np.where(pred3[:, 0] != Y[n_tr:n_tot])] = 0 +pred3[np.where(pred3[:] != Y[n_tr:n_tot])] = 0 pred3 = pred3.reshape((pred3.shape[0])) # plt.subplot(347) @@ -185,7 +185,7 @@ plt.subplot(348) plt.scatter(X1[n_tr:n_tot, 0], X1[n_tr:n_tot, 1], c=pred3) plt.title("MVML_Cov view 2") # -pred4[np.where(pred4[:, 0] != Y[n_tr:n_tot])] = 0 +pred4[np.where(pred4[:] != Y[n_tr:n_tot])] = 0 pred4 = pred4.reshape((pred4.shape[0])) plt.subplot(349) plt.scatter(X0[n_tr:n_tot, 0], X0[n_tr:n_tot, 1], c=pred4) @@ -194,7 +194,7 @@ plt.subplot(3,4,10) plt.scatter(X1[n_tr:n_tot, 0], X1[n_tr:n_tot, 1], c=pred4) plt.title("MVML_I view 2") # -pred5[np.where(pred5[:, 0] != Y[n_tr:n_tot])] = 0 +pred5[np.where(pred5[:] != Y[n_tr:n_tot])] = 0 pred5 = pred5.reshape((pred5.shape[0])) plt.subplot(3,4,11) plt.scatter(X0[n_tr:n_tot, 0], X0[n_tr:n_tot, 1], c=pred5) diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py index 706350318258c25d35a6afb5aafcd57811663140..e6536e27acf84fef0f11e05678eee74388bc9b00 100644 --- a/multimodal/boosting/boost.py +++ b/multimodal/boosting/boost.py @@ -50,12 +50,12 @@ class UBoosting(metaclass=ABCMeta): def _global_X_transform(self, X, views_ind=None): X_ = None - if isinstance(X, sp.spmatrix): + if isinstance(X, MultiModalData): + X_ = X + elif isinstance(X, sp.spmatrix): X_ = MultiModalSparseArray(X, views_ind) else: X_ = MultiModalArray(X, views_ind) - if isinstance(X, MultiModalData): - X_ = X if not isinstance(X_, MultiModalData): try: X_ = np.asarray(X) diff --git a/multimodal/boosting/cumbo.py b/multimodal/boosting/cumbo.py index 0d8d42c5cade316febd1f6b3b3feed09e35cb7fa..b498064cc18f703c9afc7d50811c947f87585651 100644 --- a/multimodal/boosting/cumbo.py +++ b/multimodal/boosting/cumbo.py @@ -99,15 +99,15 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): >>> from sklearn.tree import DecisionTreeClassifier >>> base_estimator = DecisionTreeClassifier(max_depth=2) - >>> clf = MuCumboClassifier(base_estimator=base_estimator, random_state=0) + >>> clf = MuCumboClassifier(base_estimator=base_estimator, random_state=1) >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE MuCumboClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2, - max_features=None, max_leaf_nodes=None, - min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf=1, min_samples_split=2, - min_weight_fraction_leaf=0.0, presort=False, random_state=None, - splitter='best'), - n_estimators=50, random_state=0) + max_features=None, max_leaf_nodes=None, + min_impurity_decrease=0.0, min_impurity_split=None, + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, presort=False, random_state=None, + splitter='best'), + n_estimators=50, random_state=1) >>> print(clf.predict([[ 5., 3., 1., 1.]])) [0] diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py index eff5b3dfbe6553e5053cc0ea985752683297eb5f..29fd0e60abdee29e2070bd80c71c1a386333e0e0 100644 --- a/multimodal/datasets/data_sample.py +++ b/multimodal/datasets/data_sample.py @@ -128,39 +128,39 @@ class MultiModalData(metaclass=ABCMeta): class MultiModalSparseInfo(): - def __init__(self, data, view_ind=None): + def __init__(self, data, views_ind=None): """Constructor of Metriclearn_array""" shapes_int = [] index = 0 new_data = np.ndarray([]) n_views = data.size thekeys = None - # view_ind_self = None + # views_ind_self = None view_mode = 'slices' if (sp.issparse(data)) and data.ndim > 1: - if view_ind is not None: + if views_ind is not None: try: - view_ind = np.asarray(view_ind) + views_ind = np.asarray(views_ind) except : raise TypeError("n_views should be list or nparray") - elif view_ind is None: + elif views_ind is None: if data.shape[1] > 1: - view_ind = np.array([0, data.shape[1]//2, data.shape[1]]) + views_ind = np.array([0, data.shape[1]//2, data.shape[1]]) else: - view_ind = np.array([0, data.shape[1]]) + views_ind = np.array([0, data.shape[1]]) new_data = data - # view_ind_self = view_ind - view_ind, n_views, view_mode = self._first_validate_views_ind(view_ind, + # views_ind_self = views_ind + views_ind, n_views, view_mode = self._first_validate_views_ind(views_ind, data.shape[1]) - if view_ind.ndim == 1 and view_mode.startswith("slicing"): - shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])] + if views_ind.ndim == 1 and view_mode.startswith("slicing"): + shapes_int = [in2 - in1 for in1, in2 in zip(views_ind, views_ind[1:])] if data.shape[0] < 1 or data.shape[1] < 1: raise ValueError("input data shouldbe not empty") self.view_mode_ = view_mode - self.views_ind = view_ind + self.views_ind = views_ind self.shapes_int = shapes_int self.n_views = n_views @@ -201,7 +201,7 @@ class MultiModalSparseArray(sp.csr_matrix, sp.csc_matrix, MultiModalSparseInfo, Attributes ---------- - view_ind : list of views' indice (may be None) + views_ind : list of views' indice (may be None) n_views : int number of views @@ -274,7 +274,7 @@ class MultiModalArray(np.ndarray, MultiModalData): Attributes ---------- - view_ind : list of views' indice (may be None) + views_ind : list of views' indice (may be None) n_views : int number of views @@ -300,32 +300,31 @@ class MultiModalArray(np.ndarray, MultiModalData): """ - def __new__(cls, data, view_ind=None): + def __new__(cls, data, views_ind=None): """Constructor of MultiModalArray_array""" shapes_int = [] index = 0 new_data = np.ndarray([]) n_views = 1 thekeys = None - # view_ind_self = None view_mode = 'slices' if isinstance(data, dict) and not isinstance(data, sp.dok_matrix): n_views = len(data) - view_ind = [0] + views_ind = [0] for key, dat_values in data.items(): dat_values = np.asarray(dat_values) if dat_values.ndim < 2: dat_values = dat_values.reshape(1, dat_values.shape[0]) new_data = cls._populate_new_data(index, dat_values, new_data) shapes_int.append(dat_values.shape[1]) - view_ind.append(dat_values.shape[1] + view_ind[index]) + views_ind.append(dat_values.shape[1] + views_ind[index]) index += 1 thekeys = data.keys() if new_data.ndim < 2 : raise ValueError('Reshape your data') if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') - elif isinstance(data, np.ndarray) and view_ind is None and data.ndim == 1: + elif isinstance(data, np.ndarray) and views_ind is None and data.ndim == 1: try: dat0 = np.array(data[0]) except Exception: @@ -334,12 +333,12 @@ class MultiModalArray(np.ndarray, MultiModalData): if dat0.ndim < 2: data = data[np.newaxis, ...] if data.shape[1] > 1: - view_ind = np.array([0, data.shape[1]//2, data.shape[1]]) + views_ind = np.array([0, data.shape[1]//2, data.shape[1]]) else: - view_ind = np.array([0, data.shape[1]]) + views_ind = np.array([0, data.shape[1]]) new_data = data else: - new_data, shapes_int, view_ind = cls._for_data(cls, data) + new_data, shapes_int, views_ind = cls._for_data(cls, data) n_views = data.shape[0] elif (isinstance(data, np.ndarray) ) and data.ndim > 1: try: @@ -347,41 +346,41 @@ class MultiModalArray(np.ndarray, MultiModalData): except: raise TypeError("input format is not supported") - if view_ind is not None: + if views_ind is not None: try: - view_ind = np.asarray(view_ind) + views_ind = np.asarray(views_ind) except : raise TypeError("n_views should be list or nparray") - elif view_ind is None: + elif views_ind is None: if data.shape[1] > 1: - view_ind = np.array([0, data.shape[1]//2, data.shape[1]]) + views_ind = np.array([0, data.shape[1]//2, data.shape[1]]) else: - view_ind = np.array([0, data.shape[1]]) + views_ind = np.array([0, data.shape[1]]) new_data = data else: try: new_data = np.asarray(data) - if view_ind is None: - view_ind = np.array([0, new_data.shape[1]]) + if views_ind is None: + views_ind = np.array([0, new_data.shape[1]]) except Exception as e: raise ValueError('Reshape your data') if new_data.ndim < 2 : raise ValueError('Reshape your data') if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()): raise ValueError('Reshape your data') - if view_ind.ndim < 2 and new_data.ndim <2 and view_ind[-1] > new_data.shape[1]: + if views_ind.ndim < 2 and new_data.ndim <2 and views_ind[-1] > new_data.shape[1]: raise ValueError('Reshape your data') - # view_ind_self = view_ind + # views_ind_self = views_ind # if new_data.shape[1] < 1: # msg = ("%d feature\(s\) \\(shape=\%s\) while a minimum of \\d* " # "is required.") % (new_data.shape[1], str(new_data.shape)) # # "%d feature\(s\) \(shape=\(%d, %d\)\) while a minimum of \d* is required." % (new_data.shape[1], new_data.shape[0], new_data.shape[1]) # raise ValueError(msg) - view_ind, n_views, view_mode = cls._first_validate_views_ind(view_ind, + views_ind, n_views, view_mode = cls._first_validate_views_ind(views_ind, new_data.shape[1]) - if view_ind.ndim == 1 and view_mode.startswith("slices"): - shapes_int = [in2 - in1 for in1, in2 in zip(view_ind, view_ind[1:])] + if views_ind.ndim == 1 and view_mode.startswith("slices"): + shapes_int = [in2 - in1 for in1, in2 in zip(views_ind, views_ind[1:])] # obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls) # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask) @@ -393,7 +392,7 @@ class MultiModalArray(np.ndarray, MultiModalData): else: obj = np.recarray.__new__(cls, shape=(0, 0), dtype=np.float) obj.view_mode_ = view_mode - obj.views_ind = view_ind + obj.views_ind = views_ind obj.shapes_int = shapes_int obj.n_views = n_views # obj.keys = thekeys @@ -403,8 +402,8 @@ class MultiModalArray(np.ndarray, MultiModalData): def _for_data(cls, data): n_views = data.shape[0] index = 0 - view_ind = np.empty(n_views + 1, dtype=np.int) - view_ind[0] = 0 + views_ind = np.empty(n_views + 1, dtype=np.int) + views_ind[0] = 0 shapes_int = [] new_data = np.ndarray([]) for dat_values in data: @@ -413,10 +412,10 @@ class MultiModalArray(np.ndarray, MultiModalData): except Exception: raise TypeError("input format is not supported") new_data = cls._populate_new_data(index, dat_values, new_data) - view_ind[index + 1] = dat_values.shape[1] + view_ind[index] + views_ind[index + 1] = dat_values.shape[1] + views_ind[index] shapes_int.append(dat_values.shape[1]) index += 1 - return new_data, shapes_int, view_ind + return new_data, shapes_int, views_ind @staticmethod def _populate_new_data(index, dat_values, new_data): diff --git a/multimodal/kernels/lpMKL.py b/multimodal/kernels/lpMKL.py index cb051c03540f654cd5fc49e8edb70a602a86661a..ca61b8cbf283c62192c26d30ee26abed419dd192 100644 --- a/multimodal/kernels/lpMKL.py +++ b/multimodal/kernels/lpMKL.py @@ -193,7 +193,6 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel): # ft2[v] = weights_old[v] * np.dot(np.transpose(C), np.dot(data.kernel_dict[v], C)) # calculate the sum for downstairs # print(weights_old) - # print(ft2) # print(ft2 ** (p / (p + 1.0))) downstairs = np.sum(ft2 ** (p / (p + 1.0))) ** (1.0 / p) # and then the gammas diff --git a/multimodal/kernels/mvml.py b/multimodal/kernels/mvml.py index e1b22f27c7f3ebfd51892aad234d031198c09d78..edcf0935d2f4928e12887ed5a07b70064b3010e7 100644 --- a/multimodal/kernels/mvml.py +++ b/multimodal/kernels/mvml.py @@ -99,7 +99,41 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin): regression_ : if the classifier is used as regression (default : False) - + Examples + -------- + >>> from multimodal.kernels.mvml import MVML + >>> from sklearn.datasets import load_iris + >>> X, y = load_iris(return_X_y=True) + >>> views_ind = [0, 2, 4] # view 0: sepal data, view 1: petal data + >>> clf = MVML() + clf.get_params() + {'eta': 1, 'kernel': 'linear', 'kernel_params': None, 'learn_A': 1, 'learn_w': 0, 'lmbda': 0.1, 'n_loops': 6, 'nystrom_param': 1.0, 'precision': 0.0001} + >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE + MumboClassifier(base_estimator=None, best_view_mode='edge', + n_estimators=50, random_state=0) + >>> print(clf.predict([[ 5., 3., 1., 1.]])) + [1] + >>> views_ind = [[0, 2], [1, 3]] # view 0: length data, view 1: width data + >>> clf = MumboClassifier(random_state=0) + >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE + MumboClassifier(base_estimator=None, best_view_mode='edge', + n_estimators=50, random_state=0) + >>> print(clf.predict([[ 5., 3., 1., 1.]])) + [1] + + >>> from sklearn.tree import DecisionTreeClassifier + >>> base_estimator = DecisionTreeClassifier(max_depth=2) + >>> clf = MumboClassifier(base_estimator=base_estimator, random_state=0) + >>> clf.fit(X, y, views_ind) # doctest: +NORMALIZE_WHITESPACE + MumboClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, + criterion='gini', max_depth=2, max_features=None, + max_leaf_nodes=None, min_impurity_decrease=0.0, + min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, presort=False, random_state=None, + splitter='best'), + best_view_mode='edge', n_estimators=50, random_state=0) + >>> print(clf.predict([[ 5., 3., 1., 1.]])) + [1] """ # r_cond = 10-30 def __init__(self, lmbda=0.1, eta=1, nystrom_param=1.0, kernel="linear", diff --git a/multimodal/tests/test_cumbo.py b/multimodal/tests/test_cumbo.py index 49ec85c2d87b110c04d283e94b8a3d180d0790dc..f028e26fe46e93da31d6fe7c5bef8849804f5534 100644 --- a/multimodal/tests/test_cumbo.py +++ b/multimodal/tests/test_cumbo.py @@ -866,9 +866,15 @@ class TestMuCumboClassifier(unittest.TestCase): 'base_estimator__max_depth': (1, 2)} clf = GridSearchCV(mumbo, parameters) clf.fit(self.iris.data, self.iris.target, views_ind=self.iris.views_ind) - # - # - # def test_pickle(): + self.assertEqual(clf.best_params_,{'base_estimator__max_depth': 2, 'n_estimators': 2}) + + multimodal_data = MultiModalArray(self.iris.data, views_ind=self.iris.views_ind) + clf = GridSearchCV(mumbo, parameters) + clf.fit(multimodal_data, self.iris.target) + + self.assertEqual(clf.best_params_, {'base_estimator__max_depth': 2, 'n_estimators': 2}) + + # def test_pick le(): # np.random.seed(seed) # # # Check pickability. diff --git a/multimodal/tests/test_mumbo.py b/multimodal/tests/test_mumbo.py index f22f5ba72ea6d747c9f80be486830198c3f2ddb4..7082462eea6ffe34de2960228a1659a93089e882 100644 --- a/multimodal/tests/test_mumbo.py +++ b/multimodal/tests/test_mumbo.py @@ -786,6 +786,7 @@ class TestMuCumboClassifier(unittest.TestCase): clf = GridSearchCV(mumbo, parameters) clf.fit(self.iris.data, self.iris.target, views_ind=self.iris.views_ind) + def test_pickle(self): seed = 7 np.random.seed(seed)