From cdf9409ebfd6c930c520dcdbd68bdcfcc7b1dced Mon Sep 17 00:00:00 2001
From: Dominique Benielli <dominique.benielli@lis-lab.fr>
Date: Mon, 6 Jan 2020 18:04:31 +0100
Subject: [PATCH] common dataset

---
 multimodal/boosting/boost.py                  |  81 ++++++++++
 multimodal/boosting/cumbo.py                  | 147 ++++--------------
 multimodal/boosting/mumbo.py                  |  96 +++---------
 .../__pycache__/data_sample.cpython-36.pyc    | Bin 8827 -> 10701 bytes
 multimodal/datasets/data_sample.py            |  85 +++++++---
 multimodal/kernels/lpMKL.py                   |   1 -
 multimodal/kernels/mkernel.py                 |  14 +-
 7 files changed, 208 insertions(+), 216 deletions(-)

diff --git a/multimodal/boosting/boost.py b/multimodal/boosting/boost.py
index e69de29..7de84ee 100644
--- a/multimodal/boosting/boost.py
+++ b/multimodal/boosting/boost.py
@@ -0,0 +1,81 @@
+import numpy as np
+from abc import ABCMeta
+from sklearn.utils import check_array, check_X_y, check_random_state
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree.tree import BaseDecisionTree
+from sklearn.tree._tree import DTYPE
+from sklearn.ensemble.forest import BaseForest
+from multimodal.datasets.data_sample import DataSample, MultiModalArray
+
+class UBoosting(metaclass=ABCMeta):
+    """Validation helpers shared by MuCumboClassifier and MumboClassifier."""
+
+    def _validate_X_predict(self, X):
+        """Check X format and feature count; X itself is returned unchanged."""
+        if (self.base_estimator is None or
+                isinstance(self.base_estimator,
+                           (BaseDecisionTree, BaseForest))):
+            check_array(X, accept_sparse='csr', dtype=DTYPE)
+        else:
+            check_array(X, accept_sparse=['csr', 'csc'])
+        if X.shape[1] != self.n_features_:
+            raise ValueError("X doesn't contain the right number of features.")
+        return X
+
+    def _validate_views_ind(self, views_ind, n_features):
+        """Check views_ind, set ``view_mode_`` and return (views_ind, n_views).
+
+        A sorted 1D integer array gives slice boundaries ("slices" mode); a 2D
+        integer array or a 1D object array of integer sequences gives each
+        view's feature indices ("indices" mode). Raises ValueError otherwise.
+        """
+        range_msg = ("Values in views_ind are not in a correct range for "
+                     "the provided data.")
+        views_ind = np.array(views_ind)
+        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
+            if np.any(views_ind[:-1] >= views_ind[1:]):
+                raise ValueError("Values in views_ind must be sorted.")
+            if views_ind[0] < 0 or views_ind[-1] > n_features:
+                raise ValueError(range_msg)
+            self.view_mode_ = "slices"
+            return (views_ind, views_ind.shape[0] - 1)
+        if views_ind.ndim == 1:
+            # builtin ``object``: the ``np.object`` alias is gone in NumPy >= 1.24
+            if views_ind.dtype != object:
+                raise ValueError("The format of views_ind is not supported.")
+            for ind, val in enumerate(views_ind):
+                views_ind[ind] = np.array(val)
+                if not np.issubdtype(views_ind[ind].dtype, np.integer):
+                    raise ValueError("Values in views_ind must be integers.")
+                if (views_ind[ind].min() < 0
+                        or views_ind[ind].max() >= n_features):
+                    raise ValueError(range_msg)
+        elif views_ind.ndim == 2:
+            if not np.issubdtype(views_ind.dtype, np.integer):
+                raise ValueError("Values in views_ind must be integers.")
+            if views_ind.min() < 0 or views_ind.max() >= n_features:
+                raise ValueError(range_msg)
+        else:
+            raise ValueError("The format of views_ind is not supported.")
+        self.view_mode_ = "indices"
+        return (views_ind, views_ind.shape[0])
+
+    def _global_X_transform(self, X, views_ind=None):
+        """Wrap X (dict of views or ndarray) in a MultiModalArray.
+
+        Once fitted, views_ind defaults to the fitted one and must match it.
+        Raises TypeError for unsupported X, ValueError on a views mismatch.
+        """
+        fitted = hasattr(self, "X_")
+        # NOTE(review): assumes MultiModalArray exposes ``views_ind`` - confirm
+        if fitted and views_ind is None:
+            views_ind = self.X_.views_ind
+        if isinstance(X, dict):
+            X_ = MultiModalArray(X)
+        elif isinstance(X, np.ndarray) and X.ndim >= 1:
+            X_ = MultiModalArray(X, views_ind)
+        else:
+            raise TypeError("Input format is not recognized")
+        if fitted and not np.array_equal(self.X_.views_ind, views_ind):
+            raise ValueError("Input format (views, features) for fit and predict must be the same")
+        return X_
\ No newline at end of file
diff --git a/multimodal/boosting/cumbo.py b/multimodal/boosting/cumbo.py
index 169925a..3b22496 100644
--- a/multimodal/boosting/cumbo.py
+++ b/multimodal/boosting/cumbo.py
@@ -43,11 +43,11 @@ from sklearn.utils import check_array, check_X_y, check_random_state
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import check_is_fitted, has_fit_parameter
 from cvxopt import solvers, matrix, spdiag, exp, spmatrix, mul, div
-from multimodal.datasets.data_sample import Metriclearn_array
+from .boost import UBoosting
 import warnings
 
 
-class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
+class MuCumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting):
     r"""It then iterates the process on the same dataset but where the weights of
     incorrectly classified instances are adjusted such that subsequent
     classifiers focus more on difficult cases.
@@ -114,7 +114,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
 
     Examples
     --------
-    >>> from multiconfusion.cumbo import MuCumboClassifier
+    >>> from multimodal.boosting.cumbo import MuCumboClassifier
     >>> from sklearn.datasets import load_iris
     >>> X, y = load_iris(return_X_y=True)
     >>> views_ind = [0, 2, 4]  # view 0: sepal data, view 1: petal data
@@ -178,7 +178,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         self.random_state = random_state
         # self.best_view_mode = self._validate_best_view_mode(best_view_mode)
 
-
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
         super(MuCumboClassifier, self)._validate_estimator(
@@ -188,83 +187,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             raise ValueError("%s doesn't support sample_weight."
                              % self.base_estimator_.__class__.__name__)
 
-    def _validate_X_predict(self, X):
-        """Ensure that X is in the proper format."""
-        if (self.base_estimator is None or
-                isinstance(self.base_estimator,
-                           (BaseDecisionTree, BaseForest))):
-            X = check_array(X, accept_sparse='csr', dtype=DTYPE)
-        else:
-            X = check_array(X, accept_sparse=['csr', 'csc'])
-        if X.shape[1] != self.n_features_:
-            raise ValueError("X doesn't contain the right number of features.")
-        return X
-
-    def _extract_view(self, X, ind_view):
-        """Extract the view for the given index ind_view from the dataset X."""
-        if self.view_mode_ == "indices":
-            return X[:, self.views_ind_[ind_view]]
-        else:
-            return X[:, self.views_ind_[ind_view]:self.views_ind_[ind_view+1]]
-
-    def _compute_predictions(self, X):
-        """Compute predictions for all the stored estimators on the data X."""
-        n_samples = X.shape[0]
-        n_estimators = len(self.estimators_)
-        predictions = np.zeros((n_samples, n_estimators), dtype=np.int64)
-        for ind_estimator, estimator in enumerate(self.estimators_):
-            # no best view in mucumbo but all view
-            # ind_view = self.best_views_[ind_estimator]
-            ind_view = ind_estimator % self.n_views_
-            predictions[:, ind_estimator] \
-                = estimator.predict(self._extract_view(X, ind_view))
-        return predictions
-
-    def _validate_views_ind(self, views_ind, n_features):
-        """Ensure proper format for views_ind and return number of views."""
-        views_ind = np.array(views_ind)
-        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
-            if np.any(views_ind[:-1] >= views_ind[1:]):
-                raise ValueError("Values in views_ind must be sorted.")
-            if views_ind[0] < 0 or views_ind[-1] > n_features:
-                raise ValueError("Values in views_ind are not in a correct "
-                                 + "range for the provided data.")
-            self.view_mode_ = "slices"
-            n_views = views_ind.shape[0]-1
-        else:
-            if views_ind.ndim == 1:
-                if not views_ind.dtype == np.object:
-                    raise ValueError("The format of views_ind is not "
-                                     + "supported.")
-                for ind, val in enumerate(views_ind):
-                    views_ind[ind] = np.array(val)
-                    if not np.issubdtype(views_ind[ind].dtype, np.integer):
-                        raise ValueError("Values in views_ind must be "
-                                         + "integers.")
-                    if views_ind[ind].min() < 0 \
-                            or views_ind[ind].max() >= n_features:
-                        raise ValueError("Values in views_ind are not in a "
-                                         + "correct range for the provided "
-                                         + "data.")
-            elif views_ind.ndim == 2:
-                if not np.issubdtype(views_ind.dtype, np.integer):
-                    raise ValueError("Values in views_ind must be integers.")
-                if views_ind.min() < 0 or views_ind.max() >= n_features:
-                    raise ValueError("Values in views_ind are not in a "
-                                     + "correct range for the provided data.")
-            else:
-                raise ValueError("The format of views_ind is not supported.")
-            self.view_mode_ = "indices"
-            n_views = views_ind.shape[0]
-        return (views_ind, n_views)
-
-    # def _validate_best_view_mode(self, best_view_mode):
-    #     """Ensure that best_view_mode has a proper value."""
-    #     if best_view_mode not in ("edge", "error"):
-    #         raise ValueError('best_view_mode value must be either "edge" '
-    #                          + 'or "error"')
-    #     return best_view_mode
-
     def _init_var(self, n_views, y):
         "Create and initialize the variables used by the MuMBo algorithm."
         n_classes = self.n_classes_
@@ -279,9 +201,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             n_yi_s[indice_class] = int(n_yi)
             cost[:, :, indice_class] /=   n_yi
         cost[:, np.arange(n_samples), y] *= -(n_classes-1)
-        # not necessary in mucombo
-        # cost_global = np.ones((n_samples, n_classes))
-        # cost_global[np.arange(n_samples), y] = -(n_classes-1)
         label_score = np.zeros((n_views, n_samples, n_classes))
         label_score_global = np.zeros((n_samples, n_classes))
         predicted_classes = np.empty((n_views, n_samples), dtype=np.int64)
@@ -289,15 +208,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         return (cost, label_score, label_score_global, predicted_classes,
                 score_function, beta_class, n_yi_s)
 
-    # def _compute_edge_global(self, cost_global, predicted_classes, y):
-    #     """Compute edge values for the global cost matrix."""
-    #     n_samples = y.shape[0]
-    #     edge_global = - np.sum(
-    #         cost_global[np.arange(n_samples), predicted_classes], axis=1) \
-    #         / (np.sum(cost_global)
-    #            - np.sum(cost_global[np.arange(n_samples), y]))
-    #     return edge_global
-
     def _compute_dist(self, cost, y):
         """Compute the sample distribution (i.e. the weights to use)."""
         n_samples = y.shape[0]
@@ -312,13 +222,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             / np.sum(cost[:, np.arange(n_samples), y], axis=1)[:, np.newaxis]
         return dist
 
-    # def _compute_coop_coef(self, predicted_classes, y):
-    #     """Compute the cooperation coefficients."""
-    #     coop_coef = np.zeros(predicted_classes.shape)
-    #     coop_coef[predicted_classes == y] = 1.
-    #     coop_coef[:, np.logical_not(coop_coef.any(axis=0))] = 1.
-    #     return coop_coef
-
     def _indicatrice(self, predicted_classes, y_i):
         n_samples = y_i.shape[0]
         indicate_ones = np.zeros((self.n_views_, n_samples, self.n_classes_), dtype=np.int)
@@ -464,6 +367,19 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             print("Value Error on the evaluation on beta coefficient %s "% e)
         return solver
 
+    def _compute_predictions(self, X):
+        """Return the (n_samples, n_estimators) matrix of stored predictions.
+
+        Estimators cycle over the views, so estimator ``i`` is evaluated on
+        view ``i % self.n_views_`` (no per-estimator best view is looked up).
+        """
+        predictions = np.zeros((X.shape[0], len(self.estimators_)),
+                               dtype=np.int64)
+        for ind_estimator, estimator in enumerate(self.estimators_):
+            view = X._extract_view(ind_estimator % self.n_views_)
+            predictions[:, ind_estimator] = estimator.predict(view)
+        return predictions
+
     def fit(self, X, y, views_ind=None):
         """Build a multimodal boosted classifier from the training set (X, y).
 
@@ -516,17 +432,19 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         else:
             dtype = None
             accept_sparse = ['csr', 'csc']
-        X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype)
-        check_classification_targets(y)
-        self._validate_estimator()
         if views_ind is None:
             if X.shape[1] > 1:
                 views_ind = np.array([0, X.shape[1]//2, X.shape[1]])
             else:
                 views_ind = np.array([0, X.shape[1]])
-        self.X_ = Metriclearn_array(X, view_ind=views_ind)
-        self.views_ind_, n_views = self._validate_views_ind(views_ind,
-                                                            X.shape[1])
+
+        self.X_ = self._global_X_transform(X, views_ind=views_ind)
+        views_ind_, n_views = self.X_._validate_views_ind(views_ind,
+                                                          X.shape[1])
+        check_X_y(self.X_, y, accept_sparse=accept_sparse, dtype=dtype)
+        check_classification_targets(y)
+        self._validate_estimator()
+
         self.n_iterations_ = self.n_estimators // n_views
         self.classes_, y = np.unique(y, return_inverse=True)
         self.n_classes_ = len(self.classes_)
@@ -560,13 +478,13 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             for ind_view in range(n_views):
                 estimator = self._make_estimator(append=False,
                                                  random_state=random_state)
-                estimator.fit(self._extract_view(X, ind_view), y,
+                estimator.fit(self.X_._extract_view(ind_view), y,
                               sample_weight=dist[ind_view, :])
                 predicted_classes[ind_view, :] = estimator.predict(
-                    self._extract_view(X, ind_view))
+                    self.X_._extract_view(ind_view))
                 self.estimators_.append(estimator)
 
-            # fin de choose cost matrix
+            # end of choose cost matrix
             #   TO DO estimator_errors_ estimate
             ###########################################
 
@@ -609,7 +527,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             ``classes_``.
         """
         check_is_fitted(self, ("estimators_", "estimator_weights_alpha_","n_views_",
-                               "estimator_weights_beta_", "n_classes_", "views_ind_"))
+                               "estimator_weights_beta_", "n_classes_"))
         X = self._validate_X_predict(X)
 
         n_samples = X.shape[0]
@@ -662,7 +580,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             ``classes_``.
         """
         check_is_fitted(self, ("estimators_", "estimator_weights_alpha_","n_views_",
-                               "estimator_weights_beta_", "n_classes_", "views_ind_"))
+                               "estimator_weights_beta_", "n_classes_"))
         X = self._validate_X_predict(X)
 
         n_samples = X.shape[0]
@@ -687,7 +605,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
             else:
                 yield np.array(dec_func)
 
-    def predict(self, X):
+    def predict(self, X, views_ind=None):
         """Predict classes for X.
 
         The predicted class of an input sample is computed as the weighted mean
@@ -710,6 +628,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         ValueError   'X' input matrix must be have the same total number of features
                      of 'X' fit data
         """
+        X = self._global_X_transform(X, views_ind=views_ind)
         pred = self.decision_function(X)
 
         if self.n_classes_ == 2:
@@ -739,9 +658,10 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         y : generator of numpy.ndarrays, shape = (n_samples,)
             Predicted classes.
         """
+
         n_classes = self.n_classes_
         classes = self.classes_
-
+        X = self._validate_X_predict(X)
         if n_classes == 2:
             for pred in self.staged_decision_function(X):
                 yield np.array(classes.take(pred > 0, axis=0))
@@ -766,6 +686,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin):
         score : float
             Mean accuracy of self.predict(X) wrt. y.
         """
+        X = self._validate_X_predict(X)
         return super(MuCumboClassifier, self).score(X, y)
 
     def staged_score(self, X, y):
diff --git a/multimodal/boosting/mumbo.py b/multimodal/boosting/mumbo.py
index fd7bd42..c2fb588 100644
--- a/multimodal/boosting/mumbo.py
+++ b/multimodal/boosting/mumbo.py
@@ -42,9 +42,10 @@ from sklearn.tree._tree import DTYPE
 from sklearn.utils import check_array, check_X_y, check_random_state
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import check_is_fitted, has_fit_parameter
+from .boost import UBoosting
 
 
-class MumboClassifier(BaseEnsemble, ClassifierMixin):
+class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting):
     r"""It then iterates the process on the same dataset but where the weights of
     incorrectly classified instances are adjusted such that subsequent
     classifiers focus more on difficult cases.
@@ -106,7 +107,7 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
 
     Examples
     --------
-    >>> from multimodalboost.mumbo import MumboClassifier
+    >>> from multimodal.boosting.mumbo import MumboClassifier
     >>> from sklearn.datasets import load_iris
     >>> X, y = load_iris(return_X_y=True)
     >>> views_ind = [0, 2, 4]  # view 0: sepal data, view 1: petal data
@@ -175,75 +176,6 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
             raise ValueError("%s doesn't support sample_weight."
                              % self.base_estimator_.__class__.__name__)
 
-    def _validate_X_predict(self, X):
-        """Ensure that X is in the proper format."""
-        if (self.base_estimator is None or
-                isinstance(self.base_estimator,
-                           (BaseDecisionTree, BaseForest))):
-            X = check_array(X, accept_sparse='csr', dtype=DTYPE)
-
-        else:
-            X = check_array(X, accept_sparse=['csr', 'csc'])
-        if X.shape[1] != self.n_features_:
-            raise ValueError("X doesn't contain the right number of features.")
-        return X
-
-    def _extract_view(self, X, ind_view):
-        """Extract the view for the given index ind_view from the dataset X."""
-        if self.view_mode_ == "indices":
-            return X[:, self.views_ind_[ind_view]]
-        else:
-            return X[:, self.views_ind_[ind_view]:self.views_ind_[ind_view+1]]
-
-    def _compute_predictions(self, X):
-        """Compute predictions for all the stored estimators on the data X."""
-        n_samples = X.shape[0]
-        n_estimators = len(self.estimators_)
-        predictions = np.zeros((n_samples, n_estimators), dtype=np.int64)
-        for ind_estimator, estimator in enumerate(self.estimators_):
-            ind_view = self.best_views_[ind_estimator]
-            predictions[:, ind_estimator] \
-                = estimator.predict(self._extract_view(X, ind_view))
-        return predictions
-
-    def _validate_views_ind(self, views_ind, n_features):
-        """Ensure proper format for views_ind and return number of views."""
-        views_ind = np.array(views_ind)
-        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
-            if np.any(views_ind[:-1] >= views_ind[1:]):
-                raise ValueError("Values in views_ind must be sorted.")
-            if views_ind[0] < 0 or views_ind[-1] > n_features:
-                raise ValueError("Values in views_ind are not in a correct "
-                                 + "range for the provided data.")
-            self.view_mode_ = "slices"
-            n_views = views_ind.shape[0]-1
-        else:
-            if views_ind.ndim == 1:
-                if not views_ind.dtype == np.object:
-                    raise ValueError("The format of views_ind is not "
-                                     + "supported.")
-                for ind, val in enumerate(views_ind):
-                    views_ind[ind] = np.array(val)
-                    if not np.issubdtype(views_ind[ind].dtype, np.integer):
-                        raise ValueError("Values in views_ind must be "
-                                         + "integers.")
-                    if views_ind[ind].min() < 0 \
-                            or views_ind[ind].max() >= n_features:
-                        raise ValueError("Values in views_ind are not in a "
-                                         + "correct range for the provided "
-                                         + "data.")
-            elif views_ind.ndim == 2:
-                if not np.issubdtype(views_ind.dtype, np.integer):
-                    raise ValueError("Values in views_ind must be integers.")
-                if views_ind.min() < 0 or views_ind.max() >= n_features:
-                    raise ValueError("Values in views_ind are not in a "
-                                     + "correct range for the provided data.")
-            else:
-                raise ValueError("The format of views_ind is not supported.")
-            self.view_mode_ = "indices"
-            n_views = views_ind.shape[0]
-        return (views_ind, n_views)
-
     def _validate_best_view_mode(self, best_view_mode):
         """Ensure that best_view_mode has a proper value."""
         if best_view_mode not in ("edge", "error"):
@@ -353,6 +285,17 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
         cost[:, np.arange(n_samples), y] -= np.sum(cost, axis=2)
         return (cost, label_score)
 
+    def _compute_predictions(self, X):
+        """Return the (n_samples, n_estimators) matrix of stored predictions,
+        estimator ``i`` being evaluated on its view ``self.best_views_[i]``.
+        """
+        predictions = np.zeros((X.shape[0], len(self.estimators_)),
+                               dtype=np.int64)
+        for ind_estimator, estimator in enumerate(self.estimators_):
+            view = X._extract_view(self.best_views_[ind_estimator])
+            predictions[:, ind_estimator] = estimator.predict(view)
+        return predictions
+
     def fit(self, X, y, views_ind=None):
         """Build a multimodal boosted classifier from the training set (X, y).
 
@@ -400,9 +343,6 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
         else:
             dtype = None
             accept_sparse = ['csr', 'csc']
-        X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype)
-        check_classification_targets(y)
-        self._validate_estimator()
         if views_ind is None:
             if X.shape[1] > 1:
                 views_ind = np.array([0, X.shape[1]//2, X.shape[1]])
@@ -410,6 +350,10 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
                 views_ind = np.array([0, X.shape[1]])
         self.views_ind_, n_views = self._validate_views_ind(views_ind,
                                                             X.shape[1])
+        self.X_ = self._global_X_transform(X, views_ind=self.views_ind_)
+        check_X_y(self.X_, y, accept_sparse=accept_sparse, dtype=dtype)
+        check_classification_targets(y)
+        self._validate_estimator()
 
         self.classes_, y = np.unique(y, return_inverse=True)
         self.n_classes_ = len(self.classes_)
@@ -441,11 +385,11 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin):
             for ind_view in range(n_views):
                 estimator = self._make_estimator(append=False,
                                                  random_state=random_state)
-                estimator.fit(self._extract_view(X, ind_view), y,
+                estimator.fit(self.X_._extract_view(ind_view), y,
                               sample_weight=dist[ind_view, :])
                 estimators.append(estimator)
                 predicted_classes[ind_view, :] = estimator.predict(
-                    self._extract_view(X, ind_view))
+                    self.X_._extract_view(ind_view))
 
             edges = self._compute_edge_global(
                     cost_global, predicted_classes, y)
diff --git a/multimodal/datasets/__pycache__/data_sample.cpython-36.pyc b/multimodal/datasets/__pycache__/data_sample.cpython-36.pyc
index 0dd01d941e19a920b48b248b2107c9ec6586b3b1..0a201db1c4aaece161b8408837f5bd47c3c60df1 100644
GIT binary patch
delta 3304
zcmezEayFRDn3tDprMGaLh~Y*q9cG~>1_p*yhA74qhA5^K#wg|#>B%0<0X`|BDPk=Q
zQGBVaDdH&-Eey?!QT!>ADN-#AQ3CA@EDTYC!3>%*xA=Wab4oIO^HUOY9E*w)D-(6u
z85p<}6ciMYBsSk*=4IssDa-{aoE$8xhs5iXbw}bga&CzMsVvJ(EiYEEMK%UxzD7!7
zNunm$0y{f9g<QB&z2c0-f>f}ChM|#xj)IYi0XB7$+qqQ1)`2aUypc-<!P|U`i-%e4
z7IS8vA&4+4Vr5`p029BWC;#D9QY=o+%+4&)1=|YtN=|BGQJy}6KiP%PfJYn2>yw-K
z%ouqm@8k<%<ekjNugxwQpO#rvTr$~;--L|^WIXre5`J$15c3uX$W8H?c_~F|lh5;u
z@~Ja0Fx+A<$w<vktt`GZ`4hiB8z)GM@?>>^OdfG$Yvu{8j}ZdN$%FK8`z97=r>1~o
zq=+9REC3=DL2_()`MHV3*+tx7PDXJ_Vsf@7bCDKEfj)@P1`%+d7lCaQoxD;|o<|AU
z#%qGT!fGI;AXgTNgIKBz3=ESSg>(|Ih-)(4Vku6|Nh>m7U|`T>yu}$GpPZ9eTpS+{
z4q%8irVI=WOBom#iuo8A7#J8i7+4qu82P}k3dM4M;T%Pfziu%Xm*y5pf-DEy2?|C{
zmLhYIfa2si!cyuW=ig#aPc4a0&d*s3GMfc#wiII(N&wy$u2BP-SY!#(i^D{al=b9B
z5s`YZu^b>{L2<SbWG)-nTscM|#wrP<fC8mjJ($8|CXgZq1_lrf(%=lz>BGRlP{WYL
zkiwYFRHRhHkj0q7l)}`?RKt+PRKqCAkj5m*5DZcQ7q5p%Koq1fi!(?vv@$`Ir7#6E
zXtGo}x>l4FB_@|Blw_nTfZV8%mR|(sq-U0;<|$<6rKDDX$asikQGPC17L>n=Q%e*g
z^lq_d=A~pNrxyDaF@xfa`4$&QX?$*eN@`Mk5h$pOKryb#1P<?89GQ73V8b903Qj@b
zVB(EWg}4PQyA9-Hkk=WQxELjvs$|i_3aWatjhM{jCnBfoZ9$=qEoyi{5+G;X;tEU5
zDNS`PD#|a?WCgpX2$a?#ZnOp|vjY)eCxHpDyEuwdOTbRr3vv?3AO=QI^l30kFfqX+
zkrh;AIZieeYfQ)H2#_{pJ8v-;C6*WAv=f{XixSI^fvg0F0;3e899mkG7eALE2y!so
zhehDj4YL0hTV`HyYEem%7)Sv)@40|t2a@P-F_wWn4N?V)JM^gFNKApm(do&)65>3_
z>2z|DL|h=)JbRFN%-~?W#gdYloDT_1aKM2HaOknem*l5pCYRg<`Gf<MtQkSMRS4BD
zo0TQ!F){{BJ|tC<0&;$lG^p6&$}BD}O-d=LEJ!T^Wpz+eD@jdHEh>@#Ni!$rRThDK
zRRjtEO=fU1yTz3kpO%_fQd*Q+Tm*I}*ezfJ<e(xM1_p-vljljx*K1-VDR7w^UzV7Y
znUYwN3Xb$*P(hmvO4%T{gQ5at6gWS&F)}ceFk~^-Fg7#RGM0dIVHR@<Ll#RjQ&Ce5
zV-_nor#6)^WU<wN@}x*BlO#h8V+tcE*Gf)qkrb$BOkt5^NMXrlC{C(jOc9l2sA0-x
zDatBgPZ4WoY+_7duVIMisA0_F1gm3|V5nuDz>xO=X7~igBEA|%u%%qtOvQ^z*i*zo
z`YQI7aD$xX!Vnu1!&J*s%L;ZVYcpdFV-1T9$k|LFR=p$xSTjpAQ}MkL_7n+_W{`v=
zLl#dn*kKbGiy5FIyv-ozu@<c<VNa0)sVrO*&X5-Zb_{<u(*(w%DAZ7xz*uZj!k!`k
zQd`4_WMV)KV;0CXd!}sG35-QaCG08EAVql`CG06eAXbVnvd?RoYZz-7;`wTrYZ&7B
zYZz;o(?I2y%;c+*J2`H#73Y8opviqwx=d9vlVzo3c&j)w^T0`^Sg%TPvb&TbQ<ePW
z7gF+kRji<*ORor2yjQ7$)N&(Ks!V<%<<C>(0Ln7~U}EwisZu5kbq<q{OF2wFD&;Zx
zn3P++rU0b!1f|)cK#(?2Vl4^+u|Ns6C>X>7Csaoe3zS-KvE?Uar6!l$;!Mpe%}p&z
zEJ?k^oST_<i#a#30-TV+iC2>ioWCFfAt245Bn@hHq}*aIOUwaf+<Hjc16(!U62_8x
zKZ3Fz$a+vw3Mxi87$q2)7}*%v7=@U47<s^KAx1t%5hzQ5kqIm=#K^%Y!^p(Q!NkGH
z#>fTMQKgKYYbIZi7H1Ti{6bo<UKx}vd5TLCOEQylQ%f@PQ;I}Ejs{iOMK&N7sL&_^
z<(?u}5Z4_<fa<j(e-I0lH;ZCGhVVk`{p8e~99Z2C%6OWLMHV2H;H(U)9c4kDlVV_C
z;BaCQXXG;D;pgGtVBuipVCUfEVCCTBVB-;*JWnQzaqebL*~5(BRvfrZHo1{glt&NA
zGn4<wE#Lw*6d}#j$t&c;yi7oLC4(Hq5g(tKmst`Ye~UF9l!A&tmTN+aKWJH+U7lE!
zUJP>~$hF`&DN12rV6dF*q2O+IOVBs9q$o2P+#ZZi1l9SPj76!Sf|L;w(O_L*f)ivl
zsM>Izyj>x+-l#Y+w;(4~p*SPIG$%zNDODj6z0s?XU!;&%np;q*05(}qld}li)&sc|
zl0w1FJ5bZF$PnZw_Vm=e)S}E}unUSnfe5Xn!5#voEqKzZXJ7yedNMLF6oX1Z1||tc
z4kk@TKTXb}SddfWKtw!<NB|KaPv7DwD9SHLEh?!5>%GNRoLW+nT2zz-QULZOh=2tc
z*u$W(mIT=Wswz0-SeW>@c-V@P8T=R+G`WgECf{NO`w{G+TO44vUT$I$+*d_lPlEj{
W46+AgB9d1?HWqPCj#iRqtOWpoz7tRY

delta 2119
zcmX>b{M&`gn3tF9;Esc_n(7<5beM&T7#J8*8KM|d7^0X`7^9d|#3y?&2RNj%riiA9
zwJ<a@M)9>XurNgN2Qz3&+!FLnEh)-O&Ph!y%8O4dDoU(O)MRI1;8IXfP(YE|{Dhg8
zb@BlfzRi<W92xa<!P*pTQ5AvoYosKWBx-^Uu(PvMfGC`t$tBMU)-rh_mnxLCpGyV8
z+Wdx#hgmF&IWx}?L>LvZGB7ZJiC+nm`S_I7a!YecGRrbk%OR$LU7MMg4s`{LJ2{Zg
zfKM02)suVq%ow>RAK?pO<en_augxetnNeAIavZ-R8yCn9&dII(-U1+I6bH!s_{_YN
zB9+Mx_(l0t85kI%*h?}}vr{XJqb4&8=(Dkdv?xxt5Xj_{M73tE!1@>gkgO~N1H&zD
z-^AkV)D%Zha1`-^g!w>(JV=f$FF!Z2IJ<}w%*iM&NlebxWG+$%DbNBD8Xy8507YOs
zg(vS2l;=}HwezuHubc`<G03Gwq9F75Kz=9&c_O|zH79Lyvye^_s*)mo1_lOArduo^
z`66u)hw&C?e0*|FVsUYNJUGxHb{aA;Ff3(YU?>K;oq>^qfrU|kkq->3&|JbVoTCWx
z<}K#p(%d3(kek4^g2GjkrN|g0AU}DIuoNh1G2CKLPc4a0&d*s3GM$Bifq{Whim?hU
zMDGgM)Pu|{G6m_zVJ1k*9OQA9;*$IVh~vRQ2MV=Y93Xo@A+ix<5gXVdIYuGIDk+qJ
z1jUjbOldMRNErhI11kf=WLB}O^_CzzusMVqBmwf_Ev~S{oYGX+qN4mFO;)IHVUk54
z=M`CjxL}8Y35e^8Q%mAOuG<E39S_J_Mv&_?7$umP;BI6GC+_&f$#LS1xWfa<)?3U)
ziRHL$1&4i6V)=fMrH~+zVw6J<eRz-*C6>cOgk^G!#EyD?kXPV-F9OFiD0ptMW#$#9
z7L^o<fE0j}qzx!_L8AW_V_6X>tfJU5^HMUCQ!%0{F(m~Y7)L<vf}}V;MlnV%MhQkC
zMgb;xl(9hys>#zO<ATAKSc5EK28VhSOG;*Pei0~}p_|VhUy`4anOt%SWIP8rtU<|H
z2;JkG-K6F*vbixZFjTotc90gCyg?x*U6T`%i6ucPjVrUbxHKuHq_QBj2o!0z*faA=
zQqxn5io`(D%!zrGMIetCfx<|Wxkwo#%9R(NmYP^nT9jH`1a>tz8Gs2;S}2lYU|_g0
z`JjwEpDIeyocu>ghEZU$q_UokBFG$`;*!LY%;enEl8pS6B4JQSgR*In1&9R-i6UDN
z%N|5Hg9uj;AvL*ISzgx!#01;M#K6D+DyND;F6MAzVdOI7;pgGtVBuip;N)QC;NxJM
zd{;S(ap`6cmBWm@;9?t`3^Vi6CpU76^68;E(o}5$4?<CV;^g~kVQdB<>jEcRtIKGB
z^FJu|Z?R>UCl;j_YcdvrU4saKAO;2ouF3W4?ixiP*J&~q1%u+15ge~*kqJ&nB9s59
zr`D?$C*~I9q$(6=<d^28C?usSBw|)bdYYU?;IbKHG9)HJ39JYd1VuWaU}aBF%}XuH
zOa@z8BmuIS31TzI)!3|;Vq{<_W~&D!Bqj+)4kk@TKTXaeZ;+pTK!h)d@B<MbpWWgp
zD9SHLEh?!5yZshhacW6PYEe-DNC7wyKm;sQz;P7Fz`*bdmRLFDSeW>@c-V?SqMBSq
zAcJqQ=9N0;7F2>gbc-XeG`FBqFE<h4t0J%`!G0D5*#pvx<Q0&OMeLK;YRWVE0RSWo
B;?@8F

diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py
index e6bcf81..fbcf5aa 100644
--- a/multimodal/datasets/data_sample.py
+++ b/multimodal/datasets/data_sample.py
@@ -26,9 +26,9 @@ import numpy as np
 import numpy.ma as ma
 
 
-class Metriclearn_array(ma.MaskedArray, np.ndarray):
+class MultiModalArray(ma.MaskedArray, np.ndarray):
     """
-    Metriclearn_array inherit from numpy ndarray
+    MultiModalArray inherit from numpy ndarray
 
 
     Parameters
@@ -74,21 +74,21 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
 
     :Example:
 
-    >>> from metriclearning.datasets.base import load_dict
-    >>> from metriclearning.tests.datasets.get_dataset_path import get_dataset_path
-    >>> from metriclearning.datasets.data_sample import DataSample
+    >>> from multimodal.datasets.base import load_dict
+    >>> from multimodal.tests.datasets.get_dataset_path import get_dataset_path
+    >>> from multimodal.datasets.data_sample import DataSample
     >>> file = 'input_x_dic.pkl'
     >>> data = load_dict(get_dataset_path(file))
     >>> print(data.__class__)
     <class 'dict'>
-    >>> metric = Metriclearn_array(data)
-    >>> metric.shape
+    >>> multiviews = MultiModalArray(data)
+    >>> multiviews.shape
     (120, 240)
-    >>> metric.keys
+    >>> multiviews.keys
     dict_keys([0, 1])
-    >>> metric.shapes_int
+    >>> multiviews.shapes_int
     [120, 120]
-    >>> metric.n_views
+    >>> multiviews.n_views
     2
 
 
@@ -126,7 +126,7 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
                     view_ind = np.array([0, data.shape[1]//2, data.shape[1]])
                 else:
                     view_ind = np.array([0, data.shape[1]])
-                view_ind, n_views = cls._validate_views_ind(view_ind,
+                view_ind, n_views = cls._first_validate_views_ind(view_ind,
                                                             data.shape[1])
             shapes_int = [  in2-in1  for in1, in2 in  zip(view_ind, view_ind[1: ])]
             new_data = data
@@ -164,11 +164,11 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
 
     def __array_finalize__(self, obj):
         if obj is None: return
-        super(Metriclearn_array, self).__array_finalize__(obj)
+        super(MultiModalArray, self).__array_finalize__(obj)
         self.shapes_int = getattr(obj, 'shapes_int', None)
         self.n_views = getattr(obj, 'n_views', None)
         self.keys = getattr(obj, 'keys', None)
-        self.views_ind_self = getattr(obj, 'views_ind_self', None)
+        self.views_ind = getattr(obj, 'views_ind', None)
 
     def get_col(self, view, col):
         start = np.sum(np.asarray(self.shapes_int[0: view]))
@@ -179,6 +179,13 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
         stop = int(start + self.shapes_int[view])
         return self.data[:, start:stop]
 
+    def _extract_view(self, ind_view):
+        """Extract the view for the given index ind_view from this array's data."""
+        if self.view_mode_ == "indices":
+            return self.data[:, self.views_ind[ind_view]]
+        else:
+            return self.data[:, self.views_ind[ind_view]:self.views_ind[ind_view+1]]
+
     def set_view(self, view, data):
         start = int(np.sum(np.asarray(self.shapes_int[0: view])))
         stop = int(start + self.shapes_int[view])
@@ -214,7 +221,7 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
         return dico
 
     @staticmethod
-    def _validate_views_ind(views_ind, n_features):
+    def _first_validate_views_ind(views_ind, n_features):
         """Ensure proper format for views_ind and return number of views."""
         views_ind = np.array(views_ind)
         if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
@@ -231,6 +238,46 @@ class Metriclearn_array(ma.MaskedArray, np.ndarray):
         return (views_ind, n_views)
 
 
+    def _validate_views_ind(self, views_ind, n_features):
+        """Ensure proper format for views_ind and return number of views."""
+        views_ind = np.array(views_ind)
+        if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
+            if np.any(views_ind[:-1] >= views_ind[1:]):
+                raise ValueError("Values in views_ind must be sorted.")
+            if views_ind[0] < 0 or views_ind[-1] > n_features:
+                raise ValueError("Values in views_ind are not in a correct "
+                                 + "range for the provided data.")
+            self.view_mode_ = "slices"
+            n_views = views_ind.shape[0]-1
+        else:
+            if views_ind.ndim == 1:
+                if not views_ind.dtype == np.object:
+                    raise ValueError("The format of views_ind is not "
+                                     + "supported.")
+                for ind, val in enumerate(views_ind):
+                    views_ind[ind] = np.array(val)
+                    if not np.issubdtype(views_ind[ind].dtype, np.integer):
+                        raise ValueError("Values in views_ind must be "
+                                         + "integers.")
+                    if views_ind[ind].min() < 0 \
+                            or views_ind[ind].max() >= n_features:
+                        raise ValueError("Values in views_ind are not in a "
+                                         + "correct range for the provided "
+                                         + "data.")
+            elif views_ind.ndim == 2:
+                if not np.issubdtype(views_ind.dtype, np.integer):
+                    raise ValueError("Values in views_ind must be integers.")
+                if views_ind.min() < 0 or views_ind.max() >= n_features:
+                    raise ValueError("Values in views_ind are not in a "
+                                     + "correct range for the provided data.")
+            else:
+                raise ValueError("The format of views_ind is not supported.")
+            self.view_mode_ = "indices"
+            n_views = views_ind.shape[0]
+        self.views_ind = views_ind
+        self.n_views = n_views
+        return (views_ind, n_views)
+
 class DataSample(dict):
     """
     A DataSample instance
@@ -247,7 +294,7 @@ class DataSample(dict):
     <class 'dict'>
     >>> s = DataSample(data)
     >>> type(s.data)
-    <class 'metriclearning.datasets.data_sample.Metriclearn_array'>
+    <class 'multimodal.datasets.data_sample.MultiModalArray'>
 
 
     - Input:
@@ -260,7 +307,7 @@ class DataSample(dict):
     Attributes
     ----------
 
-    data   : { array like}  Metriclearn_array
+    data   : { array like}  MultiModalArray
     """
 
     def __init__(self, data=None, **kwargs):
@@ -270,7 +317,7 @@ class DataSample(dict):
         super(DataSample, self).__init__(kwargs)
         self._data = None # Metriclearn_array(np.zeros((0,0)))
         if data is not None:
-            self._data = Metriclearn_array(data)
+            self._data = MultiModalArray(data)
 
 
     @property
@@ -281,10 +328,10 @@ class DataSample(dict):
 
     @data.setter
     def data(self, data):
-        if isinstance(data, (Metriclearn_array, np.ndarray, ma.MaskedArray, np.generic)):
+        if isinstance(data, (MultiModalArray, np.ndarray, ma.MaskedArray, np.generic)):
             self._data = data
         else:
-            raise TypeError("sample should be a Metriclearn_array.")
+            raise TypeError("sample should be a MultiModalArray or numpy array.")
 
 
 
diff --git a/multimodal/kernels/lpMKL.py b/multimodal/kernels/lpMKL.py
index 20ccb24..bdcfe13 100644
--- a/multimodal/kernels/lpMKL.py
+++ b/multimodal/kernels/lpMKL.py
@@ -5,7 +5,6 @@ from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.validation import check_X_y
 from sklearn.utils.validation  import check_array
 from sklearn.utils.validation  import check_is_fitted
-from metriclearning.datasets.data_sample import DataSample, Metriclearn_array
 from metriclearning.mkernel import MKernel
 
 
diff --git a/multimodal/kernels/mkernel.py b/multimodal/kernels/mkernel.py
index 9c4644c..ac1ef5c 100644
--- a/multimodal/kernels/mkernel.py
+++ b/multimodal/kernels/mkernel.py
@@ -2,7 +2,7 @@ import numpy as np
 import scipy as sp
 from sklearn.metrics.pairwise import pairwise_kernels
 from abc import ABCMeta
-from metriclearning.datasets.data_sample import DataSample, Metriclearn_array
+from multimodal.datasets.data_sample import DataSample, MultiModalArray
 
 
 class MKernel(metaclass=ABCMeta):
@@ -36,26 +36,26 @@ class MKernel(metaclass=ABCMeta):
         if Y is None:
             y = Y
         if isinstance(X, np.ndarray) and X.ndim == 1:
-            X_= Metriclearn_array(X, views_ind)
+            X_= MultiModalArray(X, views_ind)
             for v in range(X.shape[0]):
                 if Y is not None:  y = Y.get_view(v) #  y = self._global_check_pairwise(X_, Y, v)
                 kernel_dict[v] = self._get_kernel(X[v], y)
         elif isinstance(X, dict):
-            X_= Metriclearn_array(X)
+            X_= MultiModalArray(X)
             for v in X.keys():
                 if Y is not None:  y = Y.get_view(v) # y = self._global_check_pairwise(X_, Y, v)
                 kernel_dict[v] = self._get_kernel(X[v], y)
         elif isinstance(X, np.ndarray) and X.ndim > 1:
-            X_ = Metriclearn_array(X, views_ind)
+            X_ = MultiModalArray(X, views_ind)
             X = X_
-        if isinstance(X, Metriclearn_array):
+        if isinstance(X, MultiModalArray):
             for v in range(X.n_views):
                 if Y is not None:   y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v)
                 kernel_dict[v] = self._get_kernel(X.get_view(v), y)
             X_= X
-        if not isinstance(X_, Metriclearn_array):
+        if not isinstance(X_, MultiModalArray):
             raise TypeError("Input format is not reconized")
-        K_ = Metriclearn_array(kernel_dict)
+        K_ = MultiModalArray(kernel_dict)
         return X_, K_
 
     def _calc_nystrom(self, kernels, n_approx):
-- 
GitLab