Commit 122d71c1 authored by Baptiste Bauvin


parent c33aa00e
Pipeline #8599 failed
__version__ = "0.0.dev0"
__version__ = "0.0.0"
@@ -110,17 +110,10 @@ class UBoosting(metaclass=ABCMeta):
return X
def _global_X_transform(self, X, views_ind=None):
X_ = None
if isinstance(X, MultiModalData):
X_ = X
elif isinstance(X, sp.spmatrix):
X_ = MultiModalSparseArray(X, views_ind)
else:
X_ = MultiModalArray(X, views_ind)
# if not isinstance(X_, MultiModalData):
# try:
# X_ = np.asarray(X)
# X_ = MultiModalArray(X_)
# except Exception as e:
# raise TypeError('Reshape your data')
return X_
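
A minimal sketch (plain NumPy, outside the package API) of the convention `_global_X_transform` relies on: `views_ind` holds the column boundaries of each view inside one stacked feature matrix, so wrapping the data only records how to slice it per view.

import numpy as np

X = np.random.rand(5, 7)           # 5 samples, 7 features in total
views_ind = np.array([0, 3, 7])    # view 0 -> columns 0:3, view 1 -> columns 3:7

# hypothetical slicing equivalent to what the MultiModalArray wrapper stores
views = [X[:, start:stop] for start, stop in zip(views_ind[:-1], views_ind[1:])]
assert views[0].shape == (5, 3) and views[1].shape == (5, 4)
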
@@ -53,7 +53,7 @@ from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree._tree import DTYPE
from sklearn.tree import BaseDecisionTree
from sklearn.utils import check_array, check_X_y, check_random_state
from sklearn.utils import check_X_y, check_random_state
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted, has_fit_parameter
from cvxopt import solvers, matrix, spdiag, exp, spmatrix, mul, div
......
@@ -133,7 +133,6 @@ class MultiModalData(metaclass=ABCMeta):
def _validate_views_ind(self, views_ind, n_features):
"""Ensure proper format for views_ind and return number of views."""
# views_ind = np.array(views_ind)
if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
if len(views_ind) > 2 and np.any(views_ind[:-1] >= views_ind[1:]):
raise ValueError("Values in views_ind must be sorted.")
@@ -180,7 +179,6 @@ class MultiModalSparseInfo():
new_data = np.ndarray([])
n_views = data.size
thekeys = None
# views_ind_self = None
view_mode = 'slices'
if (sp.issparse(data)) and data.ndim > 1:
@@ -196,7 +194,6 @@ class MultiModalSparseInfo():
views_ind = np.array([0, data.shape[1]])
new_data = data
# views_ind_self = views_ind
views_ind, n_views, view_mode = self._first_validate_views_ind(views_ind,
data.shape[1])
if views_ind.ndim == 1 and view_mode.startswith("slicing"):
@@ -365,8 +362,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
views_ind.append(dat_values.shape[1] + views_ind[index])
index += 1
thekeys = data.keys()
# if new_data.ndim < 2 :
# raise ValueError('Reshape your data')
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
elif isinstance(data, np.ndarray) and views_ind is None and data.ndim == 1:
@@ -421,21 +416,11 @@ class MultiModalArray(np.ndarray, MultiModalData):
raise ValueError('Reshape your data')
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
# if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]:
# raise ValueError('Reshape your data')
# views_ind_self = views_ind
# if new_data.shape[1] < 1:
# msg = ("%d feature\(s\) \\(shape=\%s\) while a minimum of \\d* "
# "is required.") % (new_data.shape[1], str(new_data.shape))
# # "%d feature\(s\) \(shape=\(%d, %d\)\) while a minimum of \d* is required." % (new_data.shape[1], new_data.shape[0], new_data.shape[1])
# raise ValueError(msg)
views_ind, n_views, view_mode = cls._first_validate_views_ind(views_ind,
new_data.shape[1])
if views_ind.ndim == 1 and view_mode.startswith("slices"):
shapes_int = [in2 - in1 for in1, in2 in zip(views_ind, views_ind[1:])]
# obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls)
# bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask)
if hasattr(new_data, "mask"): # pragma: no cover
obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
@@ -448,7 +433,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
obj.views_ind = views_ind
obj.shapes_int = shapes_int
obj.n_views = n_views
# obj.keys = thekeys
return obj
@staticmethod
@@ -478,8 +462,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
isinstance(dat_values, np.ndarray) or sp.issparse(dat_values):
new_data = dat_values
else:
new_data = dat_values.view(np.ndarray) # ma.masked_array(dat_values, mask=ma.nomask) dat_values.view(ma.MaskedArray) #(
# new_data.mask = ma.nomask
new_data = dat_values.view(np.ndarray)
else:
if isinstance(dat_values, np.ndarray):
new_data = np.hstack((new_data, dat_values))
@@ -488,15 +471,13 @@ class MultiModalArray(np.ndarray, MultiModalData):
elif sp.issparse(dat_values):
new_data = sp.hstack((new_data, dat_values))
else:
new_data = np.hstack((new_data, dat_values.view(np.ndarray) ) ) # ma.masked_array(dat_values, mask=ma.nomask
new_data = np.hstack((new_data, dat_values.view(np.ndarray) ) )
return new_data
def __array_finalize__(self, obj):
if obj is None: return
# super(MultiModalArray, self).__array_finalize__(obj)
self.shapes_int = getattr(obj, 'shapes_int', None)
self.n_views = getattr(obj, 'n_views', None)
# self.keys = getattr(obj, 'keys', None)
self.views_ind = getattr(obj, 'views_ind', None)
self.view_mode_ = getattr(obj, 'view_mode_', None)
@@ -537,22 +518,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
stop = int(np.sum(np.asarray(self.shapes_int[0: view+1])))
return self[row, start:stop]
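
The row/column arithmetic above can be illustrated with a small stand-alone sketch, assuming `shapes_int` holds the per-view feature counts as in the constructor further up:

import numpy as np

shapes_int = [3, 4, 2]                                   # features per view
view, row = 1, 0
start = int(np.sum(np.asarray(shapes_int[0:view])))      # 3
stop = int(np.sum(np.asarray(shapes_int[0:view + 1])))   # 7
X = np.random.rand(6, 9)
assert X[row, start:stop].shape == (4,)                  # columns 3..6 belong to view 1
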
# def add_view(self, data):
# if len(self.shape) > 0:
# if data.shape[0] == self.data.shape[0]:
# print(self.data.shape, data.shape)
# new_data = np.hstack((self.data, data))
# self.shapes_int.append(data.shape[1])
# self.n_views +=1
# print(new_data.shape)
#
# else:
# raise ValueError("New view can't initialazed")
# # self.shapes_int= [data.shape[1]]
# # self.data.reshape(data.shape[0],)
# # np.insert(self.data, data, 0)
# # self.n_views = 1
def _todict(self):
dico = {}
for view in range(self.n_views):
@@ -560,8 +525,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
return dico
class DataSample(dict):
"""
A DataSample instance
@@ -596,7 +559,6 @@ class DataSample(dict):
def __init__(self, data=None, **kwargs):
# The dictionary that contains the sample
super(DataSample, self).__init__(kwargs)
self._data = None # Metriclearn_arrayMultiModalArray(np.zeros((0,0)))
......
@@ -176,7 +176,6 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
self.regression_ = True
else:
raise ValueError("MKL algorithms is a binary classifier")
# " or performs regression with float target")
self.y_ = y
n = self.K_.shape[0]
self._calc_nystrom(self.K_, n)
@@ -233,7 +232,7 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
# alpha fixed -> calculate gammas
weights_old = weights.copy()
# first the ||f_t||^2 todo wtf is the formula used here????
# first the ||f_t||^2 todo what is the formula used here ?
ft2 = np.zeros(views)
for v in range(0, views):
if self.nystrom_param < 1 and self.use_approx:
@@ -274,16 +273,9 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
stuck = True
max_diff = np.max([max_diff_gamma, diff_alpha])
# print([max_diff_gamma, diff_alpha]) # print if convergence is interesting
C_old = C.copy()
rounds = rounds + 1
# print("\nlearned the weights:")
# np.set_printoptions(precision=3, suppress=True)
# print(weights)
# print("")
# print if resulting convergence is of interest
# print("convergence of ", max_diff, " at step ", rounds, "/500")
if stuck:
return C_old, weights_old
else:
......
@@ -82,7 +82,6 @@ class MKernel(metaclass=ABCMeta):
elif isinstance(self.kernel, list):
ind = min(v, len(self.kernel) - 1)
met = self.kernel[ind]
# Y,
return pairwise_kernels(X, Y, metric=met,
filter_params=True, **params)
@@ -113,7 +112,6 @@ class MKernel(metaclass=ABCMeta):
"""
kernel_dict = {}
flag_sparse = False
X_ = None
y = None
if Y is None:
y = Y
@@ -124,22 +122,10 @@ class MKernel(metaclass=ABCMeta):
X = X_
if isinstance(X, MultiModalArray):
X_ = X
# if not isinstance(X_, MultiModalArray):
# try:
# X_ = np.asarray(X)
# X_ = MultiModalArray(X_, views_ind)
# except Exception as e:
# pass
# raise TypeError('Reshape your data')
if isinstance(X_, MultiModalArray):
for v in range(X.n_views):
if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v)
kernel_dict[v] = self._get_kernel(X_.get_view(v), y)
# if not isinstance(X_, MultiModalArray):
# if sp.sparse.issparse(X):
# raise TypeError("Nonsensical Error: no sparse data are allowed as input")
# raise TypeError('Reshape your data')
K_ = MultiModalArray(kernel_dict)
return X_, K_
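
A hedged sketch of what `_global_kernel_transform` produces per view (using scikit-learn's `pairwise_kernels`, as in `_get_kernel` above); the per-view Gram matrices are all n x n, which is what allows them to be wrapped together in a single `MultiModalArray`:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

X_views = {0: np.random.rand(10, 3), 1: np.random.rand(10, 5)}   # toy two-view data
kernel_dict = {v: pairwise_kernels(Xv, metric="rbf") for v, Xv in X_views.items()}
assert all(K.shape == (10, 10) for K in kernel_dict.values())
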
......
@@ -166,7 +166,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
self.nystrom_param = nystrom_param
self.lmbda = lmbda
self.eta = eta
# self.regression_params = regression_params
self.learn_A = learn_A
self.learn_w = learn_w
self.n_loops = n_loops
@@ -226,10 +225,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
self.regression_ = False
self.X_, self.K_= self._global_kernel_transform(X, views_ind=views_ind)
check_X_y(self.X_, y)
# if type_of_target(y) not in "binary":
# raise ValueError("target should be binary")
if type_of_target(y) in "binary":
check_classification_targets(y)
@@ -244,7 +239,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
" or performs regression with float target")
self.y_ = y
# n = X[0].shape[0]
n = self.K_.shape[0]
self.n_approx = int(np.floor(self.nystrom_param * n)) # number of samples in approximation, equals n if no approx.
if self.nystrom_param < 1:
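
A worked example of the Nyström sample count computed just above (same formula, toy numbers):

import numpy as np

n = 250                  # training samples
nystrom_param = 0.2      # fraction kept in the approximation
n_approx = int(np.floor(nystrom_param * n))
assert n_approx == 50    # with nystrom_param == 1, n_approx == n (no approximation)
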
@@ -257,7 +251,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
if self.warning_message:
import logging
logging.warning("warning appears during fit process" + str(self.warning_message))
# print("warning appears during fit process", self.warning_message)
return self
def _learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
@@ -335,11 +328,13 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
# first invert A
try:
# Changed because of numerical instability
# A_inv = np.linalg.pinv(A + 1e-09 * np.eye(views * self.n_approx))
cond_A = np.linalg.cond(A + 1e-08 * np.eye(views * self.n_approx))
if cond_A < 10:
A_inv = spli.pinv(A + 1e-8 * np.eye(views * self.n_approx))
else:
# Changed because of numerical instability
# A_inv = self._inverse_precond_LU(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A") # self._inverse_precond_jacobi(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A")
A_inv = self._inv_best_precond(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A")
except spli.LinAlgError: # pragma: no cover
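
The guarded inversion above can be read as the following stand-alone sketch (names are illustrative; `_inv_best_precond` is only approximated here by a Jacobi-preconditioned pseudo-inverse, one of the two options compared further down):

import numpy as np
import scipy.linalg as spli

def guarded_pinv(A, ridge=1e-8, cond_threshold=10):
    A_reg = A + ridge * np.eye(A.shape[0])     # same small ridge as above
    if np.linalg.cond(A_reg) < cond_threshold:
        return spli.pinv(A_reg)
    # fallback: Jacobi preconditioning, then invert the better-conditioned product
    J_inv = np.diag(1.0 / np.diag(A_reg))
    return np.dot(spli.pinv(np.dot(J_inv, A_reg)), J_inv)
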
@@ -355,7 +350,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
except ValueError: # pragma: no cover
self.warning_message["ValueError"] = self.warning_message.get("ValueError", 0) + 1
return A_prev, g_prev, w_prev
# print("A_inv ",np.sum(A_inv))
# then calculate g (block-sparse multiplications in loop) using A_inv
for v in range(views):
for vv in range(views):
@@ -365,11 +359,13 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
vv * self.n_approx:(vv + 1) * self.n_approx]
g[v * self.n_approx:(v + 1) * self.n_approx, 0] = np.dot(w[v] * np.transpose(self.U_dict[v]), self.y_)
try:
# Changed because of numerical instability
# minA_inv = np.min(np.absolute(A_inv)) , rcond=self.r_cond*minA_inv
# here A_inv isn't actually inverse of A (changed in above loop)
if np.linalg.cond(A_inv) < 10:
g = np.dot(spli.pinv(A_inv), g)
else:
# Changed because of numerical instability
# g = np.dot(self._inverse_precond_LU(A_inv, pos="precond_A_1"), g)
g = np.dot(self._inv_best_precond(A_inv, pos="precond_A_1"), g)
except spli.LinAlgError: # pragma: no cover
@@ -413,9 +409,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
Pm, L, U = spli.lu(A)
M = spli.inv(np.dot(L, U))
Pre_lu = np.dot(M, A)
# print("cond a", np.linalg.cond(A))
# print("cond Pre_J", np.linalg.cond(Pre_J))
# print("cond Pre_lu", np.linalg.cond(Pre_lu))
if np.linalg.cond(A) > np.linalg.cond(Pre_J) and np.linalg.cond(Pre_J) <= np.linalg.cond(Pre_lu):
P_inv = spli.pinv(Pre_J)
A_inv = np.dot(P_inv, J_1)
@@ -430,7 +423,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
def _inverse_precond_jacobi(self, A, pos="precond_A"): # pragma: no cover
J_1 = np.diag(1.0/np.diag(A))
# J_1 = np.linalg.inv(J)
P = np.dot(J_1, A)
if np.linalg.cond(A) > np.linalg.cond(P):
P_inv = spli.pinv(P)
@@ -532,7 +524,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
"""
views = len(self.U_dict)
# t = test_kernels[0].shape[0]
t = test_kernels.shape[0]
K = np.zeros((t, views * self.n_approx))
for v in range(views):
@@ -580,7 +571,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
rounds = 0
L = lmbda * np.linalg.norm(np.dot(g, g.T))
# print("L ", L)
while not converged and rounds < 100:
# no line search - this has worked well enough experimentally
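
A hedged sketch of the fixed-step update this loop suggests, assuming a proximal-gradient step with constant step size 1/L in place of a line search (the actual MVML objective and proximal operator are not reproduced here):

import numpy as np

def soft_threshold(x, thresh):
    # proximal operator of the l1 norm
    return np.sign(x) * np.maximum(np.abs(x) - thresh, 0.0)

def fixed_step_update(A, grad, L, lmbda):
    # one iteration with constant step 1/L instead of a line search
    return soft_threshold(A - grad / L, lmbda / L)
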
......