Commit 122d71c1 authored by Baptiste Bauvin


parent c33aa00e
Pipeline #8599 failed
__version__ = "0.0.dev0"
__version__ = "0.0.0"
@@ -110,17 +110,10 @@ class UBoosting(metaclass=ABCMeta):
return X
def _global_X_transform(self, X, views_ind=None):
X_ = None
if isinstance(X, MultiModalData):
X_ = X
elif isinstance(X, sp.spmatrix):
X_ = MultiModalSparseArray(X, views_ind)
else:
X_ = MultiModalArray(X, views_ind)
# if not isinstance(X_, MultiModalData):
# try:
# X_ = np.asarray(X)
# X_ = MultiModalArray(X_)
# except Exception as e:
# raise TypeError('Reshape your data')
return X_
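
A minimal sketch (plain NumPy, outside the package API) of the convention `_global_X_transform` relies on: `views_ind` holds the column boundaries of each view inside one stacked feature matrix, so wrapping the data only records how to slice it per view.

import numpy as np

X = np.random.rand(5, 7)           # 5 samples, 7 features in total
views_ind = np.array([0, 3, 7])    # view 0 -> columns 0:3, view 1 -> columns 3:7

# hypothetical slicing equivalent to what the MultiModalArray wrapper stores
views = [X[:, start:stop] for start, stop in zip(views_ind[:-1], views_ind[1:])]
assert views[0].shape == (5, 3) and views[1].shape == (5, 4)
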
@@ -53,7 +53,7 @@ from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree._tree import DTYPE
from sklearn.tree import BaseDecisionTree
from sklearn.utils import check_array, check_X_y, check_random_state
from sklearn.utils import check_X_y, check_random_state
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted, has_fit_parameter
from cvxopt import solvers, matrix, spdiag, exp, spmatrix, mul, div
......
@@ -133,7 +133,6 @@ class MultiModalData(metaclass=ABCMeta):
def _validate_views_ind(self, views_ind, n_features):
"""Ensure proper format for views_ind and return number of views."""
# views_ind = np.array(views_ind)
if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1:
if len(views_ind) > 2 and np.any(views_ind[:-1] >= views_ind[1:]):
raise ValueError("Values in views_ind must be sorted.")
@@ -180,7 +179,6 @@ class MultiModalSparseInfo():
new_data = np.ndarray([])
n_views = data.size
thekeys = None
# views_ind_self = None
view_mode = 'slices'
if (sp.issparse(data)) and data.ndim > 1:
@@ -196,7 +194,6 @@ class MultiModalSparseInfo():
views_ind = np.array([0, data.shape[1]])
new_data = data
# views_ind_self = views_ind
views_ind, n_views, view_mode = self._first_validate_views_ind(views_ind,
data.shape[1])
if views_ind.ndim == 1 and view_mode.startswith("slicing"):
@@ -365,8 +362,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
views_ind.append(dat_values.shape[1] + views_ind[index])
index += 1
thekeys = data.keys()
# if new_data.ndim < 2 :
# raise ValueError('Reshape your data')
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
elif isinstance(data, np.ndarray) and views_ind is None and data.ndim == 1:
@@ -421,21 +416,11 @@ class MultiModalArray(np.ndarray, MultiModalData):
raise ValueError('Reshape your data')
if new_data.ndim > 1 and (new_data.shape == (1, 1) or new_data.shape == ()):
raise ValueError('Reshape your data')
# if views_ind.ndim < 2 and new_data.ndim < 2 and views_ind[-1] > new_data.shape[1]:
# raise ValueError('Reshape your data')
# views_ind_self = views_ind
# if new_data.shape[1] < 1:
# msg = ("%d feature\(s\) \\(shape=\%s\) while a minimum of \\d* "
# "is required.") % (new_data.shape[1], str(new_data.shape))
# # "%d feature\(s\) \(shape=\(%d, %d\)\) while a minimum of \d* is required." % (new_data.shape[1], new_data.shape[0], new_data.shape[1])
# raise ValueError(msg)
views_ind, n_views, view_mode = cls._first_validate_views_ind(views_ind,
new_data.shape[1])
if views_ind.ndim == 1 and view_mode.startswith("slices"):
shapes_int = [in2 - in1 for in1, in2 in zip(views_ind, views_ind[1:])]
# obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls)
# bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask)
if hasattr(new_data, "mask"): # pragma: no cover
obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
@@ -448,7 +433,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
obj.views_ind = views_ind
obj.shapes_int = shapes_int
obj.n_views = n_views
# obj.keys = thekeys
return obj
@staticmethod
@@ -478,8 +462,7 @@ class MultiModalArray(np.ndarray, MultiModalData):
isinstance(dat_values, np.ndarray) or sp.issparse(dat_values):
new_data = dat_values
else:
new_data = dat_values.view(np.ndarray) # ma.masked_array(dat_values, mask=ma.nomask) dat_values.view(ma.MaskedArray) #(
# new_data.mask = ma.nomask
new_data = dat_values.view(np.ndarray)
else:
if isinstance(dat_values, np.ndarray):
new_data = np.hstack((new_data, dat_values))
@@ -488,15 +471,13 @@ class MultiModalArray(np.ndarray, MultiModalData):
elif sp.issparse(dat_values):
new_data = sp.hstack((new_data, dat_values))
else:
new_data = np.hstack((new_data, dat_values.view(np.ndarray) ) ) # ma.masked_array(dat_values, mask=ma.nomask
new_data = np.hstack((new_data, dat_values.view(np.ndarray) ) )
return new_data
def __array_finalize__(self, obj):
if obj is None: return
# super(MultiModalArray, self).__array_finalize__(obj)
self.shapes_int = getattr(obj, 'shapes_int', None)
self.n_views = getattr(obj, 'n_views', None)
# self.keys = getattr(obj, 'keys', None)
self.views_ind = getattr(obj, 'views_ind', None)
self.view_mode_ = getattr(obj, 'view_mode_', None)
@@ -537,22 +518,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
stop = int(np.sum(np.asarray(self.shapes_int[0: view+1])))
return self[row, start:stop]
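
The row/column arithmetic above can be illustrated with a small stand-alone sketch, assuming `shapes_int` holds the per-view feature counts as in the constructor further up:

import numpy as np

shapes_int = [3, 4, 2]                                   # features per view
view, row = 1, 0
start = int(np.sum(np.asarray(shapes_int[0:view])))      # 3
stop = int(np.sum(np.asarray(shapes_int[0:view + 1])))   # 7
X = np.random.rand(6, 9)
assert X[row, start:stop].shape == (4,)                  # columns 3..6 belong to view 1
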
# def add_view(self, data):
# if len(self.shape) > 0:
# if data.shape[0] == self.data.shape[0]:
# print(self.data.shape, data.shape)
# new_data = np.hstack((self.data, data))
# self.shapes_int.append(data.shape[1])
# self.n_views +=1
# print(new_data.shape)
#
# else:
# raise ValueError("New view can't initialazed")
# # self.shapes_int= [data.shape[1]]
# # self.data.reshape(data.shape[0],)
# # np.insert(self.data, data, 0)
# # self.n_views = 1
def _todict(self):
dico = {}
for view in range(self.n_views):
@@ -560,8 +525,6 @@ class MultiModalArray(np.ndarray, MultiModalData):
return dico
class DataSample(dict):
"""
A DataSample instance
@@ -596,7 +559,6 @@ class DataSample(dict):
def __init__(self, data=None, **kwargs):
# The dictionary that contains the sample
super(DataSample, self).__init__(kwargs)
self._data = None # Metriclearn_arrayMultiModalArray(np.zeros((0,0)))
......
@@ -176,7 +176,6 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
self.regression_ = True
else:
raise ValueError("MKL algorithms is a binary classifier")
# " or performs regression with float target")
self.y_ = y
n = self.K_.shape[0]
self._calc_nystrom(self.K_, n)
@@ -233,7 +232,7 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
# alpha fixed -> calculate gammas
weights_old = weights.copy()
# first the ||f_t||^2 todo wtf is the formula used here????
# first the ||f_t||^2 todo what is the formula used here ?
ft2 = np.zeros(views)
for v in range(0, views):
if self.nystrom_param < 1 and self.use_approx:
@@ -274,16 +273,9 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel):
stuck = True
max_diff = np.max([max_diff_gamma, diff_alpha])
# print([max_diff_gamma, diff_alpha]) # print if convergence is interesting
C_old = C.copy()
rounds = rounds + 1
# print("\nlearned the weights:")
# np.set_printoptions(precision=3, suppress=True)
# print(weights)
# print("")
# print if resulting convergence is of interest
# print("convergence of ", max_diff, " at step ", rounds, "/500")
if stuck:
return C_old, weights_old
else:
......
@@ -82,7 +82,6 @@ class MKernel(metaclass=ABCMeta):
elif isinstance(self.kernel, list):
ind = min(v, len(self.kernel) - 1)
met = self.kernel[ind]
# Y,
return pairwise_kernels(X, Y, metric=met,
filter_params=True, **params)
@@ -113,7 +112,6 @@ class MKernel(metaclass=ABCMeta):
"""
kernel_dict = {}
flag_sparse = False
X_ = None
y = None
if Y is None:
y = Y
@@ -124,22 +122,10 @@ class MKernel(metaclass=ABCMeta):
X = X_
if isinstance(X, MultiModalArray):
X_ = X
# if not isinstance(X_, MultiModalArray):
# try:
# X_ = np.asarray(X)
# X_ = MultiModalArray(X_, views_ind)
# except Exception as e:
# pass
# raise TypeError('Reshape your data')
if isinstance(X_, MultiModalArray):
for v in range(X.n_views):
if Y is not None: y = Y.get_view(v) # y = self._global_check_pairwise(X, Y, v)
kernel_dict[v] = self._get_kernel(X_.get_view(v), y)
# if not isinstance(X_, MultiModalArray):
# if sp.sparse.issparse(X):
# raise TypeError("Nonsensical Error: no sparse data are allowed as input")
# raise TypeError('Reshape your data')
K_ = MultiModalArray(kernel_dict)
return X_, K_
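
A hedged sketch of what `_global_kernel_transform` produces per view (using scikit-learn's `pairwise_kernels`, as in `_get_kernel` above); the per-view Gram matrices are all n x n, which is what allows them to be wrapped together in a single `MultiModalArray`:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

X_views = {0: np.random.rand(10, 3), 1: np.random.rand(10, 5)}   # toy two-view data
kernel_dict = {v: pairwise_kernels(Xv, metric="rbf") for v, Xv in X_views.items()}
assert all(K.shape == (10, 10) for K in kernel_dict.values())
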
......
@@ -166,7 +166,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
self.nystrom_param = nystrom_param
self.lmbda = lmbda
self.eta = eta
# self.regression_params = regression_params
self.learn_A = learn_A
self.learn_w = learn_w
self.n_loops = n_loops
@@ -226,10 +225,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
self.regression_ = False
self.X_, self.K_= self._global_kernel_transform(X, views_ind=views_ind)
check_X_y(self.X_, y)
# if type_of_target(y) not in "binary":
# raise ValueError("target should be binary")
if type_of_target(y) in "binary":
check_classification_targets(y)
@@ -244,7 +239,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
" or performs regression with float target")
self.y_ = y
# n = X[0].shape[0]
n = self.K_.shape[0]
self.n_approx = int(np.floor(self.nystrom_param * n)) # number of samples in approximation, equals n if no approx.
if self.nystrom_param < 1:
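
A worked example of the Nyström sample count computed just above (same formula, toy numbers):

import numpy as np

n = 250                  # training samples
nystrom_param = 0.2      # fraction kept in the approximation
n_approx = int(np.floor(nystrom_param * n))
assert n_approx == 50    # with nystrom_param == 1, n_approx == n (no approximation)
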
@@ -257,7 +251,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
if self.warning_message:
import logging
logging.warning("warning appears during fit process" + str(self.warning_message))
# print("warning appears during fit process", self.warning_message)
return self
def _learn_mvml(self, learn_A=1, learn_w=0, n_loops=6):
@@ -335,11 +328,13 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
# first invert A
try:
# Changed because of numerical instability
# A_inv = np.linalg.pinv(A + 1e-09 * np.eye(views * self.n_approx))
cond_A = np.linalg.cond(A + 1e-08 * np.eye(views * self.n_approx))
if cond_A < 10:
A_inv = spli.pinv(A + 1e-8 * np.eye(views * self.n_approx))
else:
# Changed because of numerical instability
# A_inv = self._inverse_precond_LU(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A") # self._inverse_precond_jacobi(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A")
A_inv = self._inv_best_precond(A + 1e-8 * np.eye(views * self.n_approx), pos="precond_A")
except spli.LinAlgError: # pragma: no cover
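
The guarded inversion above can be read as the following stand-alone sketch (names are illustrative; `_inv_best_precond` is only approximated here by a Jacobi-preconditioned pseudo-inverse, one of the two options compared further down):

import numpy as np
import scipy.linalg as spli

def guarded_pinv(A, ridge=1e-8, cond_threshold=10):
    A_reg = A + ridge * np.eye(A.shape[0])     # same small ridge as above
    if np.linalg.cond(A_reg) < cond_threshold:
        return spli.pinv(A_reg)
    # fallback: Jacobi preconditioning, then invert the better-conditioned product
    J_inv = np.diag(1.0 / np.diag(A_reg))
    return np.dot(spli.pinv(np.dot(J_inv, A_reg)), J_inv)
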
@@ -355,7 +350,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
except ValueError: # pragma: no cover
self.warning_message["ValueError"] = self.warning_message.get("ValueError", 0) + 1
return A_prev, g_prev, w_prev
# print("A_inv ",np.sum(A_inv))
# then calculate g (block-sparse multiplications in loop) using A_inv
for v in range(views):
for vv in range(views):
@@ -365,11 +359,13 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
vv * self.n_approx:(vv + 1) * self.n_approx]
g[v * self.n_approx:(v + 1) * self.n_approx, 0] = np.dot(w[v] * np.transpose(self.U_dict[v]), self.y_)
try:
# Changed because of numerical instability
# minA_inv = np.min(np.absolute(A_inv)) , rcond=self.r_cond*minA_inv
# here A_inv isn't actually inverse of A (changed in above loop)
if np.linalg.cond(A_inv) < 10:
g = np.dot(spli.pinv(A_inv), g)
else:
# Changed because of numerical instability
# g = np.dot(self._inverse_precond_LU(A_inv, pos="precond_A_1"), g)
g = np.dot(self._inv_best_precond(A_inv, pos="precond_A_1"), g)
except spli.LinAlgError: # pragma: no cover
@@ -413,9 +409,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
Pm, L, U = spli.lu(A)
M = spli.inv(np.dot(L, U))
Pre_lu = np.dot(M, A)
# print("cond a", np.linalg.cond(A))
# print("cond Pre_J", np.linalg.cond(Pre_J))
# print("cond Pre_lu", np.linalg.cond(Pre_lu))
if np.linalg.cond(A) > np.linalg.cond(Pre_J) and np.linalg.cond(Pre_J) <= np.linalg.cond(Pre_lu):
P_inv = spli.pinv(Pre_J)
A_inv = np.dot(P_inv, J_1)
@@ -430,7 +423,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
def _inverse_precond_jacobi(self, A, pos="precond_A"): # pragma: no cover
J_1 = np.diag(1.0/np.diag(A))
# J_1 = np.linalg.inv(J)
P = np.dot(J_1, A)
if np.linalg.cond(A) > np.linalg.cond(P):
P_inv = spli.pinv(P)
@@ -532,7 +524,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
"""
views = len(self.U_dict)
# t = test_kernels[0].shape[0]
t = test_kernels.shape[0]
K = np.zeros((t, views * self.n_approx))
for v in range(views):
@@ -580,7 +571,6 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin, RegressorMixin):
rounds = 0
L = lmbda * np.linalg.norm(np.dot(g, g.T))
# print("L ", L)
while not converged and rounds < 100:
# no line search - this has worked well enough experimentally
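
A hedged sketch of the fixed-step update this loop suggests, assuming a proximal-gradient step with constant step size 1/L in place of a line search (the actual MVML objective and proximal operator are not reproduced here):

import numpy as np

def soft_threshold(x, thresh):
    # proximal operator of the l1 norm
    return np.sign(x) * np.maximum(np.abs(x) - thresh, 0.0)

def fixed_step_update(A, grad, L, lmbda):
    # one iteration with constant step 1/L instead of a line search
    return soft_threshold(A - grad / L, lmbda / L)
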
......