Select Git revision
get_train_annot.py
Dataset.py 2.56 KiB
from scipy import sparse
import numpy as np
def getV(DATASET, viewIndex, usedIndices=None):
    """Return the rows of view ``viewIndex`` selected by ``usedIndices``.

    Parameters
    ----------
    DATASET : mapping-like container
        Must expose ``get(name)``; the entry ``"View<viewIndex>"`` must carry
        an ``attrs`` mapping with a ``"sparse"`` flag, and ``"Metadata"`` must
        carry ``attrs["datasetLength"]``.
        (Presumably an open h5py file/group -- confirm against callers.)
    viewIndex : int
        Index appended to ``"View"`` to build the entry name.
    usedIndices : None, int, or sequence of int, optional
        ``None`` selects every row (``range(datasetLength)``); an integer
        selects a single row; a sequence selects those rows, returned in the
        order given.

    Returns
    -------
    The selected rows: whatever indexing the dense view returns, or a
    ``scipy.sparse`` CSR matrix when the view is flagged sparse.

    Notes
    -----
    The single-integer path indexes the view entry directly and does not
    consult the ``"sparse"`` flag, exactly as before.
    NOTE(review): that path looks like it only suits dense views -- confirm.
    """
    view = DATASET.get("View" + str(viewIndex))
    if usedIndices is None:
        usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
    # isinstance (rather than ``type(...) is int``) so that NumPy integer
    # scalars are also treated as a single-row request.
    if isinstance(usedIndices, (int, np.integer)):
        return view[usedIndices, :]

    usedIndices = np.array(usedIndices)
    # The rows are read in increasing index order and then permuted back to
    # the order requested by the caller (presumably because the backing store
    # only accepts increasing index lists -- TODO confirm).
    sortedIndices = np.argsort(usedIndices)
    restoreOrder = np.argsort(sortedIndices)
    usedIndices = usedIndices[sortedIndices]

    if not view.attrs["sparse"]:
        return view[usedIndices, :][restoreOrder, :]

    # ``dataset[()]`` reads the whole dataset; it replaces the ``.value``
    # accessor, which recent h5py releases no longer provide.
    sparse_mat = sparse.csr_matrix(
        (view.get("data")[()],
         view.get("indices")[()],
         view.get("indptr")[()]),
        shape=view.attrs["shape"])
    return sparse_mat[usedIndices, :][restoreOrder, :]
def getShape(DATASET, viewIndex):
    """Return the shape of view ``viewIndex``.

    When the view is flagged sparse, the shape is read from its ``"shape"``
    attribute; otherwise the entry's own ``shape`` is returned.
    """
    view = DATASET.get("View" + str(viewIndex))
    if view.attrs["sparse"]:
        return view.attrs["shape"]
    return view.shape
def getValue(DATASET):
    """Load the full content of ``DATASET`` into memory.

    Parameters
    ----------
    DATASET : object with an ``attrs`` mapping
        If ``attrs["sparse"]`` is falsy the object itself is read; otherwise
        its ``"data"``, ``"indices"`` and ``"indptr"`` entries (fetched with
        ``get``) and ``attrs["shape"]`` are used to rebuild a CSR matrix.
        (Presumably an h5py dataset or group -- confirm against callers.)

    Returns
    -------
    The dense content as read by ``DATASET[()]``, or a
    ``scipy.sparse.csr_matrix`` for sparse storage.
    """
    # ``obj[()]`` reads the whole dataset; it replaces the ``.value``
    # accessor, which recent h5py releases no longer provide.
    if not DATASET.attrs["sparse"]:
        return DATASET[()]
    sparse_mat = sparse.csr_matrix(
        (DATASET.get("data")[()],
         DATASET.get("indices")[()],
         DATASET.get("indptr")[()]),
        shape=DATASET.attrs["shape"])
    return sparse_mat
def extractSubset(matrix, usedIndices):
    """Return the rows of ``matrix`` listed in ``usedIndices``.

    Dense input is simply indexed with ``usedIndices``.  For a SciPy sparse
    input, a new CSR matrix is assembled from the ``indptr``/``indices``
    arrays of ``matrix``: the column pattern of each requested row is copied,
    while every stored entry of the result is set to ``True`` (bool dtype).
    The original stored values are not carried over.
    """
    if not sparse.issparse(matrix):
        return matrix[usedIndices]

    nbRows = len(usedIndices)
    srcIndptr = matrix.indptr

    # Row pointer of the result: running total of the selected rows' lengths.
    rowLengths = [srcIndptr[row + 1] - srcIndptr[row] for row in usedIndices]
    rowBounds = np.zeros(nbRows + 1, dtype=int)
    rowBounds[1:] = np.cumsum(rowLengths, dtype=int)

    nbStored = rowBounds[-1]
    values = np.ones(nbStored, dtype=bool)
    columns = np.zeros(nbStored, dtype=int)
    srcIndices = matrix.indices
    # Copy each selected row's column indices into its slot in the result.
    for start, stop, row in zip(rowBounds[:-1], rowBounds[1:], usedIndices):
        columns[start:stop] = srcIndices[srcIndptr[row]:srcIndptr[row + 1]]

    return sparse.csr_matrix((values, columns, rowBounds),
                             shape=(nbRows, matrix.shape[1]))