Skip to content
Snippets Groups Projects
Select Git revision
  • f867f02f5682ceb36b7acea390c61770332c2cf9
  • master default
  • object
  • develop protected
  • private_algos
  • cuisine
  • SMOTE
  • revert-76c4cca5
  • archive protected
  • no_graphviz
  • 0.0.2
  • 0.0.1
12 results

Dataset.py

Blame
  • user avatar
    bbauvin authored
    f867f02f
    History
    Dataset.py 2.26 KiB
    from scipy import sparse
    import numpy as np
    
    
    def getV(DATASET, viewIndex, usedIndices=None):
        """Return the rows ``usedIndices`` of the view numbered ``viewIndex``.

        DATASET -- container whose ``get(name)`` returns the ``"Metadata"`` and
            ``"View<viewIndex>"`` entries; each entry exposes an ``attrs``
            mapping.
        viewIndex -- number appended to ``"View"`` to build the entry name.
        usedIndices -- row indices to select.  When ``None`` (the default),
            every row from 0 to ``Metadata.attrs["datasetLength"] - 1`` is
            selected.

        Returns ``view[usedIndices, :]`` when the view's ``attrs["sparse"]`` is
        falsy.  Otherwise a ``scipy.sparse.csr_matrix`` is rebuilt from the
        view's ``data`` / ``indices`` / ``indptr`` entries and the same row
        selection is applied to it.
        """
        # Identity test: ``usedIndices == None`` is evaluated element-wise when
        # the caller passes a NumPy array, and the resulting array cannot be
        # used as an ``if`` condition.
        if usedIndices is None:
            # list(...) keeps this a real list on Python 3 as it was on Python 2.
            usedIndices = list(range(DATASET.get("Metadata").attrs["datasetLength"]))
        view = DATASET.get("View" + str(viewIndex))
        if not view.attrs["sparse"]:
            return view[usedIndices, :]
        # NOTE(review): if these entries are h5py datasets, ``.value`` was
        # removed in h5py 3.0 in favour of ``dataset[()]`` -- confirm.
        sparse_mat = sparse.csr_matrix((view.get("data").value,
                                        view.get("indices").value,
                                        view.get("indptr").value),
                                       shape=view.attrs["shape"])
        return sparse_mat[usedIndices, :]
    
    
    def getShape(DATASET, viewIndex):
        """Return the shape of the view numbered ``viewIndex``.

        The view is looked up as ``DATASET.get("View<viewIndex>")``.  When its
        ``attrs["sparse"]`` flag is truthy the shape is read from
        ``attrs["shape"]``; otherwise the view's own ``shape`` attribute is
        returned.
        """
        view = DATASET.get("View" + str(viewIndex))
        if view.attrs["sparse"]:
            return view.attrs["shape"]
        return view.shape
    
    
    def getValue(DATASET):
        """Return the full content of ``DATASET``.

        When ``DATASET.attrs["sparse"]`` is truthy, a
        ``scipy.sparse.csr_matrix`` is assembled from the ``.value`` of the
        ``data``, ``indices`` and ``indptr`` entries obtained through
        ``DATASET.get`` and shaped by ``DATASET.attrs["shape"]``.  Otherwise
        ``DATASET.value`` is returned as is.
        """
        if DATASET.attrs["sparse"]:
            # The three arrays are passed in the (data, indices, indptr) order
            # expected by the csr_matrix constructor.
            csrParts = tuple(DATASET.get(name).value
                             for name in ("data", "indices", "indptr"))
            return sparse.csr_matrix(csrParts, shape=DATASET.attrs["shape"])
        return DATASET.value
    
    def extractSubset(matrix, usedIndices):
        """Return the rows of ``matrix`` listed in ``usedIndices``, in that order.

        matrix -- a scipy sparse matrix, or any object supporting
            ``matrix[usedIndices]`` (e.g. a NumPy array).
        usedIndices -- sequence of integer row indices.  Indices may repeat;
            negative indices count from the last row.

        For sparse input a new ``csr_matrix`` of shape
        ``(len(usedIndices), matrix.shape[1])`` is returned, holding the column
        indices and the stored values of the selected rows.  For any other
        input ``matrix[usedIndices]`` is returned.
        """
        if not sparse.issparse(matrix):
            return matrix[usedIndices]
        # indptr/indices only describe rows in CSR layout; tocsr() returns the
        # matrix itself when it is already CSR.
        csr = matrix.tocsr()
        rows = np.asarray(usedIndices, dtype=np.intp)
        rows = np.where(rows < 0, rows + csr.shape[0], rows)
        oldIndptr = csr.indptr
        oldIndices = csr.indices
        oldData = csr.data
        # Row pointers are accumulated in int64: a narrower type silently wraps
        # around as soon as the subset holds more stored entries than it can
        # represent.
        newIndptr = np.zeros(len(rows) + 1, dtype=np.int64)
        newIndptr[1:] = np.cumsum(oldIndptr[rows + 1] - oldIndptr[rows],
                                  dtype=np.int64)
        storedCount = int(newIndptr[-1])
        newIndices = np.empty(storedCount, dtype=oldIndices.dtype)
        # Copy the stored values too, so non-binary matrices keep their content.
        newData = np.empty(storedCount, dtype=oldData.dtype)
        for newRow, oldRow in enumerate(rows):
            source = slice(oldIndptr[oldRow], oldIndptr[oldRow + 1])
            target = slice(newIndptr[newRow], newIndptr[newRow + 1])
            newIndices[target] = oldIndices[source]
            newData[target] = oldData[source]
        return sparse.csr_matrix((newData, newIndices, newIndptr),
                                 shape=(len(rows), csr.shape[1]))