Skip to content
Snippets Groups Projects
Commit 95810d7c authored by bbauvin
Browse files

Trying to fix segmentation fault

parent 59e9af6c
Branches
Tags
No related merge requests found
......@@ -58,7 +58,6 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
CL_type = kwargs["CL_type"]
nbClass = kwargs["nbClass"]
X = getValue(X)
print X.data.nbytes + X.indptr.nbytes + X.indices.nbytes
datasetLength = X.shape[0]
clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"]
......
......@@ -403,23 +403,24 @@ def makeSparseTotalMatrix(sortedRNASeq):
return sparseFull
def getAdjacenceMatrix(sortedRNASeq, k=1):
indices = np.zeros((sortedRNASeq.shape[0]*k*sortedRNASeq.shape[1]), dtype=np.int32)
data = np.ones((sortedRNASeq.shape[0]*k*sortedRNASeq.shape[1]), dtype=bool)
indptr = np.zeros(sortedRNASeq.shape[0]+1, dtype=np.int16)
nbGenes = sortedRNASeq.shape[1]
def getAdjacenceMatrix(RNASeqRanking, sotredRNASeq, k=2):
k=int(k)/2*2
indices = np.zeros((RNASeqRanking.shape[0]*k*RNASeqRanking.shape[1]), dtype=np.int32)
data = np.ones((RNASeqRanking.shape[0]*k*RNASeqRanking.shape[1]), dtype=bool)
indptr = np.zeros(RNASeqRanking.shape[0]+1, dtype=np.int16)
nbGenes = RNASeqRanking.shape[1]
pointer = 0
for patientIndex, patient in enumerate(sortedRNASeq):
for patientIndex in range(RNASeqRanking.shape[0]):
print patientIndex
for i in range(nbGenes):
for j in range(k):
for j in range(k/2):
try:
indices[pointer]=patient[(i-(j+1))]+patient[i]*nbGenes
indices[pointer]=RNASeqRanking[patientIndex, (sotredRNASeq[patientIndex, i]-(j+1))]+i*nbGenes
pointer+=1
except:
pass
try:
indices[pointer]=patient[i+(j+1)]+patient[i]*nbGenes
indices[pointer]=RNASeqRanking[patientIndex, (sotredRNASeq[patientIndex, i]+(j+1))]+i*nbGenes
pointer+=1
except:
pass
......@@ -431,7 +432,7 @@ def getAdjacenceMatrix(sortedRNASeq, k=1):
# data.append(True)
indptr[patientIndex+1] = pointer
mat = sparse.csr_matrix((data, indices, indptr), shape=(sortedRNASeq.shape[0], sortedRNASeq.shape[1]*sortedRNASeq.shape[1]), dtype=bool)
mat = sparse.csr_matrix((data, indices, indptr), shape=(RNASeqRanking.shape[0], RNASeqRanking.shape[1]*RNASeqRanking.shape[1]), dtype=bool)
return mat
......@@ -478,12 +479,16 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
logging.debug("Start:\t Getting Sorted RNASeq Data")
RNASeq = datasetFile["View2"][...]
modifiedRNASeq = np.zeros(datasetFile.get("View2").shape, dtype=int)
sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int)
RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int)
for exampleIndex, exampleArray in enumerate(RNASeq):
RNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray))
sorted_x = sorted(RNASeqDictionary.items(), key=operator.itemgetter(1))
modifiedRNASeq[exampleIndex] = np.array([index for (index, value) in sorted_x], dtype=int)
mrnaseqDset = datasetFile.create_dataset("View4", modifiedRNASeq.shape, data=modifiedRNASeq)
sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray))
sortedRNASeqIndicesDict = sorted(sortedRNASeqDictionary.items(), key=operator.itemgetter(1))
sortedRNASeqIndicesArray = np.array([index for (index, value) in sortedRNASeqIndicesDict], dtype=int)
sortedRNASeqGeneIndices[exampleIndex] = sortedRNASeqIndicesArray
for geneIndex in range(RNASeq.shape[1]):
RNASeqRanking[exampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex
mrnaseqDset = datasetFile.create_dataset("View4", sortedRNASeqGeneIndices.shape, data=sortedRNASeqGeneIndices)
mrnaseqDset.attrs["name"] = "SRNASeq"
mrnaseqDset.attrs["sparse"] = False
logging.debug("Done:\t Getting Sorted RNASeq Data")
......@@ -496,9 +501,9 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
factorizedLeftBaseMatrix = np.genfromtxt(path+"factorLeft--n-"+str(datasetFile.get("View2").shape[1])+"--k-"+str(100)+".csv", delimiter=',')
except:
factorizedSupBaseMatrix, factorizedLeftBaseMatrix = getBaseMatrices(rnaseqData.shape[1], k)
brnaseqDset = datasetFile.create_dataset("View5", (modifiedRNASeq.shape[0], modifiedRNASeq.shape[1]*k*2), dtype=bool)
for patientIndex, patientSortedArray in enumerate(modifiedRNASeq):
patientMatrix = np.zeros((modifiedRNASeq.shape[1], k * 2), dtype=bool)
brnaseqDset = datasetFile.create_dataset("View5", (sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1]*k*2), dtype=bool)
for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], k * 2), dtype=bool)
for lineIndex, geneIndex in enumerate(patientSortedArray):
patientMatrix[geneIndex]= np.concatenate((factorizedLeftBaseMatrix[lineIndex,:], factorizedSupBaseMatrix[:, lineIndex]))
brnaseqDset[patientIndex] = patientMatrix.flatten()
......@@ -507,7 +512,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
logging.debug("Done:\t Getting Binarized RNASeq Data")
# logging.debug("Start:\t Getting Binned RNASeq Data")
# sparseBinnedRNASeq = makeSparseTotalMatrix(modifiedRNASeq)
# sparseBinnedRNASeq = makeSparseTotalMatrix(sortedRNASeqGeneIndices)
# sparseBinnedRNASeqGrp = datasetFile.create_group("View6")
# dataDset = sparseBinnedRNASeqGrp.create_dataset("data", sparseBinnedRNASeq.data.shape, data=sparseBinnedRNASeq.data)
# indicesDset = sparseBinnedRNASeqGrp.create_dataset("indices", sparseBinnedRNASeq.indices.shape, data=sparseBinnedRNASeq.indices)
......@@ -518,7 +523,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
# logging.debug("Done:\t Getting Binned RNASeq Data")
logging.debug("Start:\t Getting Adjacence RNASeq Data")
sparseAdjRNASeq = getAdjacenceMatrix(modifiedRNASeq, k=findClosestPowerOfTwo(100)-1)
sparseAdjRNASeq = getAdjacenceMatrix(RNASeqRanking, sortedRNASeqGeneIndices, k=findClosestPowerOfTwo(100)-1)
sparseAdjRNASeqGrp = datasetFile.create_group("View6")
dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data)
indicesDset = sparseAdjRNASeqGrp.create_dataset("indices", sparseAdjRNASeq.indices.shape, data=sparseAdjRNASeq.indices)
......
......@@ -12,8 +12,6 @@ def getV(DATASET, viewIndex, usedIndices=None):
DATASET.get("View"+str(viewIndex)).get("indices").value,
DATASET.get("View"+str(viewIndex)).get("indptr").value),
shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:]
print sparse_mat.shape
print sparse_mat.indptr
return sparse_mat
......@@ -32,24 +30,19 @@ def getValue(DATASET):
DATASET.get("indices").value,
DATASET.get("indptr").value),
shape=DATASET.attrs["shape"])
print sparse_mat.shape
print sparse_mat.indptr
return sparse_mat
def extractSubset(matrix, usedIndices):
    """Return the rows of ``matrix`` selected by ``usedIndices``, in that order.

    Sparse input is rebuilt by hand as a CSR matrix instead of going through
    scipy's fancy indexing. Only the sparsity pattern is carried over: every
    stored entry of the result is ``True`` (the original stored values are not
    copied), so this is meant for boolean/binary matrices.

    Dense input (anything for which ``sparse.issparse`` is false) is simply
    indexed with ``usedIndices``.

    :param matrix: scipy sparse matrix or an indexable dense array.
    :param usedIndices: sequence of row indices to keep.
    :return: ``csr_matrix`` of shape ``(len(usedIndices), matrix.shape[1])``
        for sparse input, ``matrix[usedIndices]`` otherwise.
    """
    if not sparse.issparse(matrix):
        return matrix[usedIndices]

    # indptr/indices are only row-oriented for CSR; this is a no-op when the
    # matrix already is CSR.
    matrix = matrix.tocsr()
    oldIndptr = matrix.indptr
    oldIndices = matrix.indices
    rows = np.asarray(usedIndices, dtype=np.intp)

    # Number of stored entries in each selected row.
    rowLengths = np.diff(oldIndptr)[rows]

    # Row pointers are running totals of stored entries. They must be held in
    # a wide integer type: with int16 the totals wrap past 32767 and the
    # resulting CSR structure points outside the index buffer.
    newIndptr = np.zeros(len(rows) + 1, dtype=np.int64)
    np.cumsum(rowLengths, out=newIndptr[1:])

    nbStored = int(newIndptr[-1])
    newData = np.ones(nbStored, dtype=bool)
    newIndices = np.zeros(nbStored, dtype=oldIndices.dtype)

    # Copy each selected row's column indices into its slot of the new buffer.
    for position, row in enumerate(rows):
        newIndices[newIndptr[position]:newIndptr[position + 1]] = \
            oldIndices[oldIndptr[row]:oldIndptr[row + 1]]

    return sparse.csr_matrix((newData, newIndices, newIndptr),
                             shape=(len(rows), matrix.shape[1]))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment