Skip to content
Snippets Groups Projects
Commit 95810d7c authored by bbauvin's avatar bbauvin
Browse files

Trying to fix segmentation fault

parent 59e9af6c
Branches
Tags
No related merge requests found
...@@ -58,7 +58,6 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path, ...@@ -58,7 +58,6 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
CL_type = kwargs["CL_type"] CL_type = kwargs["CL_type"]
nbClass = kwargs["nbClass"] nbClass = kwargs["nbClass"]
X = getValue(X) X = getValue(X)
print X.data.nbytes + X.indptr.nbytes + X.indices.nbytes
datasetLength = X.shape[0] datasetLength = X.shape[0]
clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"] clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"]
......
...@@ -403,23 +403,24 @@ def makeSparseTotalMatrix(sortedRNASeq): ...@@ -403,23 +403,24 @@ def makeSparseTotalMatrix(sortedRNASeq):
return sparseFull return sparseFull
def getAdjacenceMatrix(sortedRNASeq, k=1): def getAdjacenceMatrix(RNASeqRanking, sotredRNASeq, k=2):
indices = np.zeros((sortedRNASeq.shape[0]*k*sortedRNASeq.shape[1]), dtype=np.int32) k=int(k)/2*2
data = np.ones((sortedRNASeq.shape[0]*k*sortedRNASeq.shape[1]), dtype=bool) indices = np.zeros((RNASeqRanking.shape[0]*k*RNASeqRanking.shape[1]), dtype=np.int32)
indptr = np.zeros(sortedRNASeq.shape[0]+1, dtype=np.int16) data = np.ones((RNASeqRanking.shape[0]*k*RNASeqRanking.shape[1]), dtype=bool)
nbGenes = sortedRNASeq.shape[1] indptr = np.zeros(RNASeqRanking.shape[0]+1, dtype=np.int16)
nbGenes = RNASeqRanking.shape[1]
pointer = 0 pointer = 0
for patientIndex, patient in enumerate(sortedRNASeq): for patientIndex in range(RNASeqRanking.shape[0]):
print patientIndex print patientIndex
for i in range(nbGenes): for i in range(nbGenes):
for j in range(k): for j in range(k/2):
try: try:
indices[pointer]=patient[(i-(j+1))]+patient[i]*nbGenes indices[pointer]=RNASeqRanking[patientIndex, (sotredRNASeq[patientIndex, i]-(j+1))]+i*nbGenes
pointer+=1 pointer+=1
except: except:
pass pass
try: try:
indices[pointer]=patient[i+(j+1)]+patient[i]*nbGenes indices[pointer]=RNASeqRanking[patientIndex, (sotredRNASeq[patientIndex, i]+(j+1))]+i*nbGenes
pointer+=1 pointer+=1
except: except:
pass pass
...@@ -431,7 +432,7 @@ def getAdjacenceMatrix(sortedRNASeq, k=1): ...@@ -431,7 +432,7 @@ def getAdjacenceMatrix(sortedRNASeq, k=1):
# data.append(True) # data.append(True)
indptr[patientIndex+1] = pointer indptr[patientIndex+1] = pointer
mat = sparse.csr_matrix((data, indices, indptr), shape=(sortedRNASeq.shape[0], sortedRNASeq.shape[1]*sortedRNASeq.shape[1]), dtype=bool) mat = sparse.csr_matrix((data, indices, indptr), shape=(RNASeqRanking.shape[0], RNASeqRanking.shape[1]*RNASeqRanking.shape[1]), dtype=bool)
return mat return mat
...@@ -478,12 +479,16 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): ...@@ -478,12 +479,16 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
logging.debug("Start:\t Getting Sorted RNASeq Data") logging.debug("Start:\t Getting Sorted RNASeq Data")
RNASeq = datasetFile["View2"][...] RNASeq = datasetFile["View2"][...]
modifiedRNASeq = np.zeros(datasetFile.get("View2").shape, dtype=int) sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int)
RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int)
for exampleIndex, exampleArray in enumerate(RNASeq): for exampleIndex, exampleArray in enumerate(RNASeq):
RNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray)) sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray))
sorted_x = sorted(RNASeqDictionary.items(), key=operator.itemgetter(1)) sortedRNASeqIndicesDict = sorted(sortedRNASeqDictionary.items(), key=operator.itemgetter(1))
modifiedRNASeq[exampleIndex] = np.array([index for (index, value) in sorted_x], dtype=int) sortedRNASeqIndicesArray = np.array([index for (index, value) in sortedRNASeqIndicesDict], dtype=int)
mrnaseqDset = datasetFile.create_dataset("View4", modifiedRNASeq.shape, data=modifiedRNASeq) sortedRNASeqGeneIndices[exampleIndex] = sortedRNASeqIndicesArray
for geneIndex in range(RNASeq.shape[1]):
RNASeqRanking[exampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex
mrnaseqDset = datasetFile.create_dataset("View4", sortedRNASeqGeneIndices.shape, data=sortedRNASeqGeneIndices)
mrnaseqDset.attrs["name"] = "SRNASeq" mrnaseqDset.attrs["name"] = "SRNASeq"
mrnaseqDset.attrs["sparse"] = False mrnaseqDset.attrs["sparse"] = False
logging.debug("Done:\t Getting Sorted RNASeq Data") logging.debug("Done:\t Getting Sorted RNASeq Data")
...@@ -496,9 +501,9 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): ...@@ -496,9 +501,9 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
factorizedLeftBaseMatrix = np.genfromtxt(path+"factorLeft--n-"+str(datasetFile.get("View2").shape[1])+"--k-"+str(100)+".csv", delimiter=',') factorizedLeftBaseMatrix = np.genfromtxt(path+"factorLeft--n-"+str(datasetFile.get("View2").shape[1])+"--k-"+str(100)+".csv", delimiter=',')
except: except:
factorizedSupBaseMatrix, factorizedLeftBaseMatrix = getBaseMatrices(rnaseqData.shape[1], k) factorizedSupBaseMatrix, factorizedLeftBaseMatrix = getBaseMatrices(rnaseqData.shape[1], k)
brnaseqDset = datasetFile.create_dataset("View5", (modifiedRNASeq.shape[0], modifiedRNASeq.shape[1]*k*2), dtype=bool) brnaseqDset = datasetFile.create_dataset("View5", (sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1]*k*2), dtype=bool)
for patientIndex, patientSortedArray in enumerate(modifiedRNASeq): for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
patientMatrix = np.zeros((modifiedRNASeq.shape[1], k * 2), dtype=bool) patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], k * 2), dtype=bool)
for lineIndex, geneIndex in enumerate(patientSortedArray): for lineIndex, geneIndex in enumerate(patientSortedArray):
patientMatrix[geneIndex]= np.concatenate((factorizedLeftBaseMatrix[lineIndex,:], factorizedSupBaseMatrix[:, lineIndex])) patientMatrix[geneIndex]= np.concatenate((factorizedLeftBaseMatrix[lineIndex,:], factorizedSupBaseMatrix[:, lineIndex]))
brnaseqDset[patientIndex] = patientMatrix.flatten() brnaseqDset[patientIndex] = patientMatrix.flatten()
...@@ -507,7 +512,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): ...@@ -507,7 +512,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
logging.debug("Done:\t Getting Binarized RNASeq Data") logging.debug("Done:\t Getting Binarized RNASeq Data")
# logging.debug("Start:\t Getting Binned RNASeq Data") # logging.debug("Start:\t Getting Binned RNASeq Data")
# sparseBinnedRNASeq = makeSparseTotalMatrix(modifiedRNASeq) # sparseBinnedRNASeq = makeSparseTotalMatrix(sortedRNASeqGeneIndices)
# sparseBinnedRNASeqGrp = datasetFile.create_group("View6") # sparseBinnedRNASeqGrp = datasetFile.create_group("View6")
# dataDset = sparseBinnedRNASeqGrp.create_dataset("data", sparseBinnedRNASeq.data.shape, data=sparseBinnedRNASeq.data) # dataDset = sparseBinnedRNASeqGrp.create_dataset("data", sparseBinnedRNASeq.data.shape, data=sparseBinnedRNASeq.data)
# indicesDset = sparseBinnedRNASeqGrp.create_dataset("indices", sparseBinnedRNASeq.indices.shape, data=sparseBinnedRNASeq.indices) # indicesDset = sparseBinnedRNASeqGrp.create_dataset("indices", sparseBinnedRNASeq.indices.shape, data=sparseBinnedRNASeq.indices)
...@@ -518,7 +523,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): ...@@ -518,7 +523,7 @@ def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
# logging.debug("Done:\t Getting Binned RNASeq Data") # logging.debug("Done:\t Getting Binned RNASeq Data")
logging.debug("Start:\t Getting Adjacence RNASeq Data") logging.debug("Start:\t Getting Adjacence RNASeq Data")
sparseAdjRNASeq = getAdjacenceMatrix(modifiedRNASeq, k=findClosestPowerOfTwo(100)-1) sparseAdjRNASeq = getAdjacenceMatrix(RNASeqRanking, sortedRNASeqGeneIndices, k=findClosestPowerOfTwo(100)-1)
sparseAdjRNASeqGrp = datasetFile.create_group("View6") sparseAdjRNASeqGrp = datasetFile.create_group("View6")
dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data) dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data)
indicesDset = sparseAdjRNASeqGrp.create_dataset("indices", sparseAdjRNASeq.indices.shape, data=sparseAdjRNASeq.indices) indicesDset = sparseAdjRNASeqGrp.create_dataset("indices", sparseAdjRNASeq.indices.shape, data=sparseAdjRNASeq.indices)
......
...@@ -12,8 +12,6 @@ def getV(DATASET, viewIndex, usedIndices=None): ...@@ -12,8 +12,6 @@ def getV(DATASET, viewIndex, usedIndices=None):
DATASET.get("View"+str(viewIndex)).get("indices").value, DATASET.get("View"+str(viewIndex)).get("indices").value,
DATASET.get("View"+str(viewIndex)).get("indptr").value), DATASET.get("View"+str(viewIndex)).get("indptr").value),
shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:] shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:]
print sparse_mat.shape
print sparse_mat.indptr
return sparse_mat return sparse_mat
...@@ -32,24 +30,19 @@ def getValue(DATASET): ...@@ -32,24 +30,19 @@ def getValue(DATASET):
DATASET.get("indices").value, DATASET.get("indices").value,
DATASET.get("indptr").value), DATASET.get("indptr").value),
shape=DATASET.attrs["shape"]) shape=DATASET.attrs["shape"])
print sparse_mat.shape
print sparse_mat.indptr
return sparse_mat return sparse_mat
def extractSubset(matrix, usedIndices):
    """Return the rows of ``matrix`` listed in ``usedIndices``, in that order.

    Dense inputs are simply indexed with ``matrix[usedIndices]``.

    Sparse inputs are rebuilt row by row as a new CSR matrix with
    ``len(usedIndices)`` rows and ``matrix.shape[1]`` columns. Only the
    sparsity pattern is carried over: every stored entry of the result is
    ``True`` (the values in ``matrix.data`` are not read), so this is meant
    for boolean indicator matrices.

    Parameters
    ----------
    matrix : scipy sparse matrix or indexable array
        Source data; rows are the examples.
    usedIndices : sequence of int
        Row numbers to keep. Repeated indices are allowed and produce
        repeated rows.

    Returns
    -------
    scipy.sparse.csr_matrix (boolean) for sparse input, otherwise whatever
    ``matrix[usedIndices]`` returns.
    """
    if not sparse.issparse(matrix):
        return matrix[usedIndices]

    # The row-pointer / column-index layout read below is CSR-specific.
    # tocsr() converts any other sparse format and leaves CSR input as is.
    csr = matrix.tocsr()
    oldIndptr = csr.indptr
    oldIndices = csr.indices

    rows = np.asarray(usedIndices, dtype=np.int64)
    nbRows = len(rows)

    # Row pointers are cumulative counts of stored entries. They must be
    # wide enough to hold the total: with int16 they wrap around past 32767
    # entries, which yields negative/garbage slice bounds and an invalid
    # CSR structure.
    rowLengths = oldIndptr[rows + 1] - oldIndptr[rows]
    newIndptr = np.zeros(nbRows + 1, dtype=np.int64)
    newIndptr[1:] = np.cumsum(rowLengths, dtype=np.int64)

    nbStored = int(newIndptr[-1])
    newData = np.ones(nbStored, dtype=bool)
    # Keep the source index dtype so wide column indices are not truncated.
    newIndices = np.zeros(nbStored, dtype=oldIndices.dtype)
    for position, row in enumerate(rows):
        newIndices[newIndptr[position]:newIndptr[position + 1]] = \
            oldIndices[oldIndptr[row]:oldIndptr[row + 1]]

    return sparse.csr_matrix((newData, newIndices, newIndptr),
                             shape=(nbRows, csr.shape[1]))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment