diff --git a/Code/MonoMultiViewClassifiers/utils/Dataset.py b/Code/MonoMultiViewClassifiers/utils/Dataset.py
index 7c74fc76664c5612ed903657f8bebfa6254c7fcb..75866388d0b1f2210a8396b64fb52385bbdbb435 100644
--- a/Code/MonoMultiViewClassifiers/utils/Dataset.py
+++ b/Code/MonoMultiViewClassifiers/utils/Dataset.py
@@ -10,6 +10,7 @@ from . import GetMultiviewDb as DB
 
 
 def getV(DATASET, viewIndex, usedIndices=None):
+    """Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset"""
     if usedIndices is None:
         usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
     if type(usedIndices) is int:
@@ -32,6 +33,7 @@ def getV(DATASET, viewIndex, usedIndices=None):
 
 
 def getShape(DATASET, viewIndex):
+    """Used to get the dataset shape even if it's sparse"""
     if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]:
         return DATASET.get("View" + str(viewIndex)).shape
     else:
@@ -39,6 +41,7 @@ def getShape(DATASET, viewIndex):
 
 
 def getValue(DATASET):
+    """Used to get the value of a view in the HDF5 dataset even if it sparse"""
     if not DATASET.attrs["sparse"]:
         return DATASET.value
     else:
@@ -50,6 +53,7 @@ def getValue(DATASET):
 
 
 def extractSubset(matrix, usedIndices):
+    """Used to extract a subset of a matrix even if it's sparse"""
     if sparse.issparse(matrix):
         newIndptr = np.zeros(len(usedIndices) + 1, dtype=int)
         oldindptr = matrix.indptr
@@ -69,8 +73,7 @@ def extractSubset(matrix, usedIndices):
 
 
 def initMultipleDatasets(args, nbCores):
-    """Used to create copies of the dataset if multicore computation is used
-    Needs arg.pathF and arg.name"""
+    """Used to create copies of the dataset if multicore computation is used"""
     if nbCores > 1:
         if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
             logging.debug("Info:\t Enough copies of the dataset are already available")
@@ -90,6 +93,7 @@ def initMultipleDatasets(args, nbCores):
 
 
 def confirm(resp=True, timeout=15):
+    """Used to process answer"""
     ans = input_(timeout)
     if not ans:
         return resp
@@ -102,6 +106,7 @@ def confirm(resp=True, timeout=15):
 
 
 def input_(timeout=15):
+    """used as a UI to stop if too much HDD space will be used"""
     print("You have " + str(timeout) + " seconds to stop the script by typing n")
     i, o, e = select.select([sys.stdin], [], [], timeout)
     if i:
diff --git a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index 28139df754f29c2ccc1fd6f728c2e7c3feb455a0..044620afa87a49a7f6f38b7b2d07fe4ea122665b 100644
--- a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -13,6 +13,7 @@ __status__ = "Prototype"  # Production, Development, Prototype
 
 
 def makeMeNoisy(viewData, randomState, percentage=15):
+    """used to introduce some noise in the generated data"""
     viewData = viewData.astype(bool)
     nbNoisyCoord = int(percentage / 100.0 * viewData.shape[0] * viewData.shape[1])
     rows = range(viewData.shape[0])
@@ -27,6 +28,7 @@ def makeMeNoisy(viewData, randomState, percentage=15):
 
 def getPlausibleDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, nbView=3,
                        nbClass=2, datasetLength=347, randomStateInt=42):
+    """Used to generate a plausible dataset to test the algorithms"""
     randomState = np.random.RandomState(randomStateInt)
     nbFeatures = 250
     if not os.path.exists(os.path.dirname(pathF + "Plausible.hdf5")):
@@ -65,6 +67,7 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, nbView=3,
 
 
 def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, randomState):
+    """Was used to generateafake dataset to run tests"""
     NB_VIEW = 4
     DATASET_LENGTH = 30
     NB_CLASS = 2
@@ -115,70 +118,72 @@ def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, randomState):
     return datasetFile, LABELS_DICTIONARY
 
 
-def getLabelSupports(CLASS_LABELS):
-    labels = set(CLASS_LABELS)
-    supports = [CLASS_LABELS.tolist().count(label) for label in labels]
-    return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
+# def getLabelSupports(CLASS_LABELS):
+#     """Used to get the number of example for each label"""
+#     labels = set(CLASS_LABELS)
+#     supports = [CLASS_LABELS.tolist().count(label) for label in labels]
+#     return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
 
 
-def isUseful(labelSupports, index, CLASS_LABELS, labelDict):
-    if labelSupports[labelDict[CLASS_LABELS[index]]] != 0:
-        labelSupports[labelDict[CLASS_LABELS[index]]] -= 1
-        return True, labelSupports
-    else:
-        return False, labelSupports
-
-
-def splitDataset(DATASET, LEARNING_RATE, DATASET_LENGTH, randomState):
-    LABELS = DATASET.get("Labels")[...]
-    NB_CLASS = int(DATASET["Metadata"].attrs["nbClass"])
-    validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState)
-    validationIndices.sort()
-    return validationIndices
-
-
-def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState):
-    labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS))
-    nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports]
-    trainingExamplesIndices = []
-    usedIndices = []
-    while nbTrainingExamples != [0 for i in range(NB_CLASS)]:
-        isUseFull = False
-        index = int(randomState.randint(0, DATASET_LENGTH - 1))
-        if index not in usedIndices:
-            isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict)
-        if isUseFull:
-            trainingExamplesIndices.append(index)
-            usedIndices.append(index)
-    return trainingExamplesIndices
-
-
-def getKFoldIndices(nbFolds, CLASS_LABELS, NB_CLASS, learningIndices, randomState):
-    labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS[learningIndices]))
-    nbTrainingExamples = [[int(support / nbFolds) for support in labelSupports] for fold in range(nbFolds)]
-    trainingExamplesIndices = []
-    usedIndices = []
-    for foldIndex, fold in enumerate(nbTrainingExamples):
-        trainingExamplesIndices.append([])
-        while fold != [0 for i in range(NB_CLASS)]:
-            index = randomState.randint(0, len(learningIndices))
-            if learningIndices[index] not in usedIndices:
-                isUseFull, fold = isUseful(fold, learningIndices[index], CLASS_LABELS, labelDict)
-                if isUseFull:
-                    trainingExamplesIndices[foldIndex].append(learningIndices[index])
-                    usedIndices.append(learningIndices[index])
-    return trainingExamplesIndices
-
-
-def getPositions(labelsUsed, fullLabels):
-    usedIndices = []
-    for labelIndex, label in enumerate(fullLabels):
-        if label in labelsUsed:
-            usedIndices.append(labelIndex)
-    return usedIndices
+# def isUseful(labelSupports, index, CLASS_LABELS, labelDict):
+#     if labelSupports[labelDict[CLASS_LABELS[index]]] != 0:
+#         labelSupports[labelDict[CLASS_LABELS[index]]] -= 1
+#         return True, labelSupports
+#     else:
+#         return False, labelSupports
+
+
+# def splitDataset(DATASET, LEARNING_RATE, DATASET_LENGTH, randomState):
+#     LABELS = DATASET.get("Labels")[...]
+#     NB_CLASS = int(DATASET["Metadata"].attrs["nbClass"])
+#     validationIndices = extractRandomTrainingSet(LABELS, 1 - LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState)
+#     validationIndices.sort()
+#     return validationIndices
+
+
+# def extractRandomTrainingSet(CLASS_LABELS, LEARNING_RATE, DATASET_LENGTH, NB_CLASS, randomState):
+#     labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS))
+#     nbTrainingExamples = [int(support * LEARNING_RATE) for support in labelSupports]
+#     trainingExamplesIndices = []
+#     usedIndices = []
+#     while nbTrainingExamples != [0 for i in range(NB_CLASS)]:
+#         isUseFull = False
+#         index = int(randomState.randint(0, DATASET_LENGTH - 1))
+#         if index not in usedIndices:
+#             isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict)
+#         if isUseFull:
+#             trainingExamplesIndices.append(index)
+#             usedIndices.append(index)
+#     return trainingExamplesIndices
+
+
+# def getKFoldIndices(nbFolds, CLASS_LABELS, NB_CLASS, learningIndices, randomState):
+#     labelSupports, labelDict = getLabelSupports(np.array(CLASS_LABELS[learningIndices]))
+#     nbTrainingExamples = [[int(support / nbFolds) for support in labelSupports] for fold in range(nbFolds)]
+#     trainingExamplesIndices = []
+#     usedIndices = []
+#     for foldIndex, fold in enumerate(nbTrainingExamples):
+#         trainingExamplesIndices.append([])
+#         while fold != [0 for i in range(NB_CLASS)]:
+#             index = randomState.randint(0, len(learningIndices))
+#             if learningIndices[index] not in usedIndices:
+#                 isUseFull, fold = isUseful(fold, learningIndices[index], CLASS_LABELS, labelDict)
+#                 if isUseFull:
+#                     trainingExamplesIndices[foldIndex].append(learningIndices[index])
+#                     usedIndices.append(learningIndices[index])
+#     return trainingExamplesIndices
+#
+#
+# def getPositions(labelsUsed, fullLabels):
+#     usedIndices = []
+#     for labelIndex, label in enumerate(fullLabels):
+#         if label in labelsUsed:
+#             usedIndices.append(labelIndex)
+#     return usedIndices
 
 
 # def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState):
+# TODO: Update this one
 #     labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv')
 #     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
 #     if len(LABELS_NAMES) != NB_CLASS:
@@ -217,6 +222,7 @@ def getPositions(labelsUsed, fullLabels):
 
 
 def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
+    """Used to load a hdf5 database"""
     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
     fullLabels = datasetFile.get("Labels")
     labelsDictionary = dict((labelIndex, labelName) for labelIndex, labelName in
@@ -224,850 +230,856 @@ def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, LABELS_NAMES):
     return datasetFile, labelsDictionary
 
 
-def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState):
-    datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
-    labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv')
-    if len(LABELS_NAMES) != NB_CLASS:
-        nbLabelsAvailable = 0
-        for l in labelsNamesFile:
-            nbLabelsAvailable += 1
-        LABELS_NAMES = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if
-                        lineIdx in randomState.randint(nbLabelsAvailable, size=NB_CLASS)]
-    fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=';').astype(int)
-    labelsDictionary = dict((classIndice, labelName) for (classIndice, labelName) in
-                            [(int(line.strip().split(";")[0]), line.strip().split(";")[1]) for lineIndex, line in
-                             labelsNamesFile if line.strip().split(";")[0] in LABELS_NAMES])
-    if len(set(fullLabels)) > NB_CLASS:
-        usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
-    else:
-        usedIndices = range(len(fullLabels))
-    for viewIndex, view in enumerate(views):
-        viewFile = pathF + nameDB + "-" + view + '.csv'
-        viewMatrix = np.array(np.genfromtxt(viewFile, delimiter=';'))[usedIndices, :]
-        viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewMatrix.shape, data=viewMatrix)
-        viewDset.attrs["name"] = view
-
-    labelsDset = datasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
-
-    metaDataGrp = datasetFile.create_group("Metadata")
-    metaDataGrp.attrs["nbView"] = len(views)
-    metaDataGrp.attrs["nbClass"] = NB_CLASS
-    metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
-    datasetFile.close()
-    datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
-    return datasetFile, labelsDictionary
-
-
-def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, randomState):
-    datasetFile = h5py.File(path + "MultiOmic.hdf5", "w")
-
-    logging.debug("Start:\t Getting Methylation Data")
-    methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
-    methylDset = datasetFile.create_dataset("View0", methylData.shape)
-    methylDset[...] = methylData
-    methylDset.attrs["name"] = "Methyl"
-    methylDset.attrs["sparse"] = False
-    methylDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Methylation Data")
-
-    logging.debug("Start:\t Getting MiRNA Data")
-    mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',')
-    mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape)
-    mirnaDset[...] = mirnaData
-    mirnaDset.attrs["name"] = "MiRNA_"
-    mirnaDset.attrs["sparse"] = False
-    mirnaDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting MiRNA Data")
-
-    logging.debug("Start:\t Getting RNASeq Data")
-    rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',')
-    uselessRows = []
-    for rowIndex, row in enumerate(np.transpose(rnaseqData)):
-        if not row.any():
-            uselessRows.append(rowIndex)
-    usefulRows = [usefulRowIndex for usefulRowIndex in range(rnaseqData.shape[1]) if usefulRowIndex not in uselessRows]
-    rnaseqDset = datasetFile.create_dataset("View2", (rnaseqData.shape[0], len(usefulRows)))
-    rnaseqDset[...] = rnaseqData[:, usefulRows]
-    rnaseqDset.attrs["name"] = "RNASeq_"
-    rnaseqDset.attrs["sparse"] = False
-    rnaseqDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting RNASeq Data")
-
-    logging.debug("Start:\t Getting Clinical Data")
-    clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',')
-    clinicalDset = datasetFile.create_dataset("View3", clinical.shape)
-    clinicalDset[...] = clinical
-    clinicalDset.attrs["name"] = "Clinic"
-    clinicalDset.attrs["sparse"] = False
-    clinicalDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Clinical Data")
-
-    labelFile = open(path + 'brca_labels_triple-negatif.csv')
-    labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
-    labelsDset = datasetFile.create_dataset("Labels", labels.shape)
-    labelsDset[...] = labels
-    labelsDset.attrs["name"] = "Labels"
-
-    metaDataGrp = datasetFile.create_group("Metadata")
-    metaDataGrp.attrs["nbView"] = 4
-    metaDataGrp.attrs["nbClass"] = 2
-    metaDataGrp.attrs["datasetLength"] = len(labels)
-    labelDictionary = {0: "No", 1: "Yes"}
-    datasetFile.close()
-    datasetFile = h5py.File(path + "MultiOmic.hdf5", "r")
-    # datasetFile = getPseudoRNASeq(datasetFile)
-    return datasetFile, labelDictionary
-
-
-def getVector(nbGenes):
-    argmax = [0, 0]
-    maxi = 0
-    for i in range(nbGenes):
-        for j in range(nbGenes):
-            if j == i + 1:
-                value = (i + 1) * (nbGenes - j)
-                if value > maxi:
-                    maxi = value
-                    argmax = [i, j]
-    i, j = argmax
-    vectorLeft = np.zeros(nbGenes, dtype=bool)
-    vectorLeft[:i + 1] = np.ones(i + 1, dtype=bool)
-    vectorSup = np.zeros(nbGenes, dtype=bool)
-    vectorSup[j:] = np.ones(nbGenes - j, dtype=bool)
-    matrixSup = j
-    matrixInf = nbGenes - j
-    return vectorLeft, matrixSup, matrixInf
-
-
-def findClosestPowerOfTwo(factorizationParam):
-    power = 1
-    while factorizationParam - power > 0:
-        power *= 2
-    if abs(factorizationParam - power) < abs(factorizationParam - power / 2):
-        return power
-    else:
-        return power / 2
-
-
-def easyFactorize(nbGenes, factorizationParam, t=0):
-    if math.log(factorizationParam + 1, 2) % 1 == 0.0:
-        pass
-    else:
-        factorizationParam = findClosestPowerOfTwo(factorizationParam) - 1
-
-    if nbGenes == 2:
-        return 1, np.array([True, False])
-
-    if nbGenes == 3:
-        return 1, np.array([True, True, False])
-
-    if factorizationParam == 1:
-        t = 1
-        return t, getVector(nbGenes)[0]
-
-    vectorLeft, matrixSup, matrixInf = getVector(nbGenes)
-
-    t_, vectorLeftSup = easyFactorize(matrixSup, (factorizationParam - 1) / 2, t=t)
-    t__, vectorLeftInf = easyFactorize(matrixInf, (factorizationParam - 1) / 2, t=t)
-
-    factorLeft = np.zeros((nbGenes, t_ + t__ + 1), dtype=bool)
-
-    factorLeft[:matrixSup, :t_] = vectorLeftSup.reshape(factorLeft[:matrixSup, :t_].shape)
-    if nbGenes % 2 == 1:
-        factorLeft[matrixInf - 1:, t_:t__ + t_] = vectorLeftInf.reshape(factorLeft[matrixInf - 1:, t_:t__ + t_].shape)
-    else:
-        factorLeft[matrixInf:, t_:t__ + t_] = vectorLeftInf.reshape(factorLeft[matrixInf:, t_:t__ + t_].shape)
-    factorLeft[:, t__ + t_] = vectorLeft
-
-    # factorSup = np.zeros((t_+t__+1, nbGenes), dtype=bool)
-    #
-    # factorSup[:t_, :matrixSup] = vectorSupLeft.reshape(factorSup[:t_, :matrixSup].shape)
-    # if nbGenes%2==1:
-    #     factorSup[t_:t__+t_, matrixInf-1:] = vectorSupRight.reshape(factorSup[t_:t__+t_, matrixInf-1:].shape)
-    # else:
-    #     factorSup[t_:t__+t_, matrixInf:] = vectorSupRight.reshape(factorSup[t_:t__+t_, matrixInf:].shape)
-    # factorSup[t__+t_, :] = vectorSup
-    return t__ + t_ + 1, factorLeft  # , factorSup
-
-
-def getBaseMatrices(nbGenes, factorizationParam, path):
-    t, factorLeft = easyFactorize(nbGenes, factorizationParam)
-    np.savetxt(path + "factorLeft--n-" + str(nbGenes) + "--k-" + str(factorizationParam) + ".csv", factorLeft,
-               delimiter=",")
-    return factorLeft
-
-
-def findParams(arrayLen, nbPatients, randomState, maxNbBins=2000, minNbBins=10, maxLenBin=70000, minOverlapping=1,
-               minNbBinsOverlapped=0, maxNbSolutions=30):
-    results = []
-    if arrayLen * arrayLen * 10 / 100 > minNbBinsOverlapped * nbPatients:
-        for lenBin in range(arrayLen - 1):
-            lenBin += 1
-            if lenBin < maxLenBin and minNbBins * lenBin < arrayLen:
-                for overlapping in sorted(range(lenBin - 1), reverse=True):
-                    overlapping += 1
-                    if overlapping > minOverlapping and lenBin % (lenBin - overlapping) == 0:
-                        for nbBins in sorted(range(arrayLen - 1), reverse=True):
-                            nbBins += 1
-                            if nbBins < maxNbBins:
-                                if arrayLen == (nbBins - 1) * (lenBin - overlapping) + lenBin:
-                                    results.append({"nbBins": nbBins, "overlapping": overlapping, "lenBin": lenBin})
-                                    if len(results) == maxNbSolutions:
-                                        params = results[randomState.randrange(len(results))]
-                                        return params
-
-
-def findBins(nbBins=142, overlapping=493, lenBin=986):
-    bins = []
-    for binIndex in range(nbBins):
-        bins.append([i + binIndex * (lenBin - overlapping) for i in range(lenBin)])
-    return bins
-
-
-def getBins(array, bins, lenBin, overlapping):
-    binnedcoord = []
-    for coordIndex, coord in enumerate(array):
-        nbBinsFull = 0
-        for binIndex, bin_ in enumerate(bins):
-            if coordIndex in bin_:
-                binnedcoord.append(binIndex + (coord * len(bins)))
-
-    return np.array(binnedcoord)
-
-
-def makeSortedBinsMatrix(nbBins, lenBins, overlapping, arrayLen, path):
-    sortedBinsMatrix = np.zeros((arrayLen, nbBins), dtype=np.uint8)
-    step = lenBins - overlapping
-    for binIndex in range(nbBins):
-        sortedBinsMatrix[step * binIndex:lenBins + (step * binIndex), binIndex] = np.ones(lenBins, dtype=np.uint8)
-    np.savetxt(path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-               sortedBinsMatrix, delimiter=",")
-    return sortedBinsMatrix
-
-
-def makeSparseTotalMatrix(sortedRNASeq, randomState):
-    nbPatients, nbGenes = sortedRNASeq.shape
-    params = findParams(nbGenes, nbPatients, randomState)
-    nbBins = params["nbBins"]
-    overlapping = params["overlapping"]
-    lenBin = params["lenBin"]
-    bins = findBins(nbBins, overlapping, lenBin)
-    sparseFull = sparse.csc_matrix((nbPatients, nbGenes * nbBins))
-    for patientIndex, patient in enumerate(sortedRNASeq):
-        columnIndices = getBins(patient, bins, lenBin, overlapping)
-        rowIndices = np.zeros(len(columnIndices), dtype=int) + patientIndex
-        data = np.ones(len(columnIndices), dtype=bool)
-        sparseFull = sparseFull + sparse.csc_matrix((data, (rowIndices, columnIndices)),
-                                                    shape=(nbPatients, nbGenes * nbBins))
-    return sparseFull
-
-
-def getAdjacenceMatrix(RNASeqRanking, sotredRNASeq, k=2):
-    k = int(k) / 2 * 2
-    indices = np.zeros((RNASeqRanking.shape[0] * k * RNASeqRanking.shape[1]), dtype=int)
-    data = np.ones((RNASeqRanking.shape[0] * k * RNASeqRanking.shape[1]), dtype=bool)
-    indptr = np.zeros(RNASeqRanking.shape[0] + 1, dtype=int)
-    nbGenes = RNASeqRanking.shape[1]
-    pointer = 0
-    for patientIndex in range(RNASeqRanking.shape[0]):
-        for i in range(nbGenes):
-            for j in range(k / 2):
-                try:
-                    indices[pointer] = RNASeqRanking[
-                                           patientIndex, (sotredRNASeq[patientIndex, i] - (j + 1))] + i * nbGenes
-                    pointer += 1
-                except:
-                    pass
-                try:
-                    indices[pointer] = RNASeqRanking[
-                                           patientIndex, (sotredRNASeq[patientIndex, i] + (j + 1))] + i * nbGenes
-                    pointer += 1
-                except:
-                    pass
-                    # elif i<=k:
-                    # 	indices.append(patient[1]+patient[i]*nbGenes)
-                    # 	data.append(True)
-                    # elif i==nbGenes-1:
-                    # 	indices.append(patient[i-1]+patient[i]*nbGenes)
-                    # 	data.append(True)
-        indptr[patientIndex + 1] = pointer
-
-    mat = sparse.csr_matrix((data, indices, indptr),
-                            shape=(RNASeqRanking.shape[0], RNASeqRanking.shape[1] * RNASeqRanking.shape[1]), dtype=bool)
-    return mat
-
-
-def getKMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
-    datasetFile = h5py.File(path + "KMultiOmic.hdf5", "w")
-
-    # logging.debug("Start:\t Getting Methylation Data")
-    methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
-    logging.debug("Done:\t Getting Methylation Data")
-
-    logging.debug("Start:\t Getting Sorted Methyl Data")
-    Methyl = methylData
-    sortedMethylGeneIndices = np.zeros(methylData.shape, dtype=int)
-    MethylRanking = np.zeros(methylData.shape, dtype=int)
-    for exampleIndex, exampleArray in enumerate(Methyl):
-        sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray))
-        sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1))
-        sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int)
-        sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray
-        for geneIndex in range(Methyl.shape[1]):
-            MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex
-    logging.debug("Done:\t Getting Sorted Methyl Data")
-
-    logging.debug("Start:\t Getting Binarized Methyl Data")
-    k = findClosestPowerOfTwo(9) - 1
-    try:
-        factorizedLeftBaseMatrix = np.genfromtxt(
-            path + "factorLeft--n-" + str(methylData.shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
-    except:
-        factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
-    bMethylDset = datasetFile.create_dataset("View0",
-                                             (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
-                                             dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
-        bMethylDset[patientIndex] = patientMatrix.flatten()
-    bMethylDset.attrs["name"] = "BMethyl" + str(k)
-    bMethylDset.attrs["sparse"] = False
-    bMethylDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized Methyl Data")
-
-    logging.debug("Start:\t Getting Binned Methyl Data")
-    lenBins = 3298
-    nbBins = 9
-    overlapping = 463
-    try:
-        sortedBinsMatrix = np.genfromtxt(
-            path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-            delimiter=",")
-    except:
-        sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, methylData.shape[1], path)
-    binnedMethyl = datasetFile.create_dataset("View1", (
-        sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
-        binnedMethyl[patientIndex] = patientMatrix.flatten()
-    binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins)
-    binnedMethyl.attrs["sparse"] = False
-    binnedMethyl.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binned Methyl Data")
-
-    logging.debug("Start:\t Getting Binarized Methyl Data")
-    k = findClosestPowerOfTwo(17) - 1
-    try:
-        factorizedLeftBaseMatrix = np.genfromtxt(
-            path + "factorLeft--n-" + str(methylData.shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
-    except:
-        factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
-    bMethylDset = datasetFile.create_dataset("View2",
-                                             (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
-                                             dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
-        bMethylDset[patientIndex] = patientMatrix.flatten()
-    bMethylDset.attrs["name"] = "BMethyl" + str(k)
-    bMethylDset.attrs["sparse"] = False
-    bMethylDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized Methyl Data")
-
-    logging.debug("Start:\t Getting Binned Methyl Data")
-    lenBins = 2038
-    nbBins = 16
-    overlapping = 442
-    try:
-        sortedBinsMatrix = np.genfromtxt(
-            path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-            delimiter=",")
-    except:
-        sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, methylData.shape[1], path)
-    binnedMethyl = datasetFile.create_dataset("View3", (
-        sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
-        binnedMethyl[patientIndex] = patientMatrix.flatten()
-    binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins)
-    binnedMethyl.attrs["sparse"] = False
-    binnedMethyl.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binned Methyl Data")
-
-    labelFile = open(path + 'brca_labels_triple-negatif.csv')
-    labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
-    labelsDset = datasetFile.create_dataset("Labels", labels.shape)
-    labelsDset[...] = labels
-    labelsDset.attrs["name"] = "Labels"
-
-    metaDataGrp = datasetFile.create_group("Metadata")
-    metaDataGrp.attrs["nbView"] = 4
-    metaDataGrp.attrs["nbClass"] = 2
-    metaDataGrp.attrs["datasetLength"] = len(labels)
-    labelDictionary = {0: "No", 1: "Yes"}
-
-    datasetFile.close()
-    datasetFile = h5py.File(path + "KMultiOmic.hdf5", "r")
-
-    return datasetFile, labelDictionary
-
-
-def getKMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
-    datasetFile = h5py.File(path + "KMultiOmic.hdf5", "r")
-    labelDictionary = {0: "No", 1: "Yes"}
-    return datasetFile, labelDictionary
-
-
-def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
-    datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "w")
-
-    logging.debug("Start:\t Getting Methylation Data")
-    methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
-    methylDset = datasetFile.create_dataset("View0", methylData.shape)
-    methylDset[...] = methylData
-    methylDset.attrs["name"] = "Methyl_"
-    methylDset.attrs["sparse"] = False
-    methylDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Methylation Data")
-
-    logging.debug("Start:\t Getting Sorted Methyl Data")
-    Methyl = datasetFile["View0"][...]
-    sortedMethylGeneIndices = np.zeros(datasetFile.get("View0").shape, dtype=int)
-    MethylRanking = np.zeros(datasetFile.get("View0").shape, dtype=int)
-    for exampleIndex, exampleArray in enumerate(Methyl):
-        sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray))
-        sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1))
-        sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int)
-        sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray
-        for geneIndex in range(Methyl.shape[1]):
-            MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex
-    mMethylDset = datasetFile.create_dataset("View10", sortedMethylGeneIndices.shape, data=sortedMethylGeneIndices)
-    mMethylDset.attrs["name"] = "SMethyl"
-    mMethylDset.attrs["sparse"] = False
-    mMethylDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Sorted Methyl Data")
-
-    logging.debug("Start:\t Getting Binarized Methyl Data")
-    k = findClosestPowerOfTwo(58) - 1
-    try:
-        factorizedLeftBaseMatrix = np.genfromtxt(
-            path + "factorLeft--n-" + str(datasetFile.get("View0").shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
-    except:
-        factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
-    bMethylDset = datasetFile.create_dataset("View11",
-                                             (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
-                                             dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
-        bMethylDset[patientIndex] = patientMatrix.flatten()
-    bMethylDset.attrs["name"] = "BMethyl"
-    bMethylDset.attrs["sparse"] = False
-    bMethylDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized Methyl Data")
-
-    logging.debug("Start:\t Getting Binned Methyl Data")
-    lenBins = 2095
-    nbBins = 58
-    overlapping = 1676
-    try:
-        sortedBinsMatrix = np.genfromtxt(
-            path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-            delimiter=",")
-    except:
-        sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View0").shape[1], path)
-    binnedMethyl = datasetFile.create_dataset("View12", (
-        sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
-        patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
-        binnedMethyl[patientIndex] = patientMatrix.flatten()
-    binnedMethyl.attrs["name"] = "bMethyl"
-    binnedMethyl.attrs["sparse"] = False
-    binnedMethyl.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binned Methyl Data")
-
-    logging.debug("Start:\t Getting MiRNA Data")
-    mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',')
-    mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape)
-    mirnaDset[...] = mirnaData
-    mirnaDset.attrs["name"] = "MiRNA__"
-    mirnaDset.attrs["sparse"] = False
-    mirnaDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting MiRNA Data")
-
-    logging.debug("Start:\t Getting Sorted MiRNA Data")
-    MiRNA = datasetFile["View1"][...]
-    sortedMiRNAGeneIndices = np.zeros(datasetFile.get("View1").shape, dtype=int)
-    MiRNARanking = np.zeros(datasetFile.get("View1").shape, dtype=int)
-    for exampleIndex, exampleArray in enumerate(MiRNA):
-        sortedMiRNADictionary = dict((index, value) for index, value in enumerate(exampleArray))
-        sortedMiRNAIndicesDict = sorted(sortedMiRNADictionary.items(), key=operator.itemgetter(1))
-        sortedMiRNAIndicesArray = np.array([index for (index, value) in sortedMiRNAIndicesDict], dtype=int)
-        sortedMiRNAGeneIndices[exampleIndex] = sortedMiRNAIndicesArray
-        for geneIndex in range(MiRNA.shape[1]):
-            MiRNARanking[exampleIndex, sortedMiRNAIndicesArray[geneIndex]] = geneIndex
-    mmirnaDset = datasetFile.create_dataset("View7", sortedMiRNAGeneIndices.shape, data=sortedMiRNAGeneIndices)
-    mmirnaDset.attrs["name"] = "SMiRNA_"
-    mmirnaDset.attrs["sparse"] = False
-    mmirnaDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Sorted MiRNA Data")
-
-    logging.debug("Start:\t Getting Binarized MiRNA Data")
-    k = findClosestPowerOfTwo(517) - 1
-    try:
-        factorizedLeftBaseMatrix = np.genfromtxt(
-            path + "factorLeft--n-" + str(datasetFile.get("View1").shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
-    except:
-        factorizedLeftBaseMatrix = getBaseMatrices(mirnaData.shape[1], k, path)
-    bmirnaDset = datasetFile.create_dataset("View8",
-                                            (sortedMiRNAGeneIndices.shape[0], sortedMiRNAGeneIndices.shape[1] * k),
-                                            dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMiRNAGeneIndices):
-        patientMatrix = np.zeros((sortedMiRNAGeneIndices.shape[1], k), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
-        bmirnaDset[patientIndex] = patientMatrix.flatten()
-    bmirnaDset.attrs["name"] = "BMiRNA_"
-    bmirnaDset.attrs["sparse"] = False
-    bmirnaDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized MiRNA Data")
-
-    logging.debug("Start:\t Getting Binned MiRNA Data")
-    lenBins = 14
-    nbBins = 517
-    overlapping = 12
-    try:
-        sortedBinsMatrix = np.genfromtxt(
-            path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-            delimiter=",")
-    except:
-        sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View1").shape[1], path)
-    binnedMiRNA = datasetFile.create_dataset("View9", (
-        sortedMiRNAGeneIndices.shape[0], sortedMiRNAGeneIndices.shape[1] * nbBins), dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedMiRNAGeneIndices):
-        patientMatrix = np.zeros((sortedMiRNAGeneIndices.shape[1], nbBins), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
-        binnedMiRNA[patientIndex] = patientMatrix.flatten()
-    binnedMiRNA.attrs["name"] = "bMiRNA_"
-    binnedMiRNA.attrs["sparse"] = False
-    binnedMiRNA.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binned MiRNA Data")
-
-    logging.debug("Start:\t Getting RNASeq Data")
-    rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',')
-    uselessRows = []
-    for rowIndex, row in enumerate(np.transpose(rnaseqData)):
-        if not row.any():
-            uselessRows.append(rowIndex)
-    usefulRows = [usefulRowIndex for usefulRowIndex in range(rnaseqData.shape[1]) if usefulRowIndex not in uselessRows]
-    rnaseqDset = datasetFile.create_dataset("View2", (rnaseqData.shape[0], len(usefulRows)))
-    rnaseqDset[...] = rnaseqData[:, usefulRows]
-    rnaseqDset.attrs["name"] = "RNASeq_"
-    rnaseqDset.attrs["sparse"] = False
-    rnaseqDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting RNASeq Data")
-
-    logging.debug("Start:\t Getting Sorted RNASeq Data")
-    RNASeq = datasetFile["View2"][...]
-    sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int)
-    RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int)
-    for exampleIndex, exampleArray in enumerate(RNASeq):
-        sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray))
-        sortedRNASeqIndicesDict = sorted(sortedRNASeqDictionary.items(), key=operator.itemgetter(1))
-        sortedRNASeqIndicesArray = np.array([index for (index, value) in sortedRNASeqIndicesDict], dtype=int)
-        sortedRNASeqGeneIndices[exampleIndex] = sortedRNASeqIndicesArray
-        for geneIndex in range(RNASeq.shape[1]):
-            RNASeqRanking[exampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex
-    mrnaseqDset = datasetFile.create_dataset("View4", sortedRNASeqGeneIndices.shape, data=sortedRNASeqGeneIndices)
-    mrnaseqDset.attrs["name"] = "SRNASeq"
-    mrnaseqDset.attrs["sparse"] = False
-    mrnaseqDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Sorted RNASeq Data")
-
-    logging.debug("Start:\t Getting Binarized RNASeq Data")
-    k = findClosestPowerOfTwo(100) - 1
-    try:
-        factorizedLeftBaseMatrix = np.genfromtxt(
-            path + "factorLeft--n-" + str(datasetFile.get("View2").shape[1]) + "--k-" + str(100) + ".csv",
-            delimiter=',')
-    except:
-        factorizedLeftBaseMatrix = getBaseMatrices(rnaseqData.shape[1], k, path)
-    brnaseqDset = datasetFile.create_dataset("View5",
-                                             (sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1] * k),
-                                             dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
-        patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], k), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
-        brnaseqDset[patientIndex] = patientMatrix.flatten()
-    brnaseqDset.attrs["name"] = "BRNASeq"
-    brnaseqDset.attrs["sparse"] = False
-    brnaseqDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized RNASeq Data")
-
-    logging.debug("Start:\t Getting Binned RNASeq Data")
-    lenBins = 986
-    nbBins = 142
-    overlapping = 493
-    try:
-        sortedBinsMatrix = np.genfromtxt(
-            path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
-            delimiter=",")
-    except:
-        sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View2").shape[1], path)
-    binnedRNASeq = datasetFile.create_dataset("View6", (
-        sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1] * nbBins), dtype=np.uint8)
-    for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
-        patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], nbBins), dtype=np.uint8)
-        for lineIndex, geneIndex in enumerate(patientSortedArray):
-            patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
-        binnedRNASeq[patientIndex] = patientMatrix.flatten()
-    binnedRNASeq.attrs["name"] = "bRNASeq"
-    binnedRNASeq.attrs["sparse"] = False
-    binnedRNASeq.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binned RNASeq Data")
-
-    logging.debug("Start:\t Getting Clinical Data")
-    clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',')
-    clinicalDset = datasetFile.create_dataset("View3", clinical.shape)
-    clinicalDset[...] = clinical
-    clinicalDset.attrs["name"] = "Clinic_"
-    clinicalDset.attrs["sparse"] = False
-    clinicalDset.attrs["binary"] = False
-    logging.debug("Done:\t Getting Clinical Data")
-
-    logging.debug("Start:\t Getting Binarized Clinical Data")
-    binarized_clinical = np.zeros((347, 1951), dtype=np.uint8)
-    nb_already_done = 0
-    for feqtureIndex, feature in enumerate(np.transpose(clinical)):
-        featureSet = set(feature)
-        featureDict = dict((val, valIndex) for valIndex, val in enumerate(list(featureSet)))
-        for valueIndex, value in enumerate(feature):
-            binarized_clinical[valueIndex, featureDict[value] + nb_already_done] = 1
-        nb_already_done += len(featureSet)
-    bClinicalDset = datasetFile.create_dataset("View13", binarized_clinical.shape, dtype=np.uint8,
-                                               data=binarized_clinical)
-    bClinicalDset.attrs["name"] = "bClinic"
-    bClinicalDset.attrs["sparse"] = False
-    bClinicalDset.attrs["binary"] = True
-    logging.debug("Done:\t Getting Binarized Clinical Data")
-
-    # logging.debug("Start:\t Getting Adjacence RNASeq Data")
-    # sparseAdjRNASeq = getAdjacenceMatrix(RNASeqRanking, sortedRNASeqGeneIndices, k=findClosestPowerOfTwo(10)-1)
-    # sparseAdjRNASeqGrp = datasetFile.create_group("View6")
-    # dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data)
-    # indicesDset = sparseAdjRNASeqGrp.create_dataset("indices",
-    # sparseAdjRNASeq.indices.shape, data=sparseAdjRNASeq.indices)
-    # indptrDset = sparseAdjRNASeqGrp.create_dataset("indptr",
-    # sparseAdjRNASeq.indptr.shape, data=sparseAdjRNASeq.indptr)
-    # sparseAdjRNASeqGrp.attrs["name"]="ARNASeq"
-    # sparseAdjRNASeqGrp.attrs["sparse"]=True
-    # sparseAdjRNASeqGrp.attrs["shape"]=sparseAdjRNASeq.shape
-    # logging.debug("Done:\t Getting Adjacence RNASeq Data")
-
-    labelFile = open(path + 'brca_labels_triple-negatif.csv')
-    labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
-    labelsDset = datasetFile.create_dataset("Labels", labels.shape)
-    labelsDset[...] = labels
-    labelsDset.attrs["name"] = "Labels"
-
-    metaDataGrp = datasetFile.create_group("Metadata")
-    metaDataGrp.attrs["nbView"] = 14
-    metaDataGrp.attrs["nbClass"] = 2
-    metaDataGrp.attrs["datasetLength"] = len(labels)
-    labelDictionary = {0: "No", 1: "Yes"}
-
-    datasetFile.close()
-    datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "r")
-
-    return datasetFile, labelDictionary
-
-
-def getModifiedMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
-    datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "r")
-    labelDictionary = {0: "No", 1: "Yes"}
-    return datasetFile, labelDictionary
-
-
-def getMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
-    datasetFile = h5py.File(path + "MultiOmic.hdf5", "r")
-    labelDictionary = {0: "No", 1: "Yes"}
-    return datasetFile, labelDictionary
-
-
-def copyHDF5(pathF, name, nbCores):
-    datasetFile = h5py.File(pathF + name + ".hdf5", "r")
-    for coreIndex in range(nbCores):
-        newDataSet = h5py.File(pathF + name + str(coreIndex) + ".hdf5", "w")
-        for dataset in datasetFile:
-            datasetFile.copy("/" + dataset, newDataSet["/"])
-        newDataSet.close()
-
-
-def datasetsAlreadyExist(pathF, name, nbCores):
-    allDatasetExist = True
-    for coreIndex in range(nbCores):
-        import os.path
-        allDatasetExist *= os.path.isfile(pathF + name + str(coreIndex) + ".hdf5")
-    return allDatasetExist
-
-
-def deleteHDF5(pathF, name, nbCores):
-    for coreIndex in range(nbCores):
-        os.remove(pathF + name + str(coreIndex) + ".hdf5")
-
-# def getOneViewFromDB(viewName, pathToDB, DBName):
-#     view = np.genfromtxt(pathToDB + DBName +"-" + viewName, delimiter=';')
-#     return view
-
-
-# def getClassLabels(pathToDB, DBName):
-#     labels = np.genfromtxt(pathToDB + DBName + "-" + "ClassLabels.csv", delimiter=';')
-#     return labels
-
-
-# def getDataset(pathToDB, viewNames, DBName):
-#     dataset = []
-#     for viewName in viewNames:
-#         dataset.append(getOneViewFromDB(viewName, pathToDB, DBName))
-#     return np.array(dataset)
-
-
-# def getAwaLabels(nbLabels, pathToAwa):
-#     labelsFile = open(pathToAwa + 'Animals_with_Attributes/classes.txt', 'U')
-#     linesFile = [''.join(line.strip().split()).translate(None, digits) for line in labelsFile.readlines()]
-#     return linesFile
-
-
-# def getAwaDBcsv(views, pathToAwa, nameDB, nbLabels, LABELS_NAMES):
-#     awaLabels = getAwaLabels(nbLabels, pathToAwa)
-#     nbView = len(views)
-#     nbMaxLabels = len(awaLabels)
-#     if nbLabels == -1:
-#         nbLabels = nbMaxLabels
-#     nbNamesGiven = len(LABELS_NAMES)
-#     if nbNamesGiven > nbLabels:
-#         labelDictionary = {i:LABELS_NAMES[i] for i in np.arange(nbLabels)}
-#     elif nbNamesGiven < nbLabels and nbLabels <= nbMaxLabels:
-#         if LABELS_NAMES != ['']:
-#             labelDictionary = {i:LABELS_NAMES[i] for i in np.arange(nbNamesGiven)}
-#         else:
-#             labelDictionary = {}
-#             nbNamesGiven = 0
-#         nbLabelsToAdd = nbLabels-nbNamesGiven
-#         while nbLabelsToAdd > 0:
-#             currentLabel = random.choice(awaLabels)
-#             if currentLabel not in labelDictionary.values():
-#                 labelDictionary[nbLabels-nbLabelsToAdd]=currentLabel
-#                 nbLabelsToAdd -= 1
-#             else:
-#                 pass
+# def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState):
+#     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
+#     labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv')
+#     if len(LABELS_NAMES) != NB_CLASS:
+#         nbLabelsAvailable = 0
+#         for l in labelsNamesFile:
+#             nbLabelsAvailable += 1
+#         LABELS_NAMES = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if
+#                         lineIdx in randomState.randint(nbLabelsAvailable, size=NB_CLASS)]
+#     fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=';').astype(int)
+#     labelsDictionary = dict((classIndice, labelName) for (classIndice, labelName) in
+#                             [(int(line.strip().split(";")[0]), line.strip().split(";")[1]) for lineIndex, line in
+#                              labelsNamesFile if line.strip().split(";")[0] in LABELS_NAMES])
+#     if len(set(fullLabels)) > NB_CLASS:
+#         usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
 #     else:
-#         labelDictionary = {i: LABELS_NAMES[i] for i in np.arange(nbNamesGiven)}
-#     viewDictionary = {i: views[i] for i in np.arange(nbView)}
-#     rawData = []
-#     labels = []
-#     nbExample = 0
-#     for view in np.arange(nbView):
-#         viewData = []
-#         for labelIndex in np.arange(nbLabels):
-#             pathToExamples = pathToAwa + 'Animals_with_Attributes/Features/' + viewDictionary[view] + '/' + \
-#                              labelDictionary[labelIndex] + '/'
-#             examples = os.listdir(pathToExamples)
-#             if view == 0:
-#                 nbExample += len(examples)
-#             for example in examples:
-#                 if viewDictionary[view]=='decaf':
-#                     exampleFile = open(pathToExamples + example)
-#                     viewData.append([float(line.strip()) for line in exampleFile])
-#                 else:
-#                     exampleFile = open(pathToExamples + example)
-#                     viewData.append([[float(coordinate) for coordinate in raw.split()] for raw in exampleFile][0])
-#                 if view == 0:
-#                     labels.append(labelIndex)
-#
-#         rawData.append(np.array(viewData))
-#     data = rawData
-#     DATASET_LENGTH = len(labels)
-#     return data, labels, labelDictionary, DATASET_LENGTH
-#
-#
-# def getDbfromCSV(path):
-#     files = os.listdir(path)
-#     DATA = np.zeros((3,40,2))
-#     for file in files:
-#         if file[-9:]=='moins.csv' and file[:7]=='sample1':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[0, i] = np.array([float(coord) for coord in x.strip().split('\t')])
-#         if file[-9:]=='moins.csv' and file[:7]=='sample2':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[1, i] = np.array([float(coord) for coord in x.strip().split('\t')])
-#         if file[-9:]=='moins.csv' and file[:7]=='sample3':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[2, i] = np.array([float(coord) for coord in x.strip().split('\t')])
-#
-#     for file in files:
-#         if file[-8:]=='plus.csv' and file[:7]=='sample1':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[0, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
-#         if file[-8:]=='plus.csv' and file[:7]=='sample2':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[1, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
-#         if file[-8:]=='plus.csv' and file[:7]=='sample3':
-#             X = open(path+file)
-#             for x, i in zip(X, range(20)):
-#                 DATA[2, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
-#     LABELS = np.zeros(40)
-#     LABELS[:20]=LABELS[:20]+1
-#     return DATA, LABELS
-
-# def makeArrayFromTriangular(pseudoRNASeqMatrix):
-#     matrixShape = len(pseudoRNASeqMatrix[0,:])
-#     exampleArray = np.array(((matrixShape-1)*matrixShape)/2)
-#     arrayIndex = 0
-#     for i in range(matrixShape-1):
-#         for j in range(i+1, matrixShape):
-#             exampleArray[arrayIndex]=pseudoRNASeqMatrix[i,j]
-#             arrayIndex += 1
-#     return exampleArray
-
+#         usedIndices = range(len(fullLabels))
+#     for viewIndex, view in enumerate(views):
+#         viewFile = pathF + nameDB + "-" + view + '.csv'
+#         viewMatrix = np.array(np.genfromtxt(viewFile, delimiter=';'))[usedIndices, :]
+#         viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewMatrix.shape, data=viewMatrix)
+#         viewDset.attrs["name"] = view
+#
+#     labelsDset = datasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
+#
+#     metaDataGrp = datasetFile.create_group("Metadata")
+#     metaDataGrp.attrs["nbView"] = len(views)
+#     metaDataGrp.attrs["nbClass"] = NB_CLASS
+#     metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
+#     datasetFile.close()
+#     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+#     return datasetFile, labelsDictionary
 
-# def getPseudoRNASeq(dataset):
-#     nbGenes = len(dataset["/View2/matrix"][0, :])
-#     pseudoRNASeq = np.zeros((dataset["/datasetlength"][...], ((nbGenes - 1) * nbGenes) / 2), dtype=bool_)
-#     for exampleIndex in xrange(dataset["/datasetlength"][...]):
-#         arrayIndex = 0
-#         for i in xrange(nbGenes):
-#             for j in xrange(nbGenes):
-#                 if i > j:
-#                     pseudoRNASeq[exampleIndex, arrayIndex] =
-# dataset["/View2/matrix"][exampleIndex, j] < dataset["/View2/matrix"][exampleIndex, i]
-#                     arrayIndex += 1
-#     dataset["/View4/matrix"] = pseudoRNASeq
-#     dataset["/View4/name"] = "pseudoRNASeq"
-#     return dataset
+#--------------------------------------------#
+# The functions below are no longer used,    #
+# but the binarization methods they          #
+# contain must be kept                       #
+#--------------------------------------------#
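+# A minimal sketch of how the kept binarization was used (names hypothetical,
+# after the patient loops that consumed getBaseMatrices): each example's sorted
+# feature ranks select rows of the precomputed binary code matrix, which are
+# flattened into one binary view:
+#
+#     factorLeft = getBaseMatrices(nbFeatures, k, path)  # (nbFeatures, k) binary codes
+#     patientMatrix = np.zeros((nbFeatures, k), dtype=np.uint8)
+#     for rank, featureIndex in enumerate(sortedFeatureIndices):
+#         patientMatrix[featureIndex] = factorLeft[rank, :]
+#     binaryView = patientMatrix.flatten()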
 
 
-# def allSame(array):
-#     value = array[0]
-#     areAllSame = True
-#     for i in array:
-#         if i != value:
-#             areAllSame = False
-#     return areAllSame
+# def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, randomState):
+#     datasetFile = h5py.File(path + "MultiOmic.hdf5", "w")
+#
+#     logging.debug("Start:\t Getting Methylation Data")
+#     methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
+#     methylDset = datasetFile.create_dataset("View0", methylData.shape)
+#     methylDset[...] = methylData
+#     methylDset.attrs["name"] = "Methyl"
+#     methylDset.attrs["sparse"] = False
+#     methylDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Methylation Data")
+#
+#     logging.debug("Start:\t Getting MiRNA Data")
+#     mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',')
+#     mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape)
+#     mirnaDset[...] = mirnaData
+#     mirnaDset.attrs["name"] = "MiRNA_"
+#     mirnaDset.attrs["sparse"] = False
+#     mirnaDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting MiRNA Data")
+#
+#     logging.debug("Start:\t Getting RNASeq Data")
+#     rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',')
+#     uselessRows = []
+#     for rowIndex, row in enumerate(np.transpose(rnaseqData)):
+#         if not row.any():
+#             uselessRows.append(rowIndex)
+#     usefulRows = [usefulRowIndex for usefulRowIndex in range(rnaseqData.shape[1]) if usefulRowIndex not in uselessRows]
+#     rnaseqDset = datasetFile.create_dataset("View2", (rnaseqData.shape[0], len(usefulRows)))
+#     rnaseqDset[...] = rnaseqData[:, usefulRows]
+#     rnaseqDset.attrs["name"] = "RNASeq_"
+#     rnaseqDset.attrs["sparse"] = False
+#     rnaseqDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting RNASeq Data")
+#
+#     logging.debug("Start:\t Getting Clinical Data")
+#     clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',')
+#     clinicalDset = datasetFile.create_dataset("View3", clinical.shape)
+#     clinicalDset[...] = clinical
+#     clinicalDset.attrs["name"] = "Clinic"
+#     clinicalDset.attrs["sparse"] = False
+#     clinicalDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Clinical Data")
+#
+#     labelFile = open(path + 'brca_labels_triple-negatif.csv')
+#     labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
+#     labelsDset = datasetFile.create_dataset("Labels", labels.shape)
+#     labelsDset[...] = labels
+#     labelsDset.attrs["name"] = "Labels"
+#
+#     metaDataGrp = datasetFile.create_group("Metadata")
+#     metaDataGrp.attrs["nbView"] = 4
+#     metaDataGrp.attrs["nbClass"] = 2
+#     metaDataGrp.attrs["datasetLength"] = len(labels)
+#     labelDictionary = {0: "No", 1: "Yes"}
+#     datasetFile.close()
+#     datasetFile = h5py.File(path + "MultiOmic.hdf5", "r")
+#     # datasetFile = getPseudoRNASeq(datasetFile)
+#     return datasetFile, labelDictionary
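+#
+# # Editor's sketch (not part of the original code): the file written by
+# # getMultiOmicDBcsv can be read back with the standard h5py API, e.g.:
+# #   dataset = h5py.File(path + "MultiOmic.hdf5", "r")
+# #   methyl = dataset["View0"][...]          # (nbPatients, nbFeatures) array
+# #   print(dataset["View0"].attrs["name"])   # "Methyl"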
+#
+#
+# def getVector(nbGenes):
+#     argmax = [0, 0]
+#     maxi = 0
+#     # only the pairs (i, j = i + 1) were ever used, so one loop suffices
+#     for i in range(nbGenes - 1):
+#         j = i + 1
+#         value = (i + 1) * (nbGenes - j)
+#         if value > maxi:
+#             maxi = value
+#             argmax = [i, j]
+#     i, j = argmax
+#     vectorLeft = np.zeros(nbGenes, dtype=bool)
+#     vectorLeft[:i + 1] = np.ones(i + 1, dtype=bool)
+#     matrixSup = j
+#     matrixInf = nbGenes - j
+#     return vectorLeft, matrixSup, matrixInf
+#
+#
+# def findClosestPowerOfTwo(factorizationParam):
+#     power = 1
+#     while factorizationParam - power > 0:
+#         power *= 2
+#     # integer division keeps the result an int under Python 3
+#     if abs(factorizationParam - power) < abs(factorizationParam - power // 2):
+#         return power
+#     else:
+#         return power // 2
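+#
+# # Worked example (editor's illustration): for factorizationParam = 9 the loop
+# # stops at power = 16, and since |9 - 16| >= |9 - 8| the function returns 8;
+# # the callers below then use findClosestPowerOfTwo(9) - 1 = 7 factors.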
+#
+#
+# def easyFactorize(nbGenes, factorizationParam, t=0):
+#     if math.log(factorizationParam + 1, 2) % 1 == 0.0:
+#         pass
+#     else:
+#         factorizationParam = findClosestPowerOfTwo(factorizationParam) - 1
+#
+#     if nbGenes == 2:
+#         return 1, np.array([True, False])
+#
+#     if nbGenes == 3:
+#         return 1, np.array([True, True, False])
+#
+#     if factorizationParam == 1:
+#         t = 1
+#         return t, getVector(nbGenes)[0]
+#
+#     vectorLeft, matrixSup, matrixInf = getVector(nbGenes)
+#
+#     # factorizationParam is (a power of two) - 1 here, so the halves are exact
+#     t_, vectorLeftSup = easyFactorize(matrixSup, (factorizationParam - 1) // 2, t=t)
+#     t__, vectorLeftInf = easyFactorize(matrixInf, (factorizationParam - 1) // 2, t=t)
+#
+#     factorLeft = np.zeros((nbGenes, t_ + t__ + 1), dtype=bool)
+#
+#     factorLeft[:matrixSup, :t_] = vectorLeftSup.reshape(factorLeft[:matrixSup, :t_].shape)
+#     if nbGenes % 2 == 1:
+#         factorLeft[matrixInf - 1:, t_:t__ + t_] = vectorLeftInf.reshape(factorLeft[matrixInf - 1:, t_:t__ + t_].shape)
+#     else:
+#         factorLeft[matrixInf:, t_:t__ + t_] = vectorLeftInf.reshape(factorLeft[matrixInf:, t_:t__ + t_].shape)
+#     factorLeft[:, t__ + t_] = vectorLeft
+#
+#     # factorSup = np.zeros((t_+t__+1, nbGenes), dtype=bool)
+#     #
+#     # factorSup[:t_, :matrixSup] = vectorSupLeft.reshape(factorSup[:t_, :matrixSup].shape)
+#     # if nbGenes%2==1:
+#     #     factorSup[t_:t__+t_, matrixInf-1:] = vectorSupRight.reshape(factorSup[t_:t__+t_, matrixInf-1:].shape)
+#     # else:
+#     #     factorSup[t_:t__+t_, matrixInf:] = vectorSupRight.reshape(factorSup[t_:t__+t_, matrixInf:].shape)
+#     # factorSup[t__+t_, :] = vectorSup
+#     return t__ + t_ + 1, factorLeft  # , factorSup
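+#
+# # Editor's note (an informal reading of the recursion above): factorLeft is an
+# # (nbGenes x t) boolean matrix; each column flags a contiguous block of the
+# # ranked genes, with the split point chosen by getVector, so the t columns
+# # form a coarse-to-fine interval code used as binary features below.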
+#
+#
+# def getBaseMatrices(nbGenes, factorizationParam, path):
+#     t, factorLeft = easyFactorize(nbGenes, factorizationParam)
+#     np.savetxt(path + "factorLeft--n-" + str(nbGenes) + "--k-" + str(factorizationParam) + ".csv", factorLeft,
+#                delimiter=",")
+#     return factorLeft
+#
+#
+# def findParams(arrayLen, nbPatients, randomState, maxNbBins=2000, minNbBins=10, maxLenBin=70000, minOverlapping=1,
+#                minNbBinsOverlapped=0, maxNbSolutions=30):
+#     results = []
+#     if arrayLen * arrayLen * 10 / 100 > minNbBinsOverlapped * nbPatients:
+#         for lenBin in range(arrayLen - 1):
+#             lenBin += 1
+#             if lenBin < maxLenBin and minNbBins * lenBin < arrayLen:
+#                 for overlapping in sorted(range(lenBin - 1), reverse=True):
+#                     overlapping += 1
+#                     if overlapping > minOverlapping and lenBin % (lenBin - overlapping) == 0:
+#                         for nbBins in sorted(range(arrayLen - 1), reverse=True):
+#                             nbBins += 1
+#                             if nbBins < maxNbBins:
+#                                 if arrayLen == (nbBins - 1) * (lenBin - overlapping) + lenBin:
+#                                     results.append({"nbBins": nbBins, "overlapping": overlapping, "lenBin": lenBin})
+#                                     if len(results) == maxNbSolutions:
+#                                         # np.random.RandomState has randint, not randrange
+#                                         params = results[randomState.randint(len(results))]
+#                                         return params
+#
+#
+# def findBins(nbBins=142, overlapping=493, lenBin=986):
+#     bins = []
+#     for binIndex in range(nbBins):
+#         bins.append([i + binIndex * (lenBin - overlapping) for i in range(lenBin)])
+#     return bins
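+#
+# # Worked example (editor's illustration): findBins(nbBins=3, overlapping=2,
+# # lenBin=4) gives [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]], which tiles an
+# # array of length (3 - 1) * (4 - 2) + 4 = 8 -- exactly the constraint that
+# # findParams enforces when it picks (nbBins, overlapping, lenBin).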
+#
+#
+# def getBins(array, bins, lenBin, overlapping):
+#     # lenBin and overlapping are unused here but kept for interface parity
+#     binnedcoord = []
+#     for coordIndex, coord in enumerate(array):
+#         for binIndex, bin_ in enumerate(bins):
+#             if coordIndex in bin_:
+#                 binnedcoord.append(binIndex + (coord * len(bins)))
+#
+#     return np.array(binnedcoord)
+#
+#
+# def makeSortedBinsMatrix(nbBins, lenBins, overlapping, arrayLen, path):
+#     sortedBinsMatrix = np.zeros((arrayLen, nbBins), dtype=np.uint8)
+#     step = lenBins - overlapping
+#     for binIndex in range(nbBins):
+#         sortedBinsMatrix[step * binIndex:lenBins + (step * binIndex), binIndex] = np.ones(lenBins, dtype=np.uint8)
+#     np.savetxt(path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#                sortedBinsMatrix, delimiter=",")
+#     return sortedBinsMatrix
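+#
+# # Editor's note: column b of sortedBinsMatrix is the dense indicator of bin b,
+# # with ones on rows [b * step, b * step + lenBins) -- the same bins that
+# # findBins enumerates as explicit index lists.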
+#
+#
+# def makeSparseTotalMatrix(sortedRNASeq, randomState):
+#     nbPatients, nbGenes = sortedRNASeq.shape
+#     params = findParams(nbGenes, nbPatients, randomState)
+#     nbBins = params["nbBins"]
+#     overlapping = params["overlapping"]
+#     lenBin = params["lenBin"]
+#     bins = findBins(nbBins, overlapping, lenBin)
+#     sparseFull = sparse.csc_matrix((nbPatients, nbGenes * nbBins))
+#     for patientIndex, patient in enumerate(sortedRNASeq):
+#         columnIndices = getBins(patient, bins, lenBin, overlapping)
+#         rowIndices = np.zeros(len(columnIndices), dtype=int) + patientIndex
+#         data = np.ones(len(columnIndices), dtype=bool)
+#         sparseFull = sparseFull + sparse.csc_matrix((data, (rowIndices, columnIndices)),
+#                                                     shape=(nbPatients, nbGenes * nbBins))
+#     return sparseFull
+#
+#
+# def getAdjacenceMatrix(RNASeqRanking, sortedRNASeq, k=2):
+#     k = int(k) // 2 * 2  # round k down to an even number
+#     indices = np.zeros((RNASeqRanking.shape[0] * k * RNASeqRanking.shape[1]), dtype=int)
+#     data = np.ones((RNASeqRanking.shape[0] * k * RNASeqRanking.shape[1]), dtype=bool)
+#     indptr = np.zeros(RNASeqRanking.shape[0] + 1, dtype=int)
+#     nbGenes = RNASeqRanking.shape[1]
+#     pointer = 0
+#     for patientIndex in range(RNASeqRanking.shape[0]):
+#         for i in range(nbGenes):
+#             for j in range(k // 2):
+#                 # NB: a negative index wraps around silently; only the upper
+#                 # overflow actually raises IndexError here
+#                 try:
+#                     indices[pointer] = RNASeqRanking[
+#                                            patientIndex, (sortedRNASeq[patientIndex, i] - (j + 1))] + i * nbGenes
+#                     pointer += 1
+#                 except IndexError:
+#                     pass
+#                 try:
+#                     indices[pointer] = RNASeqRanking[
+#                                            patientIndex, (sortedRNASeq[patientIndex, i] + (j + 1))] + i * nbGenes
+#                     pointer += 1
+#                 except IndexError:
+#                     pass
+#                     # elif i<=k:
+#                     # 	indices.append(patient[1]+patient[i]*nbGenes)
+#                     # 	data.append(True)
+#                     # elif i==nbGenes-1:
+#                     # 	indices.append(patient[i-1]+patient[i]*nbGenes)
+#                     # 	data.append(True)
+#         indptr[patientIndex + 1] = pointer
+#
+#     mat = sparse.csr_matrix((data, indices, indptr),
+#                             shape=(RNASeqRanking.shape[0], RNASeqRanking.shape[1] * RNASeqRanking.shape[1]), dtype=bool)
+#     return mat
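+#
+# # Editor's note: (data, indices, indptr) is scipy's raw CSR layout -- for row
+# # p, the stored column indices are indices[indptr[p]:indptr[p + 1]], which is
+# # why the loop above records one pointer offset per patient.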
+#
+#
+# def getKMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
+#     datasetFile = h5py.File(path + "KMultiOmic.hdf5", "w")
+#
+#     # logging.debug("Start:\t Getting Methylation Data")
+#     methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
+#     logging.debug("Done:\t Getting Methylation Data")
+#
+#     logging.debug("Start:\t Getting Sorted Methyl Data")
+#     Methyl = methylData
+#     sortedMethylGeneIndices = np.zeros(methylData.shape, dtype=int)
+#     MethylRanking = np.zeros(methylData.shape, dtype=int)
+#     for exampleIndex, exampleArray in enumerate(Methyl):
+#         sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray))
+#         sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1))
+#         sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int)
+#         sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray
+#         for geneIndex in range(Methyl.shape[1]):
+#             MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex
+#     logging.debug("Done:\t Getting Sorted Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binarized Methyl Data")
+#     k = findClosestPowerOfTwo(9) - 1
+#     try:
+#         factorizedLeftBaseMatrix = np.genfromtxt(
+#             path + "factorLeft--n-" + str(methylData.shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
+#     except IOError:  # recompute if the cached CSV is missing
+#         factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
+#     bMethylDset = datasetFile.create_dataset("View0",
+#                                              (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
+#                                              dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
+#         bMethylDset[patientIndex] = patientMatrix.flatten()
+#     bMethylDset.attrs["name"] = "BMethyl" + str(k)
+#     bMethylDset.attrs["sparse"] = False
+#     bMethylDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binned Methyl Data")
+#     lenBins = 3298
+#     nbBins = 9
+#     overlapping = 463
+#     try:
+#         sortedBinsMatrix = np.genfromtxt(
+#             path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#             delimiter=",")
+#     except IOError:
+#         sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, methylData.shape[1], path)
+#     binnedMethyl = datasetFile.create_dataset("View1", (
+#         sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
+#         binnedMethyl[patientIndex] = patientMatrix.flatten()
+#     binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins)
+#     binnedMethyl.attrs["sparse"] = False
+#     binnedMethyl.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binned Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binarized Methyl Data")
+#     k = findClosestPowerOfTwo(17) - 1
+#     try:
+#         factorizedLeftBaseMatrix = np.genfromtxt(
+#             path + "factorLeft--n-" + str(methylData.shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
+#     except IOError:
+#         factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
+#     bMethylDset = datasetFile.create_dataset("View2",
+#                                              (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
+#                                              dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
+#         bMethylDset[patientIndex] = patientMatrix.flatten()
+#     bMethylDset.attrs["name"] = "BMethyl" + str(k)
+#     bMethylDset.attrs["sparse"] = False
+#     bMethylDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binned Methyl Data")
+#     lenBins = 2038
+#     nbBins = 16
+#     overlapping = 442
+#     try:
+#         sortedBinsMatrix = np.genfromtxt(
+#             path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#             delimiter=",")
+#     except IOError:
+#         sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, methylData.shape[1], path)
+#     binnedMethyl = datasetFile.create_dataset("View3", (
+#         sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
+#         binnedMethyl[patientIndex] = patientMatrix.flatten()
+#     binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins)
+#     binnedMethyl.attrs["sparse"] = False
+#     binnedMethyl.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binned Methyl Data")
+#
+#     labelFile = open(path + 'brca_labels_triple-negatif.csv')
+#     labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
+#     labelsDset = datasetFile.create_dataset("Labels", labels.shape)
+#     labelsDset[...] = labels
+#     labelsDset.attrs["name"] = "Labels"
+#
+#     metaDataGrp = datasetFile.create_group("Metadata")
+#     metaDataGrp.attrs["nbView"] = 4
+#     metaDataGrp.attrs["nbClass"] = 2
+#     metaDataGrp.attrs["datasetLength"] = len(labels)
+#     labelDictionary = {0: "No", 1: "Yes"}
+#
+#     datasetFile.close()
+#     datasetFile = h5py.File(path + "KMultiOmic.hdf5", "r")
+#
+#     return datasetFile, labelDictionary
+#
+#
+# def getKMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
+#     datasetFile = h5py.File(path + "KMultiOmic.hdf5", "r")
+#     labelDictionary = {0: "No", 1: "Yes"}
+#     return datasetFile, labelDictionary
+#
+#
+# def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES):
+#     datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "w")
+#
+#     logging.debug("Start:\t Getting Methylation Data")
+#     methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',')
+#     methylDset = datasetFile.create_dataset("View0", methylData.shape)
+#     methylDset[...] = methylData
+#     methylDset.attrs["name"] = "Methyl_"
+#     methylDset.attrs["sparse"] = False
+#     methylDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Methylation Data")
+#
+#     logging.debug("Start:\t Getting Sorted Methyl Data")
+#     Methyl = datasetFile["View0"][...]
+#     sortedMethylGeneIndices = np.zeros(datasetFile.get("View0").shape, dtype=int)
+#     MethylRanking = np.zeros(datasetFile.get("View0").shape, dtype=int)
+#     for exampleIndex, exampleArray in enumerate(Methyl):
+#         sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray))
+#         sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1))
+#         sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int)
+#         sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray
+#         for geneIndex in range(Methyl.shape[1]):
+#             MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex
+#     mMethylDset = datasetFile.create_dataset("View10", sortedMethylGeneIndices.shape, data=sortedMethylGeneIndices)
+#     mMethylDset.attrs["name"] = "SMethyl"
+#     mMethylDset.attrs["sparse"] = False
+#     mMethylDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Sorted Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binarized Methyl Data")
+#     k = findClosestPowerOfTwo(58) - 1
+#     try:
+#         factorizedLeftBaseMatrix = np.genfromtxt(
+#             path + "factorLeft--n-" + str(datasetFile.get("View0").shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
+#     except IOError:
+#         factorizedLeftBaseMatrix = getBaseMatrices(methylData.shape[1], k, path)
+#     bMethylDset = datasetFile.create_dataset("View11",
+#                                              (sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * k),
+#                                              dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], k), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
+#         bMethylDset[patientIndex] = patientMatrix.flatten()
+#     bMethylDset.attrs["name"] = "BMethyl"
+#     bMethylDset.attrs["sparse"] = False
+#     bMethylDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized Methyl Data")
+#
+#     logging.debug("Start:\t Getting Binned Methyl Data")
+#     lenBins = 2095
+#     nbBins = 58
+#     overlapping = 1676
+#     try:
+#         sortedBinsMatrix = np.genfromtxt(
+#             path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#             delimiter=",")
+#     except IOError:
+#         sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View0").shape[1], path)
+#     binnedMethyl = datasetFile.create_dataset("View12", (
+#         sortedMethylGeneIndices.shape[0], sortedMethylGeneIndices.shape[1] * nbBins), dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMethylGeneIndices):
+#         patientMatrix = np.zeros((sortedMethylGeneIndices.shape[1], nbBins), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
+#         binnedMethyl[patientIndex] = patientMatrix.flatten()
+#     binnedMethyl.attrs["name"] = "bMethyl"
+#     binnedMethyl.attrs["sparse"] = False
+#     binnedMethyl.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binned Methyl Data")
+#
+#     logging.debug("Start:\t Getting MiRNA Data")
+#     mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',')
+#     mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape)
+#     mirnaDset[...] = mirnaData
+#     mirnaDset.attrs["name"] = "MiRNA__"
+#     mirnaDset.attrs["sparse"] = False
+#     mirnaDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting MiRNA Data")
+#
+#     logging.debug("Start:\t Getting Sorted MiRNA Data")
+#     MiRNA = datasetFile["View1"][...]
+#     sortedMiRNAGeneIndices = np.zeros(datasetFile.get("View1").shape, dtype=int)
+#     MiRNARanking = np.zeros(datasetFile.get("View1").shape, dtype=int)
+#     for exampleIndex, exampleArray in enumerate(MiRNA):
+#         sortedMiRNADictionary = dict((index, value) for index, value in enumerate(exampleArray))
+#         sortedMiRNAIndicesDict = sorted(sortedMiRNADictionary.items(), key=operator.itemgetter(1))
+#         sortedMiRNAIndicesArray = np.array([index for (index, value) in sortedMiRNAIndicesDict], dtype=int)
+#         sortedMiRNAGeneIndices[exampleIndex] = sortedMiRNAIndicesArray
+#         for geneIndex in range(MiRNA.shape[1]):
+#             MiRNARanking[exampleIndex, sortedMiRNAIndicesArray[geneIndex]] = geneIndex
+#     mmirnaDset = datasetFile.create_dataset("View7", sortedMiRNAGeneIndices.shape, data=sortedMiRNAGeneIndices)
+#     mmirnaDset.attrs["name"] = "SMiRNA_"
+#     mmirnaDset.attrs["sparse"] = False
+#     mmirnaDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Sorted MiRNA Data")
+#
+#     logging.debug("Start:\t Getting Binarized MiRNA Data")
+#     k = findClosestPowerOfTwo(517) - 1
+#     try:
+#         factorizedLeftBaseMatrix = np.genfromtxt(
+#             path + "factorLeft--n-" + str(datasetFile.get("View1").shape[1]) + "--k-" + str(k) + ".csv", delimiter=',')
+#     except IOError:
+#         factorizedLeftBaseMatrix = getBaseMatrices(mirnaData.shape[1], k, path)
+#     bmirnaDset = datasetFile.create_dataset("View8",
+#                                             (sortedMiRNAGeneIndices.shape[0], sortedMiRNAGeneIndices.shape[1] * k),
+#                                             dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMiRNAGeneIndices):
+#         patientMatrix = np.zeros((sortedMiRNAGeneIndices.shape[1], k), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
+#         bmirnaDset[patientIndex] = patientMatrix.flatten()
+#     bmirnaDset.attrs["name"] = "BMiRNA_"
+#     bmirnaDset.attrs["sparse"] = False
+#     bmirnaDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized MiRNA Data")
+#
+#     logging.debug("Start:\t Getting Binned MiRNA Data")
+#     lenBins = 14
+#     nbBins = 517
+#     overlapping = 12
+#     try:
+#         sortedBinsMatrix = np.genfromtxt(
+#             path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#             delimiter=",")
+#     except IOError:
+#         sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View1").shape[1], path)
+#     binnedMiRNA = datasetFile.create_dataset("View9", (
+#         sortedMiRNAGeneIndices.shape[0], sortedMiRNAGeneIndices.shape[1] * nbBins), dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedMiRNAGeneIndices):
+#         patientMatrix = np.zeros((sortedMiRNAGeneIndices.shape[1], nbBins), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
+#         binnedMiRNA[patientIndex] = patientMatrix.flatten()
+#     binnedMiRNA.attrs["name"] = "bMiRNA_"
+#     binnedMiRNA.attrs["sparse"] = False
+#     binnedMiRNA.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binned MiRNA Data")
+#
+#     logging.debug("Start:\t Getting RNASeq Data")
+#     rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',')
+#     uselessRows = []
+#     for rowIndex, row in enumerate(np.transpose(rnaseqData)):
+#         if not row.any():
+#             uselessRows.append(rowIndex)
+#     usefulRows = [usefulRowIndex for usefulRowIndex in range(rnaseqData.shape[1]) if usefulRowIndex not in uselessRows]
+#     rnaseqDset = datasetFile.create_dataset("View2", (rnaseqData.shape[0], len(usefulRows)))
+#     rnaseqDset[...] = rnaseqData[:, usefulRows]
+#     rnaseqDset.attrs["name"] = "RNASeq_"
+#     rnaseqDset.attrs["sparse"] = False
+#     rnaseqDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting RNASeq Data")
+#
+#     logging.debug("Start:\t Getting Sorted RNASeq Data")
+#     RNASeq = datasetFile["View2"][...]
+#     sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int)
+#     RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int)
+#     for exampleIndex, exampleArray in enumerate(RNASeq):
+#         sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray))
+#         sortedRNASeqIndicesDict = sorted(sortedRNASeqDictionary.items(), key=operator.itemgetter(1))
+#         sortedRNASeqIndicesArray = np.array([index for (index, value) in sortedRNASeqIndicesDict], dtype=int)
+#         sortedRNASeqGeneIndices[exampleIndex] = sortedRNASeqIndicesArray
+#         for geneIndex in range(RNASeq.shape[1]):
+#             RNASeqRanking[exampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex
+#     mrnaseqDset = datasetFile.create_dataset("View4", sortedRNASeqGeneIndices.shape, data=sortedRNASeqGeneIndices)
+#     mrnaseqDset.attrs["name"] = "SRNASeq"
+#     mrnaseqDset.attrs["sparse"] = False
+#     mrnaseqDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Sorted RNASeq Data")
+#
+#     logging.debug("Start:\t Getting Binarized RNASeq Data")
+#     k = findClosestPowerOfTwo(100) - 1
+#     try:
+#         factorizedLeftBaseMatrix = np.genfromtxt(
+#             path + "factorLeft--n-" + str(datasetFile.get("View2").shape[1]) + "--k-" + str(k) + ".csv",
+#             delimiter=',')
+#     except IOError:
+#         factorizedLeftBaseMatrix = getBaseMatrices(rnaseqData.shape[1], k, path)
+#     brnaseqDset = datasetFile.create_dataset("View5",
+#                                              (sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1] * k),
+#                                              dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
+#         patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], k), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = factorizedLeftBaseMatrix[lineIndex, :]
+#         brnaseqDset[patientIndex] = patientMatrix.flatten()
+#     brnaseqDset.attrs["name"] = "BRNASeq"
+#     brnaseqDset.attrs["sparse"] = False
+#     brnaseqDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized RNASeq Data")
+#
+#     logging.debug("Start:\t Getting Binned RNASeq Data")
+#     lenBins = 986
+#     nbBins = 142
+#     overlapping = 493
+#     try:
+#         sortedBinsMatrix = np.genfromtxt(
+#             path + "sortedBinsMatrix--t-" + str(lenBins) + "--n-" + str(nbBins) + "--c-" + str(overlapping) + ".csv",
+#             delimiter=",")
+#     except IOError:
+#         sortedBinsMatrix = makeSortedBinsMatrix(nbBins, lenBins, overlapping, datasetFile.get("View2").shape[1], path)
+#     binnedRNASeq = datasetFile.create_dataset("View6", (
+#         sortedRNASeqGeneIndices.shape[0], sortedRNASeqGeneIndices.shape[1] * nbBins), dtype=np.uint8)
+#     for patientIndex, patientSortedArray in enumerate(sortedRNASeqGeneIndices):
+#         patientMatrix = np.zeros((sortedRNASeqGeneIndices.shape[1], nbBins), dtype=np.uint8)
+#         for lineIndex, geneIndex in enumerate(patientSortedArray):
+#             patientMatrix[geneIndex] = sortedBinsMatrix[lineIndex, :]
+#         binnedRNASeq[patientIndex] = patientMatrix.flatten()
+#     binnedRNASeq.attrs["name"] = "bRNASeq"
+#     binnedRNASeq.attrs["sparse"] = False
+#     binnedRNASeq.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binned RNASeq Data")
+#
+#     logging.debug("Start:\t Getting Clinical Data")
+#     clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',')
+#     clinicalDset = datasetFile.create_dataset("View3", clinical.shape)
+#     clinicalDset[...] = clinical
+#     clinicalDset.attrs["name"] = "Clinic_"
+#     clinicalDset.attrs["sparse"] = False
+#     clinicalDset.attrs["binary"] = False
+#     logging.debug("Done:\t Getting Clinical Data")
+#
+#     logging.debug("Start:\t Getting Binarized Clinical Data")
+#     binarized_clinical = np.zeros((347, 1951), dtype=np.uint8)
+#     nb_already_done = 0
+#     for featureIndex, feature in enumerate(np.transpose(clinical)):
+#         featureSet = set(feature)
+#         featureDict = dict((val, valIndex) for valIndex, val in enumerate(list(featureSet)))
+#         for valueIndex, value in enumerate(feature):
+#             binarized_clinical[valueIndex, featureDict[value] + nb_already_done] = 1
+#         nb_already_done += len(featureSet)
+#     bClinicalDset = datasetFile.create_dataset("View13", binarized_clinical.shape, dtype=np.uint8,
+#                                                data=binarized_clinical)
+#     bClinicalDset.attrs["name"] = "bClinic"
+#     bClinicalDset.attrs["sparse"] = False
+#     bClinicalDset.attrs["binary"] = True
+#     logging.debug("Done:\t Getting Binarized Clinical Data")
+#
+#     # logging.debug("Start:\t Getting Adjacence RNASeq Data")
+#     # sparseAdjRNASeq = getAdjacenceMatrix(RNASeqRanking, sortedRNASeqGeneIndices, k=findClosestPowerOfTwo(10)-1)
+#     # sparseAdjRNASeqGrp = datasetFile.create_group("View6")
+#     # dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data)
+#     # indicesDset = sparseAdjRNASeqGrp.create_dataset("indices",
+#     # sparseAdjRNASeq.indices.shape, data=sparseAdjRNASeq.indices)
+#     # indptrDset = sparseAdjRNASeqGrp.create_dataset("indptr",
+#     # sparseAdjRNASeq.indptr.shape, data=sparseAdjRNASeq.indptr)
+#     # sparseAdjRNASeqGrp.attrs["name"]="ARNASeq"
+#     # sparseAdjRNASeqGrp.attrs["sparse"]=True
+#     # sparseAdjRNASeqGrp.attrs["shape"]=sparseAdjRNASeq.shape
+#     # logging.debug("Done:\t Getting Adjacence RNASeq Data")
+#
+#     labelFile = open(path + 'brca_labels_triple-negatif.csv')
+#     labels = np.array([int(line.strip().split(',')[1]) for line in labelFile])
+#     labelsDset = datasetFile.create_dataset("Labels", labels.shape)
+#     labelsDset[...] = labels
+#     labelsDset.attrs["name"] = "Labels"
+#
+#     metaDataGrp = datasetFile.create_group("Metadata")
+#     metaDataGrp.attrs["nbView"] = 14
+#     metaDataGrp.attrs["nbClass"] = 2
+#     metaDataGrp.attrs["datasetLength"] = len(labels)
+#     labelDictionary = {0: "No", 1: "Yes"}
+#
+#     datasetFile.close()
+#     datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "r")
+#
+#     return datasetFile, labelDictionary
+#
+#
+# def getModifiedMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
+#     datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "r")
+#     labelDictionary = {0: "No", 1: "Yes"}
+#     return datasetFile, labelDictionary
+#
+#
+# def getMultiOmicDBhdf5(features, path, name, NB_CLASS, LABELS_NAMES):
+#     datasetFile = h5py.File(path + "MultiOmic.hdf5", "r")
+#     labelDictionary = {0: "No", 1: "Yes"}
+#     return datasetFile, labelDictionary
+#
+#
+# def copyHDF5(pathF, name, nbCores):
+#     datasetFile = h5py.File(pathF + name + ".hdf5", "r")
+#     for coreIndex in range(nbCores):
+#         newDataSet = h5py.File(pathF + name + str(coreIndex) + ".hdf5", "w")
+#         for dataset in datasetFile:
+#             datasetFile.copy("/" + dataset, newDataSet["/"])
+#         newDataSet.close()
+#
+#
+# def datasetsAlreadyExist(pathF, name, nbCores):
+#     import os.path
+#     allDatasetExist = True
+#     for coreIndex in range(nbCores):
+#         allDatasetExist = allDatasetExist and os.path.isfile(pathF + name + str(coreIndex) + ".hdf5")
+#     return allDatasetExist
+#
+#
+# def deleteHDF5(pathF, name, nbCores):
+#     for coreIndex in range(nbCores):
+#         os.remove(pathF + name + str(coreIndex) + ".hdf5")
+#
+# # def getOneViewFromDB(viewName, pathToDB, DBName):
+# #     view = np.genfromtxt(pathToDB + DBName +"-" + viewName, delimiter=';')
+# #     return view
+#
+#
+# # def getClassLabels(pathToDB, DBName):
+# #     labels = np.genfromtxt(pathToDB + DBName + "-" + "ClassLabels.csv", delimiter=';')
+# #     return labels
+#
+#
+# # def getDataset(pathToDB, viewNames, DBName):
+# #     dataset = []
+# #     for viewName in viewNames:
+# #         dataset.append(getOneViewFromDB(viewName, pathToDB, DBName))
+# #     return np.array(dataset)
+#
+#
+# # def getAwaLabels(nbLabels, pathToAwa):
+# #     labelsFile = open(pathToAwa + 'Animals_with_Attributes/classes.txt')
+# #     linesFile = [''.join(line.strip().split()).translate(str.maketrans('', '', digits)) for line in labelsFile.readlines()]
+# #     return linesFile
+#
+#
+# # def getAwaDBcsv(views, pathToAwa, nameDB, nbLabels, LABELS_NAMES):
+# #     awaLabels = getAwaLabels(nbLabels, pathToAwa)
+# #     nbView = len(views)
+# #     nbMaxLabels = len(awaLabels)
+# #     if nbLabels == -1:
+# #         nbLabels = nbMaxLabels
+# #     nbNamesGiven = len(LABELS_NAMES)
+# #     if nbNamesGiven > nbLabels:
+# #         labelDictionary = {i:LABELS_NAMES[i] for i in np.arange(nbLabels)}
+# #     elif nbNamesGiven < nbLabels and nbLabels <= nbMaxLabels:
+# #         if LABELS_NAMES != ['']:
+# #             labelDictionary = {i:LABELS_NAMES[i] for i in np.arange(nbNamesGiven)}
+# #         else:
+# #             labelDictionary = {}
+# #             nbNamesGiven = 0
+# #         nbLabelsToAdd = nbLabels-nbNamesGiven
+# #         while nbLabelsToAdd > 0:
+# #             currentLabel = random.choice(awaLabels)
+# #             if currentLabel not in labelDictionary.values():
+# #                 labelDictionary[nbLabels-nbLabelsToAdd]=currentLabel
+# #                 nbLabelsToAdd -= 1
+# #             else:
+# #                 pass
+# #     else:
+# #         labelDictionary = {i: LABELS_NAMES[i] for i in np.arange(nbNamesGiven)}
+# #     viewDictionary = {i: views[i] for i in np.arange(nbView)}
+# #     rawData = []
+# #     labels = []
+# #     nbExample = 0
+# #     for view in np.arange(nbView):
+# #         viewData = []
+# #         for labelIndex in np.arange(nbLabels):
+# #             pathToExamples = pathToAwa + 'Animals_with_Attributes/Features/' + viewDictionary[view] + '/' + \
+# #                              labelDictionary[labelIndex] + '/'
+# #             examples = os.listdir(pathToExamples)
+# #             if view == 0:
+# #                 nbExample += len(examples)
+# #             for example in examples:
+# #                 if viewDictionary[view]=='decaf':
+# #                     exampleFile = open(pathToExamples + example)
+# #                     viewData.append([float(line.strip()) for line in exampleFile])
+# #                 else:
+# #                     exampleFile = open(pathToExamples + example)
+# #                     viewData.append([[float(coordinate) for coordinate in raw.split()] for raw in exampleFile][0])
+# #                 if view == 0:
+# #                     labels.append(labelIndex)
+# #
+# #         rawData.append(np.array(viewData))
+# #     data = rawData
+# #     DATASET_LENGTH = len(labels)
+# #     return data, labels, labelDictionary, DATASET_LENGTH
+# #
+# #
+# # def getDbfromCSV(path):
+# #     files = os.listdir(path)
+# #     DATA = np.zeros((3,40,2))
+# #     for file in files:
+# #         if file[-9:]=='moins.csv' and file[:7]=='sample1':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[0, i] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #         if file[-9:]=='moins.csv' and file[:7]=='sample2':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[1, i] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #         if file[-9:]=='moins.csv' and file[:7]=='sample3':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[2, i] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #
+# #     for file in files:
+# #         if file[-8:]=='plus.csv' and file[:7]=='sample1':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[0, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #         if file[-8:]=='plus.csv' and file[:7]=='sample2':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[1, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #         if file[-8:]=='plus.csv' and file[:7]=='sample3':
+# #             X = open(path+file)
+# #             for x, i in zip(X, range(20)):
+# #                 DATA[2, i+20] = np.array([float(coord) for coord in x.strip().split('\t')])
+# #     LABELS = np.zeros(40)
+# #     LABELS[:20]=LABELS[:20]+1
+# #     return DATA, LABELS
+#
+# # def makeArrayFromTriangular(pseudoRNASeqMatrix):
+# #     matrixShape = len(pseudoRNASeqMatrix[0,:])
+# #     exampleArray = np.zeros(((matrixShape - 1) * matrixShape) // 2)
+# #     arrayIndex = 0
+# #     for i in range(matrixShape-1):
+# #         for j in range(i+1, matrixShape):
+# #             exampleArray[arrayIndex]=pseudoRNASeqMatrix[i,j]
+# #             arrayIndex += 1
+# #     return exampleArray
+#
+#
+# # def getPseudoRNASeq(dataset):
+# #     nbGenes = len(dataset["/View2/matrix"][0, :])
+# #     pseudoRNASeq = np.zeros((dataset["/datasetlength"][...], ((nbGenes - 1) * nbGenes) / 2), dtype=bool_)
+# #     for exampleIndex in xrange(dataset["/datasetlength"][...]):
+# #         arrayIndex = 0
+# #         for i in xrange(nbGenes):
+# #             for j in xrange(nbGenes):
+# #                 if i > j:
+# #                     pseudoRNASeq[exampleIndex, arrayIndex] =
+# # dataset["/View2/matrix"][exampleIndex, j] < dataset["/View2/matrix"][exampleIndex, i]
+# #                     arrayIndex += 1
+# #     dataset["/View4/matrix"] = pseudoRNASeq
+# #     dataset["/View4/name"] = "pseudoRNASeq"
+# #     return dataset
+#
+#
+# # def allSame(array):
+# #     value = array[0]
+# #     areAllSame = True
+# #     for i in array:
+# #         if i != value:
+# #             areAllSame = False
+# #     return areAllSame
diff --git a/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py
index bdc430ad5113b365bee0a4c93ceca93ea22d9219..a30b1b7a1b35bc726fddd21a26bcbae75f4e8c88 100644
--- a/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py
+++ b/Code/MonoMultiViewClassifiers/utils/HyperParameterSearch.py
@@ -8,6 +8,7 @@ from .. import Metrics
 
 def searchBestSettings(dataset, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
                        searchingTool="hyperParamSearch", nIter=1, **kwargs):
+    """Used to select the right hyperparam optimization function to optimize hyper parameters"""
     if viewsIndices is None:
         viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
     thismodule = sys.modules[__name__]
@@ -18,12 +19,13 @@ def searchBestSettings(dataset, classifierPackage, classifierName, metrics, iLea
 
 
 def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
-    # si grid search est selectionne, on veut tester certaines valeurs
+    """Used to perfom gridsearch on the classifiers"""
     pass
 
 
 def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
                      nbCores=1, **classificationKWARGS):
+    """Used to perform a random search on the classifiers to optimize hyper parameters"""
     if viewsIndices is None:
         viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
     metric = metrics[0]
@@ -75,10 +77,12 @@ def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learni
 
 
 def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
+    """Used to perform spearmint on the classifiers to optimize hyper parameters"""
     pass
 
 
 def genHeatMaps(params, scoresArray, outputFileName):
+    """Used to generate a heat map for each doublet of hyperparms optimized on the previous function"""
     nbParams = len(params)
     if nbParams > 2:
         combinations = itertools.combinations(range(nbParams), 2)
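+        # e.g. three hyper-parameters yield the pairs (0, 1), (0, 2), (1, 2):
+        # one heat map is produced per pair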
@@ -110,128 +114,128 @@ def genHeatMaps(params, scoresArray, outputFileName):
         plt.savefig(outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png")
         plt.close()
 
-        # nohup python ~/dev/git/spearmint/spearmint/main.py . &
-
-        # import json
-        # import numpy as np
-        # import math
-        #
-        # from os import system
-        # from os.path import join
-        #
-        #
-        # def run_kover(dataset, split, model_type, p, max_rules, output_dir):
-        #     outdir = join(output_dir, "%s_%f" % (model_type, p))
-        #     kover_command = "kover learn " \
-        #                     "--dataset '%s' " \
-        #                     "--split %s " \
-        #                     "--model-type %s " \
-        #                     "--p %f " \
-        #                     "--max-rules %d " \
-        #                     "--max-equiv-rules 10000 " \
-        #                     "--hp-choice cv " \
-        #                     "--random-seed 0 " \
-        #                     "--output-dir '%s' " \
-        #                     "--n-cpu 1 " \
-        #                     "-v" % (dataset,
-        #                             split,
-        #                             model_type,
-        #                             p,
-        #                             max_rules,
-        #                             outdir)
-        #
-        #     system(kover_command)
-        #
-        #     return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"]
-        #
-        #
-        # def main(job_id, params):
-        #     print params
-        #
-        #     max_rules = params["MAX_RULES"][0]
-        #
-        #     species = params["SPECIES"][0]
-        #     antibiotic = params["ANTIBIOTIC"][0]
-        #     split = params["SPLIT"][0]
-        #
-        #     model_type = params["model_type"][0]
-        #
-        #     # LS31
-        #     if species == "saureus":
-        #         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic
-        #     else:
-        #         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic)
-        #
-        #     output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic)
-        #
-        #     # MacBook
-        #     #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic)
-        #     #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic
-        #
-        #     return run_kover(dataset=dataset_path,
-        #                      split=split,
-        #                      model_type=model_type,
-        #                      p=params["p"][0],
-        #                      max_rules=max_rules,
-        #                      output_dir=output_path)
-        # killall mongod && sleep 1 && rm -r database/* && rm mongo.log*
-        # mongod --fork --logpath mongo.log --dbpath database
-        #
-        # {
-        #     "language"        : "PYTHON",
-        #     "experiment-name" : "vanilla_scm_cdiff_azithromycin",
-        #     "polling-time"    : 1,
-        #     "resources" : {
-        #         "my-machine" : {
-        #             "scheduler"         : "local",
-        #             "max-concurrent"    : 5,
-        #             "max-finished-jobs" : 100
-        #         }
-        #     },
-        #     "tasks": {
-        #         "resistance" : {
-        #             "type"       : "OBJECTIVE",
-        #             "likelihood" : "NOISELESS",
-        #             "main-file"  : "spearmint_wrapper",
-        #             "resources"  : ["my-machine"]
-        #         }
-        #     },
-        #     "variables": {
-        #
-        #         "MAX_RULES" : {
-        #             "type" : "ENUM",
-        #             "size" : 1,
-        #             "options": [10]
-        #         },
-        #
-        #
-        #         "SPECIES" : {
-        #             "type" : "ENUM",
-        #             "size" : 1,
-        #             "options": ["cdiff"]
-        #         },
-        #         "ANTIBIOTIC" : {
-        #             "type" : "ENUM",
-        #             "size" : 1,
-        #             "options": ["azithromycin"]
-        #         },
-        #         "SPLIT" : {
-        #             "type" : "ENUM",
-        #             "size" : 1,
-        #             "options": ["split_seed_2"]
-        #         },
-        #
-        #
-        #         "model_type" : {
-        #             "type" : "ENUM",
-        #             "size" : 1,
-        #             "options": ["conjunction", "disjunction"]
-        #         },
-        #         "p" : {
-        #             "type" : "FLOAT",
-        #             "size" : 1,
-        #             "min"  : 0.01,
-        #             "max"  : 100
-        #         }
-        #     }
-        # }
+# nohup python ~/dev/git/spearmint/spearmint/main.py . &
+
+# import json
+# import numpy as np
+# import math
+#
+# from os import system
+# from os.path import join
+#
+#
+# def run_kover(dataset, split, model_type, p, max_rules, output_dir):
+#     outdir = join(output_dir, "%s_%f" % (model_type, p))
+#     kover_command = "kover learn " \
+#                     "--dataset '%s' " \
+#                     "--split %s " \
+#                     "--model-type %s " \
+#                     "--p %f " \
+#                     "--max-rules %d " \
+#                     "--max-equiv-rules 10000 " \
+#                     "--hp-choice cv " \
+#                     "--random-seed 0 " \
+#                     "--output-dir '%s' " \
+#                     "--n-cpu 1 " \
+#                     "-v" % (dataset,
+#                             split,
+#                             model_type,
+#                             p,
+#                             max_rules,
+#                             outdir)
+#
+#     system(kover_command)
+#
+#     return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"]
+#
+#
+# def main(job_id, params):
+#     print(params)
+#
+#     max_rules = params["MAX_RULES"][0]
+#
+#     species = params["SPECIES"][0]
+#     antibiotic = params["ANTIBIOTIC"][0]
+#     split = params["SPLIT"][0]
+#
+#     model_type = params["model_type"][0]
+#
+#     # LS31
+#     if species == "saureus":
+#         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic
+#     else:
+#         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic)
+#
+#     output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic)
+#
+#     # MacBook
+#     #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic)
+#     #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic
+#
+#     return run_kover(dataset=dataset_path,
+#                      split=split,
+#                      model_type=model_type,
+#                      p=params["p"][0],
+#                      max_rules=max_rules,
+#                      output_dir=output_path)
+# killall mongod && sleep 1 && rm -r database/* && rm mongo.log*
+# mongod --fork --logpath mongo.log --dbpath database
+#
+# {
+#     "language"        : "PYTHON",
+#     "experiment-name" : "vanilla_scm_cdiff_azithromycin",
+#     "polling-time"    : 1,
+#     "resources" : {
+#         "my-machine" : {
+#             "scheduler"         : "local",
+#             "max-concurrent"    : 5,
+#             "max-finished-jobs" : 100
+#         }
+#     },
+#     "tasks": {
+#         "resistance" : {
+#             "type"       : "OBJECTIVE",
+#             "likelihood" : "NOISELESS",
+#             "main-file"  : "spearmint_wrapper",
+#             "resources"  : ["my-machine"]
+#         }
+#     },
+#     "variables": {
+#
+#         "MAX_RULES" : {
+#             "type" : "ENUM",
+#             "size" : 1,
+#             "options": [10]
+#         },
+#
+#
+#         "SPECIES" : {
+#             "type" : "ENUM",
+#             "size" : 1,
+#             "options": ["cdiff"]
+#         },
+#         "ANTIBIOTIC" : {
+#             "type" : "ENUM",
+#             "size" : 1,
+#             "options": ["azithromycin"]
+#         },
+#         "SPLIT" : {
+#             "type" : "ENUM",
+#             "size" : 1,
+#             "options": ["split_seed_2"]
+#         },
+#
+#
+#         "model_type" : {
+#             "type" : "ENUM",
+#             "size" : 1,
+#             "options": ["conjunction", "disjunction"]
+#         },
+#         "p" : {
+#             "type" : "FLOAT",
+#             "size" : 1,
+#             "min"  : 0.01,
+#             "max"  : 100
+#         }
+#     }
+# }
diff --git a/Code/MonoMultiViewClassifiers/utils/Interpret.py b/Code/MonoMultiViewClassifiers/utils/Interpret.py
index 00562135e6cd34d359f1bce174a9072681365d66..e83b2e55b9d9f00adedb65cbd1b44cb8ebb29db1 100644
--- a/Code/MonoMultiViewClassifiers/utils/Interpret.py
+++ b/Code/MonoMultiViewClassifiers/utils/Interpret.py
@@ -5,11 +5,12 @@ import pickle
 
 
 def percent(x, pos):
-    'The two args are the value and tick position'
+    """Used to print percentage of importance on the y axis"""
     return '%1.1f %%' % (x * 100)
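+    # Usage sketch (editor's assumption, not in the original file): this
+    # callback has the (value, position) signature matplotlib expects, e.g.
+    #   ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(percent))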
 
 
 def getFeatureImportance(classifier, directory, interpretString=""):
+    """Used to generate a graph and a pickle dictionary representing feature importances"""
     featureImportances = classifier.feature_importances_
     sortedArgs = np.argsort(-featureImportances)
     featureImportancesSorted = featureImportances[sortedArgs][:50]
diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index 7552f068f7fcafc341ab2c669b9b9229064b67e6..1a2e287705cc8f1eaca7d1ad573b040af99391d9 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -9,6 +9,7 @@ import sklearn
 
 
 def parseTheArgs(arguments):
+    """Used to parse the args entered by the user"""
 
     parser = argparse.ArgumentParser(
         description='This file is used to benchmark the scores fo multiple classification algorithm on multiview data.',
@@ -183,6 +184,7 @@ def parseTheArgs(arguments):
     return args
 
 def initRandomState(randomStateArg, directory):
+    """Used to init a random state and multiple if needed (multicore)"""
     if randomStateArg is None:
         randomState = np.random.RandomState(randomStateArg)
     else:
@@ -199,6 +201,7 @@ def initRandomState(randomStateArg, directory):
 
 
 def initLogFile(args):
+    """Used to init the directory where the results will be stored and the log file"""
     resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
     logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
         args.views) + "-" + args.name + "-LOG"
@@ -226,11 +229,14 @@ def initLogFile(args):
 
 
 def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStates):
+    """Used to gen the train/test splits using one or multiple random states"""
     indices = np.arange(datasetlength)
     if statsIter > 1:
         splits = []
         for randomState in statsIterRandomStates:
-            foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=randomState, test_size=splitRatio)
+            foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
+                                                                      random_state=randomState,
+                                                                      test_size=splitRatio)
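+            # StratifiedShuffleSplit.split yields (train_indices, test_indices)
+            # pairs that preserve the class proportions in both subsets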
             folds = foldsObj.split(indices, DATASET.get("Labels").value)
             for fold in folds:
                 train_fold, test_fold = fold
@@ -249,6 +255,7 @@ def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStat
 
 
 def genKFolds(statsIter, nbFolds, statsIterRandomStates):
+    """Used to generate folds indices for cross validation and multiple if needed"""
     if statsIter > 1:
         foldsList = []
         for randomState in statsIterRandomStates:
@@ -259,8 +266,7 @@ def genKFolds(statsIter, nbFolds, statsIterRandomStates):
 
 
 def initViews(DATASET, args):
-    """Used to return the views names that will be used by the algos, their indices and all the views names
-    Needs args.views"""
+    """Used to return the views names that will be used by the algos, their indices and all the views names"""
     NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
     if args.views != [""]:
         allowedViews = args.views
@@ -278,6 +284,7 @@ def initViews(DATASET, args):
 
 
 def genDirecortiesNames(directory, statsIter):
+    """Used to generate the different directories of each iteration if needed"""
     if statsIter > 1:
         directories = []
         for i in range(statsIter):