Skip to content
Snippets Groups Projects
Commit 9fd8694c authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Wrote some doc

parent 60b6ce49
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,7 @@ from . import GetMultiviewDb as DB
def getV(DATASET, viewIndex, usedIndices=None):
"""Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset"""
if usedIndices is None:
usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
if type(usedIndices) is int:
......@@ -32,6 +33,7 @@ def getV(DATASET, viewIndex, usedIndices=None):
def getShape(DATASET, viewIndex):
"""Used to get the dataset shape even if it's sparse"""
if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]:
return DATASET.get("View" + str(viewIndex)).shape
else:
......@@ -39,6 +41,7 @@ def getShape(DATASET, viewIndex):
def getValue(DATASET):
"""Used to get the value of a view in the HDF5 dataset even if it sparse"""
if not DATASET.attrs["sparse"]:
return DATASET.value
else:
......@@ -50,6 +53,7 @@ def getValue(DATASET):
def extractSubset(matrix, usedIndices):
"""Used to extract a subset of a matrix even if it's sparse"""
if sparse.issparse(matrix):
newIndptr = np.zeros(len(usedIndices) + 1, dtype=int)
oldindptr = matrix.indptr
......@@ -69,8 +73,7 @@ def extractSubset(matrix, usedIndices):
def initMultipleDatasets(args, nbCores):
"""Used to create copies of the dataset if multicore computation is used
Needs arg.pathF and arg.name"""
"""Used to create copies of the dataset if multicore computation is used"""
if nbCores > 1:
if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
logging.debug("Info:\t Enough copies of the dataset are already available")
......@@ -90,6 +93,7 @@ def initMultipleDatasets(args, nbCores):
def confirm(resp=True, timeout=15):
"""Used to process answer"""
ans = input_(timeout)
if not ans:
return resp
......@@ -102,6 +106,7 @@ def confirm(resp=True, timeout=15):
def input_(timeout=15):
"""used as a UI to stop if too much HDD space will be used"""
print("You have " + str(timeout) + " seconds to stop the script by typing n")
i, o, e = select.select([sys.stdin], [], [], timeout)
if i:
......
This diff is collapsed.
......@@ -8,6 +8,7 @@ from .. import Metrics
def searchBestSettings(dataset, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
searchingTool="hyperParamSearch", nIter=1, **kwargs):
"""Used to select the right hyperparam optimization function to optimize hyper parameters"""
if viewsIndices is None:
viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
thismodule = sys.modules[__name__]
......@@ -18,12 +19,13 @@ def searchBestSettings(dataset, classifierPackage, classifierName, metrics, iLea
def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    """Perform a grid search over the classifier's hyper-parameters.

    Not implemented yet: this placeholder exists so the search dispatcher
    (which resolves the searching tool by name) can still look it up.
    Always returns None.
    """
    return None
def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
nbCores=1, **classificationKWARGS):
"""Used to perform a random search on the classifiers to optimize hyper parameters"""
if viewsIndices is None:
viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
metric = metrics[0]
......@@ -75,10 +77,12 @@ def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learni
def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    """Optimize the classifier's hyper-parameters with Spearmint.

    Not implemented yet: this placeholder keeps the name resolvable by the
    hyper-parameter search dispatcher. Always returns None.
    """
    return None
def genHeatMaps(params, scoresArray, outputFileName):
"""Used to generate a heat map for each doublet of hyperparms optimized on the previous function"""
nbParams = len(params)
if nbParams > 2:
combinations = itertools.combinations(range(nbParams), 2)
......
......@@ -5,11 +5,12 @@ import pickle
def percent(x, pos):
    """Format a fractional tick value as a percentage label for the y axis.

    Matplotlib FuncFormatter callback: ``x`` is the tick value (a fraction,
    e.g. 0.5 for 50%), ``pos`` is the tick position and is unused but
    required by the FuncFormatter signature.

    Returns the formatted string, e.g. ``'50.0 %'``.
    """
    # NOTE(review): the original carried a second, redundant bare string
    # statement after the docstring (a no-op); consolidated into one docstring.
    return '%1.1f %%' % (x * 100)
def getFeatureImportance(classifier, directory, interpretString=""):
"""Used to generate a graph and a pickle dictionary representing feature importances"""
featureImportances = classifier.feature_importances_
sortedArgs = np.argsort(-featureImportances)
featureImportancesSorted = featureImportances[sortedArgs][:50]
......
......@@ -9,6 +9,7 @@ import sklearn
def parseTheArgs(arguments):
"""Used to parse the args entered by the user"""
parser = argparse.ArgumentParser(
description='This file is used to benchmark the scores fo multiple classification algorithm on multiview data.',
......@@ -183,6 +184,7 @@ def parseTheArgs(arguments):
return args
def initRandomState(randomStateArg, directory):
"""Used to init a random state and multiple if needed (multicore)"""
if randomStateArg is None:
randomState = np.random.RandomState(randomStateArg)
else:
......@@ -199,6 +201,7 @@ def initRandomState(randomStateArg, directory):
def initLogFile(args):
"""Used to init the directory where the results will be stored and the log file"""
resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
args.views) + "-" + args.name + "-LOG"
......@@ -226,11 +229,14 @@ def initLogFile(args):
def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStates):
"""Used to gen the train/test splits using one or multiple random states"""
indices = np.arange(datasetlength)
if statsIter > 1:
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=randomState, test_size=splitRatio)
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
random_state=randomState,
test_size=splitRatio)
folds = foldsObj.split(indices, DATASET.get("Labels").value)
for fold in folds:
train_fold, test_fold = fold
......@@ -249,6 +255,7 @@ def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStat
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
"""Used to generate folds indices for cross validation and multiple if needed"""
if statsIter > 1:
foldsList = []
for randomState in statsIterRandomStates:
......@@ -259,8 +266,7 @@ def genKFolds(statsIter, nbFolds, statsIterRandomStates):
def initViews(DATASET, args):
"""Used to return the views names that will be used by the algos, their indices and all the views names
Needs args.views"""
"""Used to return the views names that will be used by the algos, their indices and all the views names"""
NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
if args.views != [""]:
allowedViews = args.views
......@@ -278,6 +284,7 @@ def initViews(DATASET, args):
def genDirecortiesNames(directory, statsIter):
"""Used to generate the different directories of each iteration if needed"""
if statsIter > 1:
directories = []
for i in range(statsIter):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment