Commit ce4ed2b8 authored by bbauvin

To push

parent c15b97f6
@@ -239,6 +239,7 @@ def getClassificationIndices(argumentsDictionaries, iterIndex):
for argumentsDictionary in argumentsDictionaries:
if argumentsDictionary["flag"][0]==iterIndex:
pass
@@ -338,7 +339,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
pass
def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
"""Used to tranform one versus one results in multiclass results and to publish it"""
multiclassResults = [{} for _ in range(statsIter)]
for iterIndex in range(statsIter):
@@ -356,9 +357,9 @@ def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLa
for iterIndex, multiclassiterResult in enumerate(multiclassResults):
for key, value in multiclassiterResult.items():
multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
return multiclassResults
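
A minimal sketch of the vote-collapsing step performed above: each multiclass result stores, per example, one vote count per class, and np.argmax along axis 1 turns that matrix into multiclass predictions (the vote matrix below is illustrative, not project data).

import numpy as np

# Hypothetical one-versus-one vote matrix: 4 examples x 3 classes, each cell
# counting how many pairwise classifiers voted for that class on that example.
votes = np.array([[2, 0, 1],
                  [0, 3, 0],
                  [1, 1, 2],
                  [3, 0, 0]])

predictedLabels = np.argmax(votes, axis=1)  # -> array([0, 1, 2, 0])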
@@ -372,11 +373,11 @@ def analyzeIter(results):
pass
def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
if statsIter > 1:
if nbMulticlass > 1:
analyzeBiclass(results)
multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
analyzeIter(multiclassResults)
else:
biclassResults = analyzeBiclass(results)
@@ -390,15 +391,25 @@ def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclas
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices, testIndices = classificationIndices
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_GS_iter, **argument)
for argument in argumentDictionaries["Monoview"]]
@@ -409,7 +420,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
resultsMultiview = []
resultsMultiview += [
ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter, **arguments)
for arguments in argumentDictionaries["Multiview"]]
return [flag, resultsMonoview, resultsMultiview]
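
The directory-creation guard added above tolerates a concurrent mkdir from another process; a minimal standalone sketch of that idiom (the helper name ensureDirectory is illustrative, not from the project):

import errno
import os

def ensureDirectory(path):
    """Create path if it does not exist, tolerating a concurrent creation."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # Another core may have created the directory in the meantime;
        # only re-raise errors that are not "already exists".
        if exc.errno != errno.EEXIST:
            raise

ensureDirectory("Results/example_run")

On Python 3.2 and later, os.makedirs(path, exist_ok=True) gives the same behaviour.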
@@ -417,9 +428,21 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices, testIndices = classificationIndices
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
@@ -429,7 +452,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMonoview += (Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -444,7 +467,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMultiview += Parallel(n_jobs=nbCores)(
delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
args.type, args.pathF, LABELS_DICTIONARY, randomState,
args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -452,8 +475,17 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
return [flag, resultsMonoview, resultsMultiview]
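
The monoview and multiview loops above process the experiment list in waves of at most nbCores jobs via Parallel/delayed; a minimal sketch of that scheduling pattern with a dummy task in place of ExecMonoview_multicore (assumes joblib is installed):

import math
from joblib import Parallel, delayed

def dummyTask(experimentIndex):
    # Stand-in for ExecMonoview_multicore / ExecMultiview_multicore.
    return experimentIndex ** 2

nbCores, nbExperiments = 2, 5
results = []
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    # Each wave launches at most nbCores jobs; the last wave may be smaller.
    results += Parallel(n_jobs=nbCores)(
        delayed(dummyTask)(stepIndex * nbCores + coreIndex)
        for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
# results == [0, 1, 4, 9, 16]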
def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
pass
def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
"""Used to execute the needed benchmark(s) on multicore or mono-core functions
The execOneBenchmark and execOneBenchmark_multicore keyword args are only used in the tests"""
# TODO : find a way to flag
@@ -462,30 +494,29 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multi
results = []
if nbCores > 1:
if statsIter > 1 or nbMulticlass > 1:
nbExpsToDo = nbMulticlass*statsIter
nbExpsToDo = len(benchmarkArgumentsDictionaries)
nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
for stepIndex in nbMulticoreToDo:
results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
(coreIndex=coreIndex,
**argumentsDictionaries[coreIndex + stepIndex * nbCores])
**benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
else:
results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
else:
for arguments in argumentsDictionaries:
results += [execOneBenchmark(**arguments)]
for arguments in benchmarkArgumentsDictionaries:
results += [execOneBenchmarkMonoCore(**arguments)]
logging.debug("Done:\t Executing all the needed biclass benchmarks")
# Do everything with flagging
logging.debug("Start:\t Analyzing preds")
# getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
# getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
logging.debug("Done:\t Analyzing preds")
return results
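
Since every run returns [flag, resultsMonoview, resultsMultiview] with flag = [iterIndex, labelsCombination], downstream analysis can group the outputs per statistical iteration; a minimal sketch with made-up result placeholders:

fakeResults = [[[0, (0, 1)], ["mono_a"], ["multi_a"]],
               [[0, (0, 2)], ["mono_b"], ["multi_b"]],
               [[1, (0, 1)], ["mono_c"], ["multi_c"]]]

resultsByIteration = {}
for flag, resultsMonoview, resultsMultiview in fakeResults:
    iterIndex, labelsCombination = flag
    resultsByIteration.setdefault(iterIndex, []).append(
        (labelsCombination, resultsMonoview, resultsMultiview))
# resultsByIteration[0] holds both label combinations of the first iteration.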
def execClassif(arguments):
"""Main function to execute the benchmark"""
start = time.time()
@@ -502,7 +533,7 @@ def execClassif(arguments):
if statsIter > 1:
statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
else:
statsIterRandomStates = randomState
statsIterRandomStates = [randomState]
if args.name not in ["Fake", "Plausible"]:
getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
@@ -512,9 +543,9 @@
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes)
classificationIndices = execution.genSplits(statsIter, DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod)
multiclassLabels, labelsCombinations, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod, classificationIndices)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
@@ -549,9 +580,36 @@ def execClassif(arguments):
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
initKWARGS)
directories = execution.genDirecortiesNames(directory, statsIter, labelsIndices,
directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
multiclassMethod, LABELS_DICTIONARY)
# TODO : Gen arguments dictionaries
benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
labelsCombinations, oldIndicesMulticlass,
hyperParamSearch, args, kFolds,
statsIterRandomStates, metrics,
argumentDictionaries, benchmark)
nbMulticlass = len(labelsCombinations)
execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)
if statsIter > 1:
logging.debug("Start:\t Benchmark classification")
@@ -238,12 +238,9 @@ def initLogFile(args):
return resultDirectory
def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMethod):
def genSplits(labels, splitRatio, statsIterRandomStates):
"""Used to gen the train/test splits using one or multiple random states"""
for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
indices = oldIndices
splitsMulticlass = []
if statsIter > 1:
indices = np.arange(len(labels))
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
@@ -255,16 +252,8 @@ def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMe
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splits.append([trainIndices, testIndices])
splitsMulticlass.append(splits)
else:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splitsMulticlass.append((trainIndices, testIndices))
return splitsMulticlass
return splits
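
A minimal sketch of the rewritten genSplits logic, drawing one stratified train/test split per statistical-iteration random state (toy labels, seeds and split ratio are illustrative):

import numpy as np
import sklearn.model_selection

labels = np.array([0, 0, 0, 0, 1, 1, 1, 1])
splitRatio = 0.25
statsIterRandomStates = [np.random.RandomState(seed) for seed in (1, 2)]

indices = np.arange(len(labels))
splits = []
for randomState in statsIterRandomStates:
    foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                              random_state=randomState,
                                                              test_size=splitRatio)
    for train_fold, test_fold in foldsObj.split(indices, labels):
        splits.append([indices[train_fold], indices[test_fold]])
# splits[i] is the [trainIndices, testIndices] pair of the i-th statistical iteration.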
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
@@ -322,3 +311,32 @@ def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, l
labelName = labelDictionary[labelIndex]
directories.append(directory +labelName+"_vs_Rest/")
return directories
def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args,
kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark):
benchmarkArgumentDictionaries = []
for combinationIndex, labelsCombination in enumerate(labelsCombinations):
for iterIndex, iterRandomState in enumerate(statsIterRandomStates):
benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0:labelsDictionary[labelsCombination[0]],
1:labelsDictionary[labelsCombination[1]]},
"directory": directories[iterIndex]+
labelsDictionary[labelsCombination[0]]+
"vs"+
labelsDictionary[labelsCombination[1]]+"/",
"classificationIndices": oldIndicesMulticlass[combinationIndex][iterIndex],
"args": args,
"labels": multiclassLabels[combinationIndex],
"kFolds": kFolds[iterIndex],
"randomState": iterRandomState,
"hyperParamSearch": hyperParamSearch,
"metrics": metrics,
"argumentDictionaries": argumentDictionaries,
"benchmark": benchmark,
"views": None,
"viewsIndices": None,
"flag": [iterIndex, labelsCombination]}
benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary)
return benchmarkArgumentDictionaries
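
As a usage illustration, one dictionary is produced per (labels combination, statistical iteration) pair, and its output directory is derived from the two class names; a rough sketch with made-up class names and directories (none of these values come from the project):

labelsDictionary = {0: "cat", 1: "dog", 2: "bird"}
directories = ["Results/iter_1/", "Results/iter_2/"]
labelsCombination, iterIndex = (0, 2), 1

entry = {"LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]],
                               1: labelsDictionary[labelsCombination[1]]},
         "directory": directories[iterIndex]
                      + labelsDictionary[labelsCombination[0]]
                      + "vs"
                      + labelsDictionary[labelsCombination[1]] + "/",
         "flag": [iterIndex, labelsCombination]}
# entry["directory"] == "Results/iter_2/catvsbird/"
# With 3 classes (3 pairwise combinations) and 2 iterations, the full list has 6 entries.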