Commit ce4ed2b8 authored by bbauvin

To push

parent c15b97f6
@@ -239,6 +239,7 @@ def getClassificationIndices(argumentsDictionaries, iterIndex):
     for argumentsDictionary in argumentsDictionaries:
         if argumentsDictionary["flag"][0]==iterIndex:
+            pass
@@ -338,7 +339,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
     pass


-def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
+def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
     """Used to tranform one versus one results in multiclass results and to publish it"""
     multiclassResults = [{} for _ in range(statsIter)]
     for iterIndex in range(statsIter):
@@ -356,9 +357,9 @@ def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLa
     for iterIndex, multiclassiterResult in enumerate(multiclassResults):
         for key, value in multiclassiterResult.items():
             multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
-    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
+    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
     multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
-    publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
+    publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
     return multiclassResults
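For context, the vote-to-label conversion in the hunk above reduces, for each example, a matrix of one-versus-one votes to the class with the most votes. A minimal standalone sketch of that step (the `votes` array is illustrative, not data from the repository):

import numpy as np

# Hypothetical vote matrix for 4 examples and 3 classes: votes[i, c] counts how
# many one-versus-one classifiers predicted class c for example i.
votes = np.array([[2, 0, 1],
                  [0, 3, 0],
                  [1, 1, 2],
                  [0, 2, 1]])

# As in the hunk above, the multiclass prediction is the class with the most votes.
multiclass_predictions = np.argmax(votes, axis=1)
print(multiclass_predictions)  # [0 1 2 1]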
@@ -372,11 +373,11 @@ def analyzeIter(results):
     pass


-def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
+def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
     if statsIter > 1:
         if nbMulticlass > 1:
             analyzeBiclass(results)
-            multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
+            multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
             analyzeIter(multiclassResults)
         else:
             biclassResults = analyzeBiclass(results)
@@ -390,15 +391,25 @@ def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclas

 def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
                      kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                     benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                     ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+                     benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                     ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                     initMultiviewArguments=initMultiviewArguments):
     """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
     ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                               coreIndex, args.type, args.pathF, randomState,
+                                               coreIndex, args.type, args.pathF, randomState, labels,
                                                hyperParamSearch=hyperParamSearch, metrics=metrics,
                                                nIter=args.CL_GS_iter, **argument)
                        for argument in argumentDictionaries["Monoview"]]
@@ -409,7 +420,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMultiview = []
     resultsMultiview += [
         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
-                                args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
+                                args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
                                 metrics=metrics, nIter=args.CL_GS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]

     return [flag, resultsMonoview, resultsMultiview]
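The block added at the top of execOneBenchmark (and mirrored in execOneBenchmark_multicore below) creates the result directory if it is missing, tolerating the race where another worker creates it first, and then dumps the training labels next to the already-saved training indices. A minimal standalone sketch of that pattern, with an illustrative directory and label array (not taken from the repository):

import errno
import os

import numpy as np

directory = "Results/Example_vs_Rest/"             # illustrative result path
labels = np.array([0, 1, 0, 1, 1, 0])              # illustrative label vector
classificationIndices = (np.array([0, 2, 3, 5]),   # train indices
                         np.array([1, 4]))         # test indices

# Create the directory unless it already exists; ignoring EEXIST keeps this
# safe when several benchmark workers race to create the same folder.
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
    try:
        os.makedirs(os.path.dirname(directory + "train_labels.csv"))
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

# Save the labels and indices of the training examples so each split is traceable.
trainIndices, testIndices = classificationIndices
np.savetxt(directory + "train_labels.csv", labels[trainIndices], delimiter=",")
np.savetxt(directory + "train_indices.csv", trainIndices, delimiter=",")

On Python 3.2+ the same effect can be had with os.makedirs(path, exist_ok=True); the errno check keeps the pattern portable to Python 2.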
@@ -417,9 +428,21 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class

 def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
                                kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                               benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                               ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+                               benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                               ExecMonoview_multicore=ExecMonoview_multicore,
+                               ExecMultiview_multicore=ExecMultiview_multicore,
+                               initMultiviewArguments=initMultiviewArguments):
+    """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
+    ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
@@ -429,7 +452,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMonoview += (Parallel(n_jobs=nbCores)(
             delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                            coreIndex, args.type, args.pathF, randomState,
+                                            coreIndex, args.type, args.pathF, randomState, labels,
                                             hyperParamSearch=hyperParamSearch,
                                             metrics=metrics, nIter=args.CL_GS_iter,
                                             **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -444,7 +467,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMultiview += Parallel(n_jobs=nbCores)(
             delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
-                                             args.type, args.pathF, LABELS_DICTIONARY, randomState,
+                                             args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
                                              hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                              **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -452,8 +475,17 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     return [flag, resultsMonoview, resultsMultiview]


-def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
-                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
+def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
+                             kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
+                             benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                             ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                             initMultiviewArguments=initMultiviewArguments):
+    pass
+
+
+def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
+                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
+                  execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
     """Used to execute the needed benchmark(s) on multicore or mono-core functions
     The execOneBenchmark and execOneBenchmark_multicore keywords args are only used in the tests"""
     # TODO : find a way to flag
@@ -462,30 +494,29 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multi
     results = []
     if nbCores > 1:
         if statsIter > 1 or nbMulticlass > 1:
-            nbExpsToDo = nbMulticlass*statsIter
+            nbExpsToDo = len(benchmarkArgumentsDictionaries)
             nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
             for stepIndex in nbMulticoreToDo:
                 results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
                                                      (coreIndex=coreIndex,
-                                                      **argumentsDictionaries[coreIndex + stepIndex * nbCores])
+                                                      **benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
                                                      for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
         else:
-            results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
+            results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
     else:
-        for arguments in argumentsDictionaries:
-            results += [execOneBenchmark(**arguments)]
+        for arguments in benchmarkArgumentsDictionaries:
+            results += [execOneBenchmarkMonoCore(**arguments)]
     logging.debug("Done:\t Executing all the needed biclass benchmarks")

     # Do everything with flagging

     logging.debug("Start:\t Analyzing preds")
-    # getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
+    # getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
     logging.debug("Done:\t Analyzing preds")
     return results


 def execClassif(arguments):
     """Main function to execute the benchmark"""
     start = time.time()
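For reference, the chunked dispatch in execBenchmark above can be sketched in isolation: the list of benchmark argument dictionaries is cut into batches of nbCores jobs and each batch is pushed through Parallel/delayed. The sketch below assumes these come from the joblib package (the import is not shown in this diff), and the worker is a stand-in, not the repository's execOneBenchmark:

import math

from joblib import Parallel, delayed


def run_one_benchmark(coreIndex=-1, **kwargs):
    # Stand-in worker: the real code calls execOneBenchmark here.
    return {"core": coreIndex, "flag": kwargs.get("flag")}


benchmarkArgumentsDictionaries = [{"flag": [i, (0, 1)]} for i in range(5)]  # illustrative
nbCores = 2
results = []

nbExpsToDo = len(benchmarkArgumentsDictionaries)
for stepIndex in range(int(math.ceil(float(nbExpsToDo) / nbCores))):
    # Each step launches at most nbCores jobs; the last step gets the remainder.
    results += Parallel(n_jobs=nbCores)(
        delayed(run_one_benchmark)(coreIndex=coreIndex,
                                   **benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
        for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores)))

Switching nbExpsToDo from nbMulticlass*statsIter to len(benchmarkArgumentsDictionaries) ties the loop bound directly to the list actually being indexed, which is the safer invariant.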
@@ -502,7 +533,7 @@ def execClassif(arguments):
     if statsIter > 1:
         statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
     else:
-        statsIterRandomStates = randomState
+        statsIterRandomStates = [randomState]

     if args.name not in ["Fake", "Plausible"]:
         getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
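A side note on the [randomState] change above: keeping the single-iteration case as a one-element list means downstream loops such as `for randomState in statsIterRandomStates` in the reworked genSplits (second file below) behave the same whether statsIter is 1 or larger. A small illustrative sketch of that assumption:

import numpy as np

statsIter = 1                      # illustrative: a single statistical iteration
randomState = np.random.RandomState(42)

if statsIter > 1:
    statsIterRandomStates = [np.random.RandomState(randomState.randint(500))
                             for _ in range(statsIter)]
else:
    # Wrapping the single state in a list lets callers always iterate,
    # e.g. `for randomState in statsIterRandomStates:` in genSplits.
    statsIterRandomStates = [randomState]

assert len(statsIterRandomStates) == statsIter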
@@ -512,9 +543,9 @@ def execClassif(arguments):
     DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
                                              args.CL_classes)

-    classificationIndices = execution.genSplits(statsIter, DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
+    classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)

-    multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod)
+    multiclassLabels, labelsCombinations, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod, classificationIndices)

     kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
@@ -549,9 +580,36 @@ def execClassif(arguments):
     argumentDictionaries = {"Monoview": [], "Multiview": []}
     argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
                                             initKWARGS)
-    directories = execution.genDirecortiesNames(directory, statsIter, labelsIndices,
+    directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
                                                 multiclassMethod, LABELS_DICTIONARY)
     # TODO : Gen arguments dictionaries
+    benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
+                                                                      labelsCombinations, oldIndicesMulticlass,
+                                                                      hyperParamSearch, args, kFolds,
+                                                                      statsIterRandomStates, metrics,
+                                                                      argumentDictionaries, benchmark)
+    nbMulticlass = len(labelsCombinations)
+    execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)

     if statsIter > 1:
         logging.debug("Start:\t Benchmark classification")
The remaining hunks modify the execution utility module (imported above as `execution`):
@@ -238,12 +238,9 @@ def initLogFile(args):
     return resultDirectory


-def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMethod):
+def genSplits(labels, splitRatio, statsIterRandomStates):
     """Used to gen the train/test splits using one or multiple random states"""
-    for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
-        indices = oldIndices
-        splitsMulticlass = []
-        if statsIter > 1:
-            splits = []
+    indices = np.arange(len(labels))
+    splits = []
     for randomState in statsIterRandomStates:
         foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
@@ -255,16 +252,8 @@ def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMe
         trainIndices = indices[train_fold]
         testIndices = indices[test_fold]
         splits.append([trainIndices, testIndices])
-        splitsMulticlass.append(splits)
-    else:
-        foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
-        folds = foldsObj.split(indices, labels)
-        for fold in folds:
-            train_fold, test_fold = fold
-            trainIndices = indices[train_fold]
-            testIndices = indices[test_fold]
-            splitsMulticlass.append((trainIndices, testIndices))
-    return splitsMulticlass
+    return splits


 def genKFolds(statsIter, nbFolds, statsIterRandomStates):
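The reworked genSplits boils down to one stratified shuffle split per random state. A self-contained sketch of that pattern with toy labels (illustrative data, not from the repository):

import numpy as np
import sklearn.model_selection

labels = np.array([0, 0, 0, 0, 1, 1, 1, 1])   # illustrative labels
splitRatio = 0.25                             # fraction of examples held out for test
statsIterRandomStates = [np.random.RandomState(seed) for seed in (1, 2, 3)]

indices = np.arange(len(labels))
splits = []
for randomState in statsIterRandomStates:
    foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                              random_state=randomState,
                                                              test_size=splitRatio)
    # StratifiedShuffleSplit yields (train, test) index arrays that preserve the
    # class proportions of `labels` in both parts.
    train_fold, test_fold = next(foldsObj.split(indices, labels))
    splits.append([indices[train_fold], indices[test_fold]])

# One [trainIndices, testIndices] pair per statistical iteration.
assert len(splits) == len(statsIterRandomStates)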
@@ -322,3 +311,32 @@ def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, l
         labelName = labelDictionary[labelIndex]
         directories.append(directory +labelName+"_vs_Rest/")
     return directories
+
+
+def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args,
+                            kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark):
+    benchmarkArgumentDictionaries = []
+    for combinationIndex, labelsCombination in enumerate(labelsCombinations):
+        for iterIndex, iterRandomState in enumerate(statsIterRandomStates):
+            benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]],
+                                                                 1: labelsDictionary[labelsCombination[1]]},
+                                           "directory": directories[iterIndex] +
+                                                        labelsDictionary[labelsCombination[0]] +
+                                                        "vs" +
+                                                        labelsDictionary[labelsCombination[1]] + "/",
+                                           "classificationIndices": oldIndicesMulticlass[combinationIndex][iterIndex],
+                                           "args": args,
+                                           "labels": multiclassLabels[combinationIndex],
+                                           "kFolds": kFolds[iterIndex],
+                                           "randomState": iterRandomState,
+                                           "hyperParamSearch": hyperParamSearch,
+                                           "metrics": metrics,
+                                           "argumentDictionaries": argumentDictionaries,
+                                           "benchmark": benchmark,
+                                           "views": None,
+                                           "viewsIndices": None,
+                                           "flag": [iterIndex, labelsCombination]}
+            benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary)
+    return benchmarkArgumentDictionaries
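To illustrate how these dictionaries are consumed upstream: each one carries keyword arguments matching the benchmark runners in the first file, so the driver can simply unpack it with **. A toy, self-contained sketch (the label names, paths, and worker are illustrative, not repository code):

import numpy as np


def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None,
                             classificationIndices=None, labels=None, flag=None, **kwargs):
    # Stand-in for the real benchmark runner: just report which one-vs-one
    # problem and statistical iteration this dictionary describes.
    return flag, LABELS_DICTIONARY, directory


# One illustrative dictionary, shaped like genArgumentDictionaries' output.
benchmarkArgumentDictionary = {
    "LABELS_DICTIONARY": {0: "cat", 1: "dog"},
    "directory": "Results/iter_1/catvsdog/",
    "classificationIndices": [np.array([0, 2, 3]), np.array([1, 4])],
    "labels": np.array([0, 1, 0, 1, 0]),
    "flag": [0, (0, 1)],          # [iterIndex, labelsCombination]
}

flag, labels_dict, directory = execOneBenchmarkMonoCore(**benchmarkArgumentDictionary)
print(flag, labels_dict, directory)

The "flag" entry, [iterIndex, labelsCombination], is what lets result analysis (e.g. getClassificationIndices in the first file) group the returned results by statistical iteration and by label pair.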