import errno
import logging
import math
import os
import pkgutil
import time
import matplotlib
import numpy as np
from joblib import Parallel, delayed
matplotlib.use(
'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure
# Import own modules
from . import MonoviewClassifiers
from . import MultiviewClassifiers
from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
from .utils import GetMultiviewDb as DB
from .ResultAnalysis import \
getResults # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
from .utils import execution, Dataset, Multiclass
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args):
r"""Used to create a list of all the algorithm packages names used for the benchmark.
First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right
dictionary the asked algorithms. If none is asked by the user, all will be added.
If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added.
Parameters
----------
CL_type : List of string
List of types of needed benchmark
multiviewAlgos : List of strings
List of multiview algorithms needed for the benchmark
monoviewAlgos : Listof strings
List of monoview algorithms needed for the benchmark
args : ParsedArgumentParser args
All the input args (used to tune the algorithms)
Returns
-------
benchmark : Dictionary of dictionaries
Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
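Examples
--------
A minimal sketch; `args` is unused on this path, so `None` can be passed,
and the classifier name is an illustrative assumption:
>>> initBenchmark(["Monoview"], ["DecisionTree"], [""], None)["Monoview"]
['DecisionTree']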
"""
benchmark = {"Monoview": {}, "Multiview": {}}
allMultiviewPackages = [name for _, name, isPackage
in pkgutil.iter_modules(
['./MonoMultiViewClassifiers/MultiviewClassifiers/']) if isPackage]
if "Monoview" in CL_type:
if monoviewAlgos == ['']:
benchmark["Monoview"] = [name for _, name, isPackage in
pkgutil.iter_modules([
"./MonoMultiViewClassifiers/MonoviewClassifiers"])
if not isPackage]
else:
benchmark["Monoview"] = monoviewAlgos
if "Multiview" in CL_type:
benchmark["Multiview"] = {}
if multiviewAlgos == [""]:
algosMultiview = allMultiviewPackages
else:
algosMultiview = multiviewAlgos
for multiviewPackageName in allMultiviewPackages:
if multiviewPackageName in algosMultiview:
multiviewPackage = getattr(MultiviewClassifiers,
multiviewPackageName)
multiviewModule = getattr(multiviewPackage,
multiviewPackageName + "Module")
benchmark = multiviewModule.getBenchmark(benchmark, args=args)
if CL_type == ["Benchmark"]:
allMonoviewAlgos = [name for _, name, isPackage in
pkgutil.iter_modules([
'./MonoMultiViewClassifiers/MonoviewClassifiers'])
if (not isPackage) and name not in ["framework"]]
benchmark["Monoview"] = allMonoviewAlgos
benchmark["Multiview"] = dict(
(multiviewPackageName, "_") for multiviewPackageName in
allMultiviewPackages)
for multiviewPackageName in allMultiviewPackages:
multiviewPackage = getattr(MultiviewClassifiers,
multiviewPackageName)
multiviewModule = getattr(multiviewPackage,
multiviewPackageName + "Module")
benchmark = multiviewModule.getBenchmark(benchmark, args=args)
return benchmark
def genViewsDictionnary(DATASET, views):
r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value).
Parameters
----------
DATASET : `h5py` dataset file
The full dataset on which the benchmark will be done
views : List of strings
Names of the selected views on which the benchmark will be done
Returns
-------
viewsDictionary : dictionary
Dictionary mapping the view names to their index in the full dataset.
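Examples
--------
A minimal sketch, assuming `DATASET` is an open `h5py` file whose "View0"
group carries the name attribute "RNA" (this layout is an assumption made
for illustration):
>>> genViewsDictionnary(DATASET, ["RNA"])
{'RNA': 0}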
"""
datasetsNames = DATASET.keys()
viewsDictionary = {}
for datasetName in datasetsNames:
if datasetName[:4] == "View":
viewName = DATASET.get(datasetName).attrs["name"]
if isinstance(viewName, bytes):
viewName = viewName.decode("utf-8")
if viewName in views:
viewsDictionary[viewName] = int(datasetName[4:])
return viewsDictionary
def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit):
r"""Used to add each monoview exeperience args to the list of monoview experiences args.
First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right
dictionary the asked algorithms. If none is asked by the user, all will be added.
If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added.
Parameters
----------
benchmark : dictionary
All types of monoview and multiview experiments that have to be benchmarked
argumentDictionaries : dictionary
Maps monoview and multiview experiments arguments.
viewDictionary : dictionary
Maps the view names to their index in the HDF5 dataset
nbClass : integer
Number of different labels in the classification
Returns
-------
benchmark : Dictionary of dictionaries
Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
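Examples
--------
A minimal, self-contained sketch; the classifier and view names are
hypothetical:
>>> exps = initMonoviewExps({"Monoview": ["DecisionTree"], "Multiview": {}},
...                         {"RNA": 0}, 2,
...                         {"DecisionTreeKWARGSInit": {"max_depth": 3}})
>>> exps["Monoview"][0]["args"]["feat"]
'RNA'
>>> exps["Monoview"][0]["viewIndex"]
0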
"""
argumentDictionaries = {"Monoview": [], "Multiview": []}
if benchmark["Monoview"]:
argumentDictionaries["Monoview"] = []
for viewName, viewIndex in viewsDictionary.items():
for classifier in benchmark["Monoview"]:
arguments = {
"args": {classifier + "KWARGS": kwargsInit[
classifier + "KWARGSInit"], "feat": viewName,
"CL_type": classifier, "nbClass": nbClass},
"viewIndex": viewIndex}
argumentDictionaries["Monoview"].append(arguments)
return argumentDictionaries
def initMonoviewKWARGS(args, classifiersNames):
r"""Used to init kwargs thanks to a function in each monoview classifier package.
Parameters
----------
args : parsed args objects
All the args passed by the user.
classifiersNames : list of strings
List of the benchmarks's monoview classifiers names.
Returns
-------
monoviewKWARGS : Dictionary of dictionaries
Dictionary resuming all the specific arguments for the benchmark, one dictionary for each classifier.
For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
logging.debug("Start:\t Initializing Monoview classifiers arguments")
monoviewKWARGS = {}
for classifiersName in classifiersNames:
try:
classifierModule = getattr(MonoviewClassifiers, classifiersName)
except AttributeError:
raise AttributeError(
classifiersName + " is not implemented in MonoviewClassifiers, "
"please specify the name of the file in MonoviewClassifiers")
monoviewKWARGS[
classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(
args)
logging.debug("Done:\t Initializing Monoview classifiers arguments")
return monoviewKWARGS
def initKWARGSFunc(args, benchmark):
monoviewKWARGS = initMonoviewKWARGS(args, benchmark["Monoview"])
return monoviewKWARGS
def initMultiviewArguments(args, benchmark, views, viewsIndices,
argumentDictionaries, randomState, directory,
resultsMonoview, classificationIndices):
"""Used to add each monoview exeperience args to the list of monoview experiences args"""
logging.debug("Start:\t Initializing Multiview classifiers arguments")
multiviewArguments = []
if "Multiview" in benchmark:
for multiviewAlgoName in benchmark["Multiview"]:
multiviewPackage = getattr(MultiviewClassifiers, multiviewAlgoName)
multiviewModule = getattr(multiviewPackage,
multiviewAlgoName + "Module")
multiviewArguments += multiviewModule.getArgs(args, benchmark,
views, viewsIndices,
randomState,
directory,
resultsMonoview,
classificationIndices)
argumentDictionaries["Multiview"] = multiviewArguments
logging.debug("Start:\t Initializing Multiview classifiers arguments")
return argumentDictionaries
def arangeMetrics(metrics, metricPrinc):
"""Used to get the metrics list in the right order so that
the first one is the principal metric specified in args"""
if [metricPrinc] in metrics:
metricIndex = metrics.index([metricPrinc])
firstMetric = metrics[0]
metrics[0] = [metricPrinc]
metrics[metricIndex] = firstMetric
else:
raise AttributeError(metricPrinc + " not in metric pool")
return metrics
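# An illustrative sanity check of arangeMetrics: with
# metrics == [["accuracy_score"], ["f1_score"]] and metricPrinc == "f1_score",
# the call returns [["f1_score"], ["accuracy_score"]], i.e. the principal
# metric is swapped into first position.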
def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY,
kFolds):
logging.debug("Start:\t Benchmark initialization")
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices = classificationIndices[0]
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
np.savetxt(directory + "train_indices.csv", classificationIndices[0],
delimiter=",")
resultsMonoview = []
folds = kFolds.split(np.arange(len(trainLabels)), trainLabels)
minFoldLen = int(len(trainLabels) / kFolds.n_splits)
for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds):
fileName = directory + "/folds/test_labels_fold_" + str(
foldIndex) + ".csv"
if not os.path.exists(os.path.dirname(fileName)):
try:
os.makedirs(os.path.dirname(fileName))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]],
delimiter=",")
labelsNames = list(LABELS_DICTIONARY.values())
logging.debug("Done:\t Benchmark initialization")
return resultsMonoview, labelsNames
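# Sketch of the files benchmarkInit writes, assuming
# directory == "results/iter_1/" and a 2-fold CV (paths illustrative):
#   results/iter_1/train_labels.csv
#   results/iter_1/train_indices.csv
#   results/iter_1/folds/test_labels_fold_0.csv
#   results/iter_1/folds/test_labels_fold_1.csv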
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None,
classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None,
metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None,
labels=None,
ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
resultsMonoview, labelsNames = benchmarkInit(directory,
classificationIndices, labels,
LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
resultsMonoview += [
ExecMonoview_multicore(directory, args.name, labelsNames,
classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState,
labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics,
nIter=args.CL_HPS_iter, **argument)
for argument in argumentDictionaries["Monoview"]]
logging.debug("Done:\t Monoview benchmark")
logging.debug("Start:\t Multiview arguments initialization")
argumentDictionaries = initMultiviewArguments(args, benchmark, views,
viewsIndices,
argumentDictionaries,
randomState, directory,
resultsMonoview,
classificationIndices)
logging.debug("Done:\t Multiview arguments initialization")
logging.debug("Start:\t Multiview benchmark")
resultsMultiview = [
ExecMultiview_multicore(directory, coreIndex, args.name,
classificationIndices, kFolds, args.type,
args.pathF, LABELS_DICTIONARY, randomState,
labels, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_HPS_iter,
**arguments)
for arguments in argumentDictionaries["Multiview"]]
logging.debug("Done:\t Multiview benchmark")
return [flag, resultsMonoview + resultsMultiview]
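# The return value pairs the benchmark's flag (its identifier among the
# statistical iterations and label combinations) with the concatenated
# monoview and multiview result lists, so parallel runs can be matched back
# to their configuration.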
def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None,
directory=None, classificationIndices=None,
args=None,
kFolds=None, randomState=None,
hyperParamSearch=None, metrics=None,
argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None,
flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
resultsMonoview, labelsNames = benchmarkInit(directory,
classificationIndices, labels,
LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
nbExperiments = len(argumentDictionaries["Monoview"])
nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
for stepIndex in range(nbMulticoreToDo):
resultsMonoview += (Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(directory, args.name, labelsNames,
classificationIndices, kFolds,
coreIndex, args.type, args.pathF,
randomState, labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics,
nIter=args.CL_HPS_iter,
**argumentDictionaries["Monoview"][
coreIndex + stepIndex * nbCores])
for coreIndex in
range(min(nbCores, nbExperiments - stepIndex * nbCores))))
logging.debug("Done:\t Monoview benchmark")
logging.debug("Start:\t Multiview arguments initialization")
argumentDictionaries = initMultiviewArguments(args, benchmark, views,
viewsIndices,
argumentDictionaries,
randomState, directory,
resultsMonoview,
classificationIndices)
logging.debug("Done:\t Multiview arguments initialization")
logging.debug("Start:\t Multiview benchmark")
resultsMultiview = []
nbExperiments = len(argumentDictionaries["Multiview"])
nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
for stepIndex in range(nbMulticoreToDo):
resultsMultiview += Parallel(n_jobs=nbCores)(
delayed(ExecMultiview_multicore)(directory, coreIndex, args.name,
classificationIndices, kFolds,
args.type, args.pathF,
LABELS_DICTIONARY, randomState,
labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics,
nIter=args.CL_HPS_iter,
**
argumentDictionaries["Multiview"][
stepIndex * nbCores + coreIndex])
for coreIndex in
range(min(nbCores, nbExperiments - stepIndex * nbCores)))
logging.debug("Done:\t Multiview benchmark")
return [flag, resultsMonoview + resultsMultiview]
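# Both loops above split the experiments into ceil(nbExperiments / nbCores)
# Parallel batches of at most nbCores jobs each; the last batch may be smaller.
# A standalone sketch of the same chunking arithmetic:
# >>> import math
# >>> nbExperiments, nbCores = 7, 3
# >>> [min(nbCores, nbExperiments - stepIndex * nbCores)
# ...  for stepIndex in range(int(math.ceil(nbExperiments / nbCores)))]
# [3, 3, 1]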
def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None,
directory=None, classificationIndices=None,
args=None,
kFolds=None, randomState=None,
hyperParamSearch=None, metrics=None,
argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None,
flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
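"""Used to run a benchmark on a single core, reading each view directly from the
HDF5 dataset. The ExecMonoview_multicore, ExecMultiview_multicore and
initMultiviewArguments args are only used for tests"""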
resultsMonoview, labelsNames = benchmarkInit(directory,
classificationIndices, labels,
LABELS_DICTIONARY, kFolds)
logging.debug("Start:\t Monoview benchmark")
for arguments in argumentDictionaries["Monoview"]:
X = DATASET.get("View" + str(arguments["viewIndex"]))
Y = labels
resultsMonoview += [
ExecMonoview(directory, X, Y, args.name, labelsNames,
classificationIndices, kFolds,
1, args.type, args.pathF, randomState,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_HPS_iter, **arguments)]
logging.debug("Done:\t Monoview benchmark")
logging.debug("Start:\t Multiview arguments initialization")
argumentDictionaries = initMultiviewArguments(args, benchmark, views,
viewsIndices,
argumentDictionaries,
randomState, directory,
resultsMonoview,
classificationIndices)
logging.debug("Done:\t Multiview arguments initialization")
logging.debug("Start:\t Multiview benchmark")
resultsMultiview = []
for arguments in argumentDictionaries["Multiview"]:
resultsMultiview += [
ExecMultiview(directory, DATASET, args.name, classificationIndices,
kFolds, 1, args.type,
args.pathF, LABELS_DICTIONARY, randomState, labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_HPS_iter, **arguments)]
logging.debug("Done:\t Multiview benchmark")
return [flag, resultsMonoview + resultsMultiview]
def execBenchmark(nbCores, statsIter, nbMulticlass,
benchmarkArgumentsDictionaries, classificationIndices,
directories,
directory, multiClassLabels, metrics, labelsDictionary,
nbLabels, DATASET,
execOneBenchmark=execOneBenchmark,
execOneBenchmark_multicore=execOneBenchmark_multicore,
execOneBenchmarkMonoCore=execOneBenchmarkMonoCore,
getResults=getResults, delete=DB.deleteHDF5):
r"""Used to execute the needed benchmark(s) on multicore or mono-core functions.
Parameters
----------
nbCores : int
Number of threads that the benchmarks can use.
statsIter : int
Number of statistical iterations that have to be done.
benchmarkArgumentsDictionaries : list of dictionaries
All the needed arguments for the benchmarks.
classificationIndices : list of lists of numpy.ndarray
For each statistical iteration a pair of numpy.ndarrays is stored with the indices of the training set and
those of the testing set.
directories : list of strings
List of the paths to the result directories for each statistical iteration.
directory : string
Path to the main results directory.
multiClassLabels : list of lists of numpy.ndarray
For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the
indices for the biclass training set, the ones for the biclass testing set and the ones for the
multiclass testing set.
metrics : list of lists
Metrics that will be used to evaluate the algorithms' performance.
labelsDictionary : dictionary
Dictionary mapping labels indices to labels names.
nbLabels : int
Total number of different labels in the dataset.
DATASET : HDF5 dataset file
The full dataset that will be used by the benchmark.
rest_of_the_args :
The remaining (injected) arguments are only used for testing purposes.
Returns
-------
results : list of lists
The results of the benchmark.
"""
logging.debug("Start:\t Executing all the needed biclass benchmarks")
results = []
if nbCores > 1:
if statsIter > 1 or nbMulticlass > 1:
nbExpsToDo = len(benchmarkArgumentsDictionaries)
nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
for stepIndex in nbMulticoreToDo:
results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
(coreIndex=coreIndex,
**
benchmarkArgumentsDictionaries[
coreIndex + stepIndex * nbCores])
for coreIndex in range(
min(nbCores, nbExpsToDo - stepIndex * nbCores))))
else:
results += [execOneBenchmark_multicore(nbCores=nbCores, **
benchmarkArgumentsDictionaries[0])]
else:
for arguments in benchmarkArgumentsDictionaries:
results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)]
logging.debug("Done:\t Executing all the needed biclass benchmarks")
if nbCores > 1:
logging.debug("Start:\t Deleting " + str(
nbCores) + " temporary datasets for multiprocessing")
args = benchmarkArgumentsDictionaries[0]["args"]
datasetFiles = delete(args.pathF, args.name, nbCores)
logging.debug("Start:\t Deleting datasets for multiprocessing")
# Do everything with flagging
nbExamples = len(classificationIndices[0][0]) + len(
classificationIndices[0][1])
multiclassGroundTruth = DATASET.get("Labels").value
logging.debug("Start:\t Analyzing predictions")
getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
multiclassGroundTruth, metrics, classificationIndices,
directories, directory, labelsDictionary, nbExamples, nbLabels)
logging.debug("Done:\t Analyzing predictions")
return results
def execClassif(arguments):
"""Main function to execute the benchmark"""
start = time.time()
args = execution.parseTheArgs(arguments)
os.nice(args.nice)
nbCores = args.nbCores
if nbCores == 1:
os.environ['OPENBLAS_NUM_THREADS'] = '1'
statsIter = args.CL_statsiter
hyperParamSearch = args.CL_HPS_type
multiclassMethod = args.CL_multiclassMethod
CL_type = args.CL_type
monoviewAlgos = args.CL_algos_monoview
multiviewAlgos = args.CL_algos_multiview
directory = execution.initLogFile(args.name, args.views, args.CL_type,
args.log, args.debug, args.label,
args.res_dir)
randomState = execution.initRandomState(args.randomState, directory)
statsIterRandomStates = execution.initStatsIterRandomStates(statsIter,
randomState)
getDatabase = execution.getDatabaseFunction(args.name, args.type)
DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args.views,
args.pathF, args.name,
args.CL_nbClass,
args.CL_classes,
randomState,
args.full,
args.add_noise,
args.noise_std)
args.name = datasetname
splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split,
statsIterRandomStates)
multiclassLabels, labelsCombinations, indicesMulticlass = Multiclass.genMulticlassLabels(
DATASET.get("Labels").value, multiclassMethod, splits)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds,
statsIterRandomStates)
datasetFiles = Dataset.initMultipleDatasets(args.pathF, args.name, nbCores)
# if not views:
# raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
views, viewsIndices, allViews = execution.initViews(DATASET, args.views)
viewsDictionary = genViewsDictionnary(DATASET, views)
nbViews = len(views)
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
metrics = [metric.split(":") for metric in args.CL_metrics]
if metrics == [[""]]:
metricsNames = [name for _, name, isPackage
in pkgutil.iter_modules(
['./MonoMultiViewClassifiers/Metrics']) if
not isPackage and name not in ["framework", "log_loss",
"matthews_corrcoef",
"roc_auc_score"]]
metrics = [[metricName] for metricName in metricsNames]
metrics = arangeMetrics(metrics, args.CL_metric_princ)
for metricIndex, metric in enumerate(metrics):
if len(metric) == 1:
metrics[metricIndex] = [metric[0], None]
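# e.g. args.CL_metrics == ["accuracy_score", "f1_score"] with
# args.CL_metric_princ == "f1_score" yields, after the loop above,
# metrics == [["f1_score", None], ["accuracy_score", None]] (illustrative).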
benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args)
initKWARGS = initKWARGSFunc(args, benchmark)
dataBaseTime = time.time() - start
argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary,
NB_CLASS, initKWARGS)
directories = execution.genDirecortiesNames(directory, statsIter)
benchmarkArgumentDictionaries = execution.genArgumentDictionaries(
LABELS_DICTIONARY, directories, multiclassLabels,
labelsCombinations, indicesMulticlass,
hyperParamSearch, args, kFolds,
statsIterRandomStates, metrics,
argumentDictionaries, benchmark, nbViews,
views, viewsIndices)
nbMulticlass = len(labelsCombinations)
execBenchmark(nbCores, statsIter, nbMulticlass,
benchmarkArgumentDictionaries, splits, directories,
directory, multiclassLabels, metrics, LABELS_DICTIONARY,
NB_CLASS, DATASET)
#
# def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices,
# kFolds,
# randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start,
# benchmark,
# views):
# """Used to execute mono and multiview classification and result analysis for one random state
# using multicore classification"""
# resultsMonoview = []
# labelsNames = LABELS_DICTIONARY.values()
# np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
#
# resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
# coreIndex, args.type, args.pathF, randomState,
# hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_HPS_iter,
# **arguments)
# for arguments in argumentDictionaries["Monoview"]]
# monoviewTime = time.time() - dataBaseTime - start
#
# argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries,
# randomState, directory, resultsMonoview, classificationIndices)
#
# resultsMultiview = []
# resultsMultiview += [
# ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
# args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
# for arguments in argumentDictionaries["Multiview"]]
# multiviewTime = time.time() - monoviewTime - dataBaseTime - start
#
# labels = np.array(
# [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in
# resultsMultiview]).transpose()
# DATASET = h5py.File(args.pathF + args.name + str(0) + ".hdf5", "r")
# trueLabels = DATASET.get("Labels").value
# times = [dataBaseTime, monoviewTime, multiviewTime]
# results = (resultsMonoview, resultsMultiview)
# labelAnalysis = analyzeLabels(labels, trueLabels, results, directory)
# logging.debug("Start:\t Analyze Iteration Results")
# resultAnalysis(benchmark, results, args.name, times, metrics, directory)
# logging.debug("Done:\t Analyze Iteration Results")
# globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime
# totalTime = time.time() - start
# logging.info("Extraction time : " + str(int(dataBaseTime)) +
# "s, Monoview time : " + str(int(monoviewTime)) +
# "s, Multiview Time : " + str(int(multiviewTime)) +
# "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) +
# "s, Iteration Duration : " + str(int(totalTime)) + "s")
# return results, labelAnalysis
#
#
# def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, kFolds,
# randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
# benchmark, views):
# """Used to execute mono and multiview classification and result analysis for one random state
# classification"""
# #TODO : Clarify this one
#
#
# argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries,
# randomState, directory, resultsMonoview, classificationIndices)
#
# resultsMultiview = []
# if nbCores > 1:
# nbExperiments = len(argumentDictionaries["Multiview"])
# for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
# resultsMultiview += Parallel(n_jobs=nbCores)(
# delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
# args.type,
# args.pathF,
# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_HPS_iter,
# **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
# for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
# else:
# resultsMultiview = [
# ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in
# argumentDictionaries["Multiview"]]
# multiviewTime = time.time() - monoviewTime - dataBaseTime - start
# if nbCores > 1:
# logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing")
# datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
# logging.debug("Start:\t Deleting datasets for multiprocessing")
# labels = np.array(
# [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in
# resultsMultiview]).transpose()
# trueLabels = DATASET.get("Labels").value
# times = [dataBaseTime, monoviewTime, multiviewTime]
# results = (resultsMonoview, resultsMultiview)
# labelAnalysis = analyzeLabels(labels, trueLabels, results, directory)
# logging.debug("Start:\t Analyze Iteration Results")
# resultAnalysis(benchmark, results, args.name, times, metrics, directory)
# logging.debug("Done:\t Analyze Iteration Results")
# globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime
# totalTime = time.time() - start
# logging.info("Extraction time : " + str(int(dataBaseTime)) +
# "s, Monoview time : " + str(int(monoviewTime)) +
# "s, Multiview Time : " + str(int(multiviewTime)) +
# "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) +
# "s, Iteration Duration : " + str(int(totalTime)) + "s")
# return results, labelAnalysis
#
#
#
#
#
#
#
# if statsIter > 1:
# logging.debug("Start:\t Benchmark classification")
# for statIterIndex in range(statsIter):
# if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")):
# try:
# os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv"))
# except OSError as exc:
# if exc.errno != errno.EEXIST:
# raise
# trainIndices, testIndices = classificationIndices[statIterIndex]
# trainLabels = DATASET.get("Labels").value[trainIndices]
# np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",")
# if nbCores > 1:
# iterResults = []
# nbExperiments = statsIter*len(multiclassLabels)
# for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
# iterResults += (Parallel(n_jobs=nbCores)(
# delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1,
# directories[coreIndex + stepIndex * nbCores], args,
# classificationIndices[coreIndex + stepIndex * nbCores],
# kFolds[coreIndex + stepIndex * nbCores],
# statsIterRandomStates[coreIndex + stepIndex * nbCores],
# hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime,
# start, benchmark,
# views)
# for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
# logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing")
# datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
# logging.debug("Start:\t Deleting datasets for multiprocessing")
# else:
# iterResults = []
# for iterIndex in range(statsIter):
# if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")):
# try:
# os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv"))
# except OSError as exc:
# if exc.errno != errno.EEXIST:
# raise
# trainIndices, testIndices = classificationIndices[iterIndex]
# trainLabels = DATASET.get("Labels").value[trainIndices]
# np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",")
# iterResults.append(
# classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args,
# classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex],
# hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark,
# views))
# logging.debug("Done:\t Benchmark classification")
# logging.debug("Start:\t Global Results Analysis")
# classifiersIterResults = []
# iterLabelAnalysis = []
# for result in iterResults:
# classifiersIterResults.append(result[0])
# iterLabelAnalysis.append(result[1])
#
# mono,multi = classifiersIterResults[0]
# classifiersNames = genNamesFromRes(mono, multi)
# analyzeIterLabels(iterLabelAnalysis, directory, classifiersNames)
# analyzeIterResults(classifiersIterResults, args.name, metrics, directory)
# logging.debug("Done:\t Global Results Analysis")
# totalDur = time.time() - start
# m, s = divmod(totalDur, 60)
# h, m = divmod(m, 60)
# d, h = divmod(h, 24)
# # print "%d_%02d_%02d" % (h, m, s)
# logging.info("Info:\t Total duration : " + str(d) + " days, " + str(h) + " hours, " + str(m) + " mins, " + str(
# int(s)) + "secs.")
#
# else:
# logging.debug("Start:\t Benchmark classification")
# if not os.path.exists(os.path.dirname(directories + "train_labels.csv")):
# try:
# os.makedirs(os.path.dirname(directories + "train_labels.csv"))
# except OSError as exc:
# if exc.errno != errno.EEXIST:
# raise
# trainIndices, testIndices = classificationIndices
# trainLabels = DATASET.get("Labels").value[trainIndices]
# np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",")
# res, labelAnalysis = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices,
# kFolds,
# statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
# benchmark, views)
# logging.debug("Done:\t Benchmark classification")
# totalDur = time.time()-start
# m, s = divmod(totalDur, 60)
# h, m = divmod(m, 60)
# d, h = divmod(h, 24)
# # print "%d_%02d_%02d" % (h, m, s)
# logging.info("Info:\t Total duration : "+str(d)+ " days, "+str(h)+" hours, "+str(m)+" mins, "+str(int(s))+"secs.")
#
# if statsIter > 1:
# pass