    import errno
    import logging
    import math
    import os
    import pkgutil
    import time
    
    import matplotlib
    import numpy as np
    from joblib import Parallel, delayed
    
    # Use matplotlib's Agg backend (Anti-Grain Geometry C++ library) to render
    # figures as raster (pixel) images without requiring a display.
    matplotlib.use('Agg')
    
    # Import own modules
    from . import MonoviewClassifiers
    from . import MultiviewClassifiers
    from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
    from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
    from .utils import GetMultiviewDb as DB
    from .ResultAnalysis import getResults  # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
    from .utils import execution, Dataset, Multiclass
    
    # Author-Info
    __author__ = "Baptiste Bauvin"
    __status__ = "Prototype"  # Production, Development, Prototype
    
    
    def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args):
        r"""Used to create a list of all the algorithm packages names used for the benchmark.
    
        First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right
        dictionary the asked algorithms. If none is asked by the user, all will be added.
    
        If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added.
    
        Parameters
        ----------
        CL_type : List of string
            List of types of needed benchmark
        multiviewAlgos : List of strings
            List of multiview algorithms needed for the benchmark
        monoviewAlgos : Listof strings
            List of monoview algorithms needed for the benchmark
        args : ParsedArgumentParser args
            All the input args (used to tune the algorithms)
    
        Returns
        -------
        benchmark : Dictionary of dictionaries
            Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
        """
        benchmark = {"Monoview": {}, "Multiview": {}}
        allMultiviewPackages = [name for _, name, isPackage
                                in pkgutil.iter_modules(
                ['./MonoMultiViewClassifiers/MultiviewClassifiers/']) if isPackage]
    
        if "Monoview" in CL_type:
            if monoviewAlgos == ['']:
                benchmark["Monoview"] = [name for _, name, isPackage in
                                         pkgutil.iter_modules([
                                                                  "./MonoMultiViewClassifiers/MonoviewClassifiers"])
                                         if not isPackage]
    
            else:
                benchmark["Monoview"] = monoviewAlgos
    
        if "Multiview" in CL_type:
            benchmark["Multiview"] = {}
            if multiviewAlgos == [""]:
                algosMultiview = allMultiviewPackages
            else:
                algosMultiview = multiviewAlgos
            for multiviewPackageName in allMultiviewPackages:
                if multiviewPackageName in algosMultiview:
                    multiviewPackage = getattr(MultiviewClassifiers,
                                               multiviewPackageName)
                    multiviewModule = getattr(multiviewPackage,
                                              multiviewPackageName + "Module")
                    benchmark = multiviewModule.getBenchmark(benchmark, args=args)
    
        if CL_type == ["Benchmark"]:
            allMonoviewAlgos = [name for _, name, isPackage in
                                pkgutil.iter_modules([
                                                         './MonoMultiViewClassifiers/MonoviewClassifiers'])
                                if (not isPackage) and name not in ["framework"]]
            benchmark["Monoview"] = allMonoviewAlgos
            benchmark["Multiview"] = dict(
                (multiviewPackageName, "_") for multiviewPackageName in
                allMultiviewPackages)
            for multiviewPackageName in allMultiviewPackages:
                multiviewPackage = getattr(MultiviewClassifiers,
                                           multiviewPackageName)
                multiviewModule = getattr(multiviewPackage,
                                          multiviewPackageName + "Module")
                benchmark = multiviewModule.getBenchmark(benchmark, args=args)
    
        return benchmark
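
    # Illustrative sketch ("DecisionTree" is a hypothetical classifier name):
    #     initBenchmark(["Monoview"], ["DecisionTree"], [""], args)
    # returns {"Monoview": ["DecisionTree"], "Multiview": {}}, since only the
    # requested monoview algorithm is kept and no multiview benchmark is asked.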
    
    
    def genViewsDictionnary(DATASET, views):
        r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value).
    
        Parameters
        ----------
        DATASET : `h5py` dataset file
            The full dataset on which the benchmark will be done
        views : List of strings
            Names of the selected views on which the banchmark will be done
    
        Returns
        -------
        viewDictionary : Dictionary
            Dictionary mapping the view names totheir indexin the full dataset.
            """
        datasetsNames = DATASET.keys()
        viewsDictionary = {}
        for datasetName in datasetsNames:
            if datasetName[:4] == "View":
                viewName = DATASET.get(datasetName).attrs["name"]
                if isinstance(viewName, bytes):
                    viewName = viewName.decode("utf-8")
                if viewName in views:
                    viewsDictionary[viewName] = int(datasetName[4:])
    
        return viewsDictionary
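
    # Illustrative sketch, assuming a dataset with groups "View0".."View2" whose
    # "name" attributes are the hypothetical view names "RNA", "methyl" and
    # "clinical": genViewsDictionnary(DATASET, ["RNA", "clinical"]) returns
    # {"RNA": 0, "clinical": 2}.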
    
    
    def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit):
        r"""Used to add each monoview exeperience args to the list of monoview experiences args.
    
        First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right
        dictionary the asked algorithms. If none is asked by the user, all will be added.
    
        If the keyword `"Benchmark"` is used, all mono- and multiview algorithms will be added.
    
        Parameters
        ----------
        benchmark : dictionary
            All types of monoview and multiview experiments that have to be benchmarked
        argumentDictionaries : dictionary
            Maps monoview and multiview experiments arguments.
        viewDictionary : dictionary
            Maps the view names to their index in the HDF5 dataset
        nbClass : integer
            Number of different labels in the classification
    
        Returns
        -------
        benchmark : Dictionary of dictionaries
            Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
        """
        argumentDictionaries = {"Monoview": [], "Multiview": []}
        if benchmark["Monoview"]:
            argumentDictionaries["Monoview"] = []
            for viewName, viewIndex in viewsDictionary.items():
                for classifier in benchmark["Monoview"]:
                    arguments = {
                        "args": {classifier + "KWARGS": kwargsInit[
                            classifier + "KWARGSInit"], "feat": viewName,
                                 "CL_type": classifier, "nbClass": nbClass},
                        "viewIndex": viewIndex}
                    argumentDictionaries["Monoview"].append(arguments)
        return argumentDictionaries
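
    # Illustrative sketch, with the hypothetical view "RNA" at index 0 and the
    # hypothetical classifier "DecisionTree": each element appended to
    # argumentDictionaries["Monoview"] has the shape
    #     {"args": {"DecisionTreeKWARGS": kwargsInit["DecisionTreeKWARGSInit"],
    #               "feat": "RNA", "CL_type": "DecisionTree", "nbClass": nbClass},
    #      "viewIndex": 0}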
    
    
    def initMonoviewKWARGS(args, classifiersNames):
        r"""Used to init kwargs thanks to a function in each monoview classifier package.
    
        Parameters
        ----------
        args : parsed args objects
            All the args passed by the user.
        classifiersNames : list of strings
            List of the benchmarks's monoview classifiers names.
    
        Returns
        -------
        monoviewKWARGS : Dictionary of dictionaries
            Dictionary resuming all the specific arguments for the benchmark, one dictionary for each classifier.
    
            For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
    
        logging.debug("Start:\t Initializing Monoview classifiers arguments")
        monoviewKWARGS = {}
        for classifiersName in classifiersNames:
            try:
                classifierModule = getattr(MonoviewClassifiers, classifiersName)
            except AttributeError:
                raise AttributeError(
                    classifiersName + " is not implemented in MonoviewClassifiers, "
                                      "please specify the name of the file in MonoviewClassifiers")
            monoviewKWARGS[
                classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(
                args)
        logging.debug("Done:\t Initializing Monoview classifiers arguments")
        return monoviewKWARGS
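
    # Illustrative sketch: for classifiersNames == ["Adaboost"], the returned
    # dictionary looks like (values are hypothetical)
    #     {"AdaboostKWARGSInit": {"n_estimators": 50, "base_estimator": <estimator>}}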
    
    
    def initKWARGSFunc(args, benchmark):
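        """Used to initialize the classifiers' arguments for the benchmark; currently
        only the monoview classifiers need this initialization step."""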
        monoviewKWARGS = initMonoviewKWARGS(args, benchmark["Monoview"])
        return monoviewKWARGS
    
    
    def initMultiviewArguments(args, benchmark, views, viewsIndices,
                               argumentDictionaries, randomState, directory,
                               resultsMonoview, classificationIndices):
        """Used to add each monoview exeperience args to the list of monoview experiences args"""
        logging.debug("Start:\t Initializing Multiview classifiers arguments")
        multiviewArguments = []
        if "Multiview" in benchmark:
            for multiviewAlgoName in benchmark["Multiview"]:
                multiviewPackage = getattr(MultiviewClassifiers, multiviewAlgoName)
                multiviewModule = getattr(multiviewPackage,
                                          multiviewAlgoName + "Module")

                multiviewArguments += multiviewModule.getArgs(args, benchmark,
                                                              views, viewsIndices,
                                                              randomState,
                                                              directory,
                                                              resultsMonoview,
                                                              classificationIndices)
        argumentDictionaries["Multiview"] = multiviewArguments
        logging.debug("Start:\t Initializing Multiview classifiers arguments")
        return argumentDictionaries
    
    
    def arangeMetrics(metrics, metricPrinc):
        """Used to get the metrics list in the right order so that
        the first one is the principal metric specified in args"""
        if [metricPrinc] in metrics:
            metricIndex = metrics.index([metricPrinc])
            firstMetric = metrics[0]
            metrics[0] = [metricPrinc]
            metrics[metricIndex] = firstMetric
        else:
            raise AttributeError(metricPrinc + " not in metric pool")
        return metrics
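
    # Illustrative sketch: with metrics == [["f1_score"], ["accuracy_score"]] and
    # metricPrinc == "accuracy_score", the list is reordered to
    # [["accuracy_score"], ["f1_score"]]; an unknown metricPrinc raises AttributeError.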
    
    
    def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY,
                      kFolds):
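        """Used to create the results directories if needed, save the train labels and
        indices as .csv files, and save the labels of each cross-validation fold.
        Returns an empty monoview results list and the list of label names."""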
        logging.debug("Start:\t Benchmark initialization")
        if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
            try:
                os.makedirs(os.path.dirname(directory + "train_labels.csv"))
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise
        trainIndices = classificationIndices[0]
        trainLabels = labels[trainIndices]
        np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
        np.savetxt(directory + "train_indices.csv", classificationIndices[0],
                   delimiter=",")
        resultsMonoview = []
        folds = kFolds.split(np.arange(len(trainLabels)), trainLabels)
        minFoldLen = int(len(trainLabels) / kFolds.n_splits)
        for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds):
            fileName = directory + "/folds/test_labels_fold_" + str(
                foldIndex) + ".csv"
            if not os.path.exists(os.path.dirname(fileName)):
                try:
                    os.makedirs(os.path.dirname(fileName))
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise
            np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]],
                       delimiter=",")
        labelsNames = list(LABELS_DICTIONARY.values())
        logging.debug("Done:\t Benchmark initialization")
        return resultsMonoview, labelsNames
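
    # Sketch of the file layout written above: train_labels.csv and
    # train_indices.csv in `directory`, plus folds/test_labels_fold_<k>.csv for
    # each cross-validation fold, truncated to the minimal fold length.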
    
    
    def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None,
                         classificationIndices=None, args=None,
                         kFolds=None, randomState=None, hyperParamSearch=None,
                         metrics=None, argumentDictionaries=None,
                         benchmark=None, views=None, viewsIndices=None, flag=None,
                         labels=None,
                         ExecMonoview_multicore=ExecMonoview_multicore,
                         ExecMultiview_multicore=ExecMultiview_multicore,
                         initMultiviewArguments=initMultiviewArguments):
        """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
         ExecMultiview_multicore args are only used for tests"""
    
        resultsMonoview, labelsNames = benchmarkInit(directory,
                                                     classificationIndices, labels,
                                                     LABELS_DICTIONARY, kFolds)
    
        logging.debug("Start:\t Monoview benchmark")
        resultsMonoview += [
            ExecMonoview_multicore(directory, args.name, labelsNames,
                                   classificationIndices, kFolds,
                                   coreIndex, args.type, args.pathF, randomState,
                                   labels,
                                   hyperParamSearch=hyperParamSearch,
                                   metrics=metrics,
                                   nIter=args.CL_HPS_iter, **argument)
            for argument in argumentDictionaries["Monoview"]]
        logging.debug("Done:\t Monoview benchmark")
    
        logging.debug("Start:\t Multiview arguments initialization")
        argumentDictionaries = initMultiviewArguments(args, benchmark, views,
                                                      viewsIndices,
                                                      argumentDictionaries,
                                                      randomState, directory,
                                                      resultsMonoview,
                                                      classificationIndices)
        logging.debug("Done:\t Multiview arguments initialization")
    
        logging.debug("Start:\t Multiview benchmark")
        resultsMultiview = [
            ExecMultiview_multicore(directory, coreIndex, args.name,
                                    classificationIndices, kFolds, args.type,
                                    args.pathF, LABELS_DICTIONARY, randomState,
                                    labels, hyperParamSearch=hyperParamSearch,
                                    metrics=metrics, nIter=args.CL_HPS_iter,
                                    **arguments)
            for arguments in argumentDictionaries["Multiview"]]
        logging.debug("Done:\t Multiview benchmark")
    
        return [flag, resultsMonoview + resultsMultiview]
    
    
    def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None,
                                   directory=None, classificationIndices=None,
                                   args=None,
                                   kFolds=None, randomState=None,
                                   hyperParamSearch=None, metrics=None,
                                   argumentDictionaries=None,
                                   benchmark=None, views=None, viewsIndices=None,
                                   flag=None, labels=None,
                                   ExecMonoview_multicore=ExecMonoview_multicore,
                                   ExecMultiview_multicore=ExecMultiview_multicore,
                                   initMultiviewArguments=initMultiviewArguments):
        """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
         ExecMultiview_multicore args are only used for tests"""
    
        resultsMonoview, labelsNames = benchmarkInit(directory,
                                                     classificationIndices, labels,
                                                     LABELS_DICTIONARY, kFolds)
    
        logging.debug("Start:\t Monoview benchmark")
        nbExperiments = len(argumentDictionaries["Monoview"])
        nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
        for stepIndex in range(nbMulticoreToDo):
            resultsMonoview += (Parallel(n_jobs=nbCores)(
                delayed(ExecMonoview_multicore)(directory, args.name, labelsNames,
                                                classificationIndices, kFolds,
                                                coreIndex, args.type, args.pathF,
                                                randomState, labels,
                                                hyperParamSearch=hyperParamSearch,
                                                metrics=metrics,
                                                nIter=args.CL_HPS_iter,
                                                **argumentDictionaries["Monoview"][
                                                    coreIndex + stepIndex * nbCores])
                for coreIndex in
                range(min(nbCores, nbExperiments - stepIndex * nbCores))))
        logging.debug("Done:\t Monoview benchmark")
    
        logging.debug("Start:\t Multiview arguments initialization")
        argumentDictionaries = initMultiviewArguments(args, benchmark, views,
                                                      viewsIndices,
                                                      argumentDictionaries,
                                                      randomState, directory,
                                                      resultsMonoview,
                                                      classificationIndices)
        logging.debug("Done:\t Multiview arguments initialization")
    
        logging.debug("Start:\t Multiview benchmark")
        resultsMultiview = []
        nbExperiments = len(argumentDictionaries["Multiview"])
        nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores))
        for stepIndex in range(nbMulticoreToDo):
            resultsMultiview += Parallel(n_jobs=nbCores)(
                delayed(ExecMultiview_multicore)(directory, coreIndex, args.name,
                                                 classificationIndices, kFolds,
                                                 args.type, args.pathF,
                                                 LABELS_DICTIONARY, randomState,
                                                 labels,
                                                 hyperParamSearch=hyperParamSearch,
                                                 metrics=metrics,
                                                 nIter=args.CL_HPS_iter,
                                                 **argumentDictionaries["Multiview"][
                                                     stepIndex * nbCores + coreIndex])
                for coreIndex in
                range(min(nbCores, nbExperiments - stepIndex * nbCores)))
        logging.debug("Done:\t Multiview benchmark")
    
        return [flag, resultsMonoview + resultsMultiview]
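
    # Worked example of the chunking above: with 10 experiments and nbCores == 4,
    # nbMulticoreToDo == ceil(10 / 4) == 3, so the Parallel calls run batches of
    # 4, 4 and 2 experiments.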
    
    
    def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None,
                                 directory=None, classificationIndices=None,
                                 args=None,
                                 kFolds=None, randomState=None,
                                 hyperParamSearch=None, metrics=None,
                                 argumentDictionaries=None,
                                 benchmark=None, views=None, viewsIndices=None,
                                 flag=None, labels=None,
                                 ExecMonoview_multicore=ExecMonoview_multicore,
                                 ExecMultiview_multicore=ExecMultiview_multicore,
                                 initMultiviewArguments=initMultiviewArguments):
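        """Used to run a benchmark on a single core, reading the views directly from
        the HDF5 dataset instead of using the per-core dataset copies."""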
        resultsMonoview, labelsNames = benchmarkInit(directory,
                                                     classificationIndices, labels,
                                                     LABELS_DICTIONARY, kFolds)
        logging.debug("Start:\t Monoview benchmark")
        for arguments in argumentDictionaries["Monoview"]:
            X = DATASET.get("View" + str(arguments["viewIndex"]))
            Y = labels
            resultsMonoview += [
                ExecMonoview(directory, X, Y, args.name, labelsNames,
                             classificationIndices, kFolds,
                             1, args.type, args.pathF, randomState,
                             hyperParamSearch=hyperParamSearch, metrics=metrics,
                             nIter=args.CL_HPS_iter, **arguments)]
        logging.debug("Done:\t Monoview benchmark")
    
        logging.debug("Start:\t Multiview arguments initialization")
    
        argumentDictionaries = initMultiviewArguments(args, benchmark, views,
                                                      viewsIndices,
                                                      argumentDictionaries,
                                                      randomState, directory,
                                                      resultsMonoview,
                                                      classificationIndices)
        logging.debug("Done:\t Multiview arguments initialization")
    
        logging.debug("Start:\t Multiview benchmark")
        resultsMultiview = []
        for arguments in argumentDictionaries["Multiview"]:
            resultsMultiview += [
                ExecMultiview(directory, DATASET, args.name, classificationIndices,
                              kFolds, 1, args.type,
                              args.pathF, LABELS_DICTIONARY, randomState, labels,
                              hyperParamSearch=hyperParamSearch,
                              metrics=metrics, nIter=args.CL_HPS_iter, **arguments)]
        logging.debug("Done:\t Multiview benchmark")
    
        return [flag, resultsMonoview + resultsMultiview]
    
    
    def execBenchmark(nbCores, statsIter, nbMulticlass,
                      benchmarkArgumentsDictionaries, classificationIndices,
                      directories,
                      directory, multiClassLabels, metrics, labelsDictionary,
                      nbLabels, DATASET,
                      execOneBenchmark=execOneBenchmark,
                      execOneBenchmark_multicore=execOneBenchmark_multicore,
                      execOneBenchmarkMonoCore=execOneBenchmarkMonoCore,
                      getResults=getResults, delete=DB.deleteHDF5):
        r"""Used to execute the needed benchmark(s) on multicore or mono-core functions.
    
        Parameters
        ----------
        nbCores : int
            Number of threads that the benchmarks can use.
        statsIter : int
            Number of statistical iterations that have to be done.
        benchmarkArgumentsDictionaries : list of dictionaries
            All the needed arguments for the benchmarks.
        classificationIndices : list of lists of numpy.ndarray
            For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and
            the ones of the testing set.
        directories : list of strings
            List of the paths to the result directories for each statistical iteration.
        directory : string
            Path to the main results directory.
        multiClassLabels : ist of lists of numpy.ndarray
            For each label couple, for each statistical iteration a triplet of numpy.ndarrays is stored with the
            indices for the biclass training set, the ones for the biclass testing set and the ones for the
            multiclass testing set.
        metrics : list of lists
            Metrics that will be used to evaluate the algorithms performance.
        labelsDictionary : dictionary
            Dictionary mapping labels indices to labels names.
        nbLabels : int
            Total number of different labels in the dataset.
        DATASET : HDF5 dataset file
            The full dataset that wil be used by the benchmark.
        classifiersNames : list of strings
            List of the benchmarks's monoview classifiers names.
        rest_of_the_args :
            Just used for testing purposes
    
    
        Returns
        -------
        results : list of lists
            The results of the benchmark.
        """
        logging.debug("Start:\t Executing all the needed biclass benchmarks")
        results = []
        if nbCores > 1:
            if statsIter > 1 or nbMulticlass > 1:
                nbExpsToDo = len(benchmarkArgumentsDictionaries)
                nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
                for stepIndex in nbMulticoreToDo:
                    results += (Parallel(n_jobs=nbCores)(
                        delayed(execOneBenchmark)(
                            coreIndex=coreIndex,
                            **benchmarkArgumentsDictionaries[
                                coreIndex + stepIndex * nbCores])
                        for coreIndex in range(
                            min(nbCores, nbExpsToDo - stepIndex * nbCores))))
            else:
                results += [execOneBenchmark_multicore(
                    nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
        else:
            for arguments in benchmarkArgumentsDictionaries:
                results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)]
        logging.debug("Done:\t Executing all the needed biclass benchmarks")
        if nbCores > 1:
            logging.debug("Start:\t Deleting " + str(
                nbCores) + " temporary datasets for multiprocessing")
            args = benchmarkArgumentsDictionaries[0]["args"]
            datasetFiles = delete(args.pathF, args.name, nbCores)
            logging.debug("Start:\t Deleting datasets for multiprocessing")
        # Do everything with flagging
        nbExamples = len(classificationIndices[0][0]) + len(
            classificationIndices[0][1])
        multiclassGroundTruth = DATASET.get("Labels").value
        logging.debug("Start:\t Analyzing predictions")
        getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
                   multiclassGroundTruth, metrics, classificationIndices,
                   directories, directory, labelsDictionary, nbExamples, nbLabels)
        logging.debug("Done:\t Analyzing predictions")
    
        return results
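
    # Dispatch logic in short: several iterations or label combinations with
    # nbCores > 1 run one benchmark per core via execOneBenchmark; a single
    # benchmark with nbCores > 1 uses execOneBenchmark_multicore; otherwise each
    # benchmark runs sequentially through execOneBenchmarkMonoCore.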
    
    
    def execClassif(arguments):
        """Main function to execute the benchmark"""
        start = time.time()
        args = execution.parseTheArgs(arguments)
    
        os.nice(args.nice)
        nbCores = args.nbCores
        if nbCores == 1:
            os.environ['OPENBLAS_NUM_THREADS'] = '1'
        statsIter = args.CL_statsiter
        hyperParamSearch = args.CL_HPS_type
        multiclassMethod = args.CL_multiclassMethod
        CL_type = args.CL_type
        monoviewAlgos = args.CL_algos_monoview
        multiviewAlgos = args.CL_algos_multiview
    
        directory = execution.initLogFile(args.name, args.views, args.CL_type,
                                          args.log, args.debug, args.label,
                                          args.res_dir)
        randomState = execution.initRandomState(args.randomState, directory)
        statsIterRandomStates = execution.initStatsIterRandomStates(statsIter,
                                                                    randomState)
    
        getDatabase = execution.getDatabaseFunction(args.name, args.type)
    
        DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args.views,
                                                              args.pathF, args.name,
                                                              args.CL_nbClass,
                                                              args.CL_classes,
                                                              randomState,
                                                              args.full,
                                                              args.add_noise,
                                                              args.noise_std)
        args.name = datasetname
    
        splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split,
                                     statsIterRandomStates)
    
        multiclassLabels, labelsCombinations, indicesMulticlass = Multiclass.genMulticlassLabels(
            DATASET.get("Labels").value, multiclassMethod, splits)
    
        kFolds = execution.genKFolds(statsIter, args.CL_nbFolds,
                                     statsIterRandomStates)
    
        datasetFiles = Dataset.initMultipleDatasets(args.pathF, args.name, nbCores)
    
        # if not views:
        #     raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
    
        views, viewsIndices, allViews = execution.initViews(DATASET, args.views)
        viewsDictionary = genViewsDictionnary(DATASET, views)
        nbViews = len(views)
        NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
    
        metrics = [metric.split(":") for metric in args.CL_metrics]
        if metrics == [[""]]:
            metricsNames = [name for _, name, isPackage
                            in pkgutil.iter_modules(
                    ['./MonoMultiViewClassifiers/Metrics']) if
                            not isPackage and name not in ["framework", "log_loss",
                                                           "matthews_corrcoef",
                                                           "roc_auc_score"]]
            metrics = [[metricName] for metricName in metricsNames]
            metrics = arangeMetrics(metrics, args.CL_metric_princ)
        for metricIndex, metric in enumerate(metrics):
            if len(metric) == 1:
                metrics[metricIndex] = [metric[0], None]
    
        benchmark = initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args)
    
        initKWARGS = initKWARGSFunc(args, benchmark)
    
        dataBaseTime = time.time() - start
    
        argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary,
                                                NB_CLASS, initKWARGS)
        directories = execution.genDirecortiesNames(directory, statsIter)
        benchmarkArgumentDictionaries = execution.genArgumentDictionaries(
            LABELS_DICTIONARY, directories, multiclassLabels,
            labelsCombinations, indicesMulticlass,
            hyperParamSearch, args, kFolds,
            statsIterRandomStates, metrics,
            argumentDictionaries, benchmark, nbViews,
            views, viewsIndices)
        nbMulticlass = len(labelsCombinations)
    
        execBenchmark(nbCores, statsIter, nbMulticlass,
                      benchmarkArgumentDictionaries, splits, directories,
                      directory, multiclassLabels, metrics, LABELS_DICTIONARY,
                      NB_CLASS, DATASET)
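
    # Illustrative usage sketch: the benchmark is typically launched by forwarding
    # the command-line arguments to this function, e.g. execClassif(sys.argv[1:]),
    # which parses them through `execution.parseTheArgs`.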
    
        #
    # def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices,
    #                               kFolds,
    #                               randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start,
    #                               benchmark,
    #                               views):
    #     """Used to execute mono and multiview classification and result analysis for one random state
    #      using multicore classification"""
    #     resultsMonoview = []
    #     labelsNames = LABELS_DICTIONARY.values()
    #     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
    #
    #     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
    #                                                coreIndex, args.type, args.pathF, randomState,
    #                                                hyperParamSearch=hyperParamSearch,
    #                                                metrics=metrics, nIter=args.CL_HPS_iter,
    #                                                **arguments)
    #                         for arguments in argumentDictionaries["Monoview"]]
    #     monoviewTime = time.time() - dataBaseTime - start
    #
    #     argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries,
    #                                                   randomState, directory, resultsMonoview, classificationIndices)
    #
    #     resultsMultiview = []
    #     resultsMultiview += [
    #         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
    #                                 args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
    #                                 metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
    #         for arguments in argumentDictionaries["Multiview"]]
    #     multiviewTime = time.time() - monoviewTime - dataBaseTime - start
    #
    #     labels = np.array(
    #         [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in
    #                                                                         resultsMultiview]).transpose()
    #     DATASET = h5py.File(args.pathF + args.name + str(0) + ".hdf5", "r")
    #     trueLabels = DATASET.get("Labels").value
    #     times = [dataBaseTime, monoviewTime, multiviewTime]
    #     results = (resultsMonoview, resultsMultiview)
    #     labelAnalysis = analyzeLabels(labels, trueLabels, results, directory)
    #     logging.debug("Start:\t Analyze Iteration Results")
    #     resultAnalysis(benchmark, results, args.name, times, metrics, directory)
    #     logging.debug("Done:\t Analyze Iteration Results")
    #     globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime
    #     totalTime = time.time() - start
    #     logging.info("Extraction time : " + str(int(dataBaseTime)) +
    #                  "s, Monoview time : " + str(int(monoviewTime)) +
    #                  "s, Multiview Time : " + str(int(multiviewTime)) +
    #                  "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) +
    #                  "s, Iteration Duration : " + str(int(totalTime)) + "s")
    #     return results, labelAnalysis
    #
    #
    # def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, kFolds,
    #                     randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
    #                     benchmark, views):
    #     """Used to execute mono and multiview classification and result analysis for one random state
    #          classification"""
    #     #TODO : Clarify this one
    #
    #
    #     argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries,
    #                                                   randomState, directory, resultsMonoview, classificationIndices)
    #
    #     resultsMultiview = []
    #     if nbCores > 1:
    #         nbExperiments = len(argumentDictionaries["Multiview"])
    #         for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    #             resultsMultiview += Parallel(n_jobs=nbCores)(
    #                 delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
    #                                                  args.type,
    #                                                  args.pathF,
    #                                                  LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
    #                                                  metrics=metrics, nIter=args.CL_HPS_iter,
    #                                                  **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
    #                 for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
    #     else:
    #         resultsMultiview = [
    #             ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
    #                           LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
    #                           metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in
    #             argumentDictionaries["Multiview"]]
    #     multiviewTime = time.time() - monoviewTime - dataBaseTime - start
    #     if nbCores > 1:
    #         logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing")
    #         datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
    #         logging.debug("Start:\t Deleting datasets for multiprocessing")
    #     labels = np.array(
    #         [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in
    #                                                                         resultsMultiview]).transpose()
    #     trueLabels = DATASET.get("Labels").value
    #     times = [dataBaseTime, monoviewTime, multiviewTime]
    #     results = (resultsMonoview, resultsMultiview)
    #     labelAnalysis = analyzeLabels(labels, trueLabels, results, directory)
    #     logging.debug("Start:\t Analyze Iteration Results")
    #     resultAnalysis(benchmark, results, args.name, times, metrics, directory)
    #     logging.debug("Done:\t Analyze Iteration Results")
    #     globalAnalysisTime = time.time() - monoviewTime - dataBaseTime - start - multiviewTime
    #     totalTime = time.time() - start
    #     logging.info("Extraction time : " + str(int(dataBaseTime)) +
    #                  "s, Monoview time : " + str(int(monoviewTime)) +
    #                  "s, Multiview Time : " + str(int(multiviewTime)) +
    #                  "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) +
    #                  "s, Iteration Duration : " + str(int(totalTime)) + "s")
    #     return results, labelAnalysis
    #
    #
    #
    #
    #
    #
    #
    # if statsIter > 1:
    #     logging.debug("Start:\t Benchmark classification")
    #     for statIterIndex in range(statsIter):
    #         if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")):
    #             try:
    #                 os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv"))
    #             except OSError as exc:
    #                 if exc.errno != errno.EEXIST:
    #                     raise
    #         trainIndices, testIndices = classificationIndices[statIterIndex]
    #         trainLabels = DATASET.get("Labels").value[trainIndices]
    #         np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",")
    #     if nbCores > 1:
    #         iterResults = []
    #         nbExperiments = statsIter*len(multiclassLabels)
    #         for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    #             iterResults += (Parallel(n_jobs=nbCores)(
    #                 delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1,
    #                                                    directories[coreIndex + stepIndex * nbCores], args,
    #                                                    classificationIndices[coreIndex + stepIndex * nbCores],
    #                                                    kFolds[coreIndex + stepIndex * nbCores],
    #                                                    statsIterRandomStates[coreIndex + stepIndex * nbCores],
    #                                                    hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime,
    #                                                    start, benchmark,
    #                                                    views)
    #                 for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
    #         logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing")
    #         datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
    #         logging.debug("Start:\t Deleting datasets for multiprocessing")
    #     else:
    #         iterResults = []
    #         for iterIndex in range(statsIter):
    #             if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")):
    #                 try:
    #                     os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv"))
    #                 except OSError as exc:
    #                     if exc.errno != errno.EEXIST:
    #                         raise
    #             trainIndices, testIndices = classificationIndices[iterIndex]
    #             trainLabels = DATASET.get("Labels").value[trainIndices]
    #             np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",")
    #             iterResults.append(
    #                 classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args,
    #                                 classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex],
    #                                 hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark,
    #                                 views))
    #     logging.debug("Done:\t Benchmark classification")
    #     logging.debug("Start:\t Global Results Analysis")
    #     classifiersIterResults = []
    #     iterLabelAnalysis = []
    #     for result in iterResults:
    #         classifiersIterResults.append(result[0])
    #         iterLabelAnalysis.append(result[1])
    #
    #     mono,multi = classifiersIterResults[0]
    #     classifiersNames = genNamesFromRes(mono, multi)
    #     analyzeIterLabels(iterLabelAnalysis, directory, classifiersNames)
    #     analyzeIterResults(classifiersIterResults, args.name, metrics, directory)
    #     logging.debug("Done:\t Global Results Analysis")
    #     totalDur = time.time() - start
    #     m, s = divmod(totalDur, 60)
    #     h, m = divmod(m, 60)
    #     d, h = divmod(h, 24)
    #     # print "%d_%02d_%02d" % (h, m, s)
    #     logging.info("Info:\t Total duration : " + str(d) + " days, " + str(h) + " hours, " + str(m) + " mins, " + str(
    #         int(s)) + "secs.")
    #
    # else:
    #     logging.debug("Start:\t Benchmark classification")
    #     if not os.path.exists(os.path.dirname(directories + "train_labels.csv")):
    #         try:
    #             os.makedirs(os.path.dirname(directories + "train_labels.csv"))
    #         except OSError as exc:
    #             if exc.errno != errno.EEXIST:
    #                 raise
    #     trainIndices, testIndices = classificationIndices
    #     trainLabels = DATASET.get("Labels").value[trainIndices]
    #     np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",")
    #     res, labelAnalysis = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices,
    #                                          kFolds,
    #                                          statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
    #                                          benchmark, views)
    #     logging.debug("Done:\t Benchmark classification")
    #     totalDur = time.time()-start
    #     m, s = divmod(totalDur, 60)
    #     h, m = divmod(m, 60)
    #     d, h = divmod(h, 24)
    #     # print "%d_%02d_%02d" % (h, m, s)
    #     logging.info("Info:\t Total duration : "+str(d)+ " days, "+str(h)+" hours, "+str(m)+" mins, "+str(int(s))+"secs.")
    #
    # if statsIter > 1:
    #     pass