ExecClassif.py

    # Import built-in modules
    import argparse
    import pkgutil  # used to list the available classifier modules and multiview packages
    import os
    import time
    import itertools
    import sys
    import select
    import logging
    import errno
    
    # Import 3rd party modules
    from joblib import Parallel, delayed
    import numpy as np
    import math
    import matplotlib
    
    # Import own modules
    import Multiview
    import Metrics
    import MonoviewClassifiers
    from Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
    from Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
    import Multiview.GetMultiviewDb as DB
    from Versions import testVersions
    from ResultAnalysis import resultAnalysis, analyzeLabels
    
    # Author-Info
    __author__ = "Baptiste Bauvin"
    __status__ = "Prototype"  # Production, Development, Prototype
    
    matplotlib.use('Agg')  # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure
    
    
    def initLogFile(args):
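        """Used to create the result directory and to configure the logging module to write in it
        (also echoed to the console if args.log is set); returns the result directory path.
        Needs args.name, args.CL_type, args.views and args.log"""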
        resultDirectory = "../../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
        logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
            args.views) + "-" + args.name + \
                      "-LOG"
        if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
            try:
                os.makedirs(os.path.dirname(resultDirectory + logFileName))
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise
        logFile = resultDirectory + logFileName
        if os.path.isfile(logFile + ".log"):
            for i in range(1, 20):
                testFileName = logFileName + "-" + str(i) + ".log"
                if not (os.path.isfile(resultDirectory + testFileName)):
                    logFile = resultDirectory + testFileName
                    break
        else:
            logFile += ".log"
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                            filemode='w')
        if args.log:
            logging.getLogger().addHandler(logging.StreamHandler())
    
        return resultDirectory
    
    
    def input(timeout=15):
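        """Used to read an answer on stdin without blocking: waits at most timeout seconds
        and returns "y" if nothing was typed"""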
        print "You have " + str(timeout) + " seconds to stop the script by typing n"
    
        i, o, e = select.select([sys.stdin], [], [], timeout)
    
        if i:
            return sys.stdin.readline().strip()
        else:
            return "y"
    
    
    def confirm(resp=True, timeout=15):
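        """Used to ask the user for confirmation; returns True or False according to the answer,
        or resp if the timeout expires"""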
        ans = input(timeout)
        if not ans:
            return resp
        if ans not in ['y', 'Y', 'n', 'N']:
            print 'please enter y or n.'
            return confirm(resp, timeout)
        if ans == 'y' or ans == 'Y':
            return True
        if ans == 'n' or ans == 'N':
            return False
    
    
    def initMultipleDatasets(args, nbCores):
        """Used to create copies of the dataset if multicore computation is used
        Needs args.pathF and args.name"""
        if nbCores > 1:
            if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
                logging.debug("Info:\t Enough copies of the dataset are already available")
                pass
            else:
                logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing")
                logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " +
                                str(os.path.getsize(args.pathF + args.name + ".hdf5") * nbCores / float(
                                    1024) / 1024 / 1024) + " Gbytes /!\ ")
                confirmation = confirm()
                if not confirmation:
                    sys.exit(0)
                else:
                    datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
                    logging.debug("Start:\t Creating datasets for multiprocessing")
                    return datasetFiles
    
    
    def initViews(DATASET, args):
        """Used to return the views names that will be used by the algos, their indices and all the views names
        Needs args.views"""
        NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
        if args.views != [""]:
            allowedViews = args.views
            allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
            views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW) if
                     str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
            viewsIndices = [viewIndex for viewIndex in range(NB_VIEW) if
                            str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
            return views, viewsIndices, allViews
        else:
            views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
            viewsIndices = np.arange(NB_VIEW)
            allViews = views
            return views, viewsIndices, allViews
    
    
    def initBenchmark(args):
        """Used to create a list of all the algorithm packages names used for the benchmark
        Needs args.CL_type, args.CL_algos_multiview, args.MU_types, args.FU_types, args.FU_late_methods,
        args.FU_early_methods, args.CL_algos_monoview"""
        benchmark = {"Monoview": {}, "Multiview": {}}
        allMultiviewPackages = [name for _, name, isPackage
                                in pkgutil.iter_modules(['Multiview/']) if isPackage]
        if args.CL_type == ["Benchmark"]:
    
            allMonoviewAlgos = [name for _, name, isPackage in
                                pkgutil.iter_modules(['MonoviewClassifiers'])
                                if (not isPackage)]
            benchmark["Monoview"] = allMonoviewAlgos
            benchmark["Multiview"]=dict((multiviewPackageName, "_") for multiviewPackageName in allMultiviewPackages)
            for multiviewPackageName in allMultiviewPackages:
                multiviewPackage = getattr(Multiview, multiviewPackageName)
                multiviewModule = getattr(multiviewPackage, multiviewPackageName)
                benchmark = multiviewModule.getBenchmark(benchmark, args=args)
                # fusionModulesNames = [name for _, name, isPackage
                #                       in pkgutil.iter_modules(['Multiview/Fusion/Methods']) if not isPackage]
                # fusionModules = [getattr(Multiview.Fusion.Methods, fusionModulesName)
                #                  for fusionModulesName in fusionModulesNames]
                # fusionClasses = [getattr(fusionModule, fusionModulesName + "Classifier")
                #                  for fusionModulesName, fusionModule in zip(fusionModulesNames, fusionModules)]
                # fusionMethods = dict((fusionModulesName, [name for _, name, isPackage in
                #                                           pkgutil.iter_modules(
                #                                               ["Multiview/Fusion/Methods/" + fusionModulesName + "Package"])
                #                                           if not isPackage])
                #                      for fusionModulesName, fusionClasse in zip(fusionModulesNames, fusionClasses))
                # fusionMonoviewClassifiers = allMonoviewAlgos
                # allFusionAlgos = {"Methods": fusionMethods, "Classifiers": fusionMonoviewClassifiers}
                # # allMumboAlgos =
                # allMultiviewAlgos = {"Fusion": allFusionAlgos, "Mumbo": allMumboAlgos}
                # benchmark = {"Monoview": allMonoviewAlgos, "Multiview": allMultiviewAlgos}
    
        if "Multiview" in args.CL_type:
            benchmark["Multiview"] = {}
            if args.CL_algos_multiview == [""]:
                algosMultiview = allMultiviewPackages
            else:
                algosMultiview = args.CL_algos_multiview
            for multiviewPackageName in allMultiviewPackages:
                if multiviewPackageName in algosMultiview:
                    multiviewPackage = getattr(Multiview, multiviewPackageName)
                    multiviewModule = getattr(multiviewPackage, multiviewPackageName)
                    benchmark = multiviewModule.getBenchmark(benchmark, args=args)
            # if "Mumbo" in algosMutliview:
            #     benchmark["Multiview"]["Mumbo"] = args.MU_types
            # if "Fusion" in algosMutliview:
            #     benchmark["Multiview"]["Fusion"] = {}
            #     benchmark["Multiview"]["Fusion"]["Methods"] = dict(
            #         (fusionType, []) for fusionType in args.FU_types)
            #     if "LateFusion" in args.FU_types:
            #         if args.FU_late_methods== [""]:
            #             benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] = [name for _, name, isPackage in
            #                                                                          pkgutil.iter_modules([
            #                                                                              "Multiview/Fusion/Methods/LateFusionPackage"])
            #                                                                          if not isPackage]
            #         else:
            #             benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] = args.FU_late_methods
            #     if "EarlyFusion" in args.FU_types:
            #         if args.FU_early_methods == [""]:
            #             benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"] = [name for _, name, isPackage in
            #                                                                           pkgutil.iter_modules([
            #                                                                               "Multiview/Fusion/Methods/EarlyFusionPackage"])
            #                                                                           if not isPackage]
            #         else:
            #             benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"] = args.FU_early_methods
            #     if args.CL_algos_monoview == ['']:
            #         benchmark["Multiview"]["Fusion"]["Classifiers"] = [name for _, name, isPackage in
            #                                                            pkgutil.iter_modules(['MonoviewClassifiers'])
            #                                                            if (not isPackage) and (name != "SGD") and (
            #                                                                name[:3] != "SVM")
            #                                                            and (name != "SCM")]
            #     else:
            #         benchmark["Multiview"]["Fusion"]["Classifiers"] = args.CL_algos_monoview
    
        if "Monoview" in args.CL_type:
            if args.CL_algos_monoview == ['']:
                benchmark["Monoview"] = [name for _, name, isPackage in pkgutil.iter_modules(["MonoviewClassifiers"])
                                         if not isPackage]
    
            else:
                benchmark["Monoview"] = args.CL_algos_monoview
        return benchmark
    
    
    def initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, kwargsInit):
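        """Used to add one monoview experiment per (view, classifier) pair to argumentDictionaries
        (SCM is only added on the views flagged as binary)"""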
        if benchmark["Monoview"]:
            argumentDictionaries["Monoview"] = []
            for view in views:
                for classifier in benchmark["Monoview"]:
                    if classifier == "SCM":
                        if DATASET.get("View" + str(allViews.index(view))).attrs["binary"]:
                            arguments = {
                                "args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": view,
                                         "CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": allViews.index(view)}
                            argumentDictionaries["Monoview"].append(arguments)
                        else:
                            pass
                    else:
                        arguments = {
                            "args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": view,
                                     "CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": allViews.index(view)}
                        argumentDictionaries["Monoview"].append(arguments)
        return argumentDictionaries
    
    
    def initMonoviewKWARGS(args, classifiersNames):
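        """Used to build the initial keyword arguments of each monoview classifier from the
        corresponding CL_<classifierName> command-line arguments"""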
        monoviewKWARGS = {}
        for classifiersName in classifiersNames:
            classifierModule = getattr(MonoviewClassifiers, classifiersName)
            monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.getKWARGS(
                [(key, value) for key, value in vars(args).iteritems()
                 if key.startswith("CL_" + classifiersName)])
        return monoviewKWARGS
    
    
    def initKWARGS(args, benchmark):
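        """Used to gather the initial keyword arguments of the classifiers used in the benchmark
        (monoview classifiers only for now)"""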
        if "Monoview" in benchmark:
            monoviewKWARGS = initMonoviewKWARGS(args, benchmark["Monoview"])
    
    
    
        # kwargsInit = {
        #     "RandomForestKWARGSInit": {"0": map(int, args.CL_RF_trees.split())[0],
        #                                "1": map(int, args.CL_RF_max_depth.split(":"))[0]},
        #     "SVMLinearKWARGSInit": {"0": map(int, args.CL_SVML_C.split(":"))[0]},
        #     "SVMRBFKWARGSInit": {"0": map(int, args.CL_SVMR_C.split(":"))[0]},
        #     "SVMPolyKWARGSInit": {"0": map(int, args.CL_SVMP_C.split(":"))[0],
        #                           '1': map(int, args.CL_SVMP_deg.split(":"))[0]},
        #     "DecisionTreeKWARGSInit": {"0": map(int, args.CL_DT_depth.split(":"))[0]},
        #     "SGDKWARGSInit": {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_penalty.split(":")[0],
        #                       "0": args.CL_SGD_loss.split(":")[0]},
        #     "KNNKWARGSInit": {"0": map(float, args.CL_KNN_neigh.split(":"))[0]},
        #     "AdaboostKWARGSInit": {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]},
        #     "SCMKWARGSInit": {"0": args.CL_SCM_max_rules.split(":")[0]},
        # }
        return monoviewKWARGS
    
    
    def lateFusionSetArgs(views, viewsIndices, classes, method,
                          classifiersNames, classifiersConfig, fusionMethodConfig):
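        """Used to build the argument dictionary of a late fusion experiment
        (the labels names are read from the module-level args)"""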
        arguments = {"CL_type": "Fusion",
                     "views": views,
                     "NB_VIEW": len(views),
                     "viewsIndices": viewsIndices,
                     "NB_CLASS": len(classes),
                     "LABELS_NAMES": args.CL_classes,
                     "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": method,
                                      "classifiersNames": classifiersNames,
                                      "classifiersConfigs": classifiersConfig,
                                      'fusionMethodConfig': fusionMethodConfig,
                                      "nbView": (len(viewsIndices))}}
        return arguments
    
    
    def initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, classifiersNames,
                               NB_VIEW, metrics, argumentDictionaries):
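        """Used to gather the argument dictionaries of the multiview experiments, as returned by
        the getArgs function of each multiview module listed in the benchmark"""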
        # metricModule = getattr(Metrics, metrics[0])
        multiviewArguments = []
        if "Multiview" in benchmark:
            for multiviewAlgoName in benchmark["Multiview"]:
                multiviewPackage = getattr(Multiview, multiviewAlgoName)
            multiviewModule = getattr(multiviewPackage, multiviewAlgoName)
            multiviewArguments += multiviewModule.getArgs(args, benchmark, views, viewsIndices)
        # if benchmark["Multiview"]:
        #     for multiviewAlgoName in benchmark["Multiview"]:
        #         multiviewPackage = getattr(Multiview, multiviewAlgoName)
        #         multiviewArguments[]
        #     if "Fusion" in benchmark["Multiview"]:
        #         for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]:
        #             import pdb; pdb.set_trace()
        #         if args.FU_cl_names != ['']:
        #             monoClassifiers = args.FU_cl_names
        #             monoClassifiersConfigs = [globals()[classifier + "KWARGS"] for classifier in monoClassifiers]
        #             if args.FU_method_config != [""]:
        #                 fusionMethodConfigs = [map(float, config.split(":")) for config in args.FU_method_config]
        #             elif not hyperParamSearch:
        #                 raise ValueError("No config for fusion method given and no gridearch wanted")
        #             else:
        #                 try:
        #                     fusionMethodConfigs = [["config"] for method in
        #                                            benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]]
        #                 except:
        #                     pass
        #             try:
        #                 for methodIndex, method in enumerate(benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]):
        #                     if args.FU_fixed:
        #                         arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method,
        #                                                       args.FU_cl_names, monoClassifiersConfigs,
        #                                                       fusionMethodConfigs[methodIndex])
        #                         argumentDictionaries["Multiview"].append(arguments)
        #                     else:
        #                         for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)),
        #                                                                                    NB_VIEW):
        #                             monoClassifiersNamesComb = [monoClassifiers[index] for index in combination]
        #                             monoClassifiersConfigsComb = [monoClassifiersConfigs[index] for index in
        #                                                           combination]
        #                             arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method,
        #                                                           monoClassifiersNamesComb, monoClassifiersConfigsComb,
        #                                                           fusionMethodConfigs[methodIndex])
        #                             argumentDictionaries["Multiview"].append(arguments)
        #             except:
        #                 pass
        #         else:
        #             if "LateFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \
        #                             "Classifiers" in benchmark["Multiview"]["Fusion"]:
        #                 bestClassifiers = []
        #                 bestClassifiersConfigs = []
        #                 if argumentDictionaries["Monoview"] != {}:
        #                     for viewIndex, view in enumerate(views):
        #                         if metricModule.getConfig()[-14] == "h":
        #                             bestClassifiers.append(
        #                                 classifiersNames[viewIndex][np.argmax(np.array(scores[viewIndex]))])
        #                             bestClassifiersConfigs.append(
        #                                 classifiersConfigs[viewIndex][np.argmax(np.array(scores[viewIndex]))])
        #                         else:
        #                             bestClassifiers.append(
        #                                 classifiersNames[viewIndex][np.argmin(np.array(scores[viewIndex]))])
        #                             bestClassifiersConfigs.append(
        #                                 classifiersConfigs[viewIndex][np.argmin(np.array(scores[viewIndex]))])
        #                 else:
        #                     raise AttributeError("No Monoview classifiers asked in args and no monoview benchmark done.")
        #                 for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]:
        #                     arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method,
        #                                                   bestClassifiers, bestClassifiersConfigs,
        #                                                   fusionMethodConfig)
        #                     argumentDictionaries["Multiview"].append(arguments)
        #         if "EarlyFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \
        #                         "Classifiers" in benchmark["Multiview"]["Fusion"]:
        #             for method in benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"]:
        #                 for classifier in benchmark["Multiview"]["Fusion"]["Classifiers"]:
        #                     arguments = {"CL_type": "Fusion",
        #                                  "views": views,
        #                                  "NB_VIEW": len(views),
        #                                  "viewsIndices": viewsIndices,
        #                                  "NB_CLASS": len(args.CL_classes),
        #                                  "LABELS_NAMES": args.CL_classes,
        #                                  "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": method,
        #                                                   "classifiersNames": [classifier],
        #                                                   "classifiersConfigs": [
        #                                                       initKWARGS[classifier + "KWARGSInit"]],
        #                                                   'fusionMethodConfig': fusionMethodConfig,
        #                                                   "nbView": (len(viewsIndices))}}
        #                     argumentDictionaries["Multiview"].append(arguments)
        #     if "Mumbo" in benchmark["Multiview"]:
        #         for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])),
        #                                                                    NB_VIEW):
        #             mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination]
        #             arguments = {"CL_type": "Mumbo",
        #                          "views": views,
        #                          "NB_VIEW": len(views),
        #                          "viewsIndices": viewsIndices,
        #                          "NB_CLASS": len(args.CL_classes),
        #                          "LABELS_NAMES": args.CL_classes,
        #                          "MumboKWARGS": {"classifiersNames": mumboClassifiersNames,
        #                                          "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]),
        #                                          "threshold": args.MU_iter[2],
        #                                          "classifiersConfigs": [argument.split(":") for argument in
        #                                                                 args.MU_config], "nbView": (len(viewsIndices))}}
        #             argumentDictionaries["Multiview"].append(arguments)
        argumentDictionaries["Multiview"] = multiviewArguments
        return argumentDictionaries
    
    
    def arangeMetrics(metrics, metricPrinc):
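        """Used to put the principal metric first in the metrics list, so that it is the one used
        for the hyper-parameter search; raises AttributeError if it is not in the list"""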
        if [metricPrinc] in metrics:
            metricIndex = metrics.index([metricPrinc])
            firstMetric = metrics[0]
            metrics[0] = [metricPrinc]
            metrics[metricIndex] = firstMetric
        else:
            raise AttributeError(metricPrinc + " not in metric pool")
        return metrics
    
    
    testVersions()
    parser = argparse.ArgumentParser(
        description='This file is used to benchmark the scores of multiple classification algorithms on multiview data.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    
    groupStandard = parser.add_argument_group('Standard arguments')
    groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
    groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)',
                               default='Plausible')
    groupStandard.add_argument('--type', metavar='STRING', action='store',
                               help='Type of database : .hdf5 or .csv (default: %(default)s)',
                               default='.hdf5')
    groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+",
                               help='Name of the views selected for learning (default: %(default)s)',
                               default=[''])
    groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the views (default: %(default)s)',
                               default='/home/bbauvin/Documents/Data/Data_multi_omics/')
    groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                               help='Niceness for the process', default=0)
    
    groupClass = parser.add_argument_group('Classification arguments')
    groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
                            help='Determine the split between learning and validation sets', type=float,
                            default=0.7)
    groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation',
                            type=int, default=2)
    groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int,
                            default=2)
    groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
                            help='Classes used in the dataset (names of the folders); if not filled, random classes '
                                 'will be selected, e.g. walrus mole leopard', default=["yes", "no"])
    groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs ="+",
                            help='Determine whether to use Multiview and/or Monoview, or Benchmark',
                            default=['Benchmark'])
    groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifiers to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+",
                            help='Determine which multiview classifiers to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
                            default=2)
    groupClass.add_argument('--CL_statsiter', metavar='INT', action='store',
                            help='Number of iterations of each algorithm, used to average the results', type=int,
                            default=2)
    groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                            help='Determine which metrics to use; separate a metric and its configuration with ":" '
                                 'and different metrics with spaces. If no metric is specified, all available metrics '
                                 'are used', default=[''])
    groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                            help='Determine which metric to use for randomSearch and optimization' , default="f1_score")
    groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
                            help='Determine how many Randomized grid search tests to do', type=int, default=2)
    groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                            help='Determine which hyperparameter search function to use', default="randomizedSearch")
    
    groupRF = parser.add_argument_group('Random Forest arguments')
    groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store', help='Maximum number of trees',
                         default=25)
    groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store', help='Max depth for the trees',
                         default=5)
    groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store', help='Criterion for the trees',
                         default="entropy")
    
    groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
    groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
                                default=1)
    
    groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
    groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
                             default=1)
    
    groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
    groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store', help='Penalty parameter used',
                              default=1)
    groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store', help='Degree parameter used',
                              default=2)
    
    groupAdaboost = parser.add_argument_group('Adaboost arguments')
    groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store', help='Number of estimators',
                               default=2)
    groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store', help='Estimators',
                               default='DecisionTreeClassifier')
    
    groupDT = parser.add_argument_group('Decision Trees arguments')
    groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store',
                         help='Determine max depth for Decision Trees', default=3)
    groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store',
                         help='Determine the criterion for Decision Trees', default="entropy")
    groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store',
                         help='Determine the splitter for Decision Trees', default="random")
    
    
    groupSGD = parser.add_argument_group('SGD arguments')
    groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store',
                          help='Determine alpha for SGDClassifier', default=0.1)
    groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                          help='Determine loss for SGDClassifier', default='log')
    groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                          help='Determine penalty for SGDClassifier', default='l2')
    
    groupKNN = parser.add_argument_group('KNN arguments')
    groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store',
                          help='Determine number of neighbors for KNN', default=1)
    groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store',
                          help='Determine the weights used by KNN', default="distance")
    groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store',
                          help='Determine the algorithm used by KNN', default="auto")
    groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store',
                          help='Determine the p parameter for KNN', default=1)
    
    groupSCM = parser.add_argument_group('SCM arguments')
    groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store',
                          help='Max number of rules for SCM', default=1)
    groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store',
                          help='Determine the p parameter for SCM', default=1.0)
    groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
                          help='Determine the model type for SCM', default="conjunction")
    
    
    groupMumbo = parser.add_argument_group('Mumbo arguments')
    groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifier to use with Mumbo', default=['DecisionTree', 'DecisionTree', 'DecisionTree'])
    groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                            help='Configuration for the monoview classifier in Mumbo',
                            default=['2:0.5', '2:0.5', '2:0.5'])
    groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3,
                            help='Max number of iteration, min number of iteration, convergence threshold', type=float,
                            default=[10, 1, 0.01])
    
    groupFusion = parser.add_argument_group('Fusion arguments')
    groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
                             help='Determine which type of fusion to use',
                             default=[''])
    groupEarlyFusion = parser.add_argument_group('Early Fusion arguments')
    groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+",
                                  help='Determine which early fusion methods to use',
                                  default=[''])
    groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the early fusion methods; separate methods with spaces '
                                       'and values with ":"',
                                  default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the monoview classifiers used; separate classifiers with '
                                       'spaces, each config being of the form argument1_name:value,argument2_name:value',
                                  default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+',
                                  help='Names of the classifiers used for each early fusion method', default=[''])
    
    
    groupLateFusion = parser.add_argument_group('Late Fusion arguments')
    groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+",
                                 help='Determine which late fusion methods to use',
                                 default=[''])
    groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the late fusion methods', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the monoview classifiers used', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+",
                                 help='Names of the classifiers used for late fusion', default=[''])
    groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store',
                                 help='Determine which method to use to select the monoview classifiers', default="intersect")
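
    # Example invocation (hypothetical dataset name and data path, for illustration only):
    #   python ExecClassif.py --name Plausible --pathF /path/to/data/ --CL_type Benchmark --CL_cores 4 -log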
    
    args = parser.parse_args()
    os.nice(args.nice)
    nbCores = args.CL_cores
    statsIter = args.CL_statsiter
    start = time.time()
    
    if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]:
        getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
    else:
        getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
    
    hyperParamSearch = args.CL_HPS_type
    
    directory = initLogFile(args)
    
    DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nb_class,
                                             args.CL_classes)
    
    datasetFiles = initMultipleDatasets(args, nbCores)
    
    views, viewsIndices, allViews = initViews(DATASET, args)
    if not views:
        raise ValueError("Empty views list, modify selected views to match dataset " + str(args.views))
    NB_VIEW = len(views)
    
    NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
    
    metrics = [metric.split(":") for metric in args.CL_metrics]
    if metrics == [[""]]:
        metricsNames = [name for _, name, isPackage
                        in pkgutil.iter_modules(['Metrics']) if not isPackage and name != "log_loss"]
        metrics = [[metricName] for metricName in metricsNames]
        metrics = arangeMetrics(metrics, args.CL_metric_princ)
    for metricIndex, metric in enumerate(metrics):
        if len(metric) == 1:
            metrics[metricIndex] = [metric[0], None]
    
    logging.info("Start:\t Finding all available mono- & multiview algorithms")
    
    benchmark = initBenchmark(args)
    
    # fusionMethodConfig = [args.FU_method_config[0].split(":"), "b"]
    
    kwargsInit = initKWARGS(args, benchmark)
    
    dataBaseTime = time.time() - start
    
    argumentDictionaries = {"Monoview": [], "Multiview": []}
    argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS,
                                                 kwargsInit)
    
    bestClassifiers = []
    bestClassifiersConfigs = []
    resultsMonoview = []
    labelsNames = LABELS_DICTIONARY.values()
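    # Run the monoview experiments, in chunks of nbCores parallel jobs if more than one core is requested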
    if nbCores > 1:
        nbExperiments = len(argumentDictionaries["Monoview"])
        for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
            resultsMonoview += (Parallel(n_jobs=nbCores)(
                delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, args.CL_split, args.CL_nbFolds,
                                                coreIndex, args.type, args.pathF, statsIter, hyperParamSearch=hyperParamSearch,
                                                metrics=metrics, nIter=args.CL_GS_iter,
                                                **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
                for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))

    else:
        resultsMonoview += ([ExecMonoview(directory, DATASET.get("View" + str(arguments["viewIndex"])),
                                          DATASET.get("Labels").value, args.name, labelsNames,
                                          args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, statsIter,
                                          hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                          **arguments)
                             for arguments in argumentDictionaries["Monoview"]])

    # Gather the monoview scores, classifier names and classifier configurations, per view
    scores = [[result[1][2][metrics[0][0]][1] for result in resultsMonoview if result[0] == viewIndex]
              for viewIndex in viewsIndices]
    classifiersNames = [[result[1][0] for result in resultsMonoview if result[0] == viewIndex]
                        for viewIndex in viewsIndices]
    classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0] == viewIndex]
                          for viewIndex in viewsIndices]
    monoviewTime = time.time() - dataBaseTime - start
    
    
    argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs,
                                                  classifiersNames, NB_VIEW, metrics[0], argumentDictionaries)
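    # Run the multiview experiments, with the same chunked parallel scheme as the monoview ones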
    
    if nbCores > 1:
        resultsMultiview = []
        nbExperiments = len(argumentDictionaries["Multiview"])
        for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
            resultsMultiview += Parallel(n_jobs=nbCores)(
                delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, args.CL_split, args.CL_nbFolds, args.type,
                                                 args.pathF,
                                                 LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch,
                                                 metrics=metrics, nIter=args.CL_GS_iter,
                                                 **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
                for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
    else:
        resultsMultiview = [
            ExecMultiview(directory, DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
                          LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch,
                          metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in
            argumentDictionaries["Multiview"]]
    multiviewTime = time.time() - monoviewTime - dataBaseTime - start
    if nbCores > 1:
        logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing")
        datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
        logging.debug("Start:\t Deleting datasets for multiprocessing")
    labels = np.array(
        [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in
                                                                        resultsMultiview]).transpose()
    trueLabels = DATASET.get("Labels").value
    times = [dataBaseTime, monoviewTime, multiviewTime]
    # times=[]
    results = (resultsMonoview, resultsMultiview)
    analyzeLabels(labels, trueLabels, results, directory)
    logging.debug("Start:\t Analyze Global Results")
    resultAnalysis(benchmark, results, args.name, times, metrics, directory)
    logging.debug("Done:\t Analyze Global Results")