Commit 0683bb62 authored by bbauvin

Refactored Execfile

parent bcb38103
# Import built-in modules
import argparse
import pkgutil  # for TimeStamp in CSVFile
import os
import time
import sys
import select
import logging
import errno
import cPickle

# Import 3rd party modules
from joblib import Parallel, delayed
import numpy as np
import math
import matplotlib
import sklearn

# Import own modules
import Multiview
@@ -25,6 +21,7 @@ from Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
import Multiview.GetMultiviewDb as DB
from Versions import testVersions
from ResultAnalysis import resultAnalysis, analyzeLabels, analyzeIterResults
from utils import execution, Dataset
# Author-Info
__author__ = "Baptiste Bauvin"
@@ -33,96 +30,6 @@ __status__ = "Prototype" # Production, Development, Prototype
matplotlib.use('Agg')  # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure

def initLogFile(args):
    resultDirectory = "../../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
        args.views) + "-" + args.name + "-LOG"
    if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
        try:
            os.makedirs(os.path.dirname(resultDirectory + logFileName))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    logFile = resultDirectory + logFileName
    if os.path.isfile(logFile + ".log"):
        for i in range(1, 20):
            testFileName = logFileName + "-" + str(i) + ".log"
            if not os.path.isfile(resultDirectory + testFileName):
                logFile = resultDirectory + testFileName
                break
    else:
        logFile += ".log"
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                        filemode='w')
    if args.log:
        logging.getLogger().addHandler(logging.StreamHandler())
    return resultDirectory

def input_(timeout=15):
    print "You have " + str(timeout) + " seconds to stop the script by typing n"
    i, o, e = select.select([sys.stdin], [], [], timeout)
    if i:
        return sys.stdin.readline().strip()
    else:
        return "y"

def confirm(resp=True, timeout=15):
    ans = input_(timeout)
    if not ans:
        return resp
    if ans not in ['y', 'Y', 'n', 'N']:
        print 'please enter y or n.'
    if ans == 'y' or ans == 'Y':
        return True
    if ans == 'n' or ans == 'N':
        return False

def initMultipleDatasets(args, nbCores):
    """Used to create copies of the dataset if multicore computation is used
    Needs args.pathF and args.name"""
    if nbCores > 1:
        if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
            logging.debug("Info:\t Enough copies of the dataset are already available")
            pass
        else:
            logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing")
            logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " +
                            str(os.path.getsize(args.pathF + args.name + ".hdf5") * nbCores / float(
                                1024) / 1000 / 1000) + " Gbytes /!\ ")
            confirmation = confirm()
            if not confirmation:
                sys.exit(0)
            else:
                datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
                logging.debug("Start:\t Creating datasets for multiprocessing")
                return datasetFiles

def initViews(DATASET, args):
    """Used to return the views names that will be used by the algos, their indices and all the views names
    Needs args.views"""
    NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
    if args.views != [""]:
        allowedViews = args.views
        allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)
                 if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        viewsIndices = [viewIndex for viewIndex in range(NB_VIEW)
                        if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        return views, viewsIndices, allViews
    else:
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        viewsIndices = np.arange(NB_VIEW)
        allViews = views
        return views, viewsIndices, allViews

def initBenchmark(args):
    """Used to create a list of all the algorithm package names used for the benchmark
    Needs args.CL_type, args.CL_algos_multiview, args.MU_types, args.FU_types, args.FU_late_methods,
@@ -241,43 +148,6 @@ def arangeMetrics(metrics, metricPrinc):
    return metrics

def genSplits(statsIter, indices, DATASET, splitRatio, statsIterRandomStates):
    if statsIter > 1:
        splits = []
        for randomState in statsIterRandomStates:
            trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                       DATASET.get("Labels").value,
                                                                                       test_size=splitRatio,
                                                                                       random_state=randomState)
            splits.append([trainIndices, testIndices])
        return splits
    else:
        trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                   DATASET.get("Labels").value,
                                                                                   test_size=splitRatio,
                                                                                   random_state=statsIterRandomStates)
        return trainIndices, testIndices

def genKFolds(statsIter, nbFolds, statsIterRandomStates):
    if statsIter > 1:
        foldsList = []
        for randomState in statsIterRandomStates:
            foldsList.append(sklearn.model_selection.KFold(n_splits=nbFolds, random_state=randomState))
        return foldsList
    else:
        return sklearn.model_selection.KFold(n_splits=nbFolds, random_state=statsIterRandomStates)

def genDirecortiesNames(directory, statsIter):
    if statsIter > 1:
        directories = []
        for i in range(statsIter):
            directories.append(directory + "iter_" + str(i + 1) + "/")
        return directories
    else:
        return directory

def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices,
                              kFolds,
                              randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start,
@@ -329,7 +199,8 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory,
                    randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
                    benchmark, views):
    print classificationIndices[0]
-    import pdb;pdb.set_trace()
+    import pdb;
+    pdb.set_trace()
    np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
    resultsMonoview = []
@@ -401,203 +272,21 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory,
    return results

def initRandomState(randomStateArg, directory):
    if randomStateArg is None:
        randomState = np.random.RandomState(randomStateArg)
    else:
        try:
            seed = int(randomStateArg)
            randomState = np.random.RandomState(seed)
        except ValueError:
            fileName = randomStateArg
            with open(fileName, 'rb') as handle:
                randomState = cPickle.load(handle)
    with open(directory + "randomState.pickle", "wb") as handle:
        cPickle.dump(randomState, handle)
    return randomState


# _______________ #
# __ EXECUTION __ #
# _______________ #

testVersions()

parser = argparse.ArgumentParser(
    description='This file is used to benchmark the scores of multiple classification algorithms on multiview data.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate logging to console')
groupStandard.add_argument('--name', metavar='STRING', action='store',
                           help='Name of Database (default: %(default)s)', default='Plausible')
groupStandard.add_argument('--type', metavar='STRING', action='store',
                           help='Type of database : .hdf5 or .csv (default: %(default)s)', default='.hdf5')
groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+",
                           help='Name of the views selected for learning (default: %(default)s)', default=[''])
groupStandard.add_argument('--pathF', metavar='STRING', action='store',
                           help='Path to the views (default: %(default)s)',
                           default='/home/bbauvin/Documents/Data/Data_multi_omics/')
groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                           help='Niceness for the process', default=0)
groupStandard.add_argument('--randomState', metavar='STRING', action='store',
                           help="The random state seed to use or a file where we can find its get_state",
                           default=None)

groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', type=float,
                        help='Determine the split between learning and validation sets', default=0.2)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', type=int,
                        help='Number of folds in cross validation', default=2)
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', type=int,
                        help='Number of classes, -1 for all', default=2)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
                        help='Classes used in the dataset (names of the folders); if not filled, random classes '
                             'will be selected, e.g. walrus mole leopard', default=["yes", "no"])
groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs="+",
                        help='Determine whether to use Multiview and/or Monoview, or Benchmark',
                        default=['Benchmark'])
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+",
                        help='Determine which monoview classifier to use; if empty, all are considered',
                        default=[''])
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+",
                        help='Determine which multiview classifier to use; if empty, all are considered',
                        default=[''])
groupClass.add_argument('--CL_cores', metavar='INT', action='store', type=int,
                        help='Number of cores, -1 for all', default=2)
groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', type=int,
                        help="Number of iterations for each algorithm, to average results; if using multiple cores "
                             "it is highly recommended to use statsIter mod(nbCores) = 0",
                        default=2)
groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                        help='Determine which metrics to use; separate metric and configuration with ":". '
                             'If multiple, separate with space. If no metric is specified, all are considered, '
                             'with accuracy as principal for classification', default=[''])
groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                        help='Determine which metric to use for randomSearch and optimization', default="f1_score")
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', type=int,
                        help='Determine how many randomized grid search tests to do', default=2)
groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                        help='Determine which hyperparameter search function to use', default="randomizedSearch")

groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store',
                     help='Maximum number of trees', default=25)
groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store',
                     help='Max depth for the trees', default=5)
groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store',
                     help='Criterion for the trees', default="entropy")

groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store',
                            help='Penalty parameter used', default=1)

groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store',
                         help='Penalty parameter used', default=1)

groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store',
                          help='Penalty parameter used', default=1)
groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store',
                          help='Degree parameter used', default=2)

groupAdaboost = parser.add_argument_group('Adaboost arguments')
groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store',
                           help='Number of estimators', default=2)
groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store',
                           help='Base estimator', default='DecisionTreeClassifier')

groupDT = parser.add_argument_group('Decision Trees arguments')
groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store',
                     help='Determine max depth for Decision Trees', default=3)
groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store',
                     help='Determine criterion for Decision Trees', default="entropy")
groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store',
                     help='Determine splitter for Decision Trees', default="random")

groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store',
                      help='Determine alpha for SGDClassifier', default=0.1)
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                      help='Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                      help='Determine penalty for SGDClassifier', default='l2')

groupKNN = parser.add_argument_group('KNN arguments')
groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store',
                      help='Determine number of neighbors for KNN', default=1)
groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store',
                      help='Determine weighting scheme for KNN', default="distance")
groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store',
                      help='Determine algorithm for KNN', default="auto")
groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store',
                      help='Determine the power parameter p for KNN', default=1)

groupSCM = parser.add_argument_group('SCM arguments')
groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store',
                      help='Max number of rules for SCM', default=1)
groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store',
                      help='p parameter for SCM', default=1.0)
groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
                      help='Model type for SCM', default="conjunction")

groupMumbo = parser.add_argument_group('Mumbo arguments')
groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                        help='Determine which monoview classifier to use with Mumbo', default=[''])
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                        help='Configuration for the monoview classifier in Mumbo; separate each classifier with '
                             'space and each argument with ":"', default=[''])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, type=float,
                        help='Max number of iterations, min number of iterations, convergence threshold',
                        default=[10, 1, 0.01])
groupMumbo.add_argument('--MU_combination', action='store_true',
                        help='Try all the monoview classifier combinations for each view', default=False)

groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
                         help='Determine which type of fusion to use', default=[''])

groupEarlyFusion = parser.add_argument_group('Early Fusion arguments')
groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+",
                              help='Determine which early fusion method to use', default=[''])
groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+',
                              help='Configuration for the early fusion methods; separate methods by space '
                                   'and values by ":"', default=[''])
groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+',
                              help='Configuration for the monoview classifiers used; separate classifiers by space, '
                                   'and configs must be of the form argument1_name:value,argument2_name:value',
                              default=[''])
groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+',
                              help='Names of the classifiers used for each early fusion method', default=[''])

groupLateFusion = parser.add_argument_group('Late Fusion arguments')
groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+",
                             help='Determine which late fusion method to use', default=[''])
groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+',
                             help='Configuration for the fusion method', default=[''])
groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+',
                             help='Configuration for the monoview classifiers used', default=[''])
groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+",
                             help='Names of the classifiers used for late fusion', default=[''])
groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store',
                             help='Determine which method to use to select the monoview classifiers',
                             default="intersect")

start = time.time()
-args = parser.parse_args()
+args = execution.parseTheArgs(sys.argv[1:])
os.nice(args.nice)
nbCores = args.CL_cores
statsIter = args.CL_statsiter
hyperParamSearch = args.CL_HPS_type
-directory = initLogFile(args)
+directory = execution.initLogFile(args)
-randomState = initRandomState(args.randomState, directory)
+randomState = execution.initRandomState(args.randomState, directory)
if statsIter > 1:
    statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
else:
@@ -613,13 +302,13 @@ DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
indices = np.arange(datasetLength)
-classificationIndices = genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates)
+classificationIndices = execution.genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates)
-kFolds = genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
+kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
-datasetFiles = initMultipleDatasets(args, nbCores)
+datasetFiles = Dataset.initMultipleDatasets(args, nbCores)
-views, viewsIndices, allViews = initViews(DATASET, args)
+views, viewsIndices, allViews = execution.initViews(DATASET, args)
if not views:
    raise ValueError, "Empty views list, modify selected views to match dataset " + args.views
@@ -647,7 +336,7 @@ dataBaseTime = time.time() - start
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS,
                                             initKWARGS)
-directories = genDirecortiesNames(directory, statsIter)
+directories = execution.genDirecortiesNames(directory, statsIter)
if statsIter > 1:
    for statIterIndex in range(statsIter):
...
from scipy import sparse
import numpy as np
import Multiview.GetMultiviewDb as DB
import logging
import os
import sys
import select

def getV(DATASET, viewIndex, usedIndices=None):
@@ -59,3 +64,46 @@ def extractSubset(matrix, usedIndices):
        return sparse.csr_matrix((newData, newIndices, newIndptr), shape=(len(usedIndices), matrix.shape[1]))
    else:
        return matrix[usedIndices]
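

# Sanity-check sketch for extractSubset (toy data; assumes the full definition
# above the elided lines is in scope, and that the sparse branch rebuilds the
# selected rows as a CSR triplet, as the returned shape suggests).
dense = np.arange(12).reshape(4, 3)
assert (extractSubset(dense, [0, 2]) == dense[[0, 2]]).all()   # dense branch: fancy indexing
subset = extractSubset(sparse.csr_matrix(dense), [1, 3])       # sparse branch
assert subset.shape == (2, 3)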


def initMultipleDatasets(args, nbCores):
    """Used to create copies of the dataset if multicore computation is used
    Needs args.pathF and args.name"""
    if nbCores > 1:
        if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
            logging.debug("Info:\t Enough copies of the dataset are already available")
            pass
        else:
            logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing")
            logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " +
                            str(os.path.getsize(args.pathF + args.name + ".hdf5") * nbCores / float(
                                1024) / 1000 / 1000) + " Gbytes /!\ ")
            confirmation = confirm()
            if not confirmation:
                sys.exit(0)
            else:
                datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
                logging.debug("Start:\t Creating datasets for multiprocessing")
                return datasetFiles
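

# Worked instance of the storage estimate above (illustrative numbers): note
# the mixed divisors, /1024/1000/1000 yield neither GiB nor decimal GB exactly.
fileSize = 2 * 1024 ** 3                               # os.path.getsize for a 2 GiB file
estimate = fileSize * 4 / float(1024) / 1000 / 1000    # 4 cores -> 8.388608 "Gbytes"
# (decimal would be 8.59 GB, binary 8.0 GiB)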


def confirm(resp=True, timeout=15):
    ans = input_(timeout)
    if not ans:
        return resp
    if ans not in ['y', 'Y', 'n', 'N']:
        print 'please enter y or n.'
    if ans == 'y' or ans == 'Y':
        return True
    if ans == 'n' or ans == 'N':
        return False


def input_(timeout=15):
    print "You have " + str(timeout) + " seconds to stop the script by typing n"
    i, o, e = select.select([sys.stdin], [], [], timeout)
    if i:
        return sys.stdin.readline().strip()
    else:
        return "y"
import argparse
import numpy as np
import cPickle
import time
import os
import errno
import logging
import sklearn.model_selection  # the submodule is not loaded by a bare "import sklearn"
def parseTheArgs(arguments):
    parser = argparse.ArgumentParser(
        description='This file is used to benchmark the scores of multiple classification algorithms on multiview data.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    groupStandard = parser.add_argument_group('Standard arguments')
    groupStandard.add_argument('-log', action='store_true', help='Use option to activate logging to console')
    groupStandard.add_argument('--name', metavar='STRING', action='store',
                               help='Name of Database (default: %(default)s)', default='Plausible')
    groupStandard.add_argument('--type', metavar='STRING', action='store',
                               help='Type of database : .hdf5 or .csv (default: %(default)s)', default='.hdf5')
    groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+",
                               help='Name of the views selected for learning (default: %(default)s)', default=[''])
    groupStandard.add_argument('--pathF', metavar='STRING', action='store',
                               help='Path to the views (default: %(default)s)',
                               default='/home/bbauvin/Documents/Data/Data_multi_omics/')
    groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                               help='Niceness for the process', default=0)
    groupStandard.add_argument('--randomState', metavar='STRING', action='store',
                               help="The random state seed to use or a file where we can find its get_state",
                               default=None)

    groupClass = parser.add_argument_group('Classification arguments')
    groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', type=float,
                            help='Determine the split between learning and validation sets', default=0.2)
    groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', type=int,
                            help='Number of folds in cross validation', default=2)
    groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', type=int,
                            help='Number of classes, -1 for all', default=2)
    groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
                            help='Classes used in the dataset (names of the folders); if not filled, random classes '
                                 'will be selected, e.g. walrus mole leopard', default=["yes", "no"])
    groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs="+",
                            help='Determine whether to use Multiview and/or Monoview, or Benchmark',
                            default=['Benchmark'])
    groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifier to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+",
                            help='Determine which multiview classifier to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_cores', metavar='INT', action='store', type=int,
                            help='Number of cores, -1 for all', default=2)
    groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', type=int,
                            help="Number of iterations for each algorithm, to average results; if using multiple "
                                 "cores it is highly recommended to use statsIter mod(nbCores) = 0",
                            default=2)
    groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                            help='Determine which metrics to use; separate metric and configuration with ":". '
                                 'If multiple, separate with space. If no metric is specified, all are considered, '
                                 'with accuracy as principal for classification', default=[''])
    groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                            help='Determine which metric to use for randomSearch and optimization',
                            default="f1_score")
    groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', type=int,
                            help='Determine how many randomized grid search tests to do', default=2)
    groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                            help='Determine which hyperparameter search function to use',
                            default="randomizedSearch")

    groupRF = parser.add_argument_group('Random Forest arguments')
    groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store',
                         help='Maximum number of trees', default=25)
    groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store',
                         help='Max depth for the trees', default=5)
    groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store',
                         help='Criterion for the trees', default="entropy")

    groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
    groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store',
                                help='Penalty parameter used', default=1)

    groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
    groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store',
                             help='Penalty parameter used', default=1)

    groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
    groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store',
                              help='Penalty parameter used', default=1)
    groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store',
                              help='Degree parameter used', default=2)

    groupAdaboost = parser.add_argument_group('Adaboost arguments')
    groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store',
                               help='Number of estimators', default=2)
    groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store',
                               help='Base estimator', default='DecisionTreeClassifier')

    groupDT = parser.add_argument_group('Decision Trees arguments')
    groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store',
                         help='Determine max depth for Decision Trees', default=3)
    groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store',
                         help='Determine criterion for Decision Trees', default="entropy")
    groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store',
                         help='Determine splitter for Decision Trees', default="random")

    groupSGD = parser.add_argument_group('SGD arguments')
    groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store',
                          help='Determine alpha for SGDClassifier', default=0.1)
    groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                          help='Determine loss for SGDClassifier', default='log')
    groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                          help='Determine penalty for SGDClassifier', default='l2')

    groupKNN = parser.add_argument_group('KNN arguments')
    groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store',
                          help='Determine number of neighbors for KNN', default=1)
    groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store',
                          help='Determine weighting scheme for KNN', default="distance")
    groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store',
                          help='Determine algorithm for KNN', default="auto")
    groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store',
                          help='Determine the power parameter p for KNN', default=1)

    groupSCM = parser.add_argument_group('SCM arguments')
    groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store',
                          help='Max number of rules for SCM', default=1)
    groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store',
                          help='p parameter for SCM', default=1.0)
    groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
                          help='Model type for SCM', default="conjunction")

    groupMumbo = parser.add_argument_group('Mumbo arguments')
    groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifier to use with Mumbo', default=[''])
    groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                            help='Configuration for the monoview classifier in Mumbo; separate each classifier with '
                                 'space and each argument with ":"', default=[''])
    groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, type=float,
                            help='Max number of iterations, min number of iterations, convergence threshold',
                            default=[10, 1, 0.01])
    groupMumbo.add_argument('--MU_combination', action='store_true',
                            help='Try all the monoview classifier combinations for each view', default=False)

    groupFusion = parser.add_argument_group('Fusion arguments')
    groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
                             help='Determine which type of fusion to use', default=[''])

    groupEarlyFusion = parser.add_argument_group('Early Fusion arguments')
    groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+",
                                  help='Determine which early fusion method to use', default=[''])
    groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the early fusion methods; separate methods by space '
                                       'and values by ":"', default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the monoview classifiers used; separate classifiers by '
                                       'space, and configs must be of the form '
                                       'argument1_name:value,argument2_name:value', default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+',
                                  help='Names of the classifiers used for each early fusion method', default=[''])

    groupLateFusion = parser.add_argument_group('Late Fusion arguments')
    groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+",
                                 help='Determine which late fusion method to use', default=[''])
    groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the fusion method', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the monoview classifiers used', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+",
                                 help='Names of the classifiers used for late fusion', default=[''])
    groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store',
                                 help='Determine which method to use to select the monoview classifiers',
                                 default="intersect")

    args = parser.parse_args(arguments)
    return args
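

# Usage sketch (illustrative values, not from an original run):
args = parseTheArgs(["--name", "Plausible", "--CL_nbFolds", "5", "--CL_cores", "4"])
assert args.name == "Plausible" and args.CL_nbFolds == 5 and args.CL_cores == 4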


def initRandomState(randomStateArg, directory):
    if randomStateArg is None:
        randomState = np.random.RandomState(randomStateArg)
    else:
        try:
            seed = int(randomStateArg)
            randomState = np.random.RandomState(seed)
        except ValueError:
            fileName = randomStateArg
            with open(fileName, 'rb') as handle:
                randomState = cPickle.load(handle)
    with open(directory + "randomState.pickle", "wb") as handle:
        cPickle.dump(randomState, handle)
    return randomState
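

# Sketch of the three accepted inputs (directory must already exist; names are
# illustrative). Note the RandomState object itself is pickled, not get_state().
rs = initRandomState(None, "results/")                            # OS-seeded
rs = initRandomState("42", "results/")                            # integer seed
rs = initRandomState("results/randomState.pickle", "results/")    # reloaded state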


def initLogFile(args):
    resultDirectory = "../../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
        args.views) + "-" + args.name + "-LOG"
    if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
        try:
            os.makedirs(os.path.dirname(resultDirectory + logFileName))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    logFile = resultDirectory + logFileName
    if os.path.isfile(logFile + ".log"):
        for i in range(1, 20):
            testFileName = logFileName + "-" + str(i) + ".log"
            if not os.path.isfile(resultDirectory + testFileName):
                logFile = resultDirectory + testFileName
                break
    else:
        logFile += ".log"
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                        filemode='w')
    if args.log:
        logging.getLogger().addHandler(logging.StreamHandler())
    return resultDirectory
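

# Naming sketch (assumed/illustrative values, not from a real run): with
# args.name == "Plausible", args.CL_type == ["Benchmark"] and
# args.views == ["View0", "View1"], one run logs to something like
#   ../../Results/Plausible/started_2017_01_30-14_05/
#       20170130-140512-Benchmark-View0_View1-Plausible-LOG.log
# A collision on the same name gets a -1 ... -19 suffix before ".log".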


def genSplits(statsIter, indices, DATASET, splitRatio, statsIterRandomStates):
    if statsIter > 1:
        splits = []
        for randomState in statsIterRandomStates:
            trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                       DATASET.get("Labels").value,
                                                                                       test_size=splitRatio,
                                                                                       random_state=randomState)
            splits.append([trainIndices, testIndices])
        return splits
    else:
        trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                   DATASET.get("Labels").value,
                                                                                   test_size=splitRatio,
                                                                                   random_state=statsIterRandomStates)
        return trainIndices, testIndices
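

# Minimal sketch of the single-iteration branch with toy labels: the split is
# made on indices so one partition can index every view of the HDF5 dataset;
# the label halves (a, b above) are computed but discarded.
toyIndices = np.arange(10)
toyLabels = np.array([0, 1] * 5)
trainIdx, testIdx, a, b = sklearn.model_selection.train_test_split(
    toyIndices, toyLabels, test_size=0.2, random_state=np.random.RandomState(42))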


def genKFolds(statsIter, nbFolds, statsIterRandomStates):
    if statsIter > 1:
        foldsList = []
        for randomState in statsIterRandomStates:
            foldsList.append(sklearn.model_selection.KFold(n_splits=nbFolds, random_state=randomState))
        return foldsList
    else:
        return sklearn.model_selection.KFold(n_splits=nbFolds, random_state=statsIterRandomStates)
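

# Usage sketch for the multi-iteration branch (two illustrative seeds). Note
# that KFold only consumes random_state when shuffle=True; with the default
# shuffle=False the folds are deterministic contiguous blocks.
for kFold in genKFolds(2, 5, [np.random.RandomState(1), np.random.RandomState(2)]):
    for trainIdx, testIdx in kFold.split(np.arange(20)):
        pass  # index arrays for one of the 5 folds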


def initViews(DATASET, args):
    """Used to return the views names that will be used by the algos, their indices and all the views names
    Needs args.views"""
    NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
    if args.views != [""]:
        allowedViews = args.views
        allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)
                 if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        viewsIndices = [viewIndex for viewIndex in range(NB_VIEW)
                        if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        return views, viewsIndices, allViews
    else:
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        viewsIndices = np.arange(NB_VIEW)
        allViews = views
        return views, viewsIndices, allViews
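

# Minimal sketch with a toy HDF5 file shaped like the expected DATASET: a
# "Metadata" group with an "nbView" attribute and "View0", "View1", ... datasets,
# each carrying a "name" attribute (h5py and the FakeArgs stand-in are assumptions).
import h5py

toy = h5py.File("toy.hdf5", "w")
toy.create_group("Metadata").attrs["nbView"] = 2
for index, viewName in enumerate(["methyl", "mirna"]):
    toy.create_dataset("View" + str(index), data=np.zeros((3, 3))).attrs["name"] = viewName


class FakeArgs(object):
    views = ["mirna"]


views, viewsIndices, allViews = initViews(toy, FakeArgs())
# views == ['mirna'], viewsIndices == [1], allViews == ['methyl', 'mirna']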


def genDirecortiesNames(directory, statsIter):
    if statsIter > 1:
        directories = []
        for i in range(statsIter):
            directories.append(directory + "iter_" + str(i + 1) + "/")
        return directories
    else:
        return directory
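

# Usage sketch (the misspelling "Direcorties" is kept as in the source):
print(genDirecortiesNames("../../Results/Plausible/", 3))
# -> ['../../Results/Plausible/iter_1/', '../../Results/Plausible/iter_2/',
#     '../../Results/Plausible/iter_3/']
print(genDirecortiesNames("../../Results/Plausible/", 1))
# -> '../../Results/Plausible/' (a single string, not a list)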