Commit c15b97f6 authored by bbauvin

To pull

parents 2e788d38 752dda1e
@@ -239,7 +239,6 @@ def getClassificationIndices(argumentsDictionaries, iterIndex):
for argumentsDictionary in argumentsDictionaries:
if argumentsDictionary["flag"][0]==iterIndex:
pass
@@ -339,7 +338,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
pass
-def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
+def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
"""Used to transform one-versus-one results into multiclass results and to publish them"""
multiclassResults = [{} for _ in range(statsIter)]
for iterIndex in range(statsIter):
@@ -357,9 +356,9 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamp
for iterIndex, multiclassiterResult in enumerate(multiclassResults):
for key, value in multiclassiterResult.items():
multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
-multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
+multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
-publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
+publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
return multiclassResults
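A note on the reduction above: analyzeMulticlass accumulates one-versus-one votes per example and keeps the label with the most votes (the np.argmax call). A minimal sketch of that reduction, with illustrative names (biclassPredictions, voteMatrix) that are not part of this patch:

import numpy as np

def ovo_to_multiclass(biclassPredictions, nbExamples, nbLabels):
    # biclassPredictions: iterable of ((labelA, labelB), predictions) where
    # predictions is a 0/1 array choosing the first or second label of the pair.
    voteMatrix = np.zeros((nbExamples, nbLabels), dtype=int)
    for (labelA, labelB), predictions in biclassPredictions:
        chosen = np.where(predictions == 0, labelA, labelB)
        voteMatrix[np.arange(nbExamples), chosen] += 1
    # One multiclass prediction per example: the label with the most votes.
    return np.argmax(voteMatrix, axis=1)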
@@ -373,11 +372,11 @@ def analyzeIter(results):
pass
-def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
+def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
if statsIter > 1:
if nbMulticlass > 1:
analyzeBiclass(results)
-multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
+multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
analyzeIter(multiclassResults)
else:
biclassResults = analyzeBiclass(results)
@@ -391,25 +390,15 @@ def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries,
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
-ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
-initMultiviewArguments=initMultiviewArguments):
+benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
+ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
-if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
-try:
-os.makedirs(os.path.dirname(directory + "train_labels.csv"))
-except OSError as exc:
-if exc.errno != errno.EEXIST:
-raise
-trainIndices, testIndices = classificationIndices
-trainLabels = labels[trainIndices]
-np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
-coreIndex, args.type, args.pathF, randomState, labels,
+coreIndex, args.type, args.pathF, randomState,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_GS_iter, **argument)
for argument in argumentDictionaries["Monoview"]]
@@ -420,7 +409,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
resultsMultiview = []
resultsMultiview += [
ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
-args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
+args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter, **arguments)
for arguments in argumentDictionaries["Multiview"]]
return [flag, resultsMonoview, resultsMultiview]
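The block removed from execOneBenchmark above used the usual makedirs-then-savetxt pattern to dump the train labels before running the experiments. A self-contained sketch of that pattern, with an illustrative helper name and path layout (not part of the patch):

import errno
import os
import numpy as np

def save_train_labels(directory, labels, classificationIndices):
    # Create the output directory if needed, tolerating it already existing.
    path = directory + "train_labels.csv"
    try:
        os.makedirs(os.path.dirname(path))
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
    trainIndices, testIndices = classificationIndices
    np.savetxt(path, labels[trainIndices], delimiter=",")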
@@ -428,21 +417,9 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
-ExecMonoview_multicore=ExecMonoview_multicore,
-ExecMultiview_multicore=ExecMultiview_multicore,
-initMultiviewArguments=initMultiviewArguments):
-"""Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
-ExecMultiview_multicore args are only used for tests"""
-if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
-try:
-os.makedirs(os.path.dirname(directory + "train_labels.csv"))
-except OSError as exc:
-if exc.errno != errno.EEXIST:
-raise
-trainIndices, testIndices = classificationIndices
-trainLabels = labels[trainIndices]
-np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
+benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
+ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
@@ -452,7 +429,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMonoview += (Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
-coreIndex, args.type, args.pathF, randomState, labels,
+coreIndex, args.type, args.pathF, randomState,
hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -467,7 +444,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMultiview += Parallel(n_jobs=nbCores)(
delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
-args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
+args.type, args.pathF, LABELS_DICTIONARY, randomState,
hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -475,17 +452,8 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
return [flag, resultsMonoview, resultsMultiview]
-def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
-kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
-ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
-initMultiviewArguments=initMultiviewArguments):
-pass
-def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
-execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
-execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
+def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
+execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
"""Used to execute the needed benchmark(s) on multicore or mono-core functions
The execOneBenchmark and execOneBenchmark_multicore keywords args are only used in the tests"""
# TODO : find a way to flag
@@ -494,29 +462,30 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionari
results = []
if nbCores > 1:
if statsIter > 1 or nbMulticlass > 1:
-nbExpsToDo = len(benchmarkArgumentsDictionaries)
+nbExpsToDo = nbMulticlass*statsIter
nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
for stepIndex in nbMulticoreToDo:
results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
(coreIndex=coreIndex,
-**benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
+**argumentsDictionaries[coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
else:
-results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
+results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
else:
-for arguments in benchmarkArgumentsDictionaries:
-results += [execOneBenchmarkMonoCore(**arguments)]
+for arguments in argumentsDictionaries:
+results += [execOneBenchmark(**arguments)]
logging.debug("Done:\t Executing all the needed biclass benchmarks")
# Do everything with flagging
logging.debug("Start:\t Analyzing preds")
-# getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
+# getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
logging.debug("Done:\t Analyzing preds")
return results
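For reference, the dispatch scheme execBenchmark relies on (and that execOneBenchmark_multicore uses as well) is joblib's Parallel/delayed applied to batches of nbCores experiments at a time. A minimal sketch with an assumed helper name (run_in_batches is illustrative, not part of the patch):

import math
from joblib import Parallel, delayed

def run_in_batches(task, argumentsDictionaries, nbCores):
    # Run task(**arguments) for every dictionary, nbCores experiments per batch.
    results = []
    nbExperiments = len(argumentsDictionaries)
    for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
        batch = argumentsDictionaries[stepIndex * nbCores:(stepIndex + 1) * nbCores]
        results += Parallel(n_jobs=nbCores)(
            delayed(task)(coreIndex=coreIndex, **arguments)
            for coreIndex, arguments in enumerate(batch))
    return results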
def execClassif(arguments):
"""Main function to execute the benchmark"""
start = time.time()
@@ -533,7 +502,7 @@ def execClassif(arguments):
if statsIter > 1:
statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
else:
-statsIterRandomStates = [randomState]
+statsIterRandomStates = randomState
if args.name not in ["Fake", "Plausible"]:
getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
@@ -543,9 +512,9 @@ def execClassif(arguments):
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes)
-classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
+classificationIndices = execution.genSplits(statsIter, DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
-multiclassLabels, labelsCombinations, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod, classificationIndices)
+multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
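The statsIterRandomStates handling a few lines above draws one child seed per statistical iteration from a single master RandomState, so that every iteration gets an independent but reproducible stream for its splits and folds. A short sketch (the seed values are illustrative):

import numpy as np

masterState = np.random.RandomState(42)   # illustrative master seed
statsIter = 5                             # illustrative number of iterations
statsIterRandomStates = [np.random.RandomState(masterState.randint(500))
                         for _ in range(statsIter)]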
@@ -580,36 +549,9 @@ def execClassif(arguments):
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
initKWARGS)
-directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
+directories = execution.genDirecortiesNames(directory, statsIter, labelsIndices,
multiclassMethod, LABELS_DICTIONARY)
# TODO : Gen arguments dictionaries
-benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
-labelsCombinations, oldIndicesMulticlass,
-hyperParamSearch, args, kFolds,
-statsIterRandomStates, metrics,
-argumentDictionaries, benchmark)
-nbMulticlass = len(labelsCombinations)
-execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)
if statsIter > 1:
logging.debug("Start:\t Benchmark classification")
(The hunks below modify a second file: the execution helper module referenced above as execution.)
@@ -238,9 +238,12 @@ def initLogFile(args):
return resultDirectory
-def genSplits(labels, splitRatio, statsIterRandomStates):
+def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMethod):
"""Used to gen the train/test splits using one or multiple random states"""
indices = np.arange(len(labels))
for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
indices = oldIndices
splitsMulticlass = []
if statsIter > 1:
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
@@ -252,8 +255,16 @@ def genSplits(labels, splitRatio, statsIterRandomStates):
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splits.append([trainIndices, testIndices])
-return splits
+splitsMulticlass.append(splits)
+else:
+foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
+folds = foldsObj.split(indices, labels)
+for fold in folds:
+train_fold, test_fold = fold
+trainIndices = indices[train_fold]
+testIndices = indices[test_fold]
+splitsMulticlass.append((trainIndices, testIndices))
+return splitsMulticlass
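A minimal, self-contained sketch of the per-iteration split generation genSplits performs, assuming labels is a 1-D integer array (the helper name is illustrative, not part of the patch):

import numpy as np
import sklearn.model_selection

def gen_splits_sketch(labels, splitRatio, statsIterRandomStates):
    # One stratified train/test split per statistical-iteration random state.
    indices = np.arange(len(labels))
    splits = []
    for randomState in statsIterRandomStates:
        foldsObj = sklearn.model_selection.StratifiedShuffleSplit(
            n_splits=1, random_state=randomState, test_size=splitRatio)
        train_fold, test_fold = next(foldsObj.split(indices, labels))
        splits.append([indices[train_fold], indices[test_fold]])
    return splits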
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
@@ -311,32 +322,3 @@ def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, l
labelName = labelDictionary[labelIndex]
directories.append(directory +labelName+"_vs_Rest/")
return directories
\ No newline at end of file
-def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args,
-kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark):
-benchmarkArgumentDictionaries = []
-for combinationIndex, labelsCombination in enumerate(labelsCombinations):
-for iterIndex, iterRandomState in enumerate(statsIterRandomStates):
-benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0:labelsDictionary[labelsCombination[0]],
-1:labelsDictionary[labelsCombination[1]]},
-"directory": directories[iterIndex]+
-labelsDictionary[labelsCombination[0]]+
-"vs"+
-labelsDictionary[labelsCombination[1]]+"/",
-"classificationIndices": oldIndicesMulticlass[combinationIndex][iterIndex],
-"args": args,
-"labels": multiclassLabels[combinationIndex],
-"kFolds": kFolds[iterIndex],
-"randomState": iterRandomState,
-"hyperParamSearch": hyperParamSearch,
-"metrics": metrics,
-"argumentDictionaries": argumentDictionaries,
-"benchmark": benchmark,
-"views": None,
-"viewsIndices": None,
-"flag": [iterIndex, labelsCombination]}
-benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary)
-return benchmarkArgumentDictionaries
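The removed genArgumentDictionaries above tags every benchmark run with flag = [iterIndex, labelsCombination], and getClassificationIndices (first hunk) filters on flag[0]. A sketch of how such flags let the analysis regroup results per statistical iteration, assuming the [flag, resultsMonoview, resultsMultiview] layout returned by execOneBenchmark (the helper name is illustrative):

def group_results_by_iteration(results, statsIter):
    # results: list of [flag, resultsMonoview, resultsMultiview] entries,
    # with flag = [iterIndex, labelsCombination].
    grouped = [[] for _ in range(statsIter)]
    for flag, resultsMonoview, resultsMultiview in results:
        iterIndex, labelsCombination = flag
        grouped[iterIndex].append((labelsCombination, resultsMonoview, resultsMultiview))
    return grouped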