Commit ce4ed2b8 authored by bbauvin

To push

parent c15b97f6
@@ -239,6 +239,7 @@ def getClassificationIndices(argumentsDictionaries, iterIndex):
for argumentsDictionary in argumentsDictionaries:
if argumentsDictionary["flag"][0]==iterIndex:
pass
@@ -338,7 +339,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
pass
def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
"""Used to tranform one versus one results in multiclass results and to publish it"""
multiclassResults = [{} for _ in range(statsIter)]
for iterIndex in range(statsIter):
@@ -356,9 +357,9 @@ def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLa
for iterIndex, multiclassiterResult in enumerate(multiclassResults):
for key, value in multiclassiterResult.items():
multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
return multiclassResults
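
A minimal sketch of the vote-collapsing step performed above: each multiclass result stores, per example, one vote count per class, and np.argmax along axis 1 turns that matrix into multiclass predictions (the vote matrix below is illustrative, not project data).

import numpy as np

# Hypothetical one-versus-one vote matrix: 4 examples x 3 classes, each cell
# counting how many pairwise classifiers voted for that class on that example.
votes = np.array([[2, 0, 1],
                  [0, 3, 0],
                  [1, 1, 2],
                  [3, 0, 0]])

predictedLabels = np.argmax(votes, axis=1)  # -> array([0, 1, 2, 0])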
@@ -372,11 +373,11 @@ def analyzeIter(results):
pass
def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
if statsIter > 1:
if nbMulticlass > 1:
analyzeBiclass(results)
multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
analyzeIter(multiclassResults)
else:
biclassResults = analyzeBiclass(results)
@@ -390,15 +391,25 @@ def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclas
def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices, testIndices = classificationIndices
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_GS_iter, **argument)
for argument in argumentDictionaries["Monoview"]]
@@ -409,7 +420,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
resultsMultiview = []
resultsMultiview += [
ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter, **arguments)
for arguments in argumentDictionaries["Multiview"]]
return [flag, resultsMonoview, resultsMultiview]
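
The directory-creation guard added above tolerates a concurrent mkdir from another process; a minimal standalone sketch of that idiom (the helper name ensureDirectory is illustrative, not from the project):

import errno
import os

def ensureDirectory(path):
    """Create path if it does not exist, tolerating a concurrent creation."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # Another core may have created the directory in the meantime;
        # only re-raise errors that are not "already exists".
        if exc.errno != errno.EEXIST:
            raise

ensureDirectory("Results/example_run")

On Python 3.2 and later, os.makedirs(path, exist_ok=True) gives the same behaviour.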
@@ -417,9 +428,21 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore,
ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
"""Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
ExecMultiview_multicore args are only used for tests"""
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
os.makedirs(os.path.dirname(directory + "train_labels.csv"))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
trainIndices, testIndices = classificationIndices
trainLabels = labels[trainIndices]
np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
resultsMonoview = []
labelsNames = list(LABELS_DICTIONARY.values())
@@ -429,7 +452,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMonoview += (Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -444,7 +467,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
for stepIndex in range(nbMulticoreToDo):
resultsMultiview += Parallel(n_jobs=nbCores)(
delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
args.type, args.pathF, LABELS_DICTIONARY, randomState,
args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
**argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -452,8 +475,17 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
return [flag, resultsMonoview, resultsMultiview]
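
The monoview and multiview loops above process the experiment list in waves of at most nbCores jobs via Parallel/delayed; a minimal sketch of that scheduling pattern with a dummy task in place of ExecMonoview_multicore (assumes joblib is installed):

import math
from joblib import Parallel, delayed

def dummyTask(experimentIndex):
    # Stand-in for ExecMonoview_multicore / ExecMultiview_multicore.
    return experimentIndex ** 2

nbCores, nbExperiments = 2, 5
results = []
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    # Each wave launches at most nbCores jobs; the last wave may be smaller.
    results += Parallel(n_jobs=nbCores)(
        delayed(dummyTask)(stepIndex * nbCores + coreIndex)
        for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
# results == [0, 1, 4, 9, 16]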
def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
initMultiviewArguments=initMultiviewArguments):
pass
def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
"""Used to execute the needed benchmark(s) on multicore or mono-core functions
The execOneBenchmark and execOneBenchmark_multicore keyword args are only used in the tests"""
# TODO : find a way to flag
@@ -462,30 +494,29 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multi
results = []
if nbCores > 1:
if statsIter > 1 or nbMulticlass > 1:
nbExpsToDo = nbMulticlass*statsIter
nbExpsToDo = len(benchmarkArgumentsDictionaries)
nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
for stepIndex in nbMulticoreToDo:
results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
(coreIndex=coreIndex,
**argumentsDictionaries[coreIndex + stepIndex * nbCores])
**benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
else:
results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
else:
for arguments in argumentsDictionaries:
results += [execOneBenchmark(**arguments)]
for arguments in benchmarkArgumentsDictionaries:
results += [execOneBenchmarkMonoCore(**arguments)]
logging.debug("Done:\t Executing all the needed biclass benchmarks")
# Do everything with flagging
logging.debug("Start:\t Analyzing preds")
# getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
# getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
logging.debug("Done:\t Analyzing preds")
return results
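
Since every run returns [flag, resultsMonoview, resultsMultiview] with flag = [iterIndex, labelsCombination], downstream analysis can group the outputs per statistical iteration; a minimal sketch with made-up result placeholders:

fakeResults = [[[0, (0, 1)], ["mono_a"], ["multi_a"]],
               [[0, (0, 2)], ["mono_b"], ["multi_b"]],
               [[1, (0, 1)], ["mono_c"], ["multi_c"]]]

resultsByIteration = {}
for flag, resultsMonoview, resultsMultiview in fakeResults:
    iterIndex, labelsCombination = flag
    resultsByIteration.setdefault(iterIndex, []).append(
        (labelsCombination, resultsMonoview, resultsMultiview))
# resultsByIteration[0] holds both label combinations of the first iteration.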
def execClassif(arguments):
"""Main function to execute the benchmark"""
start = time.time()
@@ -502,7 +533,7 @@ def execClassif(arguments):
if statsIter > 1:
statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
else:
statsIterRandomStates = randomState
statsIterRandomStates = [randomState]
if args.name not in ["Fake", "Plausible"]:
getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
@@ -512,9 +543,9 @@
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes)
classificationIndices = execution.genSplits(statsIter, DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod)
multiclassLabels, labelsCombinations, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod, classificationIndices)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
@@ -549,9 +580,36 @@ def execClassif(arguments):
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
initKWARGS)
directories = execution.genDirecortiesNames(directory, statsIter, labelsIndices,
directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
multiclassMethod, LABELS_DICTIONARY)
# TODO : Gen arguments dictionaries
benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
labelsCombinations, oldIndicesMulticlass,
hyperParamSearch, args, kFolds,
statsIterRandomStates, metrics,
argumentDictionaries, benchmark)
nbMulticlass = len(labelsCombinations)
execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)
if statsIter > 1:
logging.debug("Start:\t Benchmark classification")
@@ -238,12 +238,9 @@ def initLogFile(args):
return resultDirectory
def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMethod):
def genSplits(labels, splitRatio, statsIterRandomStates):
"""Used to gen the train/test splits using one or multiple random states"""
for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
indices = oldIndices
splitsMulticlass = []
if statsIter > 1:
indices = np.arange(len(labels))
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
@@ -255,16 +252,8 @@ def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMe
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splits.append([trainIndices, testIndices])
splitsMulticlass.append(splits)
else:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splitsMulticlass.append((trainIndices, testIndices))
return splitsMulticlass
return splits
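
A minimal sketch of the rewritten genSplits logic, drawing one stratified train/test split per statistical-iteration random state (toy labels, seeds and split ratio are illustrative):

import numpy as np
import sklearn.model_selection

labels = np.array([0, 0, 0, 0, 1, 1, 1, 1])
splitRatio = 0.25
statsIterRandomStates = [np.random.RandomState(seed) for seed in (1, 2)]

indices = np.arange(len(labels))
splits = []
for randomState in statsIterRandomStates:
    foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                              random_state=randomState,
                                                              test_size=splitRatio)
    for train_fold, test_fold in foldsObj.split(indices, labels):
        splits.append([indices[train_fold], indices[test_fold]])
# splits[i] is the [trainIndices, testIndices] pair of the i-th statistical iteration.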
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
@@ -322,3 +311,32 @@ def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, l
labelName = labelDictionary[labelIndex]
directories.append(directory +labelName+"_vs_Rest/")
return directories
def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args,
kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark):
benchmarkArgumentDictionaries = []
for combinationIndex, labelsCombination in enumerate(labelsCombinations):
for iterIndex, iterRandomState in enumerate(statsIterRandomStates):
benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0:labelsDictionary[labelsCombination[0]],
1:labelsDictionary[labelsCombination[1]]},
"directory": directories[iterIndex]+
labelsDictionary[labelsCombination[0]]+
"vs"+
labelsDictionary[labelsCombination[1]]+"/",
"classificationIndices": oldIndicesMulticlass[combinationIndex][iterIndex],
"args": args,
"labels": multiclassLabels[combinationIndex],
"kFolds": kFolds[iterIndex],
"randomState": iterRandomState,
"hyperParamSearch": hyperParamSearch,
"metrics": metrics,
"argumentDictionaries": argumentDictionaries,
"benchmark": benchmark,
"views": None,
"viewsIndices": None,
"flag": [iterIndex, labelsCombination]}
benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary)
return benchmarkArgumentDictionaries
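
As a usage illustration, one dictionary is produced per (labels combination, statistical iteration) pair, and its output directory is derived from the two class names; a rough sketch with made-up class names and directories (none of these values come from the project):

labelsDictionary = {0: "cat", 1: "dog", 2: "bird"}
directories = ["Results/iter_1/", "Results/iter_2/"]
labelsCombination, iterIndex = (0, 2), 1

entry = {"LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]],
                               1: labelsDictionary[labelsCombination[1]]},
         "directory": directories[iterIndex]
                      + labelsDictionary[labelsCombination[0]]
                      + "vs"
                      + labelsDictionary[labelsCombination[1]] + "/",
         "flag": [iterIndex, labelsCombination]}
# entry["directory"] == "Results/iter_2/catvsbird/"
# With 3 classes (3 pairwise combinations) and 2 iterations, the full list has 6 entries.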