diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py
index dbbf55f5cf12cbc0807d3ee227f5f9ed9c382bf8..cb3f8bf3bce73a70b4a256af6e4589d6d0fba0c4 100644
--- a/Code/MonoMultiViewClassifiers/ExecClassif.py
+++ b/Code/MonoMultiViewClassifiers/ExecClassif.py
@@ -239,6 +239,7 @@ def getClassificationIndices(argumentsDictionaries, iterIndex):
 
     for argumentsDictionary in argumentsDictionaries:
         if argumentsDictionary["flag"][0]==iterIndex:
+            pass
 
 
 
@@ -338,7 +339,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
     pass
 
 
-def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
+def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
     """Used to tranform one versus one results in multiclass results and to publish it"""
     multiclassResults = [{} for _ in range(statsIter)]
     for iterIndex in range(statsIter):
@@ -356,9 +357,9 @@ def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLa
     for iterIndex, multiclassiterResult in enumerate(multiclassResults):
         for key, value in multiclassiterResult.items():
             multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
-    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
+    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
     multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
-    publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
+    publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
     return multiclassResults
 
 
@@ -372,11 +373,11 @@ def analyzeIter(results):
     pass
 
 
-def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
+def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
     if statsIter > 1:
         if nbMulticlass > 1:
             analyzeBiclass(results)
-            multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
+            multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
             analyzeIter(multiclassResults)
         else:
             biclassResults = analyzeBiclass(results)
@@ -390,15 +391,25 @@ def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclas
 
 def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
                      kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                     benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                     ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+                     benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                     ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                     initMultiviewArguments=initMultiviewArguments):
     """Used to run a benchmark using one core.
     ExecMonoview_multicore, initMultiviewArguments and ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                               coreIndex, args.type, args.pathF, randomState,
+                                               coreIndex, args.type, args.pathF, randomState, labels,
                                                hyperParamSearch=hyperParamSearch, metrics=metrics,
                                                nIter=args.CL_GS_iter, **argument)
                         for argument in argumentDictionaries["Monoview"]]
@@ -409,7 +420,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMultiview = []
     resultsMultiview += [
         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
-                                args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
+                                args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
                                 metrics=metrics, nIter=args.CL_GS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]
     return [flag, resultsMonoview, resultsMultiview]
@@ -417,9 +428,21 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
 
 def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None,
                                args=None, kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                               benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                               ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
-
+                               benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                               ExecMonoview_multicore=ExecMonoview_multicore,
+                               ExecMultiview_multicore=ExecMultiview_multicore,
+                               initMultiviewArguments=initMultiviewArguments):
+    """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
+    ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
@@ -429,7 +452,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMonoview += (Parallel(n_jobs=nbCores)(
             delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                            coreIndex, args.type, args.pathF, randomState,
+                                            coreIndex, args.type, args.pathF, randomState, labels,
                                             hyperParamSearch=hyperParamSearch,
                                             metrics=metrics, nIter=args.CL_GS_iter,
                                             **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -444,7 +467,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMultiview += Parallel(n_jobs=nbCores)(
             delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
-                                             args.type, args.pathF, LABELS_DICTIONARY, randomState,
+                                             args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
                                              hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                              **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -452,8 +475,17 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     return [flag, resultsMonoview, resultsMultiview]
 
 
-def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
-                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
+def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
+                             kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
+                             benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                             ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                             initMultiviewArguments=initMultiviewArguments):
+    pass
+
+
+def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
+                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
+                  execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
     """Used to execute the needed benchmark(s) on multicore or mono-core functions
     The execOneBenchmark and execOneBenchmark_multicore keywords args are only used in the tests"""
     # TODO : find a way to flag
@@ -462,30 +494,29 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multi
 
     results = []
     if nbCores > 1:
         if statsIter > 1 or nbMulticlass > 1:
-            nbExpsToDo = nbMulticlass*statsIter
+            nbExpsToDo = len(benchmarkArgumentsDictionaries)
             nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
             for stepIndex in nbMulticoreToDo:
                 results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
                                                      (coreIndex=coreIndex,
-                                                      **argumentsDictionaries[coreIndex + stepIndex * nbCores])
+                                                      **benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
                                                      for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
         else:
-            results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
+            results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
     else:
-        for arguments in argumentsDictionaries:
-            results += [execOneBenchmark(**arguments)]
+        for arguments in benchmarkArgumentsDictionaries:
+            results += [execOneBenchmarkMonoCore(**arguments)]
     logging.debug("Done:\t Executing all the needed biclass benchmarks")
 
     # Do everything with flagging
 
     logging.debug("Start:\t Analyzing preds")
-    # getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
+    # getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
     logging.debug("Done:\t Analyzing preds")
     return results
 
 
-
 def execClassif(arguments):
     """Main function to execute the benchmark"""
     start = time.time()
@@ -502,7 +533,7 @@ def execClassif(arguments):
     if statsIter > 1:
         statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
     else:
-        statsIterRandomStates = randomState
+        statsIterRandomStates = [randomState]
 
     if args.name not in ["Fake", "Plausible"]:
         getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
@@ -512,9 +543,9 @@ def execClassif(arguments):
     DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
                                              args.CL_classes)
 
-    classificationIndices = execution.genSplits(statsIter, DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
+    classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
 
-    multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod)
+    multiclassLabels, labelsCombinations, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, multiclassMethod, classificationIndices)
 
     kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
@@ -549,9 +580,36 @@
     argumentDictionaries = {"Monoview": [], "Multiview": []}
     argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS, initKWARGS)
-    directories = execution.genDirecortiesNames(directory, statsIter, labelsIndices,
+    directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
                                                 multiclassMethod, LABELS_DICTIONARY)
     # TODO : Gen arguments dictionaries
+    benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
+                                                                      labelsCombinations, oldIndicesMulticlass,
+                                                                      hyperParamSearch, args, kFolds,
+                                                                      statsIterRandomStates, metrics,
+                                                                      argumentDictionaries, benchmark)
+
+    nbMulticlass = len(labelsCombinations)
+
+    execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
     if statsIter > 1:
         logging.debug("Start:\t Benchmark classification")
@@ -644,4 +702,4 @@
     logging.info("Info:\t Total duration : "+str(d)+ " days, "+str(h)+" hours, "+str(m)+" mins, "+str(int(s))+"secs.")
 
     if statsIter > 1:
-pass
\ No newline at end of file
+        pass
diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index e358e85264d32f40bade21e54bcd8546148dcd06..ee934a2fd8e3c1b8c4fbd6c3d2731bd9974fd4be 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -238,33 +238,22 @@ def initLogFile(args):
     return resultDirectory
 
 
-def genSplits(statsIter, labels, splitRatio, statsIterRandomStates, multiclassMethod):
+def genSplits(labels, splitRatio, statsIterRandomStates):
     """Used to gen the train/test splits using one or multiple random states"""
-    for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
-        indices = oldIndices
-        splitsMulticlass = []
-        if statsIter > 1:
-            splits = []
-            for randomState in statsIterRandomStates:
-                foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
-                                                                          random_state=randomState,
-                                                                          test_size=splitRatio)
-                folds = foldsObj.split(indices, labels)
-                for fold in folds:
-                    train_fold, test_fold = fold
-                    trainIndices = indices[train_fold]
-                    testIndices = indices[test_fold]
-                    splits.append([trainIndices, testIndices])
-            splitsMulticlass.append(splits)
-        else:
-            foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
+    indices = np.arange(len(labels))
+    splits = []
+    for randomState in statsIterRandomStates:
+        foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
+                                                                  random_state=randomState,
+                                                                  test_size=splitRatio)
         folds = foldsObj.split(indices, labels)
         for fold in folds:
             train_fold, test_fold = fold
             trainIndices = indices[train_fold]
             testIndices = indices[test_fold]
-            splitsMulticlass.append((trainIndices, testIndices))
-    return splitsMulticlass
+            splits.append([trainIndices, testIndices])
+
+    return splits
 
 
 def genKFolds(statsIter, nbFolds, statsIterRandomStates):
@@ -321,4 +310,33 @@ def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, l
         for labelIndex in labelsIndices:
             labelName = labelDictionary[labelIndex]
             directories.append(directory +labelName+"_vs_Rest/")
-return directories
\ No newline at end of file
+    return directories
+
+
+def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args,
+                            kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark):
+    benchmarkArgumentDictionaries = []
+    for combinationIndex, labelsCombination in enumerate(labelsCombinations):
+        for iterIndex, iterRandomState in enumerate(statsIterRandomStates):
+            benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0:labelsDictionary[labelsCombination[0]],
+                                                                 1:labelsDictionary[labelsCombination[1]]},
+                                           "directory": directories[iterIndex]+
+                                                        labelsDictionary[labelsCombination[0]]+
+                                                        "vs"+
+                                                        labelsDictionary[labelsCombination[1]]+"/",
+                                           "classificationIndices": oldIndicesMulticlass[combinationIndex][iterIndex],
+                                           "args": args,
+                                           "labels": multiclassLabels[combinationIndex],
+                                           "kFolds": kFolds[iterIndex],
+                                           "randomState": iterRandomState,
+                                           "hyperParamSearch": hyperParamSearch,
+                                           "metrics": metrics,
+                                           "argumentDictionaries": argumentDictionaries,
+                                           "benchmark": benchmark,
+                                           "views": None,
+                                           "viewsIndices": None,
+                                           "flag": [iterIndex, labelsCombination]}
+            benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary)
+    return benchmarkArgumentDictionaries
+
+
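
Note (not part of the patch): the rewritten genSplits draws one stratified train/test split per entry of statsIterRandomStates, always over np.arange(len(labels)), instead of branching on statsIter and on per-combination index lists. Below is a minimal standalone sketch of that behaviour; the helper name genSplitsSketch and the toy labels are illustrative assumptions, not code from the repository.

import numpy as np
import sklearn.model_selection


def genSplitsSketch(labels, splitRatio, statsIterRandomStates):
    """One stratified train/test split per random state, mirroring the patched genSplits."""
    indices = np.arange(len(labels))
    splits = []
    for randomState in statsIterRandomStates:
        foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                                  random_state=randomState,
                                                                  test_size=splitRatio)
        for train_fold, test_fold in foldsObj.split(indices, labels):
            splits.append([indices[train_fold], indices[test_fold]])
    return splits


# Toy usage: two stats iterations over an 8-sample binary label vector.
labels = np.array([0, 0, 0, 0, 1, 1, 1, 1])
statsIterRandomStates = [np.random.RandomState(42), np.random.RandomState(7)]
for trainIndices, testIndices in genSplitsSketch(labels, 0.25, statsIterRandomStates):
    print(trainIndices, testIndices)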
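Note (not part of the patch): genArgumentDictionaries builds one dictionary per (labels combination, stats iteration) pair, and each benchmark run returns [flag, resultsMonoview, resultsMultiview] with flag = [iterIndex, labelsCombination], which is what getClassificationIndices filters on. The sketch below illustrates that wiring with hypothetical class names and simplified dictionaries (only the keys relevant to flagging are kept).

from collections import defaultdict

import numpy as np

# Hypothetical stand-ins for what execClassif would provide.
labelsDictionary = {0: "ClassA", 1: "ClassB", 2: "ClassC"}
labelsCombinations = [[0, 1], [0, 2], [1, 2]]
statsIterRandomStates = [np.random.RandomState(42), np.random.RandomState(43)]

# One dictionary per (combination, iteration), flagged the way genArgumentDictionaries does.
benchmarkArgumentDictionaries = [{"LABELS_DICTIONARY": {0: labelsDictionary[combination[0]],
                                                        1: labelsDictionary[combination[1]]},
                                  "flag": [iterIndex, combination]}
                                 for combination in labelsCombinations
                                 for iterIndex, _ in enumerate(statsIterRandomStates)]

# Each benchmark returns [flag, resultsMonoview, resultsMultiview]; regrouping the
# returned flags by their statsIter index mirrors the filter in getClassificationIndices.
fakeResults = [[d["flag"], [], []] for d in benchmarkArgumentDictionaries]
resultsPerIteration = defaultdict(list)
for flag, resultsMonoview, resultsMultiview in fakeResults:
    resultsPerIteration[flag[0]].append(flag[1])

for iterIndex in sorted(resultsPerIteration):
    print(iterIndex, resultsPerIteration[iterIndex])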