diff --git a/Code/Exec.py b/Code/Exec.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index fc98998c5e9aaa039a92f939e3b4a73fa0805deb..a4b70ca03ef6cf9d2e537c77e76942f60d1ef8f5 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -11,6 +11,7 @@ from joblib import Parallel, delayed import numpy as np import math import matplotlib +import h5py # Import own modules import Multiview @@ -100,7 +101,7 @@ def initMonoviewKWARGS(args, classifiersNames): return monoviewKWARGS -def initKWARGS(args, benchmark): +def initKWARGSFunc(args, benchmark): if "Monoview" in benchmark: monoviewKWARGS = initMonoviewKWARGS(args, benchmark["Monoview"]) else: @@ -108,20 +109,20 @@ def initKWARGS(args, benchmark): return monoviewKWARGS -def lateFusionSetArgs(views, viewsIndices, classes, method, - classifiersNames, classifiersConfig, fusionMethodConfig): - arguments = {"CL_type": "Fusion", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(classes), - "LABELS_NAMES": args.CL_classes, - "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": method, - "classifiersNames": classifiersNames, - "classifiersConfigs": classifiersConfig, - 'fusionMethodConfig': fusionMethodConfig, - "nbView": (len(viewsIndices))}} - return arguments +# def lateFusionSetArgs(views, viewsIndices, classes, method, +# classifiersNames, classifiersConfig, fusionMethodConfig): +# arguments = {"CL_type": "Fusion", +# "views": views, +# "NB_VIEW": len(views), +# "viewsIndices": viewsIndices, +# "NB_CLASS": len(classes), +# "LABELS_NAMES": args.CL_classes, +# "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": method, +# "classifiersNames": classifiersNames, +# "classifiersConfigs": classifiersConfig, +# 'fusionMethodConfig': fusionMethodConfig, +# "nbView": (len(viewsIndices))}} +# return arguments def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, @@ -178,6 +179,7 @@ def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, labels = np.array( [resultMonoview[1][3] for resultMonoview in resultsMonoview] + [resultMultiview[3] for resultMultiview in resultsMultiview]).transpose() + DATASET = h5py.File(args.pathF + args.name + str(0) + ".hdf5", "r") trueLabels = DATASET.get("Labels").value times = [dataBaseTime, monoviewTime, multiviewTime] results = (resultsMonoview, resultsMultiview) @@ -273,130 +275,130 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, # _______________ # # __ EXECUTION __ # # _______________ # +def execClassif(arguments): + testVersions() + start = time.time() + args = execution.parseTheArgs(arguments) + + os.nice(args.nice) + nbCores = args.CL_cores + statsIter = args.CL_statsiter + hyperParamSearch = args.CL_HPS_type + + directory = execution.initLogFile(args) + randomState = execution.initRandomState(args.randomState, directory) + if statsIter > 1: + statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)] + else: + statsIterRandomStates = randomState -testVersions() -start = time.time() -args = execution.parseTheArgs(sys.argv[1:]) - -os.nice(args.nice) -nbCores = args.CL_cores -statsIter = args.CL_statsiter -hyperParamSearch = args.CL_HPS_type - -directory = execution.initLogFile(args) -randomState = execution.initRandomState(args.randomState, directory) -if statsIter > 1: - statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)] -else: - statsIterRandomStates = randomState + if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]: + getDatabase = getattr(DB, "getClassicDB" + args.type[1:]) + else: + getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:]) -if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]: - getDatabase = getattr(DB, "getClassicDB" + args.type[1:]) -else: - getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:]) + DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nb_class, + args.CL_classes) -DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nb_class, - args.CL_classes) + datasetLength = DATASET.get("Metadata").attrs["datasetLength"] + indices = np.arange(datasetLength) + classificationIndices = execution.genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates) -datasetLength = DATASET.get("Metadata").attrs["datasetLength"] -indices = np.arange(datasetLength) -classificationIndices = execution.genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates) + kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates) -kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates) + datasetFiles = Dataset.initMultipleDatasets(args, nbCores) -datasetFiles = Dataset.initMultipleDatasets(args, nbCores) + views, viewsIndices, allViews = execution.initViews(DATASET, args) + if not views: + raise ValueError, "Empty views list, modify selected views to match dataset " + args.views -views, viewsIndices, allViews = execution.initViews(DATASET, args) -if not views: - raise ValueError, "Empty views list, modify selected views to match dataset " + args.views + NB_VIEW = len(views) + NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] -NB_VIEW = len(views) -NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] + metrics = [metric.split(":") for metric in args.CL_metrics] + if metrics == [[""]]: + metricsNames = [name for _, name, isPackage + in pkgutil.iter_modules(['Metrics']) if not isPackage and name != "log_loss"] + metrics = [[metricName] for metricName in metricsNames] + metrics = arangeMetrics(metrics, args.CL_metric_princ) + for metricIndex, metric in enumerate(metrics): + if len(metric) == 1: + metrics[metricIndex] = [metric[0], None] -metrics = [metric.split(":") for metric in args.CL_metrics] -if metrics == [[""]]: - metricsNames = [name for _, name, isPackage - in pkgutil.iter_modules(['Metrics']) if not isPackage and name != "log_loss"] - metrics = [[metricName] for metricName in metricsNames] - metrics = arangeMetrics(metrics, args.CL_metric_princ) -for metricIndex, metric in enumerate(metrics): - if len(metric) == 1: - metrics[metricIndex] = [metric[0], None] + logging.info("Start:\t Finding all available mono- & multiview algorithms") -logging.info("Start:\t Finding all available mono- & multiview algorithms") + benchmark = initBenchmark(args) -benchmark = initBenchmark(args) + initKWARGS = initKWARGSFunc(args, benchmark) -initKWARGS = initKWARGS(args, benchmark) + dataBaseTime = time.time() - start -dataBaseTime = time.time() - start + argumentDictionaries = {"Monoview": [], "Multiview": []} + argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, + initKWARGS) + directories = execution.genDirecortiesNames(directory, statsIter) -argumentDictionaries = {"Monoview": [], "Multiview": []} -argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, - initKWARGS) -directories = execution.genDirecortiesNames(directory, statsIter) + if statsIter > 1: + for statIterIndex in range(statsIter): + if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")): + try: + os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv")) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + trainIndices, testIndices = classificationIndices[statIterIndex] + trainLabels = DATASET.get("Labels").value[trainIndices] + np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",") + if nbCores > 1: + iterResults = [] + nbExperiments = statsIter + for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): + iterResults += (Parallel(n_jobs=nbCores)( + delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, + directories[coreIndex + stepIndex * nbCores], args, + classificationIndices[coreIndex + stepIndex * nbCores], + kFolds[coreIndex + stepIndex * nbCores], + statsIterRandomStates[coreIndex + stepIndex * nbCores], + hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, + start, benchmark, + views) + for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) + logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") + datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) + logging.debug("Start:\t Deleting datasets for multiprocessing") + else: + iterResults = [] + for iterIndex in range(statsIter): + if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")): + try: + os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv")) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + trainIndices, testIndices = classificationIndices[iterIndex] + trainLabels = DATASET.get("Labels").value[trainIndices] + np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",") + iterResults.append( + classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, + classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], + hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, + views)) + analyzeIterResults(iterResults, args.name, metrics, directory) -if statsIter > 1: - for statIterIndex in range(statsIter): - if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")): + else: + if not os.path.exists(os.path.dirname(directories + "train_labels.csv")): try: - os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv")) + os.makedirs(os.path.dirname(directories + "train_labels.csv")) except OSError as exc: if exc.errno != errno.EEXIST: raise - trainIndices, testIndices = classificationIndices[statIterIndex] + trainIndices, testIndices = classificationIndices trainLabels = DATASET.get("Labels").value[trainIndices] - np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",") - if nbCores > 1: - iterResults = [] - nbExperiments = statsIter - for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): - iterResults += (Parallel(n_jobs=nbCores)( - delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, - directories[coreIndex + stepIndex * nbCores], args, - classificationIndices[coreIndex + stepIndex * nbCores], - kFolds[coreIndex + stepIndex * nbCores], - statsIterRandomStates[coreIndex + stepIndex * nbCores], - hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, - start, benchmark, - views) - for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) - logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") - datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) - logging.debug("Start:\t Deleting datasets for multiprocessing") - else: - iterResults = [] - for iterIndex in range(statsIter): - if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")): - try: - os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv")) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - trainIndices, testIndices = classificationIndices[iterIndex] - trainLabels = DATASET.get("Labels").value[trainIndices] - np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",") - iterResults.append( - classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, - classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], - hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, - views)) - analyzeIterResults(iterResults, args.name, metrics, directory) - -else: - if not os.path.exists(os.path.dirname(directories + "train_labels.csv")): - try: - os.makedirs(os.path.dirname(directories + "train_labels.csv")) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - trainIndices, testIndices = classificationIndices - trainLabels = DATASET.get("Labels").value[trainIndices] - np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",") - res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, - kFolds, - statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, - benchmark, views) - -if statsIter > 1: - pass + np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",") + res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, + kFolds, + statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, + benchmark, views) + + if statsIter > 1: + pass