diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py index c0c311984fd2b2cadf22b6b926fdde829558810a..e7e6ca9d4d40272a181d1cfcf6d9a0aa3a5ae48d 100644 --- a/Code/MonoMultiViewClassifiers/ExecClassif.py +++ b/Code/MonoMultiViewClassifiers/ExecClassif.py @@ -167,7 +167,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, coreIndex, args.type, args.pathF, randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_GS_iter, **argument) + nIter=args.CL_HPS_iter, **argument) for argument in argumentDictionaries["Monoview"]] logging.debug("Done:\t Monoview benchmark") @@ -181,7 +181,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class resultsMultiview += [ ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, **arguments) + metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in argumentDictionaries["Multiview"]] logging.debug("Done:\t Multiview benchmark") @@ -220,7 +220,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds, coreIndex, args.type, args.pathF, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, + metrics=metrics, nIter=args.CL_HPS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) logging.debug("Done:\t Monoview benchmark") @@ -238,7 +238,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non resultsMultiview += 
Parallel(n_jobs=nbCores)( delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, + hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_HPS_iter, **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) logging.debug("Done:\t Multiview benchmark") @@ -280,7 +280,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non resultsMonoview += [ExecMonoview(directory, X, Y, args.name, labelsNames, classificationIndices, kFolds, 1, args.type, args.pathF, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_GS_iter, **arguments)] + nIter=args.CL_HPS_iter, **arguments)] logging.debug("Done:\t Monoview benchmark") logging.debug("Start:\t Multiview arguments initialization") @@ -294,7 +294,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non resultsMultiview += [ ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_GS_iter, **arguments)] + metrics=metrics, nIter=args.CL_HPS_iter, **arguments)] logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview, resultsMultiview] @@ -444,7 +444,7 @@ def execClassif(arguments): # resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, # coreIndex, args.type, args.pathF, randomState, # hyperParamSearch=hyperParamSearch, -# metrics=metrics, nIter=args.CL_GS_iter, +# metrics=metrics, nIter=args.CL_HPS_iter, # **arguments) # for arguments in argumentDictionaries["Monoview"]] # monoviewTime = time.time() - dataBaseTime - start @@ -456,7 +456,7 @@ def 
execClassif(arguments): # resultsMultiview += [ # ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, # args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, -# metrics=metrics, nIter=args.CL_GS_iter, **arguments) +# metrics=metrics, nIter=args.CL_HPS_iter, **arguments) # for arguments in argumentDictionaries["Multiview"]] # multiviewTime = time.time() - monoviewTime - dataBaseTime - start # @@ -501,14 +501,14 @@ def execClassif(arguments): # args.type, # args.pathF, # LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, -# metrics=metrics, nIter=args.CL_GS_iter, +# metrics=metrics, nIter=args.CL_HPS_iter, # **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) # for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) # else: # resultsMultiview = [ # ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF, # LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch, -# metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in +# metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in # argumentDictionaries["Multiview"]] # multiviewTime = time.time() - monoviewTime - dataBaseTime - start # if nbCores > 1: diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index fc0102a0aa970d2398ac9281a6a6fc0ffc5a7c05..4f6d844a5d69d32d53b81d6fc4576df49fec9809 100644 --- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -196,85 +196,106 @@ if __name__ == '__main__': import argparse import pickle + from ..utils import Dataset + parser = argparse.ArgumentParser( description='This methods is used to execute a multiclass classification with one single view. 
', formatter_class=argparse.ArgumentDefaultsHelpFormatter) groupStandard = parser.add_argument_group('Standard arguments') groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') - groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5") groupStandard.add_argument('--name', metavar='STRING', action='store', - help='Name of Database (default: %(default)s)', default='DB') + help='Name of Database', default='Plausible') + groupStandard.add_argument('--cl_name', metavar='STRING', action='store', + help='THe name of the monoview classifier to use', default='DecisionTree') groupStandard.add_argument('--view', metavar='STRING', action='store', - help='Name of Feature for Classification (default: %(default)s)', default='View0') + help='Name of the view used', default='View0') groupStandard.add_argument('--pathF', metavar='STRING', action='store', - help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') + help='Path to the database hdf5 file', default='../../../Data/Plausible') groupStandard.add_argument('--directory', metavar='STRING', action='store', - help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') + help='Path of the output directory', default='') groupStandard.add_argument('--labelsNames', metavar='STRING', action='store', nargs='+', - help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') + help='Name of the labels used for classification', default=['Yes', 'No']) groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store', - help='Name of classLabels-Description CSV-file (default: %(default)s)', - default='classLabels-Description.csv') - groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, - default=1) + help='Path to the classificationIndices pickle file', + default='') + 
groupStandard.add_argument('--KFolds', metavar='STRING', action='store', + help='Path to the kFolds pickle file', + default='') + groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', + type=int, default=1) groupStandard.add_argument('--randomState', metavar='INT', action='store', help='Seed for the random state or pickable randomstate file', default=42) groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store', - help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch') - groupStandard.add_argument('--metrics', metavar='STRING', action='store', nargs="+", - help='Metrics used in the experimentation, the first will be the one used in CV', - default=['']) - groupStandard.add_argument('--nIter', metavar='INT', action='store', help='Number of itetarion in hyper parameter search', type=int, + help='The type of method used to search the best set of hyper parameters', + default='randomizedSearch') + groupStandard.add_argument('--metrics', metavar='STRING', action='store', + help='Path to the pickle file describing the metricsused to analyze the performance', + default='') + groupStandard.add_argument('--kwargs', metavar='STRING', action='store', + help='Path to the pickle file containing the key-words arguments used for classification', + default='') + groupStandard.add_argument('--nIter', metavar='INT', action='store', + help='Number of itetarion in hyper parameter search', type=int, default=10) args = parser.parse_args() directory = args.directory name = args.name + classifierName = args.cl_name labelsNames = args.labelsNames - classificationIndices = args.classificationIndices - KFolds = args.KFolds + viewName = args.view + with open(args.classificationIndices, 'rb') as handle: + classificationIndices = pickle.load(handle) + with open(args.KFolds, 'rb') as handle: + KFolds = pickle.load(handle) nbCores = args.nbCores - databaseType = None path = 
args.pathF - randomState = args.randomState + with open(args.randomState, 'rb') as handle: + randomState = pickle.load(handle) hyperParamSearch = args.hyperParamSearch - metrics = args.metrics + with open(args.metrics, 'rb') as handle: + metrics = pickle.load(handle) nIter = args.nIter - kwargs = args.kwargs - - # Extract the data using MPI - X = None - Y = None + with open(args.kwargs, 'rb') as handle: + kwargs = pickle.load(handle) - logfilename = "gen a goodlogfilename" + databaseType = None + # Extract the data using MPI + X, Y = Dataset.getMonoviewShared(path, name, viewName) - logfile = directory + logfilename - if os.path.isfile(logfile + ".log"): + # Init log + logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + name + "-"+ viewName +"-" + classifierName +'-LOG' + if not os.path.exists(os.path.dirname(directory + logFileName)): + try: + os.makedirs(os.path.dirname(directory + logFileName)) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + logFile = directory + logFileName + if os.path.isfile(logFile + ".log"): for i in range(1, 20): - testFileName = logfilename + "-" + str(i) + ".log" - if not os.path.isfile(directory + testFileName): - logfile = directory + testFileName + testFileName = logFileName + "-" + str(i) + ".log" + if not (os.path.isfile(directory + testFileName)): + logFile = directory + testFileName break else: - logfile += ".log" - - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, + logFile += ".log" + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG, filemode='w') - if args.log: logging.getLogger().addHandler(logging.StreamHandler()) - + # Computing on multiple cores res = ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **kwargs) with open(directory + "res.pickle", 
def getMonoviewShared(path, name, viewName, labelsNames=None, classificationIndices=None):
    """Return the view data and labels for a monoview classification task.

    Opens the HDF5 dataset file at ``path + name + ".hdf5"`` read-only and
    extracts the requested view plus the "Labels" dataset.

    ATM is not used with shared memory, but soon :)

    Parameters
    ----------
    path : str
        Directory (with trailing separator) containing the dataset file.
    name : str
        Database name; the file read is ``path + name + ".hdf5"``.
    viewName : str
        Name of the HDF5 dataset holding the view to classify.
    labelsNames, classificationIndices : optional
        Currently unused; kept optional so callers may pass only the three
        positional arguments (as ExecClassifMonoView's __main__ does).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The view matrix X and the labels vector Y.
    """
    # Open read-only: mode "w" would truncate (destroy) the existing dataset
    # file before anything could be read from it.
    with h5py.File(path + name + ".hdf5", "r") as datasetFile:
        # dataset[()] reads the full array; the .value attribute is
        # deprecated in h5py.
        X = datasetFile.get(viewName)[()]
        Y = datasetFile.get("Labels")[()]
    return X, Y
to init the directory where the preds will be stored and the log file""" resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/" - logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join( + logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + ''.join(args.CL_type) + "-" + "_".join( args.views) + "-" + args.name + "-LOG" if not os.path.exists(os.path.dirname(resultDirectory + logFileName)): try: