diff --git a/config_files/config.ini b/config_files/config.ini index 29b0823c8a3aead18d720d3f8c086bb8979e7915..027ca27a52ab8706682e1dd4770a103aa8cce11f 100644 --- a/config_files/config.ini +++ b/config_files/config.ini @@ -27,7 +27,7 @@ type = list_str ; Monoview Multiview algos_monoview = list_str ; all algos_multiview = list_str ; all statsiter = int ; 2 -metrics = list_str ; all +metrics = list_str ; accuracy_score f1_score metric_princ = str ; f1_score HPS_type = str ; randomizedSearch HPS_iter = int ; 2 @@ -62,6 +62,11 @@ n_estimators = list_int ; 50 base_estimator = list_str ; DecisionTreeClassifier n_stumps = list_int ; 1 +[AdaboostPregen10] +n_estimators = list_int ; 50 +base_estimator = list_str ; DecisionTreeClassifier +n_stumps = list_int ; 1 + [AdaboostGraalpy] n_iterations = list_int ; 50 n_stumps = list_int ; 1 @@ -108,6 +113,30 @@ n_stumps = list_int ; 1 n_max_iterations = list_int ; 10 n_stumps = list_int ; 1 +[CGDesc10] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + +[CGreed] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + +[QarBoost] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + +[QarBoostNC3] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + +[QarBoostv2] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + +[QarBoostv3] +n_max_iterations = list_int ; 10 +n_stumps = list_int ; 1 + [CBBoost] n_max_iterations = list_int ; 10 n_stumps = list_int ; 1 diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py index 25b3e5acc143f602b86313651a97649838828eb9..4559834641ffd30f1e6931d031159481e63d4b23 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py +++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py @@ -232,14 +232,16 @@ def initMonoviewKWARGS(args, classifiersNames): monoviewKWARGS = {} for classifiersName in classifiersNames: try: - classifierModule = getattr(MonoviewClassifiers, classifiersName) + getattr(MonoviewClassifiers, classifiersName) except AttributeError: raise AttributeError( classifiersName + " is not implemented in MonoviewClassifiers, " "please specify the name of the file in MonoviewClassifiers") - monoviewKWARGS[ - classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs( - args) + try: + monoviewKWARGS[ + classifiersName + "KWARGSInit"] = args[classifiersName] + except KeyError: + raise KeyError("No configuration was provided for "+classifiersName+", please add it's configuration to the .ini file") logging.debug("Done:\t Initializing Monoview classifiers arguments") return monoviewKWARGS @@ -336,13 +338,13 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, logging.debug("Start:\t Monoview benchmark") resultsMonoview += [ - ExecMonoview_multicore(directory, args.name, labelsNames, + ExecMonoview_multicore(directory, args["Base"]["name"], labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, randomState, + coreIndex, args["Base"]["type"], args["Base"]["pathf"], randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, **argument) + nIter=args["Classification"]["hps_iter"], **argument) for argument in argumentDictionaries["Monoview"]] logging.debug("Done:\t Monoview benchmark") @@ -357,11 +359,11 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, logging.debug("Start:\t Multiview benchmark") resultsMultiview = [ - ExecMultiview_multicore(directory, coreIndex, args.name, - classificationIndices, kFolds, args.type, - args.pathF, LABELS_DICTIONARY, randomState, + ExecMultiview_multicore(directory, coreIndex, args["Base"]["name"], + classificationIndices, kFolds, args["Base"]["type"], + args["Base"]["pathf"], LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_HPS_iter, + metrics=metrics, nIter=args["Classification"]["hps_iter"], **arguments) for arguments in argumentDictionaries["Multiview"]] logging.debug("Done:\t Multiview benchmark") @@ -392,13 +394,13 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) for stepIndex in range(nbMulticoreToDo): resultsMonoview += (Parallel(n_jobs=nbCores)( - delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, + delayed(ExecMonoview_multicore)(directory, args["Base"]["name"], labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, + coreIndex, args["Base"]["type"], args["Base"]["pathf"], randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, + nIter=args["Classification"]["hps_iter"], **argumentDictionaries["Monoview"][ coreIndex + stepIndex * nbCores]) for coreIndex in @@ -420,14 +422,14 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) for stepIndex in range(nbMulticoreToDo): resultsMultiview += Parallel(n_jobs=nbCores)( - delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, + delayed(ExecMultiview_multicore)(directory, coreIndex, args["Base"]["name"], classificationIndices, kFolds, - args.type, args.pathF, + args["Base"]["type"], args["Base"]["pathf"], LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, + nIter=args["Classification"]["hps_iter"], ** argumentDictionaries["Multiview"][ stepIndex * nbCores + coreIndex]) @@ -457,11 +459,11 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, X = DATASET.get("View" + str(arguments["viewIndex"])) Y = labels resultsMonoview += [ - ExecMonoview(directory, X, Y, args.name, labelsNames, + ExecMonoview(directory, X, Y, args["Base"]["name"], labelsNames, classificationIndices, kFolds, - 1, args.type, args.pathF, randomState, + 1, args["Base"]["type"], args["Base"]["pathf"], randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, **arguments)] + nIter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t Monoview benchmark") logging.debug("Start:\t Multiview arguments initialization") @@ -478,11 +480,11 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, resultsMultiview = [] for arguments in argumentDictionaries["Multiview"]: resultsMultiview += [ - ExecMultiview(directory, DATASET, args.name, classificationIndices, - kFolds, 1, args.type, - args.pathF, LABELS_DICTIONARY, randomState, labels, + ExecMultiview(directory, DATASET, args["Base"]["name"], classificationIndices, + kFolds, 1, args["Base"]["type"], + args["Base"]["pathf"], LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_HPS_iter, **arguments)] + metrics=metrics, nIter=args["Classification"]["hps_iter"], **arguments)] logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview + resultsMultiview] @@ -633,11 +635,12 @@ def execClassif(arguments): views, viewsIndices, allViews = execution.initViews(DATASET, args["Base"]["views"]) viewsDictionary = genViewsDictionnary(DATASET, views) + print(viewsDictionary) nbViews = len(views) NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] metrics = [metric.split(":") for metric in args["Classification"]["metrics"]] - if metrics == [[""]]: + if metrics == [["all"]]: metricsNames = [name for _, name, isPackage in pkgutil.iter_modules( ['./MonoMultiViewClassifiers/Metrics']) if @@ -664,7 +667,6 @@ def execClassif(arguments): argumentDictionaries, benchmark, nbViews, views, viewsIndices) nbMulticlass = len(labelsCombinations) - results_mean_stds = execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries, splits, directories, directory, multiclassLabels, metrics, LABELS_DICTIONARY, diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 0e6890857c8e9b56801acf2d759f1f9ccb00bb32..27f9d69d23e22c2ccef05fdbda05c18031e13a37 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -228,132 +228,132 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, imagesAnalysis[imageName].savefig( outputFileName + imageName + '.png', transparent=True) - -if __name__ == '__main__': - """The goal of this part of the module is to be able to execute a monoview experimentation - on a node of a cluster independently. - So one need to fill in all the ExecMonoview function arguments with the parse arg function - It could be a good idea to use pickle to store all the 'simple' args in order to reload them easily""" - import argparse - import pickle - - from ..utils import Dataset - - parser = argparse.ArgumentParser( - description='This methods is used to execute a multiclass classification with one single view. ', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('-log', action='store_true', - help='Use option to activate Logging to Console') - groupStandard.add_argument('--name', metavar='STRING', action='store', - help='Name of Database', default='Plausible') - groupStandard.add_argument('--cl_name', metavar='STRING', action='store', - help='THe name of the monoview classifier to use', - default='DecisionTree') - groupStandard.add_argument('--view', metavar='STRING', action='store', - help='Name of the view used', default='View0') - groupStandard.add_argument('--pathF', metavar='STRING', action='store', - help='Path to the database hdf5 file', - default='../../../Data/Plausible') - groupStandard.add_argument('--directory', metavar='STRING', action='store', - help='Path of the output directory', default='') - groupStandard.add_argument('--labelsNames', metavar='STRING', - action='store', nargs='+', - help='Name of the labels used for classification', - default=['Yes', 'No']) - groupStandard.add_argument('--classificationIndices', metavar='STRING', - action='store', - help='Path to the classificationIndices pickle file', - default='') - groupStandard.add_argument('--KFolds', metavar='STRING', action='store', - help='Path to the kFolds pickle file', - default='') - groupStandard.add_argument('--nbCores', metavar='INT', action='store', - help='Number of cores, -1 for all', - type=int, default=1) - groupStandard.add_argument('--randomState', metavar='INT', action='store', - help='Seed for the random state or pickable randomstate file', - default=42) - groupStandard.add_argument('--hyperParamSearch', metavar='STRING', - action='store', - help='The type of method used to search the best set of hyper parameters', - default='randomizedSearch') - groupStandard.add_argument('--metrics', metavar='STRING', action='store', - help='Path to the pickle file describing the metricsused to analyze the performance', - default='') - groupStandard.add_argument('--kwargs', metavar='STRING', action='store', - help='Path to the pickle file containing the key-words arguments used for classification', - default='') - groupStandard.add_argument('--nIter', metavar='INT', action='store', - help='Number of itetarion in hyper parameter search', - type=int, - default=10) - - args = parser.parse_args() - - directory = args.directory - name = args.name - classifierName = args.cl_name - labelsNames = args.labelsNames - viewName = args.view - with open(args.classificationIndices, 'rb') as handle: - classificationIndices = pickle.load(handle) - with open(args.KFolds, 'rb') as handle: - KFolds = pickle.load(handle) - nbCores = args.nbCores - path = args.pathF - with open(args.randomState, 'rb') as handle: - randomState = pickle.load(handle) - hyperParamSearch = args.hyperParamSearch - with open(args.metrics, 'rb') as handle: - metrics = pickle.load(handle) - nIter = args.nIter - with open(args.kwargs, 'rb') as handle: - kwargs = pickle.load(handle) - - databaseType = None - - # Extract the data using MPI - X, Y = Dataset.getMonoviewShared(path, name, viewName) - - # Init log - logFileName = time.strftime( - "%Y_%m_%d-%H_%M_%S") + "-" + name + "-" + viewName + "-" + classifierName + '-LOG' - if not os.path.exists(os.path.dirname(directory + logFileName)): - try: - os.makedirs(os.path.dirname(directory + logFileName)) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - logFile = directory + logFileName - if os.path.isfile(logFile + ".log"): - for i in range(1, 20): - testFileName = logFileName + "-" + str(i) + ".log" - if not (os.path.isfile(directory + testFileName)): - logFile = directory + testFileName - break - else: - logFile += ".log" - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', - filename=logFile, level=logging.DEBUG, - filemode='w') - if args.log: - logging.getLogger().addHandler(logging.StreamHandler()) - - # Computing on multiple cores - res = ExecMonoview(directory, X, Y, name, labelsNames, - classificationIndices, KFolds, nbCores, databaseType, - path, - randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=nIter, **kwargs) - - with open(directory + "res.pickle", "wb") as handle: - pickle.dump(res, handle) - - # Pickle the res in a file to be reused. - # Go put a token in the token files without breaking everything. - - # Need to write a function to be able to know the timeu sed - # for a monoview experimentation approximately and the ressource it uses to write automatically the file in the shell - # it will have to be a not-too close approx as the taskswont be long and Ram-o-phage +# +# if __name__ == '__main__': +# """The goal of this part of the module is to be able to execute a monoview experimentation +# on a node of a cluster independently. +# So one need to fill in all the ExecMonoview function arguments with the parse arg function +# It could be a good idea to use pickle to store all the 'simple' args in order to reload them easily""" +# import argparse +# import pickle +# +# from ..utils import Dataset +# +# parser = argparse.ArgumentParser( +# description='This methods is used to execute a multiclass classification with one single view. ', +# formatter_class=argparse.ArgumentDefaultsHelpFormatter) +# +# groupStandard = parser.add_argument_group('Standard arguments') +# groupStandard.add_argument('-log', action='store_true', +# help='Use option to activate Logging to Console') +# groupStandard.add_argument('--name', metavar='STRING', action='store', +# help='Name of Database', default='Plausible') +# groupStandard.add_argument('--cl_name', metavar='STRING', action='store', +# help='THe name of the monoview classifier to use', +# default='DecisionTree') +# groupStandard.add_argument('--view', metavar='STRING', action='store', +# help='Name of the view used', default='View0') +# groupStandard.add_argument('--pathF', metavar='STRING', action='store', +# help='Path to the database hdf5 file', +# default='../../../Data/Plausible') +# groupStandard.add_argument('--directory', metavar='STRING', action='store', +# help='Path of the output directory', default='') +# groupStandard.add_argument('--labelsNames', metavar='STRING', +# action='store', nargs='+', +# help='Name of the labels used for classification', +# default=['Yes', 'No']) +# groupStandard.add_argument('--classificationIndices', metavar='STRING', +# action='store', +# help='Path to the classificationIndices pickle file', +# default='') +# groupStandard.add_argument('--KFolds', metavar='STRING', action='store', +# help='Path to the kFolds pickle file', +# default='') +# groupStandard.add_argument('--nbCores', metavar='INT', action='store', +# help='Number of cores, -1 for all', +# type=int, default=1) +# groupStandard.add_argument('--randomState', metavar='INT', action='store', +# help='Seed for the random state or pickable randomstate file', +# default=42) +# groupStandard.add_argument('--hyperParamSearch', metavar='STRING', +# action='store', +# help='The type of method used to search the best set of hyper parameters', +# default='randomizedSearch') +# groupStandard.add_argument('--metrics', metavar='STRING', action='store', +# help='Path to the pickle file describing the metricsused to analyze the performance', +# default='') +# groupStandard.add_argument('--kwargs', metavar='STRING', action='store', +# help='Path to the pickle file containing the key-words arguments used for classification', +# default='') +# groupStandard.add_argument('--nIter', metavar='INT', action='store', +# help='Number of itetarion in hyper parameter search', +# type=int, +# default=10) +# +# args = parser.parse_args() +# +# directory = args.directory +# name = args.name +# classifierName = args.cl_name +# labelsNames = args.labelsNames +# viewName = args.view +# with open(args.classificationIndices, 'rb') as handle: +# classificationIndices = pickle.load(handle) +# with open(args.KFolds, 'rb') as handle: +# KFolds = pickle.load(handle) +# nbCores = args.nbCores +# path = args.pathF +# with open(args.randomState, 'rb') as handle: +# randomState = pickle.load(handle) +# hyperParamSearch = args.hyperParamSearch +# with open(args.metrics, 'rb') as handle: +# metrics = pickle.load(handle) +# nIter = args.nIter +# with open(args.kwargs, 'rb') as handle: +# kwargs = pickle.load(handle) +# +# databaseType = None +# +# # Extract the data using MPI +# X, Y = Dataset.getMonoviewShared(path, name, viewName) +# +# # Init log +# logFileName = time.strftime( +# "%Y_%m_%d-%H_%M_%S") + "-" + name + "-" + viewName + "-" + classifierName + '-LOG' +# if not os.path.exists(os.path.dirname(directory + logFileName)): +# try: +# os.makedirs(os.path.dirname(directory + logFileName)) +# except OSError as exc: +# if exc.errno != errno.EEXIST: +# raise +# logFile = directory + logFileName +# if os.path.isfile(logFile + ".log"): +# for i in range(1, 20): +# testFileName = logFileName + "-" + str(i) + ".log" +# if not (os.path.isfile(directory + testFileName)): +# logFile = directory + testFileName +# break +# else: +# logFile += ".log" +# logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', +# filename=logFile, level=logging.DEBUG, +# filemode='w') +# if args.log: +# logging.getLogger().addHandler(logging.StreamHandler()) +# +# # Computing on multiple cores +# res = ExecMonoview(directory, X, Y, name, labelsNames, +# classificationIndices, KFolds, nbCores, databaseType, +# path, +# randomState, hyperParamSearch=hyperParamSearch, +# metrics=metrics, nIter=nIter, **kwargs) +# +# with open(directory + "res.pickle", "wb") as handle: +# pickle.dump(res, handle) +# +# # Pickle the res in a file to be reused. +# # Go put a token in the token files without breaking everything. +# +# # Need to write a function to be able to know the timeu sed +# # for a monoview experimentation approximately and the ressource it uses to write automatically the file in the shell +# # it will have to be a not-too close approx as the taskswont be long and Ram-o-phage diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py index 668ff11bf85654f458e998ef66f9ed2c28ad2ef4..5821bf0261b618ba2d945c3e20a20e70bce25202 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py @@ -68,7 +68,7 @@ def deleteHDF5(benchmarkArgumentsDictionaries, nbCores, DATASET): logging.debug("Start:\t Deleting datasets for multiprocessing") for coreIndex in range(nbCores): - os.remove(args.pathF + args.name + str(coreIndex) + ".hdf5") + os.remove(args["Base"]["pathf"] + args["Base"]["name"] + str(coreIndex) + ".hdf5") filename = DATASET.filename DATASET.close() if "_temp_" in filename: diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index 06305d19c4c67016be62f16b0b34b64332fa7381..d297a232a1d6091734a8c90818cde9978b6b86b0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -911,8 +911,7 @@ def initViews(DATASET, argViews): Names of all the available views in the dataset. """ NB_VIEW = DATASET.get("Metadata").attrs["nbView"] - print(NB_VIEW) - if argViews != [""]: + if argViews != ["all"]: allowedViews = argViews allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) if type(