Commit fb8b2fe4 authored by Baptiste Bauvin

Some doc"

parent 8b9094c9
# The base configuration of the benchmark
Base :
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: ["plausible"]
# A label for the result directory
label: "_"
# The type of dataset; currently supported: ".hdf5" and ".csv"
type: ".hdf5"
# The views to use in the benchmark; an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored
pathf: "../data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: False
# Used to be able to run more than one benchmark per minute
debug: False
# To add noise to the data; will add Gaussian noise with standard deviation noise_std
add_noise: False
noise_std: 0.0
# The directory in which the results will be stored
res_dir: "../results/"
# All the classification-related configuration options
Classification:
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.8
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["all"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["all"]
# The number of times the benchmark is repeated with different train/test
# split, to have more statistically significant results
stats_iter: 2
# The metrics that will be used in the result analysis
metrics: ["accuracy_score", "f1_score"]
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "f1_score"
# The type of hyper-parameter optimization method
hps_type: "randomized_search"
# The number of iterations in the hyper-parameter optimization process
hps_iter: 2
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
#####################################
# The Monoview Classifier arguments #
#####################################
random_forest:
n_estimators: [25]
max_depth: [3]
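# As an illustration only (the values below are hypothetical), running two
# random forest configurations without hyper-parameter search could look
# like this, with hps_type set to None:
# random_forest:
#   n_estimators: [25, 50]
#   max_depth: [3, 6]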
......
......@@ -57,9 +57,7 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
Dictionary summarizing which mono- and multiview algorithms will be used in the benchmark.
"""
benchmark = {"monoview": {}, "multiview": {}}
all_multiview_packages = [name for _, name, isPackage
in pkgutil.iter_modules(
['./mono_multi_view_classifiers/multiview_classifiers/']) if isPackage]
if "monoview" in cl_type:
if monoview_algos == ['all']:
......@@ -82,34 +80,6 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
return benchmark
# def gen_views_dictionnary(dataset_var, views):
# r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value).
#
# Parameters
# ----------
# dataset_var : `h5py` dataset file
# The full dataset on which the benchmark will be done
# views : List of strings
# Names of the selected views on which the benchmark will be done
#
# Returns
# -------
# viewDictionary : Dictionary
# Dictionary mapping the view names to their index in the full dataset.
# """
# datasets_names = dataset_var.get_view_dict().keys()
# views_dictionary = {}
# for dataset_name in datasets_names:
# if dataset_name[:4] == "View":
# view_name = dataset_var.get(dataset_name).attrs["name"]
# if type(view_name) == bytes:
# view_name = view_name.decode("utf-8")
# if view_name in views:
# views_dictionary[view_name] = int(dataset_name[4:])
#
# return views_dictionary
def init_argument_dictionaries(benchmark, views_dictionary,
nb_class, init_kwargs):
argument_dictionaries = {"monoview": [], "multiview": []}
......@@ -263,6 +233,17 @@ def get_path_dict(multiview_classifier_args):
def is_dict_in(dictionary):
"""
Returns True if any of the dictionary's values is a dictionary itself.
Parameters
----------
dictionary
Returns
-------
"""
paths = []
for key, value in dictionary.items():
if isinstance(value, dict):
......@@ -271,6 +252,24 @@ def is_dict_in(dictionary):
def gen_multiple_kwargs_combinations(cl_kwrags):
"""
Generates all the possible combinations of the requested arguments
Parameters
----------
cl_kwrags : dict
The arguments, with at least one having multiple values
Returns
-------
kwargs_combination : list
The list of all the combinations of arguments
reduced_kwargs_combination : list
The reduced names and values of the arguments that will be used in the naming
process of the different classifiers
"""
values = list(cl_kwrags.values())
listed_values = [[_] if type(_) is not list else _ for _ in values]
values_cartesian_prod = [_ for _ in itertools.product(*listed_values)]
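# e.g. (hypothetical input) cl_kwrags = {"n_estimators": [25, 50], "max_depth": 3}
# gives listed_values = [[25, 50], [3]]
# and values_cartesian_prod = [(25, 3), (50, 3)]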
......@@ -292,6 +291,39 @@ def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier,
view_name=None, view_index=None,
views_dictionary=None,
framework="monoview"):
"""
Used in the case of multiple arguments asked in the config file.
Will combine the arguments to explore all the possibilities.
Parameters
----------
nb_class : int,
The number of classes in the dataset
kwargs_init : dict
The arguments given in the config file
classifier : str
The name of the classifier for which multiple arguments have been asked
view_name : str
The name of the view in consideration.
view_index : int
The index of the view in consideration
views_dictionary : dict
The dictionary of all the views indices and their names
framework : str
Either monoview or multiview
Returns
-------
args_dictionaries : list
The list of all the possible combinations of the requested arguments
"""
if framework=="multiview":
classifier_config = get_path_dict(kwargs_init[classifier])
else:
......@@ -322,12 +354,12 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
----------
args : parsed args objects
All the args passed by the user.
classifiers_names : list of strings
List of the benchmark's monoview classifier names.
Returns
-------
kwargs : Dictionary
Dictionary summarizing all the specific arguments for the benchmark, one dictionary for each classifier.
For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
......@@ -351,7 +383,25 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
def init_kwargs_func(args, benchmark):
"""
Dispatches the kwargs initialization to monoview and multiview and creates
the kwargs variable
Parameters
----------
args : parsed args objects
All the args passed by the user.
benchmark : dict
The names of the mono- and multiview classifiers to run in the benchmark
Returns
-------
kwargs : dict
The arguments for each mono- and multiview algorithm
"""
monoview_kwargs = init_kwargs(args, benchmark["monoview"], framework="monoview")
multiview_kwargs = init_kwargs(args, benchmark["multiview"], framework="multiview")
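# kwargs groups the classifier-specific arguments by framework, e.g.
# (hypothetical): {"monoview": {"adaboost": {"n_estimators": 50}}, "multiview": {...}}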
kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs}
return kwargs
......@@ -373,31 +423,45 @@ def init_kwargs_func(args, benchmark):
# return multiview_kwargs
# def init_multiview_arguments(args, benchmark, views, views_indices,
# argument_dictionaries, random_state, directory,
# results_monoview, classification_indices):
# """Used to add each monoview exeperience args to the list of monoview experiences args"""
# logging.debug("Start:\t Initializing multiview classifiers arguments")
# multiview_arguments = []
# if "multiview" in benchmark:
# for multiview_algo_name in benchmark["multiview"]:
# mutliview_module = getattr(multiview_classifiers,
# multiview_algo_name)
#
# multiview_arguments += mutliview_module.getArgs(args, benchmark,
# views, views_indices,
# random_state,
# directory,
# results_monoview,
# classification_indices)
# argument_dictionaries["multiview"] = multiview_arguments
# logging.debug("Start:\t Initializing multiview classifiers arguments")
# return argument_dictionaries
def arange_metrics(metrics, metric_princ):
"""Used to get the metrics list in the right order so that
the first one is the principal metric specified in args
Parameters
----------
metrics : list of lists
The metrics that will be used in the benchmark
metric_princ : str
The name of the metric that needs to be used for the hyper-parameter
optimization process
Returns
-------
metrics : list of lists
The metrics list, but arranged so the first one is the principal one."""
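# e.g. (hypothetical): metrics = [["accuracy_score"], ["f1_score"]] and
# metric_princ = "f1_score" gives [["f1_score"], ["accuracy_score"]]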
if [metric_princ] in metrics:
metric_index = metrics.index([metric_princ])
first_metric = metrics[0]
......@@ -410,6 +474,31 @@ def arange_metrics(metrics, metric_princ):
def benchmark_init(directory, classification_indices, labels, labels_dictionary,
k_folds):
"""
Initializes the benchmark by saving the indices of the train
examples and the cross-validation folds.
Parameters
----------
directory : str
The benchmark's result directory
classification_indices : numpy array
The indices of the examples, split for the train/test split
labels : numpy array
The labels of the dataset
labels_dictionary : dict
The dictionary with labels as keys and their names as values
k_folds : sklearn.model_selection folds object (e.g. StratifiedKFold)
The folds for the cross validation process
Returns
-------
"""
logging.debug("Start:\t Benchmark initialization")
if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
try:
......@@ -448,8 +537,7 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
benchmark=None, views=None, views_indices=None, flag=None,
labels=None,
exec_monoview_multicore=exec_monoview_multicore,
exec_multiview_multicore=exec_multiview_multicore,):
"""Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
exec_multiview_multicore args are only used for tests"""
......@@ -469,14 +557,6 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
for argument in argument_dictionaries["Monoview"]]
logging.debug("Done:\t monoview benchmark")
logging.debug("Start:\t multiview arguments initialization")
# argument_dictionaries = initMultiviewArguments(args, benchmark, views,
# views_indices,
# argument_dictionaries,
# random_state, directory,
# resultsMonoview,
# classification_indices)
logging.debug("Done:\t multiview arguments initialization")
logging.debug("Start:\t multiview benchmark")
results_multiview = [
......