From fb8b2fe4228cbd3dcb8e2e34156fa6987bd60aa4 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Fri, 8 Nov 2019 08:21:22 -0500
Subject: [PATCH] Some doc"

---
 config_files/config.yml                       |  38 +++-
 .../exec_classif.py                           | 212 ++++++++++++------
 2 files changed, 182 insertions(+), 68 deletions(-)

diff --git a/config_files/config.yml b/config_files/config.yml
index ba74aca0..b2df8fac 100644
--- a/config_files/config.yml
+++ b/config_files/config.yml
@@ -1,41 +1,75 @@
 # The base configuration of the benchmark
 Base :
-  log: true
+  # Enable logging
+  log: True
+  # The name of each dataset in the directory on which the benchmark should be run
   name: ["plausible"]
+  # A label for the result directory
   label: "_"
+  # The type of dataset; currently supported types are ".hdf5" and ".csv"
   type: ".hdf5"
+  # The views to use in the benchmark; an empty value will result in using all the views
   views:
+  # The path to the directory where the datasets are stored
   pathf: "../data/"
+  # The niceness of the processes, useful to lower their priority
   nice: 0
+  # The random state of the benchmark, useful for reproducibility
   random_state: 42
+  # The number of parallel computing threads
   nb_cores: 1
+  # Used to run the benchmark on the full dataset
   full: False
-  debug: True
+  # Used to be able to run more than one benchmark per minute
+  debug: False
+  # To add noise to the data: if True, Gaussian noise with standard deviation noise_std is added
   add_noise: False
   noise_std: 0.0
+  # The directory in which the results will be stored
   res_dir: "../results/"
 
 # All the classification-related configuration options
 Classification:
+  # If the dataset is multiclass, will use this multiclass-to-biclass method
   multiclass_method: "oneVersusOne"
+  # The ratio of the number of test examples to the number of train examples
   split: 0.8
+  # The number of folds in the cross-validation process when hyper-parameter optimization is performed
   nb_folds: 2
+  # The number of classes to select in the dataset
   nb_class: 2
+  # The names of the classes to select in the dataset
   classes:
+  # The type of algorithms to run during the benchmark (monoview and/or multiview)
   type: ["monoview","multiview"]
+  # The names of the monoview algorithms to run, ["all"] to run all the available classifiers
   algos_monoview: ["all"]
+  # The names of the multiview algorithms to run, ["all"] to run all the available classifiers
   algos_multiview: ["all"]
+  # The number of times the benchmark is repeated with different train/test
+  # splits, to have more statistically significant results
   stats_iter: 2
+  # The metrics that will be used in the result analysis
   metrics: ["accuracy_score", "f1_score"]
+  # The metric that will be used in the hyper-parameter optimization process
   metric_princ: "f1_score"
+  # The type of hyper-parameter optimization method
   hps_type: "randomized_search"
+  # The number of iterations in the hyper-parameter optimization process
   hps_iter: 2
 
 
+# The following arguments are classifier-specific, and are documented in each
+# of the corresponding modules.
+
+# In order to run multiple sets of parameters, use multiple values in the
+# following lists, and set hps_type to None (see the expansion sketch below).
+
 #####################################
 # The Monoview Classifier arguments #
 #####################################
 
+
 random_forest:
   n_estimators: [25]
   max_depth: [3]
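
As a quick illustration of the "multiple sets of parameters" note above: giving a
parameter several values makes the platform benchmark every combination. Below is a
minimal sketch of that expansion, mirroring the itertools.product logic of
gen_multiple_kwargs_combinations in exec_classif.py; the helper name
expand_param_grid is only illustrative.

    import itertools

    def expand_param_grid(cl_kwargs):
        # Wrap scalar values so every entry is a list, then take the cartesian
        # product of all the value lists to enumerate every configuration.
        listed_values = [v if isinstance(v, list) else [v]
                         for v in cl_kwargs.values()]
        return [dict(zip(cl_kwargs.keys(), combination))
                for combination in itertools.product(*listed_values)]

    # With two values for n_estimators, two random_forest configurations are run:
    print(expand_param_grid({"n_estimators": [25, 50], "max_depth": [3]}))
    # -> [{'n_estimators': 25, 'max_depth': 3}, {'n_estimators': 50, 'max_depth': 3}]
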
diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index aa342b54..ce66f705 100644
--- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py
+++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
@@ -57,15 +57,13 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
         Dictionary summarizing which mono- and multiview algorithms will be used in the benchmark.
     """
     benchmark = {"monoview": {}, "multiview": {}}
-    all_multiview_packages = [name for _, name, isPackage
-                            in pkgutil.iter_modules(
-            ['./mono_multi_view_classifiers/multiview_classifiers/']) if isPackage]
+
 
     if "monoview" in cl_type:
         if monoview_algos == ['all']:
             benchmark["monoview"] = [name for _, name, isPackage in
                                      pkgutil.iter_modules([
-                                                              "./mono_multi_view_classifiers/monoview_classifiers"])
+                                     "./mono_multi_view_classifiers/monoview_classifiers"])
                                      if not isPackage]
 
         else:
@@ -82,34 +80,6 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
     return benchmark
 
 
-# def gen_views_dictionnary(dataset_var, views):
-#     r"""Used to generate a dictionary mapping a view name (key) to it's index in the dataset (value).
-#
-#     Parameters
-#     ----------
-#     dataset_var : `h5py` dataset file
-#         The full dataset on which the benchmark will be done
-#     views : List of strings
-#         Names of the selected views on which the banchmark will be done
-#
-#     Returns
-#     -------
-#     viewDictionary : Dictionary
-#         Dictionary mapping the view names totheir indexin the full dataset.
-#         """
-#     datasets_names = dataset_var.get_view_dict().keys()
-#     views_dictionary = {}
-#     for dataset_name in datasets_names:
-#         if dataset_name[:4] == "View":
-#             view_name = dataset_var.get(dataset_name).attrs["name"]
-#             if type(view_name) == bytes:
-#                 view_name = view_name.decode("utf-8")
-#             if view_name in views:
-#                 views_dictionary[view_name] = int(dataset_name[4:])
-#
-#     return views_dictionary
-
-
 def init_argument_dictionaries(benchmark, views_dictionary,
                                 nb_class, init_kwargs):
     argument_dictionaries = {"monoview": [], "multiview": []}
@@ -263,6 +233,17 @@ def get_path_dict(multiview_classifier_args):
 
 
 def is_dict_in(dictionary):
+    """
+    Returns True if any of the dictionary value is a dictionary itself.
+
+    Parameters
+    ----------
+    dictionary
+
+    Returns
+    -------
+
+    """
     paths = []
     for key, value in dictionary.items():
         if isinstance(value, dict):
@@ -271,6 +252,24 @@ def is_dict_in(dictionary):
 
 
 def gen_multiple_kwargs_combinations(cl_kwrags):
+    """
+    Generates all the possible combinations of the asked arguments
+
+    Parameters
+    ----------
+    cl_kwrags : dict
+        The arguments, at least one of which has multiple values
+
+    Returns
+    -------
+    kwargs_combination : list
+        The list of all the combinations of arguments
+
+    reduced_kwargs_combination : list
+        The reduced names and values of the arguments, used in the naming
+        process of the different classifiers
+
+    """
     values = list(cl_kwrags.values())
     listed_values = [[_] if type(_) is not list else _ for _ in values]
     values_cartesian_prod = [_ for _ in itertools.product(*listed_values)]
@@ -292,6 +291,39 @@ def gen_multiple_args_dictionnaries(nb_class, kwargs_init, classifier,
                                     view_name=None, view_index=None,
                                     views_dictionary=None,
                                     framework="monoview"):
+    """
+    Used when multiple arguments are asked for in the config file.
+    Combines the arguments to explore all the possibilities.
+
+    Parameters
+    ----------
+    nb_class : int
+        The number of classes in the dataset
+
+    kwargs_init : dict
+        The arguments given in the config file
+
+    classifier : str
+        The name of the classifier for which multiple arguments have been asked
+
+    view_name : str
+        The name of the view under consideration
+
+    view_index : int
+        The index of the view under consideration
+
+    views_dictionary : dict
+        The dictionary of all the view indices and their names
+
+    framework : str
+        Either "monoview" or "multiview"
+
+    Returns
+    -------
+    args_dictionaries : list
+        The list of all the possible combinations of the asked arguments
+
+    """
     if framework=="multiview":
         classifier_config = get_path_dict(kwargs_init[classifier])
     else:
@@ -322,12 +354,12 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
     ----------
     args : parsed args objects
         All the args passed by the user.
-    classifiers-names : list of strings
+    classifiers_names : list of strings
         List of the benchmarks's monoview classifiers names.
 
     Returns
     -------
-    monoviewKWARGS : Dictionary of dictionaries
+    kwargs : Dictionary
         Dictionary summarizing all the specific arguments for the benchmark, one dictionary for each classifier.
 
         For example, for Adaboost, the KWARGS will be `{"n_estimators":<value>, "base_estimator":<value>}`"""
@@ -351,7 +383,25 @@ def init_kwargs(args, classifiers_names, framework="monoview"):
 
 
 def init_kwargs_func(args, benchmark):
-    monoview_kwargs = init_kwargs(args, benchmark["monoview"])
+    """
+    Dispatches the kwargs initialization to monoview and multiview and creates
+    the kwargs variable
+
+    Parameters
+    ----------
+    args : parsed args objects
+        All the args passed by the user.
+
+    benchmark : dict
+        The names of the mono- and multi-view classifiers to run in the benchmark
+
+    Returns
+    -------
+
+    kwargs : dict
+        The arguments for each mono- and multiview algorithm
+    """
+    monoview_kwargs = init_kwargs(args, benchmark["monoview"], framework="monoview")
     multiview_kwargs = init_kwargs(args, benchmark["multiview"], framework="multiview")
     kwargs = {"monoview":monoview_kwargs, "multiview":multiview_kwargs}
     return kwargs
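
For instance, with the config.yml defaults above, the returned structure would look
roughly like the following (a hedged sketch; the exact per-classifier keys depend on
each classifier module's own arguments).

    # Rough shape of the kwargs returned by init_kwargs_func; the random_forest
    # values are taken from the config.yml defaults shown earlier in this patch.
    kwargs = {
        "monoview": {
            "random_forest": {"n_estimators": [25], "max_depth": [3]},
        },
        "multiview": {
            # one entry per multiview classifier named in the benchmark
        },
    }
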
@@ -373,31 +423,45 @@ def init_kwargs_func(args, benchmark):
 #     return multiview_kwargs
 
 
-def init_multiview_arguments(args, benchmark, views, views_indices,
-                             argument_dictionaries, random_state, directory,
-                             results_monoview, classification_indices):
-    """Used to add each monoview exeperience args to the list of monoview experiences args"""
-    logging.debug("Start:\t Initializing multiview classifiers arguments")
-    multiview_arguments = []
-    if "multiview" in benchmark:
-        for multiview_algo_name in benchmark["multiview"]:
-            mutliview_module = getattr(multiview_classifiers,
-                                      multiview_algo_name)
-
-            multiview_arguments += mutliview_module.getArgs(args, benchmark,
-                                                          views, views_indices,
-                                                          random_state,
-                                                          directory,
-                                                          results_monoview,
-                                                          classification_indices)
-    argument_dictionaries["multiview"] = multiview_arguments
-    logging.debug("Start:\t Initializing multiview classifiers arguments")
-    return argument_dictionaries
+# def init_multiview_arguments(args, benchmark, views, views_indices,
+#                              argument_dictionaries, random_state, directory,
+#                              results_monoview, classification_indices):
+#     """Used to add each monoview exeperience args to the list of monoview experiences args"""
+#     logging.debug("Start:\t Initializing multiview classifiers arguments")
+#     multiview_arguments = []
+#     if "multiview" in benchmark:
+#         for multiview_algo_name in benchmark["multiview"]:
+#             mutliview_module = getattr(multiview_classifiers,
+#                                       multiview_algo_name)
+#
+#             multiview_arguments += mutliview_module.getArgs(args, benchmark,
+#                                                           views, views_indices,
+#                                                           random_state,
+#                                                           directory,
+#                                                           results_monoview,
+#                                                           classification_indices)
+#     argument_dictionaries["multiview"] = multiview_arguments
+#     logging.debug("Start:\t Initializing multiview classifiers arguments")
+#     return argument_dictionaries
 
 
 def arange_metrics(metrics, metric_princ):
     """Used to get the metrics list in the right order so that
-    the first one is the principal metric specified in args"""
+    the first one is the principal metric specified in args
+
+    Parameters
+    ----------
+    metrics : list of lists
+        The metrics that will be used in the benchmark
+
+    metric_princ : str
+        The name of the metric that needs to be used for the hyper-parameter
+        optimization process
+
+    Returns
+    -------
+    metrics : list of lists
+        The metrics list, but arranged so that the first one is the principal one."""
     if [metric_princ] in metrics:
         metric_index = metrics.index([metric_princ])
         first_metric = metrics[0]
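
A minimal usage sketch of this metric re-ordering, assuming (as the
"[metric_princ] in metrics" test above suggests) that each metric is given as a
one-element [name] list; arange_metrics_sketch is an illustrative stand-in, not
the patched function itself.

    def arange_metrics_sketch(metrics, metric_princ):
        # Swap the principal metric to the front, leaving the others in place.
        if [metric_princ] in metrics:
            metric_index = metrics.index([metric_princ])
            metrics[0], metrics[metric_index] = metrics[metric_index], metrics[0]
        return metrics

    print(arange_metrics_sketch([["accuracy_score"], ["f1_score"]], "f1_score"))
    # -> [['f1_score'], ['accuracy_score']]
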
@@ -410,6 +474,31 @@ def arange_metrics(metrics, metric_princ):
 
 def benchmark_init(directory, classification_indices, labels, labels_dictionary,
                    k_folds):
+    """
+    Initializes the benchmark by saving the indices of the train
+    examples and the cross-validation folds.
+
+    Parameters
+    ----------
+    directory : str
+        The benchmark's result directory
+
+    classification_indices : numpy array
+        The indices of the examples, split for the train/test split
+
+    labels : numpy array
+        The labels of the dataset
+
+    labels_dictionary : dict
+        The dictionary with labels as keys and their names as values
+
+    k_folds : sklearn.model_selection fold object (e.g. StratifiedKFold)
+        The folds for the cross validation process
+
+    Returns
+    -------
+
+    """
     logging.debug("Start:\t Benchmark initialization")
     if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
         try:
@@ -448,8 +537,7 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
                      benchmark=None, views=None, views_indices=None, flag=None,
                      labels=None,
                      exec_monoview_multicore=exec_monoview_multicore,
-                     exec_multiview_multicore=exec_multiview_multicore,
-                     init_multiview_arguments=init_multiview_arguments):
+                     exec_multiview_multicore=exec_multiview_multicore):
     """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
      exec_multiview_multicore args are only used for tests"""
 
@@ -469,14 +557,6 @@ def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
         for argument in argument_dictionaries["Monoview"]]
     logging.debug("Done:\t monoview benchmark")
 
-    logging.debug("Start:\t multiview arguments initialization")
-    # argument_dictionaries = initMultiviewArguments(args, benchmark, views,
-    #                                               views_indices,
-    #                                               argument_dictionaries,
-    #                                               random_state, directory,
-    #                                               resultsMonoview,
-    #                                               classification_indices)
-    logging.debug("Done:\t multiview arguments initialization")
 
     logging.debug("Start:\t multiview benchmark")
     results_multiview = [
-- 
GitLab