From c62f8b70dfbdd81aba032aa640814247c38c6cc6 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Wed, 19 Feb 2020 21:02:38 +0100 Subject: [PATCH] Updated documentation --- docs/source/tutorials/example1.rst | 98 ++++---- docs/source/tutorials/example3.rst | 129 +++++----- .../config_files/config_example_1.yml | 233 +++++------------ .../config_files/config_example_2_1_1.yml | 233 +++++------------ .../config_files/config_example_2_1_2.yml | 235 +++++------------- 5 files changed, 292 insertions(+), 636 deletions(-) diff --git a/docs/source/tutorials/example1.rst b/docs/source/tutorials/example1.rst index 023bb37d..a7f3f1c8 100644 --- a/docs/source/tutorials/example1.rst +++ b/docs/source/tutorials/example1.rst @@ -81,49 +81,49 @@ From here the result directory has the structure that follows : .. code-block:: bash | started_1560_12_25-15_42 - | ├── No-vs-Yes - | | ├── adaboost - | | | ├── ViewNumber0 - | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | ├── <other classifier dependant files> - | | | ├── ViewNumber1 - | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | ├── <other classifier dependant files> - | | | ├── ViewNumber2 - | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | ├── <other classifier dependant files> - | | ├── decision_tree - | | | ├── ViewNumber0 - | | | | ├── <summary & classifier dependant files> - | | | ├── ViewNumber1 - | | | | ├── <summary & classifier dependant files> - | | | ├── ViewNumber2 - | | | | ├── <summary & classifier dependant files> - | | ├── [.. - | | ├── ..] - | | ├── weighted_linear_late_fusion - | | | ├── <summary & classifier dependant files> - | | ├── [.. - | | ├── ..] - | | ├── train_labels.csv - | │ └── train_indices.csv + | ├── adaboost + | | ├── ViewNumber0 + | | | ├── *-summary.txt + | | | ├── <other classifier dependant files> + | | ├── ViewNumber1 + | | | ├── *-summary.txt + | | | ├── <other classifier dependant files> + | | ├── ViewNumber2 + | | | ├── *-summary.txt + | | | ├── <other classifier dependant files> + | ├── decision_tree + | | ├── ViewNumber0 + | | | ├── <summary & classifier dependant files> + | | ├── ViewNumber1 + | | | ├── <summary & classifier dependant files> + | | ├── ViewNumber2 + | | | ├── <summary & classifier dependant files> + | ├── [.. + | ├── ..] + | ├── weighted_linear_late_fusion + | | ├── <summary & classifier dependant files> + | ├── [.. + | ├── ..] 
+ | ├── train_labels.csv
+ | └── train_indices.csv
| ├── 1560_12_25-15_42-*-LOG.log
| ├── config_file.yml
- | ├── 1560_12_25-15_42-*-accuracy_score.png
- | ├── 1560_12_25-15_42-*-accuracy_score.csv
- | ├── 1560_12_25-15_42-*-f1_score.png
- | ├── 1560_12_25-15_42-*-f1_score.csv
- | ├── 1560_12_25-15_42-*-error_analysis_2D.png
- | ├── 1560_12_25-15_42-*-error_analysis_2D.html
- | ├── 1560_12_25-15_42-*-error_analysis_bar.png
- | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance.html
- | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance_dataframe.csv
- | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance.html
- | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance_dataframe.csv
- | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance.html
- | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance_dataframe.csv
- | ├── 1560_12_25-15_42-*-bar_plot_data.csv
- | ├── 1560_12_25-15_42-*-2D_plot_data.csv
+ | ├── *-accuracy_score.png
+ | ├── *-accuracy_score.csv
+ | ├── *-f1_score.png
+ | ├── *-f1_score.csv
+ | ├── *-error_analysis_2D.png
+ | ├── *-error_analysis_2D.html
+ | ├── *-error_analysis_bar.png
+ | ├── feature_importances
+ | | ├── *-ViewNumber0-feature_importance.html
+ | | ├── *-ViewNumber0-feature_importance_dataframe.csv
+ | | ├── *-ViewNumber1-feature_importance.html
+ | | ├── *-ViewNumber1-feature_importance_dataframe.csv
+ | | ├── *-ViewNumber2-feature_importance.html
+ | | ├── *-ViewNumber2-feature_importance_dataframe.csv
+ | ├── *-bar_plot_data.csv
+ | ├── *-2D_plot_data.csv
| └── random_state.pickle
@@ -131,7 +131,7 @@ The structure can seem complex, but it priovides a lot of information, from the
Let's comment each file :
-``1560_12_25-15_42-*-accuracy_score.png`` and ``1560_12_25-15_42-*-accuracy_score.csv``
+``*-accuracy_score.png`` and ``*-accuracy_score.csv``
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
These files contain the scores of each classifier for the accuracy metric, ordered with the best ones on the right and
@@ -147,15 +147,15 @@ The image version is as follows :
The csv file is a matrix with the score on train stored in the first row and the score on test stored in the second
one. Each classifier is presented in a column. It is loadable with pandas.
-Similar files have been generated for the f1 metric (``1560_12_25-15_42-*-f1_score.png`` and ``1560_12_25-15_42-*-f1_score.csv``).
+Similar files have been generated for the f1 metric (``*-f1_score.png`` and ``*-f1_score.csv``).
-``1560_12_25-15_42-*-error_analysis_2D.png`` and ``1560_12_25-15_42-*-error_analysis_2D.html``
+``*-error_analysis_2D.png`` and ``*-error_analysis_2D.html``
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
In these files, one can visualize the success or failure of each classifier on each example.
-Below, ``1560_12_25-15_42-*-error_analysis_2D.html`` is displayed.
+Below, ``*-error_analysis_2D.html`` is displayed.
It is the representation of a matrix, where the rows are the examples, and the columns are the classifiers.
@@ -185,9 +185,9 @@ It is an understandable behaviour as the Plausible dataset's view are generated
Moreover, as confirmed by the accuracy graph, four monoview classifiers classified all the examples to the same class and thus display a black half-column.
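As the csv outputs described above are loadable with pandas, here is a minimal, hypothetical sketch (not part of the platform) of one way to inspect them; the result path and the timestamp prefix, rendered as ``*`` in this tutorial, are assumptions to adapt to your own run :

.. code-block:: python

    # Hypothetical helper, not generated by the platform: load the accuracy
    # scores saved by the benchmark. Paths and file names are assumptions
    # based on the result tree shown above.
    import glob

    import pandas as pd

    res_dir = "examples/results/example_1/"  # the res_dir set in the config file
    accuracy_csv = glob.glob(res_dir + "**/*-accuracy_score.csv", recursive=True)[0]

    scores = pd.read_csv(accuracy_csv)
    # As described above: train scores in the first row, test scores in the
    # second one, one column per classifier.
    print(scores)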
-The data used to generate those matrices is available in ``1560_12_25-15_42-*-2D_plot_data.csv`` +The data used to generate those matrices is available in ``*-2D_plot_data.csv`` -``1560_12_25-15_42-*-error_analysis_bar.png`` +``*-error_analysis_bar.png`` <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< This file is a different way to visualize the same information as the two previous ones. Indeed, it is a bar plot, @@ -198,7 +198,7 @@ with a bar for each example, counting the number of classifiers that failed to c The bar plot showing for each example how many classifiers failed on it. -The data used to generate this graph is available in ``1560_12_25-15_42-*-bar_plot_data.csv`` +The data used to generate this graph is available in ``*-bar_plot_data.csv`` ``config_file.yml`` <<<<<<<<<<<<<<<<<<< @@ -218,7 +218,7 @@ The log file Classifier-dependant files <<<<<<<<<<<<<<<<<<<<<<<<<< -For each classifier, at least one file is generated, called ``1560_12_25-15_42-*-summary.txt``. +For each classifier, at least one file is generated, called ``*-summary.txt``. .. include:: ./images/summary.txt :literal: diff --git a/docs/source/tutorials/example3.rst b/docs/source/tutorials/example3.rst index e15b675a..c81e31ef 100644 --- a/docs/source/tutorials/example3.rst +++ b/docs/source/tutorials/example3.rst @@ -44,83 +44,78 @@ The result directory will be structured as : .. code-block:: bash | started_1560_12_25-15_42 - | ├── No-vs-Yes - | | ├── 1560_12_25-15_42-*-accuracy_score.png - | | ├── 1560_12_25-15_42-*-accuracy_score.csv - | | ├── 1560_12_25-15_42-*-f1_score.png - | | ├── 1560_12_25-15_42-*-f1_score.csv - | | ├── 1560_12_25-15_42-*-error_analysis_2D.png - | | ├── 1560_12_25-15_42-*-error_analysis_2D.html - | | ├── 1560_12_25-15_42-*-error_analysis_bar.png - | | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance.html - | | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance_dataframe.csv - | | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance.html - | | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance_dataframe.csv - | | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance.html - | | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance_dataframe.csv - | | ├── 1560_12_25-15_42-*-bar_plot_data.csv - | | ├── 1560_12_25-15_42-*-2D_plot_data.csv | ├── iter_1 - | | ├── No-vs-Yes - | | | ├── adaboost - | | | | ├── ViewNumber0 - | | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | | ├── <other classifier dependant files> - | | | | ├── ViewNumber1 - | | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | | ├── <other classifier dependant files> + | | ├── train_labels.csv + | | └── train_indices.csv + | | ├── 1560_12_25-15_42-*-LOG.log + | | ├── config_file.yml + | | ├── *-accuracy_score.png + | | ├── *-accuracy_score.csv + | | ├── *-f1_score.png + | | ├── *-f1_score.csv + | | ├── *-error_analysis_2D.png + | | ├── *-error_analysis_2D.html + | | ├── *-error_analysis_bar.png + | | ├── *-bar_plot_data.csv + | | ├── *-2D_plot_data.csv + | | ├── adaboost + | | | ├── ViewNumber0 + | | | | ├── *-summary.txt + | | | | ├── <other classifier dependant files> + | | | ├── ViewNumber1 + | | | | ├── *-summary.txt + | | | | ├── <other classifier dependant files> | | | | ├── ViewNumber2 - | | | | | ├── 1560_12_25-15_42-*-summary.txt - | | | | | ├── <other classifier dependant files> - | | | ├── decision_tree - | | | | ├── ViewNumber0 - | | | | | ├── <summary & classifier dependant files> - | | | | ├── ViewNumber1 - | | | | | ├── <summary & classifier dependant files> - | | | | ├── ViewNumber2 - | | | | | ├── 
<summary & classifier dependant files>
- | | | ├── [..
- | | | ├── ..]
- | | | ├── weighted_linear_late_fusion
- | | | | ├── <summary & classifier dependant files>
- | | | | ├── [..
- | | | | ├── ..]
- | | | ├── train_labels.csv
- | | │ └── train_indices.csv
+ | | | | ├── *-summary.txt
+ | | | | ├── <other classifier dependant files>
+ | | ├── decision_tree
+ | | | ├── ViewNumber0
+ | | | | ├── <summary & classifier dependant files>
+ | | | ├── ViewNumber1
+ | | | | ├── <summary & classifier dependant files>
+ | | | ├── ViewNumber2
+ | | | | ├── <summary & classifier dependant files>
+ | | ├── [..
+ | | ├── ..]
+ | | ├── weighted_linear_late_fusion
+ | | | ├── <summary & classifier dependant files>
+ | | ├── [..
+ | | ├── ..]
+ | ├── iter_2
+ | | ├── [..
+ | | ├── ..]
+ | ├── [..
+ | ├── ..]
+ | ├── train_labels.csv
+ | └── train_indices.csv
| ├── 1560_12_25-15_42-*-LOG.log
| ├── config_file.yml
- | | ├── 1560_12_25-15_42-*-accuracy_score.png
- | | ├── 1560_12_25-15_42-*-accuracy_score.csv
- | | ├── 1560_12_25-15_42-*-f1_score.png
- | | ├── 1560_12_25-15_42-*-f1_score.csv
- | | ├── 1560_12_25-15_42-*-error_analysis_2D.png
- | | ├── 1560_12_25-15_42-*-error_analysis_2D.html
- | | ├── 1560_12_25-15_42-*-error_analysis_bar.png
- | | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance.html
- | | ├── 1560_12_25-15_42-*-ViewNumber0-feature_importance_dataframe.csv
- | | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance.html
- | | ├── 1560_12_25-15_42-*-ViewNumber1-feature_importance_dataframe.csv
- | | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance.html
- | | ├── 1560_12_25-15_42-*-ViewNumber2-feature_importance_dataframe.csv
- | | ├── 1560_12_25-15_42-*-bar_plot_data.csv
- | | ├── 1560_12_25-15_42-*-2D_plot_data.csv
- | ├── iter_2
- | | ├── No-vs-Yes
- | | | ├─[...
- | | | ├─...]
- | ├── iter_3
- | ├── [...
- | ├── ...]
+ | ├── *-accuracy_score.png
+ | ├── *-accuracy_score.csv
+ | ├── *-f1_score.png
+ | ├── *-f1_score.csv
+ | ├── *-error_analysis_2D.png
+ | ├── *-error_analysis_2D.html
+ | ├── *-error_analysis_bar.png
+ | ├── *-bar_plot_data.csv
+ | ├── *-2D_plot_data.csv
+ | ├── feature_importances
+ | | ├── *-ViewNumber0-feature_importance.html
+ | | ├── *-ViewNumber0-feature_importance_dataframe.csv
+ | | ├── *-ViewNumber1-feature_importance.html
+ | | ├── *-ViewNumber1-feature_importance_dataframe.csv
+ | | ├── *-ViewNumber2-feature_importance.html
+ | | ├── *-ViewNumber2-feature_importance_dataframe.csv
| └── random_state.pickle
-If you look closely, nearly all the files from Example 1 are in each ``iter_`` directory, and a new ``No-vs-Yes`` directory ha appeared, in which the main figures are saved.
-So, the files saved in ``started_1560_12_25-15_42/No-vs-Yes/`` are the one that show th mean results on all the statistical iterations.
-For example, ``started_1560_12_25-15_42/No-vs-Yes/1560_12_25-15_42-*-accuracy_score.png`` looks like :
+If you look closely, nearly all the files from Example 1 are in each ``iter_`` directory, and some new files have appeared, in which the main figures are saved.
+So, the files stored in ``started_1560_12_25-15_42/`` are the ones that show the mean results over all the statistical iterations.
+For example, ``started_1560_12_25-15_42/*-accuracy_score.png`` looks like :
.. figure:: ./images/accuracy_mean.png
    :scale: 25
- The main difference between this plot an the one from Example 1 is that here, the scores are means over all the satatisitcal iterations, and the standard deviations are plotted as vertical lines on top of the bars and printed after each score under the bars as "± <std>".
+ The main difference between this plot and the one from Example 1 is that here, the scores are means over all the statistical iterations, and the standard deviations are plotted as vertical lines on top of the bars and printed after each score under the bars as "± <std>".
Then, each iteration's directory contains all the results, structured as in Example 1.
diff --git a/multiview_platform/examples/config_files/config_example_1.yml b/multiview_platform/examples/config_files/config_example_1.yml
index 3ece2c82..46c27f97 100644
--- a/multiview_platform/examples/config_files/config_example_1.yml
+++ b/multiview_platform/examples/config_files/config_example_1.yml
@@ -1,62 +1,66 @@
# The base configuration of the benchmark
-Base :
- # Enable logging
- log: True
- # The name of each dataset in the directory on which the benchmark should be run
- name: ["plausible"]
- # A label for the resul directory
- label: "_"
- # The type of dataset, currently supported ".hdf5", and ".csv"
- type: ".hdf5"
- # The views to use in the banchmark, an empty value will result in using all the views
- views:
- # The path to the directory where the datasets are stored, an absolute path is advised
- pathf: "examples/data/example_1/"
- # The niceness of the processes, useful to lower their priority
- nice: 0
- # The random state of the benchmark, useful for reproducibility
- random_state: 42
- # The number of parallel computing threads
- nb_cores: 1
- # Used to run the benchmark on the full dataset
- full: True
- # Used to be able to run more than one benchmark per minute
- debug: True
- # To add noise to the data, will add gaussian noise with noise_std
- add_noise: False
- noise_std: 0.0
- # The directory in which the results will be stored, an absolute path is advised
- res_dir: "examples/results/example_1/"
+
+# Enable logging
+log: True
+# The name of each dataset in the directory on which the benchmark should be run
+name: ["plausible"]
+# A label for the result directory
+label: "_"
+# The type of dataset, currently supported: ".hdf5" and ".csv"
+file_type: ".hdf5"
+# The views to use in the benchmark, an empty value will result in using all the views
+views:
+# The path to the directory where the datasets are stored, an absolute path is advised
+pathf: "examples/data/example_1/"
+# The niceness of the processes, useful to lower their priority
+nice: 0
+# The random state of the benchmark, useful for reproducibility
+random_state: 42
+# The number of parallel computing threads
+nb_cores: 1
+# Used to run the benchmark on the full dataset
+full: True
+# Used to be able to run more than one benchmark per minute
+debug: True
+# To add noise to the data, will add gaussian noise with noise_std
+add_noise: False
+noise_std: 0.0
+# The directory in which the results will be stored, an absolute path is advised
+res_dir: "examples/results/example_1/"
+# If an error occurs in a classifier and track_tracebacks is set to True, the
+# benchmark saves the traceback and continues; if it is set to False, it
+# stops the benchmark and raises the error
+track_tracebacks: True
# All the classification-related configuration options
-Classification:
- # If the dataset is multiclass, will use this multiclass-to-biclass method
- multiclass_method: "oneVersusOne"
- # The ratio number of test exmaples/number of train examples
- split: 0.8
- # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed
- nb_folds: 2
- # The number of classes to select in the dataset
- nb_class: 2
- # The name of the classes to select in the dataset
- classes:
- # The type of algorithms to run during the benchmark (monoview and/or multiview)
- type: ["monoview","multiview"]
- # The name of the monoview algorithms to run, ["all"] to run all the available classifiers
- algos_monoview: ["all"]
- # The names of the multiview algorithms to run, ["all"] to run all the available classifiers
- algos_multiview: ["all"]
- # The number of times the benchamrk is repeated with different train/test
- # split, to have more statistically significant results
- stats_iter: 1
- # The metrics that will be use din the result analysis
- metrics: ["accuracy_score", "f1_score"]
- # The metric that will be used in the hyper-parameter optimization process
- metric_princ: "accuracy_score"
- # The type of hyper-parameter optimization method
- hps_type: None
- # The number of iteration in the hyper-parameter optimization process
- hps_iter: 2
+
+# If the dataset is multiclass, will use this multiclass-to-biclass method
+multiclass_method: "oneVersusOne"
+# The ratio of the number of test examples over the number of train examples
+split: 0.8
+# The number of folds in the cross-validation process when hyper-parameter optimization is performed
+nb_folds: 2
+# The number of classes to select in the dataset
+nb_class: 2
+# The name of the classes to select in the dataset
+classes:
+# The type of algorithms to run during the benchmark (monoview and/or multiview)
+type: ["monoview","multiview"]
+# The names of the monoview algorithms to run, ["all"] to run all the available classifiers
+algos_monoview: ["all"]
+# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
+algos_multiview: ["all"]
+# The number of times the benchmark is repeated with different train/test
+# split, to have more statistically significant results
+stats_iter: 1
+# The metrics that will be used in the result analysis
+metrics: ["accuracy_score", "f1_score"]
+# The metric that will be used in the hyper-parameter optimization process
+metric_princ: "accuracy_score"
+# The type of hyper-parameter optimization method
+hps_type: None
+# The number of iterations in the hyper-parameter optimization process
+hps_iter: 2
# The following arguments are classifier-specific, and are documented in each
@@ -64,120 +68,3 @@ Classification:
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
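# A hypothetical illustration of the comment above, not taken from the shipped
# example configurations: with hps_type left to None, putting several values in
# the lists of a classifier-specific section makes the benchmark run the
# corresponding parameter sets. The keys below mirror the decision_tree
# sections used elsewhere in these files (scikit-learn DecisionTreeClassifier
# parameters).
decision_tree:
  max_depth: [1, 10]
  criterion: ["gini", "entropy"]
  splitter: ["best"]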
- -##################################### -# The Monoview Classifier arguments # -##################################### - - -random_forest: - n_estimators: [25] - max_depth: [3] - criterion: ["entropy"] - -svm_linear: - C: [1] - -svm_rbf: - C: [1] - -svm_poly: - C: [1] - degree: [2] - -adaboost: - n_estimators: [50] - base_estimator: ["DecisionTreeClassifier"] - -decision_tree: - max_depth: [10] - criterion: ["gini"] - splitter: ["best"] - -sgd: - loss: ["hinge"] - penalty: [l2] - alpha: [0.0001] - -knn: - n_neighbors: [5] - weights: ["uniform"] - algorithm: ["auto"] - -lasso: - alpha: [1] - max_iter: [2] - -gradient_boosting: - n_estimators: [2] - - -###################################### -# The Multiview Classifier arguments # -###################################### - -weighted_linear_early_fusion: - view_weights: [null] - monoview_classifier_name: ["decision_tree"] - monoview_classifier_config: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -entropy_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -disagree_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - - -double_fault_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -difficulty_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -majority_voting_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -bayesian_inference_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -weighted_linear_late_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] diff --git a/multiview_platform/examples/config_files/config_example_2_1_1.yml b/multiview_platform/examples/config_files/config_example_2_1_1.yml index 2d036206..8725b3f9 100644 --- a/multiview_platform/examples/config_files/config_example_2_1_1.yml +++ b/multiview_platform/examples/config_files/config_example_2_1_1.yml @@ -1,62 +1,66 @@ # The base configuration of the benchmark -Base : - # Enable logging - log: True - # The name of each dataset in the directory on which the benchmark should be run - name: ["plausible"] - # A label for the resul directory - label: "_" - # The type of dataset, currently supported ".hdf5", and ".csv" - type: ".hdf5" - # The views to use in the banchmark, an empty value will result in using all the views - views: - # The path to the directory where the datasets are stored, an absolute path is advised - pathf: "examples/data/example_2_1_1/" - # The niceness of the processes, useful to lower their priority - nice: 0 - # The random state of the benchmark, useful for reproducibility - random_state: 42 - # The number of parallel computing threads - nb_cores: 1 - # Used to run the benchmark on the full dataset - full: True - # Used to be able to run more than one benchmark per minute - debug: True - # To add noise to the data, will add gaussian noise with 
noise_std - add_noise: False - noise_std: 0.0 - # The directory in which the results will be stored, an absolute path is advised - res_dir: "examples/results/example_1/" + +# Enable logging +log: True +# The name of each dataset in the directory on which the benchmark should be run +name: ["plausible"] +# A label for the resul directory +label: "_" +# The type of dataset, currently supported ".hdf5", and ".csv" +file_type: ".hdf5" +# The views to use in the banchmark, an empty value will result in using all the views +views: +# The path to the directory where the datasets are stored, an absolute path is advised +pathf: "examples/data/example_2_1_1/" +# The niceness of the processes, useful to lower their priority +nice: 0 +# The random state of the benchmark, useful for reproducibility +random_state: 42 +# The number of parallel computing threads +nb_cores: 1 +# Used to run the benchmark on the full dataset +full: True +# Used to be able to run more than one benchmark per minute +debug: True +# To add noise to the data, will add gaussian noise with noise_std +add_noise: False +noise_std: 0.0 +# The directory in which the results will be stored, an absolute path is advised +res_dir: "examples/results/example_1/" +# If an error occurs in a classifier, if track_tracebacks is set to True, the +# benchmark saves the traceback and continues, if it is set to False, it will +# stop the benchmark and raise the error +track_tracebacks: True # All the classification-realted configuration options -Classification: - # If the dataset is multiclass, will use this multiclass-to-biclass method - multiclass_method: "oneVersusOne" - # The ratio number of test exmaples/number of train examples - split: 0.8 - # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed - nb_folds: 2 - # The number of classes to select in the dataset - nb_class: 2 - # The name of the classes to select in the dataset - classes: - # The type of algorithms to run during the benchmark (monoview and/or multiview) - type: ["monoview","multiview"] - # The name of the monoview algorithms to run, ["all"] to run all the available classifiers - algos_monoview: ["decision_tree", "adaboost", ] - # The names of the multiview algorithms to run, ["all"] to run all the available classifiers - algos_multiview: ["majority_voting_fusion", ] - # The number of times the benchamrk is repeated with different train/test - # split, to have more statistically significant results - stats_iter: 1 - # The metrics that will be use din the result analysis - metrics: ["accuracy_score", "f1_score"] - # The metric that will be used in the hyper-parameter optimization process - metric_princ: "accuracy_score" - # The type of hyper-parameter optimization method - hps_type: None - # The number of iteration in the hyper-parameter optimization process - hps_iter: 2 + +# If the dataset is multiclass, will use this multiclass-to-biclass method +multiclass_method: "oneVersusOne" +# The ratio number of test exmaples/number of train examples +split: 0.8 +# The nubmer of folds in the cross validation process when hyper-paramter optimization is performed +nb_folds: 2 +# The number of classes to select in the dataset +nb_class: 2 +# The name of the classes to select in the dataset +classes: +# The type of algorithms to run during the benchmark (monoview and/or multiview) +type: ["monoview","multiview"] +# The name of the monoview algorithms to run, ["all"] to run all the available classifiers +algos_monoview: ["decision_tree", 
"adaboost", ] +# The names of the multiview algorithms to run, ["all"] to run all the available classifiers +algos_multiview: ["majority_voting_fusion", ] +# The number of times the benchamrk is repeated with different train/test +# split, to have more statistically significant results +stats_iter: 1 +# The metrics that will be use din the result analysis +metrics: ["accuracy_score", "f1_score"] +# The metric that will be used in the hyper-parameter optimization process +metric_princ: "accuracy_score" +# The type of hyper-parameter optimization method +hps_type: None +# The number of iteration in the hyper-parameter optimization process +hps_iter: 2 # The following arguments are classifier-specific, and are documented in each @@ -64,120 +68,3 @@ Classification: # In order to run multiple sets of parameters, use multiple values in the # following lists, and set hps_type to None. - -##################################### -# The Monoview Classifier arguments # -##################################### - - -random_forest: - n_estimators: [25] - max_depth: [3] - criterion: ["entropy"] - -svm_linear: - C: [1] - -svm_rbf: - C: [1] - -svm_poly: - C: [1] - degree: [2] - -adaboost: - n_estimators: [50] - base_estimator: ["DecisionTreeClassifier"] - -decision_tree: - max_depth: [10] - criterion: ["gini"] - splitter: ["best"] - -sgd: - loss: ["hinge"] - penalty: [l2] - alpha: [0.0001] - -knn: - n_neighbors: [5] - weights: ["uniform"] - algorithm: ["auto"] - -lasso: - alpha: [1] - max_iter: [2] - -gradient_boosting: - n_estimators: [2] - - -###################################### -# The Multiview Classifier arguments # -###################################### - -weighted_linear_early_fusion: - view_weights: [null] - monoview_classifier_name: ["decision_tree"] - monoview_classifier_config: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -entropy_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -disagree_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - - -double_fault_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -difficulty_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -majority_voting_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -bayesian_inference_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -weighted_linear_late_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] diff --git a/multiview_platform/examples/config_files/config_example_2_1_2.yml b/multiview_platform/examples/config_files/config_example_2_1_2.yml index 83583ff3..7b11b0b5 100644 --- a/multiview_platform/examples/config_files/config_example_2_1_2.yml +++ b/multiview_platform/examples/config_files/config_example_2_1_2.yml @@ -1,183 +1,70 @@ # The base configuration of the benchmark -Base : - # Enable logging - log: 
True - # The name of each dataset in the directory on which the benchmark should be run - name: ["plausible"] - # A label for the resul directory - label: "_" - # The type of dataset, currently supported ".hdf5", and ".csv" - type: ".hdf5" - # The views to use in the banchmark, an empty value will result in using all the views - views: - # The path to the directory where the datasets are stored, an absolute path is advised - pathf: "examples/data/example_2_1_2/" - # The niceness of the processes, useful to lower their priority - nice: 0 - # The random state of the benchmark, useful for reproducibility - random_state: 42 - # The number of parallel computing threads - nb_cores: 1 - # Used to run the benchmark on the full dataset - full: True - # Used to be able to run more than one benchmark per minute - debug: True - # To add noise to the data, will add gaussian noise with noise_std - add_noise: False - noise_std: 0.0 - # The directory in which the results will be stored, an absolute path is advised - res_dir: "examples/results/example_1/" + +# Enable logging +log: True +# The name of each dataset in the directory on which the benchmark should be run +name: ["plausible"] +# A label for the resul directory +label: "_" +# The type of dataset, currently supported ".hdf5", and ".csv" +file_type: ".hdf5" +# The views to use in the banchmark, an empty value will result in using all the views +views: +# The path to the directory where the datasets are stored, an absolute path is advised +pathf: "examples/data/example_2_1_2/" +# The niceness of the processes, useful to lower their priority +nice: 0 +# The random state of the benchmark, useful for reproducibility +random_state: 42 +# The number of parallel computing threads +nb_cores: 1 +# Used to run the benchmark on the full dataset +full: True +# Used to be able to run more than one benchmark per minute +debug: True +# To add noise to the data, will add gaussian noise with noise_std +add_noise: False +noise_std: 0.0 +# The directory in which the results will be stored, an absolute path is advised +res_dir: "examples/results/example_1/" +# If an error occurs in a classifier, if track_tracebacks is set to True, the +# benchmark saves the traceback and continues, if it is set to False, it will +# stop the benchmark and raise the error +track_tracebacks: True # All the classification-realted configuration options -Classification: - # If the dataset is multiclass, will use this multiclass-to-biclass method - multiclass_method: "oneVersusOne" - # The ratio number of test exmaples/number of train examples - split: 0.2 - # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed - nb_folds: 2 - # The number of classes to select in the dataset - nb_class: 2 - # The name of the classes to select in the dataset - classes: - # The type of algorithms to run during the benchmark (monoview and/or multiview) - type: ["monoview","multiview"] - # The name of the monoview algorithms to run, ["all"] to run all the available classifiers - algos_monoview: ["decision_tree", "adaboost", ] - # The names of the multiview algorithms to run, ["all"] to run all the available classifiers - algos_multiview: ["majority_voting_fusion", ] - # The number of times the benchamrk is repeated with different train/test - # split, to have more statistically significant results - stats_iter: 1 - # The metrics that will be use din the result analysis - metrics: ["accuracy_score", "f1_score"] - # The metric that will be used in the hyper-parameter 
optimization process - metric_princ: "accuracy_score" - # The type of hyper-parameter optimization method - hps_type: None - # The number of iteration in the hyper-parameter optimization process - hps_iter: 2 + +# If the dataset is multiclass, will use this multiclass-to-biclass method +multiclass_method: "oneVersusOne" +# The ratio number of test exmaples/number of train examples +split: 0.2 +# The nubmer of folds in the cross validation process when hyper-paramter optimization is performed +nb_folds: 2 +# The number of classes to select in the dataset +nb_class: 2 +# The name of the classes to select in the dataset +classes: +# The type of algorithms to run during the benchmark (monoview and/or multiview) +type: ["monoview","multiview"] +# The name of the monoview algorithms to run, ["all"] to run all the available classifiers +algos_monoview: ["decision_tree", "adaboost", ] +# The names of the multiview algorithms to run, ["all"] to run all the available classifiers +algos_multiview: ["majority_voting_fusion", ] +# The number of times the benchamrk is repeated with different train/test +# split, to have more statistically significant results +stats_iter: 1 +# The metrics that will be use din the result analysis +metrics: ["accuracy_score", "f1_score"] +# The metric that will be used in the hyper-parameter optimization process +metric_princ: "accuracy_score" +# The type of hyper-parameter optimization method +hps_type: None +# The number of iteration in the hyper-parameter optimization process +hps_iter: 2 # The following arguments are classifier-specific, and are documented in each # of the corresponding modules. # In order to run multiple sets of parameters, use multiple values in the -# following lists, and set hps_type to None. - -##################################### -# The Monoview Classifier arguments # -##################################### - - -random_forest: - n_estimators: [25] - max_depth: [3] - criterion: ["entropy"] - -svm_linear: - C: [1] - -svm_rbf: - C: [1] - -svm_poly: - C: [1] - degree: [2] - -adaboost: - n_estimators: [50] - base_estimator: ["DecisionTreeClassifier"] - -decision_tree: - max_depth: [10] - criterion: ["gini"] - splitter: ["best"] - -sgd: - loss: ["hinge"] - penalty: [l2] - alpha: [0.0001] - -knn: - n_neighbors: [5] - weights: ["uniform"] - algorithm: ["auto"] - -lasso: - alpha: [1] - max_iter: [2] - -gradient_boosting: - n_estimators: [2] - - -###################################### -# The Multiview Classifier arguments # -###################################### - -weighted_linear_early_fusion: - view_weights: [null] - monoview_classifier_name: ["decision_tree"] - monoview_classifier_config: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -entropy_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -disagree_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - - -double_fault_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -difficulty_fusion: - classifier_names: [["decision_tree"]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -majority_voting_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - 
criterion: ["gini"] - splitter: ["best"] - -bayesian_inference_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] - -weighted_linear_late_fusion: - classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] - classifier_configs: - decision_tree: - max_depth: [1] - criterion: ["gini"] - splitter: ["best"] +# following lists, and set hps_type to None. \ No newline at end of file -- GitLab