From 149576d39427dfa79af352ffe39276e231d32513 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Fri, 1 May 2020 07:40:24 -0400 Subject: [PATCH] Reworking' --- README.md | 106 ------ README.rst | 113 ++++++ docs/source/analyzeresult.rst | 5 - docs/source/api.rst | 15 +- docs/source/conf.py | 4 +- docs/source/execution.rst | 6 - docs/source/index.rst | 4 +- docs/source/modules.rst | 7 - docs/source/readme_link.rst | 3 +- .../references/monomulti/exec_classif.rst | 6 - docs/source/references/monomulti/metrics.rst | 6 - .../multiview_classifiers/classifiers.rst | 8 - .../diversity_fusion.rst | 5 - .../references/monomulti/utils/execution.rst | 6 - .../references/monomulti/utils/multiclass.rst | 6 - docs/source/references/monomultidoc.rst | 14 - ...ultiview_classifiers.difficulty_fusion.rst | 30 -- ....multiview_classifiers.disagree_fusion.rst | 30 -- ...tiview_classifiers.double_fault_fusion.rst | 30 -- ...s.multiview_classifiers.entropy_fusion.rst | 30 -- ....multiview_classifiers.fat_late_fusion.rst | 30 -- ...tiview_classifiers.fat_scm_late_fusion.rst | 30 -- ...iers.fusion.Methods.EarlyFusionPackage.rst | 22 -- ...fiers.fusion.Methods.LateFusionPackage.rst | 54 --- ...s.multiview_classifiers.fusion.Methods.rst | 38 -- ...assifiers.multiview_classifiers.fusion.rst | 37 -- ...multiview_classifiers.pseudo_cq_fusion.rst | 30 -- ...view_classifiers.multiview_classifiers.rst | 24 -- ...w_platform.mono_multi_view_classifiers.rst | 42 --- ...form.mono_multi_view_classifiers.utils.rst | 86 ----- docs/source/references/multiview_platform.rst | 46 --- .../references/multiview_platform.tests.rst | 41 --- .../multiview_platform.tests.test_metrics.rst | 22 -- ...ultiview_platform.tests.test_mono_view.rst | 30 -- ...atform.tests.test_monoview_classifiers.rst | 30 -- ...iew_classifiers.Test_DifficultyMeasure.rst | 22 -- ...tiview_classifiers.Test_DisagreeFusion.rst | 22 -- ...iew_classifiers.Test_DoubleFaultFusion.rst | 22 -- ...ltiview_classifiers.Test_EntropyFusion.rst | 22 -- ...test_multiview_classifiers.Test_Fusion.rst | 29 -- ...iview_classifiers.Test_PseudoCQMeasure.rst | 22 -- ...tform.tests.test_multiview_classifiers.rst | 34 -- .../multiview_platform.tests.test_utils.rst | 46 --- docs/source/sphinxext/recommon.py | 5 - docs/source/tutorials/example0.rst | 10 +- docs/source/tutorials/example1.rst | 24 +- docs/source/tutorials/example2.rst | 10 +- docs/source/tutorials/example3.rst | 2 +- docs/source/tutorials/example4.rst | 34 +- docs/source/tutorials/example5.rst | 26 +- docs/source/tutorials/hps_theory.rst | 12 +- format_dataset.py | 12 +- requirements.txt | 8 +- setup.py | 14 +- summit/__init__.py | 2 +- .../config_files/config_example_0.yml | 2 +- .../config_files/config_example_1.yml | 2 +- .../config_files/config_example_2_1_2.yml | 2 +- .../config_files/config_example_2_2_1.yml | 4 +- .../config_files/config_example_2_3.yml | 2 +- .../config_files/config_example_3.yml | 2 +- summit/execute.py | 49 ++- summit/multiview_platform/exec_classif.py | 77 ++-- .../metrics/accuracy_score.py | 8 +- summit/multiview_platform/metrics/f1_score.py | 4 - .../multiview_platform/metrics/fbeta_score.py | 5 - .../metrics/hamming_loss.py | 4 - .../metrics/jaccard_score.py | 5 - summit/multiview_platform/metrics/log_loss.py | 4 - .../metrics/matthews_corrcoef.py | 4 - .../metrics/precision_score.py | 4 - .../metrics/recall_score.py | 4 - .../metrics/roc_auc_score.py | 5 - .../metrics/zero_one_loss.py | 5 - .../monoview/exec_classif_mono_view.py | 71 ++-- 
.../monoview/monoview_utils.py | 27 +- .../monoview_classifiers/adaboost.py | 26 +- .../monoview_classifiers/decision_tree.py | 6 +- .../monoview_classifiers/gradient_boosting.py | 26 +- .../monoview_classifiers/knn.py | 3 +- .../monoview_classifiers/lasso.py | 6 +- .../monoview_classifiers/random_forest.py | 9 +- .../monoview_classifiers/sgd.py | 3 +- .../monoview_classifiers/svm_linear.py | 3 +- .../monoview_classifiers/svm_poly.py | 5 +- .../monoview_classifiers/svm_rbf.py | 3 +- .../multiview/exec_multiview.py | 69 ++-- .../multiview/multiview_utils.py | 51 +-- .../multiview_classifiers/__init__.py | 4 +- .../additions/diversity_utils.py | 52 +-- .../additions/fusion_utils.py | 2 +- .../additions/jumbo_fusion_utils.py | 34 +- .../additions/late_fusion_utils.py | 16 +- .../multiview_classifiers/additions/utils.py | 6 +- .../bayesian_inference_fusion.py | 12 +- .../difficulty_fusion.py | 6 +- .../multiview_classifiers/entropy_fusion.py | 8 +- .../majority_voting_fusion.py | 26 +- .../multiview_classifiers/svm_jumbo_fusion.py | 5 +- .../weighted_linear_early_fusion.py | 31 +- .../weighted_linear_late_fusion.py | 12 +- .../result_analysis/duration_analysis.py | 22 +- .../result_analysis/error_analysis.py | 176 +++++----- .../result_analysis/execution.py | 84 +++-- .../result_analysis/feature_importances.py | 16 +- .../result_analysis/metric_analysis.py | 79 +++-- .../result_analysis/tracebacks_analysis.py | 4 +- summit/multiview_platform/utils/base.py | 106 +++--- .../multiview_platform/utils/configuration.py | 35 +- summit/multiview_platform/utils/dataset.py | 330 ++++++++++-------- summit/multiview_platform/utils/execution.py | 25 +- .../utils/get_multiview_db.py | 110 +++--- .../utils/hyper_parameter_search.py | 79 ++--- .../utils/make_file_config.py | 1 + summit/multiview_platform/utils/multiclass.py | 52 +-- .../multiview_platform/utils/organization.py | 4 +- summit/tests/test_config_hps.yml | 2 +- summit/tests/test_config_iter.yml | 2 +- summit/tests/test_config_simple.yml | 2 +- summit/tests/test_exec_classif.py | 234 +++++++------ summit/tests/test_metrics/test_metrics.py | 10 +- .../test_exec_classif_mono_view.py | 75 ++-- .../test_mono_view/test_monoview_utils.py | 15 +- .../test_compatibility.py | 3 +- .../test_multi_view/test_exec_multiview.py | 63 ++-- .../test_multi_view/test_multiview_utils.py | 18 +- .../test_additions/test_diversity_utils.py | 29 +- .../test_additions/test_jumbo_fusion_utils.py | 12 +- .../test_difficulty_fusion.py | 5 +- .../test_disagree_fusion.py | 4 +- .../test_double_fault_fusion.py | 4 +- .../test_entropy_fusion.py | 5 +- .../test_weighted_linear_early_fusion.py | 51 ++- .../test_duration_analysis.py | 14 +- .../test_error_analysis.py | 76 ++-- .../test_result_analysis/test_execution.py | 118 ++++--- .../test_feature_importances.py | 24 +- .../test_metric_analysis.py | 63 ++-- .../test_tracebacks_analysis.py | 18 +- .../tests/test_utils/test_GetMultiviewDB.py | 57 +-- summit/tests/test_utils/test_base.py | 98 +++--- summit/tests/test_utils/test_configuration.py | 15 +- summit/tests/test_utils/test_dataset.py | 169 ++++----- summit/tests/test_utils/test_execution.py | 62 ++-- .../test_utils/test_hyper_parameter_search.py | 37 +- summit/tests/test_utils/test_multiclass.py | 88 ++--- summit/tests/utils.py | 22 +- 147 files changed, 1925 insertions(+), 2681 deletions(-) delete mode 100644 README.md create mode 100644 README.rst delete mode 100644 docs/source/analyzeresult.rst delete mode 100644 docs/source/execution.rst delete mode 100644 
docs/source/modules.rst delete mode 100644 docs/source/references/monomulti/exec_classif.rst delete mode 100644 docs/source/references/monomulti/metrics.rst delete mode 100644 docs/source/references/monomulti/multiview_classifiers/classifiers.rst delete mode 100644 docs/source/references/monomulti/multiview_classifiers/diversity_fusion.rst delete mode 100644 docs/source/references/monomulti/utils/execution.rst delete mode 100644 docs/source/references/monomulti/utils/multiclass.rst delete mode 100644 docs/source/references/monomultidoc.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.rst delete mode 100644 docs/source/references/multiview_platform.mono_multi_view_classifiers.utils.rst delete mode 100644 docs/source/references/multiview_platform.rst delete mode 100644 docs/source/references/multiview_platform.tests.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_metrics.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_mono_view.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_monoview_classifiers.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_Fusion.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.rst delete mode 100644 docs/source/references/multiview_platform.tests.test_multiview_classifiers.rst delete mode 100644 
docs/source/references/multiview_platform.tests.test_utils.rst delete mode 100644 docs/source/sphinxext/recommon.py diff --git a/README.md b/README.md deleted file mode 100644 index 9a15dc29..00000000 --- a/README.md +++ /dev/null @@ -1,106 +0,0 @@ -[](http://www.gnu.org/licenses/gpl-3.0) -[](https://gitlab.lis-lab.fr/baptiste.bauvin/summit/badges/master/pipeline.svg) -[](http://baptiste.bauvin.pages.lis-lab.fr/summit/coverage/index.html) -# Supervised MultiModal Integration Tool's Readme - -This project aims to be an easy-to-use solution to run a prior benchmark on a dataset and evaluate mono- & multi-view algorithms capacity to classify it correctly. - -## Getting Started - -### Prerequisites (will be automatically installed) - -To be able to use this project, you'll need : - -* [Python 3.6](https://docs.python.org/3/) - -And the following python modules : - -* [numpy](http://www.numpy.org/), [scipy](https://scipy.org/), -* [matplotlib](http://matplotlib.org/) - Used to plot results, -* [sklearn](http://scikit-learn.org/stable/) - Used for the monoview classifiers, -* [joblib](https://pypi.python.org/pypi/joblib) - Used to compute on multiple threads, -* [h5py](https://www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM, -* [pickle](https://docs.python.org/3/library/pickle.html) - Used to store some results, -* [pandas](https://pandas.pydata.org/) - Used to manipulate data efficiently, -* [six](https://pypi.org/project/six/) - -* [m2r](https://pypi.org/project/m2r/) - Used to generate documentation from the readme, -* [docutils](https://pypi.org/project/docutils/) - Used to generate documentation, -* [pyyaml](https://pypi.org/project/PyYAML/) - Used to read the config files, -* [plotly](https://plot.ly/) - Used to generate interactive HTML visuals, -* [tabulate](https://pypi.org/project/tabulate/) - Used to generated the confusion matrix. - - -### Installing - -Once you cloned the project from the [gitlab repository](https://gitlab.lis-lab.fr/baptiste.bauvin/summit/), you just have to use : - -``` -cd path/to/summit/ -pip install -e . -``` -In the `summit` directory to install SuMMIT and its dependencies. - -### Running the tests - -To run the test suite of SuMMIT, run : -``` -cd path/to/summit -pytest -``` - -### Running on simulated data - -In order to run it you'll need to try on **simulated** data with the command -```python -from multiview_platform.execute import execute -execute("example 1") -``` -This will run the first example. - -For more information about the examples, see the [documentation](http://baptiste.bauvin.pages.lis-lab.fr/summit/). -Results will be stored in the results directory of the installation path : -`path/to/summit/multiview_platform/examples/results`. - -The documentation proposes a detailed interpretation of the results through [6 tutorials](http://baptiste.bauvin.pages.lis-lab.fr/summit/). - -### Discovering the arguments - -All the arguments of the platform are stored in a YAML config file. Some config files are given as examples. -The file stored in `summit/config_files/config.yml` is documented and it is highly recommended -to read it carefully before playing around with the parameters. - -You can create your own configuration file. In order to run the platform with it, run : -```python -from summit.execute import execute -execute(config_path="/absolute/path/to/your/config/file") -``` - -For further information about classifier-specific arguments, see the [documentation](http://baptiste.bauvin.pages.lis-lab.fr/summit/). 
- - -### Dataset compatibility - - -In order to start a benchmark on your own dataset, you need to format it so SuMMIT can use it. To do so, a [python script](https://gitlab.lis-lab.fr/baptiste.bauvin/summit/-/blob/master/format_dataset.py) is provided. - -For more information, see [Example 6](http://baptiste.bauvin.pages.lis-lab.fr/summit/tutorials/example4.html) - -### Running on your dataset - -Once you have formatted your dataset, to run SuMMIT on it you need to modify the config file as -```yaml -name: ["your_file_name"] -* -pathf: "path/to/your/dataset" -``` -This will run a full benchmark on your dataset using all available views and labels. - -It is highly recommended to follow the documentation's [tutorials](http://baptiste.bauvin.pages.lis-lab.fr/summit/tutorials/index.html) to learn the use of each parameter. - - -## Author - -* **Baptiste BAUVIN** -* **Dominique BENIELLI** -* **Alexis PROD'HOMME** - diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..77d62adb --- /dev/null +++ b/README.rst @@ -0,0 +1,113 @@ +.. image:: https://img.shields.io/badge/License-GPL%20v3-blue.svg + :target: http://www.gnu.org/licenses/gpl-3.0 + :alt: License: GPL v3 + +.. image:: https://gitlab.lis-lab.fr/baptiste.bauvin/summit/badges/master/pipeline.svg + :alt: Pipeline status + +.. image:: https://gitlab.lis-lab.fr/baptiste.bauvin/summit/badges/master/coverage.svg + :target: http://baptiste.bauvin.pages.lis-lab.fr/summit/coverage/index.html + :alt: Coverage + + +Supervised MultiModal Integration Tool's Readme +=============================================== + +This project aims to be an easy-to-use solution to run a prior benchmark on a dataset and evaluate the capacity of mono- and multi-view algorithms to classify it correctly. + +Getting Started +--------------- + +Prerequisites (will be automatically installed) +<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +To be able to use this project, you'll need: + +* `Python 3.6 <https://docs.python.org/3/>`_ + +And the following Python modules: + +* `numpy <http://www.numpy.org/>`_, `scipy <https://scipy.org/>`_, +* `matplotlib <http://matplotlib.org/>`_ - Used to plot results, +* `sklearn <http://scikit-learn.org/stable/>`_ - Used for the monoview classifiers, +* `joblib <https://pypi.python.org/pypi/joblib>`_ - Used to compute on multiple threads, +* `h5py <https://www.h5py.org>`_ - Used to generate HDF5 datasets on hard drive and use them to spare RAM, +* `pickle <https://docs.python.org/3/library/pickle.html>`_ - Used to store some results, +* `pandas <https://pandas.pydata.org/>`_ - Used to manipulate data efficiently, +* `six <https://pypi.org/project/six/>`_, +* `m2r <https://pypi.org/project/m2r/>`_ - Used to generate documentation from the readme, +* `docutils <https://pypi.org/project/docutils/>`_ - Used to generate documentation, +* `pyyaml <https://pypi.org/project/PyYAML/>`_ - Used to read the config files, +* `plotly <https://plot.ly/>`_ - Used to generate interactive HTML visuals, +* `tabulate <https://pypi.org/project/tabulate/>`_ - Used to generate the confusion matrix. + + +Installing +<<<<<<<<<< + +Once you have cloned the project from the `gitlab repository <https://gitlab.lis-lab.fr/baptiste.bauvin/summit/>`_, install SuMMIT and its dependencies by running: + +.. code:: bash + + cd path/to/summit/ + pip install -e . + + +Running the tests +<<<<<<<<<<<<<<<<< + +To run the test suite of SuMMIT, run: + +.. 
code:: bash + + cd path/to/summit + pytest + + +Running on simulated data +<<<<<<<<<<<<<<<<<<<<<<<<< + +For your first go with SuMMIT, you can run it on simulated data with + +.. code:: bash + + python + >>> from summit.execute import execute + >>> execute("example 1") + +This will run the benchmark of the `documentation's Example 1 <http://baptiste.bauvin.pages.lis-lab.fr/summit/tutorials/example1.html>`_. + +For more information about the examples, see the `documentation <http://baptiste.bauvin.pages.lis-lab.fr/summit/>`_. +Results will, by default, be stored in the results directory of the installation path: +``path/to/summit/multiview_platform/examples/results``. + +The documentation proposes a detailed interpretation of the results and arguments of SuMMIT through `6 tutorials <http://baptiste.bauvin.pages.lis-lab.fr/summit/>`_. + +Dataset compatibility +<<<<<<<<<<<<<<<<<<<<< + +In order to start a benchmark on your own dataset, you need to format it so SuMMIT can use it. To do so, a `python script <https://gitlab.lis-lab.fr/baptiste.bauvin/summit/-/blob/master/format_dataset.py>`_ is provided. + +For more information, see `Example 5 <http://baptiste.bauvin.pages.lis-lab.fr/summit/tutorials/example5.html>`_. + +Running on your dataset ++++++++++++++++++++++++ + +Once you have formatted your dataset, to run SuMMIT on it you need to modify the config file as follows: + +.. code:: yaml + + name: ["your_file_name"] + pathf: "path/to/your/dataset" + +This will run a full benchmark on your dataset using all available views and labels. + +It is however highly recommended to follow the documentation's `tutorials <http://baptiste.bauvin.pages.lis-lab.fr/summit/tutorials/index.html>`_ to learn the use of each parameter. + + +Authors +------- + +* **Baptiste BAUVIN** +* **Dominique BENIELLI** +* **Alexis PROD'HOMME** + diff --git a/docs/source/analyzeresult.rst b/docs/source/analyzeresult.rst deleted file mode 100644 index 69a31ddb..00000000 --- a/docs/source/analyzeresult.rst +++ /dev/null @@ -1,5 +0,0 @@ -Result analysis module -====================== - -.. automodule:: summit.multiview_platform.result_analysis - :members: diff --git a/docs/source/api.rst b/docs/source/api.rst index 367a94fb..6999ad11 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1,10 +1,9 @@ -Multiview Platform -================== +API Documentation +================= -.. toctree:: - :maxdepth: 3 - :caption: Contents: +Multiview dataset management module +----------------------------------- - execution - monomultidoc - analyzeresult +.. automodule:: summit.multiview_platform.utils.dataset + :members: + :inherited-members: diff --git a/docs/source/conf.py b/docs/source/conf.py index 2ab75d49..52905c53 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -199,8 +199,8 @@ rst_prolog = """ """ -extlinks = {'base_source': ('https://gitlab.lis-lab.fr/baptiste.bauvin/summit/-/tree/master/', "base_source"), - 'base_doc': ('http://baptiste.bauvin.pages.lis-lab.fr/summit/', 'base_doc')} +extlinks = {'base_source': ('https://gitlab.lis-lab.fr/baptiste.bauvin/summit/-/tree/master/%s', "_"), + 'base_doc': ('http://baptiste.bauvin.pages.lis-lab.fr/summit/%s', '_')} diff --git a/docs/source/execution.rst b/docs/source/execution.rst deleted file mode 100644 index 50fe10ef..00000000 --- a/docs/source/execution.rst +++ /dev/null @@ -1,6 +0,0 @@ -Welcome to the exection documentation -===================================== - -.. 
automodule:: summit.execute - :members: - diff --git a/docs/source/index.rst b/docs/source/index.rst index 7e682d07..dfa5e0db 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,12 +15,12 @@ This documentation consists in a short read me, with instructions to install and :maxdepth: 1 tutorials/index - references/multiview_platform + api + readme_link .. examples - Indices and tables ================== diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index 994afd41..00000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -summit -====== - -.. toctree:: - :maxdepth: 4 - - summit diff --git a/docs/source/readme_link.rst b/docs/source/readme_link.rst index e7ce18d9..a6210d3d 100644 --- a/docs/source/readme_link.rst +++ b/docs/source/readme_link.rst @@ -1,2 +1 @@ - -<../../README.md> +.. include:: ../../README.rst diff --git a/docs/source/references/monomulti/exec_classif.rst b/docs/source/references/monomulti/exec_classif.rst deleted file mode 100644 index 31dd4af5..00000000 --- a/docs/source/references/monomulti/exec_classif.rst +++ /dev/null @@ -1,6 +0,0 @@ -Classification execution module -=============================== - -.. automodule:: multiview_platform.mono_multi_view_classifiers.exec_classif - :members: - :inherited-members: diff --git a/docs/source/references/monomulti/metrics.rst b/docs/source/references/monomulti/metrics.rst deleted file mode 100644 index 310b33ff..00000000 --- a/docs/source/references/monomulti/metrics.rst +++ /dev/null @@ -1,6 +0,0 @@ -Metrics framework -================= - -.. automodule:: multiview_platform.mono_multi_view_classifiers.metrics.framework - :members: - :inherited-members: diff --git a/docs/source/references/monomulti/multiview_classifiers/classifiers.rst b/docs/source/references/monomulti/multiview_classifiers/classifiers.rst deleted file mode 100644 index 0ca3191d..00000000 --- a/docs/source/references/monomulti/multiview_classifiers/classifiers.rst +++ /dev/null @@ -1,8 +0,0 @@ -Classifiers -=========== - -.. autosummary:: - :toctree: DIRNAME - - multiview_platform.mono_multi_view_classifiers.monoview_classifiers - diff --git a/docs/source/references/monomulti/multiview_classifiers/diversity_fusion.rst b/docs/source/references/monomulti/multiview_classifiers/diversity_fusion.rst deleted file mode 100644 index a60545a2..00000000 --- a/docs/source/references/monomulti/multiview_classifiers/diversity_fusion.rst +++ /dev/null @@ -1,5 +0,0 @@ -Diversity Fusion Classifiers -============================ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview.additions.diversity_utils - :members: diff --git a/docs/source/references/monomulti/utils/execution.rst b/docs/source/references/monomulti/utils/execution.rst deleted file mode 100644 index f761534a..00000000 --- a/docs/source/references/monomulti/utils/execution.rst +++ /dev/null @@ -1,6 +0,0 @@ -Utils execution module -====================== - -.. automodule:: multiview_platform.MonoMultiViewClassifiers.utils.execution -:members: - :inherited-members: \ No newline at end of file diff --git a/docs/source/references/monomulti/utils/multiclass.rst b/docs/source/references/monomulti/utils/multiclass.rst deleted file mode 100644 index 9f79bc8d..00000000 --- a/docs/source/references/monomulti/utils/multiclass.rst +++ /dev/null @@ -1,6 +0,0 @@ -Utils Multiclass module -======================= - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.utils.multiclass -:members: - :inherited-members: diff --git a/docs/source/references/monomultidoc.rst b/docs/source/references/monomultidoc.rst deleted file mode 100644 index 4ada7eec..00000000 --- a/docs/source/references/monomultidoc.rst +++ /dev/null @@ -1,14 +0,0 @@ -Mono and mutliview classification -================================= - -.. toctree:: - :maxdepth: 3 - :caption: Contents: - - monomulti/metrics - monomulti/monoview_classifier/classifiers - monomulti/multiview_classifier - monomulti/exec_classif - monomulti/multiview_classifiers/diversity_fusion - monomulti/utils/execution - monomulti/utils/multiclass diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.rst deleted file mode 100644 index 58de9705..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion package -============================================================================================== - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.analyze_results module -------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.difficulty_fusion module ---------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion.difficulty_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.rst deleted file mode 100644 index 909fe1c0..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion package -============================================================================================ - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.analyze_results module ------------------------------------------------------------------------------------------------------------ - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.disagree_fusion module ------------------------------------------------------------------------------------------------------------ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion.disagree_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.rst deleted file mode 100644 index c56ae059..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion package -================================================================================================ - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.analyze_results module ---------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.double_fault_fusion module -------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion.double_fault_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.rst deleted file mode 100644 index de1ffb8c..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion package -=========================================================================================== - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.analyze_results module ----------------------------------------------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.entropy_fusion module ---------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion.entropy_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.rst deleted file mode 100644 index 702dd00f..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion package -============================================================================================ - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.analyze_results module ------------------------------------------------------------------------------------------------------------ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.fat_late_fusion module ------------------------------------------------------------------------------------------------------------ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion.fat_late_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.rst deleted file mode 100644 index ab7cf76f..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion package -================================================================================================ - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.analyze_results module ---------------------------------------------------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.fat_scm_late_fusion module -------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion.fat_scm_late_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.rst deleted file mode 100644 index 3bfc05d0..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage package -============================================================================================================== - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.WeightedLinear module ----------------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage.WeightedLinear - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.rst deleted file mode 100644 index 1432bf60..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.rst +++ /dev/null @@ -1,54 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage package -============================================================================================================= - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.BayesianInference module ------------------------------------------------------------------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.BayesianInference - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.MajorityVoting module ---------------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.MajorityVoting - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.SCMForLinear module -------------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.SCMForLinear - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.SVMForLinear module -------------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.SVMForLinear - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.WeightedLinear module ---------------------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage.WeightedLinear - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.rst deleted file mode 100644 index df094446..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.rst +++ /dev/null @@ -1,38 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods package -=========================================================================================== - -Subpackages ------------ - -.. toctree:: - - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusionPackage - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusionPackage - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusion module ------------------------------------------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.EarlyFusion - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusion module ------------------------------------------------------------------------------------------------------ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods.LateFusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.rst deleted file mode 100644 index e6276082..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.rst +++ /dev/null @@ -1,37 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion package -=================================================================================== - -Subpackages ------------ - -.. toctree:: - - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.Methods - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.analyze_results module --------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.fusion module ------------------------------------------------------------------------------------------ - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion.fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.rst deleted file mode 100644 index cc50a88a..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion package -============================================================================================= - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.analyze_results module ------------------------------------------------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.analyze_results - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.pseudo_cq_fusion module -------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion.pseudo_cq_fusion - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.rst deleted file mode 100644 index 41d2ea7c..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.multiview_classifiers.rst +++ /dev/null @@ -1,24 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.multiview_classifiers package -============================================================================ - -Subpackages ------------ - -.. toctree:: - - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.difficulty_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.disagree_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.double_fault_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.entropy_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_late_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fat_scm_late_fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.fusion - multiview_platform.mono_multi_view_classifiers.multiview_classifiers.pseudo_cq_fusion - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview_classifiers - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.rst deleted file mode 100644 index b02337b5..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.rst +++ /dev/null @@ -1,42 +0,0 @@ -multiview_platform.mono_multi_view_classifiers package -====================================================== - -Subpackages ------------ - -.. toctree:: - - multiview_platform.mono_multi_view_classifiers.metrics - multiview_platform.mono_multi_view_classifiers.monoview - multiview_platform.mono_multi_view_classifiers.monoview_classifiers - multiview_platform.mono_multi_view_classifiers.multiview - multiview_platform.mono_multi_view_classifiers.multiview_classifiers - multiview_platform.mono_multi_view_classifiers.utils - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.exec_classif module ------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.exec_classif - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.result_analysis module ---------------------------------------------------------------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.result_analysis - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.mono_multi_view_classifiers.utils.rst b/docs/source/references/multiview_platform.mono_multi_view_classifiers.utils.rst deleted file mode 100644 index 1595e2f2..00000000 --- a/docs/source/references/multiview_platform.mono_multi_view_classifiers.utils.rst +++ /dev/null @@ -1,86 +0,0 @@ -multiview_platform.mono_multi_view_classifiers.utils package -============================================================ - -Submodules ----------- - -multiview_platform.mono_multi_view_classifiers.utils.configuration module -------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.configuration - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.dataset module -------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.dataset - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.execution module ---------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.execution - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.get_multiview_db module ----------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.get_multiview_db - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.hyper_parameter_search module ----------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.hyper_parameter_search - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.make_file_config module ----------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.make_file_config - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.multiclass module ----------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.multiclass - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.multiview_result_analysis module -------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.multiview_result_analysis - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.mono_multi_view_classifiers.utils.transformations module ---------------------------------------------------------------------------- - -.. automodule:: multiview_platform.mono_multi_view_classifiers.utils.transformations - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: multiview_platform.mono_multi_view_classifiers.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.rst b/docs/source/references/multiview_platform.rst deleted file mode 100644 index 5fbecb87..00000000 --- a/docs/source/references/multiview_platform.rst +++ /dev/null @@ -1,46 +0,0 @@ -multiview_platform references -============================= - -Subpackages ------------ - -.. toctree:: - - multiview_platform.mono_multi_view_classifiers - multiview_platform.tests - -Submodules ----------- - -multiview_platform.execute module ---------------------------------- - -.. automodule:: multiview_platform.execute - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests module -------------------------------- - -.. automodule:: multiview_platform.tests - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.versions module ----------------------------------- - -.. automodule:: multiview_platform.versions - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.rst b/docs/source/references/multiview_platform.tests.rst deleted file mode 100644 index 245f3599..00000000 --- a/docs/source/references/multiview_platform.tests.rst +++ /dev/null @@ -1,41 +0,0 @@ -multiview_platform.tests package -================================ - -Subpackages ------------ - -.. toctree:: - - multiview_platform.tests.test_metrics - multiview_platform.tests.test_mono_view - multiview_platform.tests.test_monoview_classifiers - multiview_platform.tests.test_multiview_classifiers - multiview_platform.tests.test_utils - -Submodules ----------- - -multiview_platform.tests.test_ExecClassif module ------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_ExecClassif - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_ResultAnalysis module ---------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_ResultAnalysis - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_metrics.rst b/docs/source/references/multiview_platform.tests.test_metrics.rst deleted file mode 100644 index 592e0b0f..00000000 --- a/docs/source/references/multiview_platform.tests.test_metrics.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_metrics package -============================================= - -Submodules ----------- - -multiview_platform.tests.test_metrics.test_accuracy_score module ----------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_metrics.test_accuracy_score - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: multiview_platform.tests.test_metrics - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_mono_view.rst b/docs/source/references/multiview_platform.tests.test_mono_view.rst deleted file mode 100644 index 37c6df13..00000000 --- a/docs/source/references/multiview_platform.tests.test_mono_view.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.tests.test_mono_view package -=============================================== - -Submodules ----------- - -multiview_platform.tests.test_mono_view.test_ExecClassifMonoView module ------------------------------------------------------------------------ - -.. automodule:: multiview_platform.tests.test_mono_view.test_ExecClassifMonoView - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_mono_view.test_MonoviewUtils module ------------------------------------------------------------------ - -.. automodule:: multiview_platform.tests.test_mono_view.test_MonoviewUtils - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_mono_view - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_monoview_classifiers.rst b/docs/source/references/multiview_platform.tests.test_monoview_classifiers.rst deleted file mode 100644 index bc743a50..00000000 --- a/docs/source/references/multiview_platform.tests.test_monoview_classifiers.rst +++ /dev/null @@ -1,30 +0,0 @@ -multiview_platform.tests.test_monoview_classifiers package -========================================================== - -Submodules ----------- - -multiview_platform.tests.test_monoview_classifiers.test_adaboost module ------------------------------------------------------------------------ - -.. automodule:: multiview_platform.tests.test_monoview_classifiers.test_adaboost - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_monoview_classifiers.test_compatibility module ----------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_monoview_classifiers.test_compatibility - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_monoview_classifiers - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.rst deleted file mode 100644 index cd3805ae..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure package -================================================================================== - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.test_DifficultyMeasureModule module --------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure.test_DifficultyMeasureModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.rst deleted file mode 100644 index 11cb2606..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion package -=============================================================================== - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.test_DisagreeFusionModule module --------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion.test_DisagreeFusionModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.rst deleted file mode 100644 index e4537767..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion package -================================================================================== - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.test_DoubleFaultFusionModule module --------------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion.test_DoubleFaultFusionModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.rst deleted file mode 100644 index c00615f7..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion package -============================================================================== - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.test_EntropyFusionModule module ------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion.test_EntropyFusionModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_Fusion.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_Fusion.rst deleted file mode 100644 index ff9028e5..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_Fusion.rst +++ /dev/null @@ -1,29 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_Fusion package -======================================================================= - -Subpackages ------------ - -.. toctree:: - - multiview_platform.tests.test_multiview_classifiers.Test_Fusion.Test_Methods - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_Fusion.test_FusionModule module ----------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_Fusion.test_FusionModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_Fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.rst deleted file mode 100644 index aea2b8e2..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.rst +++ /dev/null @@ -1,22 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure package -================================================================================ - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.test_PseudoCQFusionModule module ---------------------------------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure.test_PseudoCQFusionModule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.rst b/docs/source/references/multiview_platform.tests.test_multiview_classifiers.rst deleted file mode 100644 index 287d34d0..00000000 --- a/docs/source/references/multiview_platform.tests.test_multiview_classifiers.rst +++ /dev/null @@ -1,34 +0,0 @@ -multiview_platform.tests.test_multiview_classifiers package -=========================================================== - -Subpackages ------------ - -.. 
toctree:: - - multiview_platform.tests.test_multiview_classifiers.Test_DifficultyMeasure - multiview_platform.tests.test_multiview_classifiers.Test_DisagreeFusion - multiview_platform.tests.test_multiview_classifiers.Test_DoubleFaultFusion - multiview_platform.tests.test_multiview_classifiers.Test_EntropyFusion - multiview_platform.tests.test_multiview_classifiers.Test_Fusion - multiview_platform.tests.test_multiview_classifiers.Test_PseudoCQMeasure - -Submodules ----------- - -multiview_platform.tests.test_multiview_classifiers.test_diversity_utils module -------------------------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers.test_diversity_utils - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_multiview_classifiers - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/references/multiview_platform.tests.test_utils.rst b/docs/source/references/multiview_platform.tests.test_utils.rst deleted file mode 100644 index d8820a45..00000000 --- a/docs/source/references/multiview_platform.tests.test_utils.rst +++ /dev/null @@ -1,46 +0,0 @@ -multiview_platform.tests.test_utils package -=========================================== - -Submodules ----------- - -multiview_platform.tests.test_utils.test_GetMultiviewDB module --------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_utils.test_GetMultiviewDB - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_utils.test_configuration module -------------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_utils.test_configuration - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_utils.test_execution module ---------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_utils.test_execution - :members: - :undoc-members: - :show-inheritance: - -multiview_platform.tests.test_utils.test_multiclass module ----------------------------------------------------------- - -.. automodule:: multiview_platform.tests.test_utils.test_multiclass - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: multiview_platform.tests.test_utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/sphinxext/recommon.py b/docs/source/sphinxext/recommon.py deleted file mode 100644 index 4137b7c4..00000000 --- a/docs/source/sphinxext/recommon.py +++ /dev/null @@ -1,5 +0,0 @@ -from recommonmark.transform import AutoStructify - - -def setup(app): - app.add_transform(AutoStructify) diff --git a/docs/source/tutorials/example0.rst b/docs/source/tutorials/example0.rst index dbac9f92..c71d55d0 100644 --- a/docs/source/tutorials/example0.rst +++ b/docs/source/tutorials/example0.rst @@ -66,14 +66,14 @@ With these results, we are able to assess which classifier perfroms the best, he Getting more information on the classification <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -Once one knows the scores of each classifier, an interesting analysis could be to verify on which examples each classifier failed, to detect potential outliers. +Once one knows the scores of each classifier, an interesting analysis could be to verify on which samples each classifier failed, to detect potential outliers. 
This is possible with another result analysis, available in :base_source:`png <multiview_platform/examples/results/example_0/digits/result_example/digits-error_analysis_2D.png>`, :base_source:`csv <multiview_platform/examples/results/example_0/digits/result_example/digits_2D_plot_data.csv>` and :base_source:`html <multiview_platform/examples/results/example_0/digits/result_example/digits-error_analysis_2D.html>` : .. raw:: html :file: images/example_0/err.html -This figure represents a matrix, with the examples in rows and classifiers in columns, with a white rectangle on row i, column j if classifier j succeerecded to classify example i. +This figure represents a matrix, with the samples in rows and classifiers in columns, with a white rectangle on row i, column j if classifier j succeerecded to classify sample i. .. note:: To zoom on the image use your mouse to either draw a rectangle or drag it in a unique direction to zoom on an axis : @@ -83,9 +83,9 @@ This figure represents a matrix, with the examples in rows and classifiers in co :align: center -A quick analysis of it shows that a decision tree (DT) on the view ``digit_col_grad_0`` is unable to classify any example of labels 1, 2, 3 or 4. That both the other DTs have a similar behavior with other labels. +A quick analysis of it shows that a decision tree (DT) on the view ``digit_col_grad_0`` is unable to classify any sample of labels 1, 2, 3 or 4. That both the other DTs have a similar behavior with other labels. -Concerning the fusions, if you zoom in on the examples labelled "2"", you may see that some errors made by the early fusion classifier are on examples that were mis-classified by the three DTs : +Concerning the fusions, if you zoom in on the samples labelled "2"", you may see that some errors made by the early fusion classifier are on samples that were mis-classified by the three DTs : .. image:: images/example_0/lab_2.png :scale: 100 @@ -96,4 +96,4 @@ Conclusion ---------- Thanks to |platf| we were able to get a benchmark of mono- and multiview algorithms on a classification task. -In the following tutorials, we will develop the features of |platf| on several examples. \ No newline at end of file +In the following tutorials, we will develop the features of |platf| on several samples. \ No newline at end of file diff --git a/docs/source/tutorials/example1.rst b/docs/source/tutorials/example1.rst index e6fa5db2..909d9bfe 100644 --- a/docs/source/tutorials/example1.rst +++ b/docs/source/tutorials/example1.rst @@ -14,13 +14,13 @@ For all the following tutorials, we will use the same dataset. A generated dataset to rule them all ------------------------------------ -The :base_source:`dataset <multiview_platform/examples/data/doc_summit.hdf5>` that will be used in the examples consists in +The :base_source:`dataset <multiview_platform/examples/data/doc_summit.hdf5>` that will be used in the samples consists in -+ 500 examples that are either ++ 500 samples that are either + mis-described by all the views (labelled ``Mutual_error_*``), + well-described by all the views (labelled ``Redundant_*``), + well-described by the majority of the views (labelled ``Complementary_*``), - + randomly well- or mis-described by the views (labelled ``example_*``). + + randomly well- or mis-described by the views (labelled ``sample_*``). + 8 balanced classes named ``'label_1'``, ..., ``'label_8'``, @@ -143,13 +143,13 @@ Here, for each classifier, 8 bars are plotted, one for each class. 
It is clear t ``*-error_analysis_2D.png`` and ``*-error_analysis_2D.html`` <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -In these files, one can visualize the success or failure of each classifier on each example. +In these files, one can visualize the success or failure of each classifier on each sample. Below, ``*-error_analysis_2D.html`` is displayed. -It is the representation of a matrix, where the rows are the examples, and the columns are the classifiers. +It is the representation of a matrix, where the rows are the samples, and the columns are the classifiers. -The examples labelled as ``Mutual_error_*`` are mis-classified by most of the algorithms, the redundant ones are well-classified, and the complementary ones are mixly classified. +The samples labelled as ``Mutual_error_*`` are mis-classified by most of the algorithms, the redundant ones are well-classified, and the complementary ones are classified in a mixed way. .. note:: It is highly recommended to zoom in the html figure to see each row. @@ -160,16 +160,16 @@ The examples labelled as ``Mutual_error_*`` are mis-classified by most of the al This figure is the html version of the classifiers errors' visualization. It is interactive, so, by hovering it, the information on -each classifier and example is printed. The classifiers are ordered as follows: +each classifier and sample is printed. The classifiers are ordered as follows: From left to right : all the monoview classifiers on the first view, all the ones on the second one, ..., then at the far right, the multiview classifiers This html image is also available in ``.png`` format, but is then not interactive, so harder to analyze. -In terms of information, this is useful to detect possible outlier examples in the dataset and failing classifers. +In terms of information, this is useful to detect possible outlier samples in the dataset and failing classifiers. -For example, a mainly black horizontal line for an example means that it has been missclassified by most of the classifiers. -It could mean that the example is incorrectly labeled in the dataset or is very hard to classify. +For example, a mainly black horizontal line for a sample means that it has been misclassified by most of the classifiers. +It could mean that the sample is incorrectly labeled in the dataset or is very hard to classify. Symmetrically, a mainly-black column means that a classifier spectacularly failed. @@ -178,12 +178,12 @@ The data used to generate this matrix is available in ``*-2D_plot_data.csv`` ``*-error_analysis_bar.png`` and ``*-error_analysis_bar.html`` <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -This file is a different way to visualize the same information as the two previous ones. Indeed, it is a bar plot, with a bar for each example, counting the ratio of classifiers that failed to classify this particular example. +This file is a different way to visualize the same information as the two previous ones. Indeed, it is a bar plot, with a bar for each sample, counting the ratio of classifiers that failed to classify this particular sample. .. raw:: html :file: ./images/example_1/bar.html -All the spikes are the mutual error examples, the complementary ones are the 0.33 bars and the redundant are the empty spaces. +All the spikes are the mutual error samples, the complementary ones are the 0.33 bars and the redundant are the empty spaces.
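To make the link between these two figures concrete, here is a minimal sketch of the computation they describe; it is illustrative only, not SUMMIT's own code, and the arrays ``preds`` and ``y_true`` are hypothetical placeholders. It builds the samples-by-classifiers success matrix behind ``*-error_analysis_2D.*`` and the per-sample failure ratio plotted in ``*-error_analysis_bar.*``.

.. code-block:: python

    # Illustrative sketch (not the platform's implementation).
    import numpy as np

    rng = np.random.RandomState(42)
    n_samples, n_classifiers = 10, 4
    y_true = rng.randint(0, 2, n_samples)                  # hypothetical true labels
    preds = rng.randint(0, 2, (n_classifiers, n_samples))  # one row of predictions per classifier

    # 1 (white) where classifier j classified sample i correctly, 0 (black) otherwise
    success_matrix = (preds == y_true).astype(int).T       # shape (n_samples, n_classifiers)

    # ratio of classifiers that failed on each sample: the height of each bar
    failure_ratio = 1 - success_matrix.mean(axis=1)
    print(failure_ratio)

A sample whose bar reaches 1 was missed by every classifier, which corresponds to the mutual-error spikes mentioned above.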
The data used to generate this graph is available in ``*-bar_plot_data.csv`` diff --git a/docs/source/tutorials/example2.rst b/docs/source/tutorials/example2.rst index 232a06c6..a96a85be 100644 --- a/docs/source/tutorials/example2.rst +++ b/docs/source/tutorials/example2.rst @@ -72,7 +72,7 @@ And for the late fusion : (It will build a vote with one decision tree on each view, with the specified configuration for the decision trees) -Learning on a few examples +Learning on a few samples >>>>>>>>>>>>>>>>>>>>>>>>>> This example focuses on one line of the config file : @@ -95,9 +95,10 @@ The results for accuracy metric are stored in ``multiview_platform/examples/resu These results were generated learning on 20% of the dataset and testing on 80% (see the :base_source:`config file <multiview_platform/examples/config_files/config_example_2_1_1.yml#L37>`). -.. _learning-on-more-examples: -Learning on more examples ->>>>>>>>>>>>>>>>>>>>>>>>> +.. _learning-on-more-samples: + +Learning on more samples +>>>>>>>>>>>>>>>>>>>>>>>> Now, if you run : @@ -128,6 +129,7 @@ The split ratio has two consequences : - The second consequence is that increasing train size will increase the benchmark duration as the classifiers will have to learn on more examples, this duration modification is higher if the dataset has high dimensionality and if the algorithms are complex. .. _random: + Example 2.2 : Usage of randomized hyper-parameter optimization : <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/docs/source/tutorials/example3.rst b/docs/source/tutorials/example3.rst index 0f5cc416..5bd82570 100644 --- a/docs/source/tutorials/example3.rst +++ b/docs/source/tutorials/example3.rst @@ -70,7 +70,7 @@ Similarly for the f1-score : The main difference between this plot an the one from :base_doc:`Example 1 <tutorials/example1.html>` is that here, the scores are means over all the statistical iterations, and the standard deviations are plotted as vertical lines on top of the bars and printed after each score under the bars as "± <std>". -This has also an impact on the display of error analysis. Indeed, now it has multiple shades of gray depending on the number of iterations that succeeded or failed on the example : +This has also an impact on the display of error analysis. Indeed, now it has multiple shades of gray depending on the number of iterations that succeeded or failed on the sample : .. raw:: html :file: ./images/example_3/err.html diff --git a/docs/source/tutorials/example4.rst b/docs/source/tutorials/example4.rst index 077d6d79..68c81c6b 100644 --- a/docs/source/tutorials/example4.rst +++ b/docs/source/tutorials/example4.rst @@ -10,16 +10,16 @@ The bare necessities At the moment, in order for the platform to work, the dataset must satisfy the following minimum requirements : -- Each example must be described in each view, with no missing data (you can use external tools to fill the gaps, or use only the fully-described examples of your dataset) +- Each sample must be described in each view, with no missing data (you can use external tools to fill the gaps, or use only the fully-described samples of your dataset) The dataset structure --------------------- -Let's suppose that one has a multiview dataset consisting of 3 views describing 200 examples: +Let's suppose that one has a multiview dataset consisting of 3 views describing 200 samples: -1. A sound recoding of each example, described by 100 features, -2. An image of each example, described by 40 features, -3. 
A written commentary for each example, described by 55 features. +1. A sound recoding of each sample, described by 100 features, +2. An image of each sample, described by 40 features, +3. A written commentary for each sample, described by 55 features. So three matrices (200x100 ; 200x40 ; 200x55) make up the dataset. The most usual way to save matrices are `.csv` files. So let us suppose that one has @@ -27,7 +27,7 @@ So three matrices (200x100 ; 200x40 ; 200x55) make up the dataset. The most usua 2. ``image.csv`` 3. ``commentary.csv``. -Let us suppose that all this data should be used to classify the examples in three classes : "Human", "Animal" or "Object" and that on has a ``labels.csv`` file with one value for each example, 0 if the example is a human, 1 if it is an animal an 2 if it is an object. +Let us suppose that all this data should be used to classify the examples in three classes : "Human", "Animal" or "Object" and that on has a ``labels.csv`` file with one value for each sample, 0 if the sample is a human, 1 if it is an animal an 2 if it is an object. In order to run a benchmark on this dataset, one has to format it using HDF5. @@ -49,7 +49,7 @@ Let's define the variables that will be used to load the csv matrices : view_names = ["sound", "image", "commentary", ] data_file_paths = ["path/to/sound.csv", "path/to/image.csv", "path/to/commentary.csv",] labels_file_path = "path/to/labels/file.csv" - example_ids_path = "path/to/example_ids/file.csv" + sample_ids_path = "path/to/sample_ids/file.csv" labels_names = ["Human", "Animal", "Object"] Let's create the HDF5 file : @@ -119,7 +119,7 @@ Let's now store the metadata : # do not modify the attribute's key metadata_group.attrs["nbClass"] = np.unique(labels_data) - # Store the number of examples in the dataset, + # Store the number of samples in the dataset, # do not modify the attribute's key metadata_group.attrs["datasetLength"] = labels_data.shape[0] @@ -127,35 +127,35 @@ Here, we store - The number of views in the :python:`"nbView"` attribute, - The number of different labels in the :python:`"nbClass"` attribute, -- The number of examples in the :python:`"datasetLength"` attribute. +- The number of samples in the :python:`"datasetLength"` attribute. Now, the dataset is ready to be used in the platform. Let's suppose it is stored in ``path/to/file.hdf5``, then by setting the ``pathf:`` line of the config file to ``pathf: path/to/`` and the ``name:`` line to ``name: ["file.hdf5"]``, the benchmark will run on the created dataset. -Adding additional information on the examples +Adding additional information on the samples --------------------------------------------- -In order to be able to analyze the results with more clarity, one can add the examples IDs to the dataset, by adding a dataset to the metadata group. +In order to be able to analyze the results with more clarity, one can add the samples IDs to the dataset, by adding a dataset to the metadata group. Let's suppose that the objects we are trying to classify between "Human", "Animal" and "Object" are all people, bears, cars, planes, and birds. 
And that one has a ``.csv`` file with an ID for each of them (:python:`"john_115", "doe_562", "bear_112", "plane_452", "bird_785", "car_369", ...` for example) -Then as long as the IDs order corresponds to the example order in the lines of the previous matrices, to add the IDs in the hdf5 file, just add : +Then as long as the IDs order corresponds to the sample order in the lines of the previous matrices, to add the IDs in the hdf5 file, just add : .. code-block:: python - # Let us suppose that the examples have string ids, available in a csv file, + # Let us suppose that the samples have string ids, available in a csv file, # they can be stored in the HDF5 and will be used in the result analysis. - example_ids = np.genfromtxt(example_ids_path, delimiter=',') + sample_ids = np.genfromtxt(sample_ids_path, delimiter=',') # To sore the strings in an HDF5 dataset, be sure to use the S<max_length> type, # do not modify the name of the dataset. - metadata_group.create_dataset("example_ids", - data=np.array(example_ids).astype(np.dtype("S100")), + metadata_group.create_dataset("sample_ids", + data=np.array(sample_ids).astype(np.dtype("S100")), dtype=np.dtype("S100")) -Be sure to keep the name :python:`"example_ids"`, as it is mandatory for the platform to find the dataset in the file. +Be sure to keep the name :python:`"sample_ids"`, as it is mandatory for the platform to find the dataset in the file. diff --git a/docs/source/tutorials/example5.rst b/docs/source/tutorials/example5.rst index ba04eb9d..ecb618cb 100644 --- a/docs/source/tutorials/example5.rst +++ b/docs/source/tutorials/example5.rst @@ -127,12 +127,12 @@ Moreover, one has to add a variable called :python:`classifier_class_name` that self.param_names = ["param_1", "random_state", "param_2"] self.distribs = [CustomRandint(5,200), [random_state], ["val_1", "val_2"]] -In |platf| the input of the :python:`fit()` method is `X`, a dataset object that provide access to each view with a method : :python:`dataset_var.get_v(view_index, example_indices)`. +In |platf| the input of the :python:`fit()` method is `X`, a dataset object that provide access to each view with a method : :python:`dataset_var.get_v(view_index, sample_indices)`. So in order to add a mutliview classifier to |platf|, one will probably have to add a data-transformation step before using the class's :python:`fit()` method. -Moreover, to get restrain the examples and descriptors used in the method, |platf| provides two supplementary arguments : +Moreover, to get restrain the samples and descriptors used in the method, |platf| provides two supplementary arguments : -- ``train_indices`` is an array of examples indices that compose the training set, +- ``train_indices`` is an array of samples indices that compose the training set, - ``view_indices`` is an array of view indices to restrain the number of views on which the algorithm will train. These two arguments are useful to reduce memory usage. Indeed, `X`, the dataset object is just a wrapper for an HDF5 file object, so the data will only be loaded once the `get_v` method is called, so the train and test set are not loaded at the same time. @@ -142,18 +142,18 @@ These two arguments are useful to reduce memory usage. Indeed, `X`, the dataset .. 
code-block:: python def fit(self, X, y, train_indices=None, view_indices=None): - # This function is used to initialize the example and view indices, in case they are None, it transforms them in the correct values - train_indices, view_indices = get_examples_views_indices(X, + # This function is used to initialize the sample and view indices, in case they are None, it transforms them in the correct values + train_indices, view_indices = get_samples_views_indices(X, train_indices, view_indices) needed_input = transform_data_if_needed(X, train_indices, view_indices) return NewMVAlgo.fit(self, needed_input, y[train_indices]) - def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, + def predict(self, X, sample_indices=None, view_indices=None): + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, view_indices) - needed_input = transform_data_if_needed(X, example_indices, view_indices) + needed_input = transform_data_if_needed(X, sample_indices, view_indices) return NewMVAlgo.predict(self, needed_input) Similarly to monoview algorithms, it is possible to add an interpretation method. @@ -177,16 +177,16 @@ Example : build a list of all the views arrays Let us suppose that the mutliview algorithm that one wants to add to |platf| takes as input a list :python:`list_X` of all the views. -Then an example of :python:`self.transform_data_if_needed(X, example_indices, view_indices)` could be : +Then an example of :python:`self.transform_data_if_needed(X, sample_indices, view_indices)` could be : .. code-block:: python - def transform_data_if_needed(self, X, example_indices, view_indices): + def transform_data_if_needed(self, X, sample_indices, view_indices): views_list = [] # Browse the asked views indices for view_index in view_indices: - # Get the data from the dataset object, for the asked examples - view_data = X.get_v(view_index, example_indices=example_indices) + # Get the data from the dataset object, for the asked samples + view_data = X.get_v(view_index, sample_indices=sample_indices) # Store it in the list views_list.append(view_data) return views_list diff --git a/docs/source/tutorials/hps_theory.rst b/docs/source/tutorials/hps_theory.rst index 5da342f4..ae819bcf 100644 --- a/docs/source/tutorials/hps_theory.rst +++ b/docs/source/tutorials/hps_theory.rst @@ -30,9 +30,9 @@ model's generalization capacity. This split ratio is controlled by the config file's argument ``split:``. It uses a float to pass the ratio between the size of the testing set and the training set : :math:`\text{split} = \frac{\text{test size}}{\text{dataset size}}`. In order to be as fair as possible, this split is made by keeping the ratio between each class in the training set and in the testing set. -So if a dataset has 100 examples with 60% of them in class A, and 40% of them in class B, using ``split: 0.2`` -will generate a training set with 48 examples of class A and 32 examples of class B and a testing set -with 12 examples of class A and 8 examples of class B. +So if a dataset has 100 samples with 60% of them in class A, and 40% of them in class B, using ``split: 0.2`` +will generate a training set with 48 samples of class A and 32 samples of class B and a testing set +with 12 samples of class A and 8 samples of class B. Ths process uses sklearn's StratifiedShuffleSplit_ to split the dataset at random while being reproductible thanks to the random_state. 
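As a standalone illustration of the behaviour described above (a sketch using the same sklearn splitter, not the platform's own code), the 100-sample example with 60% of class A and ``split: 0.2`` can be reproduced directly:

.. code-block:: python

    # 100 samples, 60% class A, 40% class B, split: 0.2
    # -> train: 48 A + 32 B, test: 12 A + 8 B
    import numpy as np
    from sklearn.model_selection import StratifiedShuffleSplit

    y = np.array([0] * 60 + [1] * 40)   # 0 = class A, 1 = class B
    X = np.zeros((100, 1))              # placeholder features
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, test_idx = next(splitter.split(X, y))

    print(np.bincount(y[train_idx]))    # [48 32]
    print(np.bincount(y[test_idx]))     # [12  8]

With a fixed ``random_state`` the generated indices are identical at every run, which is what makes the split reproducible.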
@@ -102,9 +102,9 @@ testing it on the last one, evaluating it's predictive performance on unseen dat This learning-and-testing process is repeated :math:`k` times and the estimated performance is the mean of the performance on each testing set. -In the platform, the training set (the 48 examples of class A and 32 examples of class B from last example) will be -divided in k folds for the cross-validation process and the testing set (the 12 examples of class A and 8 examples of -class B for last examples) will in no way be involved in the training process of the classifier. +In the platform, the training set (the 48 samples of class A and 32 samples of class B from last example) will be +divided in k folds for the cross-validation process and the testing set (the 12 samples of class A and 8 samples of +class B for last samples) will in no way be involved in the training process of the classifier. The cross-validation process can be controlled with the ``nb_folds:`` line of the configuration file in which the number of folds is specified. diff --git a/format_dataset.py b/format_dataset.py index 02ef7399..2f70515e 100644 --- a/format_dataset.py +++ b/format_dataset.py @@ -14,7 +14,7 @@ import h5py view_names = ["sound", "image", "commentary", ] data_file_paths = ["path/to/sound.csv", "path/to/image.csv", "path/to/commentary.csv",] labels_file_path = "path/to/labels/file.csv" -example_ids_path = "path/to/example_ids/file.csv" +sample_ids_path = "path/to/sample_ids/file.csv" labels_names = ["Human", "Animal", "Object"] @@ -69,18 +69,18 @@ metadata_group.attrs["nbView"] = len(view_names) # do not modify the attribute's key metadata_group.attrs["nbClass"] = np.unique(labels_data) -# Store the number of examples in the dataset, +# Store the number of samples in the dataset, # do not modify the attribute's key metadata_group.attrs["datasetLength"] = labels_data.shape[0] -# Let us suppose that the examples have string ids, available in a csv file, +# Let us suppose that the samples have string ids, available in a csv file, # they can be stored in the HDF5 and will be used in the result analysis. -example_ids = np.genfromtxt(example_ids_path, delimiter=',') +sample_ids = np.genfromtxt(sample_ids_path, delimiter=',') # To sore the strings in an HDF5 dataset, be sure to use the S<max_length> type, # do not modify the name of the dataset. -metadata_group.create_dataset("example_ids", - data=np.array(example_ids).astype(np.dtype("S100")), +metadata_group.create_dataset("sample_ids", + data=np.array(sample_ids).astype(np.dtype("S100")), dtype=np.dtype("S100")) hdf5_file.close() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e507215a..5f4af0b8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -Sphinx>=1.4.8 -cycler>=0.10.0 h5py>=2.9.0 joblib>=0.13.2 -kiwisolver>=1.1.0 numpy>=1.16.4 pyparsing>=2.4.0 python-dateutil>=2.8.0 @@ -10,11 +7,8 @@ scikit-learn>=0.19.0 scipy>=1.3.0 six>=1.12.0 pandas>=0.23.3 -m2r>=0.2.1 -docutils>=0.12 pyyaml>=3.12 -cvxopt>=1.2.0 plotly>=4.2.1 matplotlib>=3.1.1 tabulate>=0.8.6 -recommonmark>=0.3.0 + diff --git a/setup.py b/setup.py index 96743820..407b77d6 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def setup_package(): # Une description longue, sera affichée pour présenter la lib # Généralement on dump le README ici - long_description=open('README.md').read(), + long_description=open('README.rst').read(), # Vous pouvez rajouter une liste de dépendances pour votre lib # et même préciser une version. 
A l'installation, Python essayera de @@ -58,7 +58,7 @@ def setup_package(): install_requires=requirements, extras_requires={ 'dev': ['pytest', 'pytest-cov'], - 'doc': ['sphinx', 'numpydoc', 'sphinx_gallery', 'matplotlib']}, + 'doc': ['sphinx', 'numpydoc', 'docutils']}, # Il est d'usage de mettre quelques metadata à propos de sa lib # Pour que les robots puissent facilement la classer. @@ -83,11 +83,11 @@ def setup_package(): # va faire pointer ce nom vers la fonction proclamer(). La commande sera # créé automatiquement. # La syntaxe est "nom-de-commande-a-creer = package.module:fonction". - entry_points={ - 'console_scripts': [ - 'exec_multiview = summit.execute:exec', - ], - }, + # entry_points={ + # 'console_scripts': [ + # 'exec_multiview = summit.execute:exec', + # ], + # }, # A fournir uniquement si votre licence n'est pas listée dans "classifiers" # ce qui est notre cas diff --git a/summit/__init__.py b/summit/__init__.py index 257ff6cd..b11a66d7 100644 --- a/summit/__init__.py +++ b/summit/__init__.py @@ -1,4 +1,4 @@ -"""This is a test docstring to test stuff""" + __version__ = "0.0.0.0" diff --git a/summit/examples/config_files/config_example_0.yml b/summit/examples/config_files/config_example_0.yml index d16f5843..7d9c58cf 100644 --- a/summit/examples/config_files/config_example_0.yml +++ b/summit/examples/config_files/config_example_0.yml @@ -31,7 +31,7 @@ track_tracebacks: True # All the classification-realted configuration options -# The ratio of test examples/number of train examples +# The ratio of test samples/number of train samples split: 0.25 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/examples/config_files/config_example_1.yml b/summit/examples/config_files/config_example_1.yml index fb9ab405..1ab646bf 100644 --- a/summit/examples/config_files/config_example_1.yml +++ b/summit/examples/config_files/config_example_1.yml @@ -31,7 +31,7 @@ track_tracebacks: True # All the classification-realted configuration options -# The ratio of test examples/number of train examples +# The ratio of test samples/number of train samples split: 0.35 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/examples/config_files/config_example_2_1_2.yml b/summit/examples/config_files/config_example_2_1_2.yml index 256e18a8..81fc57ca 100644 --- a/summit/examples/config_files/config_example_2_1_2.yml +++ b/summit/examples/config_files/config_example_2_1_2.yml @@ -33,7 +33,7 @@ track_tracebacks: True # If the dataset is multiclass, will use this multiclass-to-biclass method multiclass_method: "oneVersusOne" -# The ratio number of test exmaples/number of train examples +# The ratio number of test exmaples/number of train samples split: 0.2 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/examples/config_files/config_example_2_2_1.yml b/summit/examples/config_files/config_example_2_2_1.yml index d462bee1..193482d4 100644 --- a/summit/examples/config_files/config_example_2_2_1.yml +++ b/summit/examples/config_files/config_example_2_2_1.yml @@ -33,12 +33,12 @@ track_tracebacks: True # If the dataset is multiclass, will use this multiclass-to-biclass method multiclass_method: "oneVersusOne" -# The ratio number of test exmaples/number of train examples +# The ratio number of test samples/number of train samples split: 0.8 # The nubmer of folds in the cross 
validation process when hyper-paramter optimization is performed nb_folds: 5 # The number of classes to select in the dataset -nb_class: 2 +nb_class: 2² # The name of the classes to select in the dataset classes: # The type of algorithms to run during the benchmark (monoview and/or multiview) diff --git a/summit/examples/config_files/config_example_2_3.yml b/summit/examples/config_files/config_example_2_3.yml index bb8fb31e..1938ed8f 100644 --- a/summit/examples/config_files/config_example_2_3.yml +++ b/summit/examples/config_files/config_example_2_3.yml @@ -33,7 +33,7 @@ track_tracebacks: True # If the dataset is multiclass, will use this multiclass-to-biclass method multiclass_method: "oneVersusOne" -# The ratio number of test exmaples/number of train examples +# The ratio number of test exmaples/number of train samples split: 0.8 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 5 diff --git a/summit/examples/config_files/config_example_3.yml b/summit/examples/config_files/config_example_3.yml index 67ef06ea..b17156c9 100644 --- a/summit/examples/config_files/config_example_3.yml +++ b/summit/examples/config_files/config_example_3.yml @@ -33,7 +33,7 @@ track_tracebacks: True # If the dataset is multiclass, will use this multiclass-to-biclass method multiclass_method: "oneVersusOne" -# The ratio number of test exmaples/number of train examples +# The ratio number of test exmaples/number of train samples split: 0.8 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 5 diff --git a/summit/execute.py b/summit/execute.py index a6a6f74b..30196e99 100644 --- a/summit/execute.py +++ b/summit/execute.py @@ -11,19 +11,54 @@ def execute(config_path=None): # pragma: no cover exec_classif.exec_classif(sys.argv[1:]) else: if config_path == "example 0": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_0.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_0.yml") elif config_path == "example 1": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_1.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_1.yml") elif config_path == "example 2.1.1": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_1_1.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_2_1_1.yml") elif config_path == "example 2.1.2": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_1_2.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_2_1_2.yml") elif config_path == "example 2.2": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_2.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_2_2.yml") elif config_path == "example 2.3": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_3.yml") + config_path = 
os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_2_3.yml") elif config_path == "example 3": - config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_3.yml") + config_path = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "examples", + "config_files", + "config_example_3.yml") exec_classif.exec_classif(["--config_path", config_path]) diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py index 91d931be..608856a8 100644 --- a/summit/multiview_platform/exec_classif.py +++ b/summit/multiview_platform/exec_classif.py @@ -1,4 +1,3 @@ -import itertools import logging import os import pkgutil @@ -7,7 +6,6 @@ import traceback import matplotlib import numpy as np -from sklearn.tree import DecisionTreeClassifier # Import own modules from . import monoview_classifiers @@ -16,8 +14,8 @@ from .monoview.exec_classif_mono_view import exec_monoview from .multiview.exec_multiview import exec_multiview from .result_analysis.execution import analyze_iterations, analyze from .utils import execution, dataset, configuration -from .utils.organization import secure_file_path from .utils.dataset import delete_HDF5 +from .utils.organization import secure_file_path matplotlib.use( 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure @@ -95,7 +93,8 @@ def init_argument_dictionaries(benchmark, views_dictionary, def init_multiview_exps(classifier_names, views_dictionary, nb_class, - kwargs_init, hps_method, hps_kwargs): # pragma: no cover + kwargs_init, hps_method, + hps_kwargs): # pragma: no cover multiview_arguments = [] for classifier_name in classifier_names: arguments = get_path_dict(kwargs_init[classifier_name]) @@ -104,7 +103,8 @@ def init_multiview_exps(classifier_names, views_dictionary, nb_class, gen_single_multiview_arg_dictionary(classifier_name, arguments, nb_class, - {"param_grid":hps_kwargs[classifier_name]}, + {"param_grid": hps_kwargs[ + classifier_name]}, views_dictionary=views_dictionary)] elif hps_method == "Random": hps_kwargs = dict((key, value) @@ -134,7 +134,7 @@ def init_multiview_exps(classifier_names, views_dictionary, nb_class, def init_monoview_exps(classifier_names, views_dictionary, nb_class, kwargs_init, hps_method, - hps_kwargs): # pragma: no cover + hps_kwargs): # pragma: no cover r"""Used to add each monoview exeperience args to the list of monoview experiences args. 
First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -163,12 +163,13 @@ def init_monoview_exps(classifier_names, for classifier_name in classifier_names: if hps_method == "Grid": arguments = gen_single_monoview_arg_dictionary(classifier_name, - kwargs_init, - nb_class, - view_index, - view_name, + kwargs_init, + nb_class, + view_index, + view_name, {"param_grid": - hps_kwargs[classifier_name]}) + hps_kwargs[ + classifier_name]}) elif hps_method == "Random": hps_kwargs = dict((key, value) for key, value in hps_kwargs.items() @@ -188,10 +189,11 @@ def init_monoview_exps(classifier_names, hps_kwargs) else: - raise ValueError('At the moment only "None", "Random" or "Grid" ' - 'are available as hyper-parameter search ' - 'methods, sadly "{}" is not'.format(hps_method) - ) + raise ValueError( + 'At the moment only "None", "Random" or "Grid" ' + 'are available as hyper-parameter search ' + 'methods, sadly "{}" is not'.format(hps_method) + ) monoview_arguments.append(arguments) return monoview_arguments @@ -208,7 +210,7 @@ def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, "view_index": view_index, "classifier_name": classifier_name, "nb_class": nb_class, - "hps_kwargs":hps_kwargs } + "hps_kwargs": hps_kwargs} def gen_single_multiview_arg_dictionary(classifier_name, arguments, nb_class, @@ -280,6 +282,7 @@ def is_dict_in(dictionary): paths.append(key) return paths + def init_kwargs(args, classifiers_names, framework="monoview"): r"""Used to init kwargs thanks to a function in each monoview classifier package. @@ -363,10 +366,12 @@ def arange_metrics(metrics, metric_princ): metrics : list of lists The metrics list, but arranged so the first one is the principal one.""" if metric_princ in metrics: - metrics = dict((key, value) if not key == metric_princ else (key+"*", value) for key, value in metrics.items()) + metrics = dict( + (key, value) if not key == metric_princ else (key + "*", value) for + key, value in metrics.items()) else: raise ValueError("{} not in metric pool ({})".format(metric_princ, - metrics)) + metrics)) return metrics @@ -374,7 +379,7 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary, k_folds, dataset_var): """ Initializes the benchmark, by saving the indices of the train - examples and the cross validation folds. + samples and the cross validation folds. 
Parameters ---------- @@ -382,7 +387,7 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary, The benchmark's result directory classification_indices : numpy array - The indices of the examples, splitted for the train/test split + The indices of the samples, splitted for the train/test split labels : numpy array The labels of the dataset @@ -400,7 +405,7 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary, logging.debug("Start:\t Benchmark initialization") secure_file_path(os.path.join(directory, "train_labels.csv")) train_indices = classification_indices[0] - train_labels = dataset_var.get_labels(example_indices=train_indices) + train_labels = dataset_var.get_labels(sample_indices=train_indices) np.savetxt(os.path.join(directory, "train_labels.csv"), train_labels, delimiter=",") np.savetxt(os.path.join(directory, "train_indices.csv"), @@ -558,7 +563,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, hyper_param_search=hyper_param_search, metrics=metrics, **arguments)] - except: + except BaseException: if track_tracebacks: traceback_outputs[ arguments["classifier_name"] + "-" + arguments[ @@ -591,7 +596,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, hps_method=hyper_param_search, metrics=metrics, n_iter=args["hps_iter"], **arguments)] - except: + except BaseException: if track_tracebacks: traceback_outputs[ arguments["classifier_name"]] = traceback.format_exc() @@ -673,7 +678,7 @@ def exec_benchmark(nb_cores, stats_iter, **arguments) analyze_iterations([benchmark_results], benchmark_arguments_dictionaries, stats_iter, - metrics, example_ids=dataset_var.example_ids, + metrics, sample_ids=dataset_var.sample_ids, labels=dataset_var.get_labels()) results += [benchmark_results] logging.debug("Done:\t Executing all the needed benchmarks") @@ -684,7 +689,7 @@ def exec_benchmark(nb_cores, stats_iter, benchmark_arguments_dictionaries, metrics, directory, - dataset_var.example_ids, + dataset_var.sample_ids, dataset_var.get_labels()) logging.debug("Done:\t Analyzing predictions") delete(benchmark_arguments_dictionaries, nb_cores, dataset_var) @@ -704,7 +709,7 @@ def exec_classif(arguments): # pragma: no cover >>> exec_classif([--config_path, /path/to/config/files/]) - >>> + >>> """ start = time.time() args = execution.parse_the_args(arguments) @@ -720,8 +725,8 @@ def exec_classif(arguments): # pragma: no cover monoview_algos = args["algos_monoview"] multiview_algos = args["algos_multiview"] path, dataset_list = execution.find_dataset_names(args["pathf"], - args["file_type"], - args["name"]) + args["file_type"], + args["name"]) args["pathf"] = path for dataset_name in dataset_list: # noise_results = [] @@ -750,7 +755,7 @@ def exec_classif(arguments): # pragma: no cover args["classes"], random_state, args["full"], - ) + ) args["name"] = datasetname splits = execution.gen_splits(dataset_var.get_labels(), args["split"], @@ -777,9 +782,9 @@ def exec_classif(arguments): # pragma: no cover if metrics == "all": metrics_names = [name for _, name, isPackage in pkgutil.iter_modules( - [os.path.join(os.path.dirname( - os.path.dirname(os.path.realpath(__file__))), - 'metrics')]) if + [os.path.join(os.path.dirname( + os.path.dirname(os.path.realpath(__file__))), + 'metrics')]) if not isPackage and name not in ["framework", "log_loss", "matthews_corrcoef", @@ -788,7 +793,7 @@ def exec_classif(arguments): # pragma: no cover for metric_name in metrics_names) metrics = arange_metrics(metrics, 
args["metric_princ"]) - benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos,) + benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos, ) init_kwargs = init_kwargs_func(args, benchmark) data_base_time = time.time() - start argument_dictionaries = init_argument_dictionaries( @@ -809,6 +814,6 @@ def exec_classif(arguments): # pragma: no cover benchmark_argument_dictionaries, directory, metrics, dataset_var, args["track_tracebacks"]) - # noise_results.append([noise_std, results_mean_stds]) - # plot_results_noise(directory, noise_results, metrics[0][0], - # dataset_name) + # noise_results.append([noise_std, results_mean_stds]) + # plot_results_noise(directory, noise_results, metrics[0][0], + # dataset_name) diff --git a/summit/multiview_platform/metrics/accuracy_score.py b/summit/multiview_platform/metrics/accuracy_score.py index e9faae69..ba07df93 100644 --- a/summit/multiview_platform/metrics/accuracy_score.py +++ b/summit/multiview_platform/metrics/accuracy_score.py @@ -3,14 +3,9 @@ get_scorer: returns a sklearn scorer for grid search """ -import warnings - from sklearn.metrics import accuracy_score as metric from sklearn.metrics import make_scorer -warnings.warn("the accuracy_score module is deprecated", DeprecationWarning, - stacklevel=2) - # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype @@ -41,5 +36,6 @@ def get_scorer(**kwargs): def get_config(**kwargs): - config_string = "Accuracy score using {}, (higher is better)".format(kwargs) + config_string = "Accuracy score using {}, (higher is better)".format( + kwargs) return config_string diff --git a/summit/multiview_platform/metrics/f1_score.py b/summit/multiview_platform/metrics/f1_score.py index 6b9b89df..d9924cf5 100644 --- a/summit/multiview_platform/metrics/f1_score.py +++ b/summit/multiview_platform/metrics/f1_score.py @@ -3,13 +3,9 @@ get_scorer: returns a sklearn scorer for grid search """ -import warnings - from sklearn.metrics import f1_score as metric from sklearn.metrics import make_scorer -warnings.warn("the f1_score module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/fbeta_score.py b/summit/multiview_platform/metrics/fbeta_score.py index 60a5141a..2b3de2ac 100644 --- a/summit/multiview_platform/metrics/fbeta_score.py +++ b/summit/multiview_platform/metrics/fbeta_score.py @@ -1,11 +1,6 @@ -import warnings - from sklearn.metrics import fbeta_score as metric from sklearn.metrics import make_scorer -warnings.warn("the fbeta_score module is deprecated", DeprecationWarning, - stacklevel=2) - # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/hamming_loss.py b/summit/multiview_platform/metrics/hamming_loss.py index 665dd243..3ee875f2 100644 --- a/summit/multiview_platform/metrics/hamming_loss.py +++ b/summit/multiview_platform/metrics/hamming_loss.py @@ -1,10 +1,6 @@ -import warnings - from sklearn.metrics import hamming_loss as metric from sklearn.metrics import make_scorer -warnings.warn("the hamming_loss module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/jaccard_score.py b/summit/multiview_platform/metrics/jaccard_score.py 
index 248ec66d..f58d6da5 100644 --- a/summit/multiview_platform/metrics/jaccard_score.py +++ b/summit/multiview_platform/metrics/jaccard_score.py @@ -1,11 +1,6 @@ -import warnings - from sklearn.metrics import jaccard_score as metric from sklearn.metrics import make_scorer -warnings.warn("the jaccard_similarity_score module is deprecated", - DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/log_loss.py b/summit/multiview_platform/metrics/log_loss.py index 2b5ab917..3e4cdef4 100644 --- a/summit/multiview_platform/metrics/log_loss.py +++ b/summit/multiview_platform/metrics/log_loss.py @@ -1,10 +1,6 @@ -import warnings - from sklearn.metrics import log_loss as metric from sklearn.metrics import make_scorer -warnings.warn("the log_loss module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/matthews_corrcoef.py b/summit/multiview_platform/metrics/matthews_corrcoef.py index b3b8ec6c..a7e4eb5c 100644 --- a/summit/multiview_platform/metrics/matthews_corrcoef.py +++ b/summit/multiview_platform/metrics/matthews_corrcoef.py @@ -1,10 +1,6 @@ -import warnings - from sklearn.metrics import make_scorer from sklearn.metrics import matthews_corrcoef as metric -warnings.warn("the matthews_corrcoef module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/precision_score.py b/summit/multiview_platform/metrics/precision_score.py index d1c861f9..1e5c44cc 100644 --- a/summit/multiview_platform/metrics/precision_score.py +++ b/summit/multiview_platform/metrics/precision_score.py @@ -1,10 +1,6 @@ -import warnings - from sklearn.metrics import make_scorer from sklearn.metrics import precision_score as metric -warnings.warn("the precision_score module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/recall_score.py b/summit/multiview_platform/metrics/recall_score.py index 26126199..0b7583c9 100644 --- a/summit/multiview_platform/metrics/recall_score.py +++ b/summit/multiview_platform/metrics/recall_score.py @@ -1,10 +1,6 @@ -import warnings - from sklearn.metrics import make_scorer from sklearn.metrics import recall_score as metric -warnings.warn("the recall_score module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/metrics/roc_auc_score.py b/summit/multiview_platform/metrics/roc_auc_score.py index ae21428b..3249f209 100644 --- a/summit/multiview_platform/metrics/roc_auc_score.py +++ b/summit/multiview_platform/metrics/roc_auc_score.py @@ -1,11 +1,6 @@ -import warnings - from sklearn.metrics import make_scorer from sklearn.metrics import roc_auc_score as metric -from sklearn.preprocessing import MultiLabelBinarizer -warnings.warn("the roc_auc_score module is deprecated", DeprecationWarning, - stacklevel=2) # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git 
a/summit/multiview_platform/metrics/zero_one_loss.py b/summit/multiview_platform/metrics/zero_one_loss.py index e3a34492..3db87fc2 100644 --- a/summit/multiview_platform/metrics/zero_one_loss.py +++ b/summit/multiview_platform/metrics/zero_one_loss.py @@ -1,11 +1,6 @@ -import warnings - from sklearn.metrics import make_scorer from sklearn.metrics import zero_one_loss as metric -warnings.warn("the zero_one_loss module is deprecated", DeprecationWarning, - stacklevel=2) - # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype diff --git a/summit/multiview_platform/monoview/exec_classif_mono_view.py b/summit/multiview_platform/monoview/exec_classif_mono_view.py index eed46949..324fdc38 100644 --- a/summit/multiview_platform/monoview/exec_classif_mono_view.py +++ b/summit/multiview_platform/monoview/exec_classif_mono_view.py @@ -20,7 +20,7 @@ from ..utils.multiclass import get_mc_estim from ..utils.organization import secure_file_path # Author-Info -__author__ = "Nikolas Huelsmann, Baptiste BAUVIN" +__author__ = "Baptiste BAUVIN" __status__ = "Prototype" # Production, Development, Prototype @@ -33,10 +33,9 @@ def exec_monoview_multicore(directory, name, labels_names, path, random_state, labels, hyper_param_search="randomized_search", metrics=[["accuracy_score", None]], n_iter=30, - **args): # pragma: no cover + **args): # pragma: no cover dataset_var = HDF5Dataset( - hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", - "r")) + hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")) neededViewIndex = args["view_index"] X = dataset_var.get_v(neededViewIndex) Y = labels @@ -50,28 +49,30 @@ def exec_monoview_multicore(directory, name, labels_names, **args) -def exec_monoview(directory, X, Y, database_name, labels_names, classification_indices, +def exec_monoview(directory, X, Y, database_name, labels_names, + classification_indices, k_folds, nb_cores, databaseType, path, random_state, hyper_param_search="Random", - metrics={"accuracy_score*":{}}, n_iter=30, view_name="", + metrics={"accuracy_score*": {}}, n_iter=30, view_name="", hps_kwargs={}, **args): logging.debug("Start:\t Loading data") kwargs, \ - t_start, \ - view_name, \ - classifier_name, \ - X, \ - learningRate, \ - labelsString, \ - output_file_name,\ - directory,\ - base_file_name = init_constants(args, X, classification_indices, - labels_names, - database_name, directory, view_name, ) + t_start, \ + view_name, \ + classifier_name, \ + X, \ + learningRate, \ + labelsString, \ + output_file_name, \ + directory, \ + base_file_name = init_constants(args, X, classification_indices, + labels_names, + database_name, directory, view_name, ) logging.debug("Done:\t Loading data") logging.debug( - "Info:\t Classification - Database:" + str(database_name) + " View:" + str( + "Info:\t Classification - Database:" + str( + database_name) + " View:" + str( view_name) + " train ratio:" + str(learningRate) + ", CrossValidation k-folds: " + str( k_folds.n_splits) + ", cores:" @@ -92,11 +93,11 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i classifier_class_name = classifier_module.classifier_class_name hyper_param_beg = time.monotonic() cl_kwargs = get_hyper_params(classifier_module, hyper_param_search, - classifier_name, - classifier_class_name, - X_train, y_train, - random_state, output_file_name, - k_folds, nb_cores, metrics, kwargs, + classifier_name, + classifier_class_name, + X_train, y_train, + random_state, 
output_file_name, + k_folds, nb_cores, metrics, kwargs, **hps_kwargs) hyper_param_duration = time.monotonic() - hyper_param_beg logging.debug("Done:\t Generate classifier args") @@ -130,7 +131,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i whole_duration = time.monotonic() - t_start logging.debug( - "Info:\t Duration for training and predicting: " + str(whole_duration) + "[s]") + "Info:\t Duration for training and predicting: " + str( + whole_duration) + "[s]") logging.debug("Start:\t Getting results") result_analyzer = MonoviewResultAnalyzer(view_name=view_name, @@ -151,7 +153,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i nb_cores=nb_cores, duration=whole_duration) string_analysis, images_analysis, metrics_scores, class_metrics_scores, \ - confusion_matrix = result_analyzer.analyze() + confusion_matrix = result_analyzer.analyze() logging.debug("Done:\t Getting results") logging.debug("Start:\t Saving preds") @@ -163,7 +165,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i return MonoviewResult(view_index, classifier_name, view_name, metrics_scores, full_pred, cl_kwargs, classifier, X_train.shape[1], - hyper_param_duration, fit_duration, pred_duration, class_metrics_scores) + hyper_param_duration, fit_duration, pred_duration, + class_metrics_scores) def init_constants(args, X, classification_indices, labels_names, @@ -175,10 +178,10 @@ def init_constants(args, X, classification_indices, labels_names, t_start = time.monotonic() cl_type = kwargs["classifier_name"] learning_rate = float(len(classification_indices[0])) / ( - len(classification_indices[0]) + len(classification_indices[1])) + len(classification_indices[0]) + len(classification_indices[1])) labels_string = "-".join(labels_names) cl_type_string = cl_type - directory = os.path.join(directory, cl_type_string, view_name,) + directory = os.path.join(directory, cl_type_string, view_name, ) base_file_name = cl_type_string + '-' + name + "-" + view_name + "-" output_file_name = os.path.join(directory, base_file_name) secure_file_path(output_file_name) @@ -204,8 +207,8 @@ def get_hyper_params(classifier_module, search_method, classifier_module_name, "Start:\t " + search_method + " best settings for " + classifier_module_name) classifier_hp_search = getattr(hyper_parameter_search, search_method) estimator = getattr(classifier_module, classifier_class_name)( - random_state=random_state, - **kwargs[classifier_module_name]) + random_state=random_state, + **kwargs[classifier_module_name]) estimator = get_mc_estim(estimator, random_state, multiview=False, y=y_train) hps = classifier_hp_search(estimator, scoring=metrics, cv=k_folds, @@ -223,12 +226,14 @@ def get_hyper_params(classifier_module, search_method, classifier_module_name, def save_results(string_analysis, output_file_name, full_labels_pred, y_train_pred, - y_train, images_analysis, y_test, confusion_matrix): # pragma: no cover + y_train, images_analysis, y_test, + confusion_matrix): # pragma: no cover logging.info(string_analysis) - output_text_file = open(output_file_name + 'summary.txt', 'w', encoding="utf-8") + output_text_file = open(output_file_name + 'summary.txt', 'w', + encoding="utf-8") output_text_file.write(string_analysis) output_text_file.close() - np.savetxt(output_file_name+"confusion_matrix.csv", confusion_matrix, + np.savetxt(output_file_name + "confusion_matrix.csv", confusion_matrix, delimiter=', ') np.savetxt(output_file_name + "full_pred.csv", 
full_labels_pred.astype(np.int16), delimiter=",") diff --git a/summit/multiview_platform/monoview/monoview_utils.py b/summit/multiview_platform/monoview/monoview_utils.py index dcecfa6f..c46c8b2e 100644 --- a/summit/multiview_platform/monoview/monoview_utils.py +++ b/summit/multiview_platform/monoview/monoview_utils.py @@ -1,12 +1,12 @@ -import pickle import os +import pickle + import matplotlib.pyplot as plt import numpy as np from matplotlib.ticker import FuncFormatter -from scipy.stats import uniform, randint from ..utils.base import BaseClassifier, ResultAnalyser -from ..utils.hyper_parameter_search import CustomRandint, CustomUniform +from ..utils.hyper_parameter_search import CustomRandint # Author-Info __author__ = "Baptiste Bauvin" @@ -53,7 +53,7 @@ def change_label_to_zero(y): def compute_possible_combinations(params_dict): n_possibs = np.ones(len(params_dict)) * np.inf for value_index, value in enumerate(params_dict.values()): - if type(value) == list: + if isinstance(value, list): n_possibs[value_index] = len(value) elif isinstance(value, CustomRandint): n_possibs[value_index] = value.get_nb_possibilities() @@ -115,13 +115,14 @@ def gen_test_folds_preds(X_train, y_train, KFolds, estimator): class BaseMonoviewClassifier(BaseClassifier): - def get_feature_importance(self, directory, base_file_name, nb_considered_feats=50): + def get_feature_importance(self, directory, base_file_name, + nb_considered_feats=50): """Used to generate a graph and a pickle dictionary representing feature importances""" feature_importances = self.feature_importances_ sorted_args = np.argsort(-feature_importances) feature_importances_sorted = feature_importances[sorted_args][ - :nb_considered_feats] + :nb_considered_feats] feature_indices_sorted = sorted_args[:nb_considered_feats] fig, ax = plt.subplots() x = np.arange(len(feature_indices_sorted)) @@ -129,8 +130,8 @@ class BaseMonoviewClassifier(BaseClassifier): ax.yaxis.set_major_formatter(formatter) plt.bar(x, feature_importances_sorted) plt.title("Importance depending on feature") - fig.savefig(os.path.join(directory, base_file_name + "feature_importances.png") - , transparent=True) + fig.savefig( + os.path.join(directory, base_file_name + "feature_importances.png"), transparent=True) plt.close() features_importances_dict = dict((featureIndex, featureImportance) for featureIndex, featureImportance in @@ -180,8 +181,9 @@ class MonoviewResult(object): def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, - boosting_bound=None, set="train", zero_to_one=True): # pragma: no cover - if type(name) is not str: + boosting_bound=None, set="train", + zero_to_one=True): # pragma: no cover + if not isinstance(name, str): name = " ".join(name.getConfig().strip().split(" ")[:2]) f, ax = plt.subplots(nrows=1, ncols=1) if zero_to_one: @@ -211,7 +213,8 @@ class MonoviewResultAnalyzer(ResultAnalyser): def __init__(self, view_name, classifier_name, shape, classifier, classification_indices, k_folds, hps_method, metrics_dict, n_iter, class_label_names, pred, - directory, base_file_name, labels, database_name, nb_cores, duration): + directory, base_file_name, labels, database_name, nb_cores, + duration): ResultAnalyser.__init__(self, classifier, classification_indices, k_folds, hps_method, metrics_dict, n_iter, class_label_names, pred, @@ -228,4 +231,4 @@ class MonoviewResultAnalyzer(ResultAnalyser): def get_view_specific_info(self): return "\t- View name : {}\t View shape : {}\n".format(self.view_name, - 
self.shape) \ No newline at end of file + self.shape) diff --git a/summit/multiview_platform/monoview_classifiers/adaboost.py b/summit/multiview_platform/monoview_classifiers/adaboost.py index 88a042ec..5bd49ef4 100644 --- a/summit/multiview_platform/monoview_classifiers/adaboost.py +++ b/summit/multiview_platform/monoview_classifiers/adaboost.py @@ -1,13 +1,12 @@ -import time import os +import time import numpy as np from sklearn.ensemble import AdaBoostClassifier -from sklearn.tree import DecisionTreeClassifier from .. import metrics -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \ - get_accuracy_graph +from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph +from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint from ..utils.base import base_boosting_estimators # Author-Info @@ -56,10 +55,9 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, n_estimators=50, base_estimator=None, base_estimator_config=None, **kwargs): - base_estimator = BaseMonoviewClassifier.get_base_estimator(self, base_estimator, - base_estimator_config) + base_estimator_config) AdaBoostClassifier.__init__(self, random_state=random_state, n_estimators=n_estimators, @@ -69,7 +67,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): self.param_names = ["n_estimators", "base_estimator"] self.classed_params = ["base_estimator"] self.distribs = [CustomRandint(low=1, high=500), - base_boosting_estimators] + base_boosting_estimators] self.weird_strings = {"base_estimator": "class_name"} self.plotted_metric = metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" @@ -128,9 +126,11 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [step_pred for step_pred in self.staged_predict(X)]) return pred - def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): # pragma: no cover + def get_interpretation(self, directory, base_file_name, y_test, + multi_class=False): # pragma: no cover interpretString = "" - interpretString += self.get_feature_importance(directory, base_file_name) + interpretString += self.get_feature_importance(directory, + base_file_name) interpretString += "\n\n Estimator error | Estimator weight\n" interpretString += "\n".join( [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for @@ -140,13 +140,15 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) get_accuracy_graph(step_test_metrics, "Adaboost", - os.path.join(directory, base_file_name +"test_metrics.png"), + os.path.join(directory, + base_file_name + "test_metrics.png"), self.plotted_metric_name, set="test") np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"), step_test_metrics, delimiter=',') - np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"), - self.metrics, delimiter=',') + np.savetxt( + os.path.join(directory, base_file_name + "train_metrics.csv"), + self.metrics, delimiter=',') np.savetxt(os.path.join(directory, base_file_name + "times.csv"), np.array([self.train_time, self.pred_time]), delimiter=',') return interpretString diff --git a/summit/multiview_platform/monoview_classifiers/decision_tree.py b/summit/multiview_platform/monoview_classifiers/decision_tree.py index be9f5e24..6a6a3b2f 100644 --- a/summit/multiview_platform/monoview_classifiers/decision_tree.py +++ 
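In the adaboost hunk above, CustomRandint is now imported from summit.multiview_platform.utils.hyper_parameter_search instead of monoview_utils, and it is paired with base_boosting_estimators in self.distribs for the randomized hyper-parameter search. The stand-in class below only illustrates how such a distribution plugs into compute_possible_combinations (shown earlier in monoview_utils); the real CustomRandint implementation is not part of this patch:

    import numpy as np


    class CustomRandintSketch:
        # Discrete uniform draw over [low, high), mirroring the low/high
        # arguments used in self.distribs above.
        def __init__(self, low=0, high=0):
            self.low = low
            self.high = high

        def rvs(self, random_state=None):
            return np.random.RandomState(random_state).randint(self.low,
                                                                self.high)

        def get_nb_possibilities(self):
            return self.high - self.low


    n_estimators_distrib = CustomRandintSketch(low=1, high=500)
    print(n_estimators_distrib.get_nb_possibilities())  # 499 candidate values
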
b/summit/multiview_platform/monoview_classifiers/decision_tree.py @@ -1,6 +1,7 @@ from sklearn.tree import DecisionTreeClassifier -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint # Author-Info __author__ = "Baptiste Bauvin" @@ -32,5 +33,6 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier): interpretString = "First featrue : \n\t{} <= {}\n".format( self.tree_.feature[0], self.tree_.threshold[0]) - interpretString += self.get_feature_importance(directory, base_file_name) + interpretString += self.get_feature_importance(directory, + base_file_name) return interpretString diff --git a/summit/multiview_platform/monoview_classifiers/gradient_boosting.py b/summit/multiview_platform/monoview_classifiers/gradient_boosting.py index 7136370f..8651dbc6 100644 --- a/summit/multiview_platform/monoview_classifiers/gradient_boosting.py +++ b/summit/multiview_platform/monoview_classifiers/gradient_boosting.py @@ -1,13 +1,13 @@ -import time import os +import time import numpy as np from sklearn.ensemble import GradientBoostingClassifier from sklearn.tree import DecisionTreeClassifier from .. import metrics -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \ - get_accuracy_graph +from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph +from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint # Author-Info __author__ = "Baptiste Bauvin" @@ -39,7 +39,7 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): self.param_names = ["n_estimators", "max_depth"] self.classed_params = [] self.distribs = [CustomRandint(low=50, high=500), - CustomRandint(low=1, high=10),] + CustomRandint(low=1, high=10), ] self.weird_strings = {} self.plotted_metric = metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" @@ -71,12 +71,14 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): [step_pred for step_pred in self.staged_predict(X)]) return pred - def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): + def get_interpretation(self, directory, base_file_name, y_test, + multi_class=False): interpretString = "" if multi_class: return interpretString else: - interpretString += self.get_feature_importance(directory, base_file_name) + interpretString += self.get_feature_importance(directory, + base_file_name) step_test_metrics = np.array( [self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) @@ -86,10 +88,14 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): get_accuracy_graph(self.metrics, "AdaboostClassic", directory + "metrics.png", self.plotted_metric_name) - np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"), step_test_metrics, - delimiter=',') - np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"), self.metrics, - delimiter=',') + np.savetxt( + os.path.join(directory, base_file_name + "test_metrics.csv"), + step_test_metrics, + delimiter=',') + np.savetxt( + os.path.join(directory, base_file_name + "train_metrics.csv"), + self.metrics, + delimiter=',') np.savetxt(os.path.join(directory, base_file_name + "times.csv"), np.array([self.train_time, self.pred_time]), delimiter=',') diff --git a/summit/multiview_platform/monoview_classifiers/knn.py 
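Both the adaboost and gradient_boosting hunks rely on sklearn's staged_predict to build self.step_predictions, then score each boosting step with the plotted metric (zero_one_loss). A small self-contained illustration of that pattern, using synthetic data rather than the platform's HDF5 views:

    import numpy as np
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.metrics import zero_one_loss

    rng = np.random.RandomState(42)
    X = rng.rand(100, 5)
    y = (X[:, 0] > 0.5).astype(int)

    clf = GradientBoostingClassifier(n_estimators=20).fit(X, y)
    # One prediction vector per boosting step, as in self.step_predictions.
    step_predictions = np.array([step_pred
                                 for step_pred in clf.staged_predict(X)])
    # One zero-one loss per step, as in step_test_metrics above.
    step_metrics = np.array([zero_one_loss(y, step_pred)
                             for step_pred in step_predictions])
    print(step_metrics.shape)  # (20,)
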
b/summit/multiview_platform/monoview_classifiers/knn.py index 2991f086..0aeb093f 100644 --- a/summit/multiview_platform/monoview_classifiers/knn.py +++ b/summit/multiview_platform/monoview_classifiers/knn.py @@ -1,6 +1,7 @@ from sklearn.neighbors import KNeighborsClassifier -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint # Author-Info __author__ = "Baptiste Bauvin" diff --git a/summit/multiview_platform/monoview_classifiers/lasso.py b/summit/multiview_platform/monoview_classifiers/lasso.py index c91d2355..9359a300 100644 --- a/summit/multiview_platform/monoview_classifiers/lasso.py +++ b/summit/multiview_platform/monoview_classifiers/lasso.py @@ -1,8 +1,8 @@ import numpy as np from sklearn.linear_model import Lasso as LassoSK -from ..monoview.monoview_utils import CustomRandint, CustomUniform, \ - BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint # Author-Info __author__ = "Baptiste Bauvin" @@ -71,4 +71,4 @@ class Lasso(LassoSK, BaseMonoviewClassifier): prediction = LassoSK.predict(self, X) signed = np.sign(prediction) signed[np.where(signed == -1)] = 0 - return signed \ No newline at end of file + return signed diff --git a/summit/multiview_platform/monoview_classifiers/random_forest.py b/summit/multiview_platform/monoview_classifiers/random_forest.py index ece278a5..d39943a4 100644 --- a/summit/multiview_platform/monoview_classifiers/random_forest.py +++ b/summit/multiview_platform/monoview_classifiers/random_forest.py @@ -1,6 +1,7 @@ from sklearn.ensemble import RandomForestClassifier -from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint # Author-Info __author__ = "Baptiste Bauvin" @@ -65,7 +66,8 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): ["gini", "entropy"], [random_state]] self.weird_strings = {} - def get_interpretation(self, directory, base_file_name, y_test, multiclass=False): + def get_interpretation(self, directory, base_file_name, y_test, + multiclass=False): """ Parameters @@ -78,5 +80,6 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): string for interpretation interpret_string """ interpret_string = "" - interpret_string += self.get_feature_importance(directory, base_file_name) + interpret_string += self.get_feature_importance(directory, + base_file_name) return interpret_string diff --git a/summit/multiview_platform/monoview_classifiers/sgd.py b/summit/multiview_platform/monoview_classifiers/sgd.py index 09c345ba..e5f01a95 100644 --- a/summit/multiview_platform/monoview_classifiers/sgd.py +++ b/summit/multiview_platform/monoview_classifiers/sgd.py @@ -1,6 +1,7 @@ from sklearn.linear_model import SGDClassifier -from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform # Author-Info __author__ = "Baptiste Bauvin" diff --git a/summit/multiview_platform/monoview_classifiers/svm_linear.py b/summit/multiview_platform/monoview_classifiers/svm_linear.py index 8c15d4e9..40eaa483 100644 --- 
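The lasso.py hunk above shows the predict override that turns the regressor's continuous output into class labels: np.sign maps predictions to -1/0/1 and the -1 entries are then relabelled 0. A short worked example of that thresholding on toy values (not taken from the platform):

    import numpy as np
    from sklearn.linear_model import Lasso

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])

    reg = Lasso(alpha=0.1, max_iter=10000).fit(X, y)
    prediction = reg.predict(np.array([[-1.0], [2.5]]))
    signed = np.sign(prediction)
    signed[np.where(signed == -1)] = 0  # negative predictions become class 0
    print(signed)  # thresholded labels in {0.0, 1.0}
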
a/summit/multiview_platform/monoview_classifiers/svm_linear.py +++ b/summit/multiview_platform/monoview_classifiers/svm_linear.py @@ -1,6 +1,7 @@ from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \ SVCClassifier -from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform # Author-Info __author__ = "Baptiste Bauvin" diff --git a/summit/multiview_platform/monoview_classifiers/svm_poly.py b/summit/multiview_platform/monoview_classifiers/svm_poly.py index b0fc0e8b..86f93db5 100644 --- a/summit/multiview_platform/monoview_classifiers/svm_poly.py +++ b/summit/multiview_platform/monoview_classifiers/svm_poly.py @@ -1,7 +1,8 @@ from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \ SVCClassifier -from ..monoview.monoview_utils import CustomUniform, CustomRandint, \ - BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import \ + CustomUniform, CustomRandint # Author-Info __author__ = "Baptiste Bauvin" diff --git a/summit/multiview_platform/monoview_classifiers/svm_rbf.py b/summit/multiview_platform/monoview_classifiers/svm_rbf.py index c197811e..450ed630 100644 --- a/summit/multiview_platform/monoview_classifiers/svm_rbf.py +++ b/summit/multiview_platform/monoview_classifiers/svm_rbf.py @@ -1,6 +1,7 @@ from summit.multiview_platform.monoview_classifiers.additions.SVCClassifier import \ SVCClassifier -from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier +from ..monoview.monoview_utils import BaseMonoviewClassifier +from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform # Author-Info __author__ = "Baptiste Bauvin" diff --git a/summit/multiview_platform/multiview/exec_multiview.py b/summit/multiview_platform/multiview/exec_multiview.py index c89034cf..bed8317a 100644 --- a/summit/multiview_platform/multiview/exec_multiview.py +++ b/summit/multiview_platform/multiview/exec_multiview.py @@ -47,7 +47,7 @@ def init_constants(kwargs, classification_indices, metrics, views = kwargs["view_names"] views_indices = kwargs["view_indices"] if metrics is None: - metrics = {"f1_score*":{}} + metrics = {"f1_score*": {}} classifier_name = kwargs["classifier_name"] classifier_config = kwargs[classifier_name] learning_rate = len(classification_indices[0]) / float( @@ -55,24 +55,24 @@ def init_constants(kwargs, classification_indices, metrics, t_start = time.time() logging.info("Info\t: Classification - Database : " + str( name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + classifier_name + " ; Cores : " + str( + " ; Algorithm : " + classifier_name + " ; Cores : " + str( nb_cores) + ", Train ratio : " + str(learning_rate) + - ", CV on " + str(k_folds.n_splits) + " folds") + ", CV on " + str(k_folds.n_splits) + " folds") for view_index, view_name in zip(views_indices, views): logging.info("Info:\t Shape of " + str(view_name) + " :" + str( dataset_var.get_shape())) labels = dataset_var.get_labels() directory = os.path.join(directory, classifier_name) - base_file_name = classifier_name+"-"+dataset_var.get_name()+"-" + base_file_name = classifier_name + "-" + dataset_var.get_name() + "-" output_file_name = os.path.join(directory, base_file_name) return classifier_name, t_start, views_indices, \ - classifier_config, views, learning_rate, labels, 
output_file_name,\ - directory, base_file_name, metrics + classifier_config, views, learning_rate, labels, output_file_name, \ + directory, base_file_name, metrics def save_results(string_analysis, images_analysis, output_file_name, - confusion_matrix): # pragma: no cover + confusion_matrix): # pragma: no cover """ Save results in derectory @@ -102,10 +102,11 @@ def save_results(string_analysis, images_analysis, output_file_name, """ logging.info(string_analysis) secure_file_path(output_file_name) - output_text_file = open(output_file_name + 'summary.txt', 'w', encoding="utf-8") + output_text_file = open(output_file_name + 'summary.txt', 'w', + encoding="utf-8") output_text_file.write(string_analysis) output_text_file.close() - np.savetxt(output_file_name+"confusion_matrix.csv", confusion_matrix, + np.savetxt(output_file_name + "confusion_matrix.csv", confusion_matrix, delimiter=',') if images_analysis is not None: @@ -128,7 +129,7 @@ def exec_multiview_multicore(directory, core_index, name, learning_rate, database_type, path, labels_dictionary, random_state, labels, hyper_param_search=False, nb_cores=1, metrics=None, - n_iter=30, **arguments): # pragma: no cover + n_iter=30, **arguments): # pragma: no cover """ execute multiview process on @@ -238,17 +239,17 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.debug("Start:\t Initialize constants") cl_type, \ - t_start, \ - views_indices, \ - classifier_config, \ - views, \ - learning_rate, \ - labels, \ - output_file_name,\ - directory,\ - base_file_name, \ - metrics = init_constants(kwargs, classification_indices, metrics, name, - nb_cores, k_folds, dataset_var, directory) + t_start, \ + views_indices, \ + classifier_config, \ + views, \ + learning_rate, \ + labels, \ + output_file_name, \ + directory, \ + base_file_name, \ + metrics = init_constants(kwargs, classification_indices, metrics, name, + nb_cores, k_folds, dataset_var, directory) logging.debug("Done:\t Initialize constants") extraction_time = time.time() - t_start @@ -260,7 +261,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.debug("Start:\t Getting classifiers modules") # classifierPackage = getattr(multiview_classifiers, - # CL_type) # Permet d'appeler un module avec une string + # CL_type) # Permet d'appeler un module avec une string classifier_module = getattr(multiview_classifiers, cl_type) classifier_name = classifier_module.classifier_class_name # classifierClass = getattr(classifierModule, CL_type + "Class") @@ -271,11 +272,11 @@ def exec_multiview(directory, dataset_var, name, classification_indices, if hps_method != "None": hps_method_class = getattr(hyper_parameter_search, hps_method) estimator = getattr(classifier_module, classifier_name)( - random_state=random_state, - **classifier_config) + random_state=random_state, + **classifier_config) estimator = get_mc_estim(estimator, random_state, - multiview=True, - y=dataset_var.get_labels()[learning_indices]) + multiview=True, + y=dataset_var.get_labels()[learning_indices]) hps = hps_method_class(estimator, scoring=metrics, cv=k_folds, random_state=random_state, framework="multiview", n_jobs=nb_cores, @@ -308,12 +309,12 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.debug("Start:\t Predicting") train_pred = classifier.predict(dataset_var, - example_indices=learning_indices, - view_indices=views_indices) + sample_indices=learning_indices, + view_indices=views_indices) pred_beg = time.monotonic() test_pred = 
classifier.predict(dataset_var, - example_indices=validation_indices, - view_indices=views_indices) + sample_indices=validation_indices, + view_indices=views_indices) pred_duration = time.monotonic() - pred_beg full_pred = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100 full_pred[learning_indices] = train_pred @@ -335,7 +336,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices, hps_method=hps_method, metrics_dict=metrics, n_iter=n_iter, - class_label_names=list(labels_dictionary.values()), + class_label_names=list( + labels_dictionary.values()), pred=full_pred, directory=directory, base_file_name=base_file_name, @@ -344,11 +346,12 @@ def exec_multiview(directory, dataset_var, name, classification_indices, nb_cores=nb_cores, duration=whole_duration) string_analysis, images_analysis, metrics_scores, class_metrics_scores, \ - confusion_matrix = result_analyzer.analyze() + confusion_matrix = result_analyzer.analyze() logging.info("Done:\t Result Analysis for " + cl_type) logging.debug("Start:\t Saving preds") - save_results(string_analysis, images_analysis, output_file_name, confusion_matrix) + save_results(string_analysis, images_analysis, output_file_name, + confusion_matrix) logging.debug("Start:\t Saving preds") return MultiviewResult(cl_type, classifier_config, metrics_scores, diff --git a/summit/multiview_platform/multiview/multiview_utils.py b/summit/multiview_platform/multiview/multiview_utils.py index 9ad93b6c..88c4ac53 100644 --- a/summit/multiview_platform/multiview/multiview_utils.py +++ b/summit/multiview_platform/multiview/multiview_utils.py @@ -1,16 +1,17 @@ +from .. import multiview_classifiers from abc import abstractmethod import numpy as np from .. import monoview_classifiers from ..utils.base import BaseClassifier, ResultAnalyser -from ..utils.dataset import RAMDataset, get_examples_views_indices +from ..utils.dataset import RAMDataset # class FakeEstimator(): # -# def predict(self, X, example_indices=None, view_indices=None): -# return np.zeros(example_indices.shape[0]) +# def predict(self, X, sample_indices=None, view_indices=None): +# return np.zeros(sample_indices.shape[0]) class BaseMultiviewClassifier(BaseClassifier): @@ -32,16 +33,21 @@ class BaseMultiviewClassifier(BaseClassifier): self.used_views = None @abstractmethod - def fit(self, X, y, train_indices=None, view_indices=None): # pragma: no cover + def fit(self, X, y, train_indices=None, + view_indices=None): # pragma: no cover pass @abstractmethod - def predict(self, X, example_indices=None, view_indices=None): # pragma: no cover + def predict(self, X, sample_indices=None, + view_indices=None): # pragma: no cover pass - def _check_views(self, view_indices): # pragma: no cover - if self.used_views is not None and not np.array_equal(np.sort(self.used_views), np.sort(view_indices)): - raise ValueError('Used {} views to fit, and trying to predict on {}'.format(self.used_views, view_indices)) + def _check_views(self, view_indices): # pragma: no cover + if self.used_views is not None and not np.array_equal( + np.sort(self.used_views), np.sort(view_indices)): + raise ValueError( + 'Used {} views to fit, and trying to predict on {}'.format( + self.used_views, view_indices)) # def to_str(self, param_name): # if param_name in self.weird_strings: @@ -67,16 +73,17 @@ class BaseMultiviewClassifier(BaseClassifier): n_classes)) fake_mc_X = RAMDataset( views=[random_state.randint(low=0, high=101, - size=(n_samples, dim)) + size=(n_samples, dim)) for i in range(n_views)], labels=[class_index for 
_ in range(int(n_samples / n_classes)) for class_index in range(n_classes)], are_sparse=False, name="mc_dset", - labels_names=[str(class_index) for class_index in range(n_classes)], + labels_names=[str(class_index) + for class_index in range(n_classes)], view_names=["V0", "V1"], - ) + ) fake_mc_y = [class_index for _ in range(int(n_samples / n_classes)) @@ -125,7 +132,7 @@ def get_available_monoview_classifiers(need_probas=False): available_classifiers = [module_name for module_name in dir(monoview_classifiers) if not ( - module_name.startswith("__") or module_name == "additions")] + module_name.startswith("__") or module_name == "additions")] if need_probas: proba_classifiers = [] for module_name in available_classifiers: @@ -145,9 +152,6 @@ def get_monoview_classifier(classifier_name, multiclass=False): return classifier_class -from .. import multiview_classifiers - - class MultiviewResult(object): def __init__(self, classifier_name, classifier_config, metrics_scores, full_labels, hps_duration, fit_duration, @@ -169,7 +173,7 @@ class MultiviewResult(object): multiview_classifier_module.classifier_class_name)( 42, **self.classifier_config) return multiview_classifier.short_name - except: + except BaseException: return self.classifier_name @@ -180,9 +184,11 @@ class MultiviewResultAnalyzer(ResultAnalyser): pred, directory, base_file_name, labels, database_name, nb_cores, duration): if hps_method.endswith("equiv"): - n_iter = n_iter*len(view_names) - ResultAnalyser.__init__(self, classifier, classification_indices, k_folds, - hps_method, metrics_dict, n_iter, class_label_names, + n_iter = n_iter * len(view_names) + ResultAnalyser.__init__(self, classifier, classification_indices, + k_folds, + hps_method, metrics_dict, n_iter, + class_label_names, pred, directory, base_file_name, labels, database_name, nb_cores, duration) @@ -190,8 +196,9 @@ class MultiviewResultAnalyzer(ResultAnalyser): self.view_names = view_names def get_base_string(self, ): - return "Multiview classification on {} with {}\n\n".format(self.database_name, - self.classifier_name) + return "Multiview classification on {} with {}\n\n".format( + self.database_name, + self.classifier_name) def get_view_specific_info(self): - return "\t- Views : " + ', '.join(self.view_names) + "\n" \ No newline at end of file + return "\t- Views : " + ', '.join(self.view_names) + "\n" diff --git a/summit/multiview_platform/multiview_classifiers/__init__.py b/summit/multiview_platform/multiview_classifiers/__init__.py index 6e242133..a121c84f 100644 --- a/summit/multiview_platform/multiview_classifiers/__init__.py +++ b/summit/multiview_platform/multiview_classifiers/__init__.py @@ -2,8 +2,8 @@ import os for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): if module == '__init__.py' or module[ - -4:] == '.pyc' or module == '__pycache__' or module[ - -3:] != '.py': + -4:] == '.pyc' or module == '__pycache__' or module[ + -3:] != '.py': continue __import__(module[:-3], locals(), globals(), [], 1) del module diff --git a/summit/multiview_platform/multiview_classifiers/additions/diversity_utils.py b/summit/multiview_platform/multiview_classifiers/additions/diversity_utils.py index a4984519..b27c479b 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/diversity_utils.py +++ b/summit/multiview_platform/multiview_classifiers/additions/diversity_utils.py @@ -7,7 +7,7 @@ from .fusion_utils import BaseFusionClassifier from ...multiview.multiview_utils import ConfigGenerator, \ get_available_monoview_classifiers, \ 
BaseMultiviewClassifier -from ...utils.dataset import get_examples_views_indices +from ...utils.dataset import get_samples_views_indices class DiversityFusionClassifier(BaseMultiviewClassifier, @@ -27,9 +27,9 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, self.classifier_configs = classifier_configs def fit(self, X, y, train_indices=None, view_indices=None): - train_indices, view_indices = get_examples_views_indices(X, - train_indices, - view_indices) + train_indices, view_indices = get_samples_views_indices(X, + train_indices, + view_indices) self.used_views = view_indices # TODO : Finer analysis, may support a bit of mutliclass if np.unique(y[train_indices]).shape[0] > 2: @@ -52,21 +52,21 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, self.choose_combination(X, y, train_indices, view_indices) return self - def predict(self, X, example_indices=None, view_indices=None): + def predict(self, X, sample_indices=None, view_indices=None): """Just a weighted majority vote""" - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) self._check_views(view_indices) nb_class = X.get_nb_class() if nb_class > 2: nb_class = 3 - votes = np.zeros((len(example_indices), nb_class), dtype=float) + votes = np.zeros((len(sample_indices), nb_class), dtype=float) monoview_predictions = [ - monoview_estimator.predict(X.get_v(view_idx, example_indices)) + monoview_estimator.predict(X.get_v(view_idx, sample_indices)) for view_idx, monoview_estimator in zip(view_indices, self.monoview_estimators)] - for idx, example_index in enumerate(example_indices): + for idx, sample_index in enumerate(sample_indices): for monoview_estimator_index, monoview_prediciton in enumerate( monoview_predictions): if int(monoview_prediciton[idx]) == -100: @@ -76,20 +76,20 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, predicted_labels = np.argmax(votes, axis=1) return predicted_labels - def get_classifiers_decisions(self, X, view_indices, examples_indices): + def get_classifiers_decisions(self, X, view_indices, samples_indices): classifiers_decisions = np.zeros((len(self.monoview_estimators), len(view_indices), - len(examples_indices))) + len(samples_indices))) for estimator_idx, estimator in enumerate(self.monoview_estimators): for idx, view_index in enumerate(view_indices): classifiers_decisions[estimator_idx, idx, :] = estimator[ - idx].predict(X.get_v(view_index, examples_indices)) + idx].predict(X.get_v(view_index, samples_indices)) return classifiers_decisions - def init_combinations(self, X, example_indices, view_indices): + def init_combinations(self, X, sample_indices, view_indices): classifiers_decisions = self.get_classifiers_decisions(X, view_indices, - example_indices) - nb_classifiers, nb_views, n_examples = classifiers_decisions.shape + sample_indices) + nb_classifiers, nb_views, n_samples = classifiers_decisions.shape combinations = itertools.combinations_with_replacement( range(nb_classifiers), nb_views) @@ -104,15 +104,15 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, class GlobalDiversityFusionClassifier(DiversityFusionClassifier): - def choose_combination(self, X, y, examples_indices, view_indices): + def choose_combination(self, X, y, samples_indices, view_indices): combinations, combis, div_measure, classifiers_decisions, nb_views = self.init_combinations( - X, examples_indices, view_indices) + X, samples_indices, view_indices) for 
combinationsIndex, combination in enumerate(combinations): combis[combinationsIndex] = combination div_measure[combinationsIndex] = self.diversity_measure( classifiers_decisions, combination, - y[examples_indices]) + y[samples_indices]) best_combi_index = np.argmax(div_measure) best_combination = combis[best_combi_index] self.monoview_estimators = [ @@ -123,9 +123,9 @@ class GlobalDiversityFusionClassifier(DiversityFusionClassifier): class CoupleDiversityFusionClassifier(DiversityFusionClassifier): - def choose_combination(self, X, y, examples_indices, view_indices): + def choose_combination(self, X, y, samples_indices, view_indices): combinations, combis, div_measure, classifiers_decisions, nb_views = self.init_combinations( - X, examples_indices, view_indices) + X, samples_indices, view_indices) for combinations_index, combination in enumerate(combinations): combis[combinations_index] = combination combi_with_view = [(viewIndex, combiIndex) for viewIndex, combiIndex @@ -140,9 +140,11 @@ class CoupleDiversityFusionClassifier(DiversityFusionClassifier): view_index_2, classifier_index_2) = binome couple_diversity = np.mean( self.diversity_measure( - classifiers_decisions[classifier_index_1, view_index_1], - classifiers_decisions[classifier_index_2, view_index_2], - y[examples_indices]) + classifiers_decisions[classifier_index_1, + view_index_1], + classifiers_decisions[classifier_index_2, + view_index_2], + y[samples_indices]) ) couple_diversities[binome_index] = couple_diversity div_measure[combinations_index] = np.mean(couple_diversities) diff --git a/summit/multiview_platform/multiview_classifiers/additions/fusion_utils.py b/summit/multiview_platform/multiview_classifiers/additions/fusion_utils.py index 29447d15..361a664d 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/fusion_utils.py +++ b/summit/multiview_platform/multiview_classifiers/additions/fusion_utils.py @@ -9,7 +9,7 @@ class BaseFusionClassifier(): def init_monoview_estimator(self, classifier_name, classifier_config, classifier_index=None, multiclass=False): if classifier_index is not None: - if classifier_config is not None : + if classifier_config is not None: classifier_configs = classifier_config else: classifier_configs = None diff --git a/summit/multiview_platform/multiview_classifiers/additions/jumbo_fusion_utils.py b/summit/multiview_platform/multiview_classifiers/additions/jumbo_fusion_utils.py index e9cbac4c..3959ff46 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/jumbo_fusion_utils.py +++ b/summit/multiview_platform/multiview_classifiers/additions/jumbo_fusion_utils.py @@ -1,8 +1,8 @@ import numpy as np from .late_fusion_utils import LateFusionClassifier -from ...monoview.monoview_utils import CustomRandint -from ...utils.dataset import get_examples_views_indices +from ...utils.hyper_parameter_search import CustomRandint +from ...utils.dataset import get_samples_views_indices class BaseJumboFusion(LateFusionClassifier): @@ -23,27 +23,27 @@ class BaseJumboFusion(LateFusionClassifier): self.nb_monoview_per_view = nb_monoview_per_view LateFusionClassifier.set_params(self, **params) - def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) + def predict(self, X, sample_indices=None, view_indices=None): + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) self._check_views(view_indices) monoview_decisions = 
self.predict_monoview(X, - example_indices=example_indices, + sample_indices=sample_indices, view_indices=view_indices) return self.aggregation_estimator.predict(monoview_decisions) def fit(self, X, y, train_indices=None, view_indices=None): - train_indices, view_indices = get_examples_views_indices(X, - train_indices, - view_indices) + train_indices, view_indices = get_samples_views_indices(X, + train_indices, + view_indices) self.used_views = view_indices self.init_classifiers(len(view_indices), nb_monoview_per_view=self.nb_monoview_per_view) self.fit_monoview_estimators(X, y, train_indices=train_indices, view_indices=view_indices) monoview_decisions = self.predict_monoview(X, - example_indices=train_indices, + sample_indices=train_indices, view_indices=view_indices) self.aggregation_estimator.fit(monoview_decisions, y[train_indices]) return self @@ -65,13 +65,13 @@ class BaseJumboFusion(LateFusionClassifier): self.monoview_estimators = [[estimator.fit( X.get_v(view_indices[idx], train_indices), y[train_indices]) - for estimator in view_estimators] - for idx, view_estimators in - enumerate(self.monoview_estimators)] + for estimator in view_estimators] + for idx, view_estimators in + enumerate(self.monoview_estimators)] return self - def predict_monoview(self, X, example_indices=None, view_indices=None): - monoview_decisions = np.zeros((len(example_indices), + def predict_monoview(self, X, sample_indices=None, view_indices=None): + monoview_decisions = np.zeros((len(sample_indices), len(view_indices) * len( self.classifiers_names))) for idx, view_estimators in enumerate(self.monoview_estimators): @@ -79,5 +79,5 @@ class BaseJumboFusion(LateFusionClassifier): monoview_decisions[:, len( self.classifiers_names) * idx + estimator_index] = estimator.predict( X.get_v(view_indices[idx], - example_indices)) + sample_indices)) return monoview_decisions diff --git a/summit/multiview_platform/multiview_classifiers/additions/late_fusion_utils.py b/summit/multiview_platform/multiview_classifiers/additions/late_fusion_utils.py index 0916f76f..39bb77b4 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/late_fusion_utils.py +++ b/summit/multiview_platform/multiview_classifiers/additions/late_fusion_utils.py @@ -3,7 +3,7 @@ import numpy as np from .fusion_utils import BaseFusionClassifier from ...multiview.multiview_utils import BaseMultiviewClassifier, \ get_available_monoview_classifiers, ConfigGenerator -from ...utils.dataset import get_examples_views_indices +from ...utils.dataset import get_samples_views_indices class ClassifierDistribution: @@ -94,9 +94,9 @@ class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier): np.arange(1000)] def fit(self, X, y, train_indices=None, view_indices=None): - train_indices, view_indices = get_examples_views_indices(X, - train_indices, - view_indices) + train_indices, view_indices = get_samples_views_indices(X, + train_indices, + view_indices) self.used_views = view_indices if np.unique(y).shape[0] > 2: multiclass = True @@ -148,8 +148,12 @@ class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier): for _ in range(nb_clfs)] if isinstance(self.classifier_configs, ConfigDistribution): - self.classifier_configs = [{classifier_name : config[classifier_name]} for config, classifier_name in zip(self.classifier_configs.draw(nb_clfs, - self.rs), self.classifiers_names)] + self.classifier_configs = [ + {classifier_name: config[classifier_name]} for + config, classifier_name in + zip(self.classifier_configs.draw(nb_clfs, + 
self.rs), + self.classifiers_names)] elif isinstance(self.classifier_configs, dict): self.classifier_configs = [ {classifier_name: self.classifier_configs[classifier_name]} for diff --git a/summit/multiview_platform/multiview_classifiers/additions/utils.py b/summit/multiview_platform/multiview_classifiers/additions/utils.py index 5fbd4d56..998556b1 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/utils.py +++ b/summit/multiview_platform/multiview_classifiers/additions/utils.py @@ -1,11 +1,9 @@ import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin def get_names(classed_list): return np.array([object_.__class__.__name__ for object_ in classed_list]) - # class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): # # def __init__(self, random_state): @@ -56,9 +54,9 @@ def get_names(classed_list): # # def get_train_views_indices(dataset, train_indices, view_indices, ): -# """This function is used to get all the examples indices and view indices if needed""" +# """This function is used to get all the samples indices and view indices if needed""" # if view_indices is None: # view_indices = np.arange(dataset.nb_view) # if train_indices is None: -# train_indices = range(dataset.get_nb_examples()) +# train_indices = range(dataset.get_nb_samples()) # return train_indices, view_indices diff --git a/summit/multiview_platform/multiview_classifiers/bayesian_inference_fusion.py b/summit/multiview_platform/multiview_classifiers/bayesian_inference_fusion.py index b1cd5f9e..bca25f2b 100644 --- a/summit/multiview_platform/multiview_classifiers/bayesian_inference_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/bayesian_inference_fusion.py @@ -2,7 +2,7 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import \ LateFusionClassifier -from ..utils.dataset import get_examples_views_indices +from ..utils.dataset import get_samples_views_indices classifier_class_name = "BayesianInferenceClassifier" @@ -19,10 +19,10 @@ class BayesianInferenceClassifier(LateFusionClassifier): weights=weights, rs=rs) - def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) + def predict(self, X, sample_indices=None, view_indices=None): + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) self._check_views(view_indices) if sum(self.weights) != 1.0: self.weights = self.weights / sum(self.weights) @@ -32,7 +32,7 @@ class BayesianInferenceClassifier(LateFusionClassifier): view_scores.append(np.power( self.monoview_estimators[index].predict_proba( X.get_v(view_index, - example_indices)), + sample_indices)), self.weights[index])) view_scores = np.array(view_scores) predicted_labels = np.argmax(np.prod(view_scores, axis=0), axis=1) diff --git a/summit/multiview_platform/multiview_classifiers/difficulty_fusion.py b/summit/multiview_platform/multiview_classifiers/difficulty_fusion.py index 2ab4e195..47dad295 100644 --- a/summit/multiview_platform/multiview_classifiers/difficulty_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/difficulty_fusion.py @@ -9,15 +9,15 @@ classifier_class_name = "DifficultyFusion" class DifficultyFusion(GlobalDiversityFusionClassifier): def diversity_measure(self, classifiers_decisions, combination, y): - _, nb_view, nb_examples = classifiers_decisions.shape - scores = np.zeros((nb_view, nb_examples), dtype=int) + _, nb_view, nb_samples = classifiers_decisions.shape 
+ scores = np.zeros((nb_view, nb_samples), dtype=int) for view_index, classifier_index in enumerate(combination): scores[view_index, :] = np.logical_not( np.logical_xor(classifiers_decisions[classifier_index, view_index], y) ) - # Table of the nuber of views that succeeded for each example : + # Table of the nuber of views that succeeded for each sample : difficulty_scores = np.sum(scores, axis=0) difficulty_score = np.var( diff --git a/summit/multiview_platform/multiview_classifiers/entropy_fusion.py b/summit/multiview_platform/multiview_classifiers/entropy_fusion.py index 12c0bc8a..56b0e458 100644 --- a/summit/multiview_platform/multiview_classifiers/entropy_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/entropy_fusion.py @@ -9,8 +9,8 @@ classifier_class_name = "EntropyFusion" class EntropyFusion(GlobalDiversityFusionClassifier): def diversity_measure(self, classifiers_decisions, combination, y): - _, nb_view, nb_examples = classifiers_decisions.shape - scores = np.zeros((nb_view, nb_examples), dtype=int) + _, nb_view, nb_samples = classifiers_decisions.shape + scores = np.zeros((nb_view, nb_samples), dtype=int) for view_index, classifier_index in enumerate(combination): scores[view_index] = np.logical_not( np.logical_xor( @@ -18,9 +18,9 @@ class EntropyFusion(GlobalDiversityFusionClassifier): y) ) entropy_scores = np.sum(scores, axis=0) - nb_view_matrix = np.zeros((nb_examples), + nb_view_matrix = np.zeros((nb_samples), dtype=int) + nb_view - entropy_scores entropy_score = np.mean( np.minimum(entropy_scores, nb_view_matrix).astype(float) / ( - nb_view - int(nb_view / 2))) + nb_view - int(nb_view / 2))) return entropy_score diff --git a/summit/multiview_platform/multiview_classifiers/majority_voting_fusion.py b/summit/multiview_platform/multiview_classifiers/majority_voting_fusion.py index 53a255c7..1afed357 100644 --- a/summit/multiview_platform/multiview_classifiers/majority_voting_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/majority_voting_fusion.py @@ -2,7 +2,7 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import \ LateFusionClassifier -from ..utils.dataset import get_examples_views_indices +from ..utils.dataset import get_samples_views_indices classifier_class_name = "MajorityVoting" @@ -22,27 +22,27 @@ class MajorityVoting(LateFusionClassifier): weights=weights, rs=rs) - def predict(self, X, example_indices=None, view_indices=None): - examples_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) + def predict(self, X, sample_indices=None, view_indices=None): + samples_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) self._check_views(view_indices) - n_examples = len(examples_indices) - votes = np.zeros((n_examples, X.get_nb_class(example_indices)), + n_samples = len(samples_indices) + votes = np.zeros((n_samples, X.get_nb_class(sample_indices)), dtype=float) - monoview_decisions = np.zeros((len(examples_indices), X.nb_view), + monoview_decisions = np.zeros((len(samples_indices), X.nb_view), dtype=int) for index, view_index in enumerate(view_indices): monoview_decisions[:, index] = self.monoview_estimators[ index].predict( - X.get_v(view_index, examples_indices)) - for example_index in range(n_examples): + X.get_v(view_index, samples_indices)) + for sample_index in range(n_samples): for view_index, feature_classification in enumerate( - monoview_decisions[example_index, :]): - votes[example_index, feature_classification] += self.weights[ + 
monoview_decisions[sample_index, :]): + votes[sample_index, feature_classification] += self.weights[ view_index] nb_maximum = len( - np.where(votes[example_index] == max(votes[example_index]))[0]) + np.where(votes[sample_index] == max(votes[sample_index]))[0]) if nb_maximum == X.nb_view: raise VotingIndecision( "Majority voting can't decide, each classifier has voted for a different class") diff --git a/summit/multiview_platform/multiview_classifiers/svm_jumbo_fusion.py b/summit/multiview_platform/multiview_classifiers/svm_jumbo_fusion.py index d9a2e38d..4d826efe 100644 --- a/summit/multiview_platform/multiview_classifiers/svm_jumbo_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/svm_jumbo_fusion.py @@ -1,7 +1,7 @@ from sklearn.svm import SVC from .additions.jumbo_fusion_utils import BaseJumboFusion -from ..monoview.monoview_utils import CustomUniform, CustomRandint +from ..utils.hyper_parameter_search import CustomUniform, CustomRandint classifier_class_name = "SVMJumboFusion" @@ -32,5 +32,6 @@ class SVMJumboFusion(BaseJumboFusion): self.C = C self.degree = degree self.kernel = kernel - self.aggregation_estimator.set_params(C=C, kernel=kernel, degree=degree) + self.aggregation_estimator.set_params( + C=C, kernel=kernel, degree=degree) return self diff --git a/summit/multiview_platform/multiview_classifiers/weighted_linear_early_fusion.py b/summit/multiview_platform/multiview_classifiers/weighted_linear_early_fusion.py index 7fd7b669..ec86f9b9 100644 --- a/summit/multiview_platform/multiview_classifiers/weighted_linear_early_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/weighted_linear_early_fusion.py @@ -1,10 +1,9 @@ import numpy as np -from summit.multiview_platform import monoview_classifiers from .additions.fusion_utils import BaseFusionClassifier from ..multiview.multiview_utils import get_available_monoview_classifiers, \ BaseMultiviewClassifier, ConfigGenerator -from ..utils.dataset import get_examples_views_indices +from ..utils.dataset import get_samples_views_indices from ..utils.multiclass import get_mc_estim, MultiClassWrapper # from ..utils.dataset import get_v @@ -42,7 +41,8 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): # self.monoview_classifier_name) # monoview_classifier_class = getattr(monoview_classifier_module, # monoview_classifier_module.classifier_class_name) - self.monoview_classifier = self.init_monoview_estimator(monoview_classifier_name, monoview_classifier_config) + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, monoview_classifier_config) self.param_names = ["monoview_classifier_name", "monoview_classifier_config"] self.distribs = [get_available_monoview_classifiers(), @@ -80,35 +80,34 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): self.monoview_classifier_config = self.monoview_classifier.get_params() return self - def predict(self, X, example_indices=None, view_indices=None): - _, X = self.transform_data_to_monoview(X, example_indices, view_indices) + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) self._check_views(self.view_indices) predicted_labels = self.monoview_classifier.predict(X) return predicted_labels - def transform_data_to_monoview(self, dataset, example_indices, + def transform_data_to_monoview(self, dataset, sample_indices, view_indices): """Here, we extract the data from the HDF5 dataset file and store all the 
concatenated views in one variable""" - example_indices, self.view_indices = get_examples_views_indices(dataset, - example_indices, - view_indices) + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) if self.view_weights is None: self.view_weights = np.ones(len(self.view_indices), dtype=float) else: self.view_weights = np.array(self.view_weights) self.view_weights /= float(np.sum(self.view_weights)) - X = self.hdf5_to_monoview(dataset, example_indices) - return example_indices, X + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X - def hdf5_to_monoview(self, dataset, examples): - """Here, we concatenate the views for the asked examples """ + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ monoview_data = np.concatenate( - [dataset.get_v(view_idx, examples) + [dataset.get_v(view_idx, samples) for view_weight, (index, view_idx) - in zip(self.view_weights, enumerate(self.view_indices))] - , axis=1) + in zip(self.view_weights, enumerate(self.view_indices))], axis=1) return monoview_data # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): diff --git a/summit/multiview_platform/multiview_classifiers/weighted_linear_late_fusion.py b/summit/multiview_platform/multiview_classifiers/weighted_linear_late_fusion.py index 403791ce..1b7b4c2f 100644 --- a/summit/multiview_platform/multiview_classifiers/weighted_linear_late_fusion.py +++ b/summit/multiview_platform/multiview_classifiers/weighted_linear_late_fusion.py @@ -2,7 +2,7 @@ import numpy as np from ..multiview_classifiers.additions.late_fusion_utils import \ LateFusionClassifier -from ..utils.dataset import get_examples_views_indices +from ..utils.dataset import get_samples_views_indices classifier_class_name = "WeightedLinearLateFusion" @@ -16,16 +16,16 @@ class WeightedLinearLateFusion(LateFusionClassifier): classifier_configs=classifier_configs, nb_cores=nb_cores, weights=weights, rs=rs) - def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) + def predict(self, X, sample_indices=None, view_indices=None): + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) self._check_views(view_indices) view_scores = [] for index, viewIndex in enumerate(view_indices): view_scores.append( np.array(self.monoview_estimators[index].predict_proba( - X.get_v(viewIndex, example_indices))) * self.weights[index]) + X.get_v(viewIndex, sample_indices))) * self.weights[index]) view_scores = np.array(view_scores) predicted_labels = np.argmax(np.sum(view_scores, axis=0), axis=1) return predicted_labels diff --git a/summit/multiview_platform/result_analysis/duration_analysis.py b/summit/multiview_platform/result_analysis/duration_analysis.py index fb3a539c..5c109ed0 100644 --- a/summit/multiview_platform/result_analysis/duration_analysis.py +++ b/summit/multiview_platform/result_analysis/duration_analysis.py @@ -1,6 +1,7 @@ import os -import plotly + import pandas as pd +import plotly def get_duration(results): @@ -14,34 +15,39 @@ def get_duration(results): "pred"] = classifier_result.pred_duration return df -def plot_durations(durations, directory, database_name, durations_stds=None): # pragma: no cover + +def plot_durations(durations, directory, database_name, + durations_stds=None): # pragma: no cover file_name = os.path.join(directory, 
database_name + "-durations") - durations.to_csv(file_name+"_dataframe.csv") + durations.to_csv(file_name + "_dataframe.csv") fig = plotly.graph_objs.Figure() if durations_stds is None: durations_stds = pd.DataFrame(0, durations.index, durations.columns) else: - durations_stds.to_csv(file_name+"_stds_dataframe.csv") + durations_stds.to_csv(file_name + "_stds_dataframe.csv") fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization', x=durations.index, y=durations['hps'], error_y=dict(type='data', - array=durations_stds["hps"]), + array=durations_stds[ + "hps"]), marker_color="grey")) fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)', x=durations.index, y=durations['fit'], error_y=dict(type='data', - array=durations_stds["fit"]), + array=durations_stds[ + "fit"]), marker_color="black")) fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)', x=durations.index, y=durations['pred'], error_y=dict(type='data', - array=durations_stds["pred"]), + array=durations_stds[ + "pred"]), marker_color="lightgrey")) fig.update_layout(title="Durations for each classfier", yaxis_title="Duration (s)") fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) \ No newline at end of file + plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) diff --git a/summit/multiview_platform/result_analysis/error_analysis.py b/summit/multiview_platform/result_analysis/error_analysis.py index 97aa6baa..12f01807 100644 --- a/summit/multiview_platform/result_analysis/error_analysis.py +++ b/summit/multiview_platform/result_analysis/error_analysis.py @@ -6,112 +6,111 @@ import matplotlib as mpl # Import third party modules import matplotlib.pyplot as plt import numpy as np -import pandas as pd import plotly -from matplotlib.patches import Patch + # Import own Modules -def get_example_errors(groud_truth, results): - r"""Used to get for each classifier and each example whether the classifier - has misclassified the example or not. +def get_sample_errors(groud_truth, results): + r"""Used to get for each classifier and each sample whether the classifier + has misclassified the sample or not. Parameters ---------- ground_truth : numpy array of 0, 1 and -100 (if multiclass) - The array with the real labels of the examples + The array with the real labels of the samples results : list of MonoviewResult and MultiviewResults objects A list containing all the resluts for all the mono- & multi-view experimentations. Returns ------- - example_errors : dict of np.array - For each classifier, has an entry with a `np.array` over the examples, - with a 1 if the examples was + sample_errors : dict of np.array + For each classifier, has an entry with a `np.array` over the samples, + with a 1 if the samples was well-classified, a 0 if not and if it's multiclass classification, a - -100 if the examples was not seen during + -100 if the samples was not seen during the one versus one classification. 
""" - example_errors = {} + sample_errors = {} for classifier_result in results: - error_on_examples = np.equal(classifier_result.full_labels_pred, - groud_truth).astype(int) - unseen_examples = np.where(groud_truth == -100)[0] - error_on_examples[unseen_examples] = -100 - example_errors[ - classifier_result.get_classifier_name()] = error_on_examples - return example_errors + error_on_samples = np.equal(classifier_result.full_labels_pred, + groud_truth).astype(int) + unseen_samples = np.where(groud_truth == -100)[0] + error_on_samples[unseen_samples] = -100 + sample_errors[ + classifier_result.get_classifier_name()] = error_on_samples + return sample_errors -def publish_example_errors(example_errors, directory, databaseName, - labels_names, example_ids, labels): # pragma: no cover +def publish_sample_errors(sample_errors, directory, databaseName, + labels_names, sample_ids, labels): # pragma: no cover logging.debug("Start:\t Label analysis figure generation") - base_file_name = os.path.join(directory, databaseName + "-" ) + base_file_name = os.path.join(directory, databaseName + "-") - nb_classifiers, nb_examples, classifiers_names, \ - data_2d, error_on_examples = gen_error_data(example_errors) + nb_classifiers, nb_samples, classifiers_names, \ + data_2d, error_on_samples = gen_error_data(sample_errors) np.savetxt(base_file_name + "2D_plot_data.csv", data_2d, delimiter=",") - np.savetxt(base_file_name + "bar_plot_data.csv", error_on_examples, + np.savetxt(base_file_name + "bar_plot_data.csv", error_on_samples, delimiter=",") plot_2d(data_2d, classifiers_names, nb_classifiers, base_file_name, - example_ids=example_ids, labels=labels) + sample_ids=sample_ids, labels=labels) - plot_errors_bar(error_on_examples, nb_examples, - base_file_name, example_ids=example_ids) + plot_errors_bar(error_on_samples, nb_samples, + base_file_name, sample_ids=sample_ids) logging.debug("Done:\t Label analysis figures generation") -def publish_all_example_errors(iter_results, directory, - stats_iter, - example_ids, labels): # pragma: no cover +def publish_all_sample_errors(iter_results, directory, + stats_iter, + sample_ids, labels): # pragma: no cover logging.debug( "Start:\t Global label analysis figure generation") - nb_examples, nb_classifiers, data, \ - error_on_examples, classifier_names = gen_error_data_glob(iter_results, - stats_iter) + nb_samples, nb_classifiers, data, \ + error_on_samples, classifier_names = gen_error_data_glob(iter_results, + stats_iter) np.savetxt(os.path.join(directory, "clf_errors.csv"), data, delimiter=",") - np.savetxt(os.path.join(directory, "example_errors.csv"), error_on_examples, + np.savetxt(os.path.join(directory, "sample_errors.csv"), error_on_samples, delimiter=",") plot_2d(data, classifier_names, nb_classifiers, os.path.join(directory, ""), stats_iter=stats_iter, - example_ids=example_ids, labels=labels) - plot_errors_bar(error_on_examples, nb_examples, os.path.join(directory, ""), - example_ids=example_ids) + sample_ids=sample_ids, labels=labels) + plot_errors_bar(error_on_samples, nb_samples, os.path.join(directory, ""), + sample_ids=sample_ids) logging.debug( "Done:\t Global label analysis figures generation") -def gen_error_data(example_errors): +def gen_error_data(sample_errors): r"""Used to format the error data in order to plot it efficiently. The data is saves in a `.csv` file. Parameters ---------- - example_errors : dict of dicts of np.arrays + sample_errors : dict of dicts of np.arrays A dictionary conatining all the useful data. 
Organized as : - `example_errors[<classifier_name>]["error_on_examples"]` is a np.array + `sample_errors[<classifier_name>]["error_on_samples"]` is a np.array of ints with a - - 1 if the classifier `<classifier_name>` classifier well the example, - - 0 if it fail to classify the example, - - -100 if it did not classify the example (multiclass one versus one). + - 1 if the classifier `<classifier_name>` classifier well the sample, + - 0 if it fail to classify the sample, + - -100 if it did not classify the sample (multiclass one versus one). Returns ------- nbClassifiers : int Number of different classifiers. nbExamples : int - NUmber of examples. + NUmber of samples. nbCopies : int The number of times the data is copied (classifier wise) in order for the figure to be more readable. @@ -119,48 +118,48 @@ def gen_error_data(example_errors): The names fo the classifiers. data : np.array of shape `(nbClassifiers, nbExamples)` A matrix with zeros where the classifier failed to classifiy the - example, ones where it classified it well - and -100 if the example was not classified. - error_on_examples : np.array of shape `(nbExamples,)` + sample, ones where it classified it well + and -100 if the sample was not classified. + error_on_samples : np.array of shape `(nbExamples,)` An array counting how many classifiers failed to classifiy each - examples. + samples. """ - nb_classifiers = len(example_errors) - nb_examples = len(list(example_errors.values())[0]) - classifiers_names = list(example_errors.keys()) + nb_classifiers = len(sample_errors) + nb_samples = len(list(sample_errors.values())[0]) + classifiers_names = list(sample_errors.keys()) - data_2d = np.zeros((nb_examples, nb_classifiers)) - for classifierIndex, (classifier_name, error_on_examples) in enumerate( - example_errors.items()): - data_2d[:, classifierIndex] = error_on_examples - error_on_examples = np.sum(data_2d, axis=1) / nb_classifiers - return nb_classifiers, nb_examples, classifiers_names, data_2d, error_on_examples + data_2d = np.zeros((nb_samples, nb_classifiers)) + for classifierIndex, (classifier_name, error_on_samples) in enumerate( + sample_errors.items()): + data_2d[:, classifierIndex] = error_on_samples + error_on_samples = np.sum(data_2d, axis=1) / nb_classifiers + return nb_classifiers, nb_samples, classifiers_names, data_2d, error_on_samples def gen_error_data_glob(iter_results, stats_iter): - nb_examples = next(iter(iter_results.values())).shape[0] + nb_samples = next(iter(iter_results.values())).shape[0] nb_classifiers = len(iter_results) - data = np.zeros((nb_examples, nb_classifiers), dtype=int) + data = np.zeros((nb_samples, nb_classifiers), dtype=int) classifier_names = [] for clf_index, (classifier_name, error_data) in enumerate( iter_results.items()): data[:, clf_index] = error_data classifier_names.append(classifier_name) - error_on_examples = np.sum(data, axis=1) / ( - nb_classifiers * stats_iter) - return nb_examples, nb_classifiers, data, error_on_examples, \ - classifier_names + error_on_samples = np.sum(data, axis=1) / ( + nb_classifiers * stats_iter) + return nb_samples, nb_classifiers, data, error_on_samples, \ + classifier_names def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, - stats_iter=1, use_plotly=True, example_ids=None): # pragma: no cover + stats_iter=1, use_plotly=True, sample_ids=None): # pragma: no cover r"""Used to generate a 2D plot of the errors. 
Parameters ---------- data : np.array of shape `(nbClassifiers, nbExamples)` - A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well - and -100 if the example was not classified. + A matrix with zeros where the classifier failed to classifiy the sample, ones where it classified it well + and -100 if the sample was not classified. classifiers_names : list of str The names of the classifiers. nb_classifiers : int @@ -172,7 +171,7 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, width_denominator : float, optional, default: 1.0 To obtain the image width, the number of classifiers will be divided by this number. height_denominator : float, optional, default: 1.0 - To obtain the image width, the number of examples will be divided by this number. + To obtain the image width, the number of samples will be divided by this number. stats_iter : int, optional, default: 1 The number of statistical iterations realized. @@ -198,19 +197,19 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, fig.savefig(file_name + "error_analysis_2D.png", bbox_inches="tight", transparent=True) plt.close() - ### The following part is used to generate an interactive graph. + # The following part is used to generate an interactive graph. if use_plotly: - # [np.where(labels==i)[0] for i in np.unique(labels)] - hover_text = [[example_ids[example_index] + " failed " + str( + # [np.where(labels==i)[0] for i in np.unique(labels)] + hover_text = [[sample_ids[sample_index] + " failed " + str( stats_iter - data[ - example_index, classifier_index]) + " time(s), labelled " + str( - labels[example_index]) - for classifier_index in range(data.shape[1])] - for example_index in range(data.shape[0])] + sample_index, classifier_index]) + " time(s), labelled " + str( + labels[sample_index]) + for classifier_index in range(data.shape[1])] + for sample_index in range(data.shape[0])] fig = plotly.graph_objs.Figure() fig.add_trace(plotly.graph_objs.Heatmap( x=list(classifiers_names), - y=[example_ids[label_ind] for label_ind in label_index_list], + y=[sample_ids[label_ind] for label_ind in label_index_list], z=data[label_index_list, :], text=[hover_text[label_ind] for label_ind in label_index_list], hoverinfo=["y", "x", "text"], @@ -227,20 +226,20 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, del fig -def plot_errors_bar(error_on_examples, nb_examples, file_name, - use_plotly=True, example_ids=None): # pragma: no cover - r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples +def plot_errors_bar(error_on_samples, nb_samples, file_name, + use_plotly=True, sample_ids=None): # pragma: no cover + r"""Used to generate a barplot of the muber of classifiers that failed to classify each samples Parameters ---------- - error_on_examples : np.array of shape `(nbExamples,)` - An array counting how many classifiers failed to classifiy each examples. + error_on_samples : np.array of shape `(nbExamples,)` + An array counting how many classifiers failed to classifiy each samples. classifiers_names : list of str The names of the classifiers. nb_classifiers : int The number of classifiers. - nb_examples : int - The number of examples. + nb_samples : int + The number of samples. 
file_name : str The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) @@ -248,22 +247,21 @@ def plot_errors_bar(error_on_examples, nb_examples, file_name, ------- """ fig, ax = plt.subplots() - x = np.arange(nb_examples) - plt.bar(x, 1-error_on_examples) - plt.title("Number of classifiers that failed to classify each example") + x = np.arange(nb_samples) + plt.bar(x, 1 - error_on_samples) + plt.title("Number of classifiers that failed to classify each sample") fig.savefig(file_name + "error_analysis_bar.png", transparent=True) plt.close() if use_plotly: - fig = plotly.graph_objs.Figure([plotly.graph_objs.Bar(x=example_ids, y=1-error_on_examples)]) + fig = plotly.graph_objs.Figure( + [plotly.graph_objs.Bar(x=sample_ids, y=1 - error_on_samples)]) fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') plotly.offline.plot(fig, filename=file_name + "error_analysis_bar.html", auto_open=False) - - -def iter_cmap(statsIter): # pragma: no cover +def iter_cmap(statsIter): # pragma: no cover r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better. Parameters diff --git a/summit/multiview_platform/result_analysis/execution.py b/summit/multiview_platform/result_analysis/execution.py index e620a934..7d3c9c6f 100644 --- a/summit/multiview_platform/result_analysis/execution.py +++ b/summit/multiview_platform/result_analysis/execution.py @@ -1,32 +1,37 @@ import logging + import pandas as pd -from .tracebacks_analysis import save_failed, publish_tracebacks from .duration_analysis import plot_durations, get_duration -from .metric_analysis import get_metrics_scores, publish_metrics_graphs, publish_all_metrics_scores -from .error_analysis import get_example_errors, publish_example_errors, publish_all_example_errors -from .feature_importances import get_feature_importances, publish_feature_importances +from .error_analysis import get_sample_errors, publish_sample_errors, \ + publish_all_sample_errors +from .feature_importances import get_feature_importances, \ + publish_feature_importances +from .metric_analysis import get_metrics_scores, publish_metrics_graphs, \ + publish_all_metrics_scores +from .tracebacks_analysis import save_failed, publish_tracebacks + def analyze(results, stats_iter, benchmark_argument_dictionaries, - metrics, directory, example_ids, labels): # pragma: no cover + metrics, directory, sample_ids, labels): # pragma: no cover """Used to analyze the results of the previous benchmarks""" data_base_name = benchmark_argument_dictionaries[0]["args"]["name"] results_means_std, iter_results, flagged_failed, label_names = analyze_iterations( results, benchmark_argument_dictionaries, - stats_iter, metrics, example_ids, labels) + stats_iter, metrics, sample_ids, labels) if flagged_failed: save_failed(flagged_failed, directory) if stats_iter > 1: results_means_std = analyze_all( iter_results, stats_iter, directory, - data_base_name, example_ids, label_names) + data_base_name, sample_ids, label_names) return results_means_std def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, - metrics, example_ids, labels): + metrics, sample_ids, labels): r"""Used to extract and format the results of the different experimentations performed. @@ -58,43 +63,44 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, The list contains a dictionary for each statistical iteration. 
This dictionary contains a dictionary for each label combination, regrouping the scores for each metrics and the - information useful to plot errors on examples. + information useful to plot errors on samples. """ logging.debug("Start:\t Analyzing all results") iter_results = {"metrics_scores": [i for i in range(stats_iter)], "class_metrics_scores": [i for i in range(stats_iter)], - "example_errors": [i for i in range(stats_iter)], + "sample_errors": [i for i in range(stats_iter)], "feature_importances": [i for i in range(stats_iter)], - "durations":[i for i in range(stats_iter)]} + "durations": [i for i in range(stats_iter)]} flagged_tracebacks_list = [] fig_errors = [] for iter_index, result, tracebacks in results: arguments = get_arguments(benchmark_argument_dictionaries, iter_index) labels_names = list(arguments["labels_dictionary"].values()) - metrics_scores, class_metric_scores = get_metrics_scores(metrics, result, labels_names) - example_errors = get_example_errors(labels, result) + metrics_scores, class_metric_scores = get_metrics_scores(metrics, + result, + labels_names) + sample_errors = get_sample_errors(labels, result) feature_importances = get_feature_importances(result) durations = get_duration(result) directory = arguments["directory"] database_name = arguments["args"]["name"] - flagged_tracebacks_list += publish_tracebacks(directory, database_name, labels_names, tracebacks, iter_index) res = publish_metrics_graphs(metrics_scores, directory, database_name, labels_names, class_metric_scores) - publish_example_errors(example_errors, directory, database_name, - labels_names, example_ids, labels) + publish_sample_errors(sample_errors, directory, database_name, + labels_names, sample_ids, labels) publish_feature_importances(feature_importances, directory, database_name) plot_durations(durations, directory, database_name) iter_results["metrics_scores"][iter_index] = metrics_scores iter_results["class_metrics_scores"][iter_index] = class_metric_scores - iter_results["example_errors"][iter_index] = example_errors + iter_results["sample_errors"][iter_index] = sample_errors iter_results["feature_importances"][iter_index] = feature_importances iter_results["labels"] = labels iter_results["durations"][iter_index] = durations @@ -105,23 +111,26 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, def analyze_all(iter_results, stats_iter, directory, data_base_name, - example_ids, label_names): # pragma: no cover + sample_ids, label_names): # pragma: no cover """Used to format the results in order to plot the mean results on the iterations""" metrics_analysis, class_metrics_analysis, error_analysis, feature_importances, \ - feature_importances_stds, labels, duration_means, \ - duration_stds = format_previous_results(iter_results) + feature_importances_stds, labels, duration_means, \ + duration_stds = format_previous_results(iter_results) - results = publish_all_metrics_scores(metrics_analysis, class_metrics_analysis, + results = publish_all_metrics_scores(metrics_analysis, + class_metrics_analysis, directory, - data_base_name, stats_iter, label_names) - publish_all_example_errors(error_analysis, directory, stats_iter, - example_ids, labels) + data_base_name, stats_iter, + label_names) + publish_all_sample_errors(error_analysis, directory, stats_iter, + sample_ids, labels) publish_feature_importances(feature_importances, directory, data_base_name, feature_importances_stds) plot_durations(duration_means, directory, data_base_name, duration_stds) return results + 
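[Editorial note, not part of the patch] To see how the renamed error-analysis helpers chain together, here is a minimal sketch; FakeResult is a hypothetical stand-in for the MonoviewResult/MultiviewResult objects expected in results, and the toy labels are assumptions.

    import numpy as np
    from summit.multiview_platform.result_analysis.error_analysis import (
        get_sample_errors, gen_error_data)

    class FakeResult:
        # Hypothetical stand-in exposing only what get_sample_errors uses.
        def __init__(self, name, preds):
            self.full_labels_pred = preds
            self._name = name

        def get_classifier_name(self):
            return self._name

    ground_truth = np.array([0, 1, 1, 0])
    results = [FakeResult("clf_a", np.array([0, 1, 0, 0])),
               FakeResult("clf_b", np.array([1, 1, 1, 0]))]

    sample_errors = get_sample_errors(ground_truth, results)
    # e.g. {"clf_a": array([1, 1, 0, 1]), "clf_b": array([0, 1, 1, 1])}
    nb_clf, nb_samples, names, data_2d, error_on_samples = \
        gen_error_data(sample_errors)

Here a 1 marks a well-classified sample and a 0 a misclassified one, which is what the 2D plot and the error bar plot consume downstream.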
def get_arguments(benchmark_argument_dictionaries, iter_index): r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of an @@ -157,7 +166,7 @@ def format_previous_results(iter_results_lists): contains - biclass_results[i]["metrics_scores"] is a dictionary with a pd.dataframe for each metrics - - biclass_results[i]["example_errors"], a dicaitonary with a np.array + - biclass_results[i]["sample_errors"], a dicaitonary with a np.array for each classifier. Returns @@ -212,7 +221,8 @@ def format_previous_results(iter_results_lists): durations_df_concat = pd.concat((durations_df_concat, durations_df), axis=1) durations_df_concat = durations_df_concat.astype(float) - grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1) + grouped_df = durations_df_concat.groupby(durations_df_concat.columns, + axis=1) duration_means = grouped_df.mean() duration_stds = grouped_df.std() @@ -233,15 +243,15 @@ def format_previous_results(iter_results_lists): feature_importances_stds[view_name] = dataframe.groupby( dataframe.index).std(ddof=0) - added_example_errors = {} - for example_errors in iter_results_lists["example_errors"]: - for classifier_name, errors in example_errors.items(): - if classifier_name not in added_example_errors: - added_example_errors[classifier_name] = errors + added_sample_errors = {} + for sample_errors in iter_results_lists["sample_errors"]: + for classifier_name, errors in sample_errors.items(): + if classifier_name not in added_sample_errors: + added_sample_errors[classifier_name] = errors else: - added_example_errors[classifier_name] += errors - error_analysis = added_example_errors - return metrics_analysis, class_metrics_analysis ,error_analysis, \ - feature_importances_analysis, \ - feature_importances_stds, iter_results_lists["labels"], \ - duration_means, duration_stds + added_sample_errors[classifier_name] += errors + error_analysis = added_sample_errors + return metrics_analysis, class_metrics_analysis, error_analysis, \ + feature_importances_analysis, \ + feature_importances_stds, iter_results_lists["labels"], \ + duration_means, duration_stds diff --git a/summit/multiview_platform/result_analysis/feature_importances.py b/summit/multiview_platform/result_analysis/feature_importances.py index 459f664f..86c31366 100644 --- a/summit/multiview_platform/result_analysis/feature_importances.py +++ b/summit/multiview_platform/result_analysis/feature_importances.py @@ -1,7 +1,8 @@ import os -import plotly -import pandas as pd + import numpy as np +import pandas as pd +import plotly from ..monoview.monoview_utils import MonoviewResult @@ -37,6 +38,7 @@ def get_feature_importances(result, feature_names=None): classifier_result.n_features) return feature_importances + def publish_feature_importances(feature_importances, directory, database_name, feature_stds=None): # pragma: no cover for view_name, feature_importance in feature_importances.items(): @@ -55,16 +57,17 @@ def publish_feature_importances(feature_importances, directory, database_name, plot_feature_importances(file_name, feature_importance, feature_std) -def plot_feature_importances(file_name, feature_importance, feature_std): # pragma: no cover +def plot_feature_importances(file_name, feature_importance, + feature_std): # pragma: no cover feature_importance.to_csv(file_name + "_dataframe.csv") hover_text = [["-Feature :" + str(feature_name) + "<br>-Classifier : " + classifier_name + "<br>-Importance : " + str( 
feature_importance.loc[feature_name][classifier_name]) + - "<br>-STD : " + str( + "<br>-STD : " + str( feature_std.loc[feature_name][classifier_name]) - for classifier_name in list(feature_importance.columns)] - for feature_name in list(feature_importance.index)] + for classifier_name in list(feature_importance.columns)] + for feature_name in list(feature_importance.index)] fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap( x=list(feature_importance.columns), y=list(feature_importance.index), @@ -81,4 +84,3 @@ def plot_feature_importances(file_name, feature_importance, feature_std): # prag plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) del fig - diff --git a/summit/multiview_platform/result_analysis/metric_analysis.py b/summit/multiview_platform/result_analysis/metric_analysis.py index fff1e365..3b9e4b08 100644 --- a/summit/multiview_platform/result_analysis/metric_analysis.py +++ b/summit/multiview_platform/result_analysis/metric_analysis.py @@ -1,9 +1,10 @@ +import logging +import os + import matplotlib.pyplot as plt import numpy as np -import os import pandas as pd import plotly -import logging from ..utils.organization import secure_file_path @@ -37,37 +38,42 @@ def get_metrics_scores(metrics, results, label_names): if classifier_result.get_classifier_name() not in classifier_names] metrics_scores = dict((metric, pd.DataFrame(data=np.zeros((2, - len( - classifier_names))), - index=["train", "test"], - columns=classifier_names)) + len( + classifier_names))), + index=["train", "test"], + columns=classifier_names)) for metric in metrics.keys()) class_metric_scores = dict((metric, pd.DataFrame( index=pd.MultiIndex.from_product([["train", "test"], label_names]), columns=classifier_names, dtype=float)) - for metric in metrics) + for metric in metrics) for metric in metrics.keys(): for classifier_result in results: metrics_scores[metric].loc[ "train", classifier_result.get_classifier_name()] = \ - classifier_result.metrics_scores[metric][0] + classifier_result.metrics_scores[metric][0] metrics_scores[metric].loc[ "test", classifier_result.get_classifier_name()] = \ classifier_result.metrics_scores[metric][1] for label_index, label_name in enumerate(label_names): class_metric_scores[metric].loc[( - "train", label_name),classifier_result.get_classifier_name()] = \ - classifier_result.class_metric_scores[metric][0][label_index] + "train", + label_name), classifier_result.get_classifier_name()] = \ + classifier_result.class_metric_scores[metric][0][ + label_index] class_metric_scores[metric].loc[( - "test", label_name), classifier_result.get_classifier_name()] = \ - classifier_result.class_metric_scores[metric][1][label_index] + "test", + label_name), classifier_result.get_classifier_name()] = \ + classifier_result.class_metric_scores[metric][1][ + label_index] return metrics_scores, class_metric_scores def publish_metrics_graphs(metrics_scores, directory, database_name, - labels_names, class_metric_scores): # pragma: no cover + labels_names, + class_metric_scores): # pragma: no cover r"""Used to sort the results (names and both scores) in descending test score order. 
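[Editorial note, not part of the patch] As a quick reference for the data shapes handled here, this illustrative sketch (classifier and label names are hypothetical) mirrors the two frames that get_metrics_scores fills in: a train/test-by-classifier frame per metric, and a class-wise frame indexed by a (set, label) MultiIndex.

    import numpy as np
    import pandas as pd

    classifier_names = ["adaboost", "decision_tree"]   # hypothetical
    label_names = ["label_0", "label_1"]               # hypothetical

    # Per-metric overall scores: one row for train, one for test
    metric_frame = pd.DataFrame(np.zeros((2, len(classifier_names))),
                                index=["train", "test"],
                                columns=classifier_names)

    # Per-metric, per-class scores, indexed by (set, label)
    class_metric_frame = pd.DataFrame(
        index=pd.MultiIndex.from_product([["train", "test"], label_names]),
        columns=classifier_names, dtype=float)

    class_metric_frame.loc[("test", "label_1"), "adaboost"] = 0.92
    test_scores = np.array(metric_frame.loc["test"])

The plotting functions below read the frames back exactly this way (loc["train"], loc["test"], and the MultiIndex rows whose first level is "test").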
@@ -92,19 +98,19 @@ def publish_metrics_graphs(metrics_scores, directory, database_name, logging.debug( "Start:\t Score graph generation for " + metric_name) train_scores, test_scores, classifier_names, \ - file_name, nb_results, results,\ - class_test_scores = init_plot(results, metric_name, - metrics_scores[metric_name], - directory, - database_name, - class_metric_scores[metric_name]) + file_name, nb_results, results, \ + class_test_scores = init_plot(results, metric_name, + metrics_scores[metric_name], + directory, + database_name, + class_metric_scores[metric_name]) plot_metric_scores(train_scores, test_scores, classifier_names, nb_results, metric_name, file_name, tag=" " + " vs ".join(labels_names)) class_file_name = os.path.join(directory, database_name + "-" - + metric_name+"-class") + + metric_name + "-class") plot_class_metric_scores(class_test_scores, class_file_name, labels_names, classifier_names, metric_name) logging.debug( @@ -114,7 +120,7 @@ def publish_metrics_graphs(metrics_scores, directory, database_name, def publish_all_metrics_scores(iter_results, class_iter_results, directory, data_base_name, stats_iter, label_names, - min_size=10): # pragma: no cover + min_size=10): # pragma: no cover results = [] secure_file_path(os.path.join(directory, "a")) @@ -137,16 +143,22 @@ def publish_all_metrics_scores(iter_results, class_iter_results, directory, in zip(classifier_names, test, test_std)] for metric_name, scores in class_iter_results.items(): - test = np.array([np.array(scores["mean"].iloc[i, :]) for i in range(scores["mean"].shape[0]) if scores["mean"].iloc[i, :].name[0]=='test']) + test = np.array([np.array(scores["mean"].iloc[i, :]) for i in + range(scores["mean"].shape[0]) if + scores["mean"].iloc[i, :].name[0] == 'test']) classifier_names = np.array(scores["mean"].columns) - test_std = np.array([np.array(scores["std"].iloc[i, :]) for i in range(scores["std"].shape[0]) if scores["std"].iloc[i, :].name[0]=='test']) + test_std = np.array([np.array(scores["std"].iloc[i, :]) for i in + range(scores["std"].shape[0]) if + scores["std"].iloc[i, :].name[0] == 'test']) file_name = os.path.join(directory, data_base_name + "-mean_on_" + str( - stats_iter) + "_iter-" + metric_name+"-class") + stats_iter) + "_iter-" + metric_name + "-class") - plot_class_metric_scores(test, file_name, label_names, classifier_names, metric_name, stds=test_std, tag="averaged") + plot_class_metric_scores(test, file_name, label_names, classifier_names, + metric_name, stds=test_std, tag="averaged") return results + def init_plot(results, metric_name, metric_dataframe, directory, database_name, class_metric_scores): train = np.array(metric_dataframe.loc["train"]) @@ -163,14 +175,14 @@ def init_plot(results, metric_name, metric_dataframe, zip(classifier_names, test, np.transpose(class_test), np.zeros(len(test)))] return train, test, classifier_names, file_name, nb_results, results, \ - class_test + class_test def plot_metric_scores(train_scores, test_scores, names, nb_results, metric_name, file_name, tag="", train_STDs=None, test_STDs=None, - use_plotly=True): # pragma: no cover + use_plotly=True): # pragma: no cover r"""Used to plot and save the score barplot for a specific metric. 
Parameters @@ -220,7 +232,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results, try: plt.tight_layout() - except: + except BaseException: pass f.savefig(file_name + '.png', transparent=True) plt.close() @@ -263,7 +275,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results, def plot_class_metric_scores(class_test_scores, class_file_name, labels_names, classifier_names, metric_name, - stds=None, tag=""): # pragma: no cover + stds=None, tag=""): # pragma: no cover fig = plotly.graph_objs.Figure() for lab_index, scores in enumerate(class_test_scores): if stds is None: @@ -274,12 +286,13 @@ def plot_class_metric_scores(class_test_scores, class_file_name, name=labels_names[lab_index], x=classifier_names, y=scores, error_y=dict(type='data', array=std), - )) + )) fig.update_layout( title=metric_name + "<br>" + tag + " scores for each classifier") fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=class_file_name + ".html", auto_open=False) + plotly.offline.plot(fig, filename=class_file_name + ".html", + auto_open=False) del fig @@ -312,7 +325,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): return fig_kwargs, bar_width -def autolabel(rects, ax, set=1, std=None): # pragma: no cover +def autolabel(rects, ax, set=1, std=None): # pragma: no cover r"""Used to print the score below the bars. Parameters @@ -390,4 +403,4 @@ def sort_by_test_score(train_scores, test_scores, names, train_STDs=None, else: sorted_train_STDs = None sorted_test_STDs = None - return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs \ No newline at end of file + return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs diff --git a/summit/multiview_platform/result_analysis/tracebacks_analysis.py b/summit/multiview_platform/result_analysis/tracebacks_analysis.py index 329a27f6..174b2386 100644 --- a/summit/multiview_platform/result_analysis/tracebacks_analysis.py +++ b/summit/multiview_platform/result_analysis/tracebacks_analysis.py @@ -5,8 +5,8 @@ def publish_tracebacks(directory, database_name, labels_names, tracebacks, iter_index): if tracebacks: with open(os.path.join(directory, database_name + - "-iter" + str(iter_index) + - "-tacebacks.txt"), + "-iter" + str(iter_index) + + "-tacebacks.txt"), "w") as traceback_file: failed_list = save_dict_to_text(tracebacks, traceback_file) flagged_list = [_ + "-iter" + str(iter_index) for _ in failed_list] diff --git a/summit/multiview_platform/utils/base.py b/summit/multiview_platform/utils/base.py index aa235578..8dcaaf81 100644 --- a/summit/multiview_platform/utils/base.py +++ b/summit/multiview_platform/utils/base.py @@ -1,11 +1,12 @@ -import numpy as np -from sklearn.base import BaseEstimator from abc import abstractmethod from datetime import timedelta as hms -from tabulate import tabulate + +import numpy as np +from sklearn.base import BaseEstimator +from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.metrics import confusion_matrix as confusion from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier +from tabulate import tabulate from summit.multiview_platform import metrics @@ -32,13 +33,13 @@ class BaseClassifier(BaseEstimator, ): if self.classed_params: classed_dict = dict((classed_param, get_names( detector.cv_results_["param_" + classed_param])) - for classed_param in 
self.classed_params) + for classed_param in self.classed_params) if self.param_names: return [(param_name, np.array(detector.cv_results_["param_" + param_name])) if param_name not in self.classed_params else ( param_name, classed_dict[param_name]) - for param_name in self.param_names] + for param_name in self.param_names] else: return [()] @@ -77,12 +78,15 @@ class BaseClassifier(BaseEstimator, ): elif base_estimator == "RandomForestClassifier": return RandomForestClassifier(**estimator_config) else: - raise ValueError('Base estimator string {} does not match an available classifier.'.format(base_estimator)) + raise ValueError( + 'Base estimator string {} does not match an available classifier.'.format( + base_estimator)) elif isinstance(base_estimator, BaseEstimator): return base_estimator.set_params(**estimator_config) else: - raise ValueError('base_estimator must be either a string or a BaseEstimator child class, it is {}'.format(type(base_estimator))) - + raise ValueError( + 'base_estimator must be either a string or a BaseEstimator child class, it is {}'.format( + type(base_estimator))) def to_str(self, param_name): """ @@ -122,7 +126,7 @@ class BaseClassifier(BaseEstimator, ): # if hasattr(self, "accepts_mutli_class"): # return self.accepts_multi_class fake_mc_X = random_state.randint(low=0, high=101, - size=(n_samples, dim)) + size=(n_samples, dim)) fake_mc_y = [class_index for _ in range(int(n_samples / n_classes)) for class_index in range(n_classes)] @@ -220,7 +224,7 @@ class ResultAnalyser(): ------- """ for metric, metric_args in self.metrics_dict.items(): - class_train_scores, class_test_scores, train_score, test_score\ + class_train_scores, class_test_scores, train_score, test_score \ = self.get_metric_score(metric, metric_args) self.class_metric_scores[metric] = (class_train_scores, class_test_scores) @@ -248,23 +252,28 @@ class ResultAnalyser(): class_train_scores = [] class_test_scores = [] for label_value in np.unique(self.labels): - train_example_indices = self.train_indices[np.where(self.labels[self.train_indices]==label_value)[0]] - test_example_indices = self.test_indices[np.where(self.labels[self.test_indices] == label_value)[0]] - class_train_scores.append(metric_module.score(y_true=self.labels[train_example_indices], - y_pred=self.pred[train_example_indices], - **metric_kwargs)) - class_test_scores.append(metric_module.score(y_true=self.labels[test_example_indices], - y_pred=self.pred[test_example_indices], - **metric_kwargs)) - train_score = metric_module.score(y_true=self.labels[self.train_indices], - y_pred=self.pred[self.train_indices], - **metric_kwargs) + train_sample_indices = self.train_indices[ + np.where(self.labels[self.train_indices] == label_value)[0]] + test_sample_indices = self.test_indices[ + np.where(self.labels[self.test_indices] == label_value)[0]] + class_train_scores.append( + metric_module.score(y_true=self.labels[train_sample_indices], + y_pred=self.pred[train_sample_indices], + **metric_kwargs)) + class_test_scores.append( + metric_module.score(y_true=self.labels[test_sample_indices], + y_pred=self.pred[test_sample_indices], + **metric_kwargs)) + train_score = metric_module.score( + y_true=self.labels[self.train_indices], + y_pred=self.pred[self.train_indices], + **metric_kwargs) test_score = metric_module.score(y_true=self.labels[self.test_indices], - y_pred=self.pred[self.test_indices], - **metric_kwargs) + y_pred=self.pred[self.test_indices], + **metric_kwargs) return class_train_scores, class_test_scores, train_score, test_score - def 
print_metric_score(self,): + def print_metric_score(self, ): """ Generates a string, formatting the metrics configuration and scores @@ -284,27 +293,34 @@ class ResultAnalyser(): metric_module = getattr(metrics, metric[:-1]) else: metric_module = getattr(metrics, metric) - metric_score_string += "\tFor {} : ".format(metric_module.get_config( - **metric_kwargs)) - metric_score_string += "\n\t\t- Score on train : {}".format(self.metric_scores[metric][0]) - metric_score_string += "\n\t\t- Score on test : {}".format(self.metric_scores[metric][1]) + metric_score_string += "\tFor {} : ".format( + metric_module.get_config( + **metric_kwargs)) + metric_score_string += "\n\t\t- Score on train : {}".format( + self.metric_scores[metric][0]) + metric_score_string += "\n\t\t- Score on test : {}".format( + self.metric_scores[metric][1]) metric_score_string += "\n\n" metric_score_string += "Test set confusion matrix : \n\n" - self.confusion_matrix = confusion(y_true=self.labels[self.test_indices], y_pred=self.pred[self.test_indices]) - formatted_conf = [[label_name]+list(row) for label_name, row in zip(self.class_label_names, self.confusion_matrix)] - metric_score_string+=tabulate(formatted_conf, headers= ['']+self.class_label_names, tablefmt='fancy_grid') + self.confusion_matrix = confusion(y_true=self.labels[self.test_indices], + y_pred=self.pred[self.test_indices]) + formatted_conf = [[label_name] + list(row) for label_name, row in + zip(self.class_label_names, self.confusion_matrix)] + metric_score_string += tabulate(formatted_conf, + headers=[''] + self.class_label_names, + tablefmt='fancy_grid') metric_score_string += "\n\n" return metric_score_string @abstractmethod - def get_view_specific_info(self): # pragma: no cover + def get_view_specific_info(self): # pragma: no cover pass @abstractmethod - def get_base_string(self): # pragma: no cover + def get_base_string(self): # pragma: no cover pass - def get_db_config_string(self,): + def get_db_config_string(self, ): """ Generates a string, formatting all the information on the database @@ -316,14 +332,16 @@ class ResultAnalyser(): db_config_string string, formatting all the information on the database """ learning_ratio = len(self.train_indices) / ( - len(self.train_indices) + len(self.test_indices)) + len(self.train_indices) + len(self.test_indices)) db_config_string = "Database configuration : \n" - db_config_string += "\t- Database name : {}\n".format(self.database_name) + db_config_string += "\t- Database name : {}\n".format( + self.database_name) db_config_string += self.get_view_specific_info() db_config_string += "\t- Learning Rate : {}\n".format(learning_ratio) db_config_string += "\t- Labels used : " + ", ".join( self.class_label_names) + "\n" - db_config_string += "\t- Number of cross validation folds : {}\n\n".format(self.k_folds.n_splits) + db_config_string += "\t- Number of cross validation folds : {}\n\n".format( + self.k_folds.n_splits) return db_config_string def get_classifier_config_string(self, ): @@ -335,12 +353,13 @@ class ResultAnalyser(): A string explaining the classifier's configuration """ classifier_config_string = "Classifier configuration : \n" - classifier_config_string += "\t- " + self.classifier.get_config()+ "\n" + classifier_config_string += "\t- " + self.classifier.get_config() + "\n" classifier_config_string += "\t- Executed on {} core(s) \n".format( self.nb_cores) if self.hps_method.startswith('randomized_search'): - classifier_config_string += "\t- Got configuration using randomized search with {} iterations \n" 
.format(self.n_iter) + classifier_config_string += "\t- Got configuration using randomized search with {} iterations \n".format( + self.n_iter) return classifier_config_string def analyze(self, ): @@ -360,18 +379,19 @@ class ResultAnalyser(): string_analysis += self.get_classifier_config_string() self.get_all_metrics_scores() string_analysis += self.print_metric_score() - string_analysis += "\n\n Classification took {}".format(hms(seconds=int(self.duration))) + string_analysis += "\n\n Classification took {}".format( + hms(seconds=int(self.duration))) string_analysis += "\n\n Classifier Interpretation : \n" string_analysis += self.classifier.get_interpretation( self.directory, self.base_file_name, self.labels[self.test_indices]) image_analysis = {} return string_analysis, image_analysis, self.metric_scores, \ - self.class_metric_scores, self.confusion_matrix + self.class_metric_scores, self.confusion_matrix base_boosting_estimators = [DecisionTreeClassifier(max_depth=1), DecisionTreeClassifier(max_depth=2), DecisionTreeClassifier(max_depth=3), DecisionTreeClassifier(max_depth=4), - DecisionTreeClassifier(max_depth=5), ] \ No newline at end of file + DecisionTreeClassifier(max_depth=5), ] diff --git a/summit/multiview_platform/utils/configuration.py b/summit/multiview_platform/utils/configuration.py index fcd62c6d..4adefc4a 100644 --- a/summit/multiview_platform/utils/configuration.py +++ b/summit/multiview_platform/utils/configuration.py @@ -45,43 +45,12 @@ def pass_default_config(log=True, algos_monoview=["all"], algos_multiview=["svm_jumbo_fusion", ], stats_iter=2, - metrics={"accuracy_score":{}, "f1_score":{}}, + metrics={"accuracy_score": {}, "f1_score": {}}, metric_princ="accuracy_score", hps_type="Random", hps_iter=1, - hps_kwargs={'n_iter':10, "equivalent_draws":True}, + hps_kwargs={'n_iter': 10, "equivalent_draws": True}, **kwargs): - """ - - :param log: - :param name: - :param label: - :param file_type: - :param views: - :param pathf: - :param nice: - :param random_state: - :param nb_cores: - :param full: - :param debug: - :param add_noise: - :param noise_std: - :param res_dir: - :param track_tracebacks: - :param split: - :param nb_folds: - :param nb_class: - :param classes: - :param type: - :param algos_monoview: - :param algos_multiview: - :param stats_iter: - :param metrics: - :param metric_princ: - :param hps_type: - :param hps_iter: - :return: - """ args = dict( (key, value) for key, value in locals().items() if key != "kwargs") args = dict(args, **kwargs) diff --git a/summit/multiview_platform/utils/dataset.py b/summit/multiview_platform/utils/dataset.py index 00ea3aad..168903bd 100644 --- a/summit/multiview_platform/utils/dataset.py +++ b/summit/multiview_platform/utils/dataset.py @@ -6,87 +6,102 @@ from abc import abstractmethod import h5py import numpy as np -from scipy import sparse from .organization import secure_file_path +'''This is the multiview dataset module. It garthers all the method to interact + with the dataset objects passed as arguments in hte multiview classifiers + of SuMMIT''' + + class Dataset(): + """ + This is the base class for all the type of multiview datasets of SuMMIT. 
+ """ @abstractmethod - def get_nb_examples(self): # pragma: no cover + def get_nb_samples(self): # pragma: no cover pass @abstractmethod - def get_v(self, view_index, example_indices=None): # pragma: no cover + def get_v(self, view_index, sample_indices=None): # pragma: no cover pass @abstractmethod - def get_label_names(self, example_indices=None): # pragma: no cover + def get_label_names(self, sample_indices=None): # pragma: no cover pass @abstractmethod - def get_labels(self, example_indices=None): # pragma: no cover + def get_labels(self, sample_indices=None): # pragma: no cover pass @abstractmethod - def filter(self, labels, label_names, example_indices, view_names, - path=None): # pragma: no cover + def filter(self, labels, label_names, sample_indices, view_names, + path=None): # pragma: no cover pass - def init_example_indices(self, example_indices=None): - """If no example indices are provided, selects all the examples.""" - if example_indices is None: - return range(self.get_nb_examples()) + def init_sample_indices(self, sample_indices=None): + """ + If no sample indices are provided, selects all the available samples. + + Parameters + ---------- + sample_indices: np.array, + An array-like containing the indices of the samples. + + """ + if sample_indices is None: + return range(self.get_nb_samples()) else: - return example_indices + return sample_indices - def get_shape(self, view_index=0, example_indices=None): + def get_shape(self, view_index=0, sample_indices=None): """ - Gets the shape of the needed view on the asked examples + Gets the shape of the needed view on the asked samples Parameters ---------- view_index : int The index of the view to extract - example_indices : numpy.ndarray - The array containing the indices of the examples to extract. + sample_indices : numpy.ndarray + The array containing the indices of the samples to extract. Returns ------- Tuple containing the shape """ - return self.get_v(view_index, example_indices=example_indices).shape + return self.get_v(view_index, sample_indices=sample_indices).shape - def to_numpy_array(self, example_indices=None, view_indices=None): + def to_numpy_array(self, sample_indices=None, view_indices=None): """ To concatenate the needed views in one big numpy array while saving the limits of each view in a list, to be able to retrieve them later. Parameters ---------- - example_indices : array like, - The indices of the examples to extract from the dataset + sample_indices : array like + The indices of the samples to extract from the dataset - view_indices : array like, - The indices of the view to concatenate in the numpy array + view_indices : array like + The indices of the view to concatenate in the numpy array Returns ------- concat_views : numpy array, - The numpy array containing all the needed views. + The numpy array containing all the needed views. view_limits : list of int - The limits of each slice used to extract the views. + The limits of each slice used to extract the views. 
""" view_limits = [0] for view_index in view_indices: - view_data = self.get_v(view_index, example_indices=example_indices) + view_data = self.get_v(view_index, sample_indices=sample_indices) nb_features = view_data.shape[1] view_limits.append(view_limits[-1] + nb_features) concat_views = np.concatenate([self.get_v(view_index, - example_indices=example_indices) + sample_indices=sample_indices) for view_index in view_indices], axis=1) return concat_views, view_limits @@ -106,15 +121,15 @@ class Dataset(): def select_views_and_labels(self, nb_labels=None, selected_label_names=None, random_state=None, view_names=None, path_for_new="../data/"): - if view_names is None and selected_label_names is None and nb_labels is None: # pragma: no cover + if view_names is None and selected_label_names is None and nb_labels is None: # pragma: no cover pass else: selected_label_names = self.check_selected_label_names(nb_labels, selected_label_names, random_state) - labels, label_names, example_indices = self.select_labels( + labels, label_names, sample_indices = self.select_labels( selected_label_names) - self.filter(labels, label_names, example_indices, view_names, + self.filter(labels, label_names, sample_indices, view_names, path_for_new) labels_dictionary = dict( (labelIndex, labelName) for labelIndex, labelName in @@ -154,18 +169,18 @@ class Dataset(): class RAMDataset(Dataset): def __init__(self, views=None, labels=None, are_sparse=False, - view_names=None, labels_names=None, example_ids=None, + view_names=None, labels_names=None, sample_ids=None, name=None): self.saved_on_disk = False self.views = views self.labels = np.asarray(labels) - if isinstance(are_sparse, bool): # pragma: no cover + if isinstance(are_sparse, bool): # pragma: no cover self.are_sparse = [are_sparse for _ in range(len(views))] else: self.are_sparse = are_sparse self.view_names = view_names self.labels_names = labels_names - self.example_ids = example_ids + self.sample_ids = sample_ids self.view_dict = dict((view_name, view_ind) for view_name, view_ind in zip(view_names, range(len(views)))) @@ -190,11 +205,11 @@ class RAMDataset(Dataset): self.view_dict = dict((view_ind, self.view_names[view_ind]) for view_ind in range(self.nb_view)) - def get_nb_examples(self): + def get_nb_samples(self): return self.views[0].shape[0] - def get_label_names(self, example_indices=None, decode=True): - selected_labels = self.get_labels(example_indices) + def get_label_names(self, sample_indices=None, decode=True): + selected_labels = self.get_labels(sample_indices) if decode: return [label_name.encode("utf-8") for label, label_name in enumerate(self.labels_names) @@ -204,35 +219,38 @@ class RAMDataset(Dataset): for label, label_name in enumerate(self.labels_names) if label in selected_labels] - def get_labels(self, example_indices=None): - example_indices = self.init_example_indices(example_indices) - return self.labels[example_indices] + def get_labels(self, sample_indices=None): + sample_indices = self.init_sample_indices(sample_indices) + return self.labels[sample_indices] - def get_v(self, view_index, example_indices=None): - example_indices = self.init_example_indices(example_indices) - if type(example_indices) is int: - return self.views[view_index][example_indices, :] + def get_v(self, view_index, sample_indices=None): + sample_indices = self.init_sample_indices(sample_indices) + if isinstance(sample_indices, int): + return self.views[view_index][sample_indices, :] else: - example_indices = np.asarray(example_indices) - # sorted_indices = 
np.argsort(example_indices) - # example_indices = example_indices[sorted_indices] + sample_indices = np.asarray(sample_indices) + # sorted_indices = np.argsort(sample_indices) + # sample_indices = sample_indices[sorted_indices] if not self.are_sparse[view_index]: return self.views[view_index][ - example_indices, :] - else: # pragma: no cover + sample_indices, :] + else: # pragma: no cover # TODO Sparse support pass - def get_nb_class(self, example_indices=None): - """Gets the number of class of the dataset""" - example_indices = self.init_example_indices(example_indices) - return len(np.unique(self.labels[example_indices])) + def get_nb_class(self, sample_indices=None): + """ + Gets the number of class of the dataset - def filter(self, labels, label_names, example_indices, view_names, + """ + sample_indices = self.init_sample_indices(sample_indices) + return len(np.unique(self.labels[sample_indices])) + + def filter(self, labels, label_names, sample_indices, view_names, path=None): - if self.example_ids is not None: - self.example_ids = self.example_ids[example_indices] - self.labels = self.labels[example_indices] + if self.sample_ids is not None: + self.sample_ids = self.sample_ids[sample_indices] + self.labels = self.labels[sample_indices] self.labels_names = [name for lab_index, name in enumerate(self.labels_names) if lab_index in np.unique(self.labels)] @@ -243,7 +261,7 @@ class RAMDataset(Dataset): new_views = [] for new_view_ind, view_name in enumerate(self.view_names): new_views.append( - self.views[self.view_dict[view_name]][example_indices, :]) + self.views[self.view_dict[view_name]][sample_indices, :]) self.views = new_views self.view_dict = dict((view_name, view_ind) for view_ind, view_name @@ -268,10 +286,10 @@ class HDF5Dataset(Dataset): ---------- views : list of numpy arrays or None The list containing each view of the dataset as a numpy array of shape - (nb examples, nb features). + (nb samples, nb features). labels : numpy array or None - The labels for the multiview dataset, of shape (nb examples, ). + The labels for the multiview dataset, of shape (nb samples, ). are_sparse : list of bool, or None The list of boolean telling if each view is sparse or not. 
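[Editorial note, not part of the patch] The example-to-sample rename touches the whole Dataset API; a minimal usage sketch of RAMDataset with toy data (shapes, names and IDs are assumptions) shows the renamed calls:

    import numpy as np
    from summit.multiview_platform.utils.dataset import RAMDataset

    views = [np.random.rand(5, 3), np.random.rand(5, 4)]   # 5 samples, 2 views
    dataset = RAMDataset(views=views,
                         labels=np.array([0, 1, 0, 1, 1]),
                         view_names=["view0", "view1"],
                         labels_names=["neg", "pos"],
                         sample_ids=["sample_{}".format(i) for i in range(5)])

    dataset.get_nb_samples()                   # 5
    dataset.get_v(1, sample_indices=[0, 2])    # rows 0 and 2 of the second view
    dataset.get_labels(sample_indices=[0, 2])  # array([0, 0])

The HDF5Dataset class below exposes the same sample-based interface, only backed by an HDF5 file instead of in-memory arrays.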
@@ -306,6 +324,7 @@ class HDF5Dataset(Dataset): view_dict : dict The dictionnary with the name of each view as the keys and their indices as values + """ # The following methods use hdf5 @@ -313,7 +332,7 @@ class HDF5Dataset(Dataset): def __init__(self, views=None, labels=None, are_sparse=False, file_name="dataset.hdf5", view_names=None, path="", hdf5_file=None, labels_names=None, is_temp=False, - example_ids=None, ): + sample_ids=None, ): self.is_temp = False if hdf5_file is not None: self.dataset = hdf5_file @@ -324,7 +343,7 @@ class HDF5Dataset(Dataset): if view_names is None: view_names = ["View" + str(index) for index in range(len(views))] - if isinstance(are_sparse, bool): # pragma: no cover + if isinstance(are_sparse, bool): # pragma: no cover are_sparse = [are_sparse for _ in views] for view_index, (view_name, view, is_sparse) in enumerate( zip(view_names, views, are_sparse)): @@ -350,42 +369,43 @@ class HDF5Dataset(Dataset): meta_data_grp.attrs["datasetLength"] = len(labels) dataset_file.close() self.update_hdf5_dataset(os.path.join(path, file_name)) - if example_ids is not None: - example_ids = [example_id if not is_just_number(example_id) - else "ID_" + example_id for example_id in - example_ids] - self.example_ids = example_ids + if sample_ids is not None: + sample_ids = [sample_id if not is_just_number(sample_id) + else "ID_" + sample_id for sample_id in + sample_ids] + self.sample_ids = sample_ids else: - self.example_ids = ["ID_" + str(i) - for i in range(labels.shape[0])] + self.sample_ids = ["ID_" + str(i) + for i in range(labels.shape[0])] - def get_v(self, view_index, example_indices=None): + def get_v(self, view_index, sample_indices=None): r""" Extract the view and returns a numpy.ndarray containing the description - of the examples specified in example_indices + of the samples specified in sample_indices Parameters ---------- view_index : int The index of the view to extract - example_indices : numpy.ndarray - The array containing the indices of the examples to extract. + sample_indices : numpy.ndarray + The array containing the indices of the samples to extract. 
Returns ------- - A numpy.ndarray containing the view data for the needed examples + A numpy.ndarray containing the view data for the needed samples + """ - example_indices = self.init_example_indices(example_indices) - if type(example_indices) is int: - return self.dataset["View" + str(view_index)][example_indices, :] + sample_indices = self.init_sample_indices(sample_indices) + if isinstance(sample_indices, int): + return self.dataset["View" + str(view_index)][sample_indices, :] else: - example_indices = np.array(example_indices) - # sorted_indices = np.argsort(example_indices) - # example_indices = example_indices[sorted_indices] + sample_indices = np.array(sample_indices) + # sorted_indices = np.argsort(sample_indices) + # sample_indices = sample_indices[sorted_indices] if not self.dataset["View" + str(view_index)].attrs["sparse"]: return self.dataset["View" + str(view_index)][()][ - example_indices, :] # [np.argsort(sorted_indices), :] - else: # pragma: no cover + sample_indices, :] # [np.argsort(sorted_indices), :] + else: # pragma: no cover # Work in progress pass @@ -416,19 +436,19 @@ class HDF5Dataset(Dataset): """ self.nb_view = self.dataset["Metadata"].attrs["nbView"] self.view_dict = self.get_view_dict() - if "example_ids" in self.dataset["Metadata"].keys(): - self.example_ids = [example_id.decode() - if not is_just_number(example_id.decode()) - else "ID_" + example_id.decode() - for example_id in - self.dataset["Metadata"]["example_ids"]] + if "sample_ids" in self.dataset["Metadata"].keys(): + self.sample_ids = [sample_id.decode() + if not is_just_number(sample_id.decode()) + else "ID_" + sample_id.decode() + for sample_id in + self.dataset["Metadata"]["sample_ids"]] else: - self.example_ids = ["ID_"+str(i) for i in - range(self.dataset["Labels"].shape[0])] + self.sample_ids = ["ID_" + str(i) for i in + range(self.dataset["Labels"].shape[0])] - def get_nb_examples(self): + def get_nb_samples(self): """ - Used to get the number of examples available in hte dataset + Used to get the number of samples available in hte dataset Returns ------- @@ -447,23 +467,23 @@ class HDF5Dataset(Dataset): "name"]] = view_index return view_dict - def get_label_names(self, decode=True, example_indices=None): + def get_label_names(self, decode=True, sample_indices=None): """ - Used to get the list of the label names for the given set of examples + Used to get the list of the label names for the given set of samples Parameters ---------- decode : bool If True, will decode the label names before listing them - example_indices : numpy.ndarray - The array containing the indices of the needed examples + sample_indices : numpy.ndarray + The array containing the indices of the needed samples Returns ------- """ - selected_labels = self.get_labels(example_indices) + selected_labels = self.get_labels(sample_indices) if decode: return [label_name.decode("utf-8") for label, label_name in @@ -475,38 +495,38 @@ class HDF5Dataset(Dataset): enumerate(self.dataset["Labels"].attrs["names"]) if label in selected_labels] - def get_nb_class(self, example_indices=None): + def get_nb_class(self, sample_indices=None): """ - Gets the number of classes of the dataset for the asked examples + Gets the number of classes of the dataset for the asked samples - Parameters + Parameters ---------- - example_indices : numpy.ndarray - The array containing the indices of the examples to extract. + sample_indices : numpy.ndarray + The array containing the indices of the samples to extract. 
Returns ------- int : The number of classes """ - example_indices = self.init_example_indices(example_indices) - return len(np.unique(self.dataset["Labels"][()][example_indices])) + sample_indices = self.init_sample_indices(sample_indices) + return len(np.unique(self.dataset["Labels"][()][sample_indices])) - def get_labels(self, example_indices=None): - """Gets the label array for the asked examples + def get_labels(self, sample_indices=None): + """Gets the label array for the asked samples - Parameters + Parameters ---------- - example_indices : numpy.ndarray - The array containing the indices of the examples to extract. + sample_indices : numpy.ndarray + The array containing the indices of the samples to extract. Returns ------- - numpy.ndarray containing the labels of the asked examples""" - example_indices = self.init_example_indices(example_indices) - return self.dataset["Labels"][()][example_indices] + numpy.ndarray containing the labels of the asked samples""" + sample_indices = self.init_sample_indices(sample_indices) + return self.dataset["Labels"][()][sample_indices] - def rm(self): # pragma: no cover + def rm(self): # pragma: no cover """ Method used to delete the dataset file on the disk if the dataset is temporary. @@ -520,16 +540,15 @@ class HDF5Dataset(Dataset): if self.is_temp: os.remove(filename) - def copy_view(self, target_dataset=None, source_view_name=None, - target_view_index=None, example_indices=None): - example_indices = self.init_example_indices(example_indices) + target_view_index=None, sample_indices=None): + sample_indices = self.init_sample_indices(sample_indices) new_d_set = target_dataset.create_dataset( "View" + str(target_view_index), data=self.get_v(self.view_dict[source_view_name], - example_indices=example_indices)) + sample_indices=sample_indices)) for key, value in self.dataset[ - "View" + str(self.view_dict[source_view_name])].attrs.items(): + "View" + str(self.view_dict[source_view_name])].attrs.items(): new_d_set.attrs[key] = value def init_view_names(self, view_names=None): @@ -545,45 +564,46 @@ class HDF5Dataset(Dataset): self.is_temp = True self.init_attrs() - def filter(self, labels, label_names, example_indices, view_names, + def filter(self, labels, label_names, sample_indices, view_names, path=None): dataset_file_path = os.path.join(path, self.get_name() + "_temp_filter.hdf5") new_dataset_file = h5py.File(dataset_file_path, "w") self.dataset.copy("Metadata", new_dataset_file) - if "example_ids" in self.dataset["Metadata"].keys(): - del new_dataset_file["Metadata"]["example_ids"] - ex_ids = new_dataset_file["Metadata"].create_dataset("example_ids", + if "sample_ids" in self.dataset["Metadata"].keys(): + del new_dataset_file["Metadata"]["sample_ids"] + ex_ids = new_dataset_file["Metadata"].create_dataset("sample_ids", data=np.array( - self.example_ids)[ - example_indices].astype( + self.sample_ids)[ + sample_indices].astype( np.dtype( "S100"))) else: - new_dataset_file["Metadata"].create_dataset("example_ids", + new_dataset_file["Metadata"].create_dataset("sample_ids", ( - len(self.example_ids),), + len( + self.sample_ids),), data=np.array( - self.example_ids).astype( + self.sample_ids).astype( np.dtype("S100")), dtype=np.dtype("S100")) new_dataset_file["Metadata"].attrs["datasetLength"] = len( - example_indices) + sample_indices) new_dataset_file["Metadata"].attrs["nbClass"] = np.unique(labels) new_dataset_file.create_dataset("Labels", data=labels) new_dataset_file["Labels"].attrs["names"] = [label_name.encode() if not isinstance( label_name, 
bytes) - else label_name - for label_name in - label_names] + else label_name + for label_name in + label_names] view_names = self.init_view_names(view_names) new_dataset_file["Metadata"].attrs["nbView"] = len(view_names) for new_index, view_name in enumerate(view_names): self.copy_view(target_dataset=new_dataset_file, source_view_name=view_name, target_view_index=new_index, - example_indices=example_indices) + sample_indices=sample_indices) new_dataset_file.close() self.update_hdf5_dataset(dataset_file_path) @@ -605,9 +625,10 @@ class HDF5Dataset(Dataset): view_key = "View" + str(view_index) view_dset = noisy_dataset[view_key] view_limits = self.dataset[ - "Metadata/View" + str(view_index) + "_limits"][()] + "Metadata/View" + str(view_index) + "_limits"][()] view_ranges = view_limits[:, 1] - view_limits[:, 0] - normal_dist = random_state.normal(0, noise_std, view_dset[()].shape) + normal_dist = random_state.normal( + 0, noise_std, view_dset[()].shape) noise = normal_dist * view_ranges noised_data = view_dset[()] + noise noised_data = np.where(noised_data < view_limits[:, 0], @@ -648,27 +669,27 @@ def extract_subset(matrix, used_indices): # if sparse.issparse(matrix): # new_indptr = np.zeros(len(used_indices) + 1, dtype=int) # oldindptr = matrix.indptr - # for exampleIndexIndex, exampleIndex in enumerate(used_indices): - # new_indptr[exampleIndexIndex + 1] = new_indptr[ - # exampleIndexIndex] + ( + # for sampleIndexIndex, sampleIndex in enumerate(used_indices): + # new_indptr[sampleIndexIndex + 1] = new_indptr[ + # sampleIndexIndex] + ( # oldindptr[ - # exampleIndex + 1] - - # oldindptr[exampleIndex]) + # sampleIndex + 1] - + # oldindptr[sampleIndex]) # new_data = np.ones(new_indptr[-1], dtype=bool) # new_indices = np.zeros(new_indptr[-1], dtype=int) # old_indices = matrix.indices - # for exampleIndexIndex, exampleIndex in enumerate(used_indices): - # new_indices[new_indptr[exampleIndexIndex]:new_indptr[ - # exampleIndexIndex + 1]] = old_indices[ - # oldindptr[exampleIndex]: - # oldindptr[exampleIndex + 1]] + # for sampleIndexIndex, sampleIndex in enumerate(used_indices): + # new_indices[new_indptr[sampleIndexIndex]:new_indptr[ + # sampleIndexIndex + 1]] = old_indices[ + # oldindptr[sampleIndex]: + # oldindptr[sampleIndex + 1]] # return sparse.csr_matrix((new_data, new_indices, new_indptr), # shape=(len(used_indices), matrix.shape[1])) # else: return matrix[used_indices] -def init_multiple_datasets(path_f, name, nb_cores): # pragma: no cover +def init_multiple_datasets(path_f, name, nb_cores): # pragma: no cover r"""Used to create copies of the dataset if multicore computation is used. This is a temporary solution to fix the sharing memory issue with HDF5 datasets. 
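Editor's note: the hunks above finish renaming the sample-indexing accessors of the dataset wrappers (init_sample_indices, get_labels, get_nb_class, copy_view, filter) from the old example_* vocabulary to sample_*. As a quick orientation, here is a minimal sketch of how the renamed API would be driven; only the method names and the filter() signature come from this patch, while the already-loaded dataset object, the label/view names and the temporary path are assumptions for illustration.

import numpy as np

# `dataset` is assumed to be an already-loaded HDF5Dataset / RAMDataset;
# constructing one is outside the scope of these hunks.
def keep_first_samples(dataset, n=10, path="/tmp/"):
    sample_indices = np.arange(n)
    # Passing None instead would fall back to every sample via init_sample_indices()
    labels = dataset.get_labels(sample_indices=sample_indices)
    print(dataset.get_nb_class(sample_indices=sample_indices), "classes kept")
    # filter() writes <name>_temp_filter.hdf5 under `path` and swaps the
    # wrapper over to that reduced copy (labels, label names, samples, views).
    dataset.filter(labels, ["yes", "no"], sample_indices,
                   ["ViewN0", "ViewN1"], path=path)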
@@ -693,12 +714,15 @@ def init_multiple_datasets(path_f, name, nb_cores): # pragma: no cover "Info:\t Enough copies of the dataset are already available") pass else: - if os.path.getsize(os.path.join(path_f, name + ".hdf5")) * nb_cores / float(1024) / 1000 / 1000 > 0.1: + if os.path.getsize( + os.path.join(path_f, name + ".hdf5")) * nb_cores / float( + 1024) / 1000 / 1000 > 0.1: logging.debug("Start:\t Creating " + str( nb_cores) + " temporary datasets for multiprocessing") logging.warning( - " WARNING : /!\ This may use a lot of HDD storage space : " + - str(os.path.getsize(os.path.join(path_f, name + ".hdf5")) * nb_cores / float( + r" WARNING : /!\ This may use a lot of HDD storage space : " + + str(os.path.getsize(os.path.join(path_f, + name + ".hdf5")) * nb_cores / float( 1024) / 1000 / 1000) + " Gbytes /!\ ") confirmation = confirm() if not confirmation: @@ -736,7 +760,7 @@ def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, dataset): dataset.rm() -def confirm(resp=True, timeout=15): # pragma: no cover +def confirm(resp=True, timeout=15): # pragma: no cover """Used to process answer""" ans = input_(timeout) if not ans: @@ -749,7 +773,7 @@ def confirm(resp=True, timeout=15): # pragma: no cover return False -def input_(timeout=15): # pragma: no cover +def input_(timeout=15): # pragma: no cover """used as a UI to stop if too much HDD space will be used""" logging.warning("You have " + str( timeout) + " seconds to stop the dataset copy by typing n") @@ -760,10 +784,10 @@ def input_(timeout=15): # pragma: no cover return "y" -def get_examples_views_indices(dataset, examples_indices, view_indices, ): - """This function is used to get all the examples indices and view indices if needed""" +def get_samples_views_indices(dataset, samples_indices, view_indices, ): + """This function is used to get all the samples indices and view indices if needed""" if view_indices is None: view_indices = np.arange(dataset.nb_view) - if examples_indices is None: - examples_indices = np.arange(dataset.get_nb_examples()) - return examples_indices, view_indices + if samples_indices is None: + samples_indices = np.arange(dataset.get_nb_samples()) + return samples_indices, view_indices diff --git a/summit/multiview_platform/utils/execution.py b/summit/multiview_platform/utils/execution.py index 51514dd5..0ce98864 100644 --- a/summit/multiview_platform/utils/execution.py +++ b/summit/multiview_platform/utils/execution.py @@ -166,7 +166,7 @@ def init_log_file(name, views, cl_type, log, debug, label, "%Y_%m_%d-%H_%M") + "_" + label) log_file_name = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join( cl_type) + "-" + "_".join(views) + "-" + name + "-LOG.log" - if os.path.exists(result_directory): # pragma: no cover + if os.path.exists(result_directory): # pragma: no cover raise NameError("The result dir already exists, wait 1 min and retry") log_file_path = os.path.join(result_directory, log_file_name) os.makedirs(os.path.dirname(log_file_path)) @@ -187,7 +187,7 @@ def gen_splits(labels, split_ratio, stats_iter_random_states): labels : numpy.ndarray Name of the database. split_ratio : float - The ratio of examples between train and test set. + The ratio of samples between train and test set. stats_iter_random_states : list of numpy.random.RandomState The random states for each statistical iteration. 
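Editor's note: among the helpers touched above, get_samples_views_indices (renamed from get_examples_views_indices) simply fills in full index ranges whenever it receives None. A minimal sketch of that behaviour; the _TinyDataset stand-in is hypothetical, only the helper and its module come from the patch.

import numpy as np

from summit.multiview_platform.utils.dataset import get_samples_views_indices

class _TinyDataset:
    # Hypothetical stand-in exposing only what the helper reads.
    nb_view = 2

    def get_nb_samples(self):
        return 5

samples, views = get_samples_views_indices(_TinyDataset(), None, None)
# samples -> array([0, 1, 2, 3, 4]); views -> array([0, 1])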
@@ -270,21 +270,21 @@ def init_views(dataset_var, arg_views): if arg_views is not None: allowed_views = arg_views all_views = [str(dataset_var.get_view_name(view_index)) - if type(dataset_var.get_view_name(view_index)) != bytes + if not isinstance(dataset_var.get_view_name(view_index), bytes) else dataset_var.get_view_name(view_index).decode("utf-8") for view_index in range(nb_view)] views = [] views_indices = [] for view_index in range(nb_view): view_name = dataset_var.get_view_name(view_index) - if type(view_name) == bytes: + if isinstance(view_name, bytes): view_name = view_name.decode("utf-8") if view_name in allowed_views: views.append(view_name) views_indices.append(view_index) else: views = [str(dataset_var.get_view_name(view_index)) - if type(dataset_var.get_view_name(view_index)) != bytes + if not isinstance(dataset_var.get_view_name(view_index), bytes) else dataset_var.get_view_name(view_index).decode("utf-8") for view_index in range(nb_view)] views_indices = range(nb_view) @@ -321,7 +321,8 @@ def find_dataset_names(path, type, names): the needed dataset names.""" package_path = os.path.dirname( os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - print(package_path, os.path.isdir(path), os.path.isdir(os.path.join(package_path,path )),) + print(package_path, os.path.isdir(path), + os.path.isdir(os.path.join(package_path, path)), ) if os.path.isdir(path): pass elif os.path.isdir(os.path.join(package_path, path)): @@ -339,7 +340,8 @@ def find_dataset_names(path, type, names): if names == ["all"]: return path, available_file_names elif isinstance(names, str): - return path, [used_name for used_name in available_file_names if names == used_name] + return path, [used_name for used_name in available_file_names if + names == used_name] elif len(names) > 1: selected_names = [used_name for used_name in available_file_names if used_name in names] @@ -348,11 +350,13 @@ def find_dataset_names(path, type, names): "None of the provided dataset names are available. Available datasets are {}".format( available_file_names)) return path, [used_name for used_name in available_file_names if - used_name in names] + used_name in names] elif names[0] in available_file_names: return path, names else: - raise ValueError("The asked dataset ({}) is not available in {}. \n The available ones are {}".format(names[0], path, available_file_names)) + raise ValueError( + "The asked dataset ({}) is not available in {}. \n The available ones are {}".format( + names[0], path, available_file_names)) def gen_argument_dictionaries(labels_dictionary, directories, @@ -360,7 +364,8 @@ def gen_argument_dictionaries(labels_dictionary, directories, hyper_param_search, args, k_folds, stats_iter_random_states, metrics, argument_dictionaries, - benchmark, views, views_indices,): # pragma: no cover + benchmark, views, + views_indices, ): # pragma: no cover r"""Used to generate a dictionary for each benchmark. 
One for each label combination (if multiclass), for each statistical iteration, generates an dictionary with diff --git a/summit/multiview_platform/utils/get_multiview_db.py b/summit/multiview_platform/utils/get_multiview_db.py index b3d2a24c..81d842fe 100644 --- a/summit/multiview_platform/utils/get_multiview_db.py +++ b/summit/multiview_platform/utils/get_multiview_db.py @@ -30,31 +30,31 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, label_names=["No".encode(), "Yes".encode(), "Maybe".encode()], random_state=None, full=True, add_noise=False, - noise_std=0.15, nb_view=3, nb_examples=100, + noise_std=0.15, nb_view=3, nb_samples=100, nb_features=10): """Used to generate a plausible dataset to test the algorithms""" secure_file_path(os.path.join(path, "plausible.hdf5")) - example_ids = ["exmaple_id_" + str(i) for i in range(nb_examples)] + sample_ids = ["exmaple_id_" + str(i) for i in range(nb_samples)] views = [] view_names = [] are_sparse = [] if nb_class == 2: labels = np.array( - [0 for _ in range(int(nb_examples / 2))] + [1 for _ in range( - nb_examples - int(nb_examples / 2))]) + [0 for _ in range(int(nb_samples / 2))] + [1 for _ in range( + nb_samples - int(nb_samples / 2))]) label_names = ["No".encode(), "Yes".encode()] for view_index in range(nb_view): view_data = np.array( - [np.zeros(nb_features) for _ in range(int(nb_examples / 2))] + + [np.zeros(nb_features) for _ in range(int(nb_samples / 2))] + [np.ones(nb_features) for _ in - range(nb_examples - int(nb_examples / 2))]) - fake_one_indices = random_state.randint(0, int(nb_examples / 2), - int(nb_examples / 12)) - fake_zero_indices = random_state.randint(int(nb_examples / 2), - nb_examples, - int(nb_examples / 12)) + range(nb_samples - int(nb_samples / 2))]) + fake_one_indices = random_state.randint(0, int(nb_samples / 2), + int(nb_samples / 12)) + fake_zero_indices = random_state.randint(int(nb_samples / 2), + nb_samples, + int(nb_samples / 12)) for index in np.concatenate((fake_one_indices, fake_zero_indices)): - example_ids[index] += "noised" + sample_ids[index] += "noised" view_data[fake_one_indices] = np.ones( (len(fake_one_indices), nb_features)) @@ -67,15 +67,15 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, dataset = RAMDataset(views=views, labels=labels, labels_names=label_names, view_names=view_names, - are_sparse=are_sparse, example_ids=example_ids, + are_sparse=are_sparse, sample_ids=sample_ids, name='plausible') labels_dictionary = {0: "No", 1: "Yes"} return dataset, labels_dictionary, "plausible" elif nb_class >= 3: - firstBound = int(nb_examples / 3) - rest = nb_examples - 2 * int(nb_examples / 3) - scndBound = 2 * int(nb_examples / 3) - thrdBound = nb_examples + firstBound = int(nb_samples / 3) + rest = nb_samples - 2 * int(nb_samples / 3) + scndBound = 2 * int(nb_samples / 3) + thrdBound = nb_samples labels = np.array( [0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + @@ -87,11 +87,11 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, [np.ones(nb_features) for _ in range(firstBound)] + [np.ones(nb_features) + 1 for _ in range(rest)]) fake_one_indices = random_state.randint(0, firstBound, - int(nb_examples / 12)) + int(nb_samples / 12)) fakeTwoIndices = random_state.randint(firstBound, scndBound, - int(nb_examples / 12)) + int(nb_samples / 12)) fake_zero_indices = random_state.randint(scndBound, thrdBound, - int(nb_examples / 12)) + int(nb_samples / 12)) view_data[fake_one_indices] = np.ones( (len(fake_one_indices), nb_features)) @@ -107,7 
+107,7 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, labels_names=label_names, view_names=view_names, are_sparse=are_sparse, name="plausible", - example_ids=example_ids) + sample_ids=sample_ids) labels_dictionary = {0: "No", 1: "Yes", 2: "Maybe"} return dataset, labels_dictionary, "plausible" @@ -156,7 +156,8 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, dtype='str', delimiter=delimiter) datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") labels = np.genfromtxt(pathF + nameDB + "-labels.csv", delimiter=delimiter) - labelsDset = datasetFile.create_dataset("Labels", labels.shape, data=labels) + labelsDset = datasetFile.create_dataset( + "Labels", labels.shape, data=labels) labelsDset.attrs["names"] = [labelName.encode() for labelName in labels_names] viewFileNames = [viewFileName for viewFileName in @@ -309,7 +310,7 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # # def copyhdf5_dataset(source_data_file, destination_data_file, source_dataset_name, # destination_dataset_name, used_indices): -# """Used to copy a view in a new dataset file using only the examples of +# """Used to copy a view in a new dataset file using only the samples of # usedIndices, and copying the args""" # new_d_set = destination_data_file.create_dataset(destination_dataset_name, # data=source_data_file.get( @@ -361,10 +362,11 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # def getLabelSupports(CLASS_LABELS): -# """Used to get the number of example for each label""" +# """Used to get the number of sample for each label""" # labels = set(CLASS_LABELS) # supports = [CLASS_LABELS.tolist().count(label) for label in labels] -# return supports, dict((label, index) for label, index in zip(labels, range(len(labels)))) +# return supports, dict((label, index) for label, index in zip(labels, +# range(len(labels)))) # def isUseful(labelSupports, index, CLASS_LABELS, labelDict): @@ -714,13 +716,13 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # Methyl = methylData # sortedMethylGeneIndices = np.zeros(methylData.shape, dtype=int) # MethylRanking = np.zeros(methylData.shape, dtype=int) -# for exampleIndex, exampleArray in enumerate(Methyl): -# sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray)) +# for sampleIndex, sampleArray in enumerate(Methyl): +# sortedMethylDictionary = dict((index, value) for index, value in enumerate(sampleArray)) # sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1)) # sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int) -# sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray +# sortedMethylGeneIndices[sampleIndex] = sortedMethylIndicesArray # for geneIndex in range(Methyl.shape[1]): -# MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex +# MethylRanking[sampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex # logging.debug("Done:\t Getting Sorted Methyl data") # # logging.debug("Start:\t Getting Binarized Methyl data") @@ -847,13 +849,13 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # Methyl = datasetFile["View0"][...] 
# sortedMethylGeneIndices = np.zeros(datasetFile.get("View0").shape, dtype=int) # MethylRanking = np.zeros(datasetFile.get("View0").shape, dtype=int) -# for exampleIndex, exampleArray in enumerate(Methyl): -# sortedMethylDictionary = dict((index, value) for index, value in enumerate(exampleArray)) +# for sampleIndex, sampleArray in enumerate(Methyl): +# sortedMethylDictionary = dict((index, value) for index, value in enumerate(sampleArray)) # sortedMethylIndicesDict = sorted(sortedMethylDictionary.items(), key=operator.itemgetter(1)) # sortedMethylIndicesArray = np.array([index for (index, value) in sortedMethylIndicesDict], dtype=int) -# sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray +# sortedMethylGeneIndices[sampleIndex] = sortedMethylIndicesArray # for geneIndex in range(Methyl.shape[1]): -# MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex +# MethylRanking[sampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex # mMethylDset = datasetFile.create_dataset("View10", sortedMethylGeneIndices.shape, data=sortedMethylGeneIndices) # mMethylDset.attrs["name"] = "SMethyl" # mMethylDset.attrs["sparse"] = False @@ -915,13 +917,13 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # MiRNA = datasetFile["View1"][...] # sortedMiRNAGeneIndices = np.zeros(datasetFile.get("View1").shape, dtype=int) # MiRNARanking = np.zeros(datasetFile.get("View1").shape, dtype=int) -# for exampleIndex, exampleArray in enumerate(MiRNA): -# sortedMiRNADictionary = dict((index, value) for index, value in enumerate(exampleArray)) +# for sampleIndex, sampleArray in enumerate(MiRNA): +# sortedMiRNADictionary = dict((index, value) for index, value in enumerate(sampleArray)) # sortedMiRNAIndicesDict = sorted(sortedMiRNADictionary.items(), key=operator.itemgetter(1)) # sortedMiRNAIndicesArray = np.array([index for (index, value) in sortedMiRNAIndicesDict], dtype=int) -# sortedMiRNAGeneIndices[exampleIndex] = sortedMiRNAIndicesArray +# sortedMiRNAGeneIndices[sampleIndex] = sortedMiRNAIndicesArray # for geneIndex in range(MiRNA.shape[1]): -# MiRNARanking[exampleIndex, sortedMiRNAIndicesArray[geneIndex]] = geneIndex +# MiRNARanking[sampleIndex, sortedMiRNAIndicesArray[geneIndex]] = geneIndex # mmirnaDset = datasetFile.create_dataset("View7", sortedMiRNAGeneIndices.shape, data=sortedMiRNAGeneIndices) # mmirnaDset.attrs["name"] = "SMiRNA_" # mmirnaDset.attrs["sparse"] = False @@ -988,13 +990,13 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # RNASeq = datasetFile["View2"][...] 
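Editor's note: the commented-out legacy loaders being renamed in these hunks all repeat the same pattern: for every sample, sort the row's feature values and record each feature's position in that ordering (a per-sample feature ranking). Purely as an illustration of what those disabled blocks compute, and not code from the patch, the same result can be obtained with a double argsort.

import numpy as np

def per_sample_feature_ranks(data):
    # argsort gives the ordering of each row; a second argsort turns that
    # ordering into the rank held by every feature, matching the
    # sorted*GeneIndices / *Ranking pair built in the old per-sample loops.
    order = np.argsort(data, axis=1)   # ~ sorted*GeneIndices
    ranks = np.argsort(order, axis=1)  # ~ *Ranking
    return order, ranks

order, ranks = per_sample_feature_ranks(np.array([[0.3, 0.1, 0.2]]))
# order -> [[1, 2, 0]]; ranks -> [[2, 0, 1]]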
# sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int) # RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int) -# for exampleIndex, exampleArray in enumerate(RNASeq): -# sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(exampleArray)) +# for sampleIndex, sampleArray in enumerate(RNASeq): +# sortedRNASeqDictionary = dict((index, value) for index, value in enumerate(sampleArray)) # sortedRNASeqIndicesDict = sorted(sortedRNASeqDictionary.items(), key=operator.itemgetter(1)) # sortedRNASeqIndicesArray = np.array([index for (index, value) in sortedRNASeqIndicesDict], dtype=int) -# sortedRNASeqGeneIndices[exampleIndex] = sortedRNASeqIndicesArray +# sortedRNASeqGeneIndices[sampleIndex] = sortedRNASeqIndicesArray # for geneIndex in range(RNASeq.shape[1]): -# RNASeqRanking[exampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex +# RNASeqRanking[sampleIndex, sortedRNASeqIndicesArray[geneIndex]] = geneIndex # mrnaseqDset = datasetFile.create_dataset("View4", sortedRNASeqGeneIndices.shape, data=sortedRNASeqGeneIndices) # mrnaseqDset.attrs["name"] = "SRNASeq" # mrnaseqDset.attrs["sparse"] = False @@ -1170,16 +1172,16 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # # for labelIndex in np.arange(nbLabels): # # pathToExamples = pathToAwa + 'Animals_with_Attributes/Features/' + viewDictionary[view] + '/' + \ # # labelDictionary[labelIndex] + '/' -# # examples = os.listdir(pathToExamples) +# # samples = os.listdir(pathToExamples) # # if view == 0: -# # nbExample += len(examples) -# # for example in examples: +# # nbExample += len(samples) +# # for sample in samples: # # if viewDictionary[view]=='decaf': -# # exampleFile = open(pathToExamples + example) -# # viewData.append([float(line.strip()) for line in exampleFile]) +# # sampleFile = open(pathToExamples + sample) +# # viewData.append([float(line.strip()) for line in sampleFile]) # # else: -# # exampleFile = open(pathToExamples + example) -# # viewData.append([[float(coordinate) for coordinate in raw.split()] for raw in exampleFile][0]) +# # sampleFile = open(pathToExamples + sample) +# # viewData.append([[float(coordinate) for coordinate in raw.split()] for raw in sampleFile][0]) # # if view == 0: # # labels.append(labelIndex) # # @@ -1225,25 +1227,25 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # # # def makeArrayFromTriangular(pseudoRNASeqMatrix): # # matrixShape = len(pseudoRNASeqMatrix[0,:]) -# # exampleArray = np.array(((matrixShape-1)*matrixShape)/2) +# # sampleArray = np.array(((matrixShape-1)*matrixShape)/2) # # arrayIndex = 0 # # for i in range(matrixShape-1): # # for j in range(i+1, matrixShape): -# # exampleArray[arrayIndex]=pseudoRNASeqMatrix[i,j] +# # sampleArray[arrayIndex]=pseudoRNASeqMatrix[i,j] # # arrayIndex += 1 -# # return exampleArray +# # return sampleArray # # # # def getPseudoRNASeq(dataset): # # nbGenes = len(dataset["/View2/matrix"][0, :]) # # pseudoRNASeq = np.zeros((dataset["/datasetlength"][...], ((nbGenes - 1) * nbGenes) / 2), dtype=bool_) -# # for exampleIndex in xrange(dataset["/datasetlength"][...]): +# # for sampleIndex in xrange(dataset["/datasetlength"][...]): # # arrayIndex = 0 # # for i in xrange(nbGenes): # # for j in xrange(nbGenes): # # if i > j: -# # pseudoRNASeq[exampleIndex, arrayIndex] = -# # dataset["/View2/matrix"][exampleIndex, j] < dataset["/View2/matrix"][exampleIndex, i] +# # pseudoRNASeq[sampleIndex, arrayIndex] = +# # dataset["/View2/matrix"][sampleIndex, j] < 
dataset["/View2/matrix"][sampleIndex, i] # # arrayIndex += 1 # # dataset["/View4/matrix"] = pseudoRNASeq # # dataset["/View4/name"] = "pseudoRNASeq" diff --git a/summit/multiview_platform/utils/hyper_parameter_search.py b/summit/multiview_platform/utils/hyper_parameter_search.py index a13f6cab..0fd65b93 100644 --- a/summit/multiview_platform/utils/hyper_parameter_search.py +++ b/summit/multiview_platform/utils/hyper_parameter_search.py @@ -1,10 +1,7 @@ -import itertools -import sys import traceback import yaml from abc import abstractmethod -import matplotlib.pyplot as plt import numpy as np from scipy.stats import randint, uniform from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ @@ -14,7 +11,19 @@ from sklearn.base import clone, BaseEstimator from .multiclass import MultiClassWrapper from .organization import secure_file_path from .base import get_metric -from .. import metrics +import traceback +from abc import abstractmethod + +import numpy as np +import yaml +from scipy.stats import randint, uniform +from sklearn.base import clone, BaseEstimator +from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ + ParameterGrid, ParameterSampler + +from .base import get_metric +from .multiclass import MultiClassWrapper +from .organization import secure_file_path class HPSearch: @@ -40,10 +49,12 @@ class HPSearch: self.cv_results_["params"] = [] n_failed = 0 self.tracebacks_params = [] - for candidate_param_idx, candidate_param in enumerate(self.candidate_params): + for candidate_param_idx, candidate_param in enumerate( + self.candidate_params): test_scores = np.zeros(n_splits) + 1000 try: - for fold_idx, (train_indices, test_indices) in enumerate(folds): + for fold_idx, (train_indices, + test_indices) in enumerate(folds): current_estimator = clone(base_estimator) current_estimator.set_params(**candidate_param) current_estimator.fit(X, y, @@ -66,9 +77,10 @@ class HPSearch: cross_validation_score) results[candidate_param_idx] = cross_validation_score if cross_validation_score >= max(results.values()): - self.best_params_ = self.candidate_params[candidate_param_idx] + self.best_params_ = self.candidate_params[ + candidate_param_idx] self.best_score_ = cross_validation_score - except: + except BaseException: if self.track_tracebacks: n_failed += 1 self.tracebacks.append(traceback.format_exc()) @@ -89,7 +101,7 @@ class HPSearch: return self @abstractmethod - def get_candidate_params(self, X): # pragma: no cover + def get_candidate_params(self, X): # pragma: no cover raise NotImplementedError def get_best_params(self): @@ -102,16 +114,18 @@ class HPSearch: scores_array = self.cv_results_['mean_test_score'] sorted_indices = np.argsort(-scores_array) tested_params = [self.cv_results_["params"][score_index] - for score_index in sorted_indices] + for score_index in sorted_indices] scores_array = scores_array[sorted_indices] output_string = "" for parameters, score in zip(tested_params, scores_array): formatted_params = format_params(parameters) - output_string += "\n{}\n\t\t{}".format(yaml.dump(formatted_params), score) + output_string += "\n{}\n\t\t{}".format(yaml.dump(formatted_params), + score) if self.tracebacks: output_string += "Failed : \n\n\n" - for traceback, params in zip(self.tracebacks, self.tracebacks_params): - output_string+= '{}\n\n{}\n'.format(params, traceback) + for traceback, params in zip(self.tracebacks, + self.tracebacks_params): + output_string += '{}\n\n{}\n'.format(params, traceback) secure_file_path(output_file_name + "hps_report.txt") with 
open(output_file_name + "hps_report.txt", "w") as output_file: output_file.write(output_string) @@ -136,7 +150,7 @@ class Random(RandomizedSearchCV, HPSearch): self.view_indices = view_indices self.equivalent_draws = equivalent_draws self.track_tracebacks = track_tracebacks - self.tracebacks=[] + self.tracebacks = [] def get_param_distribs(self, estimator): if isinstance(estimator, MultiClassWrapper): @@ -144,14 +158,14 @@ class Random(RandomizedSearchCV, HPSearch): else: return estimator.gen_distribs() - def fit(self, X, y=None, groups=None, **fit_params): # pragma: no cover + def fit(self, X, y=None, groups=None, **fit_params): # pragma: no cover if self.framework == "monoview": return RandomizedSearchCV.fit(self, X, y=y, groups=groups, **fit_params) elif self.framework == "multiview": return HPSearch.fit_multiview(self, X, y=y, groups=groups, - **fit_params) + **fit_params) def get_candidate_params(self, X): if self.equivalent_draws: @@ -166,11 +180,10 @@ class Random(RandomizedSearchCV, HPSearch): # y[self.available_indices]) - - class Grid(GridSearchCV, HPSearch): - def __init__(self, estimator, param_grid={}, refit=False, n_jobs=1, scoring=None, cv=None, + def __init__(self, estimator, param_grid={}, refit=False, n_jobs=1, + scoring=None, cv=None, learning_indices=None, view_indices=None, framework="monoview", random_state=None, track_tracebacks=True): scoring = HPSearch.get_scoring(self, scoring) @@ -186,10 +199,10 @@ class Grid(GridSearchCV, HPSearch): def fit(self, X, y=None, groups=None, **fit_params): if self.framework == "monoview": return GridSearchCV.fit(self, X, y=y, groups=groups, - **fit_params) + **fit_params) elif self.framework == "multiview": return HPSearch.fit_multiview(self, X, y=y, groups=groups, - **fit_params) + **fit_params) def get_candidate_params(self, X): self.candidate_params = list(ParameterGrid(self.param_grid)) @@ -211,9 +224,7 @@ class Grid(GridSearchCV, HPSearch): # index_step = floor(len(distribution)/n_points_per_param-2) # selected_params[param_name] = distribution[0]+[distribution[index*index_step+1] # for index -# in range(n_points_per_param)] - - +# in range(n_points_per_param)] # @@ -231,7 +242,6 @@ class Grid(GridSearchCV, HPSearch): # pass - # class RS(HPSSearch): # # def __init__(self, X, y, framework, random_state, output_file_name, @@ -244,7 +254,6 @@ class Grid(GridSearchCV, HPSearch): # HPSSearch.__init__() - # def randomized_search(X, y, framework, random_state, output_file_name, # classifier_module, # classifier_name, folds=4, nb_cores=1, @@ -289,13 +298,6 @@ class Grid(GridSearchCV, HPSearch): # return best_params, scores_array, params - - - - - - - # # def spear_mint(dataset, classifier_name, views_indices=None, k_folds=None, # n_iter=1, @@ -352,7 +354,6 @@ class Grid(GridSearchCV, HPSearch): # - class CustomRandint: """Used as a distribution returning a integer between low and high-1. 
It can be used with a multiplier agrument to be able to perform more complex generation @@ -360,8 +361,8 @@ class CustomRandint: def __init__(self, low=0, high=0, multiplier=""): self.randint = randint(low, high) - self.low=low - self.high=high + self.low = low + self.high = high self.multiplier = multiplier def rvs(self, random_state=None): @@ -403,8 +404,9 @@ def format_params(params, pref=""): pass elif isinstance(value, BaseEstimator): dictionary[key] = value.__class__.__name__ - for second_key, second_value in format_params(value.get_params()).items(): - dictionary[str(key)+"__"+second_key] = second_value + for second_key, second_value in format_params( + value.get_params()).items(): + dictionary[str(key) + "__" + second_key] = second_value else: dictionary[str(key)] = format_params(value) return dictionary @@ -421,7 +423,6 @@ def format_params(params, pref=""): else: return params - # def randomized_search_(dataset_var, labels, classifier_package, classifier_name, # metrics_list, learning_indices, k_folds, random_state, # views_indices=None, n_iter=1, diff --git a/summit/multiview_platform/utils/make_file_config.py b/summit/multiview_platform/utils/make_file_config.py index 361fe798..019ca606 100644 --- a/summit/multiview_platform/utils/make_file_config.py +++ b/summit/multiview_platform/utils/make_file_config.py @@ -1,6 +1,7 @@ import importlib import inspect + class ConfigurationMaker(): """ Find the name of the classifier from the dict classier to report diff --git a/summit/multiview_platform/utils/multiclass.py b/summit/multiview_platform/utils/multiclass.py index 0b7210a7..98c6c1d6 100644 --- a/summit/multiview_platform/utils/multiclass.py +++ b/summit/multiview_platform/utils/multiclass.py @@ -7,7 +7,7 @@ from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier from sklearn.multiclass import _ovr_decision_function from sklearn.preprocessing import LabelBinarizer -from .dataset import get_examples_views_indices +from .dataset import get_samples_views_indices def get_mc_estim(estimator, random_state, y=None, multiview=False, @@ -68,7 +68,8 @@ class MultiClassWrapper: return self def get_config(self): - return "multiclass_adaptation : "+self.__class__.__name__+ ", " +self.estimator.get_config() + return "multiclass_adaptation : " + self.__class__.__name__ + \ + ", " + self.estimator.get_config() def format_params(self, params, deep=True): if hasattr(self, 'estimators_'): @@ -80,8 +81,6 @@ class MultiClassWrapper: params.pop("estimator") return params - - def get_interpretation(self, directory, base_file_name, y_test=None): # TODO : Multiclass interpretation return "Multiclass wrapper is not interpretable yet" @@ -133,15 +132,15 @@ def _multiview_fit_binary(estimator, X, y, train_indices, return estimator -def _multiview_predict_binary(estimator, X, example_indices, view_indices): +def _multiview_predict_binary(estimator, X, sample_indices, view_indices): if is_regressor(estimator): - return estimator.predict(X, example_indices=example_indices, + return estimator.predict(X, sample_indices=sample_indices, view_indices=view_indices) try: score = np.ravel(estimator.decision_function(X)) except (AttributeError, NotImplementedError): # probabilities of the positive class - score = estimator.predict_proba(X, example_indices=example_indices, + score = estimator.predict_proba(X, sample_indices=sample_indices, view_indices=view_indices)[:, 1] return score @@ -173,22 +172,22 @@ class MultiviewOVRWrapper(MultiviewWrapper, OneVsRestClassifier): enumerate(columns)] return self - 
def predict(self, X, example_indices=None, view_indices=None): - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) - n_samples = len(example_indices) + def predict(self, X, sample_indices=None, view_indices=None): + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) + n_samples = len(sample_indices) if self.label_binarizer_.y_type_ == "multiclass": maxima = np.empty(n_samples, dtype=float) maxima.fill(-np.inf) argmaxima = np.zeros(n_samples, dtype=int) for i, e in enumerate(self.estimators_): - pred = _multiview_predict_binary(e, X, example_indices, + pred = _multiview_predict_binary(e, X, sample_indices, view_indices) np.maximum(maxima, pred, out=maxima) argmaxima[maxima == pred] = i return self.classes_[argmaxima] - else: # pragma: no cover + else: # pragma: no cover if (hasattr(self.estimators_[0], "decision_function") and is_classifier(self.estimators_[0])): thresh = 0 @@ -199,7 +198,7 @@ class MultiviewOVRWrapper(MultiviewWrapper, OneVsRestClassifier): for e in self.estimators_: indices.extend( np.where(_multiview_predict_binary(e, X, - example_indices, + sample_indices, view_indices) > thresh)[ 0]) indptr.append(len(indices)) @@ -221,7 +220,7 @@ def _multiview_fit_ovo_binary(estimator, X, y, i, j, train_indices, y_binary = np.empty(y.shape, np.int) y_binary[y == i] = 0 y_binary[y == j] = 1 - indcond = np.arange(X.get_nb_examples())[cond] + indcond = np.arange(X.get_nb_samples())[cond] train_indices = np.intersect1d(train_indices, indcond) return _multiview_fit_binary(estimator, X, @@ -248,9 +247,9 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): """ # X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']) # check_classification_targets(y) - train_indices, view_indices = get_examples_views_indices(X, - train_indices, - view_indices) + train_indices, view_indices = get_samples_views_indices(X, + train_indices, + view_indices) self.classes_ = np.unique(y) if len(self.classes_) == 1: raise ValueError("OneVsOneClassifier can not be fit when only one" @@ -270,7 +269,7 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): return self - def predict(self, X, example_indices=None, view_indices=None): + def predict(self, X, sample_indices=None, view_indices=None): """Estimate the best class label for each sample in X. This is implemented as ``argmax(decision_function(X), axis=1)`` which @@ -287,16 +286,17 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): y : numpy array of shape [n_samples] Predicted multi-class targets. 
""" - example_indices, view_indices = get_examples_views_indices(X, - example_indices, - view_indices) - Y = self.multiview_decision_function(X, example_indices=example_indices, + sample_indices, view_indices = get_samples_views_indices(X, + sample_indices, + view_indices) + Y = self.multiview_decision_function(X, sample_indices=sample_indices, view_indices=view_indices) if self.n_classes_ == 2: return self.classes_[(Y > 0).astype(np.int)] return self.classes_[Y.argmax(axis=1)] - def multiview_decision_function(self, X, example_indices, view_indices): # pragma: no cover + def multiview_decision_function(self, X, sample_indices, + view_indices): # pragma: no cover # check_is_fitted(self) indices = self.pairwise_indices_ @@ -306,7 +306,7 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): # TODO Gram matrix compatibility Xs = [X[:, idx] for idx in indices] predictions = np.vstack( - [est.predict(Xi, example_indices=example_indices, + [est.predict(Xi, sample_indices=sample_indices, view_indices=view_indices) for est, Xi in zip(self.estimators_, Xs)]).T confidences = np.ones(predictions.shape) diff --git a/summit/multiview_platform/utils/organization.py b/summit/multiview_platform/utils/organization.py index 1fdc0ecf..ca859bf5 100644 --- a/summit/multiview_platform/utils/organization.py +++ b/summit/multiview_platform/utils/organization.py @@ -1,8 +1,8 @@ -import os import errno +import os -def secure_file_path(file_name): # pragma: no cover +def secure_file_path(file_name): # pragma: no cover if not os.path.exists(os.path.dirname(file_name)): try: os.makedirs(os.path.dirname(file_name)) diff --git a/summit/tests/test_config_hps.yml b/summit/tests/test_config_hps.yml index e651eee7..246b9fea 100644 --- a/summit/tests/test_config_hps.yml +++ b/summit/tests/test_config_hps.yml @@ -31,7 +31,7 @@ track_tracebacks: True # All the classification-realted configuration options -# The ratio of test examples/number of train examples +# The ratio of test samples/number of train samples split: 0.25 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/tests/test_config_iter.yml b/summit/tests/test_config_iter.yml index aed7d6e7..a48f967a 100644 --- a/summit/tests/test_config_iter.yml +++ b/summit/tests/test_config_iter.yml @@ -31,7 +31,7 @@ track_tracebacks: True # All the classification-realted configuration options -# The ratio of test examples/number of train examples +# The ratio of test samples/number of train samples split: 0.25 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/tests/test_config_simple.yml b/summit/tests/test_config_simple.yml index bda78efa..c18a21a0 100644 --- a/summit/tests/test_config_simple.yml +++ b/summit/tests/test_config_simple.yml @@ -31,7 +31,7 @@ track_tracebacks: True # All the classification-realted configuration options -# The ratio of test examples/number of train examples +# The ratio of test samples/number of train samples split: 0.25 # The nubmer of folds in the cross validation process when hyper-paramter optimization is performed nb_folds: 2 diff --git a/summit/tests/test_exec_classif.py b/summit/tests/test_exec_classif.py index b1b33787..1696d720 100644 --- a/summit/tests/test_exec_classif.py +++ b/summit/tests/test_exec_classif.py @@ -38,39 +38,47 @@ class Test_gen_single_monoview_arg_dictionary(unittest.TestCase): "view_name", "hps_kwargs") self.assertEqual(conf, {"classifier_name1": 
{}, - "view_name": "view_name", - "view_index": "view_index", - "classifier_name": "classifier_name1", - "nb_class": "nb_class", - "hps_kwargs":"hps_kwargs" } ) + "view_name": "view_name", + "view_index": "view_index", + "classifier_name": "classifier_name1", + "nb_class": "nb_class", + "hps_kwargs": "hps_kwargs"}) + class Test_initBenchmark(unittest.TestCase): def test_benchmark_wanted(self): - benchmark_output = exec_classif.init_benchmark(cl_type=["monoview", "multiview"], monoview_algos=["decision_tree"], multiview_algos=["weighted_linear_late_fusion"]) - self.assertEqual(benchmark_output , {'monoview': ['decision_tree'], 'multiview': ['weighted_linear_late_fusion']}) + benchmark_output = exec_classif.init_benchmark( + cl_type=[ + "monoview", + "multiview"], + monoview_algos=["decision_tree"], + multiview_algos=["weighted_linear_late_fusion"]) + self.assertEqual(benchmark_output, + {'monoview': ['decision_tree'], + 'multiview': ['weighted_linear_late_fusion']}) benchmark_output = exec_classif.init_benchmark( cl_type=["monoview", "multiview"], monoview_algos=["all"], multiview_algos=["all"]) self.assertEqual(benchmark_output, {'monoview': ['adaboost', - 'decision_tree', - 'gradient_boosting', - 'knn', - 'lasso', - 'random_forest', - 'sgd', - 'svm_linear', - 'svm_poly', - 'svm_rbf'], - 'multiview': ['bayesian_inference_fusion', - 'difficulty_fusion', - 'disagree_fusion', - 'double_fault_fusion', - 'entropy_fusion', - 'majority_voting_fusion', - 'svm_jumbo_fusion', - 'weighted_linear_early_fusion', - 'weighted_linear_late_fusion']}) + 'decision_tree', + 'gradient_boosting', + 'knn', + 'lasso', + 'random_forest', + 'sgd', + 'svm_linear', + 'svm_poly', + 'svm_rbf'], + 'multiview': ['bayesian_inference_fusion', + 'difficulty_fusion', + 'disagree_fusion', + 'double_fault_fusion', + 'entropy_fusion', + 'majority_voting_fusion', + 'svm_jumbo_fusion', + 'weighted_linear_early_fusion', + 'weighted_linear_late_fusion']}) class Test_Functs(unittest.TestCase): @@ -89,40 +97,50 @@ class Test_Functs(unittest.TestCase): self.assertEqual(args, {"monoview": {}, "multiview": {}}) def test_init_kwargs(self): - kwargs = exec_classif.init_kwargs({"decision_tree":""},["decision_tree"]) - self.assertEqual(kwargs, {"decision_tree":""}) + kwargs = exec_classif.init_kwargs( + {"decision_tree": ""}, ["decision_tree"]) + self.assertEqual(kwargs, {"decision_tree": ""}) kwargs = exec_classif.init_kwargs({"weighted_linear_late_fusion": ""}, ["weighted_linear_late_fusion"], framework="multiview") self.assertEqual(kwargs, {"weighted_linear_late_fusion": ""}) kwargs = exec_classif.init_kwargs({}, ["decision_tree"],) - self.assertEqual(kwargs, {"decision_tree":{}}) - self.assertRaises(AttributeError, exec_classif.init_kwargs, {}, ["test"]) + self.assertEqual(kwargs, {"decision_tree": {}}) + self.assertRaises( + AttributeError, + exec_classif.init_kwargs, + {}, + ["test"]) def test_arange_metrics(self): - metrics = exec_classif.arange_metrics({"accuracy_score":{}}, "accuracy_score") - self.assertEqual(metrics, {"accuracy_score*":{}}) - self.assertRaises(ValueError, exec_classif.arange_metrics, {"test1":{}}, "test") + metrics = exec_classif.arange_metrics( + {"accuracy_score": {}}, "accuracy_score") + self.assertEqual(metrics, {"accuracy_score*": {}}) + self.assertRaises( + ValueError, exec_classif.arange_metrics, { + "test1": {}}, "test") def test_banchmark_init(self): from sklearn.model_selection import StratifiedKFold folds = StratifiedKFold(n_splits=2) res, lab_names = exec_classif.benchmark_init(directory=tmp_path, - 
classification_indices=[np.array([0,1,2,3]), np.array([4])], + classification_indices=[ + np.array([0, 1, 2, 3]), np.array([4])], labels=test_dataset.get_labels(), - labels_dictionary={"yes":0, "no":1}, + labels_dictionary={ + "yes": 0, "no": 1}, k_folds=folds, dataset_var=test_dataset) self.assertEqual(res, []) self.assertEqual(lab_names, [0, 1]) - - class Test_InitArgumentDictionaries(unittest.TestCase): @classmethod def setUpClass(cls): rm_tmp() - cls.benchmark = {"monoview": ["fake_monoview_classifier"], "multiview": {}} + cls.benchmark = { + "monoview": ["fake_monoview_classifier"], + "multiview": {}} cls.views_dictionnary = {'test_view_0': 0, 'test_view': 1} cls.nb_class = 2 cls.monoview_classifier_name = "fake_monoview_classifier" @@ -132,13 +150,13 @@ class Test_InitArgumentDictionaries(unittest.TestCase): cls.multiview_classifier_arg_name = "fake_arg_mv" cls.multiview_classifier_arg_value = "fake_value_2" cls.init_kwargs = { - 'monoview':{ + 'monoview': { cls.monoview_classifier_name: - {cls.monoview_classifier_arg_name:cls.monoview_classifier_arg_value} + {cls.monoview_classifier_arg_name: cls.monoview_classifier_arg_value} }, - "multiview":{ - cls.multiview_classifier_name:{ - cls.multiview_classifier_arg_name:cls.multiview_classifier_arg_value} + "multiview": { + cls.multiview_classifier_name: { + cls.multiview_classifier_arg_name: cls.multiview_classifier_arg_value} } } @@ -149,21 +167,21 @@ class Test_InitArgumentDictionaries(unittest.TestCase): self.init_kwargs, "None", {}) expected_output = [{ - self.monoview_classifier_name: { - self.monoview_classifier_arg_name:self.monoview_classifier_arg_value}, - "view_name": "test_view_0", - 'hps_kwargs': {}, - "classifier_name": self.monoview_classifier_name, - "nb_class": self.nb_class, - "view_index": 0}, - {self.monoview_classifier_name: { - self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}, - "view_name": "test_view", - 'hps_kwargs': {}, - "classifier_name": self.monoview_classifier_name, - "nb_class": self.nb_class, - "view_index": 1}, - ] + self.monoview_classifier_name: { + self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}, + "view_name": "test_view_0", + 'hps_kwargs': {}, + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 0}, + {self.monoview_classifier_name: { + self.monoview_classifier_arg_name: self.monoview_classifier_arg_value}, + "view_name": "test_view", + 'hps_kwargs': {}, + "classifier_name": self.monoview_classifier_name, + "nb_class": self.nb_class, + "view_index": 1}, + ] self.assertEqual(arguments["monoview"], expected_output) def test_init_argument_dictionaries_multiview(self): @@ -175,21 +193,21 @@ class Test_InitArgumentDictionaries(unittest.TestCase): self.init_kwargs, "None", {}) expected_output = [{ - "classifier_name": self.multiview_classifier_name, - "view_indices": [0,1], - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - 'hps_kwargs': {}, - "labels_names":None, - self.multiview_classifier_name: { - self.multiview_classifier_arg_name: - self.multiview_classifier_arg_value}, - },] + "classifier_name": self.multiview_classifier_name, + "view_indices": [0, 1], + "view_names": ["test_view_0", "test_view"], + "nb_class": self.nb_class, + 'hps_kwargs': {}, + "labels_names": None, + self.multiview_classifier_name: { + self.multiview_classifier_arg_name: + self.multiview_classifier_arg_value}, + }, ] self.assertEqual(arguments["multiview"][0], expected_output[0]) - def 
test_init_argument_dictionaries_multiview_complex(self): - self.multiview_classifier_arg_value = {"fake_value_2":"plif", "plaf":"plouf"} + self.multiview_classifier_arg_value = { + "fake_value_2": "plif", "plaf": "plouf"} self.init_kwargs = { 'monoview': { self.monoview_classifier_name: @@ -209,15 +227,15 @@ class Test_InitArgumentDictionaries(unittest.TestCase): self.init_kwargs, "None", {}) expected_output = [{ - "classifier_name": self.multiview_classifier_name, - "view_indices": [0,1], - 'hps_kwargs': {}, - "view_names": ["test_view_0", "test_view"], - "nb_class": self.nb_class, - "labels_names":None, - self.multiview_classifier_name: { - self.multiview_classifier_arg_name: - self.multiview_classifier_arg_value}, + "classifier_name": self.multiview_classifier_name, + "view_indices": [0, 1], + 'hps_kwargs': {}, + "view_names": ["test_view_0", "test_view"], + "nb_class": self.nb_class, + "labels_names": None, + self.multiview_classifier_name: { + self.multiview_classifier_arg_name: + self.multiview_classifier_arg_value}, }] self.assertEqual(arguments["multiview"][0], expected_output[0]) @@ -230,22 +248,25 @@ def fakeBenchmarkExec_mutlicore(nb_cores=-1, a=6, args=1): return [nb_cores, a] -def fakeBenchmarkExec_monocore(dataset_var=1, a=4, args=1, track_tracebacks=False): +def fakeBenchmarkExec_monocore( + dataset_var=1, a=4, args=1, track_tracebacks=False): return [a] def fakegetResults(results, stats_iter, benchmark_arguments_dictionaries, metrics, directory, - example_ids, labels): + sample_ids, labels): return 3 def fakeDelete(a, b, c): return 9 -def fake_analyze(a, b, c, d, example_ids=None, labels=None): + +def fake_analyze(a, b, c, d, sample_ids=None, labels=None): pass + class Test_execBenchmark(unittest.TestCase): @classmethod @@ -256,8 +277,8 @@ class Test_execBenchmark(unittest.TestCase): cls.Dataset = test_dataset cls.argument_dictionaries = [{"a": 4, "args": {}}] cls.args = { - "Base":{"name": "chicken_is_heaven", "type": "type", "pathf": "pathF"}, - "Classification":{"hps_iter": 1}} + "Base": {"name": "chicken_is_heaven", "type": "type", "pathf": "pathF"}, + "Classification": {"hps_iter": 1}} def test_simple(cls): res = exec_classif.exec_benchmark(nb_cores=1, @@ -277,7 +298,7 @@ class Test_execBenchmark(unittest.TestCase): def test_multiclass_no_iter(cls): cls.argument_dictionaries = [{"a": 10, "args": cls.args}, - {"a": 4, "args": cls.args}] + {"a": 4, "args": cls.args}] res = exec_classif.exec_benchmark(nb_cores=1, stats_iter=1, benchmark_arguments_dictionaries=cls.argument_dictionaries, @@ -295,9 +316,9 @@ class Test_execBenchmark(unittest.TestCase): def test_multiclass_and_iter(cls): cls.argument_dictionaries = [{"a": 10, "args": cls.args}, - {"a": 4, "args": cls.args}, - {"a": 55, "args": cls.args}, - {"a": 24, "args": cls.args}] + {"a": 4, "args": cls.args}, + {"a": 55, "args": cls.args}, + {"a": 24, "args": cls.args}] res = exec_classif.exec_benchmark(nb_cores=1, stats_iter=2, benchmark_arguments_dictionaries=cls.argument_dictionaries, @@ -333,6 +354,7 @@ class Test_execBenchmark(unittest.TestCase): def tearDownClass(cls): rm_tmp() + def fakeExecMono(directory, name, labels_names, classification_indices, k_folds, coreIndex, type, pathF, random_state, labels, hyper_param_search="try", metrics="try", n_iter=1, **arguments): @@ -367,15 +389,15 @@ class Test_set_element(unittest.TestCase): @classmethod def setUpClass(cls): cls.dictionary = {"a": - {"b":{ - "c":{ - "d":{ - "e":1, - "f":[1] - } + {"b": { + "c": { + "d": { + "e": 1, + "f": [1] } - }}} - cls.elements = 
{"a.b.c.d.e":1, "a.b.c.d.f":[1]} + } + }}} + cls.elements = {"a.b.c.d.e": 1, "a.b.c.d.f": [1]} @classmethod def tearDownClass(cls): @@ -384,7 +406,8 @@ class Test_set_element(unittest.TestCase): def test_simple(self): simplified_dict = {} for path, value in self.elements.items(): - simplified_dict = exec_classif.set_element(simplified_dict, path, value) + simplified_dict = exec_classif.set_element( + simplified_dict, path, value) self.assertEqual(simplified_dict, self.dictionary) @@ -393,14 +416,14 @@ class Test_get_path_dict(unittest.TestCase): @classmethod def setUpClass(cls): cls.dictionary = {"a": - {"b":{ - "c":{ - "d":{ - "e":1, - "f":[1] - } + {"b": { + "c": { + "d": { + "e": 1, + "f": [1] } - }}} + } + }}} @classmethod def tearDownClass(cls): @@ -408,9 +431,8 @@ class Test_get_path_dict(unittest.TestCase): def test_simple(self): path_dict = exec_classif.get_path_dict(self.dictionary) - self.assertEqual(path_dict, {"a.b.c.d.e":1, "a.b.c.d.f":[1]}) - + self.assertEqual(path_dict, {"a.b.c.d.e": 1, "a.b.c.d.f": [1]}) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_metrics/test_metrics.py b/summit/tests/test_metrics/test_metrics.py index fa76d63b..8b42273a 100644 --- a/summit/tests/test_metrics/test_metrics.py +++ b/summit/tests/test_metrics/test_metrics.py @@ -5,25 +5,23 @@ import os from sklearn.metrics._scorer import _BaseScorer # Tester que chaque metrique a bien les bonnes fonctions qui renvoient bien les bons types d'outputs avec les bons types d'inputs -# Faire de meme pour les differents classifeurs monovues et les differents classifeurs multivues +# Faire de meme pour les differents classifeurs monovues et les differents +# classifeurs multivues class Test_metric(unittest.TestCase): @classmethod def setUpClass(cls): - cls.test="a" - + cls.test = "a" def test_simple(self): pkgpath = os.path.dirname(metrics.__file__) for _, metric, _ in pkgutil.iter_modules([pkgpath]): module = getattr(metrics, metric) self.assertTrue(hasattr(module, "score")) - self.assertTrue(isinstance(module.score([1,0],[1,0]), float)) + self.assertTrue(isinstance(module.score([1, 0], [1, 0]), float)) self.assertTrue(hasattr(module, "get_scorer")) self.assertTrue(isinstance(module.get_scorer(), _BaseScorer)) self.assertTrue(hasattr(module, "get_config")) self.assertTrue(isinstance(module.get_config(), str)) - - diff --git a/summit/tests/test_mono_view/test_exec_classif_mono_view.py b/summit/tests/test_mono_view/test_exec_classif_mono_view.py index 41aff9e2..6388f630 100644 --- a/summit/tests/test_mono_view/test_exec_classif_mono_view.py +++ b/summit/tests/test_mono_view/test_exec_classif_mono_view.py @@ -17,9 +17,9 @@ class Test_initConstants(unittest.TestCase): def setUpClass(cls): rm_tmp() os.mkdir(tmp_path) - cls.view_name="test_dataset" + cls.view_name = "test_dataset" cls.datasetFile = h5py.File( - tmp_path+"test.hdf5", "w") + tmp_path + "test.hdf5", "w") cls.random_state = np.random.RandomState(42) cls.args = {"classifier_name": "test_clf"} cls.X_value = cls.random_state.randint(0, 500, (10, 20)) @@ -27,29 +27,29 @@ class Test_initConstants(unittest.TestCase): cls.X.attrs["name"] = "test_dataset" cls.X.attrs["sparse"] = False cls.classification_indices = [np.array([0, 2, 4, 6, 8]), - np.array([1, 3, 5, 7, 9]), - np.array([1, 3, 5, 7, 9])] + np.array([1, 3, 5, 7, 9]), + np.array([1, 3, 5, 7, 9])] cls.labels_names = ["test_true", "test_false"] cls.name = "test" cls.directory = os.path.join(tmp_path, "test_dir/") def 
test_simple(cls): kwargs, \ - t_start, \ - feat, \ - CL_type, \ - X, \ - learningRate, \ - labelsString, \ - output_file_name,\ - directory,\ - base_file_name = exec_classif_mono_view.init_constants(cls.args, - cls.X, - cls.classification_indices, - cls.labels_names, - cls.name, - cls.directory, - cls.view_name) + t_start, \ + feat, \ + CL_type, \ + X, \ + learningRate, \ + labelsString, \ + output_file_name,\ + directory,\ + base_file_name = exec_classif_mono_view.init_constants(cls.args, + cls.X, + cls.classification_indices, + cls.labels_names, + cls.name, + cls.directory, + cls.view_name) cls.assertEqual(kwargs, cls.args) cls.assertEqual(feat, "test_dataset") cls.assertEqual(CL_type, "test_clf") @@ -60,11 +60,11 @@ class Test_initConstants(unittest.TestCase): @classmethod def tearDownClass(cls): - os.remove(tmp_path+"test.hdf5") + os.remove(tmp_path + "test.hdf5") os.rmdir( - tmp_path+"test_dir/test_clf/test_dataset") - os.rmdir(tmp_path+"test_dir/test_clf") - os.rmdir(tmp_path+"test_dir") + tmp_path + "test_dir/test_clf/test_dataset") + os.rmdir(tmp_path + "test_dir/test_clf") + os.rmdir(tmp_path + "test_dir") os.rmdir(tmp_path) @@ -77,8 +77,8 @@ class Test_initTrainTest(unittest.TestCase): cls.X = cls.random_state.randint(0, 500, (10, 5)) cls.Y = cls.random_state.randint(0, 2, 10) cls.classification_indices = [np.array([0, 2, 4, 6, 8]), - np.array([1, 3, 5, 7, 9]), - ] + np.array([1, 3, 5, 7, 9]), + ] def test_simple(cls): X_train, y_train, X_test, y_test = exec_classif_mono_view.init_train_test( @@ -110,15 +110,18 @@ class Test_getHPs(unittest.TestCase): cls.hyper_param_search = "Random" cls.classifier_name = "decision_tree" cls.random_state = np.random.RandomState(42) - cls.X = cls.random_state.randint(0,10,size=(10,5)) - cls.y = cls.random_state.randint(0,2,size=10) + cls.X = cls.random_state.randint(0, 10, size=(10, 5)) + cls.y = cls.random_state.randint(0, 2, size=10) cls.output_file_name = tmp_path - cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state, shuffle=True) + cls.cv = StratifiedKFold( + n_splits=2, + random_state=cls.random_state, + shuffle=True) cls.nb_cores = 1 cls.metrics = {"accuracy_score*": {}} - cls.kwargs = {"decision_tree" : {"max_depth": 1, - "criterion": "gini", - "splitter": "best"}} + cls.kwargs = {"decision_tree": {"max_depth": 1, + "criterion": "gini", + "splitter": "best"}} cls.classifier_class_name = "DecisionTree" cls.hps_kwargs = {"n_iter": 2} @@ -143,6 +146,7 @@ class Test_getHPs(unittest.TestCase): self.metrics, self.kwargs, **self.hps_kwargs) + def test_simple_config(self): kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule, "None", @@ -168,7 +172,8 @@ class Test_exec_monoview(unittest.TestCase): test_dataset.get_labels(), "test dataset", ["yes", "no"], - [np.array([0,1,2,4]), np.array([4])], + [np.array( + [0, 1, 2, 4]), np.array([4])], StratifiedKFold(n_splits=2), 1, "", @@ -176,9 +181,9 @@ class Test_exec_monoview(unittest.TestCase): np.random.RandomState(42), "Random", n_iter=2, - **{"classifier_name":"decision_tree", - "view_index":0, - "decision_tree":{}}) + **{"classifier_name": "decision_tree", + "view_index": 0, + "decision_tree": {}}) rm_tmp() # class Test_getKWARGS(unittest.TestCase): diff --git a/summit/tests/test_mono_view/test_monoview_utils.py b/summit/tests/test_mono_view/test_monoview_utils.py index 9c62d006..a26df470 100644 --- a/summit/tests/test_mono_view/test_monoview_utils.py +++ b/summit/tests/test_mono_view/test_monoview_utils.py @@ -7,6 +7,7 @@ from sklearn.tree import 
DecisionTreeClassifier from summit.multiview_platform.monoview import monoview_utils from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint + class TestFunctions(unittest.TestCase): def test_gen_test_folds_preds(self): @@ -24,20 +25,22 @@ class TestFunctions(unittest.TestCase): self.estimator) self.assertEqual(testFoldsPreds.shape, (3, 10)) np.testing.assert_array_equal(testFoldsPreds[0], np.array( - [ 1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) + [1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) def test_change_label_to_minus(self): - lab = monoview_utils.change_label_to_minus(np.array([0,1,0])) - np.testing.assert_array_equal(lab, np.array([-1,1,-1])) + lab = monoview_utils.change_label_to_minus(np.array([0, 1, 0])) + np.testing.assert_array_equal(lab, np.array([-1, 1, -1])) def test_change_label_to_zero(self): - lab = monoview_utils.change_label_to_zero(np.array([-1,1,-1])) - np.testing.assert_array_equal(lab, np.array([0,1,0])) + lab = monoview_utils.change_label_to_zero(np.array([-1, 1, -1])) + np.testing.assert_array_equal(lab, np.array([0, 1, 0])) def test_compute_possible_combinations(self): - n_possib = monoview_utils.compute_possible_combinations({"a":[1, 2], "b":{"c":[2,3]}, "d":CustomRandint(0,10)}) + n_possib = monoview_utils.compute_possible_combinations( + {"a": [1, 2], "b": {"c": [2, 3]}, "d": CustomRandint(0, 10)}) np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10])) + class FakeClf(monoview_utils.BaseMonoviewClassifier): def __init__(self): diff --git a/summit/tests/test_monoview_classifiers/test_compatibility.py b/summit/tests/test_monoview_classifiers/test_compatibility.py index c29baab8..e821e822 100644 --- a/summit/tests/test_monoview_classifiers/test_compatibility.py +++ b/summit/tests/test_monoview_classifiers/test_compatibility.py @@ -2,7 +2,8 @@ # import unittest # -# Currently problematic because of the pep8-ification of the code. 
To be revisited +# later # import numpy as np diff --git a/summit/tests/test_multi_view/test_exec_multiview.py b/summit/tests/test_multi_view/test_exec_multiview.py index 8c104001..eec5aa5e 100644 --- a/summit/tests/test_multi_view/test_exec_multiview.py +++ b/summit/tests/test_multi_view/test_exec_multiview.py @@ -23,46 +23,46 @@ class Test_init_constants(unittest.TestCase): def test_simple(self): classifier_name, t_start, views_indices, \ - classifier_config, views, learning_rate, labels, output_file_name, \ - directory, base_file_name, metrics = exec_multiview.init_constants( - kwargs={"view_names":["ViewN0", "ViewN2", "ViewN1", ], - "view_indices": [0,2,1], - "classifier_name":"test_clf", - "test_clf":{}}, - classification_indices=[np.array([0,1,4,2]), np.array([3])], - metrics={"accuracy_score*":{}}, - name="test_dataset", - nb_cores=1, - k_folds=StratifiedKFold(n_splits=2), - dataset_var=test_dataset, - directory=tmp_path - ) + classifier_config, views, learning_rate, labels, output_file_name, \ + directory, base_file_name, metrics = exec_multiview.init_constants( + kwargs={"view_names": ["ViewN0", "ViewN2", "ViewN1", ], + "view_indices": [0, 2, 1], + "classifier_name": "test_clf", + "test_clf": {}}, + classification_indices=[np.array([0, 1, 4, 2]), np.array([3])], + metrics={"accuracy_score*": {}}, + name="test_dataset", + nb_cores=1, + k_folds=StratifiedKFold(n_splits=2), + dataset_var=test_dataset, + directory=tmp_path + ) self.assertEqual(classifier_name, "test_clf") - self.assertEqual(views_indices, [0,2,1]) + self.assertEqual(views_indices, [0, 2, 1]) self.assertEqual(classifier_config, {}) self.assertEqual(views, ["ViewN0", "ViewN2", "ViewN1", ]) - self.assertEqual(learning_rate, 4/5) + self.assertEqual(learning_rate, 4 / 5) def test_exec_multiview_no_hps(self): res = exec_multiview.exec_multiview( directory=tmp_path, dataset_var=test_dataset, name="test_dataset", - classification_indices=[np.array([0,1,4,2]), np.array([3])], + classification_indices=[np.array([0, 1, 4, 2]), np.array([3])], k_folds=StratifiedKFold(n_splits=2), nb_cores=1, database_type="", path="", - labels_dictionary={0:"yes", 1:"no"}, + labels_dictionary={0: "yes", 1: "no"}, random_state=np.random.RandomState(42), labels=test_dataset.get_labels(), hps_method="None", hps_kwargs={}, metrics=None, n_iter=30, - **{"view_names":["ViewN0", "ViewN2", "ViewN1", ], - "view_indices": [0,2,1], - "classifier_name":"weighted_linear_early_fusion", - "weighted_linear_early_fusion":{}} + **{"view_names": ["ViewN0", "ViewN2", "ViewN1", ], + "view_indices": [0, 2, 1], + "classifier_name": "weighted_linear_early_fusion", + "weighted_linear_early_fusion": {}} ) def test_exec_multiview(self): @@ -70,21 +70,22 @@ class Test_init_constants(unittest.TestCase): directory=tmp_path, dataset_var=test_dataset, name="test_dataset", - classification_indices=[np.array([0,1,4,2]), np.array([3])], + classification_indices=[np.array([0, 1, 4, 2]), np.array([3])], k_folds=StratifiedKFold(n_splits=2), nb_cores=1, database_type="", path="", - labels_dictionary={0:"yes", 1:"no"}, + labels_dictionary={0: "yes", 1: "no"}, random_state=np.random.RandomState(42), labels=test_dataset.get_labels(), hps_method="Grid", hps_kwargs={"param_grid": - {"monoview_classifier_config":[{"max_depth":3}, {"max_depth":1}]}, - }, + {"monoview_classifier_config": [ + {"max_depth": 3}, {"max_depth": 1}]}, + }, metrics=None, n_iter=30, - **{"view_names":["ViewN0", "ViewN2", "ViewN1", ], - "view_indices": [0,2,1], - "classifier_name":"weighted_linear_early_fusion", - 
"weighted_linear_early_fusion":{}} - ) \ No newline at end of file + **{"view_names": ["ViewN0", "ViewN2", "ViewN1", ], + "view_indices": [0, 2, 1], + "classifier_name": "weighted_linear_early_fusion", + "weighted_linear_early_fusion": {}} + ) diff --git a/summit/tests/test_multi_view/test_multiview_utils.py b/summit/tests/test_multi_view/test_multiview_utils.py index 54dc6f8b..aae8a743 100644 --- a/summit/tests/test_multi_view/test_multiview_utils.py +++ b/summit/tests/test_multi_view/test_multiview_utils.py @@ -13,7 +13,7 @@ from summit.multiview_platform.multiview import multiview_utils class FakeMVClassif(multiview_utils.BaseMultiviewClassifier): def __init__(self, mc=True): - self.mc=mc + self.mc = mc pass def fit(self, X, y): @@ -23,7 +23,6 @@ class FakeMVClassif(multiview_utils.BaseMultiviewClassifier): pass - class TestBaseMultiviewClassifier(unittest.TestCase): @classmethod @@ -40,7 +39,9 @@ class TestBaseMultiviewClassifier(unittest.TestCase): self.assertEqual(accepts, True) accepts = FakeMVClassif(mc=False).accepts_multi_class(rs) self.assertEqual(accepts, False) - self.assertRaises(ValueError, FakeMVClassif(mc=False).accepts_multi_class, rs,**{"n_samples":2, "n_classes":3}) + self.assertRaises(ValueError, FakeMVClassif( + mc=False).accepts_multi_class, rs, **{"n_samples": 2, "n_classes": 3}) + class TestConfigGenerator(unittest.TestCase): @@ -49,11 +50,13 @@ class TestConfigGenerator(unittest.TestCase): cls.rs = np.random.RandomState(42) def test_simple(self): - cfg_gen = multiview_utils.ConfigGenerator(["decision_tree", "decision_tree"]) + cfg_gen = multiview_utils.ConfigGenerator( + ["decision_tree", "decision_tree"]) sample = cfg_gen.rvs(self.rs) self.assertEqual(sample, {'decision_tree': {'criterion': 'entropy', - 'max_depth': 103, - 'splitter': 'best'}}) + 'max_depth': 103, + 'splitter': 'best'}}) + class TestFunctions(unittest.TestCase): @@ -78,7 +81,8 @@ class TestFunctions(unittest.TestCase): 'svm_linear', 'svm_poly', 'svm_rbf']) - avail = multiview_utils.get_available_monoview_classifiers(need_probas=True) + avail = multiview_utils.get_available_monoview_classifiers( + need_probas=True) self.assertEqual(avail, ['adaboost', 'decision_tree', 'gradient_boosting', diff --git a/summit/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py b/summit/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py index 26ecbe1b..ac499d53 100644 --- a/summit/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py +++ b/summit/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py @@ -1,7 +1,7 @@ import unittest import numpy as np -import summit.multiview_platform.multiview_classifiers.additions.diversity_utils as du +import summit.multiview_platform.multiview_classifiers.additions.diversity_utils as du class FakeDataset(): @@ -12,11 +12,11 @@ class FakeDataset(): self.views = views self.labels = labels - def get_v(self, view_index, example_indices): - return self.views[view_index, example_indices] + def get_v(self, view_index, sample_indices): + return self.views[view_index, sample_indices] - def get_nb_class(self, example_indices): - return np.unique(self.labels[example_indices]) + def get_nb_class(self, sample_indices): + return np.unique(self.labels[sample_indices]) class FakeDivCoupleClf(du.CoupleDiversityFusionClassifier): @@ -30,7 +30,7 @@ class FakeDivCoupleClf(du.CoupleDiversityFusionClassifier): self.rs = rs def diversity_measure(self, a, b, c): - return self.rs.randint(0,100) + return self.rs.randint(0, 100) 
class FakeDivGlobalClf(du.GlobalDiversityFusionClassifier): @@ -44,23 +44,24 @@ class FakeDivGlobalClf(du.GlobalDiversityFusionClassifier): self.rs = rs def diversity_measure(self, a, b, c): - return self.rs.randint(0,100) + return self.rs.randint(0, 100) + class Test_DiversityFusion(unittest.TestCase): @classmethod def setUpClass(cls): cls.classifier_names = ["adaboost", "decision_tree"] - cls.classifiers_config = {"adaboost":{"n_estimators":5,}} + cls.classifiers_config = {"adaboost": {"n_estimators": 5, }} cls.random_state = np.random.RandomState(42) - cls.y = cls.random_state.randint(0,2,6) - cls.X = FakeDataset(cls.random_state.randint(0,100,(2,5,6)), cls.y) - cls.train_indices = [0,1,2,4] - cls.views_indices = [0,1] + cls.y = cls.random_state.randint(0, 2, 6) + cls.X = FakeDataset(cls.random_state.randint(0, 100, (2, 5, 6)), cls.y) + cls.train_indices = [0, 1, 2, 4] + cls.views_indices = [0, 1] def test_simple_couple(self): clf = FakeDivCoupleClf(self.random_state, classifier_names=self.classifier_names, - classifiers_config=self.classifiers_config) + classifiers_config=self.classifiers_config) clf.fit(self.X, self.y, self.train_indices, self.views_indices) def test_simple_global(self): @@ -71,4 +72,4 @@ class Test_DiversityFusion(unittest.TestCase): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_multiview_classifiers/test_additions/test_jumbo_fusion_utils.py b/summit/tests/test_multiview_classifiers/test_additions/test_jumbo_fusion_utils.py index 9ff28733..d78d9ec1 100644 --- a/summit/tests/test_multiview_classifiers/test_additions/test_jumbo_fusion_utils.py +++ b/summit/tests/test_multiview_classifiers/test_additions/test_jumbo_fusion_utils.py @@ -1,7 +1,7 @@ import unittest import numpy as np -import summit.multiview_platform.multiview_classifiers.additions.jumbo_fusion_utils as ju +import summit.multiview_platform.multiview_classifiers.additions.jumbo_fusion_utils as ju class FakeDataset(): @@ -12,11 +12,11 @@ class FakeDataset(): self.views = views self.labels = labels - def get_v(self, view_index, example_indices): - return self.views[view_index, example_indices] + def get_v(self, view_index, sample_indices): + return self.views[view_index, sample_indices] - def get_nb_class(self, example_indices): - return np.unique(self.labels[example_indices]) + def get_nb_class(self, sample_indices): + return np.unique(self.labels[sample_indices]) -#TODO \ No newline at end of file +# TODO diff --git a/summit/tests/test_multiview_classifiers/test_difficulty_fusion.py b/summit/tests/test_multiview_classifiers/test_difficulty_fusion.py index 8d3a44b5..e36c7816 100644 --- a/summit/tests/test_multiview_classifiers/test_difficulty_fusion.py +++ b/summit/tests/test_multiview_classifiers/test_difficulty_fusion.py @@ -9,8 +9,9 @@ class Test_difficulty_fusion(unittest.TestCase): @classmethod def setUpClass(cls): - cls.random_state=np.random.RandomState(42) - cls.classifiers_decisions = cls.random_state.randint(0, 2, size=(5, 3, 5)) + cls.random_state = np.random.RandomState(42) + cls.classifiers_decisions = cls.random_state.randint( + 0, 2, size=(5, 3, 5)) cls.combination = [1, 3, 4] cls.y = np.array([1, 1, 0, 0, 1]) cls.difficulty_fusion_clf = difficulty_fusion.DifficultyFusion() diff --git a/summit/tests/test_multiview_classifiers/test_disagree_fusion.py b/summit/tests/test_multiview_classifiers/test_disagree_fusion.py index c7e0f0bc..eec8f8bd 100644 --- a/summit/tests/test_multiview_classifiers/test_disagree_fusion.py +++ 
b/summit/tests/test_multiview_classifiers/test_disagree_fusion.py @@ -17,7 +17,7 @@ class Test_disagree(unittest.TestCase): def test_simple(cls): disagreement = cls.clf.diversity_measure(cls.monoview_decision_1, - cls.monoview_decision_2, - cls.ground_truth) + cls.monoview_decision_2, + cls.ground_truth) np.testing.assert_array_equal(disagreement, np.array([False, True, True, False])) diff --git a/summit/tests/test_multiview_classifiers/test_double_fault_fusion.py b/summit/tests/test_multiview_classifiers/test_double_fault_fusion.py index c6a96028..5ccdb84d 100644 --- a/summit/tests/test_multiview_classifiers/test_double_fault_fusion.py +++ b/summit/tests/test_multiview_classifiers/test_double_fault_fusion.py @@ -16,7 +16,7 @@ class Test_disagree(unittest.TestCase): def test_simple(cls): double_fault = cls.clf.diversity_measure(cls.monoview_decision_1, - cls.monoview_decision_2, - cls.ground_truth) + cls.monoview_decision_2, + cls.ground_truth) np.testing.assert_array_equal(double_fault, np.array([False, True, False, False, False, False, True, False])) diff --git a/summit/tests/test_multiview_classifiers/test_entropy_fusion.py b/summit/tests/test_multiview_classifiers/test_entropy_fusion.py index f3ff894b..56095201 100644 --- a/summit/tests/test_multiview_classifiers/test_entropy_fusion.py +++ b/summit/tests/test_multiview_classifiers/test_entropy_fusion.py @@ -9,8 +9,9 @@ class Test_difficulty_fusion(unittest.TestCase): @classmethod def setUpClass(cls): - cls.random_state=np.random.RandomState(42) - cls.classifiers_decisions = cls.random_state.randint(0, 2, size=(5, 3, 5)) + cls.random_state = np.random.RandomState(42) + cls.classifiers_decisions = cls.random_state.randint( + 0, 2, size=(5, 3, 5)) cls.combination = [1, 3, 4] cls.y = np.array([1, 1, 0, 0, 1]) cls.clf = entropy_fusion.EntropyFusion() diff --git a/summit/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py b/summit/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py index 9b6cda02..eec5485c 100644 --- a/summit/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py +++ b/summit/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py @@ -8,6 +8,7 @@ from summit.tests.utils import rm_tmp, tmp_path, test_dataset from summit.multiview_platform.multiview_classifiers import \ weighted_linear_early_fusion + class Test_WeightedLinearEarlyFusion(unittest.TestCase): @classmethod @@ -16,7 +17,8 @@ class Test_WeightedLinearEarlyFusion(unittest.TestCase): cls.random_state = np.random.RandomState(42) cls.view_weights = [0.5, 0.5] cls.monoview_classifier_name = "decision_tree" - cls.monoview_classifier_config = {"max_depth":1, "criterion": "gini", "splitter": "best"} + cls.monoview_classifier_config = { + "max_depth": 1, "criterion": "gini", "splitter": "best"} cls.classifier = weighted_linear_early_fusion.WeightedLinearEarlyFusion( random_state=cls.random_state, view_weights=cls.view_weights, monoview_classifier_name=cls.monoview_classifier_name, @@ -28,40 +30,51 @@ class Test_WeightedLinearEarlyFusion(unittest.TestCase): rm_tmp() def test_simple(self): - np.testing.assert_array_equal(self.view_weights, self.classifier.view_weights) + np.testing.assert_array_equal( + self.view_weights, self.classifier.view_weights) def test_fit(self): self.assertRaises(AttributeError, getattr, self.classifier.monoview_classifier, "classes_") - self.classifier.fit(self.dataset, test_dataset.get_labels(), None, None) + self.classifier.fit( + self.dataset, + test_dataset.get_labels(), + None, + 
None) np.testing.assert_array_equal(self.classifier.monoview_classifier.classes_, - np.array([0,1])) + np.array([0, 1])) def test_predict(self): - self.classifier.fit(self.dataset, test_dataset.get_labels(), None, None) + self.classifier.fit( + self.dataset, + test_dataset.get_labels(), + None, + None) predicted_labels = self.classifier.predict(self.dataset, None, None) - np.testing.assert_array_equal(predicted_labels, test_dataset.get_labels()) + np.testing.assert_array_equal( + predicted_labels, test_dataset.get_labels()) def test_transform_data_to_monoview_simple(self): - example_indices, X = self.classifier.transform_data_to_monoview(self.dataset, - None, None) - self.assertEqual(X.shape, (5,12)) - np.testing.assert_array_equal(X, np.concatenate((self.dataset.get_v(0), self.dataset.get_v(1)), axis=1)) - np.testing.assert_array_equal(example_indices, np.arange(5)) + sample_indices, X = self.classifier.transform_data_to_monoview(self.dataset, + None, None) + self.assertEqual(X.shape, (5, 12)) + np.testing.assert_array_equal(X, np.concatenate( + (self.dataset.get_v(0), self.dataset.get_v(1)), axis=1)) + np.testing.assert_array_equal(sample_indices, np.arange(5)) def test_transform_data_to_monoview_view_select(self): - example_indices, X = self.classifier.transform_data_to_monoview( + sample_indices, X = self.classifier.transform_data_to_monoview( self.dataset, None, np.array([0])) self.assertEqual(X.shape, (5, 6)) np.testing.assert_array_equal(X, self.dataset.get_v(0)) - np.testing.assert_array_equal(example_indices, np.arange(5)) + np.testing.assert_array_equal(sample_indices, np.arange(5)) - def test_transform_data_to_monoview_example_view_select(self): - example_indices, X = self.classifier.transform_data_to_monoview( + def test_transform_data_to_monoview_sample_view_select(self): + sample_indices, X = self.classifier.transform_data_to_monoview( self.dataset, - np.array([1,2,3]), np.array([0])) + np.array([1, 2, 3]), np.array([0])) self.assertEqual(X.shape, (3, 6)) - np.testing.assert_array_equal(X, self.dataset.get_v(0)[np.array([1,2,3]), :]) - np.testing.assert_array_equal(example_indices, np.array([1,2,3])) - + np.testing.assert_array_equal(X, self.dataset.get_v(0)[ + np.array([1, 2, 3]), :]) + np.testing.assert_array_equal(sample_indices, np.array([1, 2, 3])) diff --git a/summit/tests/test_result_analysis/test_duration_analysis.py b/summit/tests/test_result_analysis/test_duration_analysis.py index e4c22e61..faa84935 100644 --- a/summit/tests/test_result_analysis/test_duration_analysis.py +++ b/summit/tests/test_result_analysis/test_duration_analysis.py @@ -4,10 +4,11 @@ import pandas as pd from summit.multiview_platform.result_analysis import duration_analysis + class FakeClassifierResult: def __init__(self, i=0): - self.i=i + self.i = i if i == 0: self.hps_duration = 10 self.fit_duration = 12 @@ -17,7 +18,6 @@ class FakeClassifierResult: self.fit_duration = 2 self.pred_duration = 5 - def get_classifier_name(self): if self.i == 0: return 'test1' @@ -25,7 +25,6 @@ class FakeClassifierResult: return 'test2' - class Test_get_duration(unittest.TestCase): def test_simple(self): @@ -33,9 +32,8 @@ class Test_get_duration(unittest.TestCase): durs = duration_analysis.get_duration(results) pd.testing.assert_frame_equal(durs, pd.DataFrame(index=['test1', 'test2'], - columns=['hps', 'fit', 'pred'], - data=np.array([np.array([10,12,15]), - np.array([1,2,5])]), + columns=[ + 'hps', 'fit', 'pred'], + data=np.array([np.array([10, 12, 15]), + np.array([1, 2, 5])]), dtype=object)) - - diff --git 
a/summit/tests/test_result_analysis/test_error_analysis.py b/summit/tests/test_result_analysis/test_error_analysis.py index f5c67114..9d6d6c05 100644 --- a/summit/tests/test_result_analysis/test_error_analysis.py +++ b/summit/tests/test_result_analysis/test_error_analysis.py @@ -4,49 +4,51 @@ import numpy as np from summit.multiview_platform.monoview.monoview_utils import MonoviewResult from summit.multiview_platform.multiview.multiview_utils import MultiviewResult -from summit.multiview_platform.result_analysis.error_analysis import get_example_errors, gen_error_data, gen_error_data_glob +from summit.multiview_platform.result_analysis.error_analysis import get_sample_errors, gen_error_data, gen_error_data_glob -class Test_get_example_errors(unittest.TestCase): +class Test_get_sample_errors(unittest.TestCase): def test_simple(self): - ground_truth = np.array([0,1,0,1,0,1,0,1, -100]) + ground_truth = np.array([0, 1, 0, 1, 0, 1, 0, 1, -100]) results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], "f1_score": [0.71, 0.76]}, - np.array([0,0,0,0,1,1,1,1,1]), - 0,0,0, {}), + np.array([0, 0, 0, 0, 1, 1, 1, 1, 1]), + 0, 0, 0, {}), MonoviewResult(0, "dt", "1", {"accuracy_score": [0.8, 0.85], - "f1_score": [0.81, 0.86]} - , np.array([0,0,1,1,0,0,1,1,0]), "", "", - "", "",0,0, {}) + "f1_score": [0.81, 0.86]}, np.array([0, 0, 1, 1, 0, 0, 1, 1, 0]), "", "", + "", "", 0, 0, {}) ] - example_errors = get_example_errors(ground_truth, - results) - self.assertIsInstance(example_errors, dict) - np.testing.assert_array_equal(example_errors["mv"], - np.array([1,0,1,0,0,1,0,1,-100])) - np.testing.assert_array_equal(example_errors["dt-1"], - np.array([1, 0, 0, 1, 1, 0, 0, 1,-100])) + sample_errors = get_sample_errors(ground_truth, + results) + self.assertIsInstance(sample_errors, dict) + np.testing.assert_array_equal(sample_errors["mv"], + np.array([1, 0, 1, 0, 0, 1, 0, 1, -100])) + np.testing.assert_array_equal(sample_errors["dt-1"], + np.array([1, 0, 0, 1, 1, 0, 0, 1, -100])) + class Test_gen_error_data(unittest.TestCase): def test_simple(self): random_state = np.random.RandomState(42) - ada_data = random_state.randint(0,2,size=7) + ada_data = random_state.randint(0, 2, size=7) mv_data = random_state.randint(0, 2, size=7) - example_errors = {"ada-1": ada_data, - "mv": mv_data} - nb_classifiers, nb_examples, classifiers_names, \ - data_2d, error_on_examples = gen_error_data(example_errors) + sample_errors = {"ada-1": ada_data, + "mv": mv_data} + nb_classifiers, nb_samples, classifiers_names, \ + data_2d, error_on_samples = gen_error_data(sample_errors) self.assertEqual(nb_classifiers, 2) - self.assertEqual(nb_examples, 7) + self.assertEqual(nb_samples, 7) self.assertEqual(classifiers_names, ["ada-1", "mv"]) - np.testing.assert_array_equal(data_2d, np.array([ada_data, mv_data]).transpose()) - np.testing.assert_array_equal(error_on_examples, (ada_data+mv_data)/nb_classifiers) - + np.testing.assert_array_equal( + data_2d, np.array([ada_data, mv_data]).transpose()) + np.testing.assert_array_equal( + error_on_samples, + (ada_data + mv_data) / nb_classifiers) class Test_gen_error_data_glob(unittest.TestCase): @@ -54,23 +56,25 @@ class Test_gen_error_data_glob(unittest.TestCase): def test_simple(self): random_state = np.random.RandomState(42) - ada_error_data_1 = random_state.randint(0,2,7) + ada_error_data_1 = random_state.randint(0, 2, 7) ada_error_data_2 = random_state.randint(0, 2, 7) - ada_sum = ada_error_data_1+ada_error_data_2 + ada_sum = ada_error_data_1 + ada_error_data_2 mv_error_data_1 = 
random_state.randint(0, 2, 7) mv_error_data_2 = random_state.randint(0, 2, 7) - mv_sum = mv_error_data_1+mv_error_data_2 + mv_sum = mv_error_data_1 + mv_error_data_2 - combi_results = {"ada-1":ada_sum, "mv": mv_sum} + combi_results = {"ada-1": ada_sum, "mv": mv_sum} stats_iter = 2 - nb_examples, nb_classifiers, \ - data, error_on_examples, \ - classifier_names = gen_error_data_glob(combi_results, - stats_iter) - self.assertEqual(nb_examples, 7) + nb_samples, nb_classifiers, \ + data, error_on_samples, \ + classifier_names = gen_error_data_glob(combi_results, + stats_iter) + self.assertEqual(nb_samples, 7) self.assertEqual(nb_classifiers, 2) - np.testing.assert_array_equal(data, np.array([ada_sum, mv_sum]).transpose()) - np.testing.assert_array_equal(error_on_examples, np.sum(np.array([ada_sum, mv_sum]), axis=0)/(nb_classifiers*stats_iter)) - self.assertEqual(classifier_names, ["ada-1", "mv"]) \ No newline at end of file + np.testing.assert_array_equal( + data, np.array([ada_sum, mv_sum]).transpose()) + np.testing.assert_array_equal(error_on_samples, np.sum( + np.array([ada_sum, mv_sum]), axis=0) / (nb_classifiers * stats_iter)) + self.assertEqual(classifier_names, ["ada-1", "mv"]) diff --git a/summit/tests/test_result_analysis/test_execution.py b/summit/tests/test_result_analysis/test_execution.py index edd50b89..b5c2ad4b 100644 --- a/summit/tests/test_result_analysis/test_execution.py +++ b/summit/tests/test_result_analysis/test_execution.py @@ -13,24 +13,31 @@ from summit.tests.utils import rm_tmp, tmp_path, test_dataset class FakeClassifierResult: def __init__(self, i=1): - self.classifier_name='test'+str(i) - self.full_labels_pred = np.array([0,1,1,2,1]) - self.hps_duration=i - self.fit_duration=i - self.pred_duration=i + self.classifier_name = 'test' + str(i) + self.full_labels_pred = np.array([0, 1, 1, 2, 1]) + self.hps_duration = i + self.fit_duration = i + self.pred_duration = i def get_classifier_name(self): return self.classifier_name + class Test_format_previous_results(unittest.TestCase): def test_simple(self): - iter_results = {"metrics_scores":[], "example_errors":[], "feature_importances":[], "labels":[], "durations":[], "class_metrics_scores":[]} + iter_results = { + "metrics_scores": [], + "sample_errors": [], + "feature_importances": [], + "labels": [], + "durations": [], + "class_metrics_scores": []} random_state = np.random.RandomState(42) # Gen metrics data - metrics_1_data = random_state.uniform(size=(2,2)) - metrics_2_data = random_state.uniform(size=(2,2)) + metrics_1_data = random_state.uniform(size=(2, 2)) + metrics_2_data = random_state.uniform(size=(2, 2)) metric_1_df = pd.DataFrame(data=metrics_1_data, index=["train", "test"], columns=["ada-1", "mv"]) metric_2_df = pd.DataFrame(data=metrics_2_data, index=["train", "test"], @@ -39,63 +46,66 @@ class Test_format_previous_results(unittest.TestCase): iter_results["metrics_scores"].append({"acc": metric_2_df}) # Gen error data - ada_error_data_1 = random_state.randint(0,2,7) + ada_error_data_1 = random_state.randint(0, 2, 7) ada_error_data_2 = random_state.randint(0, 2, 7) - ada_sum = ada_error_data_1+ada_error_data_2 + ada_sum = ada_error_data_1 + ada_error_data_2 mv_error_data_1 = random_state.randint(0, 2, 7) mv_error_data_2 = random_state.randint(0, 2, 7) - mv_sum = mv_error_data_1+mv_error_data_2 - iter_results["example_errors"].append({}) - iter_results["example_errors"].append({}) - iter_results["example_errors"][0]["ada-1"] = ada_error_data_1 - iter_results["example_errors"][0]["mv"] = mv_error_data_1 - 
iter_results["example_errors"][1]["ada-1"] = ada_error_data_2 - iter_results["example_errors"][1]["mv"] = mv_error_data_2 + mv_sum = mv_error_data_1 + mv_error_data_2 + iter_results["sample_errors"].append({}) + iter_results["sample_errors"].append({}) + iter_results["sample_errors"][0]["ada-1"] = ada_error_data_1 + iter_results["sample_errors"][0]["mv"] = mv_error_data_1 + iter_results["sample_errors"][1]["ada-1"] = ada_error_data_2 + iter_results["sample_errors"][1]["mv"] = mv_error_data_2 iter_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], - columns=["plif", "plaf"], - data=np.zeros((2,2)))) + columns=["plif", "plaf"], + data=np.zeros((2, 2)))) iter_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], - columns=["plif", - "plaf"], - data=np.ones((2, 2)))) + columns=["plif", + "plaf"], + data=np.ones((2, 2)))) # Running the function metric_analysis, class_met, error_analysis, \ - feature_importances, feature_stds, \ - labels, durations_mean, duration_std = format_previous_results(iter_results) + feature_importances, feature_stds, \ + labels, durations_mean, duration_std = format_previous_results( + iter_results) mean_df = pd.DataFrame(data=np.mean(np.array([metrics_1_data, metrics_2_data]), axis=0), index=["train", "test"], columns=["ada-1", "mvm"]) - std_df = pd.DataFrame(data=np.std(np.array([metrics_1_data, - metrics_2_data]), - axis=0), - index=["train", "test"], - columns=["ada-1", "mvm"]) + std_df = pd.DataFrame(data=np.std(np.array([metrics_1_data, + metrics_2_data]), + axis=0), + index=["train", "test"], + columns=["ada-1", "mvm"]) # Testing np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["train"], mean_df.loc["train"]) np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["test"], - mean_df.loc["test"]) + mean_df.loc["test"]) np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["train"], - std_df.loc["train"]) + std_df.loc["train"]) np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["test"], - std_df.loc["test"]) + std_df.loc["test"]) np.testing.assert_array_equal(ada_sum, error_analysis["ada-1"]) np.testing.assert_array_equal(mv_sum, error_analysis["mv"]) self.assertEqual(durations_mean.at["ada-1", 'plif'], 0.5) + class Test_get_arguments(unittest.TestCase): def setUp(self): - self.benchamrk_argument_dictionaries = [{"flag":"good_flag", "valid":True}, - {"flag":"bad_flag", "valid":False}] + self.benchamrk_argument_dictionaries = [{"flag": "good_flag", "valid": True}, + {"flag": "bad_flag", "valid": False}] def test_benchmark_wanted(self): - argument_dict = get_arguments(self.benchamrk_argument_dictionaries, "good_flag") + argument_dict = get_arguments( + self.benchamrk_argument_dictionaries, "good_flag") self.assertTrue(argument_dict["valid"]) @@ -104,13 +114,20 @@ class Test_analyze_iterations(unittest.TestCase): @classmethod def setUpClass(cls): os.mkdir(tmp_path) - cls.results = [[0, [FakeClassifierResult(), FakeClassifierResult(i=2)], []], [1, [FakeClassifierResult(), FakeClassifierResult(i=2)], []]] - cls.benchmark_argument_dictionaries = [{"labels_dictionary":{0:"zero",1:"one",2:"two"}, "flag":0, "directory":tmp_path, "args":{"name":"test_dataset"}},{"labels_dictionary":{0:"zero",1:"one",2:"two"}, "flag":1, "directory":tmp_path, "args":{"name":"test_dataset"}} ] + cls.results = [[0, [FakeClassifierResult(), FakeClassifierResult(i=2)], []], [ + 1, [FakeClassifierResult(), FakeClassifierResult(i=2)], []]] + cls.benchmark_argument_dictionaries = [ + { + "labels_dictionary": { + 0: "zero", 1: 
"one", 2: "two"}, "flag": 0, "directory": tmp_path, "args": { + "name": "test_dataset"}}, { + "labels_dictionary": { + 0: "zero", 1: "one", 2: "two"}, "flag": 1, "directory": tmp_path, "args": { + "name": "test_dataset"}}] cls.stats_iter = 2 cls.metrics = {} - cls.example_ids = ['ex1', 'ex5','ex4','ex3','ex2',] - cls.labels = np.array([0,1,2,1,1]) - + cls.sample_ids = ['ex1', 'ex5', 'ex4', 'ex3', 'ex2', ] + cls.labels = np.array([0, 1, 2, 1, 1]) @classmethod def tearDownClass(cls): @@ -121,19 +138,20 @@ class Test_analyze_iterations(unittest.TestCase): self.benchmark_argument_dictionaries, self.stats_iter, self.metrics, - self.example_ids, + self.sample_ids, self.labels) res, iter_res, tracebacks, labels_names = analysis self.assertEqual(labels_names, ['zero', 'one', 'two']) self.assertEqual(iter_res['class_metrics_scores'], [{}, {}]) - pd.testing.assert_frame_equal(iter_res['durations'][0], pd.DataFrame(index=['test1','test2'], - columns=['hps', 'fit', 'pred'], - data=np.array([1,1,1,2,2,2]).reshape((2,3)), dtype=object)) - np.testing.assert_array_equal(iter_res['example_errors'][0]['test1'], np.array([1, 1, 0, 0, 1])) - self.assertEqual(iter_res["feature_importances"], [{},{}]) - np.testing.assert_array_equal(iter_res['labels'], np.array([0, 1, 2, 1, 1])) - self.assertEqual(iter_res['metrics_scores'], [{},{}]) - - + pd.testing.assert_frame_equal(iter_res['durations'][0], pd.DataFrame(index=['test1', 'test2'], + columns=[ + 'hps', 'fit', 'pred'], + data=np.array([1, 1, 1, 2, 2, 2]).reshape((2, 3)), dtype=object)) + np.testing.assert_array_equal( + iter_res['sample_errors'][0]['test1'], np.array([1, 1, 0, 0, 1])) + self.assertEqual(iter_res["feature_importances"], [{}, {}]) + np.testing.assert_array_equal( + iter_res['labels'], np.array([0, 1, 2, 1, 1])) + self.assertEqual(iter_res['metrics_scores'], [{}, {}]) diff --git a/summit/tests/test_result_analysis/test_feature_importances.py b/summit/tests/test_result_analysis/test_feature_importances.py index 12055329..0aa01fdc 100644 --- a/summit/tests/test_result_analysis/test_feature_importances.py +++ b/summit/tests/test_result_analysis/test_feature_importances.py @@ -5,32 +5,34 @@ import pandas as pd from summit.multiview_platform.result_analysis import feature_importances from summit.multiview_platform.monoview.monoview_utils import MonoviewResult + class FakeClassifier: def __init__(self, i=0): - self.feature_importances_ = [i, i+1] + self.feature_importances_ = [i, i + 1] + class FakeClassifierResult(MonoviewResult): def __init__(self, i=0): - self.i=i - self.hps_duration = i*10 - self.fit_duration = (i+2)*10 - self.pred_duration = (i+5)*10 + self.i = i + self.hps_duration = i * 10 + self.fit_duration = (i + 2) * 10 + self.pred_duration = (i + 5) * 10 self.clf = FakeClassifier(i) - self.view_name = 'testview'+str(i) - self.classifier_name = "test"+str(i) + self.view_name = 'testview' + str(i) + self.classifier_name = "test" + str(i) def get_classifier_name(self): return self.classifier_name - class Test_get_duration(unittest.TestCase): def test_simple(self): results = [FakeClassifierResult(), FakeClassifierResult(i=1)] feat_importance = feature_importances.get_feature_importances(results) pd.testing.assert_frame_equal(feat_importance["testview1"], - pd.DataFrame(index=None,columns=['test1'], - data=np.array([1,2]).reshape((2,1)), - )) \ No newline at end of file + pd.DataFrame(index=None, columns=['test1'], + data=np.array( + [1, 2]).reshape((2, 1)), + )) diff --git a/summit/tests/test_result_analysis/test_metric_analysis.py 
b/summit/tests/test_result_analysis/test_metric_analysis.py index a8a00ecf..1a2c3fc9 100644 --- a/summit/tests/test_result_analysis/test_metric_analysis.py +++ b/summit/tests/test_result_analysis/test_metric_analysis.py @@ -8,22 +8,25 @@ from summit.multiview_platform.multiview.multiview_utils import MultiviewResult from summit.multiview_platform.result_analysis.metric_analysis import get_metrics_scores, init_plot, get_fig_size, sort_by_test_score -class Test_get_metrics_scores(unittest.TestCase): +class Test_get_metrics_scores(unittest.TestCase): def test_simple(self): - metrics = {"accuracy_score*":{},"f1_score":{}} + metrics = {"accuracy_score*": {}, "f1_score": {}} results = [MonoviewResult(0, "ada", "0", - {"accuracy_score*":[0.9, 0.95], - "f1_score":[0.91, 0.96]} - , "", "", "", "", "",0,0,{})] + {"accuracy_score*": [0.9, 0.95], + "f1_score":[0.91, 0.96]}, "", "", "", "", "", 0, 0, {})] metrics_scores, class_met = get_metrics_scores(metrics, - results, []) + results, []) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score*"], pd.DataFrame) - np.testing.assert_array_equal(np.array(metrics_scores["accuracy_score*"].loc["train"]), np.array([0.9])) + np.testing.assert_array_equal( + np.array( + metrics_scores["accuracy_score*"].loc["train"]), + np.array( + [0.9])) np.testing.assert_array_equal( np.array(metrics_scores["accuracy_score*"].loc["test"]), np.array([0.95])) @@ -37,12 +40,12 @@ class Test_get_metrics_scores(unittest.TestCase): np.array(["ada-0"])) def test_multiple_monoview_classifiers(self): - metrics = {"accuracy_score*":{},"f1_score":{}} + metrics = {"accuracy_score*": {}, "f1_score": {}} results = [MonoviewResult(view_index=0, classifier_name="ada", view_name="0", metrics_scores={"accuracy_score*": [0.9, 0.95], - "f1_score": [0.91, 0.96]}, + "f1_score": [0.91, 0.96]}, full_labels_pred="", classifier_config="", classifier="", @@ -55,7 +58,7 @@ class Test_get_metrics_scores(unittest.TestCase): classifier_name="dt", view_name="1", metrics_scores={"accuracy_score*": [0.8, 0.85], - "f1_score": [0.81, 0.86]}, + "f1_score": [0.81, 0.86]}, full_labels_pred="", classifier_config="", classifier="", @@ -66,7 +69,7 @@ class Test_get_metrics_scores(unittest.TestCase): class_metric_scores={}) ] metrics_scores, class_met = get_metrics_scores(metrics, - results, []) + results, []) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score*"], pd.DataFrame) np.testing.assert_array_equal( @@ -86,9 +89,9 @@ class Test_get_metrics_scores(unittest.TestCase): np.array(["ada-0", "dt-1"])) def test_mutiview_result(self): - metrics = {"accuracy_score*":{},"f1_score":{}} + metrics = {"accuracy_score*": {}, "f1_score": {}} results = [MultiviewResult("mv", "", {"accuracy_score*": [0.7, 0.75], - "f1_score": [0.71, 0.76]}, "",0,0,0, {}), + "f1_score": [0.71, 0.76]}, "", 0, 0, 0, {}), MonoviewResult(view_index=0, classifier_name="dt", view_name="1", @@ -104,7 +107,7 @@ class Test_get_metrics_scores(unittest.TestCase): class_metric_scores={}) ] metrics_scores, class_met = get_metrics_scores(metrics, - results, []) + results, []) self.assertIsInstance(metrics_scores, dict) self.assertIsInstance(metrics_scores["accuracy_score*"], pd.DataFrame) np.testing.assert_array_equal( @@ -129,52 +132,54 @@ class Test_init_plot(unittest.TestCase): def test_simple(self): results = [] metric_name = "acc" - data = np.random.RandomState(42).uniform(0,1,(2,2)) + data = np.random.RandomState(42).uniform(0, 1, (2, 2)) metric_dataframe = 
pd.DataFrame(index=["train", "test"], columns=["dt-1", "mv"], data=data) directory = "dir" database_name = 'db' labels_names = ['lb1', "lb2"] class_met = metric_dataframe = pd.DataFrame(index=["train", "test"], - columns=["dt-1", "mv"], data=data) + columns=["dt-1", "mv"], data=data) train, test, classifier_names, \ - file_name, nb_results, results, class_test = init_plot(results, + file_name, nb_results, results, class_test = init_plot(results, metric_name, metric_dataframe, directory, database_name, class_met) self.assertEqual(file_name, os.path.join("dir", "db-acc")) - np.testing.assert_array_equal(train, data[0,:]) + np.testing.assert_array_equal(train, data[0, :]) np.testing.assert_array_equal(test, data[1, :]) - np.testing.assert_array_equal(classifier_names, np.array(["dt-1", "mv"])) + np.testing.assert_array_equal( + classifier_names, np.array(["dt-1", "mv"])) self.assertEqual(nb_results, 2) - self.assertEqual(results, [["dt-1", "acc", data[1,0], 0.0, data[1,0]], - ["mv", "acc", data[1,1], 0.0, data[1,1]]]) + self.assertEqual(results, [["dt-1", "acc", data[1, 0], 0.0, data[1, 0]], + ["mv", "acc", data[1, 1], 0.0, data[1, 1]]]) class Test_small_func(unittest.TestCase): def test_fig_size(self): kw, width = get_fig_size(5) - self.assertEqual(kw, {"figsize":(15,5)}) + self.assertEqual(kw, {"figsize": (15, 5)}) self.assertEqual(width, 0.35) kw, width = get_fig_size(100) - self.assertEqual(kw, {"figsize": (100, 100/3)}) + self.assertEqual(kw, {"figsize": (100, 100 / 3)}) self.assertEqual(width, 0.35) def test_sort_by_test_scores(self): - train_scores = np.array([1,2,3,4]) + train_scores = np.array([1, 2, 3, 4]) test_scores = np.array([4, 3, 2, 1]) train_STDs = np.array([1, 2, 3, 4]) test_STDs = np.array([1, 2, 3, 4]) names = np.array(['1', '2', '3', '4']) sorted_names, sorted_train_scores, \ - sorted_test_scores, sorted_train_STDs, \ - sorted_test_STDs = sort_by_test_score(train_scores, test_scores, - names, train_STDs, test_STDs) - np.testing.assert_array_equal(sorted_names, np.array(['4', '3', '2', '1'])) + sorted_test_scores, sorted_train_STDs, \ + sorted_test_STDs = sort_by_test_score(train_scores, test_scores, + names, train_STDs, test_STDs) + np.testing.assert_array_equal( + sorted_names, np.array(['4', '3', '2', '1'])) np.testing.assert_array_equal(sorted_test_scores, [1, 2, 3, 4]) np.testing.assert_array_equal(sorted_test_STDs, [4, 3, 2, 1]) np.testing.assert_array_equal(sorted_train_scores, [4, 3, 2, 1]) - np.testing.assert_array_equal(sorted_train_STDs, [4, 3, 2, 1]) \ No newline at end of file + np.testing.assert_array_equal(sorted_train_STDs, [4, 3, 2, 1]) diff --git a/summit/tests/test_result_analysis/test_tracebacks_analysis.py b/summit/tests/test_result_analysis/test_tracebacks_analysis.py index d1942eac..b1059d8a 100644 --- a/summit/tests/test_result_analysis/test_tracebacks_analysis.py +++ b/summit/tests/test_result_analysis/test_tracebacks_analysis.py @@ -6,10 +6,11 @@ import os from summit.multiview_platform.result_analysis import tracebacks_analysis from summit.tests.utils import tmp_path, rm_tmp + class FakeClassifierResult: def __init__(self, i=0): - self.i=i + self.i = i if i == 0: self.hps_duration = 10 self.fit_duration = 12 @@ -19,7 +20,6 @@ class FakeClassifierResult: self.fit_duration = 2 self.pred_duration = 5 - def get_classifier_name(self): if self.i == 0: return 'test1' @@ -27,21 +27,23 @@ class FakeClassifierResult: return 'test2' - class Test_funcs(unittest.TestCase): @classmethod def setUpClass(cls): os.mkdir(tmp_path) - cls.res_file = 
open(os.path.join(tmp_path,"tmp.txt"), "w") + cls.res_file = open(os.path.join(tmp_path, "tmp.txt"), "w") @classmethod def tearDownClass(cls): rm_tmp() def test_save_dict_to_text(self): - keys = tracebacks_analysis.save_dict_to_text({"a":"i", "b":"j"}, self.res_file) + keys = tracebacks_analysis.save_dict_to_text( + {"a": "i", "b": "j"}, self.res_file) self.res_file.close() - self.assertEqual(list(keys),["a", "b"]) - with open(os.path.join(tmp_path,"tmp.txt"), 'r') as res_file: - self.assertEqual(res_file.read(), 'Failed algorithms : \n\ta,\n\tb.\n\n\na\n\ni\n\n\nb\n\nj\n\n\n') + self.assertEqual(list(keys), ["a", "b"]) + with open(os.path.join(tmp_path, "tmp.txt"), 'r') as res_file: + self.assertEqual( + res_file.read(), + 'Failed algorithms : \n\ta,\n\tb.\n\n\na\n\ni\n\n\nb\n\nj\n\n\n') diff --git a/summit/tests/test_utils/test_GetMultiviewDB.py b/summit/tests/test_utils/test_GetMultiviewDB.py index ea36e2ec..2941c0ba 100644 --- a/summit/tests/test_utils/test_GetMultiviewDB.py +++ b/summit/tests/test_utils/test_GetMultiviewDB.py @@ -16,12 +16,13 @@ class Test_get_classic_db_hdf5(unittest.TestCase): self.rs = np.random.RandomState(42) self.nb_view = 3 self.file_name = "test.hdf5" - self.nb_examples = 5 + self.nb_samples = 5 self.nb_class = 3 - self.views = [self.rs.randint(0, 10, size=(self.nb_examples, 7)) + self.views = [self.rs.randint(0, 10, size=(self.nb_samples, 7)) for _ in range(self.nb_view)] - self.labels = self.rs.randint(0, self.nb_class, self.nb_examples) - self.dataset_file = h5py.File(os.path.join(tmp_path, self.file_name), 'w') + self.labels = self.rs.randint(0, self.nb_class, self.nb_samples) + self.dataset_file = h5py.File( + os.path.join(tmp_path, self.file_name), 'w') self.view_names = ["ViewN" + str(index) for index in range(len(self.views))] self.are_sparse = [False for _ in self.views] @@ -45,24 +46,25 @@ class Test_get_classic_db_hdf5(unittest.TestCase): meta_data_grp.attrs["datasetLength"] = len(self.labels) def test_simple(self): - dataset , labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( + dataset, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( ["ViewN2"], tmp_path, self.file_name.split(".")[0], self.nb_class, ["0", "2"], self.rs, path_for_new=tmp_path) self.assertEqual(dataset.nb_view, 1) self.assertEqual(labels_dictionary, - {0: "0", 1: "2", 2:"1"}) - self.assertEqual(dataset.get_nb_examples(), 5) + {0: "0", 1: "2", 2: "1"}) + self.assertEqual(dataset.get_nb_samples(), 5) self.assertEqual(len(np.unique(dataset.get_labels())), 3) - def test_all_views_asked(self): dataset, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( None, tmp_path, self.file_name.split(".")[0], self.nb_class, ["0", "2"], self.rs, path_for_new=tmp_path) self.assertEqual(dataset.nb_view, 3) - self.assertEqual(dataset.get_view_dict(), {'ViewN0': 0, 'ViewN1': 1, 'ViewN2': 2}) + self.assertEqual( + dataset.get_view_dict(), { + 'ViewN0': 0, 'ViewN1': 1, 'ViewN2': 2}) def test_asked_the_whole_dataset(self): dataset, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_hdf5( @@ -100,45 +102,46 @@ class Test_get_classic_db_csv(unittest.TestCase): data, delimiter=",") self.datas.append(data) - def test_simple(self): dataset, labels_dictionary, dataset_name = get_multiview_db.get_classic_db_csv( self.views, self.pathF, self.nameDB, self.NB_CLASS, self.askedLabelsNames, self.random_state, delimiter=",", path_for_new=tmp_path) self.assertEqual(dataset.nb_view, 2) - self.assertEqual(dataset.get_view_dict(), 
{'test_view_1': 0, 'test_view_3': 1}) + self.assertEqual( + dataset.get_view_dict(), { + 'test_view_1': 0, 'test_view_3': 1}) self.assertEqual(labels_dictionary, {0: "test_label_1", 1: "test_label_3"}) - self.assertEqual(dataset.get_nb_examples(), 3) + self.assertEqual(dataset.get_nb_samples(), 3) self.assertEqual(dataset.get_nb_class(), 2) - @classmethod def tearDown(self): for i in range(4): os.remove( - tmp_path+"Views/test_view_" + str( + tmp_path + "Views/test_view_" + str( i) + ".csv") - os.rmdir(tmp_path+"Views") + os.rmdir(tmp_path + "Views") os.remove( - tmp_path+"test_dataset-labels-names.csv") - os.remove(tmp_path+"test_dataset-labels.csv") - os.remove(tmp_path+"test_dataset.hdf5") + tmp_path + "test_dataset-labels-names.csv") + os.remove(tmp_path + "test_dataset-labels.csv") + os.remove(tmp_path + "test_dataset.hdf5") os.remove( - tmp_path+"test_dataset_temp_filter.hdf5") + tmp_path + "test_dataset_temp_filter.hdf5") os.rmdir(tmp_path) + class Test_get_plausible_db_hdf5(unittest.TestCase): @classmethod def setUpClass(cls): rm_tmp() cls.path = tmp_path - cls.nb_class=3 + cls.nb_class = 3 cls.rs = np.random.RandomState(42) - cls.nb_view=3 - cls.nb_examples = 5 + cls.nb_view = 3 + cls.nb_samples = 5 cls.nb_features = 4 @classmethod @@ -148,19 +151,19 @@ class Test_get_plausible_db_hdf5(unittest.TestCase): def test_simple(self): dataset, labels_dict, name = get_multiview_db.get_plausible_db_hdf5( "", self.path, "", nb_class=self.nb_class, random_state=self.rs, - nb_view=3, nb_examples=self.nb_examples, + nb_view=3, nb_samples=self.nb_samples, nb_features=self.nb_features) - self.assertEqual(dataset.init_example_indices(), range(5)) + self.assertEqual(dataset.init_sample_indices(), range(5)) self.assertEqual(dataset.get_nb_class(), self.nb_class) def test_two_class(self): dataset, labels_dict, name = get_multiview_db.get_plausible_db_hdf5( "", self.path, "", nb_class=2, random_state=self.rs, - nb_view=3, nb_examples=self.nb_examples, + nb_view=3, nb_samples=self.nb_samples, nb_features=self.nb_features) - self.assertEqual(dataset.init_example_indices(), range(5)) + self.assertEqual(dataset.init_sample_indices(), range(5)) self.assertEqual(dataset.get_nb_class(), 2) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_utils/test_base.py b/summit/tests/test_utils/test_base.py index 69720b7d..dc4ccb68 100644 --- a/summit/tests/test_utils/test_base.py +++ b/summit/tests/test_utils/test_base.py @@ -23,10 +23,10 @@ class FakeClassifier(base.BaseClassifier): def get_params(self, deep=True): return {"test1": 10, - "test2": "test"} + "test2": "test"} def fit(self, X, y): - if np.unique(y).shape[0]>2 and not self.accepts_mc: + if np.unique(y).shape[0] > 2 and not self.accepts_mc: raise ValueError('Does not accept MC') else: return self @@ -37,7 +37,8 @@ class FakeDetector: self.best_params_ = {"test1": 10, "test2": "test"} self.cv_results_ = {"param_test1": [10], - "param_test2": ["test"]} + "param_test2": ["test"]} + class FakeResultAnalyzer(base.ResultAnalyser): @@ -47,42 +48,43 @@ class FakeResultAnalyzer(base.ResultAnalyser): def get_base_string(self): return 'test2' + class Test_ResultAnalyzer(unittest.TestCase): @classmethod def setUpClass(cls): cls.rs = np.random.RandomState(42) cls.classifier = FakeClassifier() - cls.n_examples = 50 + cls.n_samples = 50 cls.n_classes = 3 cls.train_length = 24 - cls.train_indices = cls.rs.choice(np.arange(cls.n_examples), + cls.train_indices = cls.rs.choice(np.arange(cls.n_samples), 
size=cls.train_length, replace=False) - cls.test_indices = np.array([i for i in range(cls.n_examples) + cls.test_indices = np.array([i for i in range(cls.n_samples) if i not in cls.train_indices]) cls.test_length = cls.test_indices.shape[0] cls.classification_indices = [cls.train_indices, cls.test_indices] cls.n_splits = 5 cls.k_folds = StratifiedKFold(n_splits=cls.n_splits, ) cls.hps_method = "randomized_search" - cls.metrics_list = {"accuracy_score": {}, "f1_score*":{}} + cls.metrics_list = {"accuracy_score": {}, "f1_score*": {}} cls.n_iter = 6 - cls.class_label_names = ["class{}".format(ind+1) - for ind in range(cls.n_classes)] + cls.class_label_names = ["class{}".format(ind + 1) + for ind in range(cls.n_classes)] cls.pred = cls.rs.randint(0, cls.n_classes, - size=cls.n_examples) + size=cls.n_samples) cls.directory = "fake_directory" cls.base_file_name = "fake_file" cls.labels = cls.rs.randint(0, cls.n_classes, - size=cls.n_examples) + size=cls.n_samples) cls.database_name = "test_database" cls.nb_cores = 0.5 cls.duration = -4 cls.train_accuracy = accuracy_score(cls.labels[cls.train_indices], cls.pred[cls.train_indices]) cls.test_accuracy = accuracy_score(cls.labels[cls.test_indices], - cls.pred[cls.test_indices]) + cls.pred[cls.test_indices]) cls.train_f1 = f1_score(cls.labels[cls.train_indices], cls.pred[cls.train_indices], average='micro') cls.test_f1 = f1_score(cls.labels[cls.test_indices], @@ -106,7 +108,8 @@ class Test_ResultAnalyzer(unittest.TestCase): self.directory, self.base_file_name, self.labels, self.database_name, self.nb_cores, self.duration) - cl_train, cl_test,train_score, test_score = RA.get_metric_score("accuracy_score", {}) + cl_train, cl_test, train_score, test_score = RA.get_metric_score( + "accuracy_score", {}) np.testing.assert_array_equal(train_score, self.train_accuracy) np.testing.assert_array_equal(test_score, self.test_accuracy) @@ -145,14 +148,16 @@ class Test_ResultAnalyzer(unittest.TestCase): def test_get_db_config_string(self): RA = FakeResultAnalyzer(self.classifier, self.classification_indices, - self.k_folds, self.hps_method, - self.metrics_list, - self.n_iter, self.class_label_names, - self.pred, - self.directory, self.base_file_name, - self.labels, self.database_name, - self.nb_cores, self.duration) - self.assertEqual(RA.get_db_config_string(), 'Database configuration : \n\t- Database name : test_database\ntest\t- Learning Rate : 0.48\n\t- Labels used : class1, class2, class3\n\t- Number of cross validation folds : 5\n\n') + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) + self.assertEqual( + RA.get_db_config_string(), + 'Database configuration : \n\t- Database name : test_database\ntest\t- Learning Rate : 0.48\n\t- Labels used : class1, class2, class3\n\t- Number of cross validation folds : 5\n\n') def test_get_classifier_config_string(self): RA = base.ResultAnalyser(self.classifier, self.classification_indices, @@ -163,36 +168,37 @@ class Test_ResultAnalyzer(unittest.TestCase): self.directory, self.base_file_name, self.labels, self.database_name, self.nb_cores, self.duration) - self.assertEqual(RA.get_classifier_config_string(), 'Classifier configuration : \n\t- FakeClassifier with test1 : 10, test2 : test\n\t- Executed on 0.5 core(s) \n\t- Got configuration using randomized search with 6 iterations \n') + self.assertEqual( + RA.get_classifier_config_string(), + 'Classifier configuration 
: \n\t- FakeClassifier with test1 : 10, test2 : test\n\t- Executed on 0.5 core(s) \n\t- Got configuration using randomized search with 6 iterations \n') def test_analyze(self): RA = FakeResultAnalyzer(self.classifier, self.classification_indices, - self.k_folds, self.hps_method, - self.metrics_list, - self.n_iter, self.class_label_names, - self.pred, - self.directory, self.base_file_name, - self.labels, self.database_name, - self.nb_cores, self.duration) + self.k_folds, self.hps_method, + self.metrics_list, + self.n_iter, self.class_label_names, + self.pred, + self.directory, self.base_file_name, + self.labels, self.database_name, + self.nb_cores, self.duration) str_analysis, img_analysis, metric_scores, class_metric_scores, conf_mat = RA.analyze() print(repr(str_analysis)) self.assertEqual(str_analysis, 'test2Database configuration : \n\t- Database name : test_database\ntest\t- Learning Rate : 0.48\n\t- Labels used : class1, class2, class3\n\t- Number of cross validation folds : 5\n\nClassifier configuration : \n\t- FakeClassifier with test1 : 10, test2 : test\n\t- Executed on 0.5 core(s) \n\t- Got configuration using randomized search with 6 iterations \n\n\n\tFor Accuracy score using {}, (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\n\tFor F1 score using average: micro, {} (higher is better) : \n\t\t- Score on train : 0.25\n\t\t- Score on test : 0.2692307692307692\n\nTest set confusion matrix : \n\n╒════════╤══════════╤══════════╤══════════╕\n│ │ class1 │ class2 │ class3 │\n╞════════╪══════════╪══════════╪══════════╡\n│ class1 │ 3 │ 1 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class2 │ 3 │ 2 │ 2 │\n├────────┼──────────┼──────────┼──────────┤\n│ class3 │ 3 │ 8 │ 2 │\n╘════════╧══════════╧══════════╧══════════╛\n\n\n\n Classification took -1 day, 23:59:56\n\n Classifier Interpretation : \n') - class Test_BaseClassifier(unittest.TestCase): @classmethod def setUpClass(cls): cls.base_estimator = "DecisionTreeClassifier" - cls.base_estimator_config = {"max_depth":10, + cls.base_estimator_config = {"max_depth": 10, "splitter": "best"} cls.est = base.BaseClassifier() cls.rs = np.random.RandomState(42) def test_simple(self): base_estim = self.est.get_base_estimator(self.base_estimator, - self.base_estimator_config) + self.base_estimator_config) self.assertTrue(isinstance(base_estim, DecisionTreeClassifier)) self.assertEqual(base_estim.max_depth, 10) self.assertEqual(base_estim.splitter, "best") @@ -200,14 +206,16 @@ class Test_BaseClassifier(unittest.TestCase): def test_gen_best_params(self): fake_class = FakeClassifier() best_params = fake_class.gen_best_params(FakeDetector()) - self.assertEqual(best_params, {"test1":10, "test2":"test"}) + self.assertEqual(best_params, {"test1": 10, "test2": "test"}) def test_gen_params_from_detector(self): fake_class = FakeClassifier() params = fake_class.gen_params_from_detector(FakeDetector()) - self.assertEqual(params, [("test1",np.array([10])), - ("test2",np.array(["str"], dtype='<U3'))]) - params = FakeClassifier(no_params=True).gen_params_from_detector(FakeDetector()) + self.assertEqual(params, [("test1", np.array([10])), + ("test2", np.array(["str"], dtype='<U3'))]) + params = FakeClassifier( + no_params=True).gen_params_from_detector( + FakeDetector()) self.assertEqual(params, [()]) def test_params_to_string(self): @@ -224,13 +232,16 @@ class Test_BaseClassifier(unittest.TestCase): self.assertEqual(accepts, True) accepts = FakeClassifier(accepts_mc=False).accepts_multi_class(self.rs) 
self.assertEqual(accepts, False) - self.assertRaises(ValueError, FakeClassifier().accepts_multi_class, self.rs, **{"n_samples":2}) - + self.assertRaises(ValueError, + FakeClassifier().accepts_multi_class, + self.rs, + **{"n_samples": 2}) def test_class(self): - base_estimator = DecisionTreeClassifier(max_depth=15, splitter="random") + base_estimator = DecisionTreeClassifier( + max_depth=15, splitter="random") base_estim = self.est.get_base_estimator(base_estimator, - self.base_estimator_config) + self.base_estimator_config) self.assertTrue(isinstance(base_estim, DecisionTreeClassifier)) self.assertEqual(base_estim.max_depth, 10) self.assertEqual(base_estim.splitter, "best") @@ -246,16 +257,15 @@ class Test_BaseClassifier(unittest.TestCase): conf = FakeClassifier(no_params=True).get_config() self.assertEqual(conf, 'FakeClassifier with no config.') + class Test_Functions(unittest.TestCase): def test_get_name(self): classed_list = ["test", 42] np.testing.assert_array_equal(base.get_names(classed_list), - np.array(["str", "int"], dtype="<U3")) - + np.array(["str", "int"], dtype="<U3")) def test_get_metric(self): from summit.multiview_platform.metrics import accuracy_score - metrics_dict = {"accuracy_score*":{}} + metrics_dict = {"accuracy_score*": {}} self.assertEqual(base.get_metric(metrics_dict), (accuracy_score, {})) - diff --git a/summit/tests/test_utils/test_configuration.py b/summit/tests/test_utils/test_configuration.py index a9074c55..a43906dc 100644 --- a/summit/tests/test_utils/test_configuration.py +++ b/summit/tests/test_utils/test_configuration.py @@ -12,17 +12,17 @@ class Test_get_the_args(unittest.TestCase): @classmethod def setUpClass(cls): rm_tmp() - cls.path_to_config_file = tmp_path+"config_temp.yml" + cls.path_to_config_file = tmp_path + "config_temp.yml" path_file = os.path.dirname(os.path.abspath(__file__)) make_tmp_dir = os.path.join(path_file, "../tmp_tests") os.mkdir(make_tmp_dir) - data = {"log": 10, "name":[12.5, 1e-06], "type":True} + data = {"log": 10, "name": [12.5, 1e-06], "type": True} with open(cls.path_to_config_file, "w") as config_file: yaml.dump(data, config_file) @classmethod def tearDownClass(cls): - os.remove(tmp_path+"config_temp.yml") + os.remove(tmp_path + "config_temp.yml") os.rmdir(tmp_path) def test_file_loading(self): @@ -40,6 +40,7 @@ class Test_get_the_args(unittest.TestCase): self.assertEqual(config_dict["name"], [12.5, 1e-06]) self.assertEqual(config_dict["type"], True) + class Test_save_config(unittest.TestCase): @classmethod def setUpClass(cls): @@ -49,10 +50,10 @@ class Test_save_config(unittest.TestCase): os.mkdir(make_tmp_dir) def test_simple(self): - configuration.save_config(tmp_path, {"test":10}) - with open(os.path.join(tmp_path,"config_file.yml" ), 'r') as stream: + configuration.save_config(tmp_path, {"test": 10}) + with open(os.path.join(tmp_path, "config_file.yml"), 'r') as stream: yaml_config = yaml.safe_load(stream) - self.assertEqual(yaml_config,{"test":10} ) + self.assertEqual(yaml_config, {"test": 10}) @classmethod def tearDownClass(cls): @@ -60,4 +61,4 @@ class Test_save_config(unittest.TestCase): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_utils/test_dataset.py b/summit/tests/test_utils/test_dataset.py index 85ef3afb..2ffdb823 100644 --- a/summit/tests/test_utils/test_dataset.py +++ b/summit/tests/test_utils/test_dataset.py @@ -16,14 +16,16 @@ class Test_Dataset(unittest.TestCase): cls.rs = np.random.RandomState(42) cls.nb_view = 3 cls.file_name = 
"test.hdf5" - cls.nb_examples = 5 + cls.nb_samples = 5 cls.nb_attr = 7 cls.nb_class = 3 - cls.views = [cls.rs.randint(0, 10, size=(cls.nb_examples, cls.nb_attr)) + cls.views = [cls.rs.randint(0, 10, size=(cls.nb_samples, cls.nb_attr)) for _ in range(cls.nb_view)] - cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_examples) - cls.dataset_file = h5py.File(os.path.join(tmp_path, cls.file_name), "w") - cls.view_names = ["ViewN" + str(index) for index in range(len(cls.views))] + cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_samples) + cls.dataset_file = h5py.File( + os.path.join(tmp_path, cls.file_name), "w") + cls.view_names = ["ViewN" + str(index) + for index in range(len(cls.views))] cls.are_sparse = [False for _ in cls.views] for view_index, (view_name, view, is_sparse) in enumerate( zip(cls.view_names, cls.views, cls.are_sparse)): @@ -56,7 +58,7 @@ class Test_Dataset(unittest.TestCase): path=tmp_path, labels_names=self.labels_names) shape = dataset_object.get_shape(0) - self.assertEqual(shape, (5,7)) + self.assertEqual(shape, (5, 7)) def test_to_numpy_array(self): dataset_object = dataset.HDF5Dataset(views=self.views, @@ -66,7 +68,7 @@ class Test_Dataset(unittest.TestCase): view_names=self.view_names, path=tmp_path, labels_names=self.labels_names) - array, limits = dataset_object.to_numpy_array(view_indices=[0,1,2]) + array, limits = dataset_object.to_numpy_array(view_indices=[0, 1, 2]) self.assertEqual(array.shape, (5, 21)) @@ -83,8 +85,8 @@ class Test_Dataset(unittest.TestCase): view_dataset.attrs["name"] = view_name view_dataset.attrs["sparse"] = is_sparse labels_dataset = dataset_file_filter.create_dataset("Labels", - shape=self.labels.shape, - data=self.labels) + shape=self.labels.shape, + data=self.labels) labels_dataset.attrs["names"] = [label_name.encode() for label_name in self.labels_names] meta_data_grp = dataset_file_filter.create_group("Metadata") @@ -107,71 +109,74 @@ class Test_Dataset(unittest.TestCase): dataset_object = dataset.HDF5Dataset(views=self.views, labels=self.labels, are_sparse=self.are_sparse, - file_name="from_scratch"+self.file_name, + file_name="from_scratch" + self.file_name, view_names=self.view_names, path=tmp_path, labels_names=self.labels_names) nb_class = dataset_object.get_nb_class() self.assertEqual(nb_class, self.nb_class) - example_indices = dataset_object.init_example_indices() - self.assertEqual(example_indices, range(self.nb_examples)) + sample_indices = dataset_object.init_sample_indices() + self.assertEqual(sample_indices, range(self.nb_samples)) view = dataset_object.get_v(0) np.testing.assert_array_equal(view, self.views[0]) - def test_init_example_indices(self): - example_indices = dataset.HDF5Dataset( - hdf5_file=self.dataset_file).init_example_indices() - self.assertEqual(example_indices, range(self.nb_examples)) - example_indices = dataset.HDF5Dataset( - hdf5_file=self.dataset_file).init_example_indices([0, 1, 2]) - self.assertEqual(example_indices, [0,1,2]) + def test_init_sample_indices(self): + sample_indices = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).init_sample_indices() + self.assertEqual(sample_indices, range(self.nb_samples)) + sample_indices = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).init_sample_indices([0, 1, 2]) + self.assertEqual(sample_indices, [0, 1, 2]) def test_get_v(self): view = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_v(0) np.testing.assert_array_equal(view, self.views[0]) - view = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_v(1, [0,1,2]) - 
np.testing.assert_array_equal(view, self.views[1][[0,1,2,], :]) + view = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).get_v(1, [0, 1, 2]) + np.testing.assert_array_equal(view, self.views[1][[0, 1, 2, ], :]) def test_get_nb_class(self): - nb_class = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_nb_class() + nb_class = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).get_nb_class() self.assertEqual(nb_class, self.nb_class) - nb_class = dataset.HDF5Dataset(hdf5_file=self.dataset_file).get_nb_class([0]) + nb_class = dataset.HDF5Dataset( + hdf5_file=self.dataset_file).get_nb_class( + [0]) self.assertEqual(nb_class, 1) - - def test_get_view_dict(self): dataset_object = dataset.HDF5Dataset(views=self.views, - labels=self.labels, - are_sparse=self.are_sparse, - file_name="from_scratch" + self.file_name, - view_names=self.view_names, - path=tmp_path, - labels_names=self.labels_names) - self.assertEqual(dataset_object.get_view_dict(), {"ViewN0":0, + labels=self.labels, + are_sparse=self.are_sparse, + file_name="from_scratch" + self.file_name, + view_names=self.view_names, + path=tmp_path, + labels_names=self.labels_names) + self.assertEqual(dataset_object.get_view_dict(), {"ViewN0": 0, "ViewN1": 1, - "ViewN2": 2,}) + "ViewN2": 2, }) def test_get_label_names(self): dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) raw_label_names = dataset_object.get_label_names(decode=False) decoded_label_names = dataset_object.get_label_names() - restricted_label_names = dataset_object.get_label_names(example_indices=[3,4]) + restricted_label_names = dataset_object.get_label_names( + sample_indices=[3, 4]) self.assertEqual(raw_label_names, [b'0', b'1', b'2']) self.assertEqual(decoded_label_names, ['0', '1', '2']) self.assertEqual(restricted_label_names, ['2']) def test_get_nb_exmaples(self): dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) - nb_examples = dataset_object.get_nb_examples() - self.assertEqual(nb_examples, self.nb_examples) + nb_samples = dataset_object.get_nb_samples() + self.assertEqual(nb_samples, self.nb_samples) def test_get_labels(self): dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) labels = dataset_object.get_labels() np.testing.assert_array_equal(labels, self.labels) - labels = dataset_object.get_labels([1,2,0]) - np.testing.assert_array_equal(labels, self.labels[[1,2,0]]) + labels = dataset_object.get_labels([1, 2, 0]) + np.testing.assert_array_equal(labels, self.labels[[1, 2, 0]]) def test_copy_view(self): dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) @@ -180,7 +185,8 @@ class Test_Dataset(unittest.TestCase): source_view_name="ViewN0", target_view_index=1) self.assertIn("View1", list(new_dataset.keys())) - np.testing.assert_array_equal(dataset_object.get_v(0), new_dataset["View1"][()]) + np.testing.assert_array_equal( + dataset_object.get_v(0), new_dataset["View1"][()]) self.assertEqual(new_dataset["View1"].attrs["name"], "ViewN0") new_dataset.close() os.remove(os.path.join(tmp_path, "test_copy.hdf5")) @@ -192,12 +198,13 @@ class Test_Dataset(unittest.TestCase): def test_select_labels(self): dataset_object = dataset.HDF5Dataset(hdf5_file=self.dataset_file) labels, label_names, indices = dataset_object.select_labels(["0", "2"]) - np.testing.assert_array_equal(np.unique(labels), np.array([0,1])) - self.assertEqual(label_names, ["0","2"]) + np.testing.assert_array_equal(np.unique(labels), np.array([0, 1])) + self.assertEqual(label_names, ["0", "2"]) def test_check_selected_label_names(self): dataset_object 
= dataset.HDF5Dataset(hdf5_file=self.dataset_file) - names = dataset_object.check_selected_label_names(nb_labels=2, random_state=self.rs) + names = dataset_object.check_selected_label_names( + nb_labels=2, random_state=self.rs) self.assertEqual(names, ["1", "0"]) names = dataset_object.check_selected_label_names(selected_label_names=['0', '2'], random_state=self.rs) @@ -224,7 +231,11 @@ class Test_Dataset(unittest.TestCase): meta_data_grp.attrs["nbClass"] = len(np.unique(self.labels)) meta_data_grp.attrs["datasetLength"] = len(self.labels) dataset_object = dataset.HDF5Dataset(hdf5_file=dataset_file_select) - names = dataset_object.select_views_and_labels(nb_labels=2, view_names=["ViewN0"], random_state=self.rs, path_for_new=tmp_path) + names = dataset_object.select_views_and_labels( + nb_labels=2, + view_names=["ViewN0"], + random_state=self.rs, + path_for_new=tmp_path) self.assertEqual(names, {0: '2', 1: '1'}) self.assertEqual(dataset_object.nb_view, 1) dataset_object.dataset.close() @@ -245,7 +256,8 @@ class Test_Dataset(unittest.TestCase): data=view) view_dataset.attrs["name"] = view_name view_dataset.attrs["sparse"] = is_sparse - meta_data_grp.create_dataset("View"+str(view_index)+"_limits", data= limits) + meta_data_grp.create_dataset( + "View" + str(view_index) + "_limits", data=limits) labels_dataset = dataset_file_select.create_dataset("Labels", shape=self.labels.shape, data=self.labels) @@ -260,6 +272,7 @@ class Test_Dataset(unittest.TestCase): os.remove(os.path.join(tmp_path, "test_noise_noised.hdf5")) os.remove(os.path.join(tmp_path, "test_noise.hdf5")) + class TestRAMDataset(unittest.TestCase): @classmethod @@ -267,12 +280,12 @@ class TestRAMDataset(unittest.TestCase): cls.rs = np.random.RandomState(42) cls.nb_view = 3 cls.file_name = "test.hdf5" - cls.nb_examples = 5 + cls.nb_samples = 5 cls.nb_attr = 7 cls.nb_class = 3 - cls.views = [cls.rs.randint(0, 10, size=(cls.nb_examples, cls.nb_attr)) + cls.views = [cls.rs.randint(0, 10, size=(cls.nb_samples, cls.nb_attr)) for _ in range(cls.nb_view)] - cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_examples) + cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_samples) cls.view_names = ["ViewN" + str(index) for index in range(len(cls.views))] cls.are_sparse = [False for _ in cls.views] @@ -280,10 +293,10 @@ class TestRAMDataset(unittest.TestCase): def test_get_view_name(self): dataset_object = dataset.RAMDataset(views=self.views, - labels=self.labels, - are_sparse=self.are_sparse, - view_names=self.view_names, - labels_names=self.labels_names) + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) self.assertEqual(dataset_object.get_view_name(0), "ViewN0") @@ -294,16 +307,15 @@ class TestRAMDataset(unittest.TestCase): view_names=self.view_names, labels_names=self.labels_names) - dataset_object.init_attrs() self.assertEqual(dataset_object.nb_view, 3) def test_get_label_names(self): dataset_object = dataset.RAMDataset(views=self.views, - labels=self.labels, - are_sparse=self.are_sparse, - view_names=self.view_names, - labels_names=self.labels_names) + labels=self.labels, + are_sparse=self.are_sparse, + view_names=self.view_names, + labels_names=self.labels_names) shape = dataset_object.get_label_names() self.assertEqual(shape, ['0'.encode('utf-8'), '1'.encode('utf-8'), @@ -323,10 +335,10 @@ class TestRAMDataset(unittest.TestCase): np.testing.assert_array_equal(data, np.array([6, 7, 4, 3, 7, 7, 2])) data = dataset_object.get_v(0, None) np.testing.assert_array_equal(data, 
np.array([[6, 3, 7, 4, 6, 9, 2], - [6, 7, 4, 3, 7, 7, 2], - [5, 4, 1, 7, 5, 1, 4], - [0, 9, 5, 8, 0, 9, 2], - [6, 3, 8, 2, 4, 2, 6]])) + [6, 7, 4, 3, 7, 7, 2], + [5, 4, 1, 7, 5, 1, 4], + [0, 9, 5, 8, 0, 9, 2], + [6, 3, 8, 2, 4, 2, 6]])) def test_filter(self): dataset_object = dataset.RAMDataset(views=self.views, @@ -334,10 +346,10 @@ class TestRAMDataset(unittest.TestCase): are_sparse=self.are_sparse, view_names=self.view_names, labels_names=self.labels_names) - dataset_object.filter("", "", np.array([1,2]), ["ViewN0", "ViewN1"], - path=None) + dataset_object.filter("", "", np.array([1, 2]), ["ViewN0", "ViewN1"], + path=None) self.assertEqual(dataset_object.nb_view, 2) - self.assertEqual(dataset_object.labels.shape, (2,1)) + self.assertEqual(dataset_object.labels.shape, (2, 1)) def test_get_view_dict(self): dataset_object = dataset.RAMDataset(views=self.views, @@ -357,6 +369,7 @@ class TestRAMDataset(unittest.TestCase): n = dataset_object.get_name() self.assertEqual(n, None) + class Test_Functions(unittest.TestCase): @classmethod def setUpClass(cls): @@ -365,13 +378,14 @@ class Test_Functions(unittest.TestCase): cls.rs = np.random.RandomState(42) cls.nb_view = 3 cls.file_name = "test0.hdf5" - cls.nb_examples = 5 + cls.nb_samples = 5 cls.nb_attr = 7 cls.nb_class = 3 - cls.views = [cls.rs.randint(0, 10, size=(cls.nb_examples, cls.nb_attr)) + cls.views = [cls.rs.randint(0, 10, size=(cls.nb_samples, cls.nb_attr)) for _ in range(cls.nb_view)] - cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_examples) - cls.dataset_file = h5py.File(os.path.join(tmp_path, cls.file_name), "w") + cls.labels = cls.rs.randint(0, cls.nb_class, cls.nb_samples) + cls.dataset_file = h5py.File( + os.path.join(tmp_path, cls.file_name), "w") cls.view_names = ["ViewN" + str(index) for index in range(len(cls.views))] cls.are_sparse = [False for _ in cls.views] @@ -400,24 +414,17 @@ class Test_Functions(unittest.TestCase): rm_tmp() def test_datasets_already_exist(self): - self.assertEqual(True, dataset.datasets_already_exist(tmp_path, "test", 1)) + self.assertEqual( + True, dataset.datasets_already_exist( + tmp_path, "test", 1)) def test_init_multiple_datasets(self): dataset.init_multiple_datasets(tmp_path, "test0", 2) - self.assertTrue(os.path.isfile(os.path.join(tmp_path,'test00.hdf5'))) - dataset.delete_HDF5([{"args":{"pathf":tmp_path, "name":"test0"}}], + self.assertTrue(os.path.isfile(os.path.join(tmp_path, 'test00.hdf5'))) + dataset.delete_HDF5([{"args": {"pathf": tmp_path, "name": "test0"}}], 2, dataset.HDF5Dataset(hdf5_file=self.dataset_file)) - self.assertFalse(os.path.isfile(os.path.join(tmp_path,'test00.hdf5'))) - - - - - - - - - + self.assertFalse(os.path.isfile(os.path.join(tmp_path, 'test00.hdf5'))) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_utils/test_execution.py b/summit/tests/test_utils/test_execution.py index f981e432..f678dd4f 100644 --- a/summit/tests/test_utils/test_execution.py +++ b/summit/tests/test_utils/test_execution.py @@ -16,6 +16,7 @@ class Test_parseTheArgs(unittest.TestCase): def test_empty_args(self): args = execution.parse_the_args([]) + class Test_init_log_file(unittest.TestCase): @classmethod @@ -35,7 +36,15 @@ class Test_init_log_file(unittest.TestCase): label="No", result_directory=tmp_path, args={}) - self.assertTrue(res_dir.startswith(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))),"tmp_tests", "test_dataset", "started" ))) + self.assertTrue( + res_dir.startswith( + 
os.path.join( + os.path.dirname( + os.path.dirname( + os.path.realpath(__file__))), + "tmp_tests", + "test_dataset", + "started"))) def test_no_log(self): res_dir = execution.init_log_file(name="test_dataset", @@ -63,6 +72,7 @@ class Test_init_log_file(unittest.TestCase): os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "tmp_tests", "test_dataset", "debug_started"))) + class Test_gen_k_folds(unittest.TestCase): @classmethod @@ -108,12 +118,14 @@ class Test_init_views(unittest.TestCase): pass def test_simple(self): - views, views_indices, all_views = execution.init_views(test_dataset, ["ViewN1", "ViewN2"]) - self.assertEqual(views, ["ViewN1", "ViewN2"]) - self.assertEqual(views_indices, [1,2]) + views, views_indices, all_views = execution.init_views( + test_dataset, ["ViewN1", "ViewN2"]) + self.assertEqual(views, ["ViewN1", "ViewN2"]) + self.assertEqual(views_indices, [1, 2]) self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"]) - views, views_indices, all_views = execution.init_views(test_dataset,None) + views, views_indices, all_views = execution.init_views( + test_dataset, None) self.assertEqual(views, ["ViewN0", "ViewN1", "ViewN2"]) self.assertEqual(views_indices, range(3)) self.assertEqual(all_views, ["ViewN0", "ViewN1", "ViewN2"]) @@ -129,8 +141,6 @@ class Test_find_dataset_names(unittest.TestCase): with open(os.path.join(tmp_path, "test1.txt"), "w") as file_stream: file_stream.write("test") - - @classmethod def tearDownClass(cls): rm_tmp() @@ -139,14 +149,21 @@ class Test_find_dataset_names(unittest.TestCase): path, names = execution.find_dataset_names(tmp_path, ".txt", ["test"]) self.assertEqual(path, tmp_path) self.assertEqual(names, ["test"]) - path, names = execution.find_dataset_names(tmp_path, ".txt", ["test", 'test1']) + path, names = execution.find_dataset_names( + tmp_path, ".txt", ["test", 'test1']) self.assertEqual(path, tmp_path) self.assertIn("test1", names) - path, names = execution.find_dataset_names("examples/data", ".hdf5", ["all"]) + path, names = execution.find_dataset_names( + "examples/data", ".hdf5", ["all"]) self.assertIn("doc_summit", names) - self.assertRaises(ValueError, execution.find_dataset_names, tmp_path+"test", ".txt", - ["test"]) - self.assertRaises(ValueError, execution.find_dataset_names, tmp_path, ".txt", ["ah"]) + self.assertRaises(ValueError, execution.find_dataset_names, tmp_path + "test", ".txt", + ["test"]) + self.assertRaises( + ValueError, + execution.find_dataset_names, + tmp_path, + ".txt", + ["ah"]) class Test_initStatsIterRandomStates(unittest.TestCase): @@ -219,18 +236,18 @@ class Test_initRandomState(unittest.TestCase): def test_random_state_42(self): randomState_42 = np.random.RandomState(42) randomState = execution.init_random_state("42", - tmp_path) - os.remove(tmp_path+"random_state.pickle") + tmp_path) + os.remove(tmp_path + "random_state.pickle") np.testing.assert_array_equal(randomState.beta(1, 100, 100), randomState_42.beta(1, 100, 100)) def test_random_state_pickle(self): randomState_to_pickle = execution.init_random_state(None, - tmp_path) + tmp_path) pickled_randomState = execution.init_random_state( - tmp_path+"random_state.pickle", + tmp_path + "random_state.pickle", tmp_path) - os.remove(tmp_path+"random_state.pickle") + os.remove(tmp_path + "random_state.pickle") np.testing.assert_array_equal(randomState_to_pickle.beta(1, 100, 100), pickled_randomState.beta(1, 100, 100)) @@ -282,7 +299,7 @@ class Test_genSplits(unittest.TestCase): def test_simple(self): splits = execution.gen_splits(self.labels, 
self.splitRatio, - self.statsIterRandomStates) + self.statsIterRandomStates) self.assertEqual(len(splits), 3) self.assertEqual(len(splits[1]), 2) self.assertEqual(type(splits[1][0]), np.ndarray) @@ -297,7 +314,7 @@ class Test_genSplits(unittest.TestCase): def test_genSplits_no_iter(self): splits = execution.gen_splits(self.labels, self.splitRatio, - self.statsIterRandomStates) + self.statsIterRandomStates) self.assertEqual(len(splits), 3) self.assertEqual(len(splits[0]), 2) self.assertEqual(type(splits[0][0]), np.ndarray) @@ -332,7 +349,7 @@ class Test_genDirecortiesNames(unittest.TestCase): def test_simple_ovo(cls): directories = execution.gen_direcorties_names(cls.directory, - cls.stats_iter) + cls.stats_iter) cls.assertEqual(len(directories), 5) cls.assertEqual(directories[0], os.path.join(tmp_path, "iter_1")) cls.assertEqual(directories[-1], os.path.join(tmp_path, "iter_5")) @@ -340,7 +357,7 @@ class Test_genDirecortiesNames(unittest.TestCase): def test_ovo_no_iter(cls): cls.stats_iter = 1 directories = execution.gen_direcorties_names(cls.directory, - cls.stats_iter) + cls.stats_iter) cls.assertEqual(len(directories), 1) cls.assertEqual(directories[0], tmp_path) @@ -357,5 +374,6 @@ class Test_genArgumentDictionaries(unittest.TestCase): cls.labelsCombinations = [[0, 1], [0, 2], [1, 2]] cls.indicesMulticlass = [[[[], []], [[], []], [[], []]], [[], [], []]] + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/test_utils/test_hyper_parameter_search.py b/summit/tests/test_utils/test_hyper_parameter_search.py index 3275bc59..e1b848c5 100644 --- a/summit/tests/test_utils/test_hyper_parameter_search.py +++ b/summit/tests/test_utils/test_hyper_parameter_search.py @@ -15,9 +15,6 @@ from summit.multiview_platform.utils import hyper_parameter_search from summit.multiview_platform.multiview_classifiers import weighted_linear_early_fusion - - - class FakeEstim(BaseEstimator): def __init__(self, param1=None, param2=None, random_state=None): self.param1 = param1 @@ -32,42 +29,42 @@ class FakeEstim(BaseEstimator): def predict(self, X): return np.zeros(X.shape[0]) + class FakeEstimMV(BaseEstimator): def __init__(self, param1=None, param2=None): self.param1 = param1 self.param2 = param2 - def fit(self, X, y,train_indices=None, view_indices=None): + def fit(self, X, y, train_indices=None, view_indices=None): self.y = y return self - def predict(self, X, example_indices=None, view_indices=None): - if self.param1=="return exact": - return self.y[example_indices] + def predict(self, X, sample_indices=None, view_indices=None): + if self.param1 == "return exact": + return self.y[sample_indices] else: - return np.zeros(example_indices.shape[0]) - + return np.zeros(sample_indices.shape[0]) class Test_Random(unittest.TestCase): @classmethod def setUpClass(cls): - n_splits=2 + n_splits = 2 cls.estimator = FakeEstim() - cls.param_distributions = {"param1":[10,100], "param2":[11, 101]} + cls.param_distributions = {"param1": [10, 100], "param2": [11, 101]} cls.n_iter = 4 cls.refit = True cls.n_jobs = 1 cls.scoring = make_scorer(accuracy_score, ) cls.cv = StratifiedKFold(n_splits=n_splits, ) cls.random_state = np.random.RandomState(42) - cls.learning_indices = np.array([0,1,2, 3, 4,]) + cls.learning_indices = np.array([0, 1, 2, 3, 4, ]) cls.view_indices = None cls.framework = "monoview" cls.equivalent_draws = False - cls.X = cls.random_state.randint(0,100, (10,11)) - cls.y = cls.random_state.randint(0,2, 10) + cls.X = cls.random_state.randint(0, 100, 
(10, 11)) + cls.y = cls.random_state.randint(0, 2, 10) def test_simple(self): hyper_parameter_search.Random( @@ -91,8 +88,8 @@ class Test_Random(unittest.TestCase): equivalent_draws=self.equivalent_draws ) RSCV.fit(self.X, self.y, ) - tested_param1 = np.ma.masked_array(data=[10,10,100,100], - mask=[False, False, False, False]) + tested_param1 = np.ma.masked_array(data=[10, 10, 100, 100], + mask=[False, False, False, False]) np.testing.assert_array_equal(RSCV.cv_results_['param_param1'], tested_param1) @@ -111,7 +108,7 @@ class Test_Random(unittest.TestCase): self.assertEqual(RSCV.n_iter, self.n_iter) def test_fit_multiview_equiv(self): - self.n_iter=1 + self.n_iter = 1 RSCV = hyper_parameter_search.Random( FakeEstimMV(), self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, @@ -123,11 +120,11 @@ class Test_Random(unittest.TestCase): equivalent_draws=True ) RSCV.fit(test_dataset, self.y, ) - self.assertEqual(RSCV.n_iter, self.n_iter*test_dataset.nb_view) + self.assertEqual(RSCV.n_iter, self.n_iter * test_dataset.nb_view) def test_gets_good_params(self): self.param_distributions["param1"].append('return exact') - self.n_iter=6 + self.n_iter = 6 RSCV = hyper_parameter_search.Random( FakeEstimMV(), self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, @@ -146,7 +143,7 @@ class Test_Grid(unittest.TestCase): @classmethod def setUpClass(cls): - cls.parameter_grid = {"param1":[5,6], "param2":[7,8]} + cls.parameter_grid = {"param1": [5, 6], "param2": [7, 8]} cls.estimator = FakeEstim() def test_simple(self): diff --git a/summit/tests/test_utils/test_multiclass.py b/summit/tests/test_utils/test_multiclass.py index 3a9914c6..95e156ad 100644 --- a/summit/tests/test_utils/test_multiclass.py +++ b/summit/tests/test_utils/test_multiclass.py @@ -4,16 +4,18 @@ import numpy as np from sklearn.base import BaseEstimator from summit.multiview_platform.utils.multiclass import get_mc_estim, \ -OVRWrapper, OVOWrapper, MultiviewOVOWrapper, MultiviewOVRWrapper + OVRWrapper, OVOWrapper, MultiviewOVOWrapper, MultiviewOVRWrapper + class FakeMCEstim(BaseEstimator): def __init__(self): - self.short_name="short_name" + self.short_name = "short_name" def accepts_multi_class(self, random_state): return False + class FakeEstimNative(FakeMCEstim): def accepts_multi_class(self, random_state): @@ -38,24 +40,27 @@ class Test_get_mc_estim(unittest.TestCase): cls.y = cls.random_state.randint(0, 3, 10) def test_biclass(self): - y = self.random_state.randint(0,2,10) - estimator="Test" + y = self.random_state.randint(0, 2, 10) + estimator = "Test" returned_estimator = get_mc_estim(estimator, self.random_state, y=y) self.assertEqual(returned_estimator, estimator) def test_multiclass_native(self): estimator = FakeEstimNative() - returned_estimator = get_mc_estim(estimator, self.random_state, y=self.y) + returned_estimator = get_mc_estim( + estimator, self.random_state, y=self.y) self.assertIsInstance(returned_estimator, FakeEstimNative) def test_multiclass_ovo(self): estimator = FakeNonProbaEstim() - returned_estimator = get_mc_estim(estimator, self.random_state, y=self.y) + returned_estimator = get_mc_estim( + estimator, self.random_state, y=self.y) self.assertIsInstance(returned_estimator, OVOWrapper) def test_multiclass_ovr(self): estimator = FakeProbaEstim() - returned_estimator = get_mc_estim(estimator, self.random_state, y=self.y) + returned_estimator = get_mc_estim( + estimator, self.random_state, y=self.y) 
self.assertIsInstance(returned_estimator, OVRWrapper) def test_multiclass_ovo_multiview(self): @@ -70,6 +75,7 @@ class Test_get_mc_estim(unittest.TestCase): multiview=True, y=self.y,) self.assertIsInstance(returned_estimator, MultiviewOVRWrapper) + class FakeMVClassifier(BaseEstimator): def __init__(self, short_name="None"): @@ -79,17 +85,19 @@ class FakeMVClassifier(BaseEstimator): self.n_classes = np.unique(y[train_indices]).shape[0] self.views_indices = view_indices - def predict(self, X, example_indices=None, view_indices=None): - self.example_indices = example_indices + def predict(self, X, sample_indices=None, view_indices=None): + self.sample_indices = sample_indices self.views_indices = view_indices - return np.zeros((example_indices.shape[0])) + return np.zeros((sample_indices.shape[0])) + class FakeMVClassifierProb(FakeMVClassifier): - def predict_proba(self, X, example_indices=None, view_indices=None): - self.example_indices = example_indices + def predict_proba(self, X, sample_indices=None, view_indices=None): + self.sample_indices = sample_indices self.views_indices = view_indices - return np.zeros((example_indices.shape[0], 2)) + return np.zeros((sample_indices.shape[0], 2)) + class Test_MultiviewOVRWrapper_fit(unittest.TestCase): @@ -97,68 +105,70 @@ class Test_MultiviewOVRWrapper_fit(unittest.TestCase): def setUpClass(cls): cls.random_state = np.random.RandomState(42) cls.X = "dataset" - cls.n_classes=3 - cls.y = cls.random_state.randint(0,cls.n_classes,50) + cls.n_classes = 3 + cls.y = cls.random_state.randint(0, cls.n_classes, 50) cls.train_indices = np.arange(25) - cls.example_indices = np.arange(25)+25 - cls.view_indices="None" + cls.sample_indices = np.arange(25) + 25 + cls.view_indices = "None" cls.wrapper = MultiviewOVRWrapper(FakeMVClassifierProb(), ) def test_fit(self): fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices, view_indices=self.view_indices) for estimator in fitted.estimators_: - self.assertEqual(estimator.n_classes,2) + self.assertEqual(estimator.n_classes, 2) self.assertEqual(estimator.views_indices, "None") def test_predict(self): fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices, view_indices=self.view_indices) - pred = fitted.predict(self.X, example_indices=self.example_indices, - view_indices=self.view_indices) + pred = fitted.predict(self.X, sample_indices=self.sample_indices, + view_indices=self.view_indices) for estimator in fitted.estimators_: - np.testing.assert_array_equal(estimator.example_indices, - self.example_indices) + np.testing.assert_array_equal(estimator.sample_indices, + self.sample_indices) class FakeDset: - def __init__(self, n_examples): - self.n_examples = n_examples + def __init__(self, n_samples): + self.n_samples = n_samples + + def get_nb_samples(self): + return self.n_samples - def get_nb_examples(self): - return self.n_examples class Test_MultiviewOVOWrapper_fit(unittest.TestCase): @classmethod def setUpClass(cls): cls.random_state = np.random.RandomState(42) - cls.n_examples=50 - cls.X = FakeDset(n_examples=cls.n_examples) - cls.n_classes=3 - cls.y = cls.random_state.randint(0,cls.n_classes,cls.n_examples) - cls.train_indices = np.arange(int(cls.n_examples/2)) - cls.example_indices = np.arange(int(cls.n_examples/2))+int(cls.n_examples/2) - cls.view_indices="None" + cls.n_samples = 50 + cls.X = FakeDset(n_samples=cls.n_samples) + cls.n_classes = 3 + cls.y = cls.random_state.randint(0, cls.n_classes, cls.n_samples) + cls.train_indices = np.arange(int(cls.n_samples / 2)) + 
cls.sample_indices = np.arange( + int(cls.n_samples / 2)) + int(cls.n_samples / 2) + cls.view_indices = "None" cls.wrapper = MultiviewOVOWrapper(FakeMVClassifier(), ) def test_fit(self): fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices, view_indices=self.view_indices) for estimator in fitted.estimators_: - self.assertEqual(estimator.n_classes,2) + self.assertEqual(estimator.n_classes, 2) self.assertEqual(estimator.views_indices, "None") def test_predict(self): fitted = self.wrapper.fit(self.X, self.y, train_indices=self.train_indices, view_indices=self.view_indices) - pred = fitted.predict(self.X, example_indices=self.example_indices, - view_indices=self.view_indices) + pred = fitted.predict(self.X, sample_indices=self.sample_indices, + view_indices=self.view_indices) for estimator in fitted.estimators_: - np.testing.assert_array_equal(estimator.example_indices, - self.example_indices) + np.testing.assert_array_equal(estimator.sample_indices, + self.sample_indices) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/summit/tests/utils.py b/summit/tests/utils.py index 4559007b..ca4fe43a 100644 --- a/summit/tests/utils.py +++ b/summit/tests/utils.py @@ -5,9 +5,19 @@ import h5py from ..multiview_platform.utils.dataset import HDF5Dataset -tmp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp_tests/") +tmp_path = os.path.join( + os.path.dirname( + os.path.abspath(__file__)), + "tmp_tests/") # TODO Convert to ram dataset -test_dataset = HDF5Dataset(hdf5_file=h5py.File(os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_database.hdf5"), "r")) +test_dataset = HDF5Dataset( + hdf5_file=h5py.File( + os.path.join( + os.path.dirname( + os.path.abspath(__file__)), + "test_database.hdf5"), + "r")) + def rm_tmp(path=tmp_path): try: @@ -17,16 +27,16 @@ def rm_tmp(path=tmp_path): else: os.remove(os.path.join(path, file_name)) os.rmdir(path) - except: + except BaseException: pass def gen_test_dataset(random_state=np.random.RandomState(42)): - dataset_file = h5py.File("test_database.hdf5", "w") + dataset_file = h5py.File("test_database.hdf5", "w") view_names = ["ViewN0", "ViewN1", "ViewN2"] - views = [random_state.randint(0,100,(5,6)) + views = [random_state.randint(0, 100, (5, 6)) for _ in range(len(view_names))] - labels = random_state.randint(0,2, 5) + labels = random_state.randint(0, 2, 5) label_names = ["yes", "no"] for view_index, (view_name, view) in enumerate( zip(view_names, views)): -- GitLab
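
The example-to-sample renaming exercised throughout the test_dataset.py hunks above boils down to a handful of renamed HDF5Dataset accessors. The sketch below is illustrative only and not part of the patch: the temporary directory and the file name sketch.hdf5 are placeholders, the fixture shapes mirror Test_Dataset.setUpClass, and only calls visible in the updated tests are used.

import tempfile

import numpy as np

from summit.multiview_platform.utils.dataset import HDF5Dataset

# Fixture values mirroring the shapes used in Test_Dataset.setUpClass.
rs = np.random.RandomState(42)
nb_samples, nb_attr, nb_view = 5, 7, 3
views = [rs.randint(0, 10, size=(nb_samples, nb_attr)) for _ in range(nb_view)]
labels = rs.randint(0, 3, nb_samples)

# The constructor writes an HDF5 file under `path`; a temporary directory
# keeps the sketch self-contained (placeholder, not the tests' tmp_path).
dataset_object = HDF5Dataset(views=views,
                             labels=labels,
                             are_sparse=[False] * nb_view,
                             file_name="sketch.hdf5",
                             view_names=["ViewN" + str(i) for i in range(nb_view)],
                             path=tempfile.mkdtemp() + "/",
                             labels_names=["0", "1", "2"])

dataset_object.get_nb_samples()       # 5, formerly get_nb_examples
dataset_object.init_sample_indices()  # range(5), formerly init_example_indices
np.testing.assert_array_equal(dataset_object.get_v(0), views[0])
np.testing.assert_array_equal(dataset_object.get_labels([1, 2, 0]),
                              labels[[1, 2, 0]])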
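
The multiclass handling covered in test_multiclass.py selects a wrapper through get_mc_estim. The following is a minimal sketch of that selection, mirroring the fake estimators used in Test_get_mc_estim; TinyEstim is a hypothetical stand-in, not a summit class, and only the behaviour asserted in the tests (binary labels returned unchanged, non-probabilistic multiclass wrapped in OVOWrapper) is relied on.

import numpy as np

from sklearn.base import BaseEstimator

from summit.multiview_platform.utils.multiclass import get_mc_estim, OVOWrapper


class TinyEstim(BaseEstimator):
    # Hypothetical stand-in exposing the hooks the tests' fakes expose:
    # a short_name and an accepts_multi_class() answer.
    short_name = "tiny"

    def accepts_multi_class(self, random_state):
        return False  # cannot handle more than two classes natively

    def fit(self, X, y):
        return self

    def predict(self, X):
        return np.zeros(X.shape[0])


rs = np.random.RandomState(42)
y_binary = np.array([0, 1, 0, 1, 1, 0])
y_multiclass = np.array([0, 1, 2, 0, 1, 2])

# Binary problem: the estimator is returned unchanged.
print(get_mc_estim(TinyEstim(), rs, y=y_binary))
# Multiclass problem without predict_proba: a one-vs-one wrapper is returned.
print(isinstance(get_mc_estim(TinyEstim(), rs, y=y_multiclass), OVOWrapper))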
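
Test_save_config above relies on configuration.save_config writing a config_file.yml into the target directory. A short usage sketch under that assumption, with a temporary directory standing in for the tests' tmp_path and the import path following the pattern used by the other test modules in this patch:

import os

import tempfile

import yaml

from summit.multiview_platform.utils import configuration

out_dir = tempfile.mkdtemp() + "/"  # placeholder for the tests' tmp_path
configuration.save_config(out_dir, {"test": 10})

# As asserted in Test_save_config, the file is named config_file.yml and
# round-trips through yaml.safe_load.
with open(os.path.join(out_dir, "config_file.yml")) as stream:
    print(yaml.safe_load(stream))  # {'test': 10}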