Commit 4ed48096 authored by Baptiste Bauvin

Doc

parent 4d0493da
%% Cell type:markdown id: tags:
# MAGE tutorial : the sample types
In this tutorial, we will learn how to generate a multiview dataset presenting :
* redundancy,
* complementarity and
* mutual error.
## Definitions
In this tutorial, we will denote a sample as
* **Redundant** if all the views have enough information to classify it correctly without collaboration,
* **Complementary** if only some of the views have enough information to classify it correctly without collaboration; these samples are useful to assess the ability to extract the relevant information among the views,
* Part of the **Mutual Error** if none of the views has enough information to classify it correctly without collaboration. A multiview classifier able to classify these samples is apt to extract information from several features of different views and to combine it.
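For intuition, here is a purely illustrative sketch (not produced by the generator) of the per-view correctness patterns that these definitions imply, with 1 meaning "this view alone is sufficient to classify the sample" :
%% Cell type:code id: tags:
``` python
# Illustrative only : hand-written per-view correctness patterns for one sample
# of each type, with four views. These arrays are not produced by MAGE.
sample_types = {
    "redundant":     [1, 1, 1, 1],  # every view suffices on its own
    "complementary": [1, 0, 1, 0],  # only some of the views suffice
    "mutual_error":  [0, 0, 0, 0],  # no view suffices without collaboration
}
```
%% Cell type:markdown id: tags: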
## Hands on experience : initialization
We will initialize the arguments as earlier :
%% Cell type:code id: tags:
``` python
from multiview_generator.gaussian_classes import MultiViewGaussianSubProblemsGenerator
from tabulate import tabulate
import numpy as np
import os

random_state = np.random.RandomState(42)
name = "tuto"
n_views = 4
n_classes = 3
error_matrix = [
    [0.4, 0.4, 0.4, 0.4],
    [0.55, 0.4, 0.4, 0.4],
    [0.4, 0.5, 0.52, 0.55]
]
n_samples = 2000
n_features = 3
class_weights = [0.333, 0.333, 0.333,]
```
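%% Cell type:markdown id: tags:
Here, `error_matrix` has one row per class and one column per view, so entry (i, j) can be read as the desired error of view j on the samples of class i (this reading is an assumption based on the shapes used in this tutorial). A quick sanity check on the dimensions :
%% Cell type:code id: tags:
``` python
# Sanity check (assumption) : the error matrix should have n_classes rows
# and n_views columns.
error_array = np.asarray(error_matrix)
assert error_array.shape == (n_classes, n_views)
print(error_array.shape)
```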
%% Cell type:markdown id: tags:
To control the three previously introduced characteristics, we have to provide three floats :
%% Cell type:code id: tags:
``` python
complementarity = 0.3
redundancy = 0.2
mutual_error = 0.1
```
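%% Cell type:markdown id: tags:
If these floats are interpreted as the fraction of samples devoted to each type (an assumption here, to be checked against the MAGE documentation), the configuration above corresponds to roughly the following counts :
%% Cell type:code id: tags:
``` python
# Rough expected counts, under the assumption that each float is a fraction of
# the n_samples samples (illustrative arithmetic only).
for sample_type, fraction in [("redundant", redundancy),
                              ("complementary", complementarity),
                              ("mutual error", mutual_error)]:
    print(sample_type, int(fraction * n_samples))
```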
%% Cell type:markdown id: tags:
Now we can generate the dataset with the given configuration.
%% Cell type:code id: tags:
``` python
generator = MultiViewGaussianSubProblemsGenerator(name=name, n_views=n_views,
                                                  n_classes=n_classes,
                                                  n_samples=n_samples,
                                                  n_features=n_features,
                                                  class_weights=class_weights,
                                                  error_matrix=error_matrix,
                                                  random_state=random_state,
                                                  redundancy=redundancy,
                                                  complementarity=complementarity,
                                                  mutual_error=mutual_error)

dataset, y = generator.generate_multi_view_dataset()
```
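%% Cell type:markdown id: tags:
`generate_multi_view_dataset` returns the views as a list of numpy arrays and the labels as a single array, so we can quickly inspect the shapes and the class balance :
%% Cell type:code id: tags:
``` python
# dataset is a list with one array per view, y is the label array.
for view_index, view in enumerate(dataset):
    print("View", view_index, ":", view.shape)
print("Labels :", np.unique(y, return_counts=True))
```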
%% Cell type:markdown id: tags:
Here, the generator distinguishes four types of samples : the three previously introduced and the ones that were used to fill the dataset.
## Dataset analysis using [SuMMIT](https://gitlab.lis-lab.fr/baptiste.bauvin/summit)
In order to differentiate them, we use `generator.sample_ids`. In this attribute, we can find an array with the ids of all the generated samples, characterizing their type :
%% Cell type:code id: tags:
``` python
generator.sample_ids[:10]
```
%% Output
['0_l_0_m-0_0.37-1_0.04-2_0.27-3_0.81',
 '1_l_0_m-0_0.48-1_1.28-2_0.28-3_0.55',
 '2_l_0_m-0_0.96-1_0.32-2_0.08-3_0.56',
 '3_l_0_m-0_2.49-1_0.18-2_0.97-3_0.35',
 '4_l_0_m-0_0.11-1_0.92-2_0.21-3_0.4',
 '5_l_0_m-0_0.84-1_0.43-2_0.48-3_1.17',
 '6_l_0_m-0_0.84-1_1.41-2_0.13-3_0.46',
 '7_l_0_m-0_0.14-1_0.64-2_0.62-3_0.4',
 '8_l_0_m-0_0.04-1_0.31-2_0.63-3_0.21',
 '9_l_0_m-0_0.86-1_1.18-2_0.09-3_0.35']
%% Cell type:markdown id: tags:
Here, we printed the first 10 ids, and we have :
* the redundant samples tagged `_r-`,
* the mutual error ones tagged `_m-`,
* the complementary ones tagged `_c-` and
<!-- * the filling ones tagged `example_`. -->
To get a visualization on these properties, we will use [SuMMIT](https://gitlab.lis-lab.fr/baptiste.bauvin/summit) with decision trees on each view.
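Before running SuMMIT, we can also check the proportions of the three tagged types by counting these substrings in `generator.sample_ids` (a small sketch, assuming each id contains exactly one of the tags listed above) :
%% Cell type:code id: tags:
``` python
# Count the generated sample types by searching for the tag substrings in the ids.
tag_names = {"_r-": "redundant", "_m-": "mutual error", "_c-": "complementary"}
counts = {name: sum(tag in sample_id for sample_id in generator.sample_ids)
          for tag, name in tag_names.items()}
print(counts)
```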
%% Cell type:code id: tags:
``` python
from summit.execute import execute

generator.to_hdf5_mc('supplementary_material')
execute(config_path=os.path.join('supplementary_material', 'config_summit.yml'))
```
%% Cell type:markdown id: tags:
To extract the result, we need a small script that will fetch the right folder :
%% Cell type:code id: tags:
``` python
import os
from datetime import datetime
from IPython.display import display
from IPython.display import IFrame

def fetch_latest_dir(experiment_directories, latest_date=datetime(1560,12,25,12,12)):
    # Parse the date and time encoded in each experiment directory name and
    # return the most recent one.
    for experiment_directory in experiment_directories:
        experiment_time = experiment_directory.split("-")[0].split("_")[1:]
        experiment_time += experiment_directory.split('-')[1].split("_")[:2]
        experiment_time = map(int, experiment_time)
        dt = datetime(*experiment_time)
        if dt > latest_date:
            latest_date = dt
            latest_experiment_dir = experiment_directory
    return latest_experiment_dir

experiment_directory = fetch_latest_dir(os.listdir(os.path.join('supplementary_material', 'tuto')))
error_fig_path = os.path.join('supplementary_material', 'tuto', experiment_directory, "error_analysis_2D.html")
IFrame(src=error_fig_path, width=900, height=500)
```
%% Output
<IPython.lib.display.IFrame at 0x7f149d3a6f98>
%% Cell type:markdown id: tags:
This graph represents the failure of each classifier on each sample : a black rectangle on row i, column j means that classifier j always failed to classify sample i.
So, by [zooming in](https://baptiste.bauvin.pages.lis-lab.fr/summit/_images/zoom_plotly.gif), we can focus on several samples and see that the sample types are well defined : the mutual error ones are systematically misclassified by the decision trees, the redundant ones are well classified, and the complementary ones are correctly classified only by a portion of the views.
...
@@ -12,11 +12,18 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('../../multiview_generator'))
+repo_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# print(repo_path)
+# print(os.path.join(repo_path, "multiview_generator", "base"))
+# quit()
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(0, os.path.join(repo_path, "multiview_generator'"))
+sys.path.insert(0, repo_path)
 
 # -- Project information -----------------------------------------------------
 
-project = 'Mulitivew Generator'
+project = 'MAGE'
 copyright = '2020, Baptiste Bauvin'
 author = 'Baptiste Bauvin'
@@ -31,6 +38,7 @@ release = '0.0'
 # ones.
 extensions = ['sphinx.ext.autodoc',
               'sphinx.ext.extlinks',
+              'sphinx_rtd_theme',
               # 'sphinx.ext.doctest',
               # 'sphinx.ext.intersphinx',
               # 'sphinx.ext.todo',
@@ -42,11 +50,24 @@ extensions = ['sphinx.ext.autodoc',
               # 'sphinx.ext.viewcode',
               # 'sphinx.ext.githubpages',
               'sphinx.ext.napoleon',
+              "autoapi.extension",
               'nbsphinx',
               "nbsphinx_link"
               # 'm2r'
               ]
+
+autoapi_type = 'python'
+autoapi_dirs = [os.path.join(repo_path, "multiview_generator",""),]
+autoapi_options = ["members", "show-module-summary", 'undoc-members']
+autoapi_ignore = ["*tests*"]
+autoapi_keep_files = False
+autoapi_add_toctree_entry = False
+add_module_names = False
+autoapi_template_dir = os.path.join(repo_path, "docs", "source", "templates_autoapi")
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates', 'templates_autoapi']
+
 source_suffix = ['.rst', '.md', '.ipynb', ".nblink"]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -63,12 +84,12 @@ exclude_patterns = ['_build', '**.ipynb_checkpoints']
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'nature'
+html_theme = 'sphinx_rtd_theme'
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ['_static',]
 
 rst_prolog = """
 .. role:: python(code)
@@ -77,18 +98,18 @@ rst_prolog = """
 .. role :: yaml(code)
    :language: yaml
 
-.. |gene| replace:: SMuDGE
-.. |gene_f| replace:: Supervised MUltimodal Dataset Generation Engine
+.. |gene| replace:: MAGE
+.. |gene_f| replace:: Multi-view Artificial Generation Engine
 .. |HPO| replace:: hyper-parameters optimization
 """
 
 extlinks = {'base_source': (
-    'https://gitlab.lis-lab.fr/baptiste.bauvin/smudge/-/tree/master/',
+    'https://gitlab.lis-lab.fr/dev/multiview_generator',
     "base_source"),
     'base_doc': (
-        'http://baptiste.bauvin.pages.lis-lab.fr/smudge/', 'base_doc'),
+        'https://dev.pages.lis-lab.fr/multiview_generator/', 'base_doc'),
     'summit':('https://gitlab.lis-lab.fr/baptiste.bauvin/summit', 'summit')}
 
 html_js_files = [
...
 |gene| documentation
 ====================
 
-.. automodule:: multiple_sub_problems
+.. toctree::
+   :maxdepth: 2
 
-.. autoclass:: MultiViewSubProblemsGenerator
-   :members:
+   autoapi/multiview_generator/base/index
+   autoapi/multiview_generator/gaussian_classes/index
+   autoapi/multiview_generator/sub_problems/index
+   autoapi/multiview_generator/utils/index
\ No newline at end of file
@@ -3,10 +3,10 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to multiview_generator's documentation!
+Welcome to |gene|'s documentation
 ===============================================
 
-To install MAGE, clone the gitlab repository and run
+To install |gene|, clone the gitlab repository and run
 
 .. code-block::
@@ -24,8 +24,8 @@ To install MAGE, clone the gitlab repository and run
    include_tuto3
    documentation
 
-Read me
-=========
+Read Me
+=======
 
 .. include:: readme_link.rst
...
 import numpy as np
 import itertools
 import math
@@ -22,7 +21,6 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
                  sub_problem_generators="StumpsGenerator", random_vertices=False,
                  min_rndm_val=-1, max_rndm_val=1, **kwargs):
         """
         :param random_state: int or np.random.RandomState object to fix the
         random seed
         :param n_samples: int representing the number of samples in the dataset
@@ -74,18 +72,15 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
     def generate_multi_view_dataset(self, ):
         """
-        This is the main method. It will generate a multiview dataset according
-        to the configuration.
+        This is the main method. It will generate a multiview dataset according to the configuration.
 
         To do so,
 
         * it generates the labels of the multiview dataset,
         * then it assigns all the subsets of samples (redundant, ...)
-        * finally, for each view it generates a monoview dataset according
-        to the configuration
+        * finally, for each view it generates a monoview dataset according to the configuration
 
-        :return: view_data a list containing the views np.ndarrays and y, the
-        label array.
+        :return: view_data a list containing the views np.ndarrays and y, the label array.
         """
         # Generate the labels
...
@@ -59,7 +59,7 @@ class StumpsGenerator(BaseSubProblem):
         uniform noise features : all the remaining ones
 
-        :return: data a np.ndarray of dimension n_classes, n_samples_per_class,
+        :return: data a np.ndarray of dimension n_classes, n_samples_per_class, \
         n_features containing the samples' descriptions, sorted by class
         """
         self.n_relevant_features = math.ceil(math.log2(self.n_classes))
@@ -223,16 +223,14 @@ class TreesGenerator(BaseSubProblem): # pragma: no cover
 class RingsGenerator(BaseSubProblem):
 
     def gen_data(self):
-        """
-        Generates the samples according to gaussian distributions with scales
+        r"""Generates the samples according to gaussian distributions with scales
         computed with the given error and class separation. The generator first
         computes a radius according to the gaussian distribution, then
         generates n_features-1 random angles to build the polar coordinates of
         the samples. The dataset returned is the cartesian version of this
         "polar" dataset.
 
-        :return: data a np.ndarray of dimension n_classes, n_samples_per_class,
-        n_features containing the samples' descriptions, sorted by class
+        :return: data a np.ndarray of dimension n_classes, n_samples_per_class, n_features containing the samples' descriptions, sorted by class
         """
         if self.n_features<2:
             raise ValueError("n_features for view {} must be at least 2, (now: {})".format(1, self.n_features))
...
@@ -180,9 +180,11 @@ def setup_package():
     extras_require = {
         'dev': ['pytest', 'pytest-cov'],
         'doc': ['sphinx>=1.8', 'numpydoc', 'sphinx_gallery', 'matplotlib', "jupyter",
-                'pandoc', 'nbshpinx', 'nbsphinx_link']}
+                'pandoc', 'nbshpinx', 'nbsphinx_link', 'sphinx_rtd_theme']}
     include_package_data = True
+    command_options = {'build_sphinx': {'build_dir':('setup.py', './docs/build/')}}
+
     setup(name=name,
           version=version,
           description=description,
@@ -198,7 +200,8 @@ def setup_package():
           install_requires=install_requires,
           python_requires=python_requires,
           extras_require=extras_require,
-          include_package_data=include_package_data)
+          include_package_data=include_package_data,
+          command_options=command_options)
 
 
 if __name__ == "__main__":