Updated

74791924 · Baptiste Bauvin · 5950be9c · 74791924 · 74791924 · 74791924
Commit 74791924 authored Jun 11, 2021 by Baptiste Bauvin
--- a/README.rst
+++ b/README.rst
@@ -11,26 +11,93 @@
 |pipeline| |license| |coverage|
-Multiview Generator
+MAGE : Multi-view Artificial Generation Engine
-===================
+==============================================
-This package aims at generating customized mutliview datasets to facilitate the
+This package aims at generating customized multi-view datasets to facilitate the
-development of new multiview algorithms and their testing on simulated data
+development of new multi-view algorithms and their testing on simulated data
 representing specific tasks.
-Understanding the concept
+Getting started
-------------------------
+---------------
-The main idea of the generator is to build several monoview sub-problems that
+This code was originally developed on Ubuntu; if compatibility with Mac or
+Windows is mandatory for you, contact us so we can adapt it.
+----------+-------------------+
+| Platform | Last positive test|
+==========+===================+
+|   Linux  |  |pipeline|       |
+----------+-------------------+
+| Mac      | Not verified yet  |
+----------+-------------------+
+| Windows  | Not verified yet  |
+----------+-------------------+
-.. image:: _static/fig_rec.png
+Prerequisites
-   :width: 100%
+<<<<<<<<<<<<<
-   :align: center
+To be able to use this project, you'll need :
-Structure
+* `Python 3 <https://docs.python.org/3/>`_
---------
-The class of intereset is located in ``generator/multiple_sub_problems.py`` and called ``MultiViewSubProblemsGenerator``.
-A demo is available in ``demo/demo.py`` and generates a 3D dataset, along with a figure that analyzes it.
+And the following python modules will be automatically installed  :
\ No newline at end of file
+* `numpy <http://www.numpy.org/>`_, `scipy <https://scipy.org/>`_,
+* `matplotlib <http://matplotlib.org/>`_ - Used to plot results,
+* `sklearn <http://scikit-learn.org/stable/>`_ - Used for the monoview classifiers,
+* `h5py <https://www.h5py.org>`_ - Used to generate HDF5 datasets on hard drive and use them to spare RAM,
+* `pandas <https://pandas.pydata.org/>`_ - Used to manipulate data efficiently,
+* `docutils <https://pypi.org/project/docutils/>`_ - Used to generate documentation,
+* `pyyaml <https://pypi.org/project/PyYAML/>`_ - Used to read the config files,
+* `plotly <https://plot.ly/>`_ - Used to generate interactive HTML visuals,
+* `tabulate <https://pypi.org/project/tabulate/>`_ - Used to generate the confusion matrix,
+* `jupyter <https://jupyter.org/>`_ - Used for the tutorials
+Installing
+<<<<<<<<<<
+Once you have cloned the project from the `gitlab repository <https://gitlab.lis-lab.fr/dev/multiview_generator/>`_, you just have to use :
+.. code:: bash
+    cd path/to/multiview_generator/
+    pip3 install -e .
+In the `multiview_generator` directory to install MAGE and its dependencies.
+Running the tests
+<<<<<<<<<<<<<<<<<
+To run the test suite of MAGE, run :
+.. code:: bash
+    cd path/to/multiview_generator
+    pip install -e .[dev]
+    pytest
+The coverage report is automatically generated and stored in the ``htmlcov/`` directory
+Building the documentation
+<<<<<<<<<<<<<<<<<<<<<<<<<<
+To locally build the `documentation <https://dev.pages.lis-lab.fr/multiview_generator/>`_ run :
+.. code:: bash
+    cd path/to/multiview_generator
+    pip install -e .[doc]
+    python setup.py build_sphinx
+The locally built html files will be stored in ``path/to/multiview_generator/build/sphinx/html``
+Authors
+-------
+* **Baptiste BAUVIN**
+* **Dominique BENIELLI**
+* **Sokol Koço**
\ No newline at end of file
--- a/demo/tutorials/getting_started.ipynb
+++ b/demo/tutorials/getting_started.ipynb
--- a/demo/tutorials/sample_types.ipynb
+++ b/demo/tutorials/sample_types.ipynb
@@ -9,7 +9,7 @@
    }
   },
   "source": [
-    "# SMuDGE tutorial : the sample types \n",
+    "# MAGE tutorial : the sample types \n",
    "\n",
    "In this tutorial, we will learn how to generate a multiview dataset presenting :\n",
    "\n",
@@ -44,7 +44,7 @@
   },
   "outputs": [],
   "source": [
-    "from multiview_generator.multiple_sub_problems import MultiViewSubProblemsGenerator\n",
+    "from multiview_generator.gaussian_classes import MultiViewGaussianSubProblemsGenerator\n",
    "from tabulate import tabulate\n",
    "import numpy as np\n",
    "import os\n",
@@ -110,18 +110,9 @@
     "name": "#%% \n"
    }
   },
-   "outputs": [
+   "outputs": [],
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[array([399, 399, 399, 399]), array([299, 399, 399, 399]), array([399, 333, 319, 299])]\n",
-      "400.0\n"
-     ]
-    }
-   ],
   "source": [
-    "generator = MultiViewSubProblemsGenerator(name=name, n_views=n_views, \n",
+    "generator = MultiViewGaussianSubProblemsGenerator(name=name, n_views=n_views, \n",
    "                                          n_classes=n_classes, \n",
    "                                          n_samples=n_samples, \n",
    "                                          n_features=n_features, \n",
@@ -132,7 +123,7 @@
    "                                          complementarity=complementarity, \n",
    "                                          mutual_error=mutual_error)\n",
    "\n",
-    "view_data, y = generator.generate_multi_view_dataset()"
+    "dataset, y = generator.generate_multi_view_dataset()"
   ]
  },
  {
@@ -147,7 +138,7 @@
    "\n",
    "## Dataset analysis using [SuMMIT](https://gitlab.lis-lab.fr/baptiste.bauvin/summit)\n",
    "\n",
-    "In order to differentiate them, we use `generator.example_ids`. In this attribute, we can find an array with the ids of all the generated exmaples, characterizing their type :"
+    "In order to differentiate them, we use `generator.sample_ids`. In this attribute, we can find an array with the ids of all the generated examples, characterizing their type :"
   ]
  },
  {
@@ -163,16 +154,16 @@
    {
     "data": {
      "text/plain": [
-       "['Complementary_193_1',\n",
+       "['0_l_0_m-0_0.37-1_0.04-2_0.27-3_0.81',\n",
-       " 'redundancy_56_2',\n",
+       " '1_l_0_m-0_0.48-1_1.28-2_0.28-3_0.55',\n",
-       " 'Complementary_64_0',\n",
+       " '2_l_0_m-0_0.96-1_0.32-2_0.08-3_0.56',\n",
-       " 'redundancy_26_1',\n",
+       " '3_l_0_m-0_2.49-1_0.18-2_0.97-3_0.35',\n",
-       " 'Complementary_141_2',\n",
+       " '4_l_0_m-0_0.11-1_0.92-2_0.21-3_0.4',\n",
-       " 'example_5',\n",
+       " '5_l_0_m-0_0.84-1_0.43-2_0.48-3_1.17',\n",
-       " 'redundancy_54_1',\n",
+       " '6_l_0_m-0_0.84-1_1.41-2_0.13-3_0.46',\n",
-       " 'Complementary_157_1',\n",
+       " '7_l_0_m-0_0.14-1_0.64-2_0.62-3_0.4',\n",
-       " 'example_8',\n",
+       " '8_l_0_m-0_0.04-1_0.31-2_0.63-3_0.21',\n",
-       " 'example_9']"
+       " '9_l_0_m-0_0.86-1_1.18-2_0.09-3_0.35']"
      ]
     },
     "execution_count": 4,
@@ -181,7 +172,7 @@
    }
   ],
   "source": [
-    "generator.example_ids[:10]"
+    "generator.sample_ids[:10]"
   ]
  },
  {
@@ -194,17 +185,17 @@
   "source": [
    "Here, we printed the 10 first ones, and we have : \n",
    "\n",
-    "* the redundant samples tagged `redundancy_`,\n",
+    "* the redundant samples tagged `_r-`,\n",
-    "* the mutual error ones tagged `mutual_error_`,\n",
+    "* the mutual error ones tagged `_m-`,\n",
-    "* the complementary ones tagged `complementary_` and\n",
+    "* the complementary ones tagged `_c-` and\n",
-    "* the filling ones tagged `example_`. \n",
+    "<!-- * the filling ones tagged `example_`.  -->\n",
    "\n",
    "To get a visualization on these properties, we will use SuMMIT with decision trees on each view. "
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "is_executing": false,
@@ -213,7 +204,7 @@
   },
   "outputs": [],
   "source": [
-    "from multiview_platform.execute import execute  \n",
+    "from summit.execute import execute  \n",
    "\n",
    "generator.to_hdf5_mc('supplementary_material')\n",
    "execute(config_path=os.path.join('supplementary_material','config_summit.yml'))\n"
@@ -242,14 +233,14 @@
       "        <iframe\n",
       "            width=\"900\"\n",
       "            height=\"500\"\n",
-       "            src=\"supplementary_material/tuto/started_2020_04_29-09_36_/error_analysis_2D.html\"\n",
+       "            src=\"supplementary_material/tuto/started_2021_06_10-09_11_/error_analysis_2D.html\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f71927ec9e8>"
+       "<IPython.lib.display.IFrame at 0x7f149d3a6f98>"
      ]
     },
     "execution_count": 7,

 %% Cell type:markdown id: tags:
-# SMuDGE tutorial : the sample types
+# MAGE tutorial : the sample types
 In this tutorial, we will learn how to generate a multiview dataset presenting :
 * redundancy,
 * complementarity and
 * mutual error.
 ## Definitions
 In this tutorial, we will denote a sample as
 * **Redundant** if all the views have enough information to classify it correctly without collaboration,
 * **Complementary** if only some of the views have enough information to classify it correctly without collaboration; it is useful to assess the ability to extract the relevant information among the views.
 * Part of the **Mutual Error** if none of the views has enough information to classify it correctly without collaboration. A multiview classifier able to classify these examples is apt to get information from several features from different views and combine it to classify the examples.
 ## Hands on experience : initialization
 We will initialize the arguments as earlier :
 %% Cell type:code id: tags:
 ``` python
-from multiview_generator.multiple_sub_problems import MultiViewSubProblemsGenerator
+from multiview_generator.gaussian_classes import MultiViewGaussianSubProblemsGenerator
 from tabulate import tabulate
 import numpy as np
 import os
 random_state = np.random.RandomState(42)
 name = "tuto"
 n_views = 4
 n_classes = 3
 error_matrix = [
   [0.4, 0.4, 0.4, 0.4],
   [0.55, 0.4, 0.4, 0.4],
   [0.4, 0.5, 0.52, 0.55]
 ]
 n_samples = 2000
 n_features = 3
 class_weights = [0.333, 0.333, 0.333,]
 ```
 %% Cell type:markdown id: tags:
 To control the three previously introduced characteristics, we have to provide three floats :
 %% Cell type:code id: tags:
 ``` python
 complementarity = 0.3
 redundancy = 0.2
 mutual_error = 0.1
 ```
 %% Cell type:markdown id: tags:
 Now we can generate the dataset with the given configuration.
 %% Cell type:code id: tags:
 ``` python
-generator = MultiViewSubProblemsGenerator(name=name, n_views=n_views,
+generator = MultiViewGaussianSubProblemsGenerator(name=name, n_views=n_views,
                                          n_classes=n_classes,
                                          n_samples=n_samples,
                                          n_features=n_features,
                                          class_weights=class_weights,
                                          error_matrix=error_matrix,
                                          random_state=random_state,
                                          redundancy=redundancy,
                                          complementarity=complementarity,
                                          mutual_error=mutual_error)
-view_data, y = generator.generate_multi_view_dataset()
+dataset, y = generator.generate_multi_view_dataset()
 ```
-%% Output
-    [array([399, 399, 399, 399]), array([299, 399, 399, 399]), array([399, 333, 319, 299])]
-    400.0
 %% Cell type:markdown id: tags:
 Here, the generator distinguishes four types of examples, the three previously introduced and the ones that were used to fill the dataset.
 ## Dataset analysis using [SuMMIT](https://gitlab.lis-lab.fr/baptiste.bauvin/summit)
-In order to differentiate them, we use `generator.example_ids`. In this attribute, we can find an array with the ids of all the generated exmaples, characterizing their type :
+In order to differentiate them, we use `generator.sample_ids`. In this attribute, we can find an array with the ids of all the generated examples, characterizing their type :
 %% Cell type:code id: tags:
 ``` python
-generator.example_ids[:10]
+generator.sample_ids[:10]
 ```
 %% Output
-    ['Complementary_193_1',
+    ['0_l_0_m-0_0.37-1_0.04-2_0.27-3_0.81',
-     'redundancy_56_2',
+     '1_l_0_m-0_0.48-1_1.28-2_0.28-3_0.55',
-     'Complementary_64_0',
+     '2_l_0_m-0_0.96-1_0.32-2_0.08-3_0.56',
-     'redundancy_26_1',
+     '3_l_0_m-0_2.49-1_0.18-2_0.97-3_0.35',
-     'Complementary_141_2',
+     '4_l_0_m-0_0.11-1_0.92-2_0.21-3_0.4',
-     'example_5',
+     '5_l_0_m-0_0.84-1_0.43-2_0.48-3_1.17',
-     'redundancy_54_1',
+     '6_l_0_m-0_0.84-1_1.41-2_0.13-3_0.46',
-     'Complementary_157_1',
+     '7_l_0_m-0_0.14-1_0.64-2_0.62-3_0.4',
-     'example_8',
+     '8_l_0_m-0_0.04-1_0.31-2_0.63-3_0.21',
-     'example_9']
+     '9_l_0_m-0_0.86-1_1.18-2_0.09-3_0.35']
 %% Cell type:markdown id: tags:
 Here, we printed the 10 first ones, and we have :
-* the redundant samples tagged `redundancy_`,
+* the redundant samples tagged `_r-`,
-* the mutual error ones tagged `mutual_error_`,
+* the mutual error ones tagged `_m-`,
-* the complementary ones tagged `complementary_` and
+* the complementary ones tagged `_c-` and
-* the filling ones tagged `example_`.
+<!-- * the filling ones tagged `example_`.  -->
 To get a visualization on these properties, we will use SuMMIT with decision trees on each view.
 %% Cell type:code id: tags:
 ``` python
-from multiview_platform.execute import execute
+from summit.execute import execute
 generator.to_hdf5_mc('supplementary_material')
 execute(config_path=os.path.join('supplementary_material','config_summit.yml'))
 ```
 %% Cell type:markdown id: tags:
 To extract the result, we need a small script that will fetch the right folder :
 %% Cell type:code id: tags:
 ``` python
 import os
 from datetime import datetime
 from IPython.display import display
 from IPython.display import IFrame
 def fetch_latest_dir(experiment_directories, latest_date=datetime(1560,12,25,12,12)):
    for experiment_directory in experiment_directories:
        experiment_time = experiment_directory.split("-")[0].split("_")[1:]
        experiment_time += experiment_directory.split('-')[1].split("_")[:2]
        experiment_time = map(int, experiment_time)
        dt = datetime(*experiment_time)
        if dt > latest_date:
            latest_date=dt
            latest_experiment_dir = experiment_directory
    return latest_experiment_dir
 experiment_directory = fetch_latest_dir(os.listdir(os.path.join('supplementary_material', 'tuto')))
 error_fig_path = os.path.join('supplementary_material','tuto', experiment_directory, "error_analysis_2D.html")
 IFrame(src=error_fig_path, width=900, height=500)
 ```
 %% Output
-    <IPython.lib.display.IFrame at 0x7f71927ec9e8>
+    <IPython.lib.display.IFrame at 0x7f149d3a6f98>
 %% Cell type:markdown id: tags:
 This graph represents the failure of each classifier on each sample. So a black rectangle on row i, column j means that classifier j always failed to classify example i.
 So, by [zooming in](link_to_gif), we can focus on several samples and we see that the type of samples are well defined as the mutual error ones are systematically misclassified by the decision trees, the redundant ones are well-classified and the complementary ones are classified only by a portion of the views.

--- a/demo/tutorials/sub_problems_configuration.ipynb
+++ b/demo/tutorials/sub_problems_configuration.ipynb
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,7 +6,7 @@
 Welcome to multiview_generator's documentation!
 ===============================================
-To install SMuDGE, clone the gitlab repository and run
+To install MAGE, clone the gitlab repository and run
 .. code-block::

--- a/multiview_generator/base.py
+++ b/multiview_generator/base.py
@@ -46,7 +46,6 @@ class MultiViewSubProblemsGenerator:
    :type n_classes: int
    :type n_views: int
    :type error_matrix: np.ndarray
-    :type latent_size_multiplicator: float
    :type n_features: int or array-like
    :type class_weights: float or array-like
    :type redundancy: float
@@ -60,7 +59,7 @@ class MultiViewSubProblemsGenerator:
    """
    def __init__(self, random_state=42, n_samples=100, n_classes=4, n_views=4,
-                 error_matrix=None, latent_size_multiplicator=2, n_features=3,
+                 error_matrix=None, n_features=3,
                 class_weights=1.0, redundancy=0.0, complementarity=0.0,
                 complementarity_level=3,
                 mutual_error=0.0, name="generated_dataset", config_file=None,
@@ -88,7 +87,6 @@ class MultiViewSubProblemsGenerator:
                                                type_needed=float).reshape(
                (n_classes, 1))
            self.complementarity_level = format_array(complementarity_level, n_classes, type_needed=int).reshape(((n_classes, 1)))
-            self.latent_size_mult = latent_size_multiplicator
            self._init_sub_problem_config(sub_problem_configurations,
                                          sub_problem_type)
            self.error_matrix = init_error_matrix(error_matrix, n_classes,
@@ -190,7 +188,7 @@ class MultiViewSubProblemsGenerator:
        report_string += "\n\n## Statistical analysis"
-        bayes_error = pd.DataFrame(self.bayes_error/self.n_samples_per_class,
+        bayes_error = pd.DataFrame(self.bayes_error,
                     columns=["Class " + str(i + 1)
                            for i in range(self.n_classes)],
                     index=['View ' + str(i + 1) for i in
@@ -211,8 +209,9 @@ class MultiViewSubProblemsGenerator:
        report_string += tabulate(dt_error, headers='keys', tablefmt='github')
-        self._plot_2d_error(output_path, error=self.error_2D, name="report_bayesian_error_2D.html")
+        if save:
-        self._plot_2d_error(output_path, error=self.error_2D_dt, name="report_dt_error_2D.html")
+            self._plot_2d_error(output_path, error=self.error_2D, file_name="report_bayesian_error_2D.html")
+            self._plot_2d_error(output_path, error=self.error_2D_dt, file_name="report_dt_error_2D.html")
        report_string += "\n\nThis report has been automatically generated on {}".format(datetime.now().strftime("%B %d, %Y at %H:%M:%S"))
        if save:
@@ -221,7 +220,7 @@ class MultiViewSubProblemsGenerator:
        self.report = report_string
        return report_string
-    def _plot_2d_error(self, output_path, error=None, name=""):
+    def _plot_2d_error(self, output_path, error=None, file_name=""):
        label_index_list = np.concatenate([np.where(self.y == i)[0] for i in
                                           np.unique(
                                               self.y)])
@@ -244,17 +243,19 @@ class MultiViewSubProblemsGenerator:
        fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
                          plot_bgcolor='rgba(0,0,0,0)')
        fig.update_xaxes(showticklabels=True, )
-        plotly.offline.plot(fig, filename=os.path.join(output_path, name),
+        plotly.offline.plot(fig, filename=os.path.join(output_path, self.name + file_name),
                            auto_open=False)
    def _gen_dt_error_mat(self, n_cv=10):
        # TODO : Seems to rely on random state, but unsure
        self.dt_error = np.zeros((self.n_classes, self.n_views))
        self.error_2D_dt = np.zeros((self.n_samples, self.n_views,))
+        self.dt_preds = np.zeros((self.n_samples, self.n_views,))
        classifiers = [generator.get_bayes_classifier() for generator in self._sub_problem_generators]
        for view_index, view_data in enumerate(self.dataset):
            pred = cross_val_predict(classifiers[view_index], view_data, self.y, cv=n_cv, )
+            self.dt_preds[:,view_index] = pred
            self.error_2D_dt[:, view_index] = np.equal(self.y, pred).astype(int)
            label_indices = [np.where(self.y == i)[0] for i in
                             range(self.n_classes)]

--- a/multiview_generator/base_strs.py
+++ b/multiview_generator/base_strs.py
-GENE = "SMuDGE"
+GENE = "MAGE"
-GENE_F = "Synthetic Multimodal Dataset Generation Engine"
+GENE_F = "Multiview Artificial Generation Engine"
 LINK = "https://gitlab.lis-lab.fr/dev/multiview_generator"
\ No newline at end of file
--- a/multiview_generator/gaussian_classes.py
+++ b/multiview_generator/gaussian_classes.py
+import numpy as np
+import itertools
+import math
+from scipy.special import erfinv
+from .utils import format_array, get_config_from_file, \
+    init_random_state, init_error_matrix, init_list
+from .base_strs import *
+from .base import MultiViewSubProblemsGenerator
+from multiview_generator import sub_problems
+class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
+    def __init__(self, random_state=42, n_samples=100, n_classes=4, n_views=4,
+                 error_matrix=None, n_features=3,
+                 class_weights=1.0, redundancy=0.05, complementarity=0.05,
+                 complementarity_level=3,
+                 mutual_error=0.01, name="generated_dataset", config_file=None,
+                 sub_problem_type="base", sub_problem_configurations=None,
+                 sub_problem_generators="StumpsGenerator", random_vertices=False
+                 , **kwargs):
+        """
+        :param random_state: int or np.random.RandomState object to fix the
+        random seed
+        :param n_samples: int representing the number of samples in the dataset
+        (the real number of samples can be different in the output dataset, as
+        it will depend on the class distribution of the samples)
+        :param n_classes: int the number of classes in the dataset
+        :param n_views: int the number of views in the dataset
+        :param error_matrix: the error matrix of size n_classes x n_views
+        :param n_features: list of int containing the number of features for
+        each view
+        :param class_weights: list of floats containing the proportion of
+        samples in each class.
+        :param redundancy: float controlling the ratio of redundant samples
+        :param complementarity: float controlling the ratio of complementary
+        samples
+        :param complementarity_level: float controlling the ratio of views
+        having a good description of the complementary samples.
+        :param mutual_error: float controlling the ratio of mutual error
+        samples
+        :param name: string naming the generated dataset
+        :param config_file: string path pointing to a yaml config file
+        :param sub_problem_type: list of string containing the class names for
+        each sub problem type
+        :param sub_problem_configurations: list of dict containing the specific
+        configuration for each sub-problem generator
+        :param kwargs: additional arguments
+        """
+        MultiViewSubProblemsGenerator.__init__(self, random_state=random_state,
+                                               n_samples=n_samples,
+                                               n_classes=n_classes,
+                                               n_views=n_views,
+                                               error_matrix=error_matrix,
+                                               n_features=n_features,
+                                               class_weights=class_weights,
+                                               redundancy=redundancy,
+                                               complementarity=complementarity,
+                                               complementarity_level=complementarity_level,
+                                               mutual_error=mutual_error,
+                                               name=name,
+                                               config_file=config_file,
+                                               sub_problem_type=sub_problem_type,
+                                               sub_problem_configurations=sub_problem_configurations,
+                                               **kwargs)
+        self.random_vertices = format_array(random_vertices, n_views, bool)
+        self.sub_problem_generators = format_array(sub_problem_generators, n_views, str)
+    def generate_multi_view_dataset(self, ):
+        """
+        This is the main method. It will generate a multiview dataset according
+        to the configuration.
+        To do so,
+        * it generates the labels of the multiview dataset,
+        * then it assigns all the subsets of samples (redundant, ...)
+        * finally, for each view it generates a monoview dataset according
+        to the configuration
+        :return: view_data a list containing the views np.ndarrays and y, the
+        label array.
+        """
+        # Generate the labels
+        self.error_2D = np.ones((self.n_samples, self.n_views))
+        # Generate the sample descriptions according to the error matrix
+        self._sub_problem_generators = [_ for _ in range(self.n_views)]
+        for view_index in range(self.n_views):
+            sub_problem_generator = getattr(sub_problems,
+                                            self.sub_problem_generators[view_index])(
+                n_classes=self.n_classes,
+                n_features=self.n_features[view_index],
+                random_vertices=self.random_vertices[view_index],
+                errors=self.error_matrix[:,view_index],
+                random_state=self.rs,
+                n_samples_per_class=self.n_samples_per_class,
+                **self.sub_problem_configurations[view_index])
+            vec = sub_problem_generator.gen_data()
+            self._sub_problem_generators[view_index] = sub_problem_generator
+            self.view_names[view_index] = "view_{}_{}".format(view_index, sub_problem_generator.view_name)
+            self.bayes_error[view_index, :] = sub_problem_generator.bayes_error/self.n_samples_per_class
+            self.generated_data[view_index, :, :,:self.n_features[view_index]] = vec
+            self.selected_vertices[view_index] = sub_problem_generator.selected_vertices
+            self.descriptions[view_index, :,:] = sub_problem_generator.descriptions
+        self.y = []
+        for ind, n_samples_ in enumerate(self.n_samples_per_class):
+            self.y += [ind for _ in range(n_samples_)]
+        self.y = np.array(self.y, dtype=int)
+        self.sample_ids = ["{}_l_{}".format(ind, self.y[ind]) for ind in
+                           range(self.n_samples)]
+        self.dataset = [np.zeros((self.n_total_samples,
+                                  self.n_features[view_index]))
+                        for view_index in range(self.n_views)]
+        self.assign_mutual_error()
+        self.assign_complementarity()
+        self.assign_redundancy()
+        self.get_distance()
+        return self.dataset, self.y
+    def assign_mutual_error(self):
+        """
+        Method assigning the mis-describing views to the mutual error samples.
+        """
+        for class_ind in range(self.n_classes):
+            mutual_start = np.sum(self.n_samples_per_class[:class_ind])
+            mutual_end = np.sum(self.n_samples_per_class[:class_ind])+self.mutual_error_per_class[class_ind]
+            for view_index in range(self.n_views):
+                if len(np.where(self.descriptions[view_index, class_ind, :]==-1)[0])<self.mutual_error_per_class[class_ind]:
+                    raise ValueError('For class {}, view {}, the amount of '
+                                     'available mis-described samples is {}, '
+                                     'and for mutual error to be assigned MAGE '
+                                     'needs {}, please reduce the amount of '
+                                     'mutual error or increase the error in '
+                                     'class {}, view {}'.format(class_ind,
+                                                                view_index,
+                                                                len(np.where(self.descriptions[view_index, class_ind, :]==-1)[0]),
+                                                                self.mutual_error_per_class[class_ind],
+                                                                class_ind,
+                                                                view_index))
+                mis_described_random_ind = self.rs.choice(np.where(self.descriptions[view_index, class_ind, :]==-1)[0], self.mutual_error_per_class[class_ind], replace=False)
+                self.dataset[view_index][mutual_start:mutual_end, :] = self.generated_data[view_index, class_ind, mis_described_random_ind, :self.n_features[view_index]]
+                self.error_2D[mutual_start:mutual_end, view_index] = 0
+                self.descriptions[view_index, class_ind, mis_described_random_ind] = 0
+            for sample_ind in np.arange(start=mutual_start, stop=mutual_end):
+                self.sample_ids[sample_ind] = self.sample_ids[sample_ind]+"_m"
+    def assign_complementarity(self):
+        """
+        Method assigning mis-described and well-described views to build
+        complementary samples
+        """
+        self.complementarity_ratio = 0
+        for class_ind in range(self.n_classes):
+            complem_level = int(self.complementarity_level[class_ind])
+            complem_start = np.sum(self.n_samples_per_class[:class_ind])+self.mutual_error_per_class[class_ind]
+            complem_ind = 0
+            while complem_level != 0:
+                avail_errors = np.array([len(np.where(self.descriptions[view_index, class_ind, :] ==-1)[0]) for view_index in range(self.n_views)])
+                avail_success = np.array([len(np.where(self.descriptions[view_index, class_ind, :] == 1)[0]) for view_index in range(self.n_views)])
+                cond=True
+                while cond:
+                    if np.sum(avail_errors) == 0 or np.sum(avail_success) < self.n_views - complem_level:
+                        cond = False
+                        break
+                    elif len(np.where(avail_errors > 0)[0]) < complem_level:
+                        cond = False
+                        break
+                    self.sample_ids[complem_start+complem_ind] += "_c"
+                    self.complementarity_ratio += 1/self.n_samples
+                    sorted_inds = np.argsort(-avail_errors)
+                    selected_failed_views = sorted_inds[:complem_level]
+                    sorted_inds = np.array([i for i in np.argsort(-avail_success) if
+                                            i not in selected_failed_views])
+                    selected_succeeded_views = sorted_inds[
+                                               :self.n_views - complem_level]
+                    for view_index in range(self.n_views):
+                        if view_index in selected_failed_views:
+                            self.error_2D[complem_start+complem_ind, view_index] = 0
+                            chosen_ind = int(self.rs.choice(np.where(self.descriptions[view_index, class_ind, :]==-1)[0],size=1, replace=False))
+                            self.dataset[view_index][complem_start+complem_ind, :] = self.generated_data[view_index, class_ind, chosen_ind, :self.n_features[view_index]]
+                            self.descriptions[view_index, class_ind, chosen_ind] = 0
+                            self.sample_ids[complem_start+complem_ind] += "_{}".format(view_index)
+                            avail_errors[view_index]-=1
+                        elif view_index in selected_succeeded_views:
+                            chosen_ind = int(self.rs.choice(np.where(self.descriptions[view_index, class_ind, :]==1)[0],size=1, replace=False))
+                            self.dataset[view_index][complem_start + complem_ind,:] = self.generated_data[view_index, class_ind, chosen_ind, :self.n_features[view_index]]
+                            self.descriptions[view_index, class_ind, chosen_ind] = 0
+                            avail_success[view_index] -= 1
+                    complem_ind += 1
+                complem_level -= 1
+            self.n_complem[class_ind] = complem_ind
+    def assign_redundancy(self):
+        """
+        Method assigning the well-describing views to the redundant samples.
+
+        For every class, the redundant samples are the rows of the class that
+        come after its mutual-error samples and its complementary samples.
+        In each view, all the descriptions still flagged ``1`` in
+        ``self.descriptions`` are copied into those rows of ``self.dataset``
+        and then flagged ``0``. The ids of the redundant samples receive an
+        ``"_r"`` suffix and ``self.real_redundancy_level`` is increased by
+        ``1/self.n_samples`` for each of them.
+
+        :raises ValueError: if, for a class and a view, fewer descriptions
+            flagged ``1`` remain than there are redundant rows to fill while
+            some descriptions flagged ``-1`` are still left.
+        """
+        self.real_redundancy_level=0
+        for class_ind in range(self.n_classes):
+            # First redundant row: class offset, plus the class's mutual-error
+            # samples, plus the complementary samples counted in n_complem.
+            redun_start = int(np.sum(self.n_samples_per_class[:class_ind])+self.mutual_error_per_class[class_ind]+self.n_complem[class_ind])
+            # One past the last row of the class.
+            redun_end = np.sum(self.n_samples_per_class[:class_ind+1])
+            for view_index in range(self.n_views):
+                # Not enough good descriptions left to fill the redundant rows
+                # although erroneous ones are still unused: the requested
+                # configuration cannot be satisfied.
+                if len(np.where(self.descriptions[view_index, class_ind, :] == 1)[0]) < redun_end - redun_start and len(np.where(self.descriptions[view_index, class_ind, :] == -1)[0])>0:
+                    raise ValueError("For class {}, view {}, reduce the error "
+                                     "(now: {}), or increase the complemetarity "
+                                     "level (now: {}), there is not enough good "
+                                     "descriptions with the current "
+                                     "configuration".format(class_ind,
+                                                            view_index,
+                                                            self.error_matrix[class_ind,
+                                                                              view_index],
+                                                            self.complementarity_level[class_ind]))
+                remaining_good_desc = np.where(self.descriptions[view_index, class_ind, :] == 1)[0]
+                # NOTE(review): this assignment only succeeds when the number
+                # of remaining good descriptions is compatible with
+                # redun_end - redun_start (NumPy shape rules) - confirm that
+                # the previous steps guarantee it.
+                self.dataset[view_index][redun_start:redun_end,:] = self.generated_data[view_index, class_ind,remaining_good_desc, :self.n_features[view_index]]
+                # Flag the copied descriptions as used.
+                self.descriptions[view_index, class_ind, remaining_good_desc] = 0
+            for sample_ind in np.arange(start=redun_start, stop=redun_end):
+                self.sample_ids[sample_ind] = self.sample_ids[sample_ind] + "_r"
+                self.real_redundancy_level+=1/self.n_samples
+    def get_distance(self):
+        """
+        Method that records the distance of each description to the ideal
+        decision limit, will be used later to quantify more precisely the
+        quality of a description.
+
+        Fills ``self.distances``, an array of shape (n_views, n_samples), with
+        the smallest absolute feature value of each sample in each view, and
+        appends ``"-<view_index>_<distance rounded to 2 decimals>"`` to the
+        id of the sample in ``self.sample_ids``.
+        """
+        self.distances = np.zeros((self.n_views, self.n_samples))
+        for view_index, view_data in enumerate(self.dataset):
+            for sample_ind, data in enumerate(view_data):
+                # The closest dimension to the limit
+                # (presumably the limit lies at 0 on every feature - TODO
+                # confirm against the data generation step).
+                dist = np.min(np.abs(data))
+                # dist = np.linalg.norm(data-self.selected_vertices[view_index][self.y[sample_ind]])
+                # One suffix is appended per view, so each id ends up with
+                # n_views "-<view>_<dist>" parts.
+                self.sample_ids[sample_ind] += "-{}_{}".format(view_index, round(dist, 2))
+                self.distances[view_index,sample_ind] = dist
+    def _get_generator_report(self, view_index, doc_type=".md"):
+        """
+        Return the description of the generator used for a view.
+
+        :param view_index: index of the view (currently unused)
+        :param doc_type: extension of the report file (currently unused)
+        :return: a constant string naming the generator
+        """
+        return "home made gaussian generator"
+    def _init_sub_problem_config(self, sub_problem_configs, sub_problem_type):
+        """
+        Initialize the sub problem configurations.
+
+        Stores the result in ``self.sub_problem_configurations``, a list with
+        one configuration per view.
+
+        :param sub_problem_configs: None to use the default configuration
+            (``n_clusters_per_class`` = 1, ``class_sep`` = 1.0) for every
+            view, otherwise a value forwarded to ``init_list`` (a dict or a
+            list)
+        :param sub_problem_type: currently unused by this method
+        :return: None
+        """
+        if sub_problem_configs is None:
+            # Default: the same configuration, built separately for each view.
+            self.sub_problem_configurations = [
+                {"n_clusters_per_class": 1,
+                 "class_sep": 1.0, }
+                for _ in range(self.n_views)]
+        else:
+            self.sub_problem_configurations = init_list(sub_problem_configs,
+                                                        size=self.n_views,
+                                                        type_needed=dict)
+    def _init_base_arguments(self):
+        """
+        Initialize the attributes derived from the generator's parameters.
+
+        Computes the number of samples per class, draws the pool of candidate
+        descriptions (``self.generated_data``), and creates the empty
+        containers and counters filled by the other methods of the class.
+        """
+        # astype(int) truncates, so the per-class counts may sum to less than
+        # the requested n_samples; n_samples is then reset to the real total.
+        self.n_samples_per_class = (
+                self.class_weights * self.n_samples).astype(int)
+        self.n_max_samples = np.max(self.n_samples_per_class)
+        self.n_samples = np.sum(self.n_samples_per_class)
+        self.n_complem  =np.zeros(self.n_classes)
+        self.n_max_features = np.max(self.n_features)
+        # Pool of candidate descriptions, one uniform draw in
+        # [-latent_size_mult, latent_size_mult) per
+        # (view, class, sample, feature).
+        self.generated_data = self.rs.uniform(low=-self.latent_size_mult, high=self.latent_size_mult, size=(self.n_views, self.n_classes, self.n_max_samples, self.n_max_features))
+        # One flag per candidate description, initialised to 0.
+        self.descriptions = np.zeros((self.n_views, self.n_classes, self.n_max_samples,))
+        self.n_total_samples = np.sum(self.n_samples_per_class)
+        # Shuffled sample indices, split into consecutive chunks, one chunk
+        # per class.
+        sample_indices = np.arange(int(np.sum(self.n_samples_per_class)))
+        self.rs.shuffle(sample_indices)
+        self.class_sample_indices = [
+            sample_indices[sum(self.n_samples_per_class[:ind]):
+                            sum(self.n_samples_per_class[:ind + 1])]
+            for ind in range(self.n_classes)]
+        # The comprehensions below only build placeholders of the right
+        # length (their items are the integers 0..n-1).
+        self.well_described = [[_ for _ in range(self.n_views)] for _ in
+                               range(self.n_classes)]
+        self.misdescribed = [[_ for _ in range(self.n_views)] for _ in
+                             range(self.n_classes)]
+        # NOTE(review): redundancy_indices and mutual_error_indices are
+        # assigned again further down; these first values are overwritten.
+        self.redundancy_indices = [_ for _ in range(self.n_classes)]
+        self.mutual_error_indices = [_ for _ in range(self.n_classes)]
+        self.complementarity_samples = [_ for _ in range(self.n_classes)]
+        self.good_views_indices = [_ for _ in range(self.n_classes)]
+        self.bad_views_indices = [_ for _ in range(self.n_classes)]
+        # Shallow copy: the outer list is new, the index arrays are shared.
+        self.available_init_indices = self.class_sample_indices.copy()
+        self.sample_ids = ["sample_{}".format(ind)
+                            for ind
+                            in range(int(np.sum(self.n_samples_per_class)))]
+        self.bayes_error = np.zeros((self.n_views, self.n_classes))
+        self.sub_problems = [[] for _ in range(self.n_views)]
+        # Ratios converted to a number of samples per class (truncated).
+        self.mutual_error_per_class = np.array(
+            [int(float(self.mutual_error[class_ind]) * n_sample_) for class_ind, n_sample_ in
+             enumerate(self.n_samples_per_class)])
+        self.redundancy_per_class = np.array(
+            [int(self.redundancy[class_ind] * n_sample_) for class_ind, n_sample_ in enumerate(self.n_samples_per_class)])
+        # One zero-filled (n_samples, n_features[view]) array per view.
+        self.view_data = [np.zeros((self.n_samples, self.n_features[view_ind])) for view_ind in range(self.n_views)]
+        self.all_mis_described = [[] for _ in range(self.n_views)]
+        self.all_well_described = [[] for _ in range(self.n_views)]
+        self.selected_vertices = [_ for _ in range(self.n_views)]
+        self.avail_well_described = [[] for _ in range(self.n_views)]
+        self.avail_mis_described = [[] for _ in range(self.n_views)]
+        # Final values of the two attributes first assigned above.
+        self.mutual_error_indices = [[] for _ in range(self.n_views)]
+        self.redundancy_indices = [[] for _ in range(self.n_views)]
+        self.complementarity_indices = [[[] for _ in range(self.n_classes)] for _
+                                   in
+                                   range(self.n_views)]
+        self.complem_names = [[] for _ in range(self.n_classes)]
+        self.complem_error = [[] for _ in range(self.n_classes)]
\ No newline at end of file
--- a/multiview_generator/sub_problems.py
+++ b/multiview_generator/sub_problems.py
@@ -5,9 +5,21 @@ from scipy.special import erfinv
 import yaml
-class BaseSubProblem():
+class BaseSubProblem:
+    """
+    The base class for all the sub-problem generators.
+    """
    def __init__(self, n_classes=2, n_features=2, random_vertices=True, errors=np.array([0.5,0.5]), random_state=np.random.RandomState(42), n_samples_per_class=np.array([100,100]), **configuration):
+        """
+        :param n_classes: The number of classes
+        :param n_features: The number of features describing the samples
+        :param errors: The error rate for each class
+        :param random_state: A numpy.random.RandomState object
+        :param n_samples_per_class: A list containing the number of samples for each class
+        :param configuration: sub-problem specific configuration.
+        """
        self.n_classes = n_classes
        self.random_vertices = random_vertices
        self.errors = errors
@@ -20,6 +32,11 @@ class BaseSubProblem():
        self.view_name = "generated"
    def gen_report(self):
+        """
+        General method to generate the report on the view.
+        :return: A string containing the general report for the view
+        """
        view_string = "\n\nThis view is generated with {}, with the following configuration : \n```yaml\n".format(
            self.__class__.__name__)
        view_string += yaml.dump(self.config,
@@ -34,10 +51,16 @@ class StumpsGenerator(BaseSubProblem):
    def gen_data(self):
        """
        Generates the samples according to gaussian distributions with scales
-        computed with the given error and class separation
+        computed with the given error and class separation. This sub-problem is
+        easily understandable by a decision tree.
+        The features are built as :
+        relevant_features : the  math.ceil(math.log2(self.n_classes)) first ones,
+        uniform noise features : all the remaining ones
-        :param view_index:
+        :return: data a np.ndarray of dimension n_classes, n_samples_per_class,
-        :return:
+        n_features containing the samples' descriptions, sorted by class
        """
        self.n_relevant_features = math.ceil(math.log2(self.n_classes))
        self.view_name = "stumps"
@@ -83,6 +106,9 @@ class StumpsGenerator(BaseSubProblem):
        return data
    def gen_report(self):
+        """
+        Generates the specific report for StumpsGenerator.
+        """
        base_str = BaseSubProblem.gen_report(self)
        base_str += "\n\nThis view has {} features, among which {} are relevant for classification (they are the {} first columns of the view) the other are filled with uniform noise.".format(
            self.n_features, self.n_relevant_features, self.n_relevant_features)
@@ -91,18 +117,16 @@ class StumpsGenerator(BaseSubProblem):
    def get_bayes_classifier(self):
        from sklearn.tree import DecisionTreeClassifier
-        return DecisionTreeClassifier(max_depth=1)
+        return DecisionTreeClassifier(max_depth=math.ceil(math.log2(self.n_classes)))
 class TreesGenerator(BaseSubProblem):
-    """We stay with depth 2 trees ATM"""
+    """ Work in progress : Similar generator as StumpsGenerator, but that
+    generates several blobs per class """
    def gen_data(self):
        """
-        Generates the samples according to gaussian distributions with scales
+        WIP
-        computed with the given error and class separation
-        :param view_index:
-        :return:
        """
        self.n_relevant_features = math.ceil(math.log2(self.n_classes))
        self.view_name = "tree_depth_2"
@@ -142,7 +166,6 @@ class TreesGenerator(BaseSubProblem):
            # mis_described += list(np.unique(np.where(
            #     np.any(abs(vec[class_ind] - center_coord)>class_sep, axis=1))[0]))
-            # print(len(mis_described)*2/self.n_samples_per_class)
            n_samples_per_blob = int(self.n_samples_per_class[class_ind]/(self.n_relevant_features+1))
            external_error_percentage = self.n_relevant_features / (
                        self.n_relevant_features * 2 + self.n_relevant_features ** 2)
@@ -151,7 +174,6 @@ class TreesGenerator(BaseSubProblem):
                        1 / self.n_relevant_features) - 1)))
            cov = np.identity(
                self.n_relevant_features) * external_scale**2
-            # print(internal_scale, external_scale)
            for dim_index, update_coord in enumerate(center_coord):
                beg = n_samples+dim_index*n_samples_per_blob
                end = n_samples+(dim_index+1)*n_samples_per_blob
@@ -185,6 +207,9 @@ class TreesGenerator(BaseSubProblem):
        return data
    def gen_report(self):
+        """
+        WIP
+        """
        base_str = BaseSubProblem.gen_report(self)
        base_str += "\n\nThis view has {} features, among which {} are relevant for classification (they are the {} first columns of the view).".format(self.n_features, self.n_relevant_features, self.n_relevant_features)
        base_str += "\n\n Its empirical bayesian classifier is a decision tree of depth 3"
@@ -194,22 +219,27 @@ class TreesGenerator(BaseSubProblem):
        from sklearn.tree import DecisionTreeClassifier
        return DecisionTreeClassifier(max_depth=2)
 class RingsGenerator(BaseSubProblem):
    def gen_data(self):
        """
        Generates the samples according to gaussian distributions with scales
-        computed with the given error and class separation
+        computed with the given error and class separation. The generator first
+        computes a radius according to the gaussian distribution, then
+        generates n_features-1 random angles to build the polar coordinates of
+        the samples. The dataset returned is the cartesian version of this
+        "polar" dataset.
-        :param view_index:
+        :return: data a np.ndarray of dimension n_classes, n_samples_per_class,
-        :return:
+        n_features containing the samples' descriptions, sorted by class
        """
        if self.n_features<2:
            raise ValueError("n_features for view {} must be at least 2, (now: {})".format(1, self.n_features))
        self.view_name = "rings"
        data = np.zeros((self.n_classes, max(self.n_samples_per_class), self.n_features))
        class_sep = self.config["class_sep"]
-        vertices = (np.arange(self.n_classes)+2)*class_sep
+        vertices = (np.arange(self.n_classes)+1)*class_sep
        if self.random_vertices == True:
            selected_vertices = self.rs.choice(np.arange(len(vertices)),
@@ -222,8 +252,12 @@ class RingsGenerator(BaseSubProblem):
        for class_ind, center_coord in enumerate(
                self.selected_vertices):
            error = self.errors[class_ind]
+            if class_ind==0 or class_ind==self.n_classes-1:
                scale = ((class_sep/2) / math.sqrt(2)) *  (1 /
                    erfinv(1 - 2*error))
+            else:
+                scale = ((class_sep/2) / math.sqrt(2)) *  (1 /
+                    erfinv( 2*(1-error)**(1/2)-1))
            radii[class_ind, :] = self.rs.normal(center_coord, scale,
                                                 self.n_samples_per_class[
                                                     class_ind])
@@ -254,6 +288,9 @@ class RingsGenerator(BaseSubProblem):
        return data
    def gen_report(self):
+        """
+        Generates the specific report for RingsGenerator.
+        """
        base_str = BaseSubProblem.gen_report(self)
        base_str += "\n\nThis view has {} features, all of them are relevant for classification.".format(
            self.n_features)
@@ -262,9 +299,13 @@ class RingsGenerator(BaseSubProblem):
    def get_bayes_classifier(self):
        from sklearn.svm import SVC
-        return SVC(kernel='rbf', gamma=0.1, C=0.001)
+        return SVC(kernel='rbf', gamma='scale', C=0.1)
 def to_cartesian(radius, angles):
+    """
+    Transforms polar coordinates to cartesian coordinates.
+    """
    a = np.concatenate((np.array([2 * np.pi]), angles))
    si = np.sin(a)
    si[0] = 1

--- a/multiview_generator/tests/test_multiple_sub_problems.py
+++ b/multiview_generator/tests/test_multiple_sub_problems.py
-import unittest
+# import unittest
-import numpy as np
+# import numpy as np
+#
-from ..multiple_sub_problems import MultiViewSubProblemsGenerator
+# from ..multiple_sub_problems import MultiViewSubProblemsGenerator
+#
+#
-class Test_MultiViewSubProblemsGenerator(unittest.TestCase):
+# class Test_MultiViewSubProblemsGenerator(unittest.TestCase):
+#
-    @classmethod
+#     @classmethod
-    def setUpClass(cls):
+#     def setUpClass(cls):
-        pass
+#         pass
+#
-    @classmethod
+#     @classmethod
-    def tearDownClass(cls):
+#     def tearDownClass(cls):
-        pass
+#         pass
+#
-    def test_simple(self):
+#     def test_simple(self):
-        gene = MultiViewSubProblemsGenerator()
+#         gene = MultiViewSubProblemsGenerator()
--- a/multiview_generator/utils.py
+++ b/multiview_generator/utils.py
@@ -36,26 +36,49 @@ def format_array(input, size, type_needed=int):
 def get_config_from_file(file_path):
+    """
+    Loads the configuration from the yaml config file
+    :param file_path: path to the config file.
+    :return: the content of the file as parsed by ``yaml.safe_load``
+    """
     with open(file_path) as config_file:
         yaml_config = yaml.safe_load(config_file)
     return yaml_config
 def init_class_weights(class_weights, n_classes):
+    """
+    Initializes the class weights. Sets a uniform distribution if no
+    distribution is specified.
+    :param class_weights: the weight of each class, or None
+    :param n_classes: the number of classes, used only when class_weights is None
+    :return: the class weights divided by their sum
+    """
     if class_weights is None:
         class_weights = np.ones(n_classes)
     return class_weights / np.sum(class_weights)
-def init_sub_problem_config(sub_problem_configs, n_views):
+# def init_sub_problem_config(sub_problem_configs, n_views):
-    if sub_problem_configs is None:
+#     if sub_problem_configs is None:
-        return [{"n_informative":1,
+#         return [{"n_informative":1,
-                 "n_redundant":1,
+#                  "n_redundant":1,
-                 "n_repeated":1,
+#                  "n_repeated":1,
-                 "n_clusters_per_class":1,
+#                  "n_clusters_per_class":1,
-                 "class_sep":1,} for _ in range(n_views)]
+#                  "class_sep":1,} for _ in range(n_views)]
 def init_error_matrix(error_matrix, n_classes, n_views):
+    """
+    Initializes the error matrix
+    :param error_matrix:
+    :param n_classes:
+    :param n_views:
+    :return:
+    """
    if error_matrix is None:
        error_matrix = np.zeros((n_classes, n_views)) + 0.3
    elif isinstance(error_matrix, np.ndarray):
@@ -78,6 +101,12 @@ def init_error_matrix(error_matrix, n_classes, n_views):
 def init_random_state(random_state):
+    """
+    Initializes the random state.
+    :param random_state:
+    :return:
+    """
    if isinstance(random_state, int):
        rs = np.random.RandomState(random_state)
    elif isinstance(random_state, np.random.RandomState):
@@ -90,6 +119,14 @@ def init_random_state(random_state):
 def init_array_attr(attr, n_repeat, base_val=0):
+    """
+    Transforms a unique attribute into an array with the same value.
+    :param attr:
+    :param n_repeat:
+    :param base_val:
+    :return:
+    """
    if attr is None:
        return np.ones((n_repeat, 1)) * base_val
    elif type(attr) == float or type(attr) == int:
@@ -101,6 +138,14 @@ def init_array_attr(attr, n_repeat, base_val=0):
 def init_list(input, size, type_needed=dict):
+    """
+    Transforms a unique attribute into a list with the same value.
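+    :param input: the value to repeat, or a list
+    :param size: the length of the list built from a single value
+    :param type_needed: the type a single value is expected to have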
+    :return:
+    """
    if isinstance(input, type_needed):
        return [input for _ in range(size)]
    elif isinstance(input, list):

--- a/requirements.txt
+++ b/requirements.txt
-numpy
+.
-scipy
\ No newline at end of file
-scikit-learn>=0.19
-plotly
-h5py
-pyyaml
-jupyter
-tabulate
-pandas
-sphinx>=1.8
-numpydoc
-pandoc
-nbsphinx
-nbsphinx_link
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -143,7 +143,8 @@ def setup_package():
    version = get_version()
    multiview_generator_dir = 'multiview_generator'
    set_version(multiview_generator_dir, version)
-    description = 'A multiview dataset generator '
+    description = 'MAGE : Multi-view Artificial Generation Engine, a non-naïve ' \
+                  'multiview dataset generator '
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, 'README.rst'), encoding='utf-8') as readme:
        long_description = readme.read()
@@ -171,13 +172,15 @@ def setup_package():
        'Operating System :: POSIX :: Linux',
        'Operating System :: MacOS'],
    keywords = ('machine learning, supervised learning, classification, '
-                'ensemble methods, boosting, kernel')
+                'datat generation, multi-view, multi-modal, multi-class')
    packages = find_packages(exclude=['*.tests'])
-    install_requires = ['scikit-learn>=0.19', 'numpy', 'scipy', 'cvxopt' ]
+    install_requires = ['scikit-learn>=0.19', 'numpy', 'scipy', "plotly",
+                        "h5py", 'pyyaml', 'tabulate', 'pandas', ]
    python_requires = '>=3.5'
    extras_require = {
        'dev': ['pytest', 'pytest-cov'],
-        'doc': ['sphinx', 'numpydoc', 'sphinx_gallery', 'matplotlib']}
+        'doc': ['sphinx>=1.8', 'numpydoc', 'sphinx_gallery', 'matplotlib', "jupyter",
+                'pandoc', 'nbshpinx', 'nbsphinx_link']}
    include_package_data = True
    setup(name=name,