From 14ea72d88d7e90b5b25acb0ff16ffa7c568ad4f6 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 17 Oct 2019 08:37:12 -0400
Subject: [PATCH] didi some doc

---
 .../utils/dataset.py                          | 102 +++++++++++++-----
 1 file changed, 77 insertions(+), 25 deletions(-)

diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
index e5f204a9..6c930162 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
@@ -13,22 +13,53 @@ from scipy import sparse
 
 class Dataset():
     """
-    Dataset
+    Class of Dataset
 
-    This class
+    This class is used to encapsulate the multiview dataset
 
 
     Parameters
     ----------
-    views
-    labels
-    are_sparse
-    file_name
-    view_names
-    path
-    hdf5_file
-    labels_names
-    is_temp
+    views : list of numpy arrays or None
+        The list containing each view of the dataset as a numpy array of shape
+        (nb examples, nb features).
+
+    labels : numpy array or None
+        The labels for the multiview dataset, of shape (nb examples, ).
+
+    are_sparse : list of bool, or None
+        The list of booleans telling whether each view is sparse or not.
+
+    file_name : str, or None
+        The name of the hdf5 file that will be created to store the multiview
+        dataset.
+
+    view_names : list of str, or None
+        The name of each view.
+
+    path : str, or None
+        The path where the hdf5 dataset file will be stored.
+
+    hdf5_file : h5py.File object, or None
+        If not None, the dataset will be imported directly from this file.
+
+    labels_names : list of str, or None
+        The name for each unique value of the labels given in labels.
+
+    is_temp : bool
+        Used if a temporary dataset has to be used by the benchmark.
+
+    Attributes
+    ----------
+    dataset : h5py.File object
+        The h5py file object that points to the hdf5 dataset on the disk.
+
+    nb_view : int
+        The number of views in the dataset.
+
+    view_dict : dict
+        The dictionary with the name of each view as the keys and their indices
+        as values.
     """
 
     # The following methods use hdf5
@@ -36,20 +67,6 @@ class Dataset():
     def __init__(self, views=None, labels=None, are_sparse=False,
                  file_name="dataset.hdf5", view_names=None, path="",
                  hdf5_file=None, labels_names=None, is_temp=False):
-        """
-
-        Parameters
-        ----------
-        views
-        labels
-        are_sparse
-        file_name
-        view_names
-        path
-        hdf5_file
-        labels_names
-        is_temp
-        """
         self.is_temp = False
         if hdf5_file is not None:
             self.dataset=hdf5_file
@@ -89,19 +106,54 @@ class Dataset():
             self.update_hdf5_dataset(os.path.join(path, file_name))
 
     def rm(self):
+        """
+        Method used to delete the dataset file on the disk if the dataset is
+        temporary.
+
+        Returns
+        -------
+
+        """
         filename = self.dataset.filename
         self.dataset.close()
         if self.is_temp:
             os.remove(filename)
 
     def get_view_name(self, view_idx):
+        """
+        Method to get a view's name from its index.
+
+        Parameters
+        ----------
+        view_idx : int
+            The index of the view in the dataset
+
+        Returns
+        -------
+            The view's name.
+
+        """
         return self.dataset["View"+str(view_idx)].attrs["name"]
 
     def init_attrs(self):
+        """
+        Used to init the two attributes that are modified when self.dataset
+        changes
+
+        Returns
+        -------
+
+        """
         self.nb_view = self.dataset.get("Metadata").attrs["nbView"]
         self.view_dict = self.get_view_dict()
 
     def get_nb_examples(self):
+        """
+        Used to get the number of examples available.
+
+        Returns
+        -------
+        """
         return self.dataset.get("Metadata").attrs["datasetLength"]
 
     def get_view_dict(self):
-- 
GitLab