From 7fb76c8d4e540db4312c0fcdfdcc10c8f02a63fc Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Wed, 13 Feb 2019 14:06:57 -0500
Subject: [PATCH] Proof verification done, added noise option

---
 README.md                                     |  7 +-
 docs/source/conf.py                           | 29 ++++----
 docs/source/readme.rst                        |  6 +-
 docs/source/sphinxext/recommon.py             |  4 ++
 .../MonoMultiViewClassifiers/ExecClassif.py   |  2 +-
 .../Monoview/Additions/BoostUtils.py          | 10 +--
 .../Monoview/Additions/QarBoostUtils.py       | 72 ++++++++++++-------
 .../MonoviewClassifiers/CGreed.py             | 16 +++--
 .../utils/GetMultiviewDb.py                   | 68 +++++++++++++-----
 .../utils/execution.py                        | 15 ++--
 10 files changed, 154 insertions(+), 75 deletions(-)
 create mode 100644 docs/source/sphinxext/recommon.py

diff --git a/README.md b/README.md
index 47dad1a5..7782eb97 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,10 @@ With `top_directory` being the last directory in the `pathF` argument
 ##### If you already have an HDF5 dataset file it must be formatted as : 
-One dataset for each view called `ViewX` with `X` being the view index with 2 attribures : 
+One dataset for each view called `ViewX` with `X` being the view index with 4 attributes : 
 * `attrs["name"]` a string for the name of the view
-* `attrs["name"]` a boolean specifying whether the view is sparse or not
+* `attrs["sparse"]` a boolean specifying whether the view is sparse or not
+* `attrs["ranges"]` a `np.array` containing the ranges of each attribute in the view (for ex. : for a pixel the range will be 255, for a real attribute in [-1,1], the range will be 2).
+* `attrs["limits"]` a `np.array` containing all the limits of the attributes int he view. (for ex. : for a pixel the limits will be `[0, 255]`, for a real attribute in [-1,1], the limits will be `[-1,1]`).
+ 
 
 One dataset for the labels called `Labels` with one attribute : 
 * `attrs["names"]` a list of strings encoded in utf-8 namig the labels in the right order
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2a8198dc..a62e70c9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from recommonmark.parser import CommonMarkParser
 from recommonmark.transform import AutoStructify
+# import os, sys
 #
 # MultiviewPlatform documentation build configuration file, created by
 # sphinx-quickstart on Mon Jan 29 17:13:09 2018.
@@ -31,6 +32,8 @@ from recommonmark.transform import AutoStructify
 
 add_module_names = False
 
+# sys.path.append(os.path.abspath('sphinxext'))
+
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
@@ -44,7 +47,9 @@ extensions = ['sphinx.ext.autodoc',
     'sphinx.ext.ifconfig',
     'sphinx.ext.viewcode',
     'sphinx.ext.githubpages',
-    'sphinx.ext.napoleon']
+    'sphinx.ext.napoleon',
+    'recommonmark']
+
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -52,19 +57,20 @@ templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-source_suffix = ['.rst', '.md']
+source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'}
 # source_suffix = '.rst'
+# source_suffix = ['.rst', '.md']
 
-source_parsers = {
-   '.md': CommonMarkParser,
-}
+# source_parsers = {
+#    '.md': CommonMarkParser,
+# }
 
 # The master toctree document.
 master_doc = 'index'
 
 # General information about the project.
 project = u'MultiviewPlatform'
-copyright = u'2018, Baptiste BAUVIN'
+copyright = u'2019, Baptiste BAUVIN'
 author = u'Baptiste BAUVIN'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -176,9 +182,8 @@ texinfo_documents = [
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {'https://docs.python.org/': None}
 
-def setup(app):
-    app.add_config_value('recommonmark_config', {
-            'url_resolver': lambda url: github_doc_root + url,
-            'auto_toc_tree_section': 'Contents',
-            }, True)
-    app.add_transform(AutoStructify)
\ No newline at end of file
+# def setup(app):
+#     app.add_config_value('recommonmark_config', {
+#             'auto_toc_tree_section': 'Contents',
+#             }, True)
+#     app.add_transform(AutoStructify)
\ No newline at end of file
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 8ba7870d..33481978 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -1,3 +1,7 @@
 Read me
 =======
-   .. include:: ../../README.md
\ No newline at end of file
+
+.. toctree::
+   :maxdepth: 1
+
+   ../../README.md
\ No newline at end of file
diff --git a/docs/source/sphinxext/recommon.py b/docs/source/sphinxext/recommon.py
new file mode 100644
index 00000000..6b1cb8c8
--- /dev/null
+++ b/docs/source/sphinxext/recommon.py
@@ -0,0 +1,4 @@
+from recommonmark.transform import AutoStructify
+
+def setup(app):
+    app.add_transform(AutoStructify)
\ No newline at end of file
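
Note that nothing in this patch actually loads the new extension: in `conf.py` both the `sys.path` tweak and the old `setup()` hook are commented out. A hedged sketch of what wiring it up would presumably look like in `docs/source/conf.py`, based on the commented-out line added there (the `'recommon'` entry is hypothetical, not enabled by this patch):

```python
import os
import sys

# Make docs/source/sphinxext importable so Sphinx can find recommon.py.
sys.path.append(os.path.abspath('sphinxext'))

extensions = [
    # ... the extensions already listed in conf.py ...
    'recommonmark',
    'recommon',  # hypothetical: would register the AutoStructify transform defined above
]
```
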
diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
index bf6e0b12..f445fbe3 100644
--- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
+++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
@@ -460,7 +460,7 @@ def execClassif(arguments):
     getDatabase = execution.getDatabaseFunction(args.name,args.type)
 
     DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
-                                             args.CL_classes, randomState, args.full)
+                                             args.CL_classes, randomState, args.full, args.add_noise, args.noise_std)
 
     splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index 503bae21..43c92866 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -669,12 +669,13 @@ class ConvexProgram(object):
         return signs
 
 
-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None):
+def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set_name="train"):
     if type(name) is not str:
         name = " ".join(name.getConfig().strip().split(" ")[:2])
     if bounds:
         f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_title(name+" during train for "+classifier_name)
+        ax.set_ylim(bottom=0.0, top=1.0)
+        ax.set_title(name+" during "+set_name+" for "+classifier_name)
         x = np.arange(len(train_accuracies))
         scat = ax.scatter(x, np.array(train_accuracies), marker=".")
         if boosting_bound:
@@ -690,7 +691,8 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
         plt.close()
     else:
         f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_title(name+" during train for "+classifier_name)
+        ax.set_ylim(bottom=0.0, top=1.0)
+        ax.set_title(name + " during "+set+" for " + classifier_name)
         x = np.arange(len(train_accuracies))
         scat = ax.scatter(x, np.array(train_accuracies), marker=".", )
         ax.legend((scat,), (name,))
@@ -702,7 +704,7 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
 class BaseBoost(object):
 
     def __init__(self):
-        self.n_stumps = 1
+        self.n_stumps = 10
 
     def _collect_probas(self, X):
         return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_])
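
A minimal usage sketch for the extended plotting helper (the metric values are illustrative; `matplotlib` is already imported as `plt` in this module):

```python
from multiview_platform.MonoMultiViewClassifiers.Monoview.Additions.BoostUtils import get_accuracy_graph

# One metric value per boosting iteration, plotted on the fixed [0, 1] y-axis.
test_accuracies = [0.61, 0.70, 0.74, 0.78, 0.80]
get_accuracy_graph(test_accuracies, "CGreed", "step_test_metrics.png",
                   name="Accuracy", set_name="test")
```
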
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index 2ca59563..e0ea8568 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -54,6 +54,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.train_time = 0
         self.train_shape = None
         self.step_decisions = None
+        self.step_prod = None
         self.n_max_iterations = n_max_iterations
         self.estimators_generator = estimators_generator
         self.self_complemented = self_complemented
@@ -73,13 +74,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                                        "n_stumps", "use_r", "c_bound_sol"]
 
     def set_params(self, **params):
-        self.self_complemented = params["self_complemented"]
-        self.twice_the_same = params["twice_the_same"]
-        self.c_bound_choice = params["c_bound_choice"]
-        self.random_start = params["random_start"]
+        # self.self_complemented = params["self_complemented"]
+        # self.twice_the_same = params["twice_the_same"]
+        # self.c_bound_choice = params["c_bound_choice"]
+        # self.random_start = params["random_start"]
         self.n_max_iterations = params["n_max_iterations"]
-        self.n_stumps = params["n_stumps_per_attribute"]
-        self.use_r = params["use_r"]
+        # self.n_stumps = params["n_stumps_per_attribute"]
+        # self.use_r = params["use_r"]
 
     def fit(self, X, y):
 
@@ -96,20 +97,23 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.n_total_examples = m
 
         self.init_boosting(m, formatted_y, y_kernel_matrix)
+
         self.break_cause = " the maximum number of iterations was attained."
 
         for k in range(min(n - 1,
                            self.n_max_iterations - 1 if self.n_max_iterations is not None else np.inf)):
 
+
             # Print dynamically the step and the error of the current classifier
             self.it = k
-            print(
-                "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
-                                                              self.bounds[-1] > self.train_metrics[-1],
-                                                          k + 2,
-                                                          self.n_max_iterations,
-                                                          self.voter_perfs[-1]),
-                end="\r")
+
+            # print(
+            #     "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
+            #                                                   self.bounds[-1] > self.train_metrics[-1],
+            #                                               k + 2,
+            #                                               self.n_max_iterations,
+            #                                               self.voter_perfs[-1]),
+            #     end="\r")
             sol, new_voter_index = self.choose_new_voter(y_kernel_matrix,
                                                          formatted_y)
 
@@ -125,8 +129,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
             self.update_example_weights(formatted_y)
 
+
             self.update_info_containers(formatted_y, voter_perf, k)
 
+
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = \
         self.estimators_generator.estimators_[self.chosen_columns_]
@@ -155,7 +161,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         classification_matrix = self._binary_classification_matrix(X)
         self.step_predict(classification_matrix)
         margins = np.sum(classification_matrix * self.weights_, axis=1)
-        # print(margins)
         signs_array = np.array([int(x) for x in sign(margins)])
         signs_array[signs_array == -1] = 0
         end = time.time()
@@ -165,24 +170,18 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def step_predict(self, classification_matrix):
         if classification_matrix.shape != self.train_shape:
             self.step_decisions = np.zeros(classification_matrix.shape)
+            self.step_prod = np.zeros(classification_matrix.shape)
             for weight_index in range(self.weights_.shape[0]-1):
                 margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1)
-                # print(margins)
                 signs_array = np.array([int(x) for x in sign(margins)])
                 signs_array[signs_array == -1] = 0
                 self.step_decisions[:, weight_index] = signs_array
+                self.step_prod[:, weight_index] = margins
 
     def update_info_containers(self, y, voter_perf, k):
         """Is used at each iteration to compute and store all the needed quantities for later analysis"""
         self.example_weights_.append(self.example_weights)
-        m =  self.new_voter.shape[0]
-        t = np.sum(self.previous_vote * self.new_voter)/m
-        print(np.linalg.norm(self.previous_vote)>1)
-        # if abs((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m)))<=1:
-        #     print((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m)))
-        # print((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m))>=1)
         self.previous_vote += self.q * self.new_voter
-
         self.previous_votes.append(self.previous_vote)
         self.previous_margins.append(
             np.multiply(y, self.previous_vote))
@@ -226,6 +225,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def append_new_voter(self, new_voter_index):
         """Used to append the voter to the majority vote"""
         self.chosen_columns_.append(new_voter_index)
+        # print((self.classification_matrix[:, new_voter_index] == self.chosen_one).all())
         self.new_voter = self.classification_matrix[:, new_voter_index].reshape(
             (self.n_total_examples, 1))
 
@@ -244,6 +244,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.example_weights = self._initialize_alphas(m).reshape((m, 1))
 
         self.example_weights_.append(self.example_weights)
+
         if self.random_start:
             first_voter_index = self.random_state.choice(
                 np.where(np.sum(y_kernel_matrix, axis=0)>0)[0])
@@ -252,8 +253,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                 y_kernel_matrix)
 
         self.chosen_columns_.append(first_voter_index)
-        self.new_voter = self.classification_matrix[:,
-                         first_voter_index].reshape((m, 1))
+        self.new_voter = np.array(self.classification_matrix[:,
+                         first_voter_index].reshape((m, 1)), copy=True)
 
         self.previous_vote = self.new_voter
 
@@ -276,6 +277,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.update_example_weights(y)
         self.example_weights_.append(self.example_weights)
 
+
         self.previous_margins.append(
             np.multiply(y, self.previous_vote))
         self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote)))
@@ -293,6 +295,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
         self.bounds.append(bound)
 
+
+
     def format_X_y(self, X, y):
         """Formats the data  : X -the examples- and y -the labels- to be used properly by the algorithm """
         if scipy.sparse.issparse(X):
@@ -315,6 +319,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
 
         m, n = self.classification_matrix.shape
         y_kernel_matrix = np.multiply(y, self.classification_matrix)
+
         return m, n, y_kernel_matrix
 
     def init_info_containers(self):
@@ -374,7 +379,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         and select the one that has the smallest minimum"""
         m = y_kernel_matrix.shape[0]
         weighted_previous_sum = np.multiply(y,
-                                            self.previous_vote.reshape((m, 1)))
+                                            self.previous_vote.reshape(m, 1))
         margin_old = np.sum(weighted_previous_sum)
         if self.c_bound_sol:
             weighted_hypothesis = y_kernel_matrix
@@ -384,7 +389,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         bad_margins = np.where(np.sum(weighted_hypothesis, axis=0)<=0.0)[0]
 
         self.B2 = m
-        self.B1s = np.sum(2 * (weighted_previous_sum * weighted_hypothesis),
+        self.B1s = np.sum(2 * np.multiply(weighted_previous_sum, weighted_hypothesis),
                           axis=0)
         self.B0 = np.sum(weighted_previous_sum ** 2)
 
@@ -406,6 +411,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.margins.append(math.sqrt(self.A2s[best_hyp_index]/m))
         self.disagreements.append(0.5*self.B1s[best_hyp_index]/m)
 
+
         return sols[best_hyp_index], best_hyp_index
 
     def make_masked_c_bounds(self, sols, bad_margins):
@@ -453,8 +459,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         for step_index in range(self.step_decisions.shape[1]-1):
             step_metrics.append(self.plotted_metric.score(y_test, self.step_decisions[:, step_index]))
         step_metrics = np.array(step_metrics)
+        np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',')
         get_accuracy_graph(step_metrics, self.__class__.__name__,
-                           directory + 'step_test_metrics.png', self.plotted_metric)
+                           directory + 'step_test_metrics.png', self.plotted_metric, set_name="test")
+        step_cbounds = []
+        for step_index in range(self.step_prod.shape[1]):
+            num = np.sum(y_test*self.step_prod[:, step_index])**2
+            den = np.sum((self.step_prod[:, step_index])**2)
+            step_cbounds.append(1-num/(den*self.step_prod.shape[0]))
+        step_cbounds = np.array(step_cbounds)
+        np.savetxt(directory + "step_test_c_bounds.csv", step_cbounds,
+                   delimiter=',')
+        get_accuracy_graph(step_cbounds, self.__class__.__name__,
+                           directory + 'step_test_c_bounds.png',
+                           "C_bound", set="test")
 
     def getInterpretQar(self, directory, y_test=None):
         self.directory = directory
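
The quantity saved to `step_test_c_bounds.csv` above is the empirical C-bound of the partial majority vote, C = 1 - (sum_i y_i * f(x_i))^2 / (m * sum_i f(x_i)^2), which assumes labels in {-1, +1}; if `y_test` arrives in {0, 1} it should be remapped first. A toy sketch of the computation (numpy only, illustrative values):

```python
import numpy as np

# if labels come in {0, 1}: y = 2 * y_01 - 1
y = np.array([1, -1, 1, 1, -1])            # labels in {-1, +1}
f = np.array([0.4, -0.2, 0.1, 0.5, 0.3])   # weighted vote: one column of step_prod
m = y.shape[0]

numerator = np.sum(y * f) ** 2             # squared (unnormalized) margin
denominator = np.sum(f ** 2)               # second moment of the vote
c_bound = 1 - numerator / (denominator * m)
print(c_bound)                             # smaller is better
```
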
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
index d9ecbf28..b2d7dcad 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
@@ -1,23 +1,24 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
 from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar
 
 
 class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
-    def __init__(self, random_state=None, **kwargs):
-        super(CGreed, self).__init__(n_max_iterations=500,
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs):
+        super(CGreed, self).__init__(n_max_iterations=n_max_iterations,
             random_state=random_state,
             self_complemented=True,
             twice_the_same=True,
             c_bound_choice=True,
             random_start=False,
-            n_stumps_per_attribute=10,
+            n_stumps_per_attribute=n_stumps_per_attribute,
             use_r=True,
             c_bound_sol=True
             )
-        self.param_names = []
-        self.distribs = []
+
+        self.param_names = ["n_max_iterations"]
+        self.distribs = [CustomRandint(low=1, high=500)]
         self.classed_params = []
         self.weird_strings = {}
 
@@ -34,7 +35,8 @@ class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {}
+    kwargsDict = {"n_stumps_per_attribute":args.CGR_stumps,
+    "n_max_iterations":args.CGR_n_iter}
     return kwargsDict
 
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index 9a9f486f..0086ae5c 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -23,6 +23,8 @@ def copyHDF5(pathF, name, nbCores):
         newDataSet.close()
 
 
+
+
 def datasetsAlreadyExist(pathF, name, nbCores):
     """Used to check if it's necessary to copy datasets"""
     allDatasetExist = True
@@ -52,7 +54,7 @@ def makeMeNoisy(viewData, randomState, percentage=15):
     return noisyViewData
 
 
-def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, nbView=3,
+def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, add_noise=False, noise_std=0.15, nbView=3,
                        nbClass=2, datasetLength=34, randomStateInt=None):
     """Used to generate a plausible dataset to test the algorithms"""
     randomStateInt = 42
@@ -289,38 +291,72 @@ copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest
             newDset.attrs[key] = value
 
 
-def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False):
+def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15):
     """Used to load a hdf5 database"""
     if full:
         datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        dataset_name = nameDB
         labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
                                 enumerate(datasetFile.get("Labels").attrs["names"]))
-        return datasetFile, labelsDictionary
     else:
         askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames]
-        datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
-        fullLabels = datasetFile.get("Labels").value
-        temp_dataset = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
-        datasetFile.copy("Metadata", temp_dataset)
+        baseDatasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        fullLabels = baseDatasetFile.get("Labels").value
+        datasetFile = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
+        dataset_name = nameDB+"_temp_view_label_select"
+        baseDatasetFile.copy("Metadata", datasetFile)
         labelsSet = getClasses(fullLabels)
-        availableLabelsNames = list(datasetFile.get("Labels").attrs["names"])
+        availableLabelsNames = list(baseDatasetFile.get("Labels").attrs["names"])
         askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, askedLabelsNames,
                                                                randomState, availableLabelsNames)
 
         newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, askedLabelsNamesSet, fullLabels,
                                                               availableLabelsNames, askedLabelsNames)
-        temp_dataset.get("Metadata").attrs["datasetLength"] = len(usedIndices)
-        temp_dataset.get("Metadata").attrs["nbClass"] = NB_CLASS
-        temp_dataset.create_dataset("Labels", data=newLabels)
-        temp_dataset.get("Labels").attrs["names"] = newLabelsNames
-        filterViews(datasetFile, temp_dataset, views, usedIndices)
+        datasetFile.get("Metadata").attrs["datasetLength"] = len(usedIndices)
+        datasetFile.get("Metadata").attrs["nbClass"] = NB_CLASS
+        datasetFile.create_dataset("Labels", data=newLabels)
+        datasetFile.get("Labels").attrs["names"] = newLabelsNames
+        filterViews(baseDatasetFile, datasetFile, views, usedIndices)
 
         labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
-                                enumerate(temp_dataset.get("Labels").attrs["names"]))
-        return temp_dataset, labelsDictionary
+                                enumerate(datasetFile.get("Labels").attrs["names"]))
+
+    if add_noise:
+        datasetFile = add_gaussian_noise(datasetFile, randomState, pathF, dataset_name, noise_std)
+    return datasetFile, labelsDictionary
+
+
+def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, noise_std=0.15):
+    """In this function, we add a guaussian noise centered in 0 with specified
+    std to each view, according to it's range (the noise will be
+    mutliplied by this range) and we crop the noisy signal according to the
+    view's attributes limits.
+    This is done by creating a new dataset, to keep clean data."""
+    noisy_dataset = h5py.File(path_f+dataset_name+"_noised.hdf5", "w")
+    dataset_file.copy("Metadata", noisy_dataset)
+    dataset_file.copy("Labels", noisy_dataset)
+    for view_index in range(dataset_file.get("Metadata").attrs["nbView"]):
+        dataset_file.copy("View"+str(view_index), noisy_dataset)
+    # dataset_file.close()
+    for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]):
+        view_name = "View" + str(view_index)
+        view_dset = noisy_dataset.get(view_name)
+        orig_shape = view_dset.value.shape
+        view_ranges = view_dset.attrs["ranges"]
+        view_limits = view_dset.attrs["limits"]
+        normal_dist = random_state.normal(0, noise_std, view_dset.value.shape)
+        noise = normal_dist * view_ranges
+        noised_data = view_dset.value + noise
+        noised_data = np.where(noised_data < view_limits[:, 0], view_limits[:, 0], noised_data)
+        noised_data = np.where(noised_data > view_limits[:, 1], view_limits[:, 1], noised_data)
+        noisy_dataset[view_name][...] = noised_data
+        final_shape = noised_data.shape
+    return noisy_dataset
+
+
 
 
-def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, delimiter=","):
+def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15, delimiter=","):
     # TODO : Update this one
     labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter)
     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
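
A toy sketch of the noise model implemented by `add_gaussian_noise` above (numpy only; shapes and values are illustrative): Gaussian noise with the given std is scaled per attribute by `attrs["ranges"]`, then the result is cropped to `attrs["limits"]`; the pair of `np.where` calls in the patch is equivalent to a single `np.clip`:

```python
import numpy as np

random_state = np.random.RandomState(42)
view_data = random_state.rand(5, 3)        # 5 examples, 3 attributes in [0, 1]
ranges = np.ones(3)                        # attrs["ranges"]
limits = np.array([[0.0, 1.0]] * 3)        # attrs["limits"]: per-attribute [min, max]

noise = random_state.normal(0, 0.15, view_data.shape) * ranges
noised_data = view_data + noise
noised_data = np.clip(noised_data, limits[:, 0], limits[:, 1])  # crop to the limits
```
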
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index c0e0f072..1f0b1101 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -43,6 +43,11 @@ def parseTheArgs(arguments):
     groupStandard.add_argument('-full', action='store_true', help='Use option to use full dataset and no labels or view filtering')
     groupStandard.add_argument('-debug', action='store_true',
                                help='Use option to bebug implemented algorithms')
+    groupStandard.add_argument('-add_noise', action='store_true',
+                               help='Use option to add noise to the data')
+    groupStandard.add_argument('--noise_std', metavar='FLOAT', action='store',
+                               help='The std of the gaussian noise that will be added to the data.',
+                               type=float, default=0.15)
 
 
     groupClass = parser.add_argument_group('Classification arguments')
@@ -172,11 +177,11 @@ def parseTheArgs(arguments):
     groupQarBoost.add_argument('--QarB_epsilon', metavar='FLOAT', type=float, action='store',
                                  help='Set the epsilon parameter for QarBoost', default=1e-08)
 
-    groupQarBoostv2 = parser.add_argument_group('QarBoostv2 arguments')
-    groupQarBoostv2.add_argument('--QarB2_mu', metavar='FLOAT', type=float, action='store',
-                               help='Set the mu parameter for QarBoostv2', default=0.001)
-    groupQarBoostv2.add_argument('--QarB2_epsilon', metavar='FLOAT', type=float, action='store',
-                                 help='Set the epsilon parameter for QarBoostv2', default=1e-08)
+    groupCGreed = parser.add_argument_group('CGreed arguments')
+    groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int, action='store',
+                               help='Set the n_stumps_per_attribute parameter for CGreed', default=1)
+    groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, action='store',
+                                 help='Set the n_max_iterations parameter for CGreed', default=100)
 
     groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments')
     groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store',
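
A hedged sketch exercising the new command-line options through `parseTheArgs` (this assumes, as its name suggests, that the function returns the parsed namespace; all other options fall back to their defaults):

```python
from multiview_platform.MonoMultiViewClassifiers.utils.execution import parseTheArgs

args = parseTheArgs(["-add_noise", "--noise_std", "0.2",
                     "--CGR_stumps", "10", "--CGR_n_iter", "200"])
print(args.add_noise, args.noise_std, args.CGR_stumps, args.CGR_n_iter)
# expected: True 0.2 10 200
```
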
-- 
GitLab