From 6e4d208ab7862d77ba386516950ec8aa94c7d4aa Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Tue, 3 Sep 2019 16:10:09 -0400 Subject: [PATCH] ECMLJ_expes --- .../MonoMultiViewClassifiers/ExecClassif.py | 17 +- .../Monoview/Additions/BoostUtils.py | 2 +- .../Monoview/Additions/CBBoostUtils.py | 20 +- .../Monoview/Additions/CGDescUtils.py | 40 +- .../Monoview/Additions/_custom_criterion.pyx | 621 +++++++++++++++++- .../Monoview/ExecClassifMonoView.py | 4 +- .../Monoview/ExportResults.py | 12 +- .../Monoview/MonoviewUtils.py | 2 +- .../MonoviewClassifiers/AdaboostPregen.py | 1 + .../Multiview/ExecMultiview.py | 4 +- .../ResultAnalysis.py | 6 +- .../utils/GetMultiviewDb.py | 47 +- .../utils/HyperParameterSearch.py | 2 +- .../utils/execution.py | 2 +- 14 files changed, 711 insertions(+), 69 deletions(-) diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py index e75934e9..bcc03aed 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py +++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py @@ -9,6 +9,7 @@ import matplotlib import itertools import numpy as np from joblib import Parallel, delayed +from sklearn.tree import DecisionTreeClassifier matplotlib.use( 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure @@ -183,15 +184,23 @@ def gen_multiple_kwargs_combinations(clKWARGS): keys = clKWARGS.keys() kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) for values in values_cartesian_prod] - return kwargs_combination + + reduce_dict = {DecisionTreeClassifier: "DT", } + reduced_listed_values = [ + [_ if type(_) not in reduce_dict else reduce_dict[type(_)] for _ in + list_] for list_ in listed_values] + reduced_values_cartesian_prod = [_ for _ in itertools.product(*reduced_listed_values)] + reduced_kwargs_combination = [dict((key, value) for key, value in zip(keys, values)) + for values in reduced_values_cartesian_prod] + return kwargs_combination, reduced_kwargs_combination def gen_multiple_args_dictionnaries(nbClass, kwargsInit, classifier, viewName, viewIndex): - multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier + "KWARGSInit"]) + multiple_kwargs_list, reduced_multiple_kwargs_list = gen_multiple_kwargs_combinations(kwargsInit[classifier + "KWARGSInit"]) multiple_kwargs_dict = dict( - (classifier+"_"+"_".join(map(str,list(dictionary.values()))), dictionary) - for dictionary in multiple_kwargs_list) + (classifier+"_"+"_".join(map(str,list(reduced_dictionary.values()))), dictionary) + for reduced_dictionary, dictionary in zip(reduced_multiple_kwargs_list, multiple_kwargs_list )) args_dictionnaries = [{ "args": {classifier_name + "KWARGS": arguments, "feat": viewName, diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 5573f626..10f034b7 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -851,7 +851,7 @@ def get_accuracy_graph(plotted_data, classifier_name, file_name, # plt.tight_layout() else: ax.legend((scat,), (name,)) - f.savefig(file_name) + f.savefig(file_name, transparent=True) plt.close() diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CBBoostUtils.py 
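The ExecClassif.py hunk above builds every hyper-parameter combination and, in parallel, a "reduced" twin in which estimator objects such as DecisionTreeClassifier are replaced by short tags ("DT"), so that the result-dictionary keys built from the values stay readable. A minimal, self-contained sketch of that idea (the real function derives listed_values from the platform's configuration, which is outside this hunk; the example values and the "AdaboostPregen_" prefix below are only illustrative):

```python
import itertools
from sklearn.tree import DecisionTreeClassifier

REDUCE_DICT = {DecisionTreeClassifier: "DT"}  # long estimator reprs -> short tags

def gen_kwargs_combinations(cl_kwargs):
    # Cartesian product of all candidate values, plus a "reduced" twin where
    # estimator instances are replaced by the short tag used for naming.
    keys = list(cl_kwargs.keys())
    listed_values = [v if isinstance(v, list) else [v] for v in cl_kwargs.values()]
    combinations = [dict(zip(keys, values))
                    for values in itertools.product(*listed_values)]
    reduced_listed = [[REDUCE_DICT.get(type(v), v) for v in values]
                      for values in listed_values]
    reduced = [dict(zip(keys, values))
               for values in itertools.product(*reduced_listed)]
    return combinations, reduced

# The reduced dicts are only used to build keys; the full dicts keep the objects.
combos, reduced = gen_kwargs_combinations(
    {"n_estimators": [50, 100],
     "base_estimator": [DecisionTreeClassifier(max_depth=1)]})
names = ["AdaboostPregen_" + "_".join(map(str, r.values())) for r in reduced]
# -> ['AdaboostPregen_50_DT', 'AdaboostPregen_100_DT']
```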
b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CBBoostUtils.py index 3187fc8f..38b3ab87 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CBBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CBBoostUtils.py @@ -17,11 +17,11 @@ from ... import Metrics # Used for CBBoost class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, n_max_iterations=None, estimators_generator=None, - random_state=42, self_complemented=True, twice_the_same=False, - random_start=True, n_stumps=1, c_bound_sol=True, - plotted_metric=Metrics.zero_one_loss, save_train_data=True, - test_graph=True, mincq_tracking=True): + def __init__(self, n_max_iterations=100, estimators_generator="Stumps", + random_state=42, self_complemented=True, twice_the_same=True, + random_start=False, n_stumps=1, c_bound_sol=True, + plotted_metric=Metrics.zero_one_loss, save_train_data=False, + test_graph=True, mincq_tracking=False): super(CBBoostClassifier, self).__init__() r""" @@ -240,16 +240,6 @@ class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.new_voter = self.classification_matrix[:, new_voter_index].reshape( (self.n_total_examples, 1)) - # def choose_new_voter(self, y_kernel_matrix, formatted_y): - # """Used to choose the voter according to the specified criterion (margin or C-Bound""" - # if self.c_bound_choice: - # sol, new_voter_index = self._find_new_voter(y_kernel_matrix, - # formatted_y) - # else: - # new_voter_index, sol = self._find_best_weighted_margin( - # y_kernel_matrix) - # return sol, new_voter_index - def init_boosting(self, m, y, y_kernel_matrix): """THis initialization corressponds to the first round of boosting with equal weights for each examples and the voter chosen by it's margin.""" diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py index cfc5765a..0fbc8b08 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py @@ -22,7 +22,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): c_bound_choice=True, random_start=True, n_stumps=1, use_r=True, c_bound_sol=True, plotted_metric=Metrics.zero_one_loss, save_train_data=True, - test_graph=True, mincq_tracking=True): + test_graph=True, mincq_tracking=False): super(ColumnGenerationClassifierQar, self).__init__() r""" @@ -104,15 +104,15 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): # Print dynamically the step and the error of the current classifier self.it = k - print( - "Resp. bound : {}, {}; {}/{}, eps :{}, ".format( - self.respected_bound, - self.bounds[-1] > self.train_metrics[-1], - k + 2, - self.n_max_iterations, - self.voter_perfs[-1], - ), - end="\r") + # print( + # "Resp. 
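The init_boosting context above describes the first boosting round: uniform example weights and a first voter chosen by its margin. A minimal NumPy sketch of that round, assuming a pre-computed (n_examples, n_voters) classification matrix with entries in {-1, +1} (the names here are illustrative, not the class's actual attributes):

```python
import numpy as np

def first_voter_by_margin(classification_matrix, y):
    # With uniform weights, the margin of voter j is sum_i y_i * h_j(x_i);
    # the voter with the largest margin is chosen for the first round.
    margins = (classification_matrix * y.reshape(-1, 1)).sum(axis=0)
    return int(np.argmax(margins))

rng = np.random.RandomState(42)
H = np.sign(rng.randn(100, 10))   # toy voter outputs in {-1, +1}
y = np.sign(rng.randn(100))       # toy labels
first = first_voter_by_margin(H, y)
```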
bound : {}, {}; {}/{}, eps :{}, ".format( + # self.respected_bound, + # self.bounds[-1] > self.train_metrics[-1], + # k + 2, + # self.n_max_iterations, + # self.voter_perfs[-1], + # ), + # end="\r") sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y) if type(sol) == str: @@ -132,8 +132,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.raw_weights = self.weights_ self.y_train = formatted_y - print(self.classification_matrix) - print(self.weights_, self.break_cause) + # print(self.classification_matrix) + # print(self.weights_, self.break_cause) self.weights_ = np.array(self.weights_) self.weights_ /= np.sum(self.weights_) @@ -451,11 +451,26 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.A2s = np.sum(weighted_hypothesis, axis=0) ** 2 self.A1s = np.sum(weighted_hypothesis, axis=0) * margin_old * 2 self.A0 = margin_old ** 2 + import matplotlib.pyplot as plt + # plt.plot(self.A2s * 0.5 * self.B1s / m**3) + # plt.plot(np.array([margin_old/m for _ in range(len(self.A2s))])) + # plt.savefig("try.png") + + # print("C2 < 0 :", np.where(np.array([margin_old/m for _ in range(len(self.A2s))]) < np.sqrt(self.A2s) * 0.5 * self.B1s / m**2)[0]) + # print("C1 < 0 :", np.where(np.array([margin_old ** 2 / m for _ in range( + # len(self.A2s))]) < self.A2s * self.B0 / m ** 2)[0]) + # print("Double root:", np.where((0.5 * self.B1s / m)**2 * m > self.B0)[0]) + C2s = (self.A1s * self.B2 - self.A2s * self.B1s) + # print("Wrong C2 :" , np.where(C2s < 0)[0].shape, bad_margins.shape) C1s = 2 * (self.A0 * self.B2 - self.A2s * self.B0) + # print("Wrong C2 :", np.where(C1s < 0)[0].shape, bad_margins.shape) C0s = self.A0 * self.B1s - self.A1s * self.B0 + # print(np.where(C2s==0)) + # print(self.chosen_columns_) + sols = np.zeros(C0s.shape) - 3 # sols[np.where(C2s == 0)[0]] = C0s[np.where(C2s == 0)[0]] / C1s[np.where(C2s == 0)[0]] sols[np.where(C2s != 0)[0]] = (-C1s[np.where(C2s != 0)[0]] + np.sqrt( @@ -469,7 +484,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): return "No more pertinent voters", 0 else: best_hyp_index = np.argmin(masked_c_bounds) - self.c_bounds.append(masked_c_bounds[best_hyp_index]) self.margins.append(math.sqrt(self.A2s[best_hyp_index] / m)) self.disagreements.append(0.5 * self.B1s[best_hyp_index] / m) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx index 8e50ea22..f6deb43e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx @@ -1,3 +1,11 @@ +from sklearn.tree._criterion import ClassificationCriterion + +class Cbound(ClassificationCriterion): + def node_impurity(self): + pass + + + # # cython: cdivision=True # # cython: boundscheck=False # # cython: wraparound=False @@ -14,6 +22,22 @@ # # Jacob Schreiber <jmschreiber91@gmail.com> # # Nelson Liu <nelson@nelsonliu.me> # # +# # License: BSD 3 clause# cython: cdivision=True +# # cython: boundscheck=False +# # cython: wraparound=False +# +# # Authors: Gilles Louppe <g.louppe@gmail.com> +# # Peter Prettenhofer <peter.prettenhofer@gmail.com> +# # Brian Holt <bdholt1@gmail.com> +# # Noel Dawe <noel@dawe.me> +# # Satrajit Gosh <satrajit.ghosh@gmail.com> +# # Lars Buitinck +# # Arnaud Joly <arnaud.v.joly@gmail.com> +# # Joel Nothman 
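The _find_new_voter hunk above chooses each candidate voter's weight as a root of a per-candidate quadratic C2*w**2 + C1*w + C0 = 0 built from the margin statistics (A0, A1s, A2s) and disagreement statistics (B0, B1s, B2), then keeps the candidate minimizing the resulting C-bound. A standalone sketch of the root computation (the guard on the discriminant is an added numerical-safety assumption, not part of the original code):

```python
import numpy as np

def candidate_weights(A0, A1s, A2s, B0, B1s, B2):
    # Quadratic coefficients, one per candidate voter, as in _find_new_voter.
    C2s = A1s * B2 - A2s * B1s
    C1s = 2 * (A0 * B2 - A2s * B0)
    C0s = A0 * B1s - A1s * B0

    sols = np.full(C0s.shape, -3.0)  # sentinel value used by the original code
    nz = np.where(C2s != 0)[0]
    disc = C1s[nz] ** 2 - 4 * C2s[nz] * C0s[nz]
    sols[nz] = (-C1s[nz] + np.sqrt(np.maximum(disc, 0.0))) / (2 * C2s[nz])
    return sols
```

Candidates left at the sentinel value, or whose C-bound is undefined, are masked out before the argmin over masked_c_bounds, as in the code above.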
<joel.nothman@gmail.com> +# # Fares Hedayati <fares.hedayati@gmail.com> +# # Jacob Schreiber <jmschreiber91@gmail.com> +# # Nelson Liu <nelson@nelsonliu.me> +# # # # License: BSD 3 clause # # calloc @@ -76,7 +100,602 @@ # cdef # # -# class CustomCriterion: +# class CustomCriterion(Criterion): +# """Interface for impurity criteria. +# This object stores methods on how to calculate how good a split is using +# different metrics. +# """ +# +# def __dealloc__(self): +# """Destructor.""" +# +# free(self.sum_total) +# free(self.sum_left) +# free(self.sum_right) +# +# def __getstate__(self): +# return {} +# +# def __setstate__(self, d): +# pass +# +# cdef +# int +# init(self, DOUBLE_t * y, SIZE_t +# y_stride, DOUBLE_t * sample_weight, +# double +# weighted_n_samples, SIZE_t * samples, SIZE_t +# start, +# SIZE_t +# end) nogil except -1: +# """Placeholder for a method which will initialize the criterion. +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. +# Parameters +# ---------- +# y : array-like, dtype=DOUBLE_t +# y is a buffer that can store values for n_outputs target variables +# y_stride : SIZE_t +# y_stride is used to index the kth output value as follows: +# y[i, k] = y[i * y_stride + k] +# sample_weight : array-like, dtype=DOUBLE_t +# The weight of each sample +# weighted_n_samples : DOUBLE_t +# The total weight of the samples being considered +# samples : array-like, dtype=DOUBLE_t +# Indices of the samples in X and y, where samples[start:end] +# correspond to the samples in this node +# start : SIZE_t +# The first sample to be used on this node +# end : SIZE_t +# The last sample used on this node +# """ +# +# pass +# # +# # cdef int reset(self) nogil except -1: +# # """Reset the criterion at pos=start. +# # This method must be implemented by the subclass. +# # """ +# # +# # pass +# # +# # cdef int reverse_reset(self) nogil except -1: +# # """Reset the criterion at pos=end. +# # This method must be implemented by the subclass. +# # """ +# # pass +# # +# # cdef int update(self, SIZE_t new_pos) nogil except -1: +# # """Updated statistics by moving samples[pos:new_pos] to the left child. +# # This updates the collected statistics by moving samples[pos:new_pos] +# # from the right child to the left child. It must be implemented by +# # the subclass. +# # Parameters +# # ---------- +# # new_pos : SIZE_t +# # New starting index position of the samples in the right child +# # """ +# # +# # pass +# # +# # cdef double node_impurity(self) nogil: +# # """Placeholder for calculating the impurity of the node. +# # Placeholder for a method which will evaluate the impurity of +# # the current node, i.e. the impurity of samples[start:end]. This is the +# # primary function of the criterion class. +# # """ +# # +# # pass +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # """Placeholder for calculating the impurity of children. +# # Placeholder for a method which evaluates the impurity in +# # children nodes, i.e. the impurity of samples[start:pos] + the impurity +# # of samples[pos:end]. +# # Parameters +# # ---------- +# # impurity_left : double pointer +# # The memory address where the impurity of the left child should be +# # stored. +# # impurity_right : double pointer +# # The memory address where the impurity of the right child should be +# # stored +# # """ +# # +# # pass +# # +# # cdef void node_value(self, double* dest) nogil: +# # """Placeholder for storing the node value. 
+# # Placeholder for a method which will compute the node value +# # of samples[start:end] and save the value into dest. +# # Parameters +# # ---------- +# # dest : double pointer +# # The memory address where the node value should be stored. +# # """ +# # +# # pass +# # +# # cdef double proxy_impurity_improvement(self) nogil: +# # """Compute a proxy of the impurity reduction +# # This method is used to speed up the search for the best split. +# # It is a proxy quantity such that the split that maximizes this value +# # also maximizes the impurity improvement. It neglects all constant terms +# # of the impurity decrease for a given split. +# # The absolute impurity improvement is only computed by the +# # impurity_improvement method once the best split has been found. +# # """ +# # cdef double impurity_left +# # cdef double impurity_right +# # self.children_impurity(&impurity_left, &impurity_right) +# # +# # return (- self.weighted_n_right * impurity_right +# # - self.weighted_n_left * impurity_left) +# # +# # cdef double impurity_improvement(self, double impurity) nogil: +# # """Compute the improvement in impurity +# # This method computes the improvement in impurity when a split occurs. +# # The weighted impurity improvement equation is the following: +# # N_t / N * (impurity - N_t_R / N_t * right_impurity +# # - N_t_L / N_t * left_impurity) +# # where N is the total number of samples, N_t is the number of samples +# # at the current node, N_t_L is the number of samples in the left child, +# # and N_t_R is the number of samples in the right child, +# # Parameters +# # ---------- +# # impurity : double +# # The initial impurity of the node before the split +# # Return +# # ------ +# # double : improvement in impurity after the split occurs +# # """ +# # +# # cdef double impurity_left +# # cdef double impurity_right +# # +# # self.children_impurity(&impurity_left, &impurity_right) +# # +# # return ((self.weighted_n_node_samples / self.weighted_n_samples) * +# # (impurity - (self.weighted_n_right / +# # self.weighted_n_node_samples * impurity_right) +# # - (self.weighted_n_left / +# # self.weighted_n_node_samples * impurity_left))) +# # +# # +# # cdef class CustomClassificationCriterion(Criterion): +# # """Abstract criterion for classification.""" +# # +# # def __cinit__(self, SIZE_t n_outputs, +# # np.ndarray[SIZE_t, ndim=1] n_classes): +# # """Initialize attributes for this criterion. 
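The commented-out impurity_improvement above documents the standard weighted improvement formula. Since the Cython implementation is disabled by this patch, here is a plain-Python rendering of that formula for reference (descriptive names, not the Cython attribute names):

```python
def impurity_improvement(impurity, weighted_n_node, weighted_n_left,
                         weighted_n_right, weighted_n_total,
                         impurity_left, impurity_right):
    # N_t / N * (impurity - N_t_R / N_t * right_impurity
    #                     - N_t_L / N_t * left_impurity)
    return (weighted_n_node / weighted_n_total) * (
        impurity
        - (weighted_n_right / weighted_n_node) * impurity_right
        - (weighted_n_left / weighted_n_node) * impurity_left)
```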
+# # Parameters +# # ---------- +# # n_outputs : SIZE_t +# # The number of targets, the dimensionality of the prediction +# # n_classes : numpy.ndarray, dtype=SIZE_t +# # The number of unique classes in each target +# # """ +# # +# # self.y = NULL +# # self.y_stride = 0 +# # self.sample_weight = NULL +# # +# # self.samples = NULL +# # self.start = 0 +# # self.pos = 0 +# # self.end = 0 +# # +# # self.n_outputs = n_outputs +# # self.n_samples = 0 +# # self.n_node_samples = 0 +# # self.weighted_n_node_samples = 0.0 +# # self.weighted_n_left = 0.0 +# # self.weighted_n_right = 0.0 +# # +# # # Count labels for each output +# # self.sum_total = NULL +# # self.sum_left = NULL +# # self.sum_right = NULL +# # self.n_classes = NULL +# # +# # safe_realloc(&self.n_classes, n_outputs) +# # +# # cdef SIZE_t k = 0 +# # cdef SIZE_t sum_stride = 0 +# # +# # # For each target, set the number of unique classes in that target, +# # # and also compute the maximal stride of all targets +# # for k in range(n_outputs): +# # self.n_classes[k] = n_classes[k] +# # +# # if n_classes[k] > sum_stride: +# # sum_stride = n_classes[k] +# # +# # self.sum_stride = sum_stride +# # +# # cdef SIZE_t n_elements = n_outputs * sum_stride +# # self.sum_total = <double*> calloc(n_elements, sizeof(double)) +# # self.sum_left = <double*> calloc(n_elements, sizeof(double)) +# # self.sum_right = <double*> calloc(n_elements, sizeof(double)) +# # +# # if (self.sum_total == NULL or +# # self.sum_left == NULL or +# # self.sum_right == NULL): +# # raise MemoryError() +# # +# # def __dealloc__(self): +# # """Destructor.""" +# # free(self.n_classes) +# # +# # def __reduce__(self): +# # return (type(self), +# # (self.n_outputs, +# # sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), +# # self.__getstate__()) +# # +# # cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, +# # DOUBLE_t* sample_weight, double weighted_n_samples, +# # SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: +# # """Initialize the criterion at node samples[start:end] and +# # children samples[start:start] and samples[start:end]. +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. 
+# # Parameters +# # ---------- +# # y : array-like, dtype=DOUBLE_t +# # The target stored as a buffer for memory efficiency +# # y_stride : SIZE_t +# # The stride between elements in the buffer, important if there +# # are multiple targets (multi-output) +# # sample_weight : array-like, dtype=DTYPE_t +# # The weight of each sample +# # weighted_n_samples : SIZE_t +# # The total weight of all samples +# # samples : array-like, dtype=SIZE_t +# # A mask on the samples, showing which ones we want to use +# # start : SIZE_t +# # The first sample to use in the mask +# # end : SIZE_t +# # The last sample to use in the mask +# # """ +# # +# # self.y = y +# # self.y_stride = y_stride +# # self.sample_weight = sample_weight +# # self.samples = samples +# # self.start = start +# # self.end = end +# # self.n_node_samples = end - start +# # self.weighted_n_samples = weighted_n_samples +# # self.weighted_n_node_samples = 0.0 +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef double* sum_total = self.sum_total +# # +# # cdef SIZE_t i +# # cdef SIZE_t p +# # cdef SIZE_t k +# # cdef SIZE_t c +# # cdef DOUBLE_t w = 1.0 +# # cdef SIZE_t offset = 0 +# # +# # for k in range(self.n_outputs): +# # memset(sum_total + offset, 0, n_classes[k] * sizeof(double)) +# # offset += self.sum_stride +# # +# # for p in range(start, end): +# # i = samples[p] +# # +# # # w is originally set to be 1.0, meaning that if no sample weights +# # # are given, the default weight of each sample is 1.0 +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # # Count weighted class frequency for each target +# # for k in range(self.n_outputs): +# # c = <SIZE_t> y[i * y_stride + k] +# # sum_total[k * self.sum_stride + c] += w +# # +# # self.weighted_n_node_samples += w +# # +# # # Reset to pos=start +# # self.reset() +# # return 0 +# # +# # cdef int reset(self) nogil except -1: +# # """Reset the criterion at pos=start +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. +# # """ +# # self.pos = self.start +# # +# # self.weighted_n_left = 0.0 +# # self.weighted_n_right = self.weighted_n_node_samples +# # +# # cdef double* sum_total = self.sum_total +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memset(sum_left, 0, n_classes[k] * sizeof(double)) +# # memcpy(sum_right, sum_total, n_classes[k] * sizeof(double)) +# # +# # sum_total += self.sum_stride +# # sum_left += self.sum_stride +# # sum_right += self.sum_stride +# # return 0 +# # +# # cdef int reverse_reset(self) nogil except -1: +# # """Reset the criterion at pos=end +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. 
+# # """ +# # self.pos = self.end +# # +# # self.weighted_n_left = self.weighted_n_node_samples +# # self.weighted_n_right = 0.0 +# # +# # cdef double* sum_total = self.sum_total +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memset(sum_right, 0, n_classes[k] * sizeof(double)) +# # memcpy(sum_left, sum_total, n_classes[k] * sizeof(double)) +# # +# # sum_total += self.sum_stride +# # sum_left += self.sum_stride +# # sum_right += self.sum_stride +# # return 0 +# # +# # cdef int update(self, SIZE_t new_pos) nogil except -1: +# # """Updated statistics by moving samples[pos:new_pos] to the left child. +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. +# # Parameters +# # ---------- +# # new_pos : SIZE_t +# # The new ending position for which to move samples from the right +# # child to the left child. +# # """ +# # cdef DOUBLE_t* y = self.y +# # cdef SIZE_t pos = self.pos +# # cdef SIZE_t end = self.end +# # +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # cdef double* sum_total = self.sum_total +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t* samples = self.samples +# # cdef DOUBLE_t* sample_weight = self.sample_weight +# # +# # cdef SIZE_t i +# # cdef SIZE_t p +# # cdef SIZE_t k +# # cdef SIZE_t c +# # cdef SIZE_t label_index +# # cdef DOUBLE_t w = 1.0 +# # +# # # Update statistics up to new_pos +# # # +# # # Given that +# # # sum_left[x] + sum_right[x] = sum_total[x] +# # # and that sum_total is known, we are going to update +# # # sum_left from the direction that require the least amount +# # # of computations, i.e. from pos to new_pos or from end to new_po. +# # +# # if (new_pos - pos) <= (end - new_pos): +# # for p in range(pos, new_pos): +# # i = samples[p] +# # +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # for k in range(self.n_outputs): +# # label_index = (k * self.sum_stride + +# # <SIZE_t> y[i * self.y_stride + k]) +# # sum_left[label_index] += w +# # +# # self.weighted_n_left += w +# # +# # else: +# # self.reverse_reset() +# # +# # for p in range(end - 1, new_pos - 1, -1): +# # i = samples[p] +# # +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # for k in range(self.n_outputs): +# # label_index = (k * self.sum_stride + +# # <SIZE_t> y[i * self.y_stride + k]) +# # sum_left[label_index] -= w +# # +# # self.weighted_n_left -= w +# # +# # # Update right part statistics +# # self.weighted_n_right = self.weighted_n_node_samples - self.weighted_n_left +# # for k in range(self.n_outputs): +# # for c in range(n_classes[k]): +# # sum_right[c] = sum_total[c] - sum_left[c] +# # +# # sum_right += self.sum_stride +# # sum_left += self.sum_stride +# # sum_total += self.sum_stride +# # +# # self.pos = new_pos +# # return 0 +# # +# # cdef double node_impurity(self) nogil: +# # pass +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # pass +# # +# # cdef void node_value(self, double* dest) nogil: +# # """Compute the node value of samples[start:end] and save it into dest. +# # Parameters +# # ---------- +# # dest : double pointer +# # The memory address which we will save the node value into. 
+# # """ +# # +# # cdef double* sum_total = self.sum_total +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memcpy(dest, sum_total, n_classes[k] * sizeof(double)) +# # dest += self.sum_stride +# # sum_total += self.sum_stride +# # +# # cdef class CCriterion(CustomClassificationCriterion): +# # r"""Cross Entropy impurity criterion. +# # This handles cases where the target is a classification taking values +# # 0, 1, ... K-2, K-1. If node m represents a region Rm with Nm observations, +# # then let +# # count_k = 1 / Nm \sum_{x_i in Rm} I(yi = k) +# # be the proportion of class k observations in node m. +# # The cross-entropy is then defined as +# # cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) +# # """ +# # +# # cdef double node_impurity(self) nogil: +# # """Evaluate the impurity of the current node, i.e. the impurity of +# # samples[start:end], using the cross-entropy criterion.""" +# # +# # # cdef SIZE_t* n_classes = self.n_classes +# # # cdef double* sum_total = self.sum_total +# # # cdef double entropy = 0.0 +# # # cdef double count_k +# # # cdef SIZE_t k +# # # cdef SIZE_t c +# # # +# # # for k in range(self.n_outputs): +# # # for c in range(n_classes[k]): +# # # count_k = sum_total[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_node_samples +# # # entropy -= count_k * log(count_k) +# # # +# # # sum_total += self.sum_stride +# # +# # return 1.0 +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # """Evaluate the impurity in children nodes +# # i.e. the impurity of the left child (samples[start:pos]) and the +# # impurity the right child (samples[pos:end]). +# # Parameters +# # ---------- +# # impurity_left : double pointer +# # The memory address to save the impurity of the left node +# # impurity_right : double pointer +# # The memory address to save the impurity of the right node +# # """ +# # +# # # cdef SIZE_t* n_classes = self.n_classes +# # # cdef double* sum_left = self.sum_left +# # # cdef double* sum_right = self.sum_right +# # # cdef double entropy_left = 0.0 +# # # cdef double entropy_right = 0.0 +# # # cdef double count_k +# # # cdef SIZE_t k +# # # cdef SIZE_t c +# # # +# # # for k in range(self.n_outputs): +# # # for c in range(n_classes[k]): +# # # count_k = sum_left[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_left +# # # entropy_left -= count_k * log(count_k) +# # # +# # # count_k = sum_right[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_right +# # # entropy_right -= count_k * log(count_k) +# # # +# # # sum_left += self.sum_stride +# # # sum_right += self.sum_stride +# # # +# # # impurity_left[0] = entropy_left / self.n_outputs +# # # impurity_right[0] = entropy_right / self.n_outputs +# +# +# calloc +# +# free +# +# memcpy +# +# memset +# +# fabs +# +# malloc +# +# realloc +# # from libc.math cimport log as ln +# +# import numpy as np +# from sklearn.tree import Crit +# +# cimport +# numpy as np +# np.import_array() +# # from sklearn.tree._criterion cimport Criterion, ClassificationCriterion +# +# cdef +# realloc_ptr +# safe_realloc(realloc_ptr * p, size_t +# nelems) nogil except *: +# # sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython +# # 0.20.1 to crash. 
+# cdef +# size_t +# nbytes = nelems * sizeof(p[0][0]) +# if nbytes / sizeof(p[0][0]) != nelems: +# # Overflow in the multiplication +# with gil: +# raise MemoryError("could not allocate (%d * %d) bytes" +# % (nelems, sizeof(p[0][0]))) +# cdef +# realloc_ptr +# tmp = < realloc_ptr > realloc(p[0], nbytes) +# if tmp == NULL: +# with gil: +# raise MemoryError("could not allocate %d bytes" % nbytes) +# p[0] = tmp +# return tmp # for +# +# cdef +# inline +# np.ndarray +# sizet_ptr_to_ndarray(SIZE_t * data, SIZE_t +# size): +# """Return copied data as 1D numpy array of intp's.""" +# cdef +# np.npy_intp +# shape[1] +# shape[0] = < np.npy_intp > size +# return np.PyArray_SimpleNewFromData(1, shape, np.NPY_INTP, data).copy() +# +# cdef +# +# +# class CustomCriterion(Criterion): # """Interface for impurity criteria. # This object stores methods on how to calculate how good a split is using # different metrics. diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index a1fcab81..0e689085 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -222,11 +222,11 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, testFileName = outputFileName + imageName + "-" + str( i) + ".png" if not os.path.isfile(testFileName): - imagesAnalysis[imageName].savefig(testFileName) + imagesAnalysis[imageName].savefig(testFileName, transparent=True) break imagesAnalysis[imageName].savefig( - outputFileName + imageName + '.png') + outputFileName + imageName + '.png', transparent=True) if __name__ == '__main__': diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py index ba1a9088..086080ee 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py @@ -135,11 +135,11 @@ def showScoreTime(directory, filename, store, resScore, resTime, rangeX, for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" if not os.path.isfile(directory + testFileName): - plt.savefig(directory + testFileName) + plt.savefig(directory + testFileName, transparent=True) break else: - plt.savefig(file) + plt.savefig(file, transparent=True) else: plt.show() @@ -180,11 +180,11 @@ def showResults(directory, filename, db, feat, score): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" if not os.path.isfile(directory + testFileName): - plt.savefig(directory + testFileName) + plt.savefig(directory + testFileName, transparent=True) break else: - plt.savefig(file) + plt.savefig(file, transparent=True) plt.close() @@ -262,11 +262,11 @@ def plot_confusion_matrix(directory, filename, df_confusion, for i in range(1, 20): testFileName = filename + "-" + str(i) + ".png" if not os.path.isfile(directory + testFileName): - plt.savefig(directory + testFileName) + plt.savefig(directory + testFileName, transparent=True) break else: - plt.savefig(file) + plt.savefig(file, transparent=True) plt.close() diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index daa2fff7..1d6c4129 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ 
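saveResults in ExecClassifMonoView.py and the ExportResults helpers above share one saving pattern, now with transparent backgrounds: probe numbered file names so an existing figure is never overwritten, then call savefig with transparent=True. A compact sketch of that pattern (helper name and signature are illustrative, not taken from the code):

```python
import os
import matplotlib
matplotlib.use('Agg')  # raster backend, as in ExecClassif.py
import matplotlib.pyplot as plt

def save_figure(fig, directory, base_name, max_tries=20):
    # Find a free "<base_name>.png" / "<base_name>-i.png" slot, then save with
    # a transparent background (the change applied to every savefig call here).
    file_name = os.path.join(directory, base_name + ".png")
    if os.path.isfile(file_name):
        for i in range(1, max_tries):
            candidate = os.path.join(directory, "%s-%d.png" % (base_name, i))
            if not os.path.isfile(candidate):
                file_name = candidate
                break
    fig.savefig(file_name, transparent=True)
    plt.close(fig)
```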
b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -190,7 +190,7 @@ class BaseMonoviewClassifier(object): ax.yaxis.set_major_formatter(formatter) plt.bar(x, featureImportancesSorted) plt.title("Importance depending on feature") - fig.savefig(directory + "feature_importances.png") + fig.savefig(directory + "feature_importances.png", transparent=True) plt.close() featuresImportancesDict = dict((featureIndex, featureImportance) for featureIndex, featureImportance in diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py index 6e70dc9d..9df79130 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py @@ -55,6 +55,7 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, self.metrics = np.array( [self.plotted_metric.score(change_label_to_zero(pred), y) for pred in self.staged_predict(pregen_X)]) + self.bounds = np.array([np.prod( np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) for i in diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py index a8b97339..54cd4e85 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py @@ -69,11 +69,11 @@ def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, testFileName = outputFileName + imageName + "-" + str( i) + ".png" if not os.path.isfile(testFileName): - imagesAnalysis[imageName].savefig(testFileName) + imagesAnalysis[imageName].savefig(testFileName, transparent=True) break imagesAnalysis[imageName].savefig( - outputFileName + imageName + '.png') + outputFileName + imageName + '.png', transparent=True) def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, diff --git a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py index 34fb2ad5..22ba5ec0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py +++ b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py @@ -249,7 +249,7 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, plt.tight_layout() except: pass - f.savefig(fileName + '.png') + f.savefig(fileName + '.png', transparent=True) plt.close() import pandas as pd if train_STDs is None: @@ -377,7 +377,7 @@ def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, cbar = fig.colorbar(cax, ticks=[-100 * statsIter / 2, 0, statsIter]) cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) fig.tight_layout() - fig.savefig(fileName + "error_analysis_2D.png", bbox_inches="tight") + fig.savefig(fileName + "error_analysis_2D.png", bbox_inches="tight", transparent=True) plt.close() @@ -405,7 +405,7 @@ def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): plt.bar(x, errorOnExamples) plt.ylim([0, nbClassifiers]) plt.title("Number of classifiers that failed to classify each example") - fig.savefig(fileName + "error_analysis_bar.png") + fig.savefig(fileName + "error_analysis_bar.png", transparent=True) plt.close() diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py 
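publish2Dplot above renders the per-example, per-classifier error matrix with a three-level colorbar and now saves it with a transparent background. A self-contained sketch of that figure; the matrix encoding and the colormap are assumptions inferred from the visible colorbar ticks and labels:

```python
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def plot_error_2d(data, classifier_names, stats_iter, file_name):
    # data[i, j]: how often classifier j was right on example i over stats_iter
    # runs; unseen examples are encoded as -100 * stats_iter / 2.
    fig, ax = plt.subplots()
    cax = ax.pcolor(data, cmap=plt.cm.coolwarm)
    ax.set_xticks(np.arange(len(classifier_names)) + 0.5)
    ax.set_xticklabels(classifier_names, rotation=45, ha="right")
    cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter])
    cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right'])
    fig.tight_layout()
    fig.savefig(file_name + "error_analysis_2D.png",
                bbox_inches="tight", transparent=True)
    plt.close(fig)

# plot_error_2d(scores, ["SVM", "DT", "Adaboost"], stats_iter=5, file_name="out/")
```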
b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py index 1bfa4f92..c60796db 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py @@ -333,25 +333,26 @@ def filterViews(datasetFile, temp_dataset, views, usedIndices): for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), "View" + str(viewIndex), usedIndices) - for askedViewName in views: - for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"] - if type(viewName) == bytes: - viewName = viewName.decode("utf-8") - if viewName == askedViewName: - copyhdf5Dataset(datasetFile, temp_dataset, - "View" + str(viewIndex), - "View" + str(newViewIndex), usedIndices) - newViewName = \ - temp_dataset.get("View" + str(newViewIndex)).attrs["name"] - if type(newViewName) == bytes: - temp_dataset.get("View" + str(newViewIndex)).attrs[ - "name"] = newViewName.decode("utf-8") - - newViewIndex += 1 - else: - pass - temp_dataset.get("Metadata").attrs["nbView"] = len(views) + else: + for askedViewName in views: + for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): + viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"] + if type(viewName) == bytes: + viewName = viewName.decode("utf-8") + if viewName == askedViewName: + copyhdf5Dataset(datasetFile, temp_dataset, + "View" + str(viewIndex), + "View" + str(newViewIndex), usedIndices) + newViewName = \ + temp_dataset.get("View" + str(newViewIndex)).attrs["name"] + if type(newViewName) == bytes: + temp_dataset.get("View" + str(newViewIndex)).attrs[ + "name"] = newViewName.decode("utf-8") + + newViewIndex += 1 + else: + pass + temp_dataset.get("Metadata").attrs["nbView"] = len(views) def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, @@ -447,6 +448,14 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, view_limits[:, 0], noised_data) noised_data = np.where(noised_data > view_limits[:, 1], view_limits[:, 1], noised_data) + # import matplotlib.pyplot as plt + # plt.imshow(noised_data[1,:].reshape((28,28))) + # plt.savefig("plif.png") + # lower_contrast = view_dset.value[1,:].reshape((28,28))/10 + # print(np.max(lower_contrast)) + # plt.imshow(lower_contrast.astype(int)) + # plt.savefig("plif2.png") + # quit() noisy_dataset[view_name][...] = noised_data # final_shape = noised_data.shape return noisy_dataset, dataset_name + "_noised" diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py b/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py index 84e03d89..08b23063 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py @@ -146,7 +146,7 @@ def genHeatMaps(params, scoresArray, outputFileName): plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) plt.title('Validation metric') plt.savefig( - outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png") + outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png", transparent=True) plt.close() # nohup python ~/dev/git/spearmint/spearmint/main.py . 
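The GetMultiviewDb hunk above fixes filterViews so that copying every view (views == [""]) and copying only the requested views are genuine alternatives (the new else branch) instead of running one after the other. A sketch of the corrected control flow, assuming h5py-style datasets and the module's existing copyhdf5Dataset helper:

```python
from multiview_platform.MonoMultiViewClassifiers.utils.GetMultiviewDb import \
    copyhdf5Dataset  # the project's HDF5 copy helper

def filter_views(dataset_file, temp_dataset, views, used_indices):
    new_view_index = 0
    nb_view = dataset_file.get("Metadata").attrs["nbView"]
    if views == [""]:
        # No specific view requested: copy every view unchanged.
        for view_index in range(nb_view):
            copyhdf5Dataset(dataset_file, temp_dataset,
                            "View" + str(view_index),
                            "View" + str(view_index), used_indices)
    else:
        # Copy only the requested views, re-numbering them from 0.
        for asked_view_name in views:
            for view_index in range(nb_view):
                view_name = dataset_file.get("View" + str(view_index)).attrs["name"]
                if isinstance(view_name, bytes):
                    view_name = view_name.decode("utf-8")
                if view_name == asked_view_name:
                    copyhdf5Dataset(dataset_file, temp_dataset,
                                    "View" + str(view_index),
                                    "View" + str(new_view_index), used_indices)
                    new_view_index += 1
        temp_dataset.get("Metadata").attrs["nbView"] = len(views)
```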
&
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index ad2b75e7..92b96cbf 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -848,7 +848,6 @@ def genSplits(labels, splitRatio, statsIterRandomStates):
                                       random_state=randomState,
                                       test_size=splitRatio)
         folds = foldsObj.split(indices, labels)
-        print(indices)
         for fold in folds:
             train_fold, test_fold = fold
             trainIndices = indices[train_fold]
@@ -907,6 +906,7 @@ def initViews(DATASET, argViews):
         Names of all the available views in the dataset.
     """
     NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
+    print(NB_VIEW)
     if argViews != [""]:
         allowedViews = argViews
         allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"])
--
GitLab