From 1c3a3ccef490e8d1e554208f3b32b5c27235741e Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Tue, 28 Jan 2020 10:11:56 +0100
Subject: [PATCH] Replaced unseeded randomness with a seeded random_state

---
 generator/update_baptiste.py | 39 +++++++++++++-----------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/generator/update_baptiste.py b/generator/update_baptiste.py
index a3810c0..d26fa60 100644
--- a/generator/update_baptiste.py
+++ b/generator/update_baptiste.py
@@ -2,10 +2,8 @@ import os
 import yaml
 import numpy as np
 from sklearn.datasets import make_classification
-from random import gauss
 from math import ceil, floor
 import pandas as pd
-import shutil
 import h5py
 
 class MultiviewDatasetGenetator():
@@ -40,7 +38,12 @@ class MultiviewDatasetGenetator():
             self.standard_deviation = standard_deviation
             self.weights = weights
             self.flip_y = flip_y
-            self.random_state = random_state
+            if isinstance(random_state, np.random.RandomState):
+                self.random_state = random_state
+            elif isinstance(random_state, int):
+                self.random_state = np.random.RandomState(random_state)
+            else:
+                raise ValueError("random_state must be np.random.RandomState or int")
 
     def generate(self):
         if self.n_views < 2:
@@ -65,7 +68,7 @@ class MultiviewDatasetGenetator():
                 "Il faut que (d+D)/2 - 3*standard_deviation >= 1 pour avoir des valeurs positives non nulles lors de l'emploi de la loi normale")
 
         # n_views dimension of view v values randomly from N((d+D)/2, standard_deviation^2)
-        d_v = np.random.normal(loc=(self.d + self.D) / 2,
+        d_v = self.random_state.normal(loc=(self.d + self.D) / 2,
                                scale=self.standard_deviation,
                                size=self.n_views)
         d_v = list(d_v)
@@ -75,7 +78,7 @@ class MultiviewDatasetGenetator():
                 remove_list.append(dim_view)
                 add = -1
                 while add < self.d or add > self.D:
-                    add = gauss((self.d + self.D) / 2, self.standard_deviation)
+                    add = self.random_state.normal((self.d + self.D) / 2, self.standard_deviation)
                 add_list.append(add)
         d_v = [view for view in d_v if view not in remove_list] + add_list
         d_v = [int(view) for view in d_v]  # dimension of views = integer
@@ -99,14 +102,14 @@ class MultiviewDatasetGenetator():
         self.results = []
         for view in range(n_views):
             # choice d_v[view] numeros of Z columns uniformly from I_q
-            I_v = np.random.choice(I_q, size=d_v[view],
+            I_v = self.random_state.choice(I_q, size=d_v[view],
                                    replace=False)  # tirage dans I_q sans remise de taille d_v[view]
             meta_I_v += list(I_v)
             # projection of Z along the columns in I_v
             X_v = self.projection( I_v)
             self.results.append((X_v, I_v))
             # remove R*d_v[view] columns numeros of I_v form I_q
-            elements_to_remove = np.random.choice(I_v,
+            elements_to_remove = self.random_state.choice(I_v,
                                                   size=floor(self.R * d_v[view]),
                                                   replace=False)  # tirage dans I_v sans remise de taille floor(R*d_v[view])
             I_q = np.setdiff1d(I_q,
@@ -242,9 +245,9 @@ if __name__=="__main__":
     class_sep_factor = 10000  # Separation between the different classes
     n_informative_divid = 2  # Divides the number of informative features in the latent space
     standard_deviation = 2
-    d = 4
-    D = 10
-    flip_y = 0.00
+    d = 4  # View size lower limit
+    D = 10  # View size upper limit
+    flip_y = 0.00  # Ratio of label noise
     random_state = 42
     weights = None # The proportions of examples in each class
 
@@ -269,18 +272,4 @@ if __name__=="__main__":
                                                     random_state=random_state)
 
     multiview_generator.generate()
-    multiview_generator.to_hdf5(saving_path=path, name=name)
-
-    # for filename in os.listdir(path):
-    #     file_path = os.path.join(path, filename)
-    #     try:
-    #         if os.path.isfile(file_path) or os.path.islink(file_path):
-    #             os.unlink(file_path)
-    #         elif os.path.isdir(file_path):
-    #             shutil.rmtree(file_path)
-    #     except Exception as e:
-    #         print('Failed to delete %s. Reason: %s' % (file_path, e))
-    # changing_labels_indices = np.random.RandomState(random_state).choice(np.arange(y.shape[0]), n_outliers)
-    # print(changing_labels_indices)
-    # y[changing_labels_indices] = np.invert(y[changing_labels_indices].astype(bool)).astype(int)
-    # results_to_csv(path, Z, y, results)
\ No newline at end of file
+    multiview_generator.to_hdf5(saving_path=path, name=name)
\ No newline at end of file
-- 
GitLab