generator v2.0

01634364 · Baptiste Bauvin · 349c8ed1 · 01634364 · 01634364 · 01634364
Commit 01634364 authored Mar 19, 2020 by Baptiste Bauvin
--- a/.gitignore
+++ b/.gitignore
+__pycache__
\ No newline at end of file
--- a/classify_generated.py
+++ b/classify_generated.py
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import StratifiedKFold
+import numpy as np
+from sklearn.metrics import confusion_matrix
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+from plotly.colors import DEFAULT_PLOTLY_COLORS
+import plotly
+
+
+def gen_folds(random_state, generator, n_folds=5):
+    folds_gene = StratifiedKFold(n_folds, random_state=random_state,
+                                 shuffle=True)
+    folds = folds_gene.split(np.arange(generator.y.shape[0]), generator.y)
+    folds = [[list(train), list(test)] for train, test in folds]
+    return folds
+
+
+def test_dataset( folds, n_views, n_classes, generator,):
+    dt = DecisionTreeClassifier(max_depth=3)
+    n_folds = len(folds)
+    confusion_mat = np.zeros((n_folds, n_views, n_classes, n_classes))
+    n_sample_per_class = np.zeros((n_views, n_classes, n_folds))
+    for view_index in range(n_views):
+        for fold_index, [train, test] in enumerate(folds):
+            dt.fit(generator.view_data[view_index][train, :], generator.y[train])
+            pred = dt.predict(generator.view_data[view_index][test, :])
+            confusion_mat[fold_index, view_index, :, :] = confusion_matrix(generator.y[test], pred)
+            for class_index in range(n_classes):
+                n_sample_per_class[view_index, class_index, fold_index] = np.where(generator.y[test]==class_index)[0].shape[0]
+    confusion_mat = np.mean(confusion_mat, axis=0)
+    n_sample_per_class = np.mean(n_sample_per_class, axis=2)
+    confusion_output = np.zeros((n_classes, n_views))
+    for class_index in range(n_classes):
+        for view_index in range(n_views):
+            confusion_output[class_index, view_index] = 1-confusion_mat[view_index, class_index, class_index]/n_sample_per_class[view_index, class_index]
+    return confusion_output
+
+
+def make_fig(conf, confusion_output, n_views, n_classes, generator):
+    fig = make_subplots(rows=2, cols=2, subplot_titles=[
+        "View {}, Confusion : <br>In:{}<br>Out:{}".format(view_index,
+                                                   np.round(conf[:, view_index], 3),
+                                                   np.round(confusion_output[:, view_index], 3)) for
+        view_index
+        in range(n_views)],
+                        specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}, ],
+                               [{'type': 'scatter3d'},
+                                {'type': 'scatter3d'}, ]])
+    row = 1
+    col = 1
+    for view_index in range(n_views):
+        for lab_index in range(n_classes):
+            concerned_examples = np.where(generator.y == lab_index)[0]
+            fig.add_trace(
+                go.Scatter3d(
+                    x=generator.view_data[view_index][concerned_examples, 0],
+                    y=generator.view_data[view_index][concerned_examples, 1],
+                    z=generator.view_data[view_index][concerned_examples, 2],
+                    mode='markers', marker=dict(
+                        size=1,  # set color to an array/list of desired values
+                        color=DEFAULT_PLOTLY_COLORS[lab_index],
+                        opacity=0.8
+                    ), name="Class {}".format(lab_index)), row=row, col=col)
+            # fig.update_layout(
+            #             scene=dict(
+            #             xaxis=dict(nticks=4, range=[low_range, high_range], ),
+            #             yaxis=dict(nticks=4, range=[low_range, high_range], ),
+            #             zaxis=dict(nticks=4, range=[low_range, high_range], ), ),)
+        col += 1
+        if col == 3:
+            col = 1
+            row += 1
+            # fig.update_xaxes(range=[-class_sep-0.1*class_sep, +class_sep+margin_ratio*class_sep], row=row, col=col)
+            # fig.update_yaxes(
+            #     range=[-class_sep - 0.1 * class_sep, +class_sep + margin_ratio * class_sep],
+            #     row=row, col=col)
+            # fig.update_zaxes(
+            #     range=[-class_sep - 0.1 * class_sep, +class_sep + margin_ratio * class_sep],
+            #     row=row, col=col)
+    plotly.offline.plot(fig, filename="center_blob.html")
+
--- a/demo/center_blob.html
+++ b/demo/center_blob.html
--- a/demo/config_generator.yml
+++ b/demo/config_generator.yml
+n_samples: 100  # Number of samples in the dataset
+n_views: 4  # Number of views in the dataset
+n_classes: 3  # Number of classes in the dataset
+n_clusters_per_class: 1  # Number of clusters for each class
+class_sep: 1.55 # Separation between the different classes
+n_informative: 100 # Divides the number of informative features in the latent space
+flip_y: 0.00  # Ratio of label noise
+random_state: 42
+# NOTE(review): YAML reads a bare None as the string "None", not as null;
+# confirm the loader handles that for class_weights, redundancy and view_dims.
+class_weights: None # The proportions of examples in each class
+confusion_matrix: [[0.9, 0.5, 0.3, 0.1],
+                   [0.5, 0.3, 0.3, 0.1],
+                   [0.1, 0.1, 0.3, 0.1]]
+precision: 0.05
+example_subsampling_method: "block"
+example_subsampling_config: {}
+# NOTE(review): key is spelled "subampling" (no "s") - confirm against the code that reads it
+feature_subampling_method: "block"
+feature_subsampling_config: {}
+redundancy: None
+methods: "uniform"
+view_dims: None
+estimator_name: "LOneOneScore"
+estimator_config: {}
+build_method: "iterative"
+priority: "random"
--- a/demo/generated_dset.hdf5
+++ b/demo/generated_dset.hdf5
--- a/demo/random_in_classes.html
+++ b/demo/random_in_classes.html
--- a/generator/multiple_sub_problems.py
+++ b/generator/multiple_sub_problems.py
--- a/generator/tests/test_multiple_sub_problems.py
+++ b/generator/tests/test_multiple_sub_problems.py
+import unittest
+import numpy as np
+
+from ..multiple_sub_problems import MultiViewSubProblemsGenerator
+
+
+class Test_MultiVieSubProblemsGenerator():
+
+    def __init__(self):
+        self.conf = np.array([
+            np.array([0.0, 0.1, 0.1, 0.9]),
+            np.array([0.0, 0.2, 0.1, 0.0]),
+            np.array([0.0, 0.3, 0.1, 0.0]),
+            np.array([0.0, 0.4, 0.2, 0.0]),
+            np.array([0.0, 0.5, 0.2, 0.0]),
+            np.array([0.0, 0.6, 0.2, 0.0]),
+            np.array([0.0, 0.7, 0.2, 0.0]),
+            np.array([0.0, 0.8, 0.1, 0.]),
+        ])
+        self.n_views = 4
+        self.n_folds = 10
+        self.n_classes = 8
+        self.n_samples = 2000
+        self.class_sep = 1.5
+        self.class_weights = [0.125, 0.1, 0.15, 0.125, 0.01, 0.2, 0.125, 0.125, ]
+
--- a/generator/tests/unit_test_update.py
+++ b/generator/tests/unit_test_update.py
+import unittest
+import numpy as np
+
+from ..update_baptiste import MultiviewDatasetGenetator
+
+class TestSubSmaple(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.random_state = np.random.RandomState(42)
+        cls.indices = np.arange(100)
+        cls.quantity = 10
+        cls.method = "block"
+        cls.beggining = 0
+        cls.generator = MultiviewDatasetGenetator(random_state=cls.random_state)
+
+    def test_block_simple(self):
+        chosen_indices = self.generator.sub_sample(self.indices, self.quantity, self.method, self.beggining)
+        np.testing.assert_array_equal(np.array([0,1,2,3,4,5,6,7,8,9]), chosen_indices)
+
+    def test_block_too_big(self):
+        chosen_indices = self.generator.sub_sample(self.indices, 121,
+                                                   self.method, self.beggining)
+        np.testing.assert_array_equal(np.arange(100),
+                                      chosen_indices)
+
+    def test_block_no_beg(self):
+        chosen_indices = self.generator.sub_sample(self.indices, 10,
+                                                   self.method, None)
+        np.testing.assert_array_equal(np.array([82, 83, 84, 85, 86, 87, 88, 89, 90, 91,]),
+                                      chosen_indices)
+
+    def test_block_no_beg_too_long(self):
+        chosen_indices = self.generator.sub_sample(self.indices, 120,
+                                                   self.method, None)
+        np.testing.assert_array_equal(np.arange(100),
+                                      chosen_indices)
+    def test_choice_simple(self):
+        chosen_indices = self.generator.sub_sample(self.indices, 10,
+                                                   "choice")
+        np.testing.assert_array_equal(np.array([77, 10,  4, 83, 62, 67, 30, 45, 95, 11]),
+                                      chosen_indices)
+
+    def test_choice_too_big(self):
+        chosen_indices = self.generator.sub_sample(self.indices, 105,
+                                                   "choice")
+        self.assertEqual(100, chosen_indices.shape[0])
+        self.assertEqual(100, np.unique(chosen_indices).shape[0])
+
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/generator/update_baptiste.py
+++ b/generator/update_baptiste.py