Skip to content
Snippets Groups Projects
Commit 01634364 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

generator v2.0

parent 349c8ed1
No related branches found
No related tags found
No related merge requests found
__pycache__
\ No newline at end of file
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
import numpy as np
from sklearn.metrics import confusion_matrix
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.colors import DEFAULT_PLOTLY_COLORS
import plotly
def gen_folds(random_state, generator, n_folds=5):
    """Build shuffled, stratified cross-validation folds over ``generator.y``.

    Parameters
    ----------
    random_state
        Forwarded unchanged to ``StratifiedKFold`` as ``random_state``.
    generator
        Object exposing the labels as ``generator.y``; only ``y`` is read.
    n_folds : int, default 5
        Number of folds requested from ``StratifiedKFold``.

    Returns
    -------
    list
        One ``[train_indices, test_indices]`` pair per fold, each member
        converted to a plain Python list.
    """
    splitter = StratifiedKFold(
        n_folds,
        random_state=random_state,
        shuffle=True,
    )
    labels = generator.y
    # Only the sample positions matter for splitting, so a simple index
    # range stands in for the feature matrix.
    sample_indices = np.arange(labels.shape[0])
    return [
        [list(train_part), list(test_part)]
        for train_part, test_part in splitter.split(sample_indices, labels)
    ]
def test_dataset(folds, n_views, n_classes, generator):
    """Measure the per-class, per-view error of a depth-3 decision tree.

    For every view and every fold a ``DecisionTreeClassifier(max_depth=3)``
    is fitted on the training indices and evaluated on the test indices.
    Confusion matrices and per-class test counts are averaged over the
    folds, and the returned value for a (class, view) pair is
    ``1 - mean_correct / mean_count``.

    Parameters
    ----------
    folds : sequence of ``[train, test]`` index pairs
        Each member is used to index rows of ``generator.view_data[v]`` and
        entries of ``generator.y``.
    n_views : int
        Number of views to evaluate (``generator.view_data[0..n_views-1]``).
    n_classes : int
        Number of classes; labels are compared against ``0..n_classes-1``.
    generator
        Object exposing ``view_data`` (indexable per view) and ``y``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_classes, n_views)``. An entry is ``nan`` when a
        class never appears in any test fold (division by a zero count).
    """
    dt = DecisionTreeClassifier(max_depth=3)
    n_folds = len(folds)
    class_labels = np.arange(n_classes)
    confusion_mat = np.zeros((n_folds, n_views, n_classes, n_classes))
    n_sample_per_class = np.zeros((n_views, n_classes, n_folds))

    for view_index in range(n_views):
        view = generator.view_data[view_index]
        for fold_index, (train, test) in enumerate(folds):
            y_test = generator.y[test]
            dt.fit(view[train, :], generator.y[train])
            pred = dt.predict(view[test, :])
            # Pin the label set: without it, a fold in which some class is
            # neither present nor predicted yields a smaller matrix and the
            # assignment below fails with a shape mismatch.
            confusion_mat[fold_index, view_index, :, :] = confusion_matrix(
                y_test, pred, labels=class_labels)
            for class_index in range(n_classes):
                n_sample_per_class[view_index, class_index, fold_index] = (
                    np.count_nonzero(y_test == class_index))

    # Average over folds, then turn the diagonal (correct predictions) into
    # an error ratio per (view, class).
    mean_confusion = np.mean(confusion_mat, axis=0)
    mean_counts = np.mean(n_sample_per_class, axis=2)
    mean_correct = np.diagonal(mean_confusion, axis1=1, axis2=2)
    error_per_view_class = 1 - mean_correct / mean_counts
    # Callers expect (n_classes, n_views); copy so the result is a fresh,
    # contiguous array rather than a transposed view.
    return error_per_view_class.T.copy()
def make_fig(conf, confusion_output, n_views, n_classes, generator):
    """Plot every view as a 3D scatter and write it to ``center_blob.html``.

    One ``scatter3d`` subplot is created per view, two per row. Each subplot
    title shows the view index together with ``conf[:, view]`` ("In") and
    ``confusion_output[:, view]`` ("Out"), both rounded to 3 decimals. Inside
    a subplot, one trace is drawn per class using the first three columns of
    the view's data.

    Parameters
    ----------
    conf, confusion_output : array-like, indexed as ``[:, view_index]``
        Values displayed in the subplot titles.
    n_views : int
        Number of views to draw. The grid keeps the 2x2 layout for up to four
        views and gains extra rows beyond that.
    n_classes : int
        Number of classes; samples are selected with ``generator.y == label``.
    generator
        Object exposing ``view_data`` and ``y``.
        NOTE(review): each view is assumed to have at least 3 columns.

    Returns
    -------
    None
        The figure is written with ``plotly.offline.plot``.
    """
    n_cols = 2
    # At least two rows so that figures with <= 4 views look exactly as
    # before; ceil-divide to fit any larger number of views.
    n_rows = max(2, -(-n_views // n_cols))

    titles = [
        "View {}, Confusion : <br>In:{}<br>Out:{}".format(
            view_index,
            np.round(conf[:, view_index], 3),
            np.round(confusion_output[:, view_index], 3))
        for view_index in range(n_views)
    ]
    specs = [[{'type': 'scatter3d'} for _ in range(n_cols)]
             for _ in range(n_rows)]
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=titles,
                        specs=specs)

    n_colors = len(DEFAULT_PLOTLY_COLORS)
    for view_index in range(n_views):
        # Subplots are filled left-to-right, top-to-bottom (1-based).
        row_offset, col_offset = divmod(view_index, n_cols)
        view = generator.view_data[view_index]
        for lab_index in range(n_classes):
            concerned_examples = np.where(generator.y == lab_index)[0]
            fig.add_trace(
                go.Scatter3d(
                    x=view[concerned_examples, 0],
                    y=view[concerned_examples, 1],
                    z=view[concerned_examples, 2],
                    mode='markers',
                    marker=dict(
                        size=1,
                        # Wrap around the palette so more classes than
                        # available colours no longer raise IndexError.
                        color=DEFAULT_PLOTLY_COLORS[lab_index % n_colors],
                        opacity=0.8,
                    ),
                    name="Class {}".format(lab_index)),
                row=row_offset + 1, col=col_offset + 1)

    plotly.offline.plot(fig, filename="center_blob.html")
This diff is collapsed.
n_samples: 100 # Number of samples in the dataset
n_views: 4 # Number of views in the dataset
n_classes: 3 # Number of classes in the dataset
n_clusters_per_class: 1 # Number of clusters for each class
class_sep: 1.55 # Separation between the different classes
n_informative: 100 # Divides the number of informative features in the latent space
flip_y: 0.00 # Ratio of label noise
random_state: 42
class_weights: None # The proportions of examples in each class
confusion_matrix: [[0.9, 0.5, 0.3, 0.1],
[0.5, 0.3, 0.3, 0.1],
[0.1, 0.1, 0.3, 0.1]]
precision: 0.05
example_subsampling_method: "block"
example_subsampling_config: {}
feature_subampling_method: "block"
feature_subsampling_config: {}
redundancy: None
methods: "uniform"
view_dims: None
estimator_name: "LOneOneScore"
estimator_config: {}
build_method: "iterative"
priority: "random"
File added
This diff is collapsed.
This diff is collapsed.
import unittest
import numpy as np
from ..multiple_sub_problems import MultiViewSubProblemsGenerator
class Test_MultiVieSubProblemsGenerator():
    """Holds the fixture values for the multi-view sub-problems generator.

    Instantiating the class only stores the configuration below as
    attributes; no generator is built here.
    """

    def __init__(self):
        # Scalar settings.
        self.n_views = 4
        self.n_folds = 10
        self.n_classes = 8
        self.n_samples = 2000
        self.class_sep = 1.5
        self.class_weights = [
            0.125, 0.1, 0.15, 0.125, 0.01, 0.2, 0.125, 0.125,
        ]
        # One row per class, one column per view (8 x 4).
        confusion_rows = [
            [0.0, 0.1, 0.1, 0.9],
            [0.0, 0.2, 0.1, 0.0],
            [0.0, 0.3, 0.1, 0.0],
            [0.0, 0.4, 0.2, 0.0],
            [0.0, 0.5, 0.2, 0.0],
            [0.0, 0.6, 0.2, 0.0],
            [0.0, 0.7, 0.2, 0.0],
            [0.0, 0.8, 0.1, 0.0],
        ]
        self.conf = np.array(confusion_rows)
import unittest
import numpy as np
from ..update_baptiste import MultiviewDatasetGenetator
class TestSubSmaple(unittest.TestCase):
    """Exercise ``MultiviewDatasetGenetator.sub_sample`` ("block"/"choice").

    NOTE(review): all tests share the single ``RandomState`` created in
    ``setUpClass``, so the hard-coded expected values for the random draws
    presumably depend on the order in which the tests run - confirm before
    renaming or reordering any test.
    """

    @classmethod
    def setUpClass(cls):
        # Fixed inputs shared by every test.
        cls.indices = np.arange(100)
        cls.quantity = 10
        cls.method = "block"
        cls.beggining = 0
        # One seeded RNG, handed to the generator under test.
        cls.random_state = np.random.RandomState(42)
        cls.generator = MultiviewDatasetGenetator(
            random_state=cls.random_state)

    def test_block_simple(self):
        """A block of 10 starting at 0 yields indices 0..9."""
        picked = self.generator.sub_sample(
            self.indices, self.quantity, self.method, self.beggining)
        expected = np.arange(10)
        np.testing.assert_array_equal(expected, picked)

    def test_block_too_big(self):
        """Requesting more than available returns every index."""
        picked = self.generator.sub_sample(
            self.indices, 121, self.method, self.beggining)
        expected = np.arange(100)
        np.testing.assert_array_equal(expected, picked)

    def test_block_no_beg(self):
        """Without a beginning, a block of 10 is taken from index 82."""
        picked = self.generator.sub_sample(
            self.indices, 10, self.method, None)
        expected = np.arange(82, 92)
        np.testing.assert_array_equal(expected, picked)

    def test_block_no_beg_too_long(self):
        """Without a beginning, an oversized block returns every index."""
        picked = self.generator.sub_sample(
            self.indices, 120, self.method, None)
        expected = np.arange(100)
        np.testing.assert_array_equal(expected, picked)

    def test_choice_simple(self):
        """The "choice" method returns the expected 10 seeded indices."""
        picked = self.generator.sub_sample(self.indices, 10, "choice")
        expected = np.array([77, 10, 4, 83, 62, 67, 30, 45, 95, 11])
        np.testing.assert_array_equal(expected, picked)

    def test_choice_too_big(self):
        """An oversized "choice" request returns 100 distinct indices."""
        picked = self.generator.sub_sample(self.indices, 105, "choice")
        self.assertEqual(100, picked.shape[0])
        distinct = np.unique(picked)
        self.assertEqual(100, distinct.shape[0])


if __name__ == '__main__':
    unittest.main()
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment