Commit 01634364 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

generator v2.0

parent 349c8ed1
__pycache__
\ No newline at end of file
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
import numpy as np
from sklearn.metrics import confusion_matrix
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.colors import DEFAULT_PLOTLY_COLORS
import plotly
def gen_folds(random_state, generator, n_folds=5):
    """Build shuffled, stratified cross-validation folds over ``generator.y``.

    Parameters
    ----------
    random_state : forwarded unchanged to ``StratifiedKFold``.
    generator : object exposing the label array as ``generator.y``.
    n_folds : int, number of splits requested (default 5).

    Returns
    -------
    list
        One ``[train_indices, test_indices]`` pair per fold, each member
        being a plain Python list of sample indices.
    """
    labels = generator.y
    sample_indices = np.arange(labels.shape[0])
    splitter = StratifiedKFold(n_folds, random_state=random_state,
                               shuffle=True)
    # Materialise the split generator so the folds can be iterated repeatedly.
    return [[list(train_idx), list(test_idx)]
            for train_idx, test_idx in splitter.split(sample_indices, labels)]
def test_dataset(folds, n_views, n_classes, generator):
    """Estimate the per-class, per-view error of a shallow decision tree.

    For every view and every fold, a ``DecisionTreeClassifier(max_depth=3)``
    is fitted on the training indices and evaluated on the test indices.
    Confusion matrices and per-class test counts are averaged over the folds.

    Parameters
    ----------
    folds : sequence of ``[train, test]`` index pairs.
    n_views : int, number of entries of ``generator.view_data`` to evaluate.
    n_classes : int, number of classes; labels are compared against
        ``0 .. n_classes - 1``.
    generator : object exposing ``view_data`` (indexable by view, each entry
        indexable as ``[rows, :]``) and the label array ``y``.

    Returns
    -------
    numpy.ndarray of shape ``(n_classes, n_views)``
        ``1 - mean_correct / mean_count`` for each class and view. An entry is
        NaN when the class has no test sample in any fold (0/0).
    """
    dt = DecisionTreeClassifier(max_depth=3)
    n_folds = len(folds)
    class_labels = np.arange(n_classes)
    confusion_mat = np.zeros((n_folds, n_views, n_classes, n_classes))
    n_sample_per_class = np.zeros((n_views, n_classes, n_folds))
    for view_index in range(n_views):
        view = generator.view_data[view_index]
        for fold_index, (train, test) in enumerate(folds):
            y_test = generator.y[test]
            dt.fit(view[train, :], generator.y[train])
            pred = dt.predict(view[test, :])
            # Explicit labels keep the matrix (n_classes, n_classes) even when
            # a class is missing from both this fold's test labels and the
            # predictions; without them the assignment below would fail.
            confusion_mat[fold_index, view_index, :, :] = confusion_matrix(
                y_test, pred, labels=class_labels)
            for class_index in class_labels:
                n_sample_per_class[view_index, class_index, fold_index] = (
                    np.count_nonzero(y_test == class_index))
    confusion_mat = np.mean(confusion_mat, axis=0)
    n_sample_per_class = np.mean(n_sample_per_class, axis=2)
    # Diagonal = mean number of correctly classified samples, (n_views, n_classes).
    mean_correct = np.diagonal(confusion_mat, axis1=1, axis2=2)
    # Transpose so the result is indexed [class, view] as callers expect.
    confusion_output = (1 - mean_correct / n_sample_per_class).T
    return confusion_output
def make_fig(conf, confusion_output, n_views, n_classes, generator,
             filename="center_blob.html"):
    """Plot the first three features of every view as 3D scatter subplots.

    One subplot is drawn per view, laid out two per row, with one trace per
    class. Each subplot title shows the requested confusion (``conf``) and
    the measured one (``confusion_output``) for that view, rounded to three
    decimals. The figure is written with ``plotly.offline.plot``.

    Parameters
    ----------
    conf : array indexed ``[:, view_index]``, shown as "In" in the titles.
    confusion_output : array indexed ``[:, view_index]``, shown as "Out".
    n_views : int, number of views to draw.
    n_classes : int, number of classes; labels are matched against
        ``0 .. n_classes - 1``.
    generator : object exposing ``view_data`` (one 2D array per view, with at
        least three columns) and the label array ``y``.
    filename : str, output HTML file (defaults to the historical
        ``"center_blob.html"``).
    """
    n_cols = 2
    # Never fewer than the historical 2x2 grid, but grow with the number of
    # views so that more than four views no longer overflow the layout.
    n_rows = max(2, (n_views + n_cols - 1) // n_cols)
    titles = [
        "View {}, Confusion : <br>In:{}<br>Out:{}".format(
            view_index,
            np.round(conf[:, view_index], 3),
            np.round(confusion_output[:, view_index], 3))
        for view_index in range(n_views)]
    specs = [[{'type': 'scatter3d'} for _ in range(n_cols)]
             for _ in range(n_rows)]
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=titles,
                        specs=specs)
    n_colors = len(DEFAULT_PLOTLY_COLORS)
    for view_index in range(n_views):
        # Subplots are filled row by row; plotly rows/cols are 1-based.
        row, col = divmod(view_index, n_cols)
        row += 1
        col += 1
        view = generator.view_data[view_index]
        for lab_index in range(n_classes):
            concerned_examples = np.where(generator.y == lab_index)[0]
            fig.add_trace(
                go.Scatter3d(
                    x=view[concerned_examples, 0],
                    y=view[concerned_examples, 1],
                    z=view[concerned_examples, 2],
                    mode='markers',
                    marker=dict(
                        size=1,
                        # Cycle through the palette so that having more
                        # classes than colours does not raise IndexError.
                        color=DEFAULT_PLOTLY_COLORS[lab_index % n_colors],
                        opacity=0.8,
                    ),
                    name="Class {}".format(lab_index)),
                row=row, col=col)
    plotly.offline.plot(fig, filename=filename)
This diff is collapsed.
n_samples: 100 # Number of samples in the dataset
n_views: 4 # Number of views in the dataset
n_classes: 3 # Number of classes in the dataset
n_clusters_per_class: 1 # Number of clusters for each class
class_sep: 1.55 # Separation between the different classes
n_informative: 100 # Divides the number of informative features in the latent space
flip_y: 0.00 # Ratio of label noise
random_state: 42
class_weights: None # The proportions of examples in each class
confusion_matrix: [[0.9, 0.5, 0.3, 0.1],
[0.5, 0.3, 0.3, 0.1],
[0.1, 0.1, 0.3, 0.1]]
precision: 0.05
example_subsampling_method: "block"
example_subsampling_config: {}
feature_subampling_method: "block"
feature_subsampling_config: {}
redundancy: None
methods: "uniform"
view_dims: None
estimator_name: "LOneOneScore"
estimator_config: {}
build_method: "iterative"
priority: "random"
This diff is collapsed.
This diff is collapsed.
import unittest
import numpy as np
from ..multiple_sub_problems import MultiViewSubProblemsGenerator
class Test_MultiVieSubProblemsGenerator():
    """Holds the fixture values used to exercise MultiViewSubProblemsGenerator.

    The constructor only stores parameters; no test method is defined here.
    """

    def __init__(self):
        # Dataset dimensions and generation parameters.
        self.n_views = 4
        self.n_folds = 10
        self.n_classes = 8
        self.n_samples = 2000
        self.class_sep = 1.5
        self.class_weights = [
            0.125, 0.1, 0.15, 0.125, 0.01, 0.2, 0.125, 0.125,
        ]
        # Confusion matrix: one row per class, one column per view.
        self.conf = np.array([
            [0.0, 0.1, 0.1, 0.9],
            [0.0, 0.2, 0.1, 0.0],
            [0.0, 0.3, 0.1, 0.0],
            [0.0, 0.4, 0.2, 0.0],
            [0.0, 0.5, 0.2, 0.0],
            [0.0, 0.6, 0.2, 0.0],
            [0.0, 0.7, 0.2, 0.0],
            [0.0, 0.8, 0.1, 0.0],
        ])
import unittest
import numpy as np
from ..update_baptiste import MultiviewDatasetGenetator
class TestSubSmaple(unittest.TestCase):
    """Unit tests for ``MultiviewDatasetGenetator.sub_sample``.

    NOTE(review): the generator and its seeded RandomState are created once
    in ``setUpClass`` and shared by every test, so the hard-coded expected
    draws presumably depend on the order in which the tests consume the
    random stream -- confirm before renaming or reordering tests.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared fixture: 100 indices and a generator seeded with 42."""
        cls.indices = np.arange(100)
        cls.quantity = 10
        cls.method = "block"
        cls.beggining = 0
        cls.random_state = np.random.RandomState(42)
        cls.generator = MultiviewDatasetGenetator(random_state=cls.random_state)

    def test_block_simple(self):
        """A block of 10 starting at 0 yields the first ten indices."""
        chosen_indices = self.generator.sub_sample(
            self.indices, self.quantity, self.method, self.beggining)
        expected = np.arange(10)
        np.testing.assert_array_equal(expected, chosen_indices)

    def test_block_too_big(self):
        """Asking for more than available returns every index."""
        chosen_indices = self.generator.sub_sample(
            self.indices, 121, self.method, self.beggining)
        expected = np.arange(100)
        np.testing.assert_array_equal(expected, chosen_indices)

    def test_block_no_beg(self):
        """Without a start position, the expected block is 82..91."""
        chosen_indices = self.generator.sub_sample(
            self.indices, 10, self.method, None)
        expected = np.arange(82, 92)
        np.testing.assert_array_equal(expected, chosen_indices)

    def test_block_no_beg_too_long(self):
        """Without a start position, an oversized request returns every index."""
        chosen_indices = self.generator.sub_sample(
            self.indices, 120, self.method, None)
        expected = np.arange(100)
        np.testing.assert_array_equal(expected, chosen_indices)

    def test_choice_simple(self):
        """The "choice" method returns the expected ten draws."""
        chosen_indices = self.generator.sub_sample(self.indices, 10, "choice")
        expected = np.array([77, 10, 4, 83, 62, 67, 30, 45, 95, 11])
        np.testing.assert_array_equal(expected, chosen_indices)

    def test_choice_too_big(self):
        """An oversized "choice" request returns 100 distinct indices."""
        chosen_indices = self.generator.sub_sample(self.indices, 105, "choice")
        n_chosen = chosen_indices.shape[0]
        n_distinct = np.unique(chosen_indices).shape[0]
        self.assertEqual(100, n_chosen)
        self.assertEqual(100, n_distinct)
# Run the test cases when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment