Commit 4d0493da authored by Baptiste Bauvin
Browse files

Tests improving

parent 74791924
Pipeline #7404 passed with stages
in 1 minute and 24 seconds
......@@ -6,4 +6,6 @@ demo/tutorials/.ipy*
demo/tutorials/supplementary_material/demo.hdf5
demo/tutorials/supplementary_material/tuto.hdf5
demo/tutorials/supplementary_material/report.md
demo/tutorials/supplementary_material/tuto/
\ No newline at end of file
demo/tutorials/supplementary_material/tuto/
.idea*
_static
\ No newline at end of file
# Default ignored files
/workspace.xml
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_12" default="false" project-jdk-name="Python 3.6 (develop)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/multiview_generator.iml" filepath="$PROJECT_DIR$/.idea/multiview_generator.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (develop)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="R User Library" level="project" />
<orderEntry type="library" name="R Skeletons" level="application" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
......@@ -64,7 +64,7 @@ class MultiViewSubProblemsGenerator:
complementarity_level=3,
mutual_error=0.0, name="generated_dataset", config_file=None,
sub_problem_type="base", sub_problem_configurations=None,
**kwargs):
min_rndm_val=-1, max_rndm_val=1, **kwargs):
if config_file is not None:
args = get_config_from_file(config_file)
......@@ -76,6 +76,8 @@ class MultiViewSubProblemsGenerator:
self.n_classes = n_classes
self.n_views = n_views
self.name = name
self.min_rndm_val = min_rndm_val
self.max_rndm_val = max_rndm_val
self.n_features = format_array(n_features, n_views, type_needed=int)
self.redundancy = format_array(redundancy, n_classes,
type_needed=float).reshape(
......@@ -263,72 +265,72 @@ class MultiViewSubProblemsGenerator:
label_indice in label_indices]
self.dt_error[:, view_index] = np.array(loss)
def _find_rows_cols(self):
    """Return the ``(rows, cols)`` grid holding one subplot per view.

    The grid starts at 1x1 (2x2 when there are exactly four views) and is
    grown one step at a time until ``rows * cols >= self.n_views``: a
    column is added while ``cols < 4 * rows``, otherwise a row is added.

    :return: tuple ``(rows, cols)`` of ints, both at least 1.
    """
    if self.n_views == 4:
        # Four views are laid out as a square instead of a single row.
        rows, cols = 2, 2
    else:
        rows, cols = 1, 1
    if self.n_views > 1:
        for n_placed in range(1, self.n_views + 1):
            if rows * cols >= n_placed:
                # The current grid already has a cell for this view.
                continue
            if cols < 4 * rows:
                cols += 1
            else:
                rows += 1
    return rows, cols
def _get_pca(self, n_components=2, output_path='.'):
    """Save an HTML figure with one 2D scatter panel per view.

    For each view in ``self.dataset``:

    * if it has more than ``n_components`` features, it is projected with
      a PCA fitted on that view;
    * if it has exactly one feature, the view is stacked with itself so
      that two columns are available;
    * otherwise it is plotted unchanged.

    Columns 0 and 1 of the resulting array are plotted, coloured by
    ``self.y`` and labelled with ``self.sample_ids``. The figure is
    written to ``<output_path>/<self.name>_fig_pca.html`` and is not
    opened automatically.

    :param n_components: number of components given to ``PCA``.
    :param output_path: directory in which the HTML file is written.
    """
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    pca = PCA(n_components=n_components)
    rows, cols = self._find_rows_cols()
    fig = make_subplots(
        rows=rows, cols=cols,
        subplot_titles=["View{}".format(view_index)
                        for view_index in range(self.n_views)],
        specs=[[{'type': 'scatter'} for _ in range(cols)]
               for _ in range(rows)])

    for view_index, view_data in enumerate(self.dataset):
        if self.n_features[view_index] > n_components:
            reducted_data = pca.fit(view_data).transform(view_data)
        elif self.n_features[view_index] == 1:
            # NOTE(review): assumes view_data is (n_samples, 1) so that the
            # result is (n_samples, 2) - confirm against the dataset layout.
            reducted_data = np.transpose(
                np.array([view_data, view_data]))[0, :, :]
        else:
            reducted_data = view_data

        # Fill the grid row by row; plotly subplot indices are 1-based.
        grid_row, grid_col = divmod(view_index, cols)
        fig.add_trace(
            go.Scatter(
                x=reducted_data[:, 0],
                y=reducted_data[:, 1],
                text=self.sample_ids,
                mode='markers',
                marker=dict(
                    size=3,
                    color=self.y,  # set color to an array/list of desired values
                    colorscale=["red", "blue", "black", "green", "orange",
                                "purple"],
                    opacity=0.8,
                ),
            ),
            row=grid_row + 1, col=grid_col + 1)

    fig.update_shapes(dict(xref='x', yref='y'))
    plotly.offline.plot(
        fig,
        filename=os.path.join(output_path, self.name+"_fig_pca.html"),
        auto_open=False)
# def _find_rows_cols(self):
# rows=1
# cols=1
# if self.n_views == 4:
# rows = 2
# cols = 2
# if self.n_views>1:
# for i in range(self.n_views):
# if rows*cols < i+1:
# if cols < 4*rows:
# cols+=1
# else:
# rows+=1
# return rows, cols
# def _get_pca(self, n_components=2, output_path='.'):
# pca = PCA(n_components=n_components)
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# rows, cols = self._find_rows_cols()
# fig = make_subplots(rows=rows, cols=cols,
# subplot_titles=["View{}".format(view_index)
# for view_index
# in range(self.n_views)],
# specs=[[{'type': 'scatter'} for _ in range(cols) ]
# for _ in range(rows)])
# row = 1
# col = 1
# import plotly.express as px
# for view_index, view_data in enumerate(self.dataset):
# if self.n_features[view_index]>n_components:
# pca.fit(view_data)
# reducted_data = pca.transform(view_data)
# elif self.n_features[view_index] ==1:
# reducted_data = np.transpose(np.array([view_data, view_data]))[0, :, :]
# else:
# reducted_data = view_data
# fig.add_trace(
# go.Scatter(
# x=reducted_data[:, 0],
# y=reducted_data[:, 1],
# text=self.sample_ids,
# mode='markers', marker=dict(
# size=3, # set color to an array/list of desired values
# color=self.y,
# colorscale=["red", "blue", "black", "green", "orange", "purple"],
# opacity=0.8
# ), ),
# row=row, col=col)
# col += 1
# if col > cols:
# col = 1
# row += 1
# fig.update_shapes(dict(xref='x', yref='y'))
# plotly.offline.plot(fig, filename=os.path.join(output_path, self.name+"_fig_pca.html"), auto_open=False)
def gen_view_report(self, view_index):
    """Return the report section describing one view.

    The section is a level-3 Markdown heading ``### View <view_index + 1>``
    (preceded by two newlines) immediately followed by whatever the view's
    sub-problem generator returns from ``gen_report()``.

    :param view_index: zero-based index into ``self._sub_problem_generators``.
    :return: the section as a string.
    """
    heading = "\n\n### View " + str(view_index + 1)
    return heading + self._sub_problem_generators[view_index].gen_report()
def _get_generator_report(self, view_index, doc_type=".md"):
    """Return a Markdown link to the scikit-learn generator used by a view.

    :param view_index: index into ``self.sub_problem_types``.
    :param doc_type: accepted for interface compatibility; not read here.
    :return: a Markdown link string for the ``make_classification`` or
        ``make_gaussian_quantiles`` families, ``None`` for any other
        sub-problem type.
    """
    problem_type = self.sub_problem_types[view_index]
    if problem_type in ["make_classification", "base"]:
        return "[`make_classification`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html)"
    if problem_type in ["gaussian", "make_gaussian_quantiles"]:
        return "[`make_gaussian_quantiles`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles)"
    # Unknown sub-problem types have no documentation link.
    return None
# def _get_generator_report(self, view_index, doc_type=".md"):
# if self.sub_problem_types[view_index] in ["make_classification", "base"]:
# return "[`make_classification`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html)"
# elif self.sub_problem_types[view_index]in ["gaussian", "make_gaussian_quantiles"]:
# return "[`make_gaussian_quantiles`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles)"
def _init_base_arguments(self):
self.n_samples_per_class = (
......
......@@ -19,8 +19,8 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
complementarity_level=3,
mutual_error=0.01, name="generated_dataset", config_file=None,
sub_problem_type="base", sub_problem_configurations=None,
sub_problem_generators="StumpsGenerator", random_vertices=False
, **kwargs):
sub_problem_generators="StumpsGenerator", random_vertices=False,
min_rndm_val=-1, max_rndm_val=1, **kwargs):
"""
:param random_state: int or np.random.RandomState object to fix the
......@@ -66,6 +66,8 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
config_file=config_file,
sub_problem_type=sub_problem_type,
sub_problem_configurations=sub_problem_configurations,
min_rndm_val=min_rndm_val,
max_rndm_val=max_rndm_val,
**kwargs)
self.random_vertices = format_array(random_vertices, n_views, bool)
self.sub_problem_generators = format_array(sub_problem_generators, n_views, str)
......@@ -273,8 +275,13 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator):
self.n_samples = np.sum(self.n_samples_per_class)
self.n_complem =np.zeros(self.n_classes)
self.n_max_features = np.max(self.n_features)
self.generated_data = self.rs.uniform(low=-self.latent_size_mult, high=self.latent_size_mult, size=(self.n_views, self.n_classes, self.n_max_samples, self.n_max_features))
self.descriptions = np.zeros((self.n_views, self.n_classes, self.n_max_samples,))
# Draw the initial values uniformly between min_rndm_val and max_rndm_val.
# The lower bound must not be negated: with the defaults (-1, 1) a negated
# bound gives low == high == 1 and every drawn value collapses to 1.
self.generated_data = self.rs.uniform(
    low=self.min_rndm_val,
    high=self.max_rndm_val,
    size=(self.n_views, self.n_classes,
          self.n_max_samples, self.n_max_features))
self.descriptions = np.zeros((self.n_views, self.n_classes,
self.n_max_samples,))
self.n_total_samples = np.sum(self.n_samples_per_class)
sample_indices = np.arange(int(np.sum(self.n_samples_per_class)))
self.rs.shuffle(sample_indices)
......
......@@ -120,7 +120,7 @@ class StumpsGenerator(BaseSubProblem):
return DecisionTreeClassifier(max_depth=math.ceil(math.log2(self.n_classes)))
class TreesGenerator(BaseSubProblem):
class TreesGenerator(BaseSubProblem): # pragma: no cover
""" Work in progress : Similar generator as StumpsGenerator, but that
generates several blobs per class """
......
import unittest
import os
from ..gaussian_classes import MultiViewGaussianSubProblemsGenerator
# Scratch directory used by the tests, located next to this file. The
# trailing empty component makes the path end with a separator.
tmp_path = os.path.join(
    os.path.dirname(
        os.path.abspath(__file__)),
    "tmp_tests", "")


def rm_tmp(path=tmp_path):
    """Recursively delete ``path`` on a best-effort basis.

    A missing directory, or any file-system error raised while deleting,
    is ignored so the helper can be called both before and after a test.
    Unlike a blanket ``except BaseException``, ``KeyboardInterrupt`` and
    ``SystemExit`` are not swallowed, and symbolic links to directories
    are not followed.

    :param path: directory to remove; defaults to ``tmp_path``.
    """
    import shutil

    shutil.rmtree(path, ignore_errors=True)
class Test_MultiViewGaussianSubProblemsGenerator(unittest.TestCase):
@classmethod
......@@ -13,4 +30,25 @@ class Test_MultiViewGaussianSubProblemsGenerator(unittest.TestCase):
pass
def test_simple(self):
    """Smoke test: generation with four generator names must not raise.

    The generator is built with alternating ``StumpsGenerator`` and
    ``RingsGenerator`` names, and ``generate_multi_view_dataset`` must
    return exactly two values.
    """
    generator_names = ["StumpsGenerator",
                       "RingsGenerator",
                       "StumpsGenerator",
                       "RingsGenerator"]
    gene = MultiViewGaussianSubProblemsGenerator(
        sub_problem_generators=generator_names)
    data, labels = gene.generate_multi_view_dataset()
def test_report(self):
    """Smoke test: an in-memory report can be built after generation.

    Uses the same alternating ``StumpsGenerator`` / ``RingsGenerator``
    configuration, generates the dataset, then calls ``gen_report`` with
    ``save=False``.
    """
    generator_names = ["StumpsGenerator",
                       "RingsGenerator",
                       "StumpsGenerator",
                       "RingsGenerator"]
    gene = MultiViewGaussianSubProblemsGenerator(
        sub_problem_generators=generator_names)
    data, labels = gene.generate_multi_view_dataset()
    rep = gene.gen_report(save=False)
def test_save(self):
    """Smoke test: the dataset and its report can be written to disk.

    The scratch directory is recreated before the run and removed in a
    ``finally`` clause, so a failure in ``to_hdf5_mc`` or ``gen_report``
    does not leave files behind for later tests.
    """
    gene = MultiViewGaussianSubProblemsGenerator()
    data, labels = gene.generate_multi_view_dataset()
    # Start from a clean, empty scratch directory.
    rm_tmp()
    os.mkdir(tmp_path)
    try:
        gene.to_hdf5_mc(tmp_path)
        gene.gen_report(output_path=tmp_path, save=True)
    finally:
        rm_tmp()
\ No newline at end of file
# import unittest
# import numpy as np
#
# from ..multiple_sub_problems import MultiViewSubProblemsGenerator
#
#
# class Test_MultiViewSubProblemsGenerator(unittest.TestCase):
#
# @classmethod
# def setUpClass(cls):
# pass
#
# @classmethod
# def tearDownClass(cls):
# pass
#
# def test_simple(self):
# gene = MultiViewSubProblemsGenerator()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment