diff --git a/.gitignore b/.gitignore index 7a6d303a1256cba62a5d540e2acb74a4fab4b124..db4f192195f18ddead21ddf6e973c87286d03ca2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ demo/tutorials/.ipy* demo/tutorials/supplementary_material/demo.hdf5 demo/tutorials/supplementary_material/tuto.hdf5 demo/tutorials/supplementary_material/report.md -demo/tutorials/supplementary_material/tuto/ \ No newline at end of file +demo/tutorials/supplementary_material/tuto/ +.idea* +_static \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 0e40fe8f57160b43f9ea8e200b1a5d9f91f4aed9..0000000000000000000000000000000000000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ - -# Default ignored files -/workspace.xml \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 3c2c013aca5383a4194378b8fe233d5f04daa7b8..0000000000000000000000000000000000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="JavaScriptSettings"> - <option name="languageLevel" value="ES6" /> - </component> - <component name="ProjectRootManager" version="2" languageLevel="JDK_12" default="false" project-jdk-name="Python 3.6 (develop)" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 6164328c76fbdf70a112333535f10474e6703fd0..0000000000000000000000000000000000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/multiview_generator.iml" filepath="$PROJECT_DIR$/.idea/multiview_generator.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/.idea/multiview_generator.iml b/.idea/multiview_generator.iml deleted file mode 100644 index b6d61efc59564f86c4cca889b90b0289fe871ca5..0000000000000000000000000000000000000000 --- a/.idea/multiview_generator.iml +++ /dev/null @@ -1,10 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="JAVA_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.6 (develop)" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - <orderEntry type="library" name="R User Library" level="project" /> - <orderEntry type="library" name="R Skeletons" level="application" /> - </component> -</module> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1ddfbbc029bcab630581847471d7f238ec53..0000000000000000000000000000000000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="" vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/_static/fig_rec.png b/_static/fig_rec.png deleted file mode 100644 index 8b1dde52ecc5797568e18ec7dd5efe9ddb01a54e..0000000000000000000000000000000000000000 Binary files a/_static/fig_rec.png and /dev/null differ diff --git a/multiview_generator/base.py b/multiview_generator/base.py index f616c4a5ee5a76406714ec2600fe90bec1a1f442..ca6180ee6a01db81f7bb36924e1031cefa7af4d7 100644 --- a/multiview_generator/base.py +++ b/multiview_generator/base.py @@ -64,7 +64,7 @@ class MultiViewSubProblemsGenerator: complementarity_level=3, mutual_error=0.0, name="generated_dataset", config_file=None, sub_problem_type="base", sub_problem_configurations=None, - **kwargs): + min_rndm_val=-1, max_rndm_val=1, **kwargs): if config_file is not None: args = get_config_from_file(config_file) @@ -76,6 +76,8 @@ class MultiViewSubProblemsGenerator: self.n_classes = n_classes self.n_views = n_views self.name = name + self.min_rndm_val = min_rndm_val + self.max_rndm_val = max_rndm_val self.n_features = format_array(n_features, n_views, type_needed=int) self.redundancy = format_array(redundancy, n_classes, type_needed=float).reshape( @@ -263,72 +265,72 @@ class MultiViewSubProblemsGenerator: label_indice in label_indices] self.dt_error[:, view_index] = np.array(loss) - def _find_rows_cols(self): - rows=1 - cols=1 - if self.n_views == 4: - rows = 2 - cols = 2 - if self.n_views>1: - for i in range(self.n_views): - if rows*cols < i+1: - if cols < 4*rows: - cols+=1 - else: - rows+=1 - return rows, cols - - def _get_pca(self, n_components=2, output_path='.'): - pca = PCA(n_components=n_components) - import plotly.graph_objects as go - from plotly.subplots import make_subplots - rows, cols = self._find_rows_cols() - fig = make_subplots(rows=rows, cols=cols, - subplot_titles=["View{}".format(view_index) - for view_index - in range(self.n_views)], - specs=[[{'type': 'scatter'} for _ in range(cols) ] - for _ in range(rows)]) - row = 1 - col = 1 - import plotly.express as px - for view_index, view_data in enumerate(self.dataset): - if self.n_features[view_index]>n_components: - pca.fit(view_data) - reducted_data = pca.transform(view_data) - elif self.n_features[view_index] ==1: - reducted_data = np.transpose(np.array([view_data, view_data]))[0, :, :] - else: - reducted_data = view_data - fig.add_trace( - go.Scatter( - x=reducted_data[:, 0], - y=reducted_data[:, 1], - text=self.sample_ids, - mode='markers', marker=dict( - size=3, # set color to an array/list of desired values - color=self.y, - colorscale=["red", "blue", "black", "green", "orange", "purple"], - opacity=0.8 - ), ), - row=row, col=col) - col += 1 - if col > cols: - col = 1 - row += 1 - fig.update_shapes(dict(xref='x', yref='y')) - plotly.offline.plot(fig, filename=os.path.join(output_path, self.name+"_fig_pca.html"), auto_open=False) + # def _find_rows_cols(self): + # rows=1 + # cols=1 + # if self.n_views == 4: + # rows = 2 + # cols = 2 + # if self.n_views>1: + # for i in range(self.n_views): + # if rows*cols < i+1: + # if cols < 4*rows: + # cols+=1 + # else: + # rows+=1 + # return rows, cols + + # def _get_pca(self, n_components=2, output_path='.'): + # pca = PCA(n_components=n_components) + # import plotly.graph_objects as go + # from plotly.subplots import make_subplots + # rows, cols = self._find_rows_cols() + # fig = make_subplots(rows=rows, cols=cols, + # subplot_titles=["View{}".format(view_index) + # for view_index + # in range(self.n_views)], + # specs=[[{'type': 'scatter'} for _ in range(cols) ] + # for _ in range(rows)]) + # row = 1 + # col = 1 + # import plotly.express as px + # for view_index, view_data in enumerate(self.dataset): + # if self.n_features[view_index]>n_components: + # pca.fit(view_data) + # reducted_data = pca.transform(view_data) + # elif self.n_features[view_index] ==1: + # reducted_data = np.transpose(np.array([view_data, view_data]))[0, :, :] + # else: + # reducted_data = view_data + # fig.add_trace( + # go.Scatter( + # x=reducted_data[:, 0], + # y=reducted_data[:, 1], + # text=self.sample_ids, + # mode='markers', marker=dict( + # size=3, # set color to an array/list of desired values + # color=self.y, + # colorscale=["red", "blue", "black", "green", "orange", "purple"], + # opacity=0.8 + # ), ), + # row=row, col=col) + # col += 1 + # if col > cols: + # col = 1 + # row += 1 + # fig.update_shapes(dict(xref='x', yref='y')) + # plotly.offline.plot(fig, filename=os.path.join(output_path, self.name+"_fig_pca.html"), auto_open=False) def gen_view_report(self, view_index): view_string = "\n\n### View "+str(view_index+1) view_string+=self._sub_problem_generators[view_index].gen_report() return view_string - def _get_generator_report(self, view_index, doc_type=".md"): - if self.sub_problem_types[view_index] in ["make_classification", "base"]: - return "[`make_classification`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html)" - elif self.sub_problem_types[view_index]in ["gaussian", "make_gaussian_quantiles"]: - return "[`make_gaussian_quantiles`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles)" + # def _get_generator_report(self, view_index, doc_type=".md"): + # if self.sub_problem_types[view_index] in ["make_classification", "base"]: + # return "[`make_classification`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html)" + # elif self.sub_problem_types[view_index]in ["gaussian", "make_gaussian_quantiles"]: + # return "[`make_gaussian_quantiles`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles)" def _init_base_arguments(self): self.n_samples_per_class = ( diff --git a/multiview_generator/gaussian_classes.py b/multiview_generator/gaussian_classes.py index 2a6dbdcb180ce123914e275d2b4936e2233745c5..23b7a3001448809dcdd5a39170e1bd19d698a1b5 100644 --- a/multiview_generator/gaussian_classes.py +++ b/multiview_generator/gaussian_classes.py @@ -19,8 +19,8 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator): complementarity_level=3, mutual_error=0.01, name="generated_dataset", config_file=None, sub_problem_type="base", sub_problem_configurations=None, - sub_problem_generators="StumpsGenerator", random_vertices=False - , **kwargs): + sub_problem_generators="StumpsGenerator", random_vertices=False, + min_rndm_val=-1, max_rndm_val=1, **kwargs): """ :param random_state: int or np.random.RandomState object to fix the @@ -66,6 +66,8 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator): config_file=config_file, sub_problem_type=sub_problem_type, sub_problem_configurations=sub_problem_configurations, + min_rndm_val=min_rndm_val, + max_rndm_val=max_rndm_val, **kwargs) self.random_vertices = format_array(random_vertices, n_views, bool) self.sub_problem_generators = format_array(sub_problem_generators, n_views, str) @@ -273,8 +275,13 @@ class MultiViewGaussianSubProblemsGenerator(MultiViewSubProblemsGenerator): self.n_samples = np.sum(self.n_samples_per_class) self.n_complem =np.zeros(self.n_classes) self.n_max_features = np.max(self.n_features) - self.generated_data = self.rs.uniform(low=-self.latent_size_mult, high=self.latent_size_mult, size=(self.n_views, self.n_classes, self.n_max_samples, self.n_max_features)) - self.descriptions = np.zeros((self.n_views, self.n_classes, self.n_max_samples,)) + self.generated_data = self.rs.uniform(low=-self.min_rndm_val, + high=self.max_rndm_val, + size=(self.n_views, self.n_classes, + self.n_max_samples, + self.n_max_features)) + self.descriptions = np.zeros((self.n_views, self.n_classes, + self.n_max_samples,)) self.n_total_samples = np.sum(self.n_samples_per_class) sample_indices = np.arange(int(np.sum(self.n_samples_per_class))) self.rs.shuffle(sample_indices) diff --git a/multiview_generator/sub_problems.py b/multiview_generator/sub_problems.py index b08c7f347c280785fa7a345449b6aaf5690d58b6..e464b8ab97ba7797f15e3c54b17a2ceca3b582c2 100644 --- a/multiview_generator/sub_problems.py +++ b/multiview_generator/sub_problems.py @@ -120,7 +120,7 @@ class StumpsGenerator(BaseSubProblem): return DecisionTreeClassifier(max_depth=math.ceil(math.log2(self.n_classes))) -class TreesGenerator(BaseSubProblem): +class TreesGenerator(BaseSubProblem): # pragma: no cover """ Work in progress : Similar generator as StumpsGenerator, but that generates several blobs per class """ diff --git a/multiview_generator/tests/test_gaussian_classes.py b/multiview_generator/tests/test_gaussian_classes.py index 8dc472287f75c3a9534452462afaf53f7328168a..4890343f5588af8a1f0d4dc8879987c7e0190f66 100644 --- a/multiview_generator/tests/test_gaussian_classes.py +++ b/multiview_generator/tests/test_gaussian_classes.py @@ -1,7 +1,24 @@ import unittest +import os from ..gaussian_classes import MultiViewGaussianSubProblemsGenerator +tmp_path = os.path.join( + os.path.dirname( + os.path.abspath(__file__)), + "tmp_tests", "") + +def rm_tmp(path=tmp_path): + try: + for file_name in os.listdir(path): + if os.path.isdir(os.path.join(path, file_name)): + rm_tmp(os.path.join(path, file_name)) + else: + os.remove(os.path.join(path, file_name)) + os.rmdir(path) + except BaseException: + pass + class Test_MultiViewGaussianSubProblemsGenerator(unittest.TestCase): @classmethod @@ -13,4 +30,25 @@ class Test_MultiViewGaussianSubProblemsGenerator(unittest.TestCase): pass def test_simple(self): + gene = MultiViewGaussianSubProblemsGenerator(sub_problem_generators=["StumpsGenerator", + "RingsGenerator", + "StumpsGenerator", + "RingsGenerator"]) + data, labels = gene.generate_multi_view_dataset() + + def test_report(self): + gene = MultiViewGaussianSubProblemsGenerator(sub_problem_generators=["StumpsGenerator", + "RingsGenerator", + "StumpsGenerator", + "RingsGenerator"]) + data, labels = gene.generate_multi_view_dataset() + rep = gene.gen_report(save=False) + + def test_save(self): gene = MultiViewGaussianSubProblemsGenerator() + data, labels = gene.generate_multi_view_dataset() + rm_tmp() + os.mkdir(tmp_path) + gene.to_hdf5_mc(tmp_path) + rep = gene.gen_report(output_path=tmp_path, save=True) + rm_tmp() \ No newline at end of file diff --git a/multiview_generator/tests/test_multiple_sub_problems.py b/multiview_generator/tests/test_multiple_sub_problems.py deleted file mode 100644 index e2ce0c3cc3997271862b76cd4fe75a631dd5e725..0000000000000000000000000000000000000000 --- a/multiview_generator/tests/test_multiple_sub_problems.py +++ /dev/null @@ -1,19 +0,0 @@ -# import unittest -# import numpy as np -# -# from ..multiple_sub_problems import MultiViewSubProblemsGenerator -# -# -# class Test_MultiViewSubProblemsGenerator(unittest.TestCase): -# -# @classmethod -# def setUpClass(cls): -# pass -# -# @classmethod -# def tearDownClass(cls): -# pass -# -# def test_simple(self): -# gene = MultiViewSubProblemsGenerator() -