diff --git a/README.rst b/README.rst index 7a2c3a1529d990445bc81f96edd50ccfe58fbe12..8b123cec4e92bebf74fe7546bb6abeceec05807e 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ Documentation The **documentation** including installation instructions, API documentation and examples is -`available online <http://dev.pages.lis-lab.fr/multimodal>`_. +`available online <http://dev.pages.lis-lab.fr/scikit-multimodallearn>`_. Installation @@ -31,7 +31,7 @@ Installation using pip ~~~~~~~~~~~~~~~~~~~~~~ **scikit-multimodallearn** is -`available on PyPI <https://pypi.org/project/multiconfusion/>`_ +`available on PyPI <https://pypi.org/project/scikit-multimodallearn/>`_ and can be installed using **pip**:: pip install scikit-multimodallearn @@ -105,6 +105,22 @@ following paper:: supervised learning}, } + @InProceedings{Huu:2019:BAMCC, + author={Huusari, Riikka and Kadri, Hachem and Capponi, C{\'e}cile}, + editor={}, + title={Multi-view Metric Learning in Vector-valued Kernel Spaces}, + booktitle={arXiv:1803.07821v1}, + year={2018}, + location={Athens, Greece}, + publisher={}, + address={}, + pages={209--228}, + numpages = {12}, + isbn={978-3-642-23783-6}, + url={https://link.springer.com/chapter/10.1007/978-3-642-23783-6_14}, + keywords={boosting, classification, multiview learning, + metric learning, vector-valued, kernel spaces}, + } References ~~~~~~~~~~ @@ -113,7 +129,6 @@ References Linking and mining heterogeneous an multi-view data, Unsupervised and semi-supervised learning Series Editor M. 
Emre Celeri, pp 161-182, Springer - * Sokol Koço, Cécile Capponi, `"A boosting approach to multiview classification with cooperation" <https://link.springer.com/chapter/10.1007/978-3-642-23783-6_14>`_, diff --git a/doc/conf.py b/doc/conf.py index a45f0cf2f943497097cbe6fc091262596b280486..f1717cc721342e1812b136b25512af7590f68ae8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -3,12 +3,14 @@ from datetime import date import os import sys -sys.path.insert(0, os.path.abspath('../metriclearning')) +sys.path.insert(0, os.path.abspath('../multimodal')) sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath('../../multimodal')) sys.path.insert(0, os.path.abspath(".")) sys.path.append(os.path.join(os.path.dirname(__name__), '..')) sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sphinxext')) -import metriclearning + +import multimodal # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -28,6 +30,9 @@ extensions = [ 'sphinx.ext.doctest', 'sphinx.ext.imgmath', 'numpydoc', + 'sphinx.ext.napoleon', + 'sphinx_gallery.gen_gallery' + # 'm2r', # 'sphinx_gallery.gen_gallery' ] @@ -44,18 +49,18 @@ source_encoding = 'utf-8' master_doc = 'index' # General information about the project. -project = 'metriclearning' +project = 'scikit-multimodallearn' author = 'Dominique Benielli' -copyright = '2017-{}, LIS UMR 7020'.format(date.today().year) +copyright = '2020-{}, LIS UMR 7020'.format(date.today().year) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = metriclearning.__version__ +version = multimodal.__version__ # The full version, including alpha/beta/rc tags. 
-release = metriclearning.__version__ +release = multimodal.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -69,7 +74,7 @@ release = metriclearning.__version__ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = [] +exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -243,7 +248,7 @@ man_pages = [ # dir menu entry, description, category) texinfo_documents = [ (master_doc, project, '{} Documentation'.format(project), author, project, - 'Multi-View Metric Learning in Vector-Valued Kernel Spaces for machine learning.', + 'Multimodal Multi-View Learning with kernels and boosting algorithms.', 'Miscellaneous')] # Documents to append as an appendix to all manuals. @@ -267,11 +272,11 @@ numpydoc_show_class_members = False sphinx_gallery_conf = { 'doc_module': (project,), - 'backreferences_dir': 'backreferences', + 'backreferences_dir': 'tutorial/backreferences', # path to your examples scripts 'examples_dirs': '../examples', # path where to save gallery generated examples - 'gallery_dirs': 'auto_examples'} + 'gallery_dirs': 'tutorial/auto_examples'} # Generate the plots for the gallery plot_gallery = 'True' diff --git a/doc/docmumbo/install_devel.rst b/doc/docmumbo/install_devel.rst deleted file mode 100644 index 189ff2a1521a0954bae8e2cbb40bae67be79c7bc..0000000000000000000000000000000000000000 --- a/doc/docmumbo/install_devel.rst +++ /dev/null @@ -1,70 +0,0 @@ -Installation and development -============================ - -Dependencies ------------- - -**multimodalboost** works with **Python 3.5 or later**. - -**multimodalboost** depends on **scikit-learn** (version >= 0.19). - -Optionally, **matplotlib** is required when running the examples. 
- -Installation ------------- - -**multimodalboost** is -`available on PyPI <https://pypi.org/project/multimodalboost/>`_ -and can be installed using **pip**:: - - pip install multimodalboost - -If you prefer to install directly from the **source code**, clone the **Git** -repository of the project and run the **setup.py** file with the following -commands:: - - git clone git@gitlab.lis-lab.fr:dev/multimodalboost.git - cd multimodalboost - python setup.py install - -or alternatively use **pip**:: - - pip install git+https://gitlab.lis-lab.fr/dev/multimodalboost.git - -Development ------------ - -The development of multimodalboost follows the guidelines provided by the -scikit-learn community. - -Refer to the `Developer's Guide <http://scikit-learn.org/stable/developers>`_ -of the scikit-learn project for more details. - -Source code ------------ - -You can get the **source code** from the **Git** repository of the project:: - - git clone git@gitlab.lis-lab.fr:dev/multimodalboost.git - - -Testing -------- - -**pytest** and **pytest-cov** are required to run the **test suite** with:: - - cd multimodalboost - pytest - -A code coverage report is displayed in the terminal when running the tests. -An HTML version of the report is also stored in the directory **htmlcov**. - -Generating the documentation ----------------------------- - -The generation of the documentation requires **sphinx**, **sphinx-gallery**, -**numpydoc** and **matplotlib** and can be run with:: - - python setup.py build_sphinx - -The resulting files are stored in the directory **build/sphinx/html**. diff --git a/doc/index.rst b/doc/index.rst index a8fc5521206fe86b1c6bd1929762b08cf727a24e..6e5825639d13fb3884055b1dc02338107056d949 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,7 +1,4 @@ -.. metriclearning documentation master file, created by - sphinx-quickstart on Mon Sep 2 12:12:08 2019. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. + Welcome to scikit-multimodallearn's documentation! ================================================== @@ -15,8 +12,11 @@ package for machine learning in Python. Documentation ------------- +:Release: |version| +:Date: |today| + .. toctree:: - :maxdepth: 2 + :maxdepth: 3 :caption: Contents: @@ -27,21 +27,6 @@ Documentation -Documentation -------------- - -:Release: |version| -:Date: |today| - -.. toctree:: - :maxdepth: 1 - - install_devel - api - auto_examples/index - credits - - Indices and tables ------------------ diff --git a/doc/reference/api.rst b/doc/reference/api.rst index 815df9e47b1b8db0d2991cdac90322fe30f99d3d..e68064331b202159580220038d1e22dda86f78ef 100644 --- a/doc/reference/api.rst +++ b/doc/reference/api.rst @@ -1,9 +1,19 @@ API Documentation ================= +datasets +-------- + +.. automodule:: multimodal.datasets.data_sample + :members: + :inherited-members: + + +Boosting +-------- multimodal.boosting.mumbo -------------------------- ++++++++++++++++++++++++++ .. automodule:: multimodal.boosting.mumbo :members: @@ -11,7 +21,7 @@ multimodal.boosting.mumbo multimodal.boosting.cumbo -------------------------- ++++++++++++++++++++++++++ .. automodule:: multimodal.boosting.cumbo :members: @@ -19,8 +29,35 @@ multimodal.boosting.cumbo multimodal.boosting.boost -------------------------- ++++++++++++++++++++++++++ .. automodule:: multimodal.boosting.boost :members: :inherited-members: + + +Kernels +------- + +multimodal.kernels.mvml ++++++++++++++++++++++++ + +.. automodule:: multimodal.kernels.mvml + :members: + :inherited-members: + + +multimodal.kernels.lpMKL +++++++++++++++++++++++++ + +.. automodule:: multimodal.kernels.lpMKL + :members: + :inherited-members: + + +multimodal.kernels.mkernel +++++++++++++++++++++++++++ + +.. 
automodule:: multimodal.kernels.mkernel + :members: + :inherited-members: diff --git a/examples/README.txt b/examples/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..f749137f56fe6950ead3682bcee3a44915567dea --- /dev/null +++ b/examples/README.txt @@ -0,0 +1,11 @@ +.. _examples: + +Examples +======== + +MuMBo Examples +-------------- + +The following toy examples illustrate how the MuMBo algorithm exploits +cooperation between views for classification. + diff --git a/examples/cumbo_plot_2_views_2_classes.py b/examples/cumbo_plot_2_views_2_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..28760f5fdc8319f73014f13e111887b9591717df --- /dev/null +++ b/examples/cumbo_plot_2_views_2_classes.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +========================== +2 views, 2 classes example +========================== + +In this toy example, we generate data from two classes, split between two +two-dimensional views. + +For each view, the data are generated so that half of the points of each class +are well separated in the plane, while the other half of the points are not +separated and placed in the same area. We also ensure that the points that are +not separated in one view are well separated in the other view. + +Thus, in the figure representing the data, the points represented by crosses +(x) are well separated in view 0 while they are not separated in view 1, while +the points represented by dots (.) are well separated in view 1 while they are +not separated in view 0. In this figure, the blue symbols represent points +of class 0, while red symbols represent points of class 1. + +The MuMBo algorithm takes advantage of the complementarity of the two views to +correctly classify the points. 
+""" + +import numpy as np +from multimodal.boosting.cumbo import MuCumboClassifier +from matplotlib import pyplot as plt + + +def generate_data(n_samples, lim): + """Generate random data in a rectangle""" + lim = np.array(lim) + n_features = lim.shape[0] + data = np.random.random((n_samples, n_features)) + data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0] + return data + + +seed = 12 +np.random.seed(seed) + +n_samples = 100 + +view_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [1., 2.]]))) + +view_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [1., 2.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]))) + +X = np.concatenate((view_0, view_1), axis=1) + +y = np.zeros(4*n_samples, dtype=np.int64) +y[2*n_samples:] = 1 + +views_ind = np.array([0, 2, 4]) + +n_estimators = 3 +clf = MuCumboClassifier(n_estimators=n_estimators) +clf.fit(X, y, views_ind) + +print('\nAfter 3 iterations, the MuMBo classifier reaches exact ' + 'classification for the\nlearning samples:') +for ind, score in enumerate(clf.staged_score(X, y)): + print(' - iteration {}, score: {}'.format(ind + 1, score)) + + +print('\nThe resulting MuCuMBo classifier uses three sub-classifiers that are ' + 'wheighted\nusing the following weights:\n' + ' estimator weights: {}'.format(clf.estimator_weights_alpha_)) + +# print('\nThe two first sub-classifiers use the data of view 0 to compute ' +# 'their\nclassification results, while the third one uses the data of ' +# 'view 1:\n' +# ' best views: {}'. 
format(clf.best_views_)) + +print('\nThe first figure displays the data, splitting the representation ' + 'between the\ntwo views.') + +fig = plt.figure(figsize=(10., 8.)) +fig.suptitle('Representation of the data', size=16) +for ind_view in range(2): + ax = plt.subplot(2, 1, ind_view + 1) + ax.set_title('View {}'.format(ind_view)) + ind_feature = ind_view * 2 + styles = ('.b', 'xb', '.r', 'xr') + labels = ('non-separated', 'separated') + for ind in range(4): + ind_class = ind // 2 + label = labels[(ind + ind_view) % 2] + ax.plot(X[n_samples*ind:n_samples*(ind+1), ind_feature], + X[n_samples*ind:n_samples*(ind+1), ind_feature + 1], + styles[ind], + label='Class {} ({})'.format(ind_class, label)) + ax.legend() + +print('\nThe second figure displays the classification results for the ' + 'sub-classifiers\non the learning sample data.\n') + +styles = ('.b', '.r') +# fig = plt.figure(figsize=(12., 7.)) +# fig.suptitle('Classification results on the learning data for the ' +# 'sub-classifiers', size=16) +# for ind_estimator in range(n_estimators): +# best_view = clf.best_views_[ind_estimator] +# y_pred = clf.estimators_[ind_estimator].predict( +# X[:, 2*best_view:2*best_view+2]) +# background_color = (1.0, 1.0, 0.9) +# for ind_view in range(2): +# ax = plt.subplot(2, 3, ind_estimator + 3*ind_view + 1) +# if ind_view == best_view: +# ax.set_facecolor(background_color) +# ax.set_title( +# 'Sub-classifier {} - View {}'.format(ind_estimator, ind_view)) +# ind_feature = ind_view * 2 +# for ind_class in range(2): +# ind_samples = (y_pred == ind_class) +# ax.plot(X[ind_samples, ind_feature], +# X[ind_samples, ind_feature + 1], +# styles[ind_class], +# label='Class {}'.format(ind_class)) +# ax.legend(title='Predicted class:') + +plt.show() diff --git a/examples/cumbo_plot_3_views_3_classes.py b/examples/cumbo_plot_3_views_3_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..6a03a103d15cea1025ba950587f847e407781984 --- /dev/null +++ 
b/examples/cumbo_plot_3_views_3_classes.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +========================== +3 views, 3 classes example +========================== + +In this toy example, we generate data from three classes, split between three +two-dimensional views. + +For each view, the data are generated so that the points for two classes are +well seperated, while the points for the third class are not seperated with +the two other classes. That means that, taken separately, none of the single +views allows for a good classification of the data. + +Nevertheless, the MuMBo algorithm take adavantage of the complementarity of +the views to rightly classify the points. +""" + +import numpy as np +from multimodal.boosting.cumbo import MuCumboClassifier +from matplotlib import pyplot as plt + + +def generate_data(n_samples, lim): + """Generate random data in a rectangle""" + lim = np.array(lim) + n_features = lim.shape[0] + data = np.random.random((n_samples, n_features)) + data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0] + return data + + +seed = 12 +np.random.seed(seed) + +n_samples = 300 + +view_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 2.], [0., 1.]]))) + +view_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]))) + +view_2 = np.concatenate((generate_data(n_samples, [[0., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]))) + +X = np.concatenate((view_0, view_1, view_2), axis=1) + +y = np.zeros(3*n_samples, dtype=np.int64) +y[n_samples:2*n_samples] = 1 +y[2*n_samples:] = 2 + +views_ind = np.array([0, 2, 4, 6]) + +n_estimators = 4 +clf = MuCumboClassifier(n_estimators=n_estimators) +clf.fit(X, y, views_ind) + +print('\nAfter 4 iterations, the MuCuMBo classifier reaches 
exact ' + 'classification for the\nlearning samples:') +for ind, score in enumerate(clf.staged_score(X, y)): + print(' - iteration {}, score: {}'.format(ind + 1, score)) + +print('\nThe resulting MuCuMBo classifier uses four sub-classifiers that are ' + 'wheighted\nusing the following weights:\n' + ' estimator weights alpha: {}'.format(clf.estimator_weights_alpha_)) + +# print('\nThe first sub-classifier uses the data of view 0 to compute ' +# 'its classification\nresults, the second and third sub-classifiers use ' +# 'the data of view 1, while the\nfourth one uses the data of ' +# 'view 2:\n' +# ' best views: {}'. format(clf.best_views_)) + +print('\nThe first figure displays the data, splitting the representation ' + 'between the\nthree views.') + +styles = ('.b', '.r', '.g') +fig = plt.figure(figsize=(12., 11.)) +fig.suptitle('Representation of the data', size=16) +for ind_view in range(3): + ax = plt.subplot(3, 1, ind_view + 1) + ax.set_title('View {}'.format(ind_view)) + ind_feature = ind_view * 2 + for ind_class in range(3): + ind_samples = (y == ind_class) + ax.plot(X[ind_samples, ind_feature], + X[ind_samples, ind_feature + 1], + styles[ind_class], + label='Class {}'.format(ind_class)) + ax.legend(loc='upper left', framealpha=0.9) + +print('\nThe second figure displays the classification results for the ' + 'sub-classifiers\non the learning sample data.\n') + +# fig = plt.figure(figsize=(14., 11.)) +# fig.suptitle('Classification results on the learning data for the ' +# 'sub-classifiers', size=16) +# for ind_estimator in range(n_estimators): +# best_view = clf.best_views_[ind_estimator] +# y_pred = clf.estimators_[ind_estimator].predict( +# X[:, 2*best_view:2*best_view+2]) +# background_color = (1.0, 1.0, 0.9) +# for ind_view in range(3): +# ax = plt.subplot(3, 4, ind_estimator + 4*ind_view + 1) +# if ind_view == best_view: +# ax.set_facecolor(background_color) +# ax.set_title( +# 'Sub-classifier {} - View {}'.format(ind_estimator, ind_view)) +# 
ind_feature = ind_view * 2 +# for ind_class in range(3): +# ind_samples = (y_pred == ind_class) +# ax.plot(X[ind_samples, ind_feature], +# X[ind_samples, ind_feature + 1], +# styles[ind_class], +# label='Class {}'.format(ind_class)) +# ax.legend(title='Predicted class:', loc='upper left', framealpha=0.9) + +plt.show() diff --git a/examples/mumbo_plot_2_views_2_classes.py b/examples/mumbo_plot_2_views_2_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..a2f864eb092a749e645c24697cb56bd80a1aabca --- /dev/null +++ b/examples/mumbo_plot_2_views_2_classes.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +========================== +2 views, 2 classes example +========================== + +In this toy example, we generate data from two classes, split between two +two-dimensional views. + +For each view, the data are generated so that half of the points of each class +are well separated in the plane, while the other half of the points are not +separated and placed in the same area. We also insure that the points that are +not separated in one view are well separated in the other view. + +Thus, in the figure representing the data, the points represented by crosses +(x) are well separated in view 0 while they are not separated in view 1, while +the points represented by dots (.) are well separated in view 1 while they are +not separated in view 0. In this figure, the blue symbols represent points +of class 0, while red symbols represent points of class 1. + +The MuMBo algorithm take adavantage of the complementarity of the two views to +rightly classify the points. 
+""" + +import numpy as np +from multimodal.boosting.mumbo import MumboClassifier +from matplotlib import pyplot as plt + + +def generate_data(n_samples, lim): + """Generate random data in a rectangle""" + lim = np.array(lim) + n_features = lim.shape[0] + data = np.random.random((n_samples, n_features)) + data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0] + return data + + +seed = 12 +np.random.seed(seed) + +n_samples = 100 + +view_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [1., 2.]]))) + +view_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [1., 2.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]))) + +X = np.concatenate((view_0, view_1), axis=1) + +y = np.zeros(4*n_samples, dtype=np.int64) +y[2*n_samples:] = 1 + +views_ind = np.array([0, 2, 4]) + +n_estimators = 3 +clf = MumboClassifier(n_estimators=n_estimators) +clf.fit(X, y, views_ind) + +print('\nAfter 3 iterations, the MuMBo classifier reaches exact ' + 'classification for the\nlearning samples:') +for ind, score in enumerate(clf.staged_score(X, y)): + print(' - iteration {}, score: {}'.format(ind + 1, score)) + + +print('\nThe resulting MuMBo classifier uses three sub-classifiers that are ' + 'wheighted\nusing the following weights:\n' + ' estimator weights: {}'.format(clf.estimator_weights_)) + +print('\nThe two first sub-classifiers use the data of view 0 to compute ' + 'their\nclassification results, while the third one uses the data of ' + 'view 1:\n' + ' best views: {}'. 
format(clf.best_views_)) + +print('\nThe first figure displays the data, splitting the representation ' + 'between the\ntwo views.') + +fig = plt.figure(figsize=(10., 8.)) +fig.suptitle('Representation of the data', size=16) +for ind_view in range(2): + ax = plt.subplot(2, 1, ind_view + 1) + ax.set_title('View {}'.format(ind_view)) + ind_feature = ind_view * 2 + styles = ('.b', 'xb', '.r', 'xr') + labels = ('non-separated', 'separated') + for ind in range(4): + ind_class = ind // 2 + label = labels[(ind + ind_view) % 2] + ax.plot(X[n_samples*ind:n_samples*(ind+1), ind_feature], + X[n_samples*ind:n_samples*(ind+1), ind_feature + 1], + styles[ind], + label='Class {} ({})'.format(ind_class, label)) + ax.legend() + +print('\nThe second figure displays the classification results for the ' + 'sub-classifiers\non the learning sample data.\n') + +styles = ('.b', '.r') +fig = plt.figure(figsize=(12., 7.)) +fig.suptitle('Classification results on the learning data for the ' + 'sub-classifiers', size=16) +for ind_estimator in range(n_estimators): + best_view = clf.best_views_[ind_estimator] + y_pred = clf.estimators_[ind_estimator].predict( + X[:, 2*best_view:2*best_view+2]) + background_color = (1.0, 1.0, 0.9) + for ind_view in range(2): + ax = plt.subplot(2, 3, ind_estimator + 3*ind_view + 1) + if ind_view == best_view: + ax.set_facecolor(background_color) + ax.set_title( + 'Sub-classifier {} - View {}'.format(ind_estimator, ind_view)) + ind_feature = ind_view * 2 + for ind_class in range(2): + ind_samples = (y_pred == ind_class) + ax.plot(X[ind_samples, ind_feature], + X[ind_samples, ind_feature + 1], + styles[ind_class], + label='Class {}'.format(ind_class)) + ax.legend(title='Predicted class:') + +plt.show() diff --git a/examples/mumbo_plot_3_views_3_classes.py b/examples/mumbo_plot_3_views_3_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..624a7910a7847541084b34432fd1e6f2edc5a59a --- /dev/null +++ b/examples/mumbo_plot_3_views_3_classes.py 
@@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +========================== +3 views, 3 classes example +========================== + +In this toy example, we generate data from three classes, split between three +two-dimensional views. + +For each view, the data are generated so that the points for two classes are +well seperated, while the points for the third class are not seperated with +the two other classes. That means that, taken separately, none of the single +views allows for a good classification of the data. + +Nevertheless, the MuMBo algorithm take adavantage of the complementarity of +the views to rightly classify the points. +""" + +import numpy as np +from multimodal.boosting.mumbo import MumboClassifier +from matplotlib import pyplot as plt + + +def generate_data(n_samples, lim): + """Generate random data in a rectangle""" + lim = np.array(lim) + n_features = lim.shape[0] + data = np.random.random((n_samples, n_features)) + data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0] + return data + + +seed = 12 +np.random.seed(seed) + +n_samples = 300 + +view_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 2.], [0., 1.]]))) + +view_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]))) + +view_2 = np.concatenate((generate_data(n_samples, [[0., 2.], [0., 1.]]), + generate_data(n_samples, [[0., 1.], [0., 1.]]), + generate_data(n_samples, [[1., 2.], [0., 1.]]))) + +X = np.concatenate((view_0, view_1, view_2), axis=1) + +y = np.zeros(3*n_samples, dtype=np.int64) +y[n_samples:2*n_samples] = 1 +y[2*n_samples:] = 2 + +views_ind = np.array([0, 2, 4, 6]) + +n_estimators = 4 +clf = MumboClassifier(n_estimators=n_estimators) +clf.fit(X, y, views_ind) + +print('\nAfter 4 iterations, the MuMBo classifier reaches exact ' + 'classification for the\nlearning 
samples:') +for ind, score in enumerate(clf.staged_score(X, y)): + print(' - iteration {}, score: {}'.format(ind + 1, score)) + +print('\nThe resulting MuMBo classifier uses four sub-classifiers that are ' + 'wheighted\nusing the following weights:\n' + ' estimator weights: {}'.format(clf.estimator_weights_)) + +print('\nThe first sub-classifier uses the data of view 0 to compute ' + 'its classification\nresults, the second and third sub-classifiers use ' + 'the data of view 1, while the\nfourth one uses the data of ' + 'view 2:\n' + ' best views: {}'. format(clf.best_views_)) + +print('\nThe first figure displays the data, splitting the representation ' + 'between the\nthree views.') + +styles = ('.b', '.r', '.g') +fig = plt.figure(figsize=(12., 11.)) +fig.suptitle('Representation of the data', size=16) +for ind_view in range(3): + ax = plt.subplot(3, 1, ind_view + 1) + ax.set_title('View {}'.format(ind_view)) + ind_feature = ind_view * 2 + for ind_class in range(3): + ind_samples = (y == ind_class) + ax.plot(X[ind_samples, ind_feature], + X[ind_samples, ind_feature + 1], + styles[ind_class], + label='Class {}'.format(ind_class)) + ax.legend(loc='upper left', framealpha=0.9) + +print('\nThe second figure displays the classification results for the ' + 'sub-classifiers\non the learning sample data.\n') + +fig = plt.figure(figsize=(14., 11.)) +fig.suptitle('Classification results on the learning data for the ' + 'sub-classifiers', size=16) +for ind_estimator in range(n_estimators): + best_view = clf.best_views_[ind_estimator] + y_pred = clf.estimators_[ind_estimator].predict( + X[:, 2*best_view:2*best_view+2]) + background_color = (1.0, 1.0, 0.9) + for ind_view in range(3): + ax = plt.subplot(3, 4, ind_estimator + 4*ind_view + 1) + if ind_view == best_view: + ax.set_facecolor(background_color) + ax.set_title( + 'Sub-classifier {} - View {}'.format(ind_estimator, ind_view)) + ind_feature = ind_view * 2 + for ind_class in range(3): + ind_samples = (y_pred == 
ind_class) + ax.plot(X[ind_samples, ind_feature], + X[ind_samples, ind_feature + 1], + styles[ind_class], + label='Class {}'.format(ind_class)) + ax.legend(title='Predicted class:', loc='upper left', framealpha=0.9) + +plt.show() diff --git a/multimodal/boosting/cumbo.py b/multimodal/boosting/cumbo.py index 55a07f917a96674eed42f36fe7c37d676d7175ce..6753e71b30e88d48683dfe5dc50e1f4a816282a6 100644 --- a/multimodal/boosting/cumbo.py +++ b/multimodal/boosting/cumbo.py @@ -5,32 +5,6 @@ This module contains a **Mu**\ lti\ **C**\ onfusion **M**\ Matrix **B**\ osting estimator for classification implemented in the ``MuCumboClassifier`` class. """ -# Université d'Aix Marseille (AMU) - -# Centre National de la Recherche Scientifique (CNRS) - -# Université de Toulon (UTLN). -# Copyright © 2017-2018 AMU, CNRS, UTLN -# -# This file is part of multimodalboost. -# -# multimodalboost is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# multimodalboost is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with multiconfusion. If not, see <http://www.gnu.org/licenses/>. -# -# Author: Dominique Benielli- Laboratoire d'Informatique et Systèmes - UMR 7020 - -# The implementation of the MuCumboClassifier in this module used the code of -# sklearn.ensemble.AdaBoostClassifier as a model and tried to use the same -# structure, notations and behavior where possible. 
- import numpy as np from sklearn.base import ClassifierMixin from sklearn.ensemble import BaseEnsemble diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py index a50c56736f6397e763f061f3a792bfb720ff05c5..037bcbc7642c910271cd57513628e952a652700d 100644 --- a/multimodal/datasets/data_sample.py +++ b/multimodal/datasets/data_sample.py @@ -6,18 +6,29 @@ nbL and nbEx numbers, MultiModalArray class inherit from numpy ndarray and contains a 2d data ndarray with the shape (n_samples, n_view_i * n_features_i) -0 1 2 3 -======== ==== ==== ==== -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -xxxxxxxx xxxx xxxx xxxx -======== ==== ==== ==== +.. tabularcolumns:: |l|l|l|l| + ++----------+------+------+------+ +| 0 | 1 | 2 | 3 | ++==========+======+======+======+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ +| xxxxxxxx | xxxx | xxxx | xxxx | ++----------+------+------+------+ MultiModalSparseArray inherit from scipy sparce matrix with the shape (n_samples, n_view_i * n_features_i) diff --git a/multimodal/kernels/mvml.py b/multimodal/kernels/mvml.py index 6ab1d62a3867b6fc740af20f39aadee810c9b38f..6d585d0a7d3002cd3c8eae018ad47deb5dd28219 100644 --- a/multimodal/kernels/mvml.py +++ b/multimodal/kernels/mvml.py @@ -417,7 +417,7 @@ class MVML(MKernel, BaseEstimator, ClassifierMixin): Parameters 
---------- - test_kernels : `Metriclearn_array` of test kernels + test_kernels : `MultiModalArray` of test kernels g : learning solution that is learned in learn_mvml