From 2d56b39e838896e8a1bb34c559c5be9d5070b554 Mon Sep 17 00:00:00 2001 From: Dominique Benielli <dominique.benielli@lis-lab.fr> Date: Wed, 15 Jan 2020 19:46:13 +0100 Subject: [PATCH] packaging --- .gitlab-ci.yml | 38 ++++ MANIFEST.in | 5 + README.rst | 147 +++++++++++++++ copyright.py | 105 +++++++++++ copyrightstamp.txt | 39 ++++ doc/conf.py | 277 +++++++++++++++++++++++++++++ doc/index.rst | 50 ++++++ doc/reference/api.rst | 26 +++ docker/Dockerfile_ubuntu_18.04 | 26 +++ license.txt | 30 ++++ multimodal/__init__.py | 1 + multimodal/boosting/__init__.py | 9 +- multimodal/boosting/cumbo.py | 16 +- multimodal/boosting/mumbo.py | 13 +- multimodal/datasets/data_sample.py | 10 +- multimodal/kernels/__init__.py | 5 +- multimodal/kernels/lpMKL.py | 2 + multimodal/kernels/mkernel.py | 39 +++- multimodal/kernels/mvml.py | 30 +--- setup.cfg | 16 ++ setup.py | 68 +++++++ 21 files changed, 892 insertions(+), 60 deletions(-) create mode 100644 .gitlab-ci.yml create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 copyright.py create mode 100644 copyrightstamp.txt create mode 100644 doc/conf.py create mode 100644 doc/index.rst create mode 100644 doc/reference/api.rst create mode 100644 docker/Dockerfile_ubuntu_18.04 create mode 100644 license.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..72f9c51 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,38 @@ +# run the test suite +tests: + image: registry.gitlab.lis-lab.fr:5005/dev/multimodal/ubuntu:18.04 + tags: + - docker + script: + - python3 setup.py install + - pytest-3 + +# generate the documentation +doc: + image: registry.gitlab.lis-lab.fr:5005/dev/multimodal/ubuntu:18.04 + tags: + - docker + only: + - master + script: + - export LC_ALL=$(locale -a | grep en_US) + - export LANG=$(locale -a | grep en_US) + - python3 setup.py build_sphinx + +# TODO: Replace the task doc by the following task pages when making the +# 
project public +# +#pages: +# image: registry.gitlab.lis-lab.fr:5005/dev/multimodal/ubuntu:18.04 +# tags: +# - docker +# only: +# - master +# script: +# - export LC_ALL=$(locale -a | grep en_US) +# - export LANG=$(locale -a | grep en_US) +# - python3 setup.py build_sphinx +# - cp -r build/sphinx/html public +# artifacts: +# paths: +# - public diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ece9ebb --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include *.txt +include *.rst +include doc/*.rst doc/*.py +include multimodal/tests/*.py +include examples/*.py examples/*.txt diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..e2bc4c4 --- /dev/null +++ b/README.rst @@ -0,0 +1,147 @@ +scikit-multimodallearn +====================== + +**scikit-multimodallearn** is a Python package implementing algorithms for multimodal data. + +It is compatible with `scikit-learn <http://scikit-learn.org/>`_, a popular +package for machine learning in Python. + + +Documentation +------------- + +The **documentation**, including installation instructions, API documentation +and examples, is +`available online <http://dev.pages.lis-lab.fr/multimodal>`_. + + +Installation +------------ + +Dependencies +~~~~~~~~~~~~ + +**scikit-multimodallearn** works with **Python 3.5 or later**. + +**scikit-multimodallearn** depends on **scikit-learn** (version >= 0.19). + +Optionally, **matplotlib** is required to run the examples. + +Installation using pip +~~~~~~~~~~~~~~~~~~~~~~ + +**scikit-multimodallearn** is +`available on PyPI <https://pypi.org/project/scikit-multimodallearn/>`_ +and can be installed using **pip**:: + + pip install scikit-multimodallearn + + +Development +----------- + +The development of this package follows the guidelines provided by the +scikit-learn community. + +Refer to the `Developer's Guide <http://scikit-learn.org/stable/developers>`_ +of the scikit-learn project for more details.
+ +Source code +~~~~~~~~~~~ + +You can get the **source code** from the **Git** repository of the project:: + + git clone git@gitlab.lis-lab.fr:dev/multiconfusion.git + +Testing +~~~~~~~ + +**pytest** and **pytest-cov** are required to run the **test suite** with:: + + cd multiconfusion + pytest + +A code coverage report is displayed in the terminal when running the tests. +An HTML version of the report is also stored in the directory **htmlcov**. + + +Generating the documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The generation of the documentation requires **sphinx**, **sphinx-gallery**, +**numpydoc** and **matplotlib** and can be run with:: + + python setup.py build_sphinx + +The resulting files are stored in the directory **build/sphinx/html**. + + +Credits +------- + +**scikit-multimodallearn** is developed by the +`development team <https://developpement.lis-lab.fr/>`_ of the +`LIS <http://www.lis-lab.fr/>`_. + +If you use **scikit-multimodallearn** in a scientific publication, please cite the +following paper:: + + @InProceedings{Koco:2011:BAMCC, + author={Ko\c{c}o, Sokol and Capponi, C{\'e}cile}, + editor={Gunopulos, Dimitrios and Hofmann, Thomas and Malerba, Donato + and Vazirgiannis, Michalis}, + title={A Boosting Approach to Multiview Classification with Cooperation}, + booktitle={Proceedings of the 2011 European Conference on Machine Learning + and Knowledge Discovery in Databases - Volume Part II}, + year={2011}, + location={Athens, Greece}, + publisher={Springer-Verlag}, + address={Berlin, Heidelberg}, + pages={209--228}, + numpages = {20}, + isbn={978-3-642-23783-6}, + url={https://link.springer.com/chapter/10.1007/978-3-642-23783-6_14}, + keywords={boosting, classification, multiview learning, + supervised learning}, + } + + +References +~~~~~~~~~~ +* Sokol Koço, Cécile Capponi, + `"Learning from Imbalanced Datasets with cross-view cooperation"` + Linking and mining heterogeneous and multi-view data, Unsupervised and + semi-supervised learning Series
Editor M. Emre Celebi, pp 161-182, Springer + + +* Sokol Koço, Cécile Capponi, + `"A boosting approach to multiview classification with cooperation" + <https://link.springer.com/chapter/10.1007/978-3-642-23783-6_14>`_, + Proceedings of the 2011 European Conference on Machine Learning (ECML), + Athens, Greece, pp.209-228, 2011, Springer-Verlag. + +* Sokol Koço, + `"Tackling the uneven views problem with cooperation based ensemble + learning methods" <http://www.theses.fr/en/2013AIXM4101>`_, + PhD Thesis, Aix-Marseille Université, 2013. + +* Riikka Huusari, Hachem Kadri and Cécile Capponi, + "Multi-View Metric Learning in Vector-Valued Kernel Spaces" + in International Conference on Artificial Intelligence and Statistics (AISTATS) 2018 + +Copyright +~~~~~~~~~ + +Université d'Aix Marseille (AMU) - +Centre National de la Recherche Scientifique (CNRS) - +Université de Toulon (UTLN). + +Copyright © 2017-2018 AMU, CNRS, UTLN + +License +~~~~~~~ + +**scikit-multimodallearn** is free software: you can redistribute it and/or modify +it under the terms of the **GNU Lesser General Public License** as published by +the Free Software Foundation, either **version 3** of the License, or +(at your option) any later version.
diff --git a/copyright.py b/copyright.py new file mode 100644 index 0000000..dcf2f20 --- /dev/null +++ b/copyright.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, division +import time +import os +import sys +import fileinput + + +def findFiles(directory, files=[]): + """scan a directory for py, pyx, pxd extension files.""" + for filename in os.listdir(directory): + path = os.path.join(directory, filename) + if os.path.isfile(path) and (path.endswith(".py") or + path.endswith(".pyx") or + path.endswith(".pxd")): + if filename != "__init__.py" and filename != "version.py": + files.append(path) + elif os.path.isdir(path): + findFiles(path, files) + return files + + +def fileUnStamping(filename): + """ Remove stamp from a file """ + is_stamp = False + for line in fileinput.input(filename, inplace=1): + if line.find("# COPYRIGHT #") != -1: + is_stamp = not is_stamp + elif not is_stamp: + print(line, end="") + + +def fileStamping(filename, stamp): + """ Write a stamp on a file + + WARNING : The stamping must be done on an default utf8 machine ! + """ + old_stamp = False # If a copyright already exist over write it. 
+ for line in fileinput.input(filename, inplace=1): + if line.find("# COPYRIGHT #") != -1: + old_stamp = not old_stamp + elif line.startswith("# -*- coding: utf-8 -*-"): + print(line, end="") + print(stamp) + elif not old_stamp: + print(line, end="") + + +def getStamp(date, iw_version): + """ Return the corrected formated stamp """ + stamp = open("copyrightstamp.txt").read() + stamp = stamp.replace("DATE", date) + stamp = stamp.replace("IW_VERSION", iw_version) + stamp = stamp.replace('\n', '\n# ') + stamp = "# " + stamp + stamp = stamp.replace("# \n", "#\n") + return stamp.strip() + + +def getVersionsAndDate(): + """ Return (date, iw_version) """ + v_text = open('VERSION').read().strip() + v_text_formted = '{"' + v_text.replace('\n', '","').replace(':', '":"') + v_text_formted += '"}' + v_dict = eval(v_text_formted) + return (time.strftime("%Y"), v_dict['iw']) + + +def writeStamp(): + """ Write a copyright stamp on all files """ + stamp = getStamp(*getVersionsAndDate()) + files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)), + "iw")) + for filename in files: + fileStamping(filename, stamp) + fileStamping("setup.py", stamp) + + +def eraseStamp(): + """ Erase a copyright stamp from all files """ + files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)), + "iw")) + for filename in files: + fileUnStamping(filename) + fileUnStamping("setup.py") + + +def usage(arg): + print("Usage :") + print("\tpython %s stamping" % arg) + print("\tpython %s unstamping" % arg) + + +if __name__ == "__main__": + if len(sys.argv) == 1: + usage(sys.argv[0]) + elif len(sys.argv) == 2: + if sys.argv[1].startswith("unstamping"): + eraseStamp() + elif sys.argv[1].startswith("stamping"): + writeStamp() + else: + usage(sys.argv[0]) + else: + usage(sys.argv[0]) diff --git a/copyrightstamp.txt b/copyrightstamp.txt new file mode 100644 index 0000000..bb382b5 --- /dev/null +++ b/copyrightstamp.txt @@ -0,0 +1,39 @@ +######### COPYRIGHT ######### + +Copyright(c) 
DATE +----------------- + +* Université d'Aix Marseille (AMU) - +* Centre National de la Recherche Scientifique (CNRS) - +* Université de Toulon (UTLN). +* Copyright © 2019-2020 AMU, CNRS, UTLN + +Contributors: +------------ + +* Sokol Koço <sokol.koco_AT_lis-lab.fr> +* Cécile Capponi <cecile.capponi_AT_univ-amu.fr> +* Florent Jaillet <florent.jaillet_AT_math.cnrs.fr> +* Dominique Benielli <dominique.benielli_AT_univ-amu.fr> +* Riikka Huusari <rikka.huusari_AT_univ-amu.fr> +* Baptiste Bauvin <baptiste.bauvin_AT_univ-amu.fr> + +Description: +----------- + +The multimodal package implement classifiers multiview, +MumboClassifier class, MuCumboClassifier class, MVML class, MKL class. +compatible with sklearn + +Version: +------- + +* multimodal version = MULTIMODAL_VERSION + +Licence: +------- + +License: LGPLv3+ + + +######### COPYRIGHT ######### diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..a45f0cf --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- + +from datetime import date +import os +import sys +sys.path.insert(0, os.path.abspath('../metriclearning')) +sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath(".")) +sys.path.append(os.path.join(os.path.dirname(__name__), '..')) +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sphinxext')) +import metriclearning + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.imgmath', + 'numpydoc', + # 'sphinx_gallery.gen_gallery' +] + +# Add any paths that contain templates here, relative to this directory. +# templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'metriclearning' +author = 'Dominique Benielli' +copyright = '2017-{}, LIS UMR 7020'.format(date.today().year) + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = metriclearning.__version__ +# The full version, including alpha/beta/rc tags. +release = metriclearning.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. 
They are ignored by default. +show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'nature' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. 
+# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = '{}doc'.format(project) + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + 'papersize': 'a4paper', + + # The font size ('10pt', '11pt' or '12pt'). + 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # 'preamble': '', + + # Latex figure (float) alignment + 'figure_align': 'htbp'} + +# Grouping the document tree into LaTeX files. 
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, '{}.tex'.format(project), '{} Documentation'.format(project), + author, 'manual')] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, project, '{} Documentation'.format(project), + [author], 1) +] + +# If true, show URL addresses after external links. +# man_show_urls = False + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, project, '{} Documentation'.format(project), author, project, + 'Multi-View Metric Learning in Vector-Valued Kernel Spaces for machine learning.', + 'Miscellaneous')] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. 
+# texinfo_no_detailmenu = False + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'sklearn': ('http://scikit-learn.org/stable', None) +} + +numpydoc_show_class_members = False + +sphinx_gallery_conf = { + 'doc_module': (project,), + 'backreferences_dir': 'backreferences', + # path to your examples scripts + 'examples_dirs': '../examples', + # path where to save gallery generated examples + 'gallery_dirs': 'auto_examples'} + +# Generate the plots for the gallery +plot_gallery = 'True' diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..a8fc552 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,50 @@ +.. metriclearning documentation master file, created by + sphinx-quickstart on Mon Sep 2 12:12:08 2019. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to scikit-multimodallearn's documentation! +================================================== +**scikit-multimodallearn** is a Python package implementing boost and kernel algorithms for +machine learning with multimodal data. + +It is compatible with `scikit-learn <http://scikit-learn.org/>`_, a popular +package for machine learning in Python. + + +Documentation +------------- + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + + reference/api + tutorial/install_devel + tutorial/auto_examples/index + + + +Documentation +------------- + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 1 + + install_devel + api + auto_examples/index + credits + + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/reference/api.rst b/doc/reference/api.rst new file mode 100644 index 0000000..815df9e --- /dev/null +++ b/doc/reference/api.rst @@ -0,0 +1,26 @@ +API Documentation +================= + + +multimodal.boosting.mumbo +------------------------- + +.. 
automodule:: multimodal.boosting.mumbo + :members: + :inherited-members: + + +multimodal.boosting.cumbo +------------------------- + +.. automodule:: multimodal.boosting.cumbo + :members: + :inherited-members: + + +multimodal.boosting.boost +------------------------- + +.. automodule:: multimodal.boosting.boost + :members: + :inherited-members: diff --git a/docker/Dockerfile_ubuntu_18.04 b/docker/Dockerfile_ubuntu_18.04 new file mode 100644 index 0000000..309a621 --- /dev/null +++ b/docker/Dockerfile_ubuntu_18.04 @@ -0,0 +1,26 @@ +FROM ubuntu:18.04 +MAINTAINER Dominique Benielli +WORKDIR / +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y --no-install-recommends \ + python3 \ + python3-setuptools \ + python3-sklearn\ + python3-pytest \ + python3-pytest-cov \ + python3-nose \ + python3-sphinx \ + python3-numpydoc \ + python3-sphinx-gallery \ + python3-matplotlib \ + python3-pil \ + && \ + apt-get clean +RUN apt-get install -y --no-install-recommends locales && \ + apt-get clean && \ + locale-gen en_US.UTF-8 && \ + update-locale en_US.UTF-8 && \ + echo "export LC_ALL=$(locale -a | grep en_US)" >> /root/.bashrc && \ + echo "export LANG=$(locale -a | grep en_US)" >> /root/.bashrc diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..1011e46 --- /dev/null +++ b/license.txt @@ -0,0 +1,30 @@ +New BSD License + +Copyright (c) 2020-15-01, The scikit-multimodallearn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c.
Neither the name of the scikit-multimodallearn developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. diff --git a/multimodal/__init__.py b/multimodal/__init__.py index e69de29..1ff9679 100644 --- a/multimodal/__init__.py +++ b/multimodal/__init__.py @@ -0,0 +1 @@ +__version__ = '0.0.dev0' diff --git a/multimodal/boosting/__init__.py b/multimodal/boosting/__init__.py index 5b5a0e3..bb0d1af 100644 --- a/multimodal/boosting/__init__.py +++ b/multimodal/boosting/__init__.py @@ -1,11 +1,6 @@ from .mumbo import MumboClassifier - -__all__ = ['MumboClassifier'] - -__version__ = '1.0.dev0' - from .cumbo import MuCumboClassifier -__all__ = ['MuCumboClassifier'] +__all__ = ['MumboClassifier', 'MuCumboClassifier'] + + -__version__ = '1.0.dev0' diff --git a/multimodal/boosting/cumbo.py b/multimodal/boosting/cumbo.py index 0f928df..0d7eb4d 100644 --- a/multimodal/boosting/cumbo.py +++ b/multimodal/boosting/cumbo.py @@ -408,6 +408,7 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): ValueError where `X` and `view_ind` are not compatibles """ warnings.filterwarnings("ignore") + self.X_ =
self._global_X_transform(X, views_ind=views_ind) if (self.base_estimator is None or isinstance(self.base_estimator, (BaseDecisionTree, BaseForest))): @@ -416,16 +417,12 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): else: dtype = None accept_sparse = ['csr', 'csc'] - if views_ind is None: - if X.shape[1] > 1: - views_ind = np.array([0, X.shape[1]//2, X.shape[1]]) - else: - views_ind = np.array([0, X.shape[1]]) - self.X_ = self._global_X_transform(X, views_ind=views_ind) - views_ind_, n_views = self.X_._validate_views_ind(views_ind, - X.shape[1]) - check_X_y(self.X_, y, accept_sparse=accept_sparse, dtype=dtype) + + + views_ind_, n_views = self.X_._validate_views_ind(self.X_.views_ind, + self.X_.shape[1]) + check_X_y(self.X_, y) check_classification_targets(y) self._validate_estimator() @@ -472,7 +469,6 @@ class MuCumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): # TO DO estimator_errors_ estimate ########################################### - #############self.estimator_errors_[current_iteration] = to do # update C_t de g diff --git a/multimodal/boosting/mumbo.py b/multimodal/boosting/mumbo.py index f2c522d..6405e5d 100644 --- a/multimodal/boosting/mumbo.py +++ b/multimodal/boosting/mumbo.py @@ -344,13 +344,7 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): else: dtype = None accept_sparse = ['csr', 'csc'] - # if views_ind is None: - # if X.shape[1] > 1: - # views_ind = np.array([0, X.shape[1]//2, X.shape[1]]) - # elif X.shape[1]==1: - # views_ind = np.array([0, X.shape[1]]) - # else: - # views_ind = np.array([0]) + self.X_ = self._global_X_transform(X, views_ind=views_ind) views_ind_, n_views = self.X_._validate_views_ind(self.X_.views_ind, self.X_.shape[1]) @@ -442,10 +436,12 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): Parameters ---------- - X : {array-like, sparse matrix}, shape = (n_samples, n_features) + X : { array-like, sparse matrix}, + shape = (n_samples, n_views * n_features) 
Multi-view input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK and LIL are converted to CSR. + maybe also MultimodalData Returns ------- @@ -493,6 +489,7 @@ class MumboClassifier(BaseEnsemble, ClassifierMixin, UBoosting): Multi-view input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK and LIL are converted to CSR. + maybe also MultimodalData Returns ------- diff --git a/multimodal/datasets/data_sample.py b/multimodal/datasets/data_sample.py index 9f6d730..7894518 100644 --- a/multimodal/datasets/data_sample.py +++ b/multimodal/datasets/data_sample.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -"""This module contains the DataSample class and Metriclearn_array class +"""This module contains the DataSample class, MultiModalArray, MultiModalSparseArray, MultiModalSparseInfo and MultiModalData, class The DataSample class encapsulates a sample 's components nbL and nbEx numbers, -Metriclearn_arra class inherit from numpy ndarray and contains a 2d data ndarray +MultiModalArray class inherit from numpy ndarray and contains a 2d data ndarray with the shape (n_samples, n_view_i * n_features_i) 0 1 2 3 @@ -19,8 +19,8 @@ xxxxxxxx xxxx xxxx xxxx xxxxxxxx xxxx xxxx xxxx ======== ==== ==== ==== -the number nbL and nbEx and , the fourth dictionaries for sample, -prefix, suffix and factor where they are computed +MultiModalSparseArray inherit from scipy sparce matrix with the shape (n_samples, n_view_i * n_features_i) + """ from abc import ABCMeta import numpy as np @@ -77,7 +77,7 @@ class MultiModalData(metaclass=ABCMeta): def _validate_views_ind(self, views_ind, n_features): """Ensure proper format for views_ind and return number of views.""" - views_ind = np.array(views_ind) + # views_ind = np.array(views_ind) if np.issubdtype(views_ind.dtype, np.integer) and views_ind.ndim == 1: if len(views_ind) > 2 and np.any(views_ind[:-1] >= views_ind[1:]): raise ValueError("Values in views_ind must be sorted.") diff --git 
a/multimodal/kernels/__init__.py b/multimodal/kernels/__init__.py index 7d48045..02895ab 100644 --- a/multimodal/kernels/__init__.py +++ b/multimodal/kernels/__init__.py @@ -1 +1,4 @@ -__all__ = ['MVML', 'MKernel', 'MVML'] +from .lpMKL import MKL +from .mvml import MVML + +__all__ = ['MVML', 'MKL'] diff --git a/multimodal/kernels/lpMKL.py b/multimodal/kernels/lpMKL.py index 1eca272..9de928a 100644 --- a/multimodal/kernels/lpMKL.py +++ b/multimodal/kernels/lpMKL.py @@ -36,6 +36,7 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel): n_loops : (default 50) number of iterions + Attributes ---------- lmbda : float coeficient for combined kernels @@ -140,6 +141,7 @@ class MKL(BaseEstimator, ClassifierMixin, MKernel): ------- return tuple (C, weights) """ + C = None views = self.K_.n_views X = self.K_ p = 2 diff --git a/multimodal/kernels/mkernel.py b/multimodal/kernels/mkernel.py index ac1ef5c..a550dcb 100644 --- a/multimodal/kernels/mkernel.py +++ b/multimodal/kernels/mkernel.py @@ -8,10 +8,22 @@ from multimodal.datasets.data_sample import DataSample, MultiModalArray class MKernel(metaclass=ABCMeta): """ Abstract class MKL and MVML should inherit from - for methods of transform kernel to/from data + for methods of transform kernel to/from data. 
+ + + Attributes + ---------- + + W_sqrootinv_dict : dict of nyström approximation kernel + in the case of nystrom approximation + a dictionary of reduced kernels is calculated + + kernel_params : list of dict of corresponding kernels + params KERNEL_PARAMS """ def _get_kernel(self, X, Y=None, v=0): + met =None if self.kernel_params is not None: if isinstance(self.kernel_params, list): ind = min(v, len(self.kernel) - 1) @@ -30,9 +42,34 @@ class MKernel(metaclass=ABCMeta): filter_params=True, **params) def _global_kernel_transform(self, X, views_ind=None, Y=None): + """ + Private function which transforms X input format to + :class:`multimodal.datasets.MultiModalData` and internal kernels + + Parameters + ---------- + X : input data should be 'MultiModalArray' + array [n_samples_a, n_samples_a] if metric == “precomputed”, + or, [n_samples_a, n_view* n_features] + otherwise Array of pairwise kernels between samples, + or a feature array. + + views_ind : list or numpy array, (default : None) indicate + the structure of different views + + Y : second input for pairing kernel by pairwise_kernels in the case + of + + + Returns + ------- + (X_, K_) tuple transformed data X_ in :class:`multimodal.datasets.MultiModalData` + K_ dict of kernels + """ kernel_dict = {} X_ = None + y = None if Y is None: y = Y if isinstance(X, np.ndarray) and X.ndim == 1: diff --git a/multimodal/kernels/mvml.py b/multimodal/kernels/mvml.py index 535d974..6ab1d62 100644 --- a/multimodal/kernels/mvml.py +++ b/multimodal/kernels/mvml.py @@ -14,39 +14,13 @@ from multimodal.datasets.data_sample import DataSample, MultiModalArray from multimodal.kernels.mkernel import MKernel """ - Copyright (C) 2018 Riikka Huusari - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version.
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - - -This file contains algorithms for Multi-View Metric Learning (MVML) as introduced in +This file contains algorithms for Multi-View Metric Learning (MVML) as +introduced in Riikka Huusari, Hachem Kadri and Cécile Capponi: Multi-View Metric Learning in Vector-Valued Kernel Spaces in International Conference on Artificial Intelligence and Statistics (AISTATS) 2018 -Usage (see also demo.py for a more detailed example): - create a MVML object via: - mvml = MVML(kernel_dict, label_vector, regression_parameter_list, nystrom_param) - learn the model: - A, g, w = mvml.learn_mvml() - predict with the model: - predictions = predict_mvml(test_kernel_dict, g, w) - -(parameter names as in the paper) - -Code is tested with Python 3.5.2 and numpy 1.12.1 """ diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..bf8d7b4 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,16 @@ +[metadata] +license_file = LICENSE.txt + +[tool:pytest] +testpaths = multimodal +addopts = --verbose + --cov-report=term-missing + --cov-report=html + --cov=multimodalboost + --doctest-modules + +[coverage:run] +branch = True +source = multimodal +include = */multimodal/* +omit = */tests/* diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..030ae13 --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ + +import os +from setuptools import setup, find_packages + +import multiconfusion + + +def setup_package(): + """Setup function""" + + name = 'scikit-multimodallearn' + version = multiconfusion.__version__ + description = 'A scikit-learn compatible package for multimodal Classifiers' + here = 
os.path.abspath(os.path.dirname(__file__)) + with open(os.path.join(here, 'README.rst'), encoding='utf-8') as readme: + long_description = readme.read() + group = 'dev' + url = 'https://gitlab.lis-lab.fr/{}/{}'.format(group, name) + project_urls = { + 'Documentation': 'http://{}.pages.lis-lab.fr/{}'.format(group, name), + 'Source': url, + 'Tracker': '{}/issues'.format(url)} + author = 'Dominique Benielli' + author_email = 'contact.dev@lis-lab.fr' + license = 'newBSD' + classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: GNU Lesser General Public License' + ' v3 or later (LGPLv3+)', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX :: Linux', + 'Operating System :: MacOS'], + keywords = ('machine learning, supervised learning, classification, ' + 'ensemble methods, boosting, kernel') + packages = find_packages(exclude=['*.tests']) + install_requires = ['scikit-learn>=0.19', 'numpy', 'scipy', 'cvxopt' ] + python_requires = '>=3.5' + extras_require = { + 'dev': ['pytest', 'pytest-cov'], + 'doc': ['sphinx', 'numpydoc', 'sphinx_gallery', 'matplotlib']} + include_package_data = True + + setup(name=name, + version=version, + description=description, + long_description=long_description, + url=url, + project_urls=project_urls, + author=author, + author_email=author_email, + license=license, + classifiers=classifiers, + keywords=keywords, + packages=packages, + install_requires=install_requires, + python_requires=python_requires, + extras_require=extras_require, + include_package_data=include_package_data) + + +if __name__ == "__main__": + setup_package() -- GitLab