Skip to content
Snippets Groups Projects
Commit d60441fd authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Merge

parents a2652ea7 429c7c0d
No related branches found
No related tags found
No related merge requests found
...@@ -6,6 +6,8 @@ tests: ...@@ -6,6 +6,8 @@ tests:
script: script:
- export LC_ALL=$(locale -a | grep en_US) - export LC_ALL=$(locale -a | grep en_US)
- export LANG=$(locale -a | grep en_US) - export LANG=$(locale -a | grep en_US)
- pip3 install --upgrade pip
- pip3 -V
- pip3 install -e . - pip3 install -e .
- pytest-3 - pytest-3
coverage: '/^TOTAL.+?(\d+\%)$/' coverage: '/^TOTAL.+?(\d+\%)$/'
...@@ -24,6 +26,7 @@ doc: ...@@ -24,6 +26,7 @@ doc:
script: script:
- export LC_ALL=$(locale -a | grep en_US) - export LC_ALL=$(locale -a | grep en_US)
- export LANG=$(locale -a | grep en_US) - export LANG=$(locale -a | grep en_US)
- pip3 install --upgrade pip
- pip3 install -e .[doc] - pip3 install -e .[doc]
- sphinx-apidoc -o docs/source summit - sphinx-apidoc -o docs/source summit
- cd docs/source - cd docs/source
...@@ -45,6 +48,7 @@ pages: ...@@ -45,6 +48,7 @@ pages:
script: script:
- export LC_ALL=$(locale -a | grep en_US) - export LC_ALL=$(locale -a | grep en_US)
- export LANG=$(locale -a | grep en_US) - export LANG=$(locale -a | grep en_US)
- pip3 install --upgrade pip
- pip3 install -e .[doc] - pip3 install -e .[doc]
- pytest-3 - pytest-3
- sphinx-apidoc -o docs/source summit - sphinx-apidoc -o docs/source summit
......
...@@ -57,7 +57,9 @@ And the following python modules will be automatically installed : ...@@ -57,7 +57,9 @@ And the following python modules will be automatically installed :
* `pyyaml <https://pypi.org/project/PyYAML/>`_ - Used to read the config files, * `pyyaml <https://pypi.org/project/PyYAML/>`_ - Used to read the config files,
* `plotly <https://plot.ly/>`_ - Used to generate interactive HTML visuals, * `plotly <https://plot.ly/>`_ - Used to generate interactive HTML visuals,
* `tabulate <https://pypi.org/project/tabulate/>`_ - Used to generate the confusion matrix. * `tabulate <https://pypi.org/project/tabulate/>`_ - Used to generate the confusion matrix.
* `pyscm-ml <https://pypi.org/project/pyscm-ml/>`_ - * `pyscm-ml <https://pypi.org/project/pyscm-ml/>`_ - SCM python implementation
* `randomscm <https://github.com/thibgo/randomscm>`_ - Random SCM python implementation
* `imbalance-bagging <https://imbalanced-learn.org/stable>`_ - Imbalanced learning library
Installing Installing
......
...@@ -12,3 +12,5 @@ plotly>=4.2.1 ...@@ -12,3 +12,5 @@ plotly>=4.2.1
matplotlib>=3.1.1 matplotlib>=3.1.1
tabulate>=0.8.6 tabulate>=0.8.6
pyscm-ml>=1.0.0 pyscm-ml>=1.0.0
git+https://github.com/thibgo/randomscm/archive/refs/tags/v0.0.0-alpha.zip
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#Extracting requirements from requirements.txt #Extracting requirements from requirements.txt
with open('requirements.txt') as f: # with open('requirements.txt') as f:
requirements = f.read().splitlines() # requirements = f.read().splitlines()
# from Cython.Build import cythonize # from Cython.Build import cythonize
from setuptools import setup, find_packages from setuptools import setup, find_packages
# Ceci n'est qu'un appel de fonction. Mais il est trèèèèèèèèèèès long
# et il comporte beaucoup de paramètres
def setup_package(): def setup_package():
setup( setup(
# le nom de votre bibliothèque, tel qu'il apparaitre sur pypi
name='summit', name='summit',
# la version du code
version=0.0, version=0.0,
python_requires = '>=3.5', python_requires = '>=3.5',
# Liste les packages à insérer dans la distribution
# plutôt que de le faire à la main, on utilise la foncton
# find_packages() de setuptools qui va cherche tous les packages
# python recursivement dans le dossier courant.
# C'est pour cette raison que l'on a tout mis dans un seul dossier:
# on peut ainsi utiliser cette fonction facilement
packages=find_packages(), packages=find_packages(),
# votre pti nom
author="Baptiste Bauvin", author="Baptiste Bauvin",
# Votre email, sachant qu'il sera publique visible, avec tous les risques
# que ça implique.
author_email="baptiste.bauvin@lis-lab.fr", author_email="baptiste.bauvin@lis-lab.fr",
# Une description courte
description="Supervised MultiModal Integration Tool", description="Supervised MultiModal Integration Tool",
# Une description longue, sera affichée pour présenter la lib
# Généralement on dump le README ici
long_description=open('README.rst').read(), long_description=open('README.rst').read(),
# Vous pouvez rajouter une liste de dépendances pour votre lib
# et même préciser une version. A l'installation, Python essayera de
# les télécharger et les installer.
#
# Ex: ["gunicorn", "docutils >= 0.3", "lxml==0.5a7"]
#
# Dans notre cas on en a pas besoin, donc je le commente, mais je le
# laisse pour que vous sachiez que ça existe car c'est très utile.
# install_requires= ,
# Active la prise en compte du fichier MANIFEST.in
include_package_data=True, include_package_data=True,
# dependency_links=['https://github.com/aldro61/pyscm.git#egg=pyscm'],
# Une url qui pointe vers la page officielle de votre lib
url='http://gitlab.lis-lab.fr/baptiste.bauvin/summit/', url='http://gitlab.lis-lab.fr/baptiste.bauvin/summit/',
install_requires=requirements, install_requires=['h5py>=2.9.0', 'joblib>=0.13.2', 'numpy>=1.16.4',
'pyparsing>=2.4.0', 'python-dateutil>=2.8.0',
'scikit-learn>=0.19.0', 'scipy>=1.3.0', 'six>=1.12.0',
'pandas>=0.23.3', 'pyyaml>=3.12', 'plotly>=4.2.1',
'matplotlib>=3.1.1', 'tabulate>=0.8.6', 'pyscm-ml>=1.0.0',
"randomscm @ git+https://github.com/thibgo/randomscm.git#egg=randomscm",
"imbalanced-learn"],
extras_require={ extras_require={
'dev': ['pytest', 'pytest-cov'], 'dev': ['pytest', 'pytest-cov'],
'doc': ['sphinx >= 3.0.2', 'numpydoc', 'docutils', 'sphinx-autoapi', 'doc': ['sphinx >= 3.0.2', 'numpydoc', 'docutils', 'sphinx-autoapi',
'sphinx_rtd_theme']}, 'sphinx_rtd_theme']},
# Il est d'usage de mettre quelques metadata à propos de sa lib
# Pour que les robots puissent facilement la classer.
# La liste des marqueurs autorisées est longue:
# https://pypi.python.org/pypi?%3Aaction=list_classifiers.
#
# Il n'y a pas vraiment de règle pour le contenu. Chacun fait un peu
# comme il le sent. Il y en a qui ne mettent rien.
classifiers=[ classifiers=[
"Programming Language :: Python", "Programming Language :: Python",
"Development Status :: 1 - Planning", "Development Status :: 1 - Planning",
...@@ -77,28 +40,8 @@ def setup_package(): ...@@ -77,28 +40,8 @@ def setup_package():
"Programming Language :: Python :: 2/3", "Programming Language :: Python :: 2/3",
"Topic :: Machine Learning", "Topic :: Machine Learning",
], ],
# C'est un système de plugin, mais on s'en sert presque exclusivement
# Pour créer des commandes, comme "django-admin".
# Par exemple, si on veut créer la fabuleuse commande "proclame-sm", on
# va faire pointer ce nom vers la fonction proclamer(). La commande sera
# créé automatiquement.
# La syntaxe est "nom-de-commande-a-creer = package.module:fonction".
# entry_points={
# 'console_scripts': [
# 'exec_multiview = summit.execute:exec',
# ],
# },
# A fournir uniquement si votre licence n'est pas listée dans "classifiers"
# ce qui est notre cas
license="GNUGPL", license="GNUGPL",
# Il y a encore une chiée de paramètres possibles, mais avec ça vous
# couvrez 90% des besoins
# ext_modules=cythonize(
# "summit/multiview_platform/monoview/additions/_custom_criterion.pyx"),
) )
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -14,7 +14,7 @@ from .organization import secure_file_path ...@@ -14,7 +14,7 @@ from .organization import secure_file_path
of SuMMIT''' of SuMMIT'''
class Dataset(): class Dataset:
""" """
This is the base class for all the types of multiview datasets of SuMMIT. This is the base class for all the types of multiview datasets of SuMMIT.
""" """
...@@ -165,11 +165,9 @@ class Dataset(): ...@@ -165,11 +165,9 @@ class Dataset():
return selected_label_names return selected_label_names
def gen_feat_id(self): def gen_feat_id(self, view_ind):
self.feature_ids = [["ID_" + str(i) for i in self.feature_ids[view_ind] = ["ID_" + str(i) for i in
range(self.get_v(view_ind).shape[1])] range(self.get_v(view_ind).shape[1])]
for view_ind in self.view_dict.values()]
class RAMDataset(Dataset): class RAMDataset(Dataset):
...@@ -193,13 +191,14 @@ class RAMDataset(Dataset): ...@@ -193,13 +191,14 @@ class RAMDataset(Dataset):
self.name = name self.name = name
self.nb_view = len(self.views) self.nb_view = len(self.views)
self.is_temp = False self.is_temp = False
self.feature_ids = [_ for _ in range(self.nb_view)]
for view_ind in range(self.nb_view):
if feature_ids is not None: if feature_ids is not None:
feature_ids = [[feature_id if not is_just_number(feature_id) self.feature_ids[view_ind] = [feature_id if not is_just_number(feature_id)
else "ID_" + feature_id for feature_id in else "ID_" + feature_id for feature_id in
feat_ids] for feat_ids in feature_ids] feature_ids[view_ind]]
self.feature_ids = feature_ids
else: else:
self.gen_feat_id() self.gen_feat_id(view_ind)
def get_view_name(self, view_idx): def get_view_name(self, view_idx):
return self.view_names[view_idx] return self.view_names[view_idx]
...@@ -377,14 +376,15 @@ class HDF5Dataset(Dataset): ...@@ -377,14 +376,15 @@ class HDF5Dataset(Dataset):
else: else:
self.sample_ids = ["ID_" + str(i) self.sample_ids = ["ID_" + str(i)
for i in range(labels.shape[0])] for i in range(labels.shape[0])]
self.feature_ids = [_ for _ in range(self.nb_view)]
for view_index in range(self.nb_view):
if feature_ids is not None: if feature_ids is not None:
feature_ids = [[feature_id if not is_just_number(feature_id) feat_ids = [feature_id if not is_just_number(feature_id)
else "ID_" + feature_id for feature_id in else "ID_" + feature_id for feature_id in
feat_ids] for feat_ids in feature_ids] feature_ids[view_index]]
self.feature_ids = feature_ids self.feature_ids = feat_ids
else: else:
self.gen_feat_id() self.gen_feat_id(view_index)
def get_v(self, view_index, sample_indices=None): def get_v(self, view_index, sample_indices=None):
""" Extract the view and returns a numpy.ndarray containing the description """ Extract the view and returns a numpy.ndarray containing the description
...@@ -443,6 +443,7 @@ class HDF5Dataset(Dataset): ...@@ -443,6 +443,7 @@ class HDF5Dataset(Dataset):
""" """
self.nb_view = self.dataset["Metadata"].attrs["nbView"] self.nb_view = self.dataset["Metadata"].attrs["nbView"]
self.feature_ids = [_ for _ in range(self.nb_view)]
self.view_dict = self.get_view_dict() self.view_dict = self.get_view_dict()
self.view_names = [self.dataset["View{}".format(ind)].attrs['name'] for ind in range(self.nb_view)] self.view_names = [self.dataset["View{}".format(ind)].attrs['name'] for ind in range(self.nb_view)]
if "sample_ids" in self.dataset["Metadata"].keys(): if "sample_ids" in self.dataset["Metadata"].keys():
...@@ -454,14 +455,14 @@ class HDF5Dataset(Dataset): ...@@ -454,14 +455,14 @@ class HDF5Dataset(Dataset):
else: else:
self.sample_ids = ["ID_" + str(i) for i in self.sample_ids = ["ID_" + str(i) for i in
range(self.dataset["Labels"].shape[0])] range(self.dataset["Labels"].shape[0])]
if "feature_ids" in self.dataset["Metadata"].keys(): for view_index in range(self.nb_view):
self.feature_ids = [[feature_id.decode() if "feature_ids-View{}".format(view_index) in self.dataset["Metadata"].keys():
self.feature_ids[view_index] = [feature_id.decode()
if not is_just_number(feature_id.decode()) if not is_just_number(feature_id.decode())
else "ID_" + feature_id.decode() else "ID_" + feature_id.decode()
for feature_id in feature_ids] for feature_ids in for feature_id in self.dataset["Metadata"]["feature_ids-View{}".format(view_index)]]
self.dataset["Metadata"]["feature_ids"]]
else: else:
self.gen_feat_id() self.gen_feat_id(view_index)
def get_nb_samples(self): def get_nb_samples(self):
""" """
......
...@@ -76,9 +76,11 @@ class TestFunctions(unittest.TestCase): ...@@ -76,9 +76,11 @@ class TestFunctions(unittest.TestCase):
self.assertEqual(avail, ['adaboost', self.assertEqual(avail, ['adaboost',
'decision_tree', 'decision_tree',
'gradient_boosting', 'gradient_boosting',
'imbalance_bagging',
'knn', 'knn',
'lasso', 'lasso',
'random_forest', 'random_forest',
"random_scm",
'scm', 'scm',
'sgd', 'sgd',
'svm_linear', 'svm_linear',
...@@ -89,8 +91,10 @@ class TestFunctions(unittest.TestCase): ...@@ -89,8 +91,10 @@ class TestFunctions(unittest.TestCase):
self.assertEqual(avail, ['adaboost', self.assertEqual(avail, ['adaboost',
'decision_tree', 'decision_tree',
'gradient_boosting', 'gradient_boosting',
'imbalance_bagging',
'knn', 'knn',
'random_forest', 'random_forest',
"random_scm",
'scm', 'scm',
'svm_linear', 'svm_linear',
'svm_poly', 'svm_poly',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment