Commit 85f372eb authored by Baptiste Bauvin

Merged

parent 71f781d4

Showing 2559 additions and 884 deletions
.gitlab-ci.yml:

@@ -6,6 +6,8 @@ tests:
   script:
     - export LC_ALL=$(locale -a | grep en_US)
     - export LANG=$(locale -a | grep en_US)
+    - pip3 install --upgrade pip
+    - pip3 -V
     - pip3 install -e .
     - pytest-3
   coverage: '/^TOTAL.+?(\d+\%)$/'
@@ -24,6 +26,7 @@ doc:
   script:
     - export LC_ALL=$(locale -a | grep en_US)
     - export LANG=$(locale -a | grep en_US)
+    - pip3 install --upgrade pip
    - pip3 install -e .[doc]
     - sphinx-apidoc -o docs/source summit
     - cd docs/source
@@ -45,6 +48,7 @@ pages:
   script:
     - export LC_ALL=$(locale -a | grep en_US)
     - export LANG=$(locale -a | grep en_US)
+    - pip3 install --upgrade pip
     - pip3 install -e .[doc]
     - pytest-3
     - sphinx-apidoc -o docs/source summit
@@ -57,7 +57,9 @@ And the following python modules will be automatically installed:
 * `pyyaml <https://pypi.org/project/PyYAML/>`_ - Used to read the config files,
 * `plotly <https://plot.ly/>`_ - Used to generate interactive HTML visuals,
 * `tabulate <https://pypi.org/project/tabulate/>`_ - Used to generate the confusion matrix.
-* `pyscm-ml <https://pypi.org/project/pyscm-ml/>`_ -
+* `pyscm-ml <https://pypi.org/project/pyscm-ml/>`_ - SCM python implementation
+* `randomscm <https://github.com/thibgo/randomscm>`_ - Random SCM python implementation
+* `imbalance-bagging <https://imbalanced-learn.org/stable>`_ - Imbalanced learning library

 Installing
# The base configuration of the benchmark
log: True
name: ['tnbc_mazid']
label: ""
file_type: ".hdf5"
views:
pathf: "/home/baptiste/Documents/Datasets/Mazid/"
nice: 0
random_state: 42
nb_cores: 1
full: True
debug: True
add_noise: False
noise_std: 0.0
res_dir: "../results/"
track_tracebacks: True

# All the classification-related configuration options
multiclass_method: "oneVersusOne"
split: 0.30
nb_folds: 5
nb_class: 2
classes:
type: ["monoview","multiview"]
algos_monoview: ["samba", "scm_bagging", "random_forest", "adaboost", 'scm']
algos_multiview: ["early_fusion_adaboost", "early_fusion_decision_tree", "early_fusion_random_forest", "early_fusion_samba"]
stats_iter: 5
metrics:
  balanced_accuracy: {}
  f1_score:
    average: 'micro'
  accuracy_score: {}
metric_princ: "balanced_accuracy"
hps_type: "Random"
hps_args:
  n_iter: 20
  equivalent_draws: False
svm_rbf:
  C: 0.7
scm_bagging:
  {max_features: 0.908115713423863, max_rules: 9, max_samples: 0.9277949143533335,
   model_type: conjunction, n_estimators: 109, p_options: 0.7823433255515356}
samba:
  n_estimators: 22
adaboost:
  {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
   base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
   base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
   base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
   base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
   base_estimator__splitter: best, n_estimators: 354}
svm_linear:
  C: 0.3867
cb_boost:
  n_stumps: 1
  n_max_iterations: 20
  estimators_generator: "Stumps"
cq_boost:
  n_max_iterations: 10
  n_stumps: 1
min_cq:
  n_stumps_per_attribute: 1
decision_tree:
  {criterion: entropy, max_depth: 271, splitter: random}
early_fusion_adaboost:
  {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
   base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
   base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
   base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
   base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
   base_estimator__splitter: best, base_estimator_config: null, n_estimators: 273}
early_fusion_decision_tree:
  {criterion: entropy, max_depth: 293, splitter: random}
early_fusion_random_forest:
  {criterion: gini, max_depth: 8, n_estimators: 46}
random_forest:
  {criterion: gini, max_depth: 8, n_estimators: 32}
weighted_linear_late_fusion:
  classifier_configs:
    - decision_tree: {criterion: entropy, max_depth: 112, splitter: random}
    - adaboost: {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0,
        base_estimator__class_weight: null, base_estimator__criterion: gini, base_estimator__max_depth: 2,
        base_estimator__max_features: null, base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
        base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1,
        base_estimator__min_samples_split: 2, base_estimator__min_weight_fraction_leaf: 0.0,
        base_estimator__random_state: null, base_estimator__splitter: best, n_estimators: 400}
  classifiers_names: [decision_tree, adaboost]
  nb_cores: 1
  rs: 724
  weights: [0.9636627605010293, 0.3834415188257777]
scm:
  {max_rules: 10, model_type: conjunction, p: 0.8310271995093625}
mumbo:
  base_estimator:
    - svm_rbf:
        C: 0.001
    - svm_rbf:
        C: 0.001
    - decision_tree:
        max_depth: 1
    - decision_tree:
        max_depth: 1
  n_estimators: 100
mv_cb_boost:
  n_estimators: 100
  base_estimator: ["Stumps", "Stumps", "Stumps", "Stumps"]
  base_estimator__n_stumps: [50, 50, 50, 50]
  base_estimator__check_diff: False
  base_estimator__C: 0.001
  base_estimator__kernel: "rbf"
  base_estimator__max_depth: 2
  base_estimator__distribution_type: "uniform"
  base_estimator__low: 0
  base_estimator__high: 10
  base_estimator__attributes_ratio: 0.5
  base_estimator__examples_ratio: 0.55
early_fusion_cb:
  monoview_classifier_config:
    cb_boost:
      n_estimators: 100
      base_estimator__max_depth: 1
early_fusion_dt:
  monoview_classifier_config:
    decision_tree:
      max_depth: 2
early_fusion_rf:
  monoview_classifier_config:
    random_forest:
      n_estimators: 100
      max_depth: 1
early_fusion_svm:
  monoview_classifier_config:
    svm_rbf:
      C: 0.7

#pb_mv_boost:
#  num_iterations: 20
#  decision_tree_depth: 1
#weighted_linear_early_fusion:
#  monoview_classifier_name: "cb_boost"
#  monoview_classifier_config:
#    cb_boost:
#      n_stumps: 30
#      n_max_iterations: 20
#      estimators_generator: "Trees"
#      max_depth: 1
#weighted_linear_late_fusion:
#  classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"]
#  classifier_configs:
#    - cb_boost:
#        n_stumps: 30
#        n_max_iterations: 20
#        estimators_generator: "Trees"
#        max_depth: 1
#    - cb_boost:
#        n_stumps: 30
#        n_max_iterations: 20
#        estimators_generator: "Trees"
#        max_depth: 1
#    - cb_boost:
#        n_stumps: 30
#        n_max_iterations: 20
#        estimators_generator: "Trees"
#        max_depth: 1
#    - cb_boost:
#        n_stumps: 30
#        n_max_iterations: 20
#        estimators_generator: "Trees"
#        max_depth: 1
#
# The base configuration of the benchmark
log: True
name: ["multiview_mnist"]
label: "_"
file_type: ".hdf5"
views:
pathf: "examples/data/"
nice: 0
random_state: 43
nb_cores: 1
full: True
debug: True
add_noise: False
noise_std: 0.0
res_dir: "../results/"
track_tracebacks: False

# All the classification-related configuration options
multiclass_method: "oneVersusOne"
split: 0.96
nb_folds: 5
nb_class:
classes:
type: ["monoview","multiview"]
algos_monoview: ["decision_tree","adaboost"]
algos_multiview: ["mumbo","mvml", 'lp_norm_mkl', 'mucombo', 'early_fusion_decision_tree', 'early_fusion_adaboost']
stats_iter: 1
metrics:
  accuracy_score: {}
  f1_score: {}
metric_princ: "accuracy_score"
hps_type: "None"
hps_args:
  n_iter: 2
mumbo:
  base_estimator:
    decision_tree:
      max_depth: 3
\ No newline at end of file
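For reference, configs like the two above are plain YAML; a minimal sketch of reading one with pyyaml (listed among the dependencies as the config-file reader). The file name "config.yml" is only a placeholder:

# Minimal sketch: load a benchmark config like the ones above with pyyaml.
import yaml

with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)

# Top-level keys become plain Python values and dicts:
print(config["name"])      # e.g. ["multiview_mnist"]
print(config["hps_type"])  # e.g. "None"
print(config["mumbo"])     # nested classifier configuration as a dict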
@@ -106,9 +106,27 @@ adaboost:
   n_estimators: 50
 ######################################
 ## The Monoview Classifier arguments #
 ######################################
+mumbo:
+  base_estimator__criterion: 'gini'
+  base_estimator__max_depth: 3
+  base_estimator__random_state: None
+  base_estimator__splitter: 'best'
+  best_view_mode: 'edge'
+  base_estimator: 'decision_tree'
+  n_estimators: 10
+mucombo:
+  base_estimator__criterion: 'gini'
+  base_estimator__max_depth: 3
+  base_estimator__random_state: None
+  base_estimator__splitter: 'best'
+  best_view_mode: 'edge'
+  base_estimator: 'decision_tree'
+  n_estimators: 10
 #
 #random_forest:
 #    n_estimators: [25]
requirements.txt:

@@ -12,4 +12,5 @@ plotly>=4.2.1
 matplotlib>=3.1.1
 tabulate>=0.8.6
 pyscm-ml>=1.0.0
+git+https://github.com/thibgo/randomscm/archive/refs/tags/v0.0.0-alpha.zip
setup.py:

 # -*- coding: utf-8 -*-
-# Extracting requirements from requirements.txt
-with open('requirements.txt') as f:
-    requirements = f.read().splitlines()
+# with open('requirements.txt') as f:
+#     requirements = f.read().splitlines()
 # from Cython.Build import cythonize
 from setuptools import setup, find_packages

-# This is just a function call. But it is a veeeery long one
-# and it takes a lot of parameters
 def setup_package():
     setup(
-        # the name of your library, as it will appear on pypi
         name='summit',
-        # the code version
         version=0.0,
         python_requires = '>=3.5',
-        # Lists the packages to include in the distribution.
-        # Rather than doing it by hand, we use setuptools'
-        # find_packages() function, which recursively finds all the
-        # python packages in the current directory.
-        # This is why everything was put in a single directory:
-        # it makes using this function easy.
         packages=find_packages(),
-        # your name
         author="Baptiste Bauvin",
-        # Your email; note that it will be publicly visible, with all
-        # the risks that implies.
         author_email="baptiste.bauvin@lis-lab.fr",
-        # A short description
         description="Supervised MultiModal Integration Tool",
-        # A long description, displayed to present the lib.
-        # Usually the README is dumped here.
         long_description=open('README.rst').read(),
-        # You can add a list of dependencies for your lib,
-        # and even pin versions. At install time, Python will try to
-        # download and install them.
-        #
-        # Ex: ["gunicorn", "docutils >= 0.3", "lxml==0.5a7"]
-        #
-        # In our case we don't need it, so I comment it out, but I
-        # leave it so you know it exists, as it is very useful.
-        # install_requires= ,
-        # Enables taking the MANIFEST.in file into account
         include_package_data=True,
-        # dependency_links=['https://github.com/aldro61/pyscm.git#egg=pyscm'],
-        # A url pointing to the official page of your lib
         url='http://gitlab.lis-lab.fr/baptiste.bauvin/summit/',
-        install_requires=requirements,
+        install_requires=['h5py>=2.9.0', 'joblib>=0.13.2', 'numpy>=1.16.4',
+                          'pyparsing>=2.4.0', 'python-dateutil>=2.8.0',
+                          'scikit-learn>=0.19.0', 'scipy>=1.3.0', 'six>=1.12.0',
+                          'pandas>=0.23.3', 'pyyaml>=3.12', 'plotly>=4.2.1',
+                          'matplotlib>=3.1.1', 'tabulate>=0.8.6', 'pyscm-ml>=1.0.0',
+                          "randomscm @ git+https://github.com/thibgo/randomscm.git#egg=randomscm",
+                          "imbalanced-learn"],
         extras_require={
             'dev': ['pytest', 'pytest-cov'],
             'doc': ['sphinx >= 3.0.2', 'numpydoc', 'docutils', 'sphinx-autoapi',
                     'sphinx_rtd_theme']},
-        # It is customary to add some metadata about your lib
-        # so that robots can classify it easily.
-        # The list of allowed classifiers is long:
-        # https://pypi.python.org/pypi?%3Aaction=list_classifiers.
-        #
-        # There is no real rule for the content. Everyone does it a bit
-        # as they see fit. Some put nothing.
         classifiers=[
             "Programming Language :: Python",
             "Development Status :: 1 - Planning",

@@ -77,27 +40,8 @@ def setup_package():
             "Programming Language :: Python :: 2/3",
             "Topic :: Machine Learning",
         ],
-        # It is a plugin system, but it is used almost exclusively
-        # to create commands, like "django-admin".
-        # For example, to create the fabulous "proclame-sm" command, we
-        # would point that name to the proclamer() function. The command
-        # would be created automatically.
-        # The syntax is "command-name-to-create = package.module:function".
-        # entry_points={
-        #     'console_scripts': [
-        #         'exec_multiview = summit.execute:exec',
-        #     ],
-        # },
-        # Only to be provided if your license is not listed in
-        # "classifiers", which is our case
         license="GNUGPL",
-        # There are plenty of other possible parameters, but with these
-        # you cover 90% of the needs
-        # ext_modules=cythonize(
-        #     "summit/multiview_platform/monoview/additions/_custom_criterion.pyx"),
     )


 if __name__ == "__main__":
summit/__init__.py:

 __version__ = "0.0.0.0"
+__url__ = "https://gitlab.lis-lab.fr/baptiste.bauvin/summit"

 from . import multiview_platform, execute
@@ -27,7 +27,7 @@ res_dir: "examples/results/example_0/"
 # If an error occurs in a classifier, if track_tracebacks is set to True, the
 # benchmark saves the traceback and continues, if it is set to False, it will
 # stop the benchmark and raise the error
-track_tracebacks: True
+track_tracebacks: False

 # All the classification-related configuration options

@@ -40,14 +40,14 @@ nb_class:
 # The name of the classes to select in the dataset
 classes:
 # The type of algorithms to run during the benchmark (monoview and/or multiview)
-type: ["monoview","multiview"]
+cl_type: ["monoview","multiview"]
 # The name of the monoview algorithms to run, ["all"] to run all the available classifiers
 algos_monoview: ["decision_tree", "adaboost"]
 # The names of the multiview algorithms to run, ["all"] to run all the available classifiers
 algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",]
 # The number of times the benchmark is repeated with different train/test
 # split, to have more statistically significant results
-stats_iter: 1
+stats_iter: 2
 # The metrics that will be used in the result analysis
 metrics:
   accuracy_score: {}
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "multiview_mnist"
# A label for the result directory
label: "mnist"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 4
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_3/"
# If an error occurs in a classifier, if track_tracebacks is set to True, the
# benchmark saves the traceback and continues, if it is set to False, it will
# stop the benchmark and raise the error
track_tracebacks: True

# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train samples
split: 0.8
# The number of folds in the cross validation process when hyper-parameter optimization is performed
nb_folds: 5
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost"]
# The number of times the benchmark is repeated with different train/test
# split, to have more statistically significant results
stats_iter: 5
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: 'Random'
# The number of iterations in the hyper-parameter optimization process
hps_args:
  n_iter: 10
decision_tree:
  max_depth: 3
adaboost:
  base_estimator: "DecisionTreeClassifier"
  n_estimators: 10
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 2
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
summit/execute.py:

@@ -8,7 +8,7 @@ def execute(config_path=None):  # pragma: no cover
     from summit.multiview_platform import exec_classif
     if config_path is None:
-        exec_classif.exec_classif(sys.argv[1:])
+        sum = exec_classif.Summit(config_path=sys.argv[1:])
     else:
         if config_path == "example 0":
             config_path = os.path.join(

@@ -59,7 +59,8 @@ def execute(config_path=None):  # pragma: no cover
                 "examples",
                 "config_files",
                 "config_example_3.yml")
-        exec_classif.exec_classif(["--config_path", config_path])
+        sum = exec_classif.Summit(["--config_path", config_path])
+        sum.exec_classif()


 if __name__ == "__main__":
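The two changed call sites above replace the exec_classif.exec_classif(...) function call with a Summit object. A minimal sketch of the new calling convention, inferred solely from this diff (the actual Summit signature lives in summit.multiview_platform.exec_classif):

# Sketch of the object-based entry point introduced by this diff; the
# config path is an example, any benchmark config file would do.
from summit.multiview_platform import exec_classif

summit_run = exec_classif.Summit(
    ["--config_path", "examples/config_files/config_example_0.yml"])
summit_run.exec_classif()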
"""Functions :
score: to get the accuracy score
get_scorer: returns a sklearn scorer for grid search
"""
from sklearn.metrics import balanced_accuracy_score as metric
from sklearn.metrics import make_scorer
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
"""Arguments:
y_true: real labels
y_pred: predicted labels
Keyword Arguments:
"0": weights to compute accuracy
Returns:
Weighted accuracy score for y_true, y_pred"""
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
"""Keyword Arguments:
"0": weights to compute accuracy
Returns:
A weighted sklearn scorer for accuracy"""
return make_scorer(metric, greater_is_better=True,
**kwargs)
def get_config(**kwargs):
config_string = "Balanced accuracy score using {}, (higher is better)".format(
kwargs)
return config_string
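A quick standalone check of what the module above wraps: balanced accuracy is the mean of per-class recalls, so sklearn alone illustrates the value score() would return. Toy labels only:

from sklearn.metrics import balanced_accuracy_score

y_true = [0, 0, 0, 1, 1]
y_pred = [0, 0, 1, 1, 1]
# Recall of class 0 is 2/3, recall of class 1 is 1,
# so balanced accuracy = (2/3 + 1) / 2 = 0.8333...
print(balanced_accuracy_score(y_true, y_pred))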
@@ -7,7 +7,10 @@ __status__ = "Prototype"  # Production, Development, Prototype


 def score(y_true, y_pred, multiclass=False, **kwargs):
-    score = metric(y_true, y_pred, **kwargs)
+    try:
+        score = metric(y_true, y_pred, **kwargs)
+    except:
+        score = 0.0
     return score
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix as metric

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def score(y_true, y_pred, **kwargs):
    # Specificity: true negatives / (true negatives + false positives),
    # read off the first row of the confusion matrix.
    score = metric(y_true, y_pred, **kwargs)
    if score[0, 0] + score[0, 1] != 0:
        return score[0, 0] / (score[0, 0] + score[0, 1])
    else:
        return 0


def get_scorer(**kwargs):
    return make_scorer(score, greater_is_better=True, **kwargs)


def get_config(**kwargs):
    configString = "Specificity score using {}, (higher is better)".format(
        kwargs)
    return configString
\ No newline at end of file
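A standalone worked example of the formula in score() above: specificity is TN / (TN + FP), taken from the first row of the confusion matrix. Toy labels only:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 0, 1, 1]
y_pred = [0, 1, 0, 1, 1]
cm = confusion_matrix(y_true, y_pred)
# cm[0, 0] = 2 true negatives, cm[0, 1] = 1 false positive,
# so specificity = 2 / (2 + 1) = 0.6667, matching score() above.
print(cm[0, 0] / (cm[0, 0] + cm[0, 1]))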
import numpy as np
from six import iteritems
from randomscm.randomscm import RandomScmClassifier

from ..monoview.monoview_utils import BaseMonoviewClassifier
from summit.multiview_platform.utils.hyper_parameter_search import CustomUniform, CustomRandint

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype

classifier_class_name = "ScmBaggingMinCq"

MAX_INT = np.iinfo(np.int32).max


class ScmBaggingMinCq(RandomScmClassifier, BaseMonoviewClassifier):
    """A Bagging classifier for SetCoveringMachineClassifier().
    The base estimators are built on subsets of both samples
    and features.

    Parameters
    ----------
    n_estimators : int, default=10
        The number of base estimators in the ensemble.

    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator,
        with replacement.
        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.

    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator,
        without replacement.
        - If int, then draw `max_features` features.
        - If float, then draw `max_features * X.shape[1]` features.

    p_options : list of float with len <= n_estimators, default=[1.0]
        The estimators will be fitted with the values of p found in p_options.
        Let k = n_estimators / len(p_options);
        the first k estimators will have p=p_options[0],
        the next k estimators will have p=p_options[1], and so on...

    random_state : int or RandomState, default=None
        Controls the random resampling of the original dataset
        (sample wise and feature wise).
        If the base estimator accepts a `random_state` attribute, a different
        seed is generated for each instance in the ensemble.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    n_features_ : int
        The number of features when :meth:`fit` is performed.

    estimators_ : list of estimators
        The collection of fitted base estimators.

    estim_features : list of arrays
        The subset of drawn features for each base estimator.

    Examples
    --------
    >>> @TODO

    References
    ----------
    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.
    .. [2] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.
    """

    def __init__(self,
                 n_estimators=50,
                 max_samples=1.0,
                 max_features=1.0,
                 max_rules=10,
                 p_options=[0.316],
                 model_type="conjunction",
                 min_cq_combination=True,
                 min_cq_mu=10e-3,
                 random_state=None):
        if isinstance(p_options, float):
            p_options = [p_options]
        RandomScmClassifier.__init__(self, n_estimators=n_estimators,
                                     max_samples=max_samples,
                                     max_features=max_features,
                                     max_rules=max_rules,
                                     p_options=p_options,
                                     model_type=model_type,
                                     min_cq_combination=min_cq_combination,
                                     min_cq_mu=min_cq_mu,
                                     random_state=random_state)
        self.param_names = ["n_estimators", "max_rules", "max_samples",
                            "max_features", "model_type", "p_options",
                            "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomRandint(low=1, high=20),
                         CustomUniform(), CustomUniform(),
                         ["conjunction", "disjunction"],
                         CustomUniform(), [random_state]]
        self.weird_strings = {}

    def set_params(self, p_options=[0.316], **kwargs):
        if not isinstance(p_options, list):
            p_options = [p_options]
        kwargs["p_options"] = p_options
        for parameter, value in iteritems(kwargs):
            setattr(self, parameter, value)
        return self

    def get_interpretation(self, directory, base_file_name, y_test,
                           multi_class=False):
        self.features_importance()
        interpret_string = self.get_feature_importance(directory,
                                                       base_file_name)
        return interpret_string
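A hypothetical usage sketch for the class above, assuming the randomscm dependency added in this commit is installed; the data and hyper-parameter values below are made up:

import numpy as np

# Synthetic binary data with a trivially learnable target.
rng = np.random.RandomState(42)
X = rng.randint(0, 2, size=(100, 20))
y = X[:, 0]

clf = ScmBaggingMinCq(n_estimators=10, max_rules=5,
                      p_options=[0.316, 1.0],
                      model_type="conjunction",
                      random_state=42)
clf.fit(X, y)
print(clf.predict(X[:5]))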
@@ -35,7 +35,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         )
         self.param_names = ["n_estimators", "base_estimator"]
         self.classed_params = ["base_estimator"]
-        self.distribs = [CustomRandint(low=1, high=500),
+        self.distribs = [CustomRandint(low=1, high=100),
                          base_boosting_estimators]
         self.weird_strings = {"base_estimator": "class_name"}
         self.plotted_metric = metrics.zero_one_loss

@@ -67,27 +67,27 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
     def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                            multi_class=False):  # pragma: no cover
         interpretString = ""
-        interpretString += self.get_feature_importance(directory,
-                                                       base_file_name,
-                                                       feature_ids)
-        interpretString += "\n\n Estimator error | Estimator weight\n"
-        interpretString += "\n".join(
-            [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
-             error, weight in
-             zip(self.estimator_errors_, self.estimator_weights_)])
-        step_test_metrics = np.array(
-            [self.plotted_metric.score(y_test, step_pred) for step_pred in
-             self.step_predictions])
-        get_accuracy_graph(step_test_metrics, "Adaboost",
-                           os.path.join(directory,
-                                        base_file_name + "test_metrics.png"),
-                           self.plotted_metric_name, set="test")
-        np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
-                   step_test_metrics,
-                   delimiter=',')
-        np.savetxt(
-            os.path.join(directory, base_file_name + "train_metrics.csv"),
-            self.metrics, delimiter=',')
-        np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
-                   np.array([self.train_time, self.pred_time]), delimiter=',')
+        # interpretString += self.get_feature_importance(directory,
+        #                                                base_file_name,
+        #                                                feature_ids)
+        # interpretString += "\n\n Estimator error | Estimator weight\n"
+        # interpretString += "\n".join(
+        #     [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
+        #      error, weight in
+        #      zip(self.estimator_errors_, self.estimator_weights_)])
+        # step_test_metrics = np.array(
+        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
+        #      self.step_predictions])
+        # get_accuracy_graph(step_test_metrics, "Adaboost",
+        #                    os.path.join(directory,
+        #                                 base_file_name + "test_metrics.png"),
+        #                    self.plotted_metric_name, set="test")
+        # np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
+        #            step_test_metrics,
+        #            delimiter=',')
+        # np.savetxt(
+        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
+        #     self.metrics, delimiter=',')
+        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
+        #            np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString
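The only functional change in the first hunk above narrows the random-search range for n_estimators from [1, 500) to [1, 100). A standalone sketch of the same idea using scipy's randint rather than the platform's CustomRandint:

from scipy.stats import randint

rng_old = randint(low=1, high=500)
rng_new = randint(low=1, high=100)
# Candidate n_estimators values drawn before and after the change.
print(rng_old.rvs(size=3, random_state=42))
print(rng_new.rvs(size=3, random_state=42))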
import logging

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted

from ..metrics import zero_one_loss
from .additions.BoostUtils import StumpsClassifiersGenerator, \
    BaseBoost
from ..monoview.monoview_utils import CustomRandint, \
    BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero

classifier_class_name = "AdaboostGraalpy"


class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost):
    """Scikit-Learn compatible AdaBoost classifier. Original code by Pascal
    Germain, adapted by Jean-Francis Roy.

    Parameters
    ----------
    n_iterations : int, optional
        The number of iterations of the algorithm. Defaults to 200.
    iterations_to_collect_as_hyperparameters : list
        Iteration numbers to collect while learning, that will be converted
        as hyperparameter values at evaluation time. Defaults to None.
    classifiers_generator : Transformer, optional
        A transformer to convert input samples in voters' outputs. Default:
        Decision stumps transformer, with 10 stumps per attribute.
    callback_function : function, optional
        A function to call at each iteration that is supplied learning
        information. Defaults to None.
    n_stumps : int (default : 10)
    self_complemented : boolean (default : True)

    Attributes
    ----------
    n_iterations : int, optional
        The number of iterations of the algorithm. Defaults to 200.
    iterations_to_collect_as_hyperparameters : list
        Iteration numbers to collect while learning, that will be converted
        as hyperparameter values at evaluation time. Defaults to None.
    classifiers_generator : Transformer, optional
        A transformer to convert input samples in voters' outputs. Default:
        Decision stumps transformer, with 10 stumps per attribute.
    callback_function : function, optional
        A function to call at each iteration that is supplied learning
        information. Defaults to None.
    """

    def __init__(self, n_iterations=200,
                 iterations_to_collect_as_hyperparameters=True,
                 classifiers_generator=None, callback_function=None,
                 n_stumps=10, self_complemented=True):
        self.n_iterations = n_iterations
        self.n_stumps = n_stumps
        self.iterations_to_collect_as_hyperparameters = iterations_to_collect_as_hyperparameters
        self.estimators_generator = classifiers_generator
        self.callback_function = callback_function
        self.self_complemented = self_complemented

    def fit(self, X, y):
        """Fits the algorithm on training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data.
        y : ndarray of shape (n_samples, )
            The input labels.

        Returns
        -------
        self
        """
        y_neg = change_label_to_minus(y)

        if self.estimators_generator is None:
            self.estimators_generator = StumpsClassifiersGenerator(
                n_stumps_per_attribute=self.n_stumps,
                self_complemented=self.self_complemented)

        # Step 1: We fit the classifiers generator and get its classification matrix.
        self.estimators_generator.fit(X, y_neg)
        # hint: This is equivalent to constructing a new X
        classification_matrix = self._binary_classification_matrix(X)

        n_samples, n_voters = classification_matrix.shape
        # logging.debug("n_voters = {}".format(n_voters))

        # Step 2: We initialize the weights on the samples and the weak classifiers.
        sample_weights = np.ones(n_samples) / n_samples
        alpha_weights = np.zeros(n_voters)
        self.losses = []

        # Step 3: We loop for each iteration.
        self.collected_weight_vectors_ = []
        for t in range(self.n_iterations):

            # Step 4: We find the classifier that maximizes the success,
            # weighted by the sample weights.
            classifier_successes = np.dot(classification_matrix.T,
                                          sample_weights * y_neg)

            best_voter_index = np.argmax(classifier_successes)
            success = classifier_successes[best_voter_index]

            if success >= 1.0:
                logging.info("AdaBoost stopped : perfect classifier found!")
                self.weights_ = np.zeros(n_voters)
                self.weights_[best_voter_index] = 1.0
                return self

            # Step 5: We calculate the alpha_t parameter and update the alpha weights.
            alpha = 0.5 * np.log((1.0 + success) / (1.0 - success))
            alpha_weights[best_voter_index] += alpha

            # logging.debug("{} : {}".format(t, str(alpha)))

            # Step 6: We update the sample weights.
            sample_weights *= np.exp(
                -1 * alpha * y_neg * classification_matrix[:, best_voter_index])

            normalization_constant = sample_weights.sum()
            sample_weights = sample_weights / normalization_constant

            # We collect iteration information for later evaluation.
            if self.iterations_to_collect_as_hyperparameters:
                weights = alpha_weights / np.sum(alpha_weights)
                self.collected_weight_vectors_.append(weights.copy())

            loss = zero_one_loss.score(y_neg, np.sign(np.sum(
                np.multiply(classification_matrix,
                            alpha_weights / np.sum(alpha_weights)), axis=1)))
            self.losses.append(loss)

            if self.callback_function is not None:
                self.callback_function(t, alpha_weights, normalization_constant,
                                       self.estimators_generator, self.weights_)

        self.weights_ = alpha_weights / np.sum(alpha_weights)
        self.losses = np.array(self.losses)
        self.learner_info_ = {
            'n_nonzero_weights': np.sum(self.weights_ > 1e-12)}

        return self

    def predict(self, X):
        """Predict inputs using the fit classifier.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The data to classify.

        Returns
        -------
        predictions : ndarray of shape (n_samples, )
            The estimated labels.
        """
        check_is_fitted(self, 'weights_')
        classification_matrix = self._binary_classification_matrix(X)

        if self.iterations_to_collect_as_hyperparameters:
            self.test_preds = []
            for weight_vector in self.collected_weight_vectors_:
                preds = np.sum(np.multiply(classification_matrix,
                                           weight_vector), axis=1)
                self.test_preds.append(change_label_to_zero(np.sign(preds)))
            self.test_preds = np.array(self.test_preds)
        margins = np.squeeze(
            np.asarray(np.dot(classification_matrix, self.weights_)))
        return change_label_to_zero(
            np.array([int(x) for x in np.sign(margins)]))


class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier):
    """AdaboostGraalpy

    Parameters
    ----------
    random_state : int seed, RandomState instance, or None (default=None)
        The seed of the pseudo random number generator to use when
        shuffling the data.
    n_iterations : int, number of iterations (default : 200)
    n_stumps : int (default 1)
    kwargs : other arguments

    Attributes
    ----------
    param_names :
    distribs :
    weird_strings :
    n_stumps :
    nbCores :
    """

    def __init__(self, random_state=None, n_iterations=200, n_stumps=1,
                 **kwargs):
        super(AdaboostGraalpy, self).__init__(
            n_iterations=n_iterations,
            n_stumps=n_stumps
        )
        self.param_names = ["n_iterations", "n_stumps", "random_state"]
        self.distribs = [CustomRandint(low=1, high=500), [n_stumps],
                         [random_state]]
        self.classed_params = []
        self.weird_strings = {}
        self.n_stumps = n_stumps
        if "nbCores" not in kwargs:
            self.nbCores = 1
        else:
            self.nbCores = kwargs["nbCores"]

    # def canProbas(self):
    #     """
    #     Used to know if the classifier can return label probabilities
    #
    #     Returns
    #     -------
    #     True in any case
    #     """
    #     return True

    def getInterpret(self, directory, y_test):
        """
        Parameters
        ----------
        directory :
        y_test :

        Returns
        -------
        The interpretation string
        """
        np.savetxt(directory + "train_metrics.csv", self.losses, delimiter=',')
        np.savetxt(directory + "y_test_step.csv", self.test_preds,
                   delimiter=',')
        step_metrics = []
        for step_index in range(self.test_preds.shape[0] - 1):
            step_metrics.append(zero_one_loss.score(y_test,
                                                    self.test_preds[step_index,
                                                    :]))
        step_metrics = np.array(step_metrics)
        np.savetxt(directory + "step_test_metrics.csv", step_metrics,
                   delimiter=',')
        return ""


# def formatCmdArgs(args):
#     """Used to format kwargs for the parsed args"""
#     kwargsDict = {"n_iterations": args.AdG_n_iter,
#                   "n_stumps": args.AdG_stumps, }
#     return kwargsDict


def paramsToSet(nIter, random_state):
    """Used for weighted linear early fusion to generate random search sets"""
    paramsSet = []
    for _ in range(nIter):
        paramsSet.append({"n_iterations": random_state.randint(1, 500), })
    return paramsSet
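Steps 4 to 6 of AdaBoostGP.fit() above are the classic AdaBoost update on +/-1 labels. A self-contained numpy sketch of a single round, on a made-up 4-sample, 3-voter classification matrix:

import numpy as np

# Rows are samples, columns are voters; entries are +/-1 predictions.
classification_matrix = np.array([[ 1, -1,  1],
                                  [-1, -1,  1],
                                  [-1,  1, -1],
                                  [-1,  1,  1]])
y_neg = np.array([1, -1, 1, -1])
sample_weights = np.ones(4) / 4

# Step 4: weighted success of each voter, pick the best one (voter 0, 0.5).
successes = np.dot(classification_matrix.T, sample_weights * y_neg)
best = np.argmax(successes)
success = successes[best]

# Step 5: voter weight alpha = 0.5 * ln((1 + success) / (1 - success)) ~ 0.549.
alpha = 0.5 * np.log((1.0 + success) / (1.0 - success))

# Step 6: re-weight and renormalize; the misclassified sample (index 2)
# ends up carrying half the mass: [1/6, 1/6, 1/2, 1/6].
sample_weights *= np.exp(-alpha * y_neg * classification_matrix[:, best])
sample_weights /= sample_weights.sum()
print(alpha, sample_weights)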
import time

import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

from .. import metrics
from .additions.BoostUtils import get_accuracy_graph
from .additions.PregenUtils import PregenClassifier
from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \
    change_label_to_zero

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype

classifier_class_name = "AdaboostPregen"


class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
                     PregenClassifier):
    """
    Parameters
    ----------
    random_state : int seed, RandomState instance, or None (default=None)
        The seed of the pseudo random number generator to use when
        shuffling the data.
    n_estimators : int, number of estimators (default : 50)
    base_estimator :
    n_stumps : int (default : 1)
    estimators_generator : str, (default : "Stumps")
    max_depth : int (default : 1)
    self_complemented : bool, (default : True)
    kwargs : other arguments

    Attributes
    ----------
    param_names : list of parameter names
    classed_params : list of parameter names
    distribs :
    weird_strings :
    plotted_metric
    plotted_metric_name : str, name of the plotted metric
    step_predictions :
    estimators_generator :
    max_depth :
    n_stumps :
    self_complemented :
    """

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=None, n_stumps=1, estimators_generator="Stumps",
                 max_depth_pregen=1, self_complemented=True,
                 **kwargs):
        super(AdaboostPregen, self).__init__(
            random_state=random_state,
            n_estimators=n_estimators,
            base_estimator=base_estimator,
            algorithm="SAMME"
        )
        self.param_names = ["n_estimators", "base_estimator", "n_stumps",
                            "estimators_generator", "max_depth_pregen",
                            "random_state"]
        self.classed_params = ["base_estimator"]
        self.distribs = [CustomRandint(low=1, high=500),
                         [DecisionTreeClassifier(max_depth=1)], [n_stumps],
                         ["Stumps", "Tree"], CustomRandint(low=1, high=5),
                         [random_state]]
        self.weird_strings = {"base_estimator": "class_name"}
        self.plotted_metric = metrics.zero_one_loss
        self.plotted_metric_name = "zero_one_loss"
        self.step_predictions = None
        self.estimators_generator = estimators_generator
        self.max_depth_pregen = max_depth_pregen
        self.n_stumps = n_stumps
        self.self_complemented = self_complemented

    def fit(self, X, y, sample_weight=None):
        """
        Fit the AdaboostPregen

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).
        y : array-like, shape (n_samples,)
            Target values, class labels in classification.
        sample_weight :
        """
        begin = time.time()
        pregen_X, pregen_y = self.pregen_voters(X, y)
        super(AdaboostPregen, self).fit(pregen_X, pregen_y,
                                        sample_weight=sample_weight)
        end = time.time()
        self.train_time = end - begin
        self.train_shape = pregen_X.shape
        self.base_predictions = np.array(
            [change_label_to_zero(estim.predict(pregen_X)) for estim in
             self.estimators_])
        self.metrics = np.array(
            [self.plotted_metric.score(change_label_to_zero(pred), y) for pred
             in self.staged_predict(pregen_X)])
        self.bounds = np.array([np.prod(
            np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1])))
            for i in
            range(self.estimator_errors_.shape[0])])
        self.feature_importances_ = np.ones(X.shape[1])
        return self

    # def canProbas(self):
    #     """
    #     Used to know if the classifier can return label probabilities
    #
    #     Returns
    #     -------
    #     True
    #     """
    #     return True

    def predict(self, X):
        """
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        """
        begin = time.time()
        pregen_X, _ = self.pregen_voters(X)
        pred = super(AdaboostPregen, self).predict(pregen_X)
        end = time.time()
        self.pred_time = end - begin
        if pregen_X.shape != self.train_shape:
            self.step_predictions = np.array(
                [change_label_to_zero(step_pred) for step_pred in
                 self.staged_predict(pregen_X)])
        return change_label_to_zero(pred)

    # def set_params(self, **params):
    #     super().set_params(params)
    #     self.random_state = params["random_state"]
    #     self.n_stumps_per_attribute = params["n_tumps"]
    #     return self

    # def getInterpret(self, directory, y_test):
    #     interpretString = ""
    #     interpretString += self.getFeatureImportance(directory)
    #     interpretString += "\n\n Estimator error | Estimator weight\n"
    #     interpretString += "\n".join(
    #         [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
    #          error, weight in
    #          zip(self.estimator_errors_, self.estimator_weights_)])
    #     step_test_metrics = np.array(
    #         [self.plotted_metric.score(y_test, step_pred) for step_pred in
    #          self.step_predictions])
    #     get_accuracy_graph(step_test_metrics, "AdaboostPregen",
    #                        directory + "test_metrics.png",
    #                        self.plotted_metric_name, set="test")
    #     # get_accuracy_graph(self.metrics, "AdaboostPregen",
    #     #                    directory + "metrics.png", self.plotted_metric_name,
    #     #                    bounds=list(self.bounds),
    #     #                    bound_name="boosting bound")
    #     np.savetxt(directory + "test_metrics.csv", step_test_metrics,
    #                delimiter=',')
    #     np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
    #     np.savetxt(directory + "times.csv",
    #                np.array([self.train_time, self.pred_time]), delimiter=',')
    #     np.savetxt(directory + "times_iter.csv",
    #                np.array([self.train_time, len(self.estimator_weights_)]), delimiter=',')
    #     return interpretString

    def feature_importances_(self, value):
        self._feature_importances_ = value


# def formatCmdArgs(args):
#     """Used to format kwargs for the parsed args"""
#     kwargsDict = {'n_estimators': args.AdP_n_est,
#                   'base_estimator': [DecisionTreeClassifier(max_depth=1)],
#                   'n_stumps': args.AdP_stumps}
#     return kwargsDict


# def paramsToSet(nIter, random_state):
#     """Used for weighted linear early fusion to generate random search sets"""
#     paramsSet = []
#     for _ in range(nIter):
#         paramsSet.append({"n_estimators": random_state.randint(1, 500),
#                           "base_estimator": None})
#     return paramsSet
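The self.bounds line in fit() above tracks AdaBoost's training-error bound: the product over rounds t of sqrt(1 - 4 * (0.5 - e_t)^2), which equals the product of 2 * sqrt(e_t * (1 - e_t)). A standalone numeric check with made-up error values:

import numpy as np

# Hypothetical weighted errors for three boosting rounds.
estimator_errors = np.array([0.3, 0.35, 0.4])
bounds = np.array([np.prod(
    np.sqrt(1 - 4 * np.square(0.5 - estimator_errors[:i + 1])))
    for i in range(estimator_errors.shape[0])])
# Monotonically decreasing: approximately [0.9165, 0.8743, 0.8567].
print(bounds)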