Commit b28de9bc authored by Baptiste Bauvin

Added multiview algorithms

parent be1d27c3
Showing with 384 additions and 6 deletions
@@ -21,9 +21,9 @@ split: 0.8
 nb_folds: 2
 nb_class: 3
 classes:
-type: [ "monoview"]
+type: ["multiview"]
 algos_monoview: ["decision_tree", ]
-algos_multiview: ["weighted_linear_late_fusion"]
+algos_multiview: ["early_fusion_adaboost"]
 stats_iter: 3
 metrics:
   accuracy_score: {}
# -*- coding: utf-8 -*-
from __future__ import print_function, division

import time
import os
import sys
import fileinput


def findFiles(directory, files=[]):
    """Scan a directory recursively for .py, .pyx and .pxd files."""
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if os.path.isfile(path) and (path.endswith(".py") or
                                     path.endswith(".pyx") or
                                     path.endswith(".pxd")):
            if filename != "__init__.py" and filename != "version.py":
                files.append(path)
        elif os.path.isdir(path):
            findFiles(path, files)
    return files


def fileUnStamping(filename):
    """Remove the copyright stamp from a file."""
    is_stamp = False
    for line in fileinput.input(filename, inplace=1):
        if line.find("# COPYRIGHT #") != -1:
            is_stamp = not is_stamp
        elif not is_stamp:
            print(line, end="")


def fileStamping(filename, stamp):
    """Write a copyright stamp on a file.

    WARNING: the stamping must be done on a default utf-8 machine!
    """
    old_stamp = False  # If a copyright already exists, overwrite it.
    for line in fileinput.input(filename, inplace=1):
        if line.find("# COPYRIGHT #") != -1:
            old_stamp = not old_stamp
        elif line.startswith("# -*- coding: utf-8 -*-"):
            print(line, end="")
            print(stamp)
        elif not old_stamp:
            print(line, end="")


def getStamp(date, multimodal_version):
    """Return the correctly formatted stamp."""
    stamp = open("copyrightstamp.txt").read()
    stamp = stamp.replace("DATE", date)
    stamp = stamp.replace("MULTIMODAL_VERSION", multimodal_version)
    stamp = stamp.replace('\n', '\n# ')
    stamp = "# " + stamp
    stamp = stamp.replace("# \n", "#\n")
    return stamp.strip()


def getVersionsAndDate():
    """Return (date, multimodal_version)."""
    v_text = open('VERSION').read().strip()
    v_text_formted = '{"' + v_text.replace('\n', '","').replace(':', '":"')
    v_text_formted += '"}'
    v_dict = eval(v_text_formted)
    return (time.strftime("%Y"), v_dict['multimodal'])


def writeStamp():
    """Write a copyright stamp on all files."""
    stamp = getStamp(*getVersionsAndDate())
    files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "multimodal"))
    for filename in files:
        fileStamping(filename, stamp)
    fileStamping("setup.py", stamp)


def eraseStamp():
    """Erase the copyright stamp from all files."""
    files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "multimodal"))
    for filename in files:
        fileUnStamping(filename)
    fileUnStamping("setup.py")


def usage(arg):
    print("Usage :")
    print("\tpython %s stamping" % arg)
    print("\tpython %s unstamping" % arg)


if __name__ == "__main__":
    if len(sys.argv) == 1:
        usage(sys.argv[0])
    elif len(sys.argv) == 2:
        if sys.argv[1].startswith("unstamping"):
            eraseStamp()
        elif sys.argv[1].startswith("stamping"):
            writeStamp()
        else:
            usage(sys.argv[0])
    else:
        usage(sys.argv[0])
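For reference, getVersionsAndDate() expects a VERSION file made of key:value lines, which it rewrites into a Python dict literal before eval(). A minimal sketch of that parsing on a hypothetical one-line VERSION content (the real file may contain more keys):

# Hypothetical VERSION content, shown inline instead of read from disk.
v_text = "multimodal:0.0.1"
v_text_formted = '{"' + v_text.replace('\n', '","').replace(':', '":"') + '"}'
# v_text_formted is now '{"multimodal":"0.0.1"}'
print(eval(v_text_formted)["multimodal"])  # -> 0.0.1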
######### COPYRIGHT #########
Copyright(c) DATE
-----------------
* Université d'Aix Marseille (AMU) -
* Centre National de la Recherche Scientifique (CNRS) -
* Université de Toulon (UTLN).
* Copyright © 2019-2020 AMU, CNRS, UTLN
Contributors:
------------
* Sokol Koço <sokol.koco_AT_lis-lab.fr>
* Cécile Capponi <cecile.capponi_AT_univ-amu.fr>
* Dominique Benielli <dominique.benielli_AT_univ-amu.fr>
* Baptiste Bauvin <baptiste.bauvin_AT_univ-amu.fr>
Description:
-----------
Version:
-------
* multiview_generator version = MULTIMODAL_VERSION
Licence:
-------
License: New BSD License
######### COPYRIGHT #########
New BSD License
Copyright (c) 2020-15-01, The scikit-multimodallearn developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
a. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
b. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
c. Neither the name of the IntertwiningWavelet developers nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
@@ -42,9 +42,9 @@ classes:
 # The type of algorithms to run during the benchmark (monoview and/or multiview)
 type: ["monoview","multiview"]
 # The name of the monoview algorithms to run, ["all"] to run all the available classifiers
-algos_monoview: ["decision_tree"]
+algos_monoview: ["decision_tree", "adaboost"]
 # The names of the multiview algorithms to run, ["all"] to run all the available classifiers
-algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",]
+algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",]
 # The number of times the benchmark is repeated with different train/test
 # splits, to have more statistically significant results
 stats_iter: 1
@@ -23,8 +23,8 @@ class SGD(SGDClassifier, BaseMonoviewClassifier):
                                loss=loss,
                                penalty=penalty,
                                alpha=alpha,
-                               max_iter=5,
-                               tol=None,
+                               max_iter=max_iter,
+                               tol=tol,
                                random_state=random_state
                                )
         self.param_names = ["loss", "penalty", "alpha", "random_state"]
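For context, the diff above stops hard-coding max_iter=5 and tol=None in the SGD wrapper and forwards the constructor arguments instead. A standalone sketch of what is now configurable, using plain scikit-learn rather than the project's wrapper (data and values are illustrative):

import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X, y = rng.rand(30, 4), rng.randint(0, 2, 30)
# max_iter and tol are now tunable instead of being fixed to 5 and None
clf = SGDClassifier(loss="hinge", penalty="l2", alpha=1e-4,
                    max_iter=1000, tol=1e-3, random_state=0).fit(X, y)
print(clf.predict(X[:5]))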
import numpy as np

from ... import monoview_classifiers
from ...multiview.multiview_utils import get_available_monoview_classifiers, \
    BaseMultiviewClassifier, ConfigGenerator
from ...utils.dataset import get_samples_views_indices
from ...utils.multiclass import get_mc_estim, MultiClassWrapper

# from ..utils.dataset import get_v

classifier_class_name = "WeightedLinearEarlyFusion"


class BaseEarlyFusion(BaseMultiviewClassifier):

    def __init__(self, monoview_classifier="decision_tree", random_state=None,
                 **kwargs):
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        # Resolve the monoview classifier class from its module name.
        monoview_classifier_module = getattr(monoview_classifiers,
                                             monoview_classifier)
        monoview_classifier_class = getattr(
            monoview_classifier_module,
            monoview_classifier_module.classifier_class_name)
        self.monoview_classifier = monoview_classifier_class(**kwargs)

    def set_params(self, **params):
        self.monoview_classifier.set_params(**params)
        return self

    def get_params(self, deep=True):
        monoview_params = self.monoview_classifier.get_params(deep=deep)
        monoview_params["random_state"] = self.random_state
        return monoview_params

    def fit(self, X, y, train_indices=None, view_indices=None):
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        self.used_views = view_indices
        if np.unique(y[train_indices]).shape[0] > 2 and \
                not isinstance(self.monoview_classifier, MultiClassWrapper):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable."""
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the requested samples."""
        monoview_data = np.concatenate(
            [dataset.get_v(view_idx, samples)
             for index, view_idx
             in enumerate(self.view_indices)], axis=1)
        return monoview_data
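A self-contained sketch of the early-fusion principle implemented by BaseEarlyFusion: concatenate every view feature-wise, then train a single monoview classifier on the fused matrix. Plain numpy arrays stand in for the HDF5 dataset wrapper, and the shapes and classifier choice are illustrative assumptions:

import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(42)
view_1 = rng.rand(20, 5)   # 20 samples described by 5 features
view_2 = rng.rand(20, 3)   # the same 20 samples described by 3 other features
y = rng.randint(0, 3, 20)

# Early fusion: concatenate the views along the feature axis (cf. hdf5_to_monoview)
X_fused = np.concatenate([view_1, view_2], axis=1)   # shape (20, 8)
clf = DecisionTreeClassifier(random_state=42).fit(X_fused, y)
print(clf.predict(X_fused[:3]))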
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..utils.base import base_boosting_estimators

# from ..utils.dataset import get_v

classifier_class_name = "EarlyFusionAdaboost"


class EarlyFusionAdaboost(BaseEarlyFusion):

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=None, base_estimator_config=None, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="adaboost",
                                 n_estimators=n_estimators,
                                 base_estimator=base_estimator,
                                 base_estimator_config=base_estimator_config,
                                 **kwargs)
        self.param_names = ["n_estimators", "base_estimator"]
        self.classed_params = ["base_estimator"]
        self.distribs = [CustomRandint(low=1, high=500),
                         base_boosting_estimators]
        self.weird_strings = {"base_estimator": "class_name"}
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

# from ..utils.dataset import get_v

classifier_class_name = "EarlyFusionDT"


class EarlyFusionDT(BaseEarlyFusion):

    def __init__(self, random_state=None, max_depth=None,
                 criterion='gini', splitter='best', **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="decision_tree",
                                 max_depth=max_depth, criterion=criterion,
                                 splitter=splitter, **kwargs)
        self.param_names = ["max_depth", "criterion", "splitter",
                            "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         ["gini", "entropy"],
                         ["best", "random"], [random_state]]
        self.weird_strings = {}
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.gradient_boosting import CustomDecisionTreeGB

classifier_class_name = "EarlyFusionGB"


class EarlyFusionGB(BaseEarlyFusion):

    def __init__(self, random_state=None, loss="exponential", max_depth=1.0,
                 n_estimators=100,
                 init=CustomDecisionTreeGB(max_depth=1),
                 **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="gradient_boosting",
                                 loss=loss, max_depth=max_depth,
                                 n_estimators=n_estimators, init=init,
                                 **kwargs)
        self.param_names = ["n_estimators", "max_depth"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=50, high=500),
                         CustomRandint(low=1, high=10), ]
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint

classifier_class_name = "EarlyFusionLasso"


class EarlyFusionLasso(BaseEarlyFusion):

    def __init__(self, random_state=None, alpha=1.0,
                 max_iter=10, warm_start=False, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="lasso",
                                 alpha=alpha, max_iter=max_iter,
                                 warm_start=warm_start, **kwargs)
        self.param_names = ["max_iter", "alpha", "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomUniform(), [random_state]]
import numpy as np

from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "EarlyFusionRF"


class EarlyFusionRF(BaseEarlyFusion):

    def __init__(self, random_state=None, n_estimators=10,
                 max_depth=None, criterion='gini', **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="random_forest",
                                 n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 criterion=criterion, **kwargs)
        self.param_names = ["n_estimators", "max_depth", "criterion",
                            "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomRandint(low=1, high=10),
                         ["gini", "entropy"], [random_state]]
        self.weird_strings = {}
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSGD"


class EarlyFusionSGD(BaseEarlyFusion):

    def __init__(self, random_state=None, loss='hinge',
                 penalty='l2', alpha=0.0001, max_iter=5, tol=None, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="sgd", loss=loss,
                                 penalty=penalty, alpha=alpha,
                                 max_iter=max_iter, tol=tol, **kwargs)
        self.param_names = ["loss", "penalty", "alpha", "random_state"]
        self.classed_params = []
        self.distribs = [['log', 'modified_huber'],
                         ["l1", "l2", "elasticnet"],
                         CustomUniform(loc=0, state=1), [random_state]]
        self.weird_strings = {}
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSVMRBF"


class EarlyFusionSVMRBF(BaseEarlyFusion):

    def __init__(self, random_state=None, C=1.0, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="svm_rbf", C=C, **kwargs)
        self.param_names = ["C", "random_state"]
        self.distribs = [CustomUniform(loc=0, state=1), [random_state]]