From b28de9bc7beceaf145e9722a51ce8164dbc81926 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Wed, 24 Mar 2021 11:08:21 -0400 Subject: [PATCH] Added multvew algorithms --- config_files/config_test.yml | 4 +- copyright.py | 106 ++++++++++++++++++ copyrightstamp.txt | 36 ++++++ license.txt | 30 +++++ .../config_files/config_example_0.yml | 4 +- .../monoview_classifiers/sgd.py | 4 +- .../additions/early_fusion_from_monoview.py | 68 +++++++++++ .../early_fusion_adaboost.py | 23 ++++ .../early_fusion_decision_tree.py | 22 ++++ .../early_fusion_gradient_boosting.py | 21 ++++ .../early_fusion_lasso.py | 17 +++ .../early_fusion_random_forest.py | 23 ++++ .../multiview_classifiers/early_fusion_sgd.py | 19 ++++ .../early_fusion_svm_rbf.py | 13 +++ 14 files changed, 384 insertions(+), 6 deletions(-) create mode 100644 copyright.py create mode 100644 copyrightstamp.txt create mode 100644 license.txt create mode 100644 summit/multiview_platform/multiview_classifiers/additions/early_fusion_from_monoview.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_decision_tree.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_gradient_boosting.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_random_forest.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_sgd.py create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_svm_rbf.py diff --git a/config_files/config_test.yml b/config_files/config_test.yml index 478e0e08..7130ff97 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -21,9 +21,9 @@ split: 0.8 nb_folds: 2 nb_class: 3 classes: -type: [ "monoview"] +type: ["multiview"] algos_monoview: 
# -*- coding: utf-8 -*-
"""Add or remove the copyright stamp on the project's source files.

Usage::

    python copyright.py stamping
    python copyright.py unstamping
"""
from __future__ import print_function, division
import time
import os
import sys
import fileinput

# Substring that identifies the first and the last line of a stamp.
_STAMP_MARKER = "# COPYRIGHT #"
# The stamp is inserted right after a line starting with this text.
_CODING_LINE = "# -*- coding: utf-8 -*-"
# File extensions that are collected by findFiles.
_SOURCE_EXTENSIONS = (".py", ".pyx", ".pxd")
# File names that are never collected, whatever their extension.
_SKIPPED_FILENAMES = ("__init__.py", "version.py")


def findFiles(directory, files=None):
    """Recursively collect the .py, .pyx and .pxd files under *directory*.

    Files named ``__init__.py`` or ``version.py`` are left out.

    Parameters
    ----------
    directory : str
        Directory to scan.
    files : list, optional
        List the paths are appended to. A new list is created when omitted,
        so that successive calls do not share (and accumulate) results.

    Returns
    -------
    list
        The list holding the collected paths (``files`` itself when given).
    """
    if files is None:
        files = []
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if os.path.isfile(path) and path.endswith(_SOURCE_EXTENSIONS):
            if filename not in _SKIPPED_FILENAMES:
                files.append(path)
        elif os.path.isdir(path):
            findFiles(path, files)
    return files


def fileUnStamping(filename):
    """Remove the stamp from *filename*, rewriting the file in place.

    Every line containing the stamp marker toggles the "inside a stamp"
    state and is dropped; lines read while inside a stamp are dropped too.
    """
    inside_stamp = False
    stream = fileinput.input(filename, inplace=1)
    try:
        for line in stream:
            if _STAMP_MARKER in line:
                inside_stamp = not inside_stamp
            elif not inside_stamp:
                # With inplace=1, standard output is redirected to the file.
                print(line, end="")
    finally:
        # Restore sys.stdout even if the loop is interrupted by an error.
        stream.close()


def fileStamping(filename, stamp):
    """Write *stamp* into *filename*, replacing any stamp already there.

    The stamp is printed right after each line that starts with the
    ``# -*- coding: utf-8 -*-`` declaration; a file without such a line only
    loses its previous stamp.

    WARNING: no encoding is given when the file is opened, so the stamping
    must be done on a machine whose default encoding is UTF-8.
    """
    inside_old_stamp = False  # an existing stamp is dropped, not kept
    stream = fileinput.input(filename, inplace=1)
    try:
        for line in stream:
            if _STAMP_MARKER in line:
                inside_old_stamp = not inside_old_stamp
            elif line.startswith(_CODING_LINE):
                print(line, end="")
                print(stamp)
            elif not inside_old_stamp:
                print(line, end="")
    finally:
        # Restore sys.stdout even if the loop is interrupted by an error.
        stream.close()


def getStamp(date, multimodal_version):
    """Return the stamp text, formatted as a block of ``#`` comment lines.

    The template is read from ``copyrightstamp.txt`` in the current working
    directory; ``DATE`` and ``MULTIMODAL_VERSION`` are substituted.
    """
    with open("copyrightstamp.txt") as template_file:
        stamp = template_file.read()
    stamp = stamp.replace("DATE", date)
    stamp = stamp.replace("MULTIMODAL_VERSION", multimodal_version)
    stamp = stamp.replace('\n', '\n# ')
    stamp = "# " + stamp
    # Empty template lines become a bare "#", without a trailing space.
    stamp = stamp.replace("# \n", "#\n")
    return stamp.strip()


def getVersionsAndDate():
    """Return ``(current year, multimodal version)``.

    The ``VERSION`` file of the current working directory is read as
    ``key:value`` lines, split on the first colon; the value stored under the
    ``multimodal`` key is returned. Keys and values are kept as written
    (no whitespace stripping). The lines are parsed directly instead of
    being turned into a dict literal and passed to ``eval``.

    Raises
    ------
    KeyError
        If the file has no ``multimodal`` entry.
    """
    with open('VERSION') as version_file:
        v_text = version_file.read().strip()
    v_dict = {}
    for entry in v_text.split('\n'):
        key, separator, value = entry.partition(':')
        if separator:
            v_dict[key] = value
    return (time.strftime("%Y"), v_dict['multimodal'])


def writeStamp():
    """Write the copyright stamp on all the files.

    The files are those found under the ``multimodal`` directory located
    next to this script, plus ``setup.py`` (path relative to the current
    working directory).
    """
    stamp = getStamp(*getVersionsAndDate())
    files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "multimodal"))
    for filename in files:
        fileStamping(filename, stamp)
    fileStamping("setup.py", stamp)


def eraseStamp():
    """Erase the copyright stamp from all the files handled by writeStamp."""
    files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "multimodal"))
    for filename in files:
        fileUnStamping(filename)
    fileUnStamping("setup.py")


def usage(arg):
    """Print the command-line help; *arg* is the script name to display."""
    print("Usage :")
    print("\tpython %s stamping" % arg)
    print("\tpython %s unstamping" % arg)


if __name__ == "__main__":
    if len(sys.argv) == 1:
        usage(sys.argv[0])
    elif len(sys.argv) == 2:
        # "unstamping" is tested first: the two commands are matched by prefix.
        if sys.argv[1].startswith("unstamping"):
            eraseStamp()
        elif sys.argv[1].startswith("stamping"):
            writeStamp()
        else:
            usage(sys.argv[0])
    else:
        usage(sys.argv[0])
@@ -0,0 +1,36 @@ +######### COPYRIGHT ######### + +Copyright(c) DATE +----------------- + + +* Université d'Aix Marseille (AMU) - +* Centre National de la Recherche Scientifique (CNRS) - +* Université de Toulon (UTLN). +* Copyright © 2019-2020 AMU, CNRS, UTLN + +Contributors: +------------ + +* Sokol Koço <sokol.koco_AT_lis-lab.fr> +* Cécile Capponi <cecile.capponi_AT_univ-amu.fr> +* Dominique Benielli <dominique.benielli_AT_univ-amu.fr> +* Baptiste Bauvin <baptiste.bauvin_AT_univ-amu.fr> + +Description: +----------- + + + +Version: +------- + +* multiview_generator version = MULTIMODAL_VERSION + +Licence: +------- + +License: New BSD License + + +######### COPYRIGHT ######### diff --git a/license.txt b/license.txt new file mode 100644 index 00000000..573f99a8 --- /dev/null +++ b/license.txt @@ -0,0 +1,30 @@ +New BSD License + +Copyright (c) 2020-15-01, The scikit-multimodallearn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of the IntertwiningWavelet developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. \ No newline at end of file diff --git a/summit/examples/config_files/config_example_0.yml b/summit/examples/config_files/config_example_0.yml index 6e0e029f..753e5c07 100644 --- a/summit/examples/config_files/config_example_0.yml +++ b/summit/examples/config_files/config_example_0.yml @@ -42,9 +42,9 @@ classes: # The type of algorithms to run during the benchmark (monoview and/or multiview) type: ["monoview","multiview"] # The name of the monoview algorithms to run, ["all"] to run all the available classifiers -algos_monoview: ["decision_tree"] +algos_monoview: ["decision_tree", "adaboost"] # The names of the multiview algorithms to run, ["all"] to run all the available classifiers -algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",] +algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",] # The number of times the benchamrk is repeated with different train/test # split, to have more statistically significant results stats_iter: 1 diff --git a/summit/multiview_platform/monoview_classifiers/sgd.py b/summit/multiview_platform/monoview_classifiers/sgd.py index 1b1d3375..312a2bf2 100644 --- a/summit/multiview_platform/monoview_classifiers/sgd.py +++ b/summit/multiview_platform/monoview_classifiers/sgd.py @@ -23,8 +23,8 @@ class SGD(SGDClassifier, BaseMonoviewClassifier): loss=loss, penalty=penalty, alpha=alpha, - max_iter=5, - tol=None, + max_iter=max_iter, + tol=tol, 
# --- summit/multiview_platform/multiview_classifiers/additions/early_fusion_from_monoview.py ---
import numpy as np

from ... import monoview_classifiers
from ...multiview.multiview_utils import get_available_monoview_classifiers, \
    BaseMultiviewClassifier, ConfigGenerator
from ...utils.dataset import get_samples_views_indices
from ...utils.multiclass import get_mc_estim, MultiClassWrapper

# from ..utils.dataset import get_v

# NOTE(review): no class of that name is defined in this module, which only
# defines BaseEarlyFusion - confirm whether this constant is read anywhere.
classifier_class_name = "WeightedLinearEarlyFusion"


class BaseEarlyFusion(BaseMultiviewClassifier):
    """Early fusion: concatenate the views and train one monoview classifier.

    The monoview classifier is looked up by module name in the
    ``monoview_classifiers`` package; the class instantiated is the one named
    by that module's ``classifier_class_name`` attribute.
    """

    def __init__(self, monoview_classifier="decision_tree", random_state=None,
                 **kwargs):
        """Build the wrapped monoview classifier.

        Parameters
        ----------
        monoview_classifier : str
            Name of a module of the ``monoview_classifiers`` package.
        random_state :
            Forwarded to ``BaseMultiviewClassifier.__init__``.
        **kwargs :
            Passed as-is to the constructor of the monoview classifier.
        """
        # NOTE(review): random_state is not part of kwargs, so it is not
        # handed to the monoview classifier here - confirm this is intended.
        super().__init__(random_state=random_state)
        module = getattr(monoview_classifiers, monoview_classifier)
        estimator_class = getattr(module, module.classifier_class_name)
        self.monoview_classifier = estimator_class(**kwargs)

    def set_params(self, **params):
        """Forward *params* to the monoview classifier and return ``self``."""
        self.monoview_classifier.set_params(**params)
        return self

    def get_params(self, deep=True):
        """Return the monoview classifier's parameters.

        The ``random_state`` entry is overwritten with this object's own
        ``random_state`` attribute.
        """
        params = self.monoview_classifier.get_params(deep=deep)
        params["random_state"] = self.random_state
        return params

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated views.

        When the training labels hold more than two distinct values and the
        classifier is not already a ``MultiClassWrapper``, it is first replaced
        by the estimator returned by ``get_mc_estim``.

        Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        # Stores the argument as received (possibly None); the resolved
        # indices are kept in self.view_indices by the call above.
        self.used_views = view_indices
        n_labels = np.unique(y[train_indices]).shape[0]
        already_wrapped = isinstance(self.monoview_classifier,
                                     MultiClassWrapper)
        if n_labels > 2 and not already_wrapped:
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Return the monoview classifier's predictions on the fused views.

        ``self._check_views`` is called on the resolved view indices before
        predicting; it is not defined in this class.
        """
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        return self.monoview_classifier.predict(X)

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Extract the data from the dataset and store all the concatenated
        views in one variable.

        The indices are resolved by ``get_samples_views_indices``; the view
        indices are stored in ``self.view_indices``.

        Returns the resolved sample indices and the concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        fused = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, fused

    def hdf5_to_monoview(self, dataset, samples):
        """Concatenate, along axis 1, the views of the requested samples.

        Each view listed in ``self.view_indices`` is read with
        ``dataset.get_v(view_idx, samples)``.
        """
        per_view = [dataset.get_v(view_idx, samples)
                    for view_idx in self.view_indices]
        return np.concatenate(per_view, axis=1)


# --- summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..utils.base import base_boosting_estimators

# from ..utils.dataset import get_v

classifier_class_name = "EarlyFusionAdaboost"


class EarlyFusionAdaboost(BaseEarlyFusion):
    """Early fusion built on the ``adaboost`` monoview classifier."""

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=None, base_estimator_config=None, **kwargs):
        """Forward the hyper-parameters to the ``adaboost`` classifier and
        declare the search space of ``n_estimators`` and ``base_estimator``.
        """
        super().__init__(random_state=random_state,
                         monoview_classifier="adaboost",
                         n_estimators=n_estimators,
                         base_estimator=base_estimator,
                         base_estimator_config=base_estimator_config,
                         **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["n_estimators", "base_estimator"]
        self.distribs = [CustomRandint(low=1, high=500),
                         base_boosting_estimators]
        self.classed_params = ["base_estimator"]
        self.weird_strings = {"base_estimator": "class_name"}


# --- summit/multiview_platform/multiview_classifiers/early_fusion_decision_tree.py ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

# from ..utils.dataset import get_v

classifier_class_name = "EarlyFusionDT"


class EarlyFusionDT(BaseEarlyFusion):
    """Early fusion built on the ``decision_tree`` monoview classifier."""

    def __init__(self, random_state=None, max_depth=None,
                 criterion='gini', splitter='best', **kwargs):
        """Forward the hyper-parameters to the ``decision_tree`` classifier
        and declare the search space of each entry of ``param_names``.
        """
        super().__init__(random_state=random_state,
                         monoview_classifier="decision_tree",
                         max_depth=max_depth,
                         criterion=criterion, splitter=splitter, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["max_depth", "criterion", "splitter",
                            'random_state']
        self.distribs = [CustomRandint(low=1, high=300),
                         ["gini", "entropy"],
                         ["best", "random"], [random_state]]
        self.classed_params = []
        self.weird_strings = {}


# --- summit/multiview_platform/multiview_classifiers/early_fusion_gradient_boosting.py (imports) ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.gradient_boosting import CustomDecisionTreeGB
classifier_class_name = "EarlyFusionGB"


class EarlyFusionGB(BaseEarlyFusion):
    """Early fusion built on the ``gradient_boosting`` monoview classifier."""

    # NOTE(review): the default ``init`` estimator is created once, when the
    # class body is executed, so every instance built with the default shares
    # the same object - confirm that the monoview classifier does not fit it
    # in place.
    def __init__(self, random_state=None, loss="exponential", max_depth=1,
                 n_estimators=100,
                 init=CustomDecisionTreeGB(max_depth=1),
                 **kwargs):
        """Forward the hyper-parameters to the ``gradient_boosting`` classifier
        and declare the search space of ``n_estimators`` and ``max_depth``.

        ``max_depth`` defaults to the integer 1 (it used to be the float 1.0):
        the search distribution declared below for it is an integer one.
        """
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="gradient_boosting",
                                 loss=loss, max_depth=max_depth,
                                 n_estimators=n_estimators, init=init, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["n_estimators", "max_depth"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=50, high=500),
                         CustomRandint(low=1, high=10), ]
        # Set explicitly, as the other early fusion classifiers do.
        self.weird_strings = {}


# --- summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint

classifier_class_name = "EarlyFusionLasso"


class EarlyFusionLasso(BaseEarlyFusion):
    """Early fusion built on the ``lasso`` monoview classifier."""

    def __init__(self, random_state=None, alpha=1.0,
                 max_iter=10, warm_start=False, **kwargs):
        """Forward the hyper-parameters to the ``lasso`` classifier and declare
        the search space of each entry of ``param_names``.

        Parameters
        ----------
        random_state :
            Forwarded to ``BaseEarlyFusion`` (``None`` used to be passed
            whatever the value received).
        alpha, max_iter, warm_start, **kwargs :
            Passed to the constructor of the monoview classifier.
        """
        # Without this name, BaseEarlyFusion falls back on "decision_tree" and
        # hands it the lasso keyword arguments. The name follows the
        # early_fusion_<monoview module> pattern of the sibling modules -
        # TODO confirm that monoview_classifiers.lasso exists.
        # setdefault keeps working for a caller that already passes the name.
        kwargs.setdefault("monoview_classifier", "lasso")
        BaseEarlyFusion.__init__(self, random_state=random_state, alpha=alpha,
                                 max_iter=max_iter,
                                 warm_start=warm_start, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["max_iter", "alpha", "random_state"]
        self.classed_params = []
        # The bound was written "30--0", which evaluates to 30; 300 is the
        # bound used by the decision tree and random forest siblings -
        # TODO confirm the intended value.
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomUniform(), [random_state]]
        # Set explicitly, as the other early fusion classifiers do.
        self.weird_strings = {}


# --- summit/multiview_platform/multiview_classifiers/early_fusion_random_forest.py (first imports) ---
import numpy as np

from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "EarlyFusionRF"


class EarlyFusionRF(BaseEarlyFusion):
    """Early fusion built on the ``random_forest`` monoview classifier."""

    def __init__(self, random_state=None, n_estimators=10,
                 max_depth=None, criterion='gini', **kwargs):
        """Forward the hyper-parameters to the ``random_forest`` classifier and
        declare the search space of each entry of ``param_names``.
        """
        super().__init__(random_state=random_state,
                         monoview_classifier="random_forest",
                         n_estimators=n_estimators, max_depth=max_depth,
                         criterion=criterion, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["n_estimators", "max_depth", "criterion",
                            "random_state"]
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomRandint(low=1, high=10),
                         ["gini", "entropy"], [random_state]]
        self.classed_params = []
        self.weird_strings = {}


# --- summit/multiview_platform/multiview_classifiers/early_fusion_sgd.py ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSGD"


class EarlyFusionSGD(BaseEarlyFusion):
    """Early fusion built on the ``sgd`` monoview classifier."""

    def __init__(self, random_state=None, loss='hinge',
                 penalty='l2', alpha=0.0001, max_iter=5, tol=None, **kwargs):
        """Forward the hyper-parameters to the ``sgd`` classifier and declare
        the search space of each entry of ``param_names``.

        ``max_iter`` and ``tol`` are forwarded but are not part of the search
        space.
        """
        super().__init__(random_state=random_state,
                         monoview_classifier="sgd", loss=loss,
                         penalty=penalty, alpha=alpha, max_iter=max_iter,
                         tol=tol, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["loss", "penalty", "alpha", "random_state"]
        self.distribs = [['log', 'modified_huber'],
                         ["l1", "l2", "elasticnet"],
                         CustomUniform(loc=0, state=1), [random_state]]
        self.classed_params = []
        self.weird_strings = {}
# --- summit/multiview_platform/multiview_classifiers/early_fusion_svm_rbf.py ---
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSVMRBF"


class EarlyFusionSVMRBF(BaseEarlyFusion):
    """Early fusion built on the ``svm_rbf`` monoview classifier."""

    def __init__(self, random_state=None, C=1.0, **kwargs):
        """Forward ``C`` to the ``svm_rbf`` classifier and declare the search
        space of each entry of ``param_names``.
        """
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="svm_rbf", C=C, **kwargs)
        # One entry of distribs per entry of param_names, in the same order.
        self.param_names = ["C", "random_state"]
        self.distribs = [CustomUniform(loc=0, state=1), [random_state]]
        # Set explicitly, as the other early fusion classifiers do; this class
        # was the only one defining neither attribute.
        self.classed_params = []
        self.weird_strings = {}