# -*- coding: utf-8 -*-
"""Write or remove the copyright stamp on the project's source files.

Usage::

    python copyright.py stamping
    python copyright.py unstamping

The stamp text is built from ``copyrightstamp.txt`` and ``VERSION``; both
files, as well as ``setup.py``, are looked up relative to the current
working directory.
"""
from __future__ import print_function, division
import time
import os
import sys
import fileinput

# A line containing this marker opens or closes a stamp block.
_STAMP_MARKER = "# COPYRIGHT #"
# The stamp is inserted right after a line starting with this header.
_CODING_LINE = "# -*- coding: utf-8 -*-"
_SOURCE_EXTENSIONS = (".py", ".pyx", ".pxd")
_SKIPPED_FILENAMES = ("__init__.py", "version.py")


def findFiles(directory, files=None):
    """Recursively collect the py, pyx and pxd files found in a directory.

    Files named ``__init__.py`` or ``version.py`` are ignored.

    Parameters
    ----------
    directory : str
        Directory to scan.
    files : list, optional
        List the matching paths are appended to.  A new list is created
        when omitted, so successive calls no longer share (and keep
        growing) a single default list.

    Returns
    -------
    list
        ``files``, extended with the matching paths.
    """
    if files is None:
        files = []
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if os.path.isfile(path):
            if (path.endswith(_SOURCE_EXTENSIONS)
                    and filename not in _SKIPPED_FILENAMES):
                files.append(path)
        elif os.path.isdir(path):
            findFiles(path, files)
    return files


def fileUnStamping(filename):
    """Remove the stamp from a file, rewriting it in place.

    Every line containing the stamp marker toggles the "inside a stamp"
    state; marker lines and the lines between two markers are dropped,
    all other lines are written back unchanged.
    """
    is_stamp = False
    try:
        # With inplace=1, fileinput redirects stdout into the file.
        for line in fileinput.input(filename, inplace=1):
            if _STAMP_MARKER in line:
                is_stamp = not is_stamp
            elif not is_stamp:
                print(line, end="")
    finally:
        # Always release the stdout redirection, even on error.
        fileinput.close()


def fileStamping(filename, stamp):
    """Write a stamp on a file, replacing any stamp already present.

    The stamp is printed right after the line starting with
    ``# -*- coding: utf-8 -*-``; a file without that line gets no stamp
    (an existing stamp is still removed).

    WARNING : The stamping must be done on a default utf8 machine !
    """
    old_stamp = False  # If a copyright already exists, overwrite it.
    try:
        for line in fileinput.input(filename, inplace=1):
            if _STAMP_MARKER in line:
                old_stamp = not old_stamp
            elif line.startswith(_CODING_LINE):
                print(line, end="")
                print(stamp)
            elif not old_stamp:
                print(line, end="")
    finally:
        fileinput.close()


def getStamp(date, multimodal_version):
    """Return the stamp text formatted as a block of ``#`` comment lines.

    ``copyrightstamp.txt`` is read from the current working directory;
    its ``DATE`` and ``MULTIMODAL_VERSION`` placeholders are replaced by
    the given values and every line is prefixed with ``# `` (a bare ``#``
    for empty lines).
    """
    with open("copyrightstamp.txt") as stamp_file:
        stamp = stamp_file.read()
    stamp = stamp.replace("DATE", date)
    stamp = stamp.replace("MULTIMODAL_VERSION", multimodal_version)
    stamp = stamp.replace('\n', '\n# ')
    stamp = "# " + stamp
    stamp = stamp.replace("# \n", "#\n")
    return stamp.strip()


def getVersionsAndDate():
    """Return ``(year, multimodal_version)``.

    ``VERSION`` (current working directory) is read as ``key:value``
    lines.  The entries are parsed as plain text instead of being passed
    to ``eval``, so the file content is never executed.  Lines without a
    ``:`` are ignored.

    Raises
    ------
    KeyError
        If ``VERSION`` has no ``multimodal`` entry.
    """
    with open('VERSION') as version_file:
        v_text = version_file.read().strip()
    v_dict = {}
    for entry in v_text.split('\n'):
        key, separator, value = entry.partition(':')
        if separator:
            v_dict[key] = value
    return (time.strftime("%Y"), v_dict['multimodal'])


def _projectFiles():
    """Return the source files of the ``multimodal`` directory located
    next to this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return findFiles(os.path.join(script_dir, "multimodal"))


def writeStamp():
    """ Write a copyright stamp on all files """
    stamp = getStamp(*getVersionsAndDate())
    for filename in _projectFiles():
        fileStamping(filename, stamp)
    fileStamping("setup.py", stamp)


def eraseStamp():
    """ Erase a copyright stamp from all files """
    for filename in _projectFiles():
        fileUnStamping(filename)
    fileUnStamping("setup.py")


def usage(arg):
    """Print the command line help; ``arg`` is the script name."""
    print("Usage :")
    print("\tpython %s stamping" % arg)
    print("\tpython %s unstamping" % arg)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        usage(sys.argv[0])
    # "unstamping" is tested first, mirroring the original dispatch order.
    elif sys.argv[1].startswith("unstamping"):
        eraseStamp()
    elif sys.argv[1].startswith("stamping"):
        writeStamp()
    else:
        usage(sys.argv[0])
0000000000000000000000000000000000000000..872fce0ebc426b0aad2e9b220791edd90e5db042 --- /dev/null +++ b/copyrightstamp.txt @@ -0,0 +1,36 @@ +######### COPYRIGHT ######### + +Copyright(c) DATE +----------------- + + +* Université d'Aix Marseille (AMU) - +* Centre National de la Recherche Scientifique (CNRS) - +* Université de Toulon (UTLN). +* Copyright © 2019-2020 AMU, CNRS, UTLN + +Contributors: +------------ + +* Sokol Koço <sokol.koco_AT_lis-lab.fr> +* Cécile Capponi <cecile.capponi_AT_univ-amu.fr> +* Dominique Benielli <dominique.benielli_AT_univ-amu.fr> +* Baptiste Bauvin <baptiste.bauvin_AT_univ-amu.fr> + +Description: +----------- + + + +Version: +------- + +* multiview_generator version = MULTIMODAL_VERSION + +Licence: +------- + +License: New BSD License + + +######### COPYRIGHT ######### diff --git a/license.txt b/license.txt new file mode 100644 index 0000000000000000000000000000000000000000..573f99a8258c728e1aa5bd241ec6941ed7a0d7c1 --- /dev/null +++ b/license.txt @@ -0,0 +1,30 @@ +New BSD License + +Copyright (c) 2020-15-01, The scikit-multimodallearn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of the IntertwiningWavelet developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. 
+ + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. \ No newline at end of file diff --git a/summit/examples/config_files/config_example_0.yml b/summit/examples/config_files/config_example_0.yml index 6e0e029f34fb01cffe3d17b138384fed8bdfbaac..753e5c077579d5bc8f7d6f8641ba9e148e860038 100644 --- a/summit/examples/config_files/config_example_0.yml +++ b/summit/examples/config_files/config_example_0.yml @@ -42,9 +42,9 @@ classes: # The type of algorithms to run during the benchmark (monoview and/or multiview) type: ["monoview","multiview"] # The name of the monoview algorithms to run, ["all"] to run all the available classifiers -algos_monoview: ["decision_tree"] +algos_monoview: ["decision_tree", "adaboost"] # The names of the multiview algorithms to run, ["all"] to run all the available classifiers -algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",] +algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",] # The number of times the benchamrk is repeated with different train/test # split, to have more statistically significant results stats_iter: 1 diff --git a/summit/multiview_platform/monoview_classifiers/sgd.py b/summit/multiview_platform/monoview_classifiers/sgd.py index 
import numpy as np

from ... import monoview_classifiers
from ...multiview.multiview_utils import get_available_monoview_classifiers, \
    BaseMultiviewClassifier, ConfigGenerator
from ...utils.dataset import get_samples_views_indices
from ...utils.multiclass import get_mc_estim, MultiClassWrapper

# NOTE(review): no class with this name is defined in this module; the value
# is kept unchanged in case something reads it - confirm and update.
classifier_class_name = "WeightedLinearEarlyFusion"


class BaseEarlyFusion(BaseMultiviewClassifier):
    """Early fusion: concatenate the views and train one monoview classifier.

    Parameters
    ----------
    monoview_classifier : str
        Name of the module of ``monoview_classifiers`` whose class (named
        by that module's ``classifier_class_name``) is instantiated.
    random_state : optional
        Seed or random state, given to both this classifier and the
        wrapped monoview classifier.
    **kwargs
        Forwarded to the monoview classifier's constructor.
    """

    def __init__(self, monoview_classifier="decision_tree", random_state=None,
                 **kwargs):
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        monoview_classifier_module = getattr(monoview_classifiers,
                                             monoview_classifier)
        monoview_classifier_class = getattr(
            monoview_classifier_module,
            monoview_classifier_module.classifier_class_name)
        # Forward the seed as well: get_params() reports random_state as a
        # parameter of the wrapped classifier, so it must actually be given
        # to it.  Assumes every monoview classifier accepts ``random_state``
        # (the subclasses list it among their tunable parameters).
        self.monoview_classifier = monoview_classifier_class(
            random_state=random_state, **kwargs)

    def set_params(self, **params):
        """Set the parameters of the wrapped monoview classifier.

        ``random_state`` is also stored on this object so that
        ``get_params`` returns the value that was just set.
        """
        if "random_state" in params:
            self.random_state = params["random_state"]
        self.monoview_classifier.set_params(**params)
        return self

    def get_params(self, deep=True):
        """Return the wrapped classifier's parameters, with
        ``random_state`` taken from this object."""
        monoview_params = self.monoview_classifier.get_params(deep=deep)
        monoview_params["random_state"] = self.random_state
        return monoview_params

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated views.

        When the training labels hold more than two classes and the
        classifier is not already a ``MultiClassWrapper``, it is first
        replaced by the estimator returned by ``get_mc_estim``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        # NOTE(review): this stores the raw argument (possibly None), not
        # the resolved self.view_indices - confirm this is what
        # _check_views expects.
        self.used_views = view_indices
        train_labels = y[train_indices]
        if np.unique(train_labels).shape[0] > 2 and \
                not (isinstance(self.monoview_classifier, MultiClassWrapper)):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=train_labels)
        self.monoview_classifier.fit(X, train_labels)
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict the labels of the asked samples from the concatenated
        views."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        The resolved view indices are kept in ``self.view_indices``.

        Returns
        -------
        tuple
            ``(sample_indices, X)``: the resolved sample indices and the
            concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples """
        monoview_data = np.concatenate(
            [dataset.get_v(view_idx, samples)
             for view_idx in self.view_indices], axis=1)
        return monoview_data
# ---- multiview_classifiers/early_fusion_adaboost.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..utils.base import base_boosting_estimators

classifier_class_name = "EarlyFusionAdaboost"


class EarlyFusionAdaboost(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the ``adaboost``
    monoview classifier."""

    def __init__(self, random_state=None, n_estimators=50,
                 base_estimator=None, base_estimator_config=None, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="adaboost",
                                 n_estimators=n_estimators,
                                 base_estimator=base_estimator,
                                 base_estimator_config=base_estimator_config,
                                 **kwargs)
        # Tunable parameters; distribs presumably lines up index by index
        # with param_names - confirm against the hyper-parameter search.
        self.param_names = ["n_estimators", "base_estimator"]
        self.classed_params = ["base_estimator"]
        self.distribs = [CustomRandint(low=1, high=500),
                         base_boosting_estimators]
        self.weird_strings = {"base_estimator": "class_name"}


# ---- multiview_classifiers/early_fusion_decision_tree.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "EarlyFusionDT"


class EarlyFusionDT(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the
    ``decision_tree`` monoview classifier."""

    def __init__(self, random_state=None, max_depth=None,
                 criterion='gini', splitter='best', **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="decision_tree",
                                 max_depth=max_depth,
                                 criterion=criterion, splitter=splitter,
                                 **kwargs)
        self.param_names = ["max_depth", "criterion", "splitter",
                            'random_state']
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         ["gini", "entropy"],
                         ["best", "random"], [random_state]]
        self.weird_strings = {}


# ---- multiview_classifiers/early_fusion_gradient_boosting.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.gradient_boosting import CustomDecisionTreeGB

classifier_class_name = "EarlyFusionGB"


class EarlyFusionGB(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the
    ``gradient_boosting`` monoview classifier."""

    # NOTE(review): the ``init`` default is a single CustomDecisionTreeGB
    # instance created at import time and shared by every EarlyFusionGB
    # built without an explicit ``init``; ``max_depth`` defaults to the
    # float 1.0 although its search range below is an integer one.  Both
    # defaults are kept for interface compatibility - confirm they are
    # intended.
    def __init__(self, random_state=None, loss="exponential", max_depth=1.0,
                 n_estimators=100,
                 init=CustomDecisionTreeGB(max_depth=1),
                 **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="gradient_boosting",
                                 loss=loss, max_depth=max_depth,
                                 n_estimators=n_estimators, init=init,
                                 **kwargs)
        self.param_names = ["n_estimators", "max_depth"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=50, high=500),
                         CustomRandint(low=1, high=10), ]
        # Set explicitly, like the sibling early fusion classifiers do.
        self.weird_strings = {}


# ---- multiview_classifiers/early_fusion_lasso.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform, CustomRandint

classifier_class_name = "EarlyFusionLasso"


class EarlyFusionLasso(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the ``lasso``
    monoview classifier."""

    def __init__(self, random_state=None, alpha=1.0,
                 max_iter=10, warm_start=False, **kwargs):
        # The monoview classifier has to be named explicitly: without it
        # BaseEarlyFusion falls back to "decision_tree" and hands it the
        # lasso parameters.  "lasso" follows the naming of the sibling
        # modules (early_fusion_<name> wraps <name>) - confirm the module
        # exists in monoview_classifiers.  The caller's random_state is
        # passed on instead of a hard-coded None.
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="lasso",
                                 alpha=alpha,
                                 max_iter=max_iter,
                                 warm_start=warm_start, **kwargs)
        self.param_names = ["max_iter", "alpha", "random_state"]
        self.classed_params = []
        # The upper bound was written ``30--0`` (which evaluates to 30);
        # read as a typo for 300.
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomUniform(), [random_state]]
        # Set explicitly, like the sibling early fusion classifiers do.
        self.weird_strings = {}


# ---- multiview_classifiers/early_fusion_random_forest.py ----
import numpy as np

from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint

classifier_class_name = "EarlyFusionRF"


class EarlyFusionRF(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the
    ``random_forest`` monoview classifier."""

    def __init__(self, random_state=None, n_estimators=10,
                 max_depth=None, criterion='gini', **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="random_forest",
                                 n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 criterion=criterion, **kwargs)
        self.param_names = ["n_estimators", "max_depth", "criterion",
                            "random_state"]
        self.classed_params = []
        self.distribs = [CustomRandint(low=1, high=300),
                         CustomRandint(low=1, high=10),
                         ["gini", "entropy"], [random_state]]
        self.weird_strings = {}


# ---- multiview_classifiers/early_fusion_sgd.py (imports) ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform
# ---- multiview_classifiers/early_fusion_sgd.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSGD"


class EarlyFusionSGD(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the ``sgd``
    monoview classifier."""

    def __init__(self, random_state=None, loss='hinge',
                 penalty='l2', alpha=0.0001, max_iter=5, tol=None, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="sgd", loss=loss,
                                 penalty=penalty, alpha=alpha,
                                 max_iter=max_iter, tol=tol, **kwargs)
        # Tunable parameters; distribs presumably lines up index by index
        # with param_names - confirm against the hyper-parameter search.
        self.param_names = ["loss", "penalty", "alpha", "random_state"]
        self.classed_params = []
        # NOTE(review): ``state=1`` may be meant as ``scale=1`` - check the
        # CustomUniform signature.
        self.distribs = [['log', 'modified_huber'],
                         ["l1", "l2", "elasticnet"],
                         CustomUniform(loc=0, state=1), [random_state]]
        self.weird_strings = {}


# ---- multiview_classifiers/early_fusion_svm_rbf.py ----
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomUniform

classifier_class_name = "EarlyFusionSVMRBF"


class EarlyFusionSVMRBF(BaseEarlyFusion):
    """Early fusion (see ``BaseEarlyFusion``) wrapping the ``svm_rbf``
    monoview classifier."""

    def __init__(self, random_state=None, C=1.0, **kwargs):
        BaseEarlyFusion.__init__(self, random_state=random_state,
                                 monoview_classifier="svm_rbf", C=C, **kwargs)
        self.param_names = ["C", "random_state"]
        # Set explicitly, like the sibling early fusion classifiers do.
        self.classed_params = []
        # NOTE(review): ``state=1`` may be meant as ``scale=1`` - check the
        # CustomUniform signature.
        self.distribs = [CustomUniform(loc=0, state=1), [random_state]]
        self.weird_strings = {}