diff --git a/config_files/config_test.yml b/config_files/config_test.yml index 36db7863899c2b2debec4cbff8b56e849273d084..af27496b79071a59abdbe0dc7df450d8a35b07e5 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -21,9 +21,9 @@ split: 0.8 nb_folds: 2 nb_class: 3 classes: -type: [ "monoview"] -algos_monoview: ["decision_tree", "adaboost" ] -algos_multiview: ["weighted_linear_late_fusion"] +type: ["multiview"] +algos_monoview: ["decision_tree", ] +algos_multiview: ["early_fusion_adaboost"] stats_iter: 3 metrics: accuracy_score: {} diff --git a/copyright.py b/copyright.py new file mode 100644 index 0000000000000000000000000000000000000000..f471123bb34c1224915666262dce11a8fdef2057 --- /dev/null +++ b/copyright.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, division +import time +import os +import sys +import fileinput + + +def findFiles(directory, files=[]): + """scan a directory for py, pyx, pxd extension files.""" + for filename in os.listdir(directory): + path = os.path.join(directory, filename) + if os.path.isfile(path) and (path.endswith(".py") or + path.endswith(".pyx") or + path.endswith(".pxd")): + if filename != "__init__.py" and filename != "version.py": + files.append(path) + elif os.path.isdir(path): + findFiles(path, files) + return files + + +def fileUnStamping(filename): + """ Remove stamp from a file """ + is_stamp = False + for line in fileinput.input(filename, inplace=1): + if line.find("# COPYRIGHT #") != -1: + is_stamp = not is_stamp + elif not is_stamp: + print(line, end="") + + +def fileStamping(filename, stamp): + """ Write a stamp on a file + + WARNING : The stamping must be done on a default utf8 machine! + """ + old_stamp = False # If a copyright already exists, overwrite it. + for line in fileinput.input(filename, inplace=1): + if line.find("# COPYRIGHT #") != -1: + old_stamp = not old_stamp + elif line.startswith("# -*- coding: utf-8 -*-"): + print(line, end="") + print(stamp) + elif not old_stamp: + print(line, end="") + + +def getStamp(date, multimodal_version): + """ Return the correctly formatted stamp """ + stamp = open("copyrightstamp.txt").read() + stamp = stamp.replace("DATE", date) + stamp = stamp.replace("MULTIMODAL_VERSION", multimodal_version) + stamp = stamp.replace('\n', '\n# ') + stamp = "# " + stamp + stamp = stamp.replace("# \n", "#\n") + return stamp.strip() + + +def getVersionsAndDate(): + """ Return (date, multimodal_version..
+ ) """ + v_text = open('VERSION').read().strip() + v_text_formted = '{"' + v_text.replace('\n', '","').replace(':', '":"') + v_text_formted += '"}' + v_dict = eval(v_text_formted) + return (time.strftime("%Y"), v_dict['multimodal']) + + +def writeStamp(): + """ Write a copyright stamp on all files """ + stamp = getStamp(*getVersionsAndDate()) + files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)), + "multimodal")) + for filename in files: + fileStamping(filename, stamp) + fileStamping("setup.py", stamp) + + +def eraseStamp(): + """ Erase a copyright stamp from all files """ + files = findFiles(os.path.join(os.path.dirname(os.path.abspath(__file__)), + "multimodal")) + for filename in files: + fileUnStamping(filename) + fileUnStamping("setup.py") + + +def usage(arg): + print("Usage :") + print("\tpython %s stamping" % arg) + print("\tpython %s unstamping" % arg) + + +if __name__ == "__main__": + if len(sys.argv) == 1: + usage(sys.argv[0]) + elif len(sys.argv) == 2: + if sys.argv[1].startswith("unstamping"): + eraseStamp() + elif sys.argv[1].startswith("stamping"): + writeStamp() + else: + usage(sys.argv[0]) + else: + usage(sys.argv[0]) diff --git a/copyrightstamp.txt b/copyrightstamp.txt new file mode 100644 index 0000000000000000000000000000000000000000..872fce0ebc426b0aad2e9b220791edd90e5db042 --- /dev/null +++ b/copyrightstamp.txt @@ -0,0 +1,36 @@ +######### COPYRIGHT ######### + +Copyright(c) DATE +----------------- + + +* Université d'Aix Marseille (AMU) - +* Centre National de la Recherche Scientifique (CNRS) - +* Université de Toulon (UTLN). +* Copyright © 2019-2020 AMU, CNRS, UTLN + +Contributors: +------------ + +* Sokol Koço <sokol.koco_AT_lis-lab.fr> +* Cécile Capponi <cecile.capponi_AT_univ-amu.fr> +* Dominique Benielli <dominique.benielli_AT_univ-amu.fr> +* Baptiste Bauvin <baptiste.bauvin_AT_univ-amu.fr> + +Description: +----------- + + + +Version: +------- + +* multiview_generator version = MULTIMODAL_VERSION + +Licence: +------- + +License: New BSD License + + +######### COPYRIGHT ######### diff --git a/license.txt b/license.txt new file mode 100644 index 0000000000000000000000000000000000000000..573f99a8258c728e1aa5bd241ec6941ed7a0d7c1 --- /dev/null +++ b/license.txt @@ -0,0 +1,30 @@ +New BSD License + +Copyright (c) 2020-15-01, The scikit-multimodallearn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of the IntertwiningWavelet developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. \ No newline at end of file diff --git a/summit/examples/config_files/config_example_0.yml b/summit/examples/config_files/config_example_0.yml index 6e0e029f34fb01cffe3d17b138384fed8bdfbaac..753e5c077579d5bc8f7d6f8641ba9e148e860038 100644 --- a/summit/examples/config_files/config_example_0.yml +++ b/summit/examples/config_files/config_example_0.yml @@ -42,9 +42,9 @@ classes: # The type of algorithms to run during the benchmark (monoview and/or multiview) type: ["monoview","multiview"] # The name of the monoview algorithms to run, ["all"] to run all the available classifiers -algos_monoview: ["decision_tree"] +algos_monoview: ["decision_tree", "adaboost"] # The names of the multiview algorithms to run, ["all"] to run all the available classifiers -algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",] +algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",] # The number of times the benchamrk is repeated with different train/test # split, to have more statistically significant results stats_iter: 1 diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py index 0b92a6f10fbe834b0646489c56f8b53b5108bcfc..03ef02a44f3a1a40cf60f2ef835e12b467eb7a97 100644 --- a/summit/multiview_platform/exec_classif.py +++ b/summit/multiview_platform/exec_classif.py @@ -203,6 +203,7 @@ def get_random_hps_args(hps_args, classifier_name): hps_dict["param_distributions"] = value return hps_dict + def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, view_index, view_name, hps_kwargs): if classifier_name in arguments: @@ -430,116 +431,6 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary, return results_monoview, labels_names -# def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None, -# classification_indices=None, args=None, -# k_folds=None, random_state=None, hyper_param_search=None, -# metrics=None, argument_dictionaries=None, -# benchmark=None, views=None, views_indices=None, flag=None, -# labels=None, -# exec_monoview_multicore=exec_monoview_multicore, -# exec_multiview_multicore=exec_multiview_multicore,): -# """Used to run a benchmark using one core. 
ExecMonoview_multicore, initMultiviewArguments and -# exec_multiview_multicore args are only used for tests""" -# -# results_monoview, labels_names = benchmark_init(directory, -# classification_indices, labels, -# labels_dictionary, k_folds) -# -# logging.debug("Start:\t monoview benchmark") -# results_monoview += [ -# exec_monoview_multicore(directory, args["name"], labels_names, -# classification_indices, k_folds, -# core_index, args["file_type"], args["pathf"], random_state, -# labels, -# hyper_param_search=hyper_param_search, -# metrics=metrics, -# n_iter=args["hps_iter"], **argument) -# for argument in argument_dictionaries["Monoview"]] -# logging.debug("Done:\t monoview benchmark") -# -# -# logging.debug("Start:\t multiview benchmark") -# results_multiview = [ -# exec_multiview_multicore(directory, core_index, args["name"], -# classification_indices, k_folds, args["file_type"], -# args["pathf"], labels_dictionary, random_state, -# labels, hyper_param_search=hyper_param_search, -# metrics=metrics, n_iter=args["hps_iter"], -# **arguments) -# for arguments in argument_dictionaries["multiview"]] -# logging.debug("Done:\t multiview benchmark") -# -# return [flag, results_monoview + results_multiview] -# -# -# def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None, -# directory=None, classification_indices=None, -# args=None, -# k_folds=None, random_state=None, -# hyper_param_search=None, metrics=None, -# argument_dictionaries=None, -# benchmark=None, views=None, views_indices=None, -# flag=None, labels=None, -# exec_monoview_multicore=exec_monoview_multicore, -# exec_multiview_multicore=exec_multiview_multicore,): -# """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and -# exec_multiview_multicore args are only used for tests""" -# -# results_monoview, labels_names = benchmark_init(directory, -# classification_indices, labels, -# labels_dictionary, k_folds) -# -# logging.debug("Start:\t monoview benchmark") -# nb_experiments = len(argument_dictionaries["monoview"]) -# nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) -# for step_index in range(nb_multicore_to_do): -# results_monoview += (Parallel(n_jobs=nb_cores)( -# delayed(exec_monoview_multicore)(directory, args["name"], labels_names, -# classification_indices, k_folds, -# core_index, args["file_type"], args["pathf"], -# random_state, labels, -# hyper_param_search=hyper_param_search, -# metrics=metrics, -# n_iter=args["hps_iter"], -# **argument_dictionaries["monoview"][ -# core_index + step_index * nb_cores]) -# for core_index in -# range(min(nb_cores, nb_experiments - step_index * nb_cores)))) -# logging.debug("Done:\t monoview benchmark") -# -# logging.debug("Start:\t multiview arguments initialization") -# # argument_dictionaries = initMultiviewArguments(args, benchmark, views, -# # views_indices, -# # argument_dictionaries, -# # random_state, directory, -# # resultsMonoview, -# # classification_indices) -# logging.debug("Done:\t multiview arguments initialization") -# -# logging.debug("Start:\t multiview benchmark") -# results_multiview = [] -# nb_experiments = len(argument_dictionaries["multiview"]) -# nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores)) -# for step_index in range(nb_multicore_to_do): -# results_multiview += Parallel(n_jobs=nb_cores)( -# delayed(exec_multiview_multicore)(directory, core_index, args["name"], -# classification_indices, k_folds, -# args["file_type"], args["Base"]["pathf"], -# labels_dictionary, 
random_state, -# labels, -# hyper_param_search=hyper_param_search, -# metrics=metrics, -# n_iter=args["hps_iter"], -# ** -# argument_dictionaries["multiview"][ -# step_index * nb_cores + core_index]) -# for core_index in -# range(min(nb_cores, nb_experiments - step_index * nb_cores))) -# logging.debug("Done:\t multiview benchmark") -# -# return [flag, results_monoview + results_multiview] - - def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, directory=None, classification_indices=None, args=None, @@ -548,7 +439,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, argument_dictionaries=None, benchmark=None, views=None, views_indices=None, flag=None, labels=None, - track_tracebacks=False, n_jobs=1): # pragma: no cover + track_tracebacks=False, nb_cores=1): # pragma: no cover results_monoview, labels_names = benchmark_init(directory, classification_indices, labels, @@ -564,7 +455,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, results_monoview += [ exec_monoview(directory, X, Y, args["name"], labels_names, classification_indices, k_folds, - n_jobs, args["file_type"], args["pathf"], random_state, + nb_cores, args["file_type"], args["pathf"], random_state, hyper_param_search=hyper_param_search, metrics=metrics, **arguments)] @@ -575,19 +466,8 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, "view_name"]] = traceback.format_exc() else: raise - logging.info("Done:\t monoview benchmark") - logging.info("Start:\t multiview arguments initialization") - - # argument_dictionaries = initMultiviewArguments(args, benchmark, views, - # views_indices, - # argument_dictionaries, - # random_state, directory, - # resultsMonoview, - # classification_indices) - logging.info("Done:\t multiview arguments initialization") - logging.info("Start:\t multiview benchmark") results_multiview = [] for arguments in argument_dictionaries["multiview"]: @@ -595,7 +475,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None, results_multiview += [ exec_multiview(directory, dataset_var, args["name"], classification_indices, - k_folds, 1, args["file_type"], + k_folds, nb_cores, args["file_type"], args["pathf"], labels_dictionary, random_state, labels, hps_method=hyper_param_search, @@ -663,7 +543,7 @@ def exec_benchmark(nb_cores, stats_iter, for arguments in benchmark_arguments_dictionaries: benchmark_results = exec_one_benchmark_mono_core( dataset_var=dataset_var, - track_tracebacks=track_tracebacks, n_jobs=nb_cores, + track_tracebacks=track_tracebacks, nb_cores=nb_cores, **arguments) analyze_iterations([benchmark_results], benchmark_arguments_dictionaries, stats_iter, @@ -793,11 +673,5 @@ def exec_classif(arguments): # pragma: no cover stats_iter_random_states, metrics, argument_dictionaries, benchmark, views, views_indices) - results_mean_stds = exec_benchmark( - nb_cores, stats_iter, - benchmark_argument_dictionaries, directory, metrics, - dataset_var, - args["track_tracebacks"]) - # noise_results.append([noise_std, results_mean_stds]) - # plot_results_noise(directory, noise_results, metrics[0][0], - # dataset_name) + exec_benchmark(nb_cores, stats_iter, benchmark_argument_dictionaries, + directory, metrics, dataset_var, args["track_tracebacks"]) diff --git a/summit/multiview_platform/monoview/exec_classif_mono_view.py
b/summit/multiview_platform/monoview/exec_classif_mono_view.py index 18d896bc353664555135950c21ffa4a137948b89..98b65e3dbfde3c8cbbbb51e6096065eb2ef0214e 100644 --- a/summit/multiview_platform/monoview/exec_classif_mono_view.py +++ b/summit/multiview_platform/monoview/exec_classif_mono_view.py @@ -36,8 +36,8 @@ def exec_monoview_multicore(directory, name, labels_names, **args): # pragma: no cover dataset_var = HDF5Dataset( hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r")) - neededViewIndex = args["view_index"] - X = dataset_var.get_v(neededViewIndex) + needed_view_index = args["view_index"] + X = dataset_var.get_v(needed_view_index) Y = labels return exec_monoview(directory, X, Y, name, labels_names, classification_indices, k_folds, 1, database_type, @@ -61,8 +61,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names, view_name, \ classifier_name, \ X, \ - learningRate, \ - labelsString, \ + learning_rate, \ + labels_string, \ output_file_name, \ directory, \ base_file_name = init_constants(args, X, classification_indices, @@ -74,7 +74,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, "Info:\t Classification - Database:" + str( database_name) + " View:" + str( view_name) + " train ratio:" - + str(learningRate) + ", CrossValidation k-folds: " + str( + + str(learning_rate) + ", CrossValidation k-folds: " + str( k_folds.n_splits) + ", cores:" + str(nb_cores) + ", algorithm : " + classifier_name) @@ -110,7 +110,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, random_state, y=Y) fit_beg = time.monotonic() - classifier.fit(X_train, y_train) # NB_CORES=nbCores, + classifier.fit(X_train, y_train) fit_duration = time.monotonic() - fit_beg logging.info("Done:\t Training") @@ -122,10 +122,10 @@ def exec_monoview(directory, X, Y, database_name, labels_names, # Filling the full prediction in the right order full_pred = np.zeros(Y.shape, dtype=int) - 100 - for trainIndex, index in enumerate(classification_indices[0]): - full_pred[index] = train_pred[trainIndex] - for testIndex, index in enumerate(classification_indices[1]): - full_pred[index] = test_pred[testIndex] + for train_index, index in enumerate(classification_indices[0]): + full_pred[index] = train_pred[train_index] + for test_index, index in enumerate(classification_indices[1]): + full_pred[index] = test_pred[test_index] logging.info("Done:\t Predicting") @@ -185,7 +185,8 @@ def init_constants(args, X, classification_indices, labels_names, base_file_name = cl_type_string + '-' + name + "-" + view_name + "-" output_file_name = os.path.join(directory, base_file_name) secure_file_path(output_file_name) - return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name, directory, base_file_name + return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string,\ + output_file_name, directory, base_file_name def init_train_test(X, Y, classification_indices): diff --git a/summit/multiview_platform/monoview/monoview_utils.py b/summit/multiview_platform/monoview/monoview_utils.py index c46c8b2e75c4e2b28ee2278fdd3f44316cbc5e44..2fbf53d3e077e013382663ceaa2af5ce57024531 100644 --- a/summit/multiview_platform/monoview/monoview_utils.py +++ b/summit/multiview_platform/monoview/monoview_utils.py @@ -13,8 +13,6 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -# __date__ = 2016 - 03 - 25 - def change_label_to_minus(y): """ Change the label 0 to minus one @@ -76,43 +74,6 @@ def 
gen_test_folds_preds(X_train, y_train, KFolds, estimator): return test_folds_preds -# class CustomRandint: -# """Used as a distribution returning a integer between low and high-1. -# It can be used with a multiplier agrument to be able to perform more complex generation -# for example 10 e -(randint)""" -# -# def __init__(self, low=0, high=0, multiplier=""): -# self.randint = randint(low, high) -# self.multiplier = multiplier -# -# def rvs(self, random_state=None): -# randinteger = self.randint.rvs(random_state=random_state) -# if self.multiplier == "e-": -# return 10 ** -randinteger -# else: -# return randinteger -# -# def get_nb_possibilities(self): -# return self.randint.b - self.randint.a -# -# -# class CustomUniform: -# """Used as a distribution returning a float between loc and loc + scale.. -# It can be used with a multiplier agrument to be able to perform more complex generation -# for example 10 e -(float)""" -# -# def __init__(self, loc=0, state=1, multiplier=""): -# self.uniform = uniform(loc, state) -# self.multiplier = multiplier -# -# def rvs(self, random_state=None): -# unif = self.uniform.rvs(random_state=random_state) -# if self.multiplier == 'e-': -# return 10 ** -unif -# else: -# return unif - - class BaseMonoviewClassifier(BaseClassifier): def get_feature_importance(self, directory, base_file_name, diff --git a/summit/multiview_platform/monoview_classifiers/sgd.py b/summit/multiview_platform/monoview_classifiers/sgd.py index 1b1d3375c39527152c767d7a9f86f0e0c0611b00..312a2bf26b91afb2d857716837c2eddf99883e9d 100644 --- a/summit/multiview_platform/monoview_classifiers/sgd.py +++ b/summit/multiview_platform/monoview_classifiers/sgd.py @@ -23,8 +23,8 @@ class SGD(SGDClassifier, BaseMonoviewClassifier): loss=loss, penalty=penalty, alpha=alpha, - max_iter=5, - tol=None, + max_iter=max_iter, + tol=tol, random_state=random_state ) self.param_names = ["loss", "penalty", "alpha", "random_state"] diff --git a/summit/multiview_platform/multiview/exec_multiview.py b/summit/multiview_platform/multiview/exec_multiview.py index 1f3dcdc39b11f0d0e79c3f1629068bbfd72973b4..a594624c8f5aab3e332b6ef7d20776d91308500c 100644 --- a/summit/multiview_platform/multiview/exec_multiview.py +++ b/summit/multiview_platform/multiview/exec_multiview.py @@ -260,11 +260,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info("Done:\t Getting train/test split") logging.info("Start:\t Getting classifiers modules") - # classifierPackage = getattr(multiview_classifiers, - # CL_type) # Permet d'appeler un module avec une string classifier_module = getattr(multiview_classifiers, cl_type) classifier_name = classifier_module.classifier_class_name - # classifierClass = getattr(classifierModule, CL_type + "Class") logging.info("Done:\t Getting classifiers modules") logging.info("Start:\t Optimizing hyperparameters") @@ -285,13 +282,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices, hps.fit(dataset_var, dataset_var.get_labels(), ) classifier_config = hps.get_best_params() hps.gen_report(output_file_name) - # classifier_config = hyper_parameter_search.search_best_settings( - # dataset_var, dataset_var.get_labels(), classifier_module, - # classifier_name, - # metrics[0], learning_indices, k_folds, random_state, - # output_file_name, nb_cores=nb_cores, views_indices=views_indices, - # searching_tool=hps_method, n_iter=n_iter, - # classifier_config=classifier_config) hps_duration = time.monotonic() - hps_beg classifier = get_mc_estim( getattr(classifier_module, 
classifier_name)(random_state=random_state, @@ -325,8 +315,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info( "Info:\t Classification duration " + str(extraction_time) + "s") - # TODO: get better cltype - logging.info("Start:\t Result Analysis for " + cl_type) times = (extraction_time, whole_duration) result_analyzer = MultiviewResultAnalyzer(view_names=views, diff --git a/summit/multiview_platform/multiview/multiview_utils.py b/summit/multiview_platform/multiview/multiview_utils.py index 88c4ac534a3623a67cfe7acdc5903edb04e41587..6a26aaa37623b1b1f265e10af39ecdc3bef513fa 100644 --- a/summit/multiview_platform/multiview/multiview_utils.py +++ b/summit/multiview_platform/multiview/multiview_utils.py @@ -7,13 +7,6 @@ from .. import monoview_classifiers from ..utils.base import BaseClassifier, ResultAnalyser from ..utils.dataset import RAMDataset - -# class FakeEstimator(): -# -# def predict(self, X, sample_indices=None, view_indices=None): -# return np.zeros(sample_indices.shape[0]) - - class BaseMultiviewClassifier(BaseClassifier): """ BaseMultiviewClassifier base of Multiview classifiers @@ -49,21 +42,6 @@ class BaseMultiviewClassifier(BaseClassifier): 'Used {} views to fit, and trying to predict on {}'.format( self.used_views, view_indices)) - # def to_str(self, param_name): - # if param_name in self.weird_strings: - # string = "" - # if "class_name" in self.weird_strings[param_name]: - # string += self.get_params()[param_name].__class__.__name__ - # if "config" in self.weird_strings[param_name]: - # string += "( with " + self.get_params()[ - # param_name].params_to_string() + ")" - # else: - # string += self.weird_strings[param_name]( - # self.get_params()[param_name]) - # return string - # else: - # return str(self.get_params()[param_name]) - def accepts_multi_class(self, random_state, n_samples=10, dim=2, n_classes=3, n_views=2): if int(n_samples / n_classes) < 1: diff --git a/summit/multiview_platform/multiview_classifiers/additions/early_fusion_from_monoview.py b/summit/multiview_platform/multiview_classifiers/additions/early_fusion_from_monoview.py new file mode 100644 index 0000000000000000000000000000000000000000..582466b018d351cd3ed88dce8884972bff216e85 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/additions/early_fusion_from_monoview.py @@ -0,0 +1,68 @@ +import numpy as np + +from ... 
import monoview_classifiers +from ...multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from ...utils.dataset import get_samples_views_indices +from ...utils.multiclass import get_mc_estim, MultiClassWrapper + +# from ..utils.dataset import get_v + +classifier_class_name = "WeightedLinearEarlyFusion" + + +class BaseEarlyFusion(BaseMultiviewClassifier): + + def __init__(self, monoview_classifier="decision_tree", random_state=None, + **kwargs): + BaseMultiviewClassifier.__init__(self, random_state=random_state) + monoview_classifier_module = getattr(monoview_classifiers, monoview_classifier) + monoview_classifier_class = getattr(monoview_classifier_module, monoview_classifier_module.classifier_class_name) + self.monoview_classifier = monoview_classifier_class(**kwargs) + + def set_params(self, **params): + self.monoview_classifier.set_params(**params) + return self + + def get_params(self, deep=True): + monoview_params = self.monoview_classifier.get_params(deep=deep) + monoview_params["random_state"] = self.random_state + return monoview_params + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + self.used_views = view_indices + if np.unique(y[train_indices]).shape[0] > 2 and \ + not (isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + self.random_state, + multiview=False, + y=y[train_indices]) + self.monoview_classifier.fit(X, y[train_indices]) + return self + + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) + self._check_views(self.view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, sample_indices, + view_indices): + """Here, we extract the data from the HDF5 dataset file and store all + the concatenated views in one variable""" + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) + + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X + + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ + monoview_data = np.concatenate( + [dataset.get_v(view_idx, samples) + for index, view_idx + in enumerate(self.view_indices)], axis=1) + return monoview_data \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py b/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py new file mode 100644 index 0000000000000000000000000000000000000000..c35355f98f53a3ce7f84c8809b1594de7b68cc37 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py @@ -0,0 +1,23 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomRandint +from ..utils.base import base_boosting_estimators + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionAdaboost" + + +class EarlyFusionAdaboost(BaseEarlyFusion): + + def __init__(self, random_state=None, n_estimators=50, + base_estimator=None, base_estimator_config=None, **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="adaboost", + n_estimators= n_estimators, + base_estimator=base_estimator, + 
base_estimator_config=base_estimator_config, **kwargs) + self.param_names = ["n_estimators", "base_estimator"] + self.classed_params = ["base_estimator"] + self.distribs = [CustomRandint(low=1, high=500), + base_boosting_estimators] + self.weird_strings = {"base_estimator": "class_name"} \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_decision_tree.py b/summit/multiview_platform/multiview_classifiers/early_fusion_decision_tree.py new file mode 100644 index 0000000000000000000000000000000000000000..fc3e672b494cd0dd8adcb39ef2d841751075d692 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_decision_tree.py @@ -0,0 +1,22 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomRandint + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionDT" + + +class EarlyFusionDT(BaseEarlyFusion): + + def __init__(self, random_state=None, max_depth=None, + criterion='gini', splitter='best', **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="decision_tree", max_depth=max_depth, + criterion=criterion, splitter=splitter, **kwargs) + self.param_names = ["max_depth", "criterion", "splitter", + 'random_state'] + self.classed_params = [] + self.distribs = [CustomRandint(low=1, high=300), + ["gini", "entropy"], + ["best", "random"], [random_state]] + self.weird_strings = {} \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_gradient_boosting.py b/summit/multiview_platform/multiview_classifiers/early_fusion_gradient_boosting.py new file mode 100644 index 0000000000000000000000000000000000000000..f87d336066f6f7a746cd8900204945ba8e8e09ee --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_gradient_boosting.py @@ -0,0 +1,21 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomRandint +from ..monoview_classifiers.gradient_boosting import CustomDecisionTreeGB + +classifier_class_name = "EarlyFusionGB" + + +class EarlyFusionGB(BaseEarlyFusion): + + def __init__(self, random_state=None, loss="exponential", max_depth=1.0, + n_estimators=100, + init=CustomDecisionTreeGB(max_depth=1), + **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="gradient_boosting", + loss=loss, max_depth=max_depth, + n_estimators=n_estimators, init=init, **kwargs) + self.param_names = ["n_estimators", "max_depth"] + self.classed_params = [] + self.distribs = [CustomRandint(low=50, high=500), + CustomRandint(low=1, high=10), ] \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py b/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py new file mode 100644 index 0000000000000000000000000000000000000000..828b7155f7d55cda2f24dc81c2377345699752bf --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py @@ -0,0 +1,17 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomUniform, CustomRandint + +classifier_class_name = "EarlyFusionLasso" + + +class EarlyFusionLasso(BaseEarlyFusion): + + def __init__(self, random_state=None, alpha=1.0, + max_iter=10, warm_start=False, **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, monoview_classifier="lasso", alpha=alpha, + max_iter=max_iter, + warm_start=warm_start,
**kwargs) + self.param_names = ["max_iter", "alpha", "random_state"] + self.classed_params = [] + self.distribs = [CustomRandint(low=1, high=300), + CustomUniform(), [random_state]] \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_random_forest.py b/summit/multiview_platform/multiview_classifiers/early_fusion_random_forest.py new file mode 100644 index 0000000000000000000000000000000000000000..1a01aa314536d14f93dbb0c5b0a9627971c724db --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_random_forest.py @@ -0,0 +1,23 @@ +import numpy as np + +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomRandint + +classifier_class_name = "EarlyFusionRF" + + +class EarlyFusionRF(BaseEarlyFusion): + + def __init__(self, random_state=None, n_estimators=10, + max_depth=None, criterion='gini', **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="random_forest", + n_estimators=n_estimators, max_depth=max_depth, + criterion=criterion, **kwargs) + self.param_names = ["n_estimators", "max_depth", "criterion", + "random_state"] + self.classed_params = [] + self.distribs = [CustomRandint(low=1, high=300), + CustomRandint(low=1, high=10), + ["gini", "entropy"], [random_state]] + self.weird_strings = {} \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_sgd.py b/summit/multiview_platform/multiview_classifiers/early_fusion_sgd.py new file mode 100644 index 0000000000000000000000000000000000000000..580b46628e39dd4790fafda205704f20a0df4812 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_sgd.py @@ -0,0 +1,19 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomUniform + +classifier_class_name = "EarlyFusionSGD" + + +class EarlyFusionSGD(BaseEarlyFusion): + + def __init__(self, random_state=None, loss='hinge', + penalty='l2', alpha=0.0001, max_iter=5, tol=None, **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="sgd", loss=loss, + penalty=penalty, alpha=alpha, max_iter=max_iter, tol=tol, **kwargs) + self.param_names = ["loss", "penalty", "alpha", "random_state"] + self.classed_params = [] + self.distribs = [['log', 'modified_huber'], + ["l1", "l2", "elasticnet"], + CustomUniform(loc=0, state=1), [random_state]] + self.weird_strings = {} \ No newline at end of file diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_svm_rbf.py b/summit/multiview_platform/multiview_classifiers/early_fusion_svm_rbf.py new file mode 100644 index 0000000000000000000000000000000000000000..6d427c9838aa172537d84b31a180d195e6e63f40 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_svm_rbf.py @@ -0,0 +1,13 @@ +from .additions.early_fusion_from_monoview import BaseEarlyFusion +from ..utils.hyper_parameter_search import CustomUniform + +classifier_class_name = "EarlyFusionSVMRBF" + + +class EarlyFusionSVMRBF(BaseEarlyFusion): + + def __init__(self, random_state=None, C=1.0, **kwargs): + BaseEarlyFusion.__init__(self, random_state=random_state, + monoview_classifier="svm_rbf", C=C, **kwargs) + self.param_names = ["C", "random_state"] + self.distribs = [CustomUniform(loc=0, state=1), [random_state]] \ No newline at end of file diff --git a/summit/multiview_platform/result_analysis/error_analysis.py
b/summit/multiview_platform/result_analysis/error_analysis.py index 7ff5f06b9471ba08a15d2487e334285e1c6c5ef1..90d6b69756560a450b304db55abba3cea496f888 100644 --- a/summit/multiview_platform/result_analysis/error_analysis.py +++ b/summit/multiview_platform/result_analysis/error_analysis.py @@ -199,7 +199,6 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, dataset_name, la plt.close() # The following part is used to generate an interactive graph. if use_plotly: - # [np.where(labels==i)[0] for i in np.unique(labels)] hover_text = [[sample_ids[sample_index] + " failed " + str( stats_iter - data[ sample_index, classifier_index]) + " time(s), labelled " + str(
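
Note on the new early_fusion_* classifiers: they all inherit BaseEarlyFusion, which concatenates the selected views into one design matrix and delegates fit/predict to the named monoview classifier. A minimal standalone sketch of that concatenate-then-delegate pattern, using plain numpy arrays and a scikit-learn estimator in place of summit's HDF5 dataset wrapper (all names below are illustrative and not part of the patch):

    import numpy as np
    from sklearn.ensemble import AdaBoostClassifier

    rng = np.random.RandomState(42)
    # Two "views" of the same 100 samples, with 5 and 8 features each.
    view_0, view_1 = rng.rand(100, 5), rng.rand(100, 8)
    y = rng.randint(0, 2, 100)

    # Early fusion: concatenate the views feature-wise, then train one
    # monoview estimator on the joined matrix (AdaBoost here, mirroring
    # what EarlyFusionAdaboost does through the "adaboost" module).
    X = np.concatenate([view_0, view_1], axis=1)
    clf = AdaBoostClassifier(n_estimators=50, random_state=42)
    clf.fit(X, y)
    print(clf.score(X, y))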
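
BaseEarlyFusion.get_params/set_params simply forward to the wrapped monoview classifier (with random_state added back), which is what lets the existing hyper-parameter search tune an early-fusion classifier exactly like a monoview one. A self-contained sketch of that delegation, with a scikit-learn decision tree standing in for the wrapped estimator (class and variable names are illustrative):

    from sklearn.tree import DecisionTreeClassifier

    class EarlyFusionSketch:
        def __init__(self, random_state=None, **kwargs):
            self.random_state = random_state
            self.monoview_classifier = DecisionTreeClassifier(**kwargs)

        def get_params(self, deep=True):
            # Expose the wrapped estimator's parameters as our own.
            params = self.monoview_classifier.get_params(deep=deep)
            params["random_state"] = self.random_state
            return params

        def set_params(self, **params):
            # Forward parameter updates to the wrapped estimator.
            self.monoview_classifier.set_params(**params)
            return self

    wrapper = EarlyFusionSketch(random_state=42, max_depth=3)
    wrapper.set_params(max_depth=5)
    print(wrapper.get_params()["max_depth"])  # 5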
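
The new copyright.py is driven from the command line (python copyright.py stamping or python copyright.py unstamping, per its usage() function) and builds the header it writes by commenting out copyrightstamp.txt through getStamp(). A small sketch of that string transformation on a shortened, illustrative template:

    # Reproduce the getStamp() steps on a two-line template: substitute the
    # DATE placeholder, then turn every template line into a "# " comment.
    template = "######### COPYRIGHT #########\nCopyright(c) DATE\n"
    stamp = template.replace("DATE", "2020")
    stamp = "# " + stamp.replace("\n", "\n# ")
    stamp = stamp.replace("# \n", "#\n").strip()
    print(stamp)
    # # ######### COPYRIGHT #########
    # # Copyright(c) 2020
    # #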