diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py
index e578633a47506c2b9ac02a1513a9f924ab60507a..73b87d7786e0d034896a9046e4c3803e675cff53 100644
--- a/summit/multiview_platform/exec_classif.py
+++ b/summit/multiview_platform/exec_classif.py
@@ -1062,7 +1062,10 @@ class Summit(BaseExec):
             if framework == "monoview":
                 getattr(monoview_classifiers, classifiers_name)
             else:
-                getattr(multiview_classifiers, classifiers_name)
+                if classifiers_name.startswith('early_fusion_'):
+                    getattr(monoview_classifiers, classifiers_name.split('early_fusion_')[1])
+                else:
+                    getattr(multiview_classifiers, classifiers_name)
         except AttributeError:
             raise AttributeError(
                 classifiers_name + " is not implemented in monoview_classifiers, "
diff --git a/summit/multiview_platform/monoview/exec_classif_mono_view.py b/summit/multiview_platform/monoview/exec_classif_mono_view.py
index ce5c4be32765768e71b34871923b437b11569835..5f4087da0f24d2ee7696753397e50daec5929355 100644
--- a/summit/multiview_platform/monoview/exec_classif_mono_view.py
+++ b/summit/multiview_platform/monoview/exec_classif_mono_view.py
@@ -29,27 +29,27 @@
 __status__ = "Prototype"  # Production, Development, Prototype
 # __date__ = 2016 - 03 - 25
-
-def exec_monoview_multicore(directory, name, labels_names,
-                            classification_indices,
-                            k_folds, dataset_file_index, database_type,
-                            path, random_state, labels,
-                            hyper_param_search="randomized_search",
-                            metrics=[["accuracy_score", None]], n_iter=30,
-                            **args):  # pragma: no cover
-    dataset_var = HDF5Dataset(
-        hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r"))
-    needed_view_index = args["view_index"]
-    X = dataset_var.get_v(needed_view_index)
-    Y = labels
-    return exec_monoview(directory, X, Y, name, labels_names,
-                         classification_indices, k_folds, 1, database_type,
-                         path,
-                         random_state, hyper_param_search=hyper_param_search,
-                         metrics=metrics, n_iter=n_iter,
-                         view_name=dataset_var.get_view_name(
-                             args["view_index"]),
-                         **args)
+#
+# def exec_monoview_multicore(directory, name, labels_names,
+#                             classification_indices,
+#                             k_folds, dataset_file_index, database_type,
+#                             path, random_state, labels,
+#                             hyper_param_search="randomized_search",
+#                             metrics=[["accuracy_score", None]], n_iter=30,
+#                             **args):  # pragma: no cover
+#     dataset_var = HDF5Dataset(
+#         hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r"))
+#     needed_view_index = args["view_index"]
+#     X = dataset_var.get_v(needed_view_index)
+#     Y = labels
+#     return exec_monoview(directory, X, Y, name, labels_names,
+#                          classification_indices, k_folds, 1, database_type,
+#                          path,
+#                          random_state, hyper_param_search=hyper_param_search,
+#                          metrics=metrics, n_iter=n_iter,
+#                          view_name=dataset_var.get_view_name(
+#                              args["view_index"]),
+#                          **args)
 
 
 class MonoViewExp:
 
@@ -83,21 +83,7 @@
         self.splits=splits
         self.random_state = random_state
 
-    def exec(self, dataset_var):
-        t_start = time.monotonic()
-        X = dataset_var.get_v(self.view_index)
-        Y = dataset_var.get_labels()
-        logging.info("Start:\t Loading data")
-        self.init_constants()
-        logging.info("Done:\t Loading data")
-
-        logging.info(
-            "Info:\t Classification - Database:" + str(
-                self.database_name) + " View:" + str(
-                self.view_name) + " train ratio:"
-            + str(self.train_size) + ", CrossValidation k-folds: " + str(
-                self.k_folds.n_splits) + ", algorithm : " + self.classifier_name)
-
+    def hp_train_test(self, X, Y, classifier_class_name, classifier_module):
         logging.info("Start:\t Determine Train/Test split")
         X_train, y_train, X_test, y_test = self.init_train_test(X, Y)
         self.X_train = X_train
@@ -110,16 +96,16 @@ logging.info("Done:\t Determine Train/Test split")
 
         logging.info("Start:\t Generate classifier args")
-        classifier_module = getattr(monoview_classifiers, self.classifier_name)
-        classifier_class_name = classifier_module.classifier_class_name
         hyper_param_beg = time.monotonic()
         self.cl_kwargs = get_hyper_params(classifier_module, self.hps_type,
-                                          self.classifier_name,
-                                          classifier_class_name,
-                                          X_train, y_train,
-                                          self.random_state, self.output_file_name,
-                                          self.k_folds, self.nb_cores, self.metrics, self.classifier_config,
-                                          **self.hps_kwargs)
+                                          self.classifier_name,
+                                          classifier_class_name,
+                                          X_train, y_train,
+                                          self.random_state,
+                                          self.output_file_name,
+                                          self.k_folds, self.nb_cores,
+                                          self.metrics, self.classifier_config,
+                                          **self.hps_kwargs)
         self.argi = {}
         self.argi[self.directory] = self.cl_kwargs.copy()
         self.hyper_param_duration = time.monotonic() - hyper_param_beg
@@ -128,13 +114,13 @@
 
         logging.info("Start:\t Training")
         self.classifier = get_mc_estim(getattr(classifier_module,
-                                               classifier_class_name)
-                                       (random_state=self.random_state, **self.cl_kwargs),
-                                       self.random_state,
-                                       y=Y)
+                                               classifier_class_name)
+                                       (random_state=self.random_state,
+                                        **self.cl_kwargs),
+                                       self.random_state,
+                                       y=Y)
         fit_beg = time.monotonic()
         self.classifier.fit(X_train, y_train)
-
         self.fit_duration = time.monotonic() - fit_beg
         logging.info("Done:\t Training")
 
@@ -143,6 +129,30 @@ pred_beg = time.monotonic()
         test_pred = self.classifier.predict(X_test)
         self.pred_duration = time.monotonic() - pred_beg
+        return train_pred, test_pred, X_train, y_train, y_test, self.hyper_param_duration, self.fit_duration, self.pred_duration
+
+
+    def exec(self, dataset_var):
+        t_start = time.monotonic()
+        X = dataset_var.get_v(self.view_index)
+        Y = dataset_var.get_labels()
+        logging.info("Start:\t Loading data")
+        self.init_constants()
+        logging.info("Done:\t Loading data")
+
+        logging.info(
+            "Info:\t Classification - Database:" + str(
+                self.database_name) + " View:" + str(
+                self.view_name) + " train ratio:"
+            + str(self.train_size) + ", CrossValidation k-folds: " + str(
+                self.k_folds.n_splits) + ", algorithm : " + self.classifier_name)
+
+        classifier_module = getattr(monoview_classifiers, self.classifier_name)
+        classifier_class_name = classifier_module.classifier_class_name
+
+        train_pred, test_pred, X_train, \
+            y_train, y_test, _, _, _ = self.hp_train_test(X, Y, classifier_class_name,
+                                                          classifier_module)
 
         # Filling the full prediction in the right order
         full_pred = np.zeros(Y.shape, dtype=int) - 100
diff --git a/summit/multiview_platform/monoview_classifiers/decision_tree.py b/summit/multiview_platform/monoview_classifiers/decision_tree.py
index 00c56dee55e536ac5b073d0e522836a1abf3c6a6..0d27eb29a9f8887641ac721984f6e67e051c6098 100644
--- a/summit/multiview_platform/monoview_classifiers/decision_tree.py
+++ b/summit/multiview_platform/monoview_classifiers/decision_tree.py
@@ -38,6 +38,9 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
 
     def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
                            multiclass=False):
+        print(feature_ids)
+        print(len(feature_ids))
+        print(self.tree_.feature[0])
         interpretString = "First feature : \n\t{} <= {}\n".format(
             feature_ids[self.tree_.feature[0]],
             self.tree_.threshold[0])
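
Note on the exec_classif.py hunk above: it introduces a naming convention where an "early_fusion_" prefix on a multiview classifier name redirects the lookup to the monoview package. A minimal sketch of that resolution, using only what the hunk shows (the helper name is illustrative, not part of the patch):

    from summit.multiview_platform import monoview_classifiers
    from summit.multiview_platform import multiview_classifiers

    def resolve_classifier_module(classifiers_name, framework):
        # "early_fusion_decision_tree" resolves to the monoview module
        # "decision_tree"; any other name stays in its own framework.
        if framework == "monoview":
            return getattr(monoview_classifiers, classifiers_name)
        if classifiers_name.startswith('early_fusion_'):
            return getattr(monoview_classifiers,
                           classifiers_name.split('early_fusion_')[1])
        return getattr(multiview_classifiers, classifiers_name)
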
diff --git a/summit/multiview_platform/monoview_classifiers/gradient_boosting.py b/summit/multiview_platform/monoview_classifiers/gradient_boosting.py
index 77242502ca370fabba2f51df167774a2c3ac24e2..57288a250ce8b53f3e769ed6dc3ff3475ae5bbb0 100644
--- a/summit/multiview_platform/monoview_classifiers/gradient_boosting.py
+++ b/summit/multiview_platform/monoview_classifiers/gradient_boosting.py
@@ -7,7 +7,7 @@ from sklearn.tree import DecisionTreeClassifier
 
 from .. import metrics
 from ..monoview.monoview_utils import BaseMonoviewClassifier, get_accuracy_graph
-from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint
+from summit.multiview_platform.utils.hyper_parameter_search import CustomRandint, CustomUniform
 
 # Author-Info
 __author__ = "Baptiste Bauvin"
@@ -31,7 +31,7 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
     """
 
     def __init__(self, random_state=None, loss="exponential", max_depth=1.0,
-                 n_estimators=100,
+                 n_estimators=100, learning_rate=0.1,
                  init=CustomDecisionTreeGB(max_depth=1),
                  **kwargs):
         GradientBoostingClassifier.__init__(self,
@@ -39,12 +39,15 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
                                             max_depth=max_depth,
                                             n_estimators=n_estimators,
                                             init=init,
+                                            learning_rate=learning_rate,
                                             random_state=random_state
                                             )
-        self.param_names = ["n_estimators", "max_depth"]
+        self.param_names = ["n_estimators", "max_depth", "loss", "learning_rate"]
         self.classed_params = []
         self.distribs = [CustomRandint(low=50, high=500),
-                         CustomRandint(low=1, high=10), ]
+                         CustomRandint(low=1, high=10),
+                         ['log_loss', 'deviance', 'exponential'],
+                         CustomUniform(loc=0, state=1)]
         self.weird_strings = {}
         self.plotted_metric = metrics.zero_one_loss
         self.plotted_metric_name = "zero_one_loss"
@@ -85,24 +88,24 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
         interpretString += self.get_feature_importance(directory,
                                                        base_file_name,
                                                        feature_ids)
-        step_test_metrics = np.array(
-            [self.plotted_metric.score(y_test, step_pred) for step_pred in
-             self.step_predictions])
-        get_accuracy_graph(step_test_metrics, "AdaboostClassic",
-                           directory + "test_metrics.png",
-                           self.plotted_metric_name, set="test")
-        get_accuracy_graph(self.metrics, "AdaboostClassic",
-                           directory + "metrics.png",
-                           self.plotted_metric_name)
-        np.savetxt(
-            os.path.join(directory, base_file_name + "test_metrics.csv"),
-            step_test_metrics,
-            delimiter=',')
-        np.savetxt(
-            os.path.join(directory, base_file_name + "train_metrics.csv"),
-            self.metrics,
-            delimiter=',')
-        np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
-                   np.array([self.train_time, self.pred_time]),
-                   delimiter=',')
+        # step_test_metrics = np.array(
+        #     [self.plotted_metric.score(y_test, step_pred) for step_pred in
+        #      self.step_predictions])
+        # get_accuracy_graph(step_test_metrics, "AdaboostClassic",
+        #                    directory + "test_metrics.png",
+        #                    self.plotted_metric_name, set="test")
+        # get_accuracy_graph(self.metrics, "AdaboostClassic",
+        #                    directory + "metrics.png",
+        #                    self.plotted_metric_name)
+        # np.savetxt(
+        #     os.path.join(directory, base_file_name + "test_metrics.csv"),
+        #     step_test_metrics,
+        #     delimiter=',')
+        # np.savetxt(
+        #     os.path.join(directory, base_file_name + "train_metrics.csv"),
+        #     self.metrics,
+        #     delimiter=',')
+        # np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
+        #            np.array([self.train_time, self.pred_time]),
+        #            delimiter=',')
         return interpretString
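
gradient_boosting.py now exposes loss and learning_rate to hyperparameter search through the paired param_names/distribs declaration. A sketch of how such a pair can be sampled (illustrative draw loop, assuming CustomRandint/CustomUniform expose a scipy-style rvs(); this is not SuMMIT's actual search code):

    import numpy as np

    def draw_params(param_names, distribs, rng=None):
        # Plain lists act as categorical choices; distribution objects are
        # sampled through rvs(), the way scipy distributions are.
        rng = rng if rng is not None else np.random.RandomState(42)
        drawn = {}
        for name, distrib in zip(param_names, distribs):
            if isinstance(distrib, list):
                drawn[name] = distrib[rng.randint(len(distrib))]
            else:
                drawn[name] = distrib.rvs(random_state=rng)
        return drawn
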
diff --git a/summit/multiview_platform/monoview_classifiers/ib_adaboost.py b/summit/multiview_platform/monoview_classifiers/ib_adaboost.py
new file mode 100644
index 0000000000000000000000000000000000000000..0010bfa03ad437a7596087a7b0ddca9eda80b5ec
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/ib_adaboost.py
@@ -0,0 +1,44 @@
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from sklearn.ensemble import AdaBoostClassifier
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+classifier_class_name = "IBAda"
+
+class IBAda(BaseMonoviewClassifier, BalancedBaggingClassifier):
+
+    def __init__(self, random_state=None, n_estimators=10,
+                 sampling_strategy="auto", base_estimator=AdaBoostClassifier(),
+                 replacement=False, **kwargs):
+        super(IBAda, self).__init__(random_state=random_state,
+                                    base_estimator=base_estimator,
+                                    n_estimators=n_estimators,
+                                    sampling_strategy=sampling_strategy,
+                                    replacement=replacement)
+
+        self.param_names = ["n_estimators", "sampling_strategy",
+                            "base_estimator__n_estimators",
+                            "base_estimator__base_estimator",
+                            ]
+        self.classed_params = ["base_estimator", "base_estimator__base_estimator"]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"], CustomRandint(low=1, high=100),
+                         base_boosting_estimators, ]
+        self.weird_strings = {"base_estimator": "class_name"}
+
+    def fit(self, X, y):
+        BalancedBaggingClassifier.fit(self, X, y)
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        if np.sum(self.feature_importances_) != 0:
+            self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
+
+
+
diff --git a/summit/multiview_platform/monoview_classifiers/ib_gradient_boosting.py b/summit/multiview_platform/monoview_classifiers/ib_gradient_boosting.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b2ff3ad9be6d50b90bb7a06131d770be79d8b44
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/ib_gradient_boosting.py
@@ -0,0 +1,44 @@
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from sklearn.ensemble import GradientBoostingClassifier
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+classifier_class_name = "IBGB"
+
+class IBGB(BaseMonoviewClassifier, BalancedBaggingClassifier):
+
+    def __init__(self, random_state=None, n_estimators=10,
+                 sampling_strategy="auto", base_estimator=GradientBoostingClassifier(),
+                 replacement=False, **kwargs):
+        super(IBGB, self).__init__(random_state=random_state,
+                                   base_estimator=base_estimator,
+                                   n_estimators=n_estimators,
+                                   sampling_strategy=sampling_strategy,
+                                   replacement=replacement)
+
+        self.param_names = ["n_estimators", "sampling_strategy",
+                            "base_estimator__n_estimators",
+                            "base_estimator__loss",
+                            "base_estimator__learning_rate", ]
+        self.classed_params = ["base_estimator", ]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"], CustomRandint(low=1, high=100),
+                         ['log_loss', 'exponential'],
+                         CustomUniform(loc=0, state=1)]
+        self.weird_strings = {"base_estimator": "class_name"}
+
+    def fit(self, X, y):
+        BalancedBaggingClassifier.fit(self, X, y)
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
+
+
+
diff --git a/summit/multiview_platform/monoview_classifiers/ib_lasso.py b/summit/multiview_platform/monoview_classifiers/ib_lasso.py
new file mode 100644
index 0000000000000000000000000000000000000000..b46d4ca3cce15f4b213dc884e0441796d2661823
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/ib_lasso.py
@@ -0,0 +1,51 @@
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from sklearn.linear_model import Lasso
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+classifier_class_name = "IBLasso"
+
+class IBLasso(BaseMonoviewClassifier, BalancedBaggingClassifier):
+
+    def __init__(self, random_state=None, n_estimators=10,
+                 sampling_strategy="auto", base_estimator=Lasso(),
+                 replacement=False, **kwargs):
+        super(IBLasso, self).__init__(random_state=random_state,
+                                      base_estimator=base_estimator,
+                                      n_estimators=n_estimators,
+                                      sampling_strategy=sampling_strategy,
+                                      replacement=replacement)
+
+        self.param_names = ["n_estimators", "sampling_strategy",
+                            "base_estimator__max_iter",
+                            "base_estimator__alpha"]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"], CustomRandint(low=1, high=300),
+                         CustomUniform(), ]
+        self.weird_strings = {}
+
+    def fit(self, X, y):
+        neg_y = np.copy(y)
+        neg_y[np.where(neg_y == 0)] = -1
+        BalancedBaggingClassifier.fit(self, X, y)
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
+
+    def predict(self, X):
+        prediction = BalancedBaggingClassifier.predict(self, X)
+        signed = np.sign(prediction)
+        signed[np.where(signed == -1)] = 0
+        print(signed)
+        return signed
+
+
+
+
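
IBLasso's predict override maps the bagged Lasso's continuous output onto {0, 1} labels by thresholding at zero. A standalone sketch of that step with toy values (the values are illustrative, not from the patch):

    import numpy as np

    prediction = np.array([0.7, -0.2, 0.0, 1.3])  # raw regressor output
    signed = np.sign(prediction)                  # values in {-1, 0, 1}
    signed[np.where(signed == -1)] = 0            # negatives become class 0
    # signed is now array([1., 0., 0., 1.])
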
diff --git a/summit/multiview_platform/monoview_classifiers/ib_random_forest.py b/summit/multiview_platform/monoview_classifiers/ib_random_forest.py
index 7f4517a1bcdef4d0642dd73b42e727206349d0a7..9e599b6f86bb02a4e2a7df9f2f8b8be9ba06c159 100644
--- a/summit/multiview_platform/monoview_classifiers/ib_random_forest.py
+++ b/summit/multiview_platform/monoview_classifiers/ib_random_forest.py
@@ -1,4 +1,4 @@
-from imblearn.ensemble import BalancedBaggingClassifier
+from imblearn.ensemble import BalancedRandomForestClassifier
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier
 
@@ -10,36 +10,34 @@
 from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
 
 
 classifier_class_name = "IBRF"
 proto=True
 
-class IBRF(BaseMonoviewClassifier, BalancedBaggingClassifier):
+class IBRF(BaseMonoviewClassifier, BalancedRandomForestClassifier):
 
     def __init__(self, random_state=None, n_estimators=10,
-                 sampling_strategy="auto", replacement=False,
-                 base_estimator=RandomForestClassifier(), **kwargs):
-        super(IBRF, self).__init__(random_state=random_state,
-                                   base_estimator=base_estimator,
-                                   n_estimators=n_estimators,
-                                   sampling_strategy=sampling_strategy,
-                                   replacement=replacement)
-
-        self.param_names = ["n_estimators", "sampling_strategy",
-                            "base_estimator__n_estimators",
-                            "base_estimator__max_depth",
-                            "base_estimator__criterion"]
-        self.classed_params = ["base_estimator"]
-        self.distribs = [CustomRandint(low=1, high=50),
-                         ["auto"], CustomRandint(low=1, high=300),
+                 max_depth=None, criterion='gini', **kwargs):
+
+        BalancedRandomForestClassifier.__init__(self,
+                                                n_estimators=n_estimators,
+                                                max_depth=max_depth,
+                                                criterion=criterion,
+                                                class_weight="balanced",
+                                                random_state=random_state
+                                                )
+        self.param_names = ["n_estimators", "max_depth", "criterion",
+                            "random_state"]
+        self.classed_params = []
+        self.distribs = [CustomRandint(low=1, high=300),
                          CustomRandint(low=1, high=10),
-                         ["gini", "entropy"], ]
-        self.weird_strings = []
-
-    def fit(self, X, y):
-        BalancedBaggingClassifier.fit(self, X, y)
-        self.feature_importances_ = np.zeros(X.shape[1])
-        for estim in self.estimators_:
-            if hasattr(estim['classifier'], 'feature_importances_'):
-                self.feature_importances_ += estim['classifier'].feature_importances_
-        self.feature_importances_ /= np.sum(self.feature_importances_)
-        return self
+                         ["gini", "entropy"], [random_state]]
+        self.weird_strings = {}
+
+    def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
+                           multiclass=False):
+
+        interpret_string = ""
+        interpret_string += self.get_feature_importance(directory,
+                                                        base_file_name,
+                                                        feature_ids)
+        return interpret_string
 
diff --git a/summit/multiview_platform/monoview_classifiers/ib_samba.py b/summit/multiview_platform/monoview_classifiers/ib_samba.py
new file mode 100644
index 0000000000000000000000000000000000000000..f20a9029bfb7486d35c82b3c7e9cbc65d8bd8ad6
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/ib_samba.py
@@ -0,0 +1,55 @@
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from SamBA.samba import NeighborHoodClassifier, ExpTrainWeighting
+from SamBA.relevances import *
+from SamBA.distances import *
+from sklearn.preprocessing import RobustScaler
+
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+classifier_class_name = "IBSamba"
+
+class IBSamba(BaseMonoviewClassifier, BalancedBaggingClassifier):
+
+    def __init__(self, random_state=None, n_estimators=10,
+                 sampling_strategy="auto", base_estimator=NeighborHoodClassifier(),
+                 replacement=False, **kwargs):
+        super(IBSamba, self).__init__(random_state=random_state,
+                                      base_estimator=base_estimator,
+                                      n_estimators=n_estimators,
+                                      sampling_strategy=sampling_strategy,
+                                      replacement=replacement)
+
+        self.param_names = ["n_estimators", "sampling_strategy",
+                            "base_estimator__n_estimators",
+                            "base_estimator__relevance",
+                            "base_estimator__distance",
+                            "base_estimator__difficulty", "base_estimator__b", "base_estimator__pred_train", "base_estimator__normalizer",
+                            "base_estimator__normalize_dists", "base_estimator__a", "base_estimator__class_weight", ]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"], CustomRandint(low=1, high=70),
+                         [ExpRelevance()],
+                         [EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()],
+                         [ExpTrainWeighting()],
+                         CustomUniform(0.1, 6,),
+                         [True, False],
+                         [RobustScaler()],
+                         [True], CustomRandint(0, 10, 'e-'),
+                         ["balanced", None], ]
+        self.weird_strings = {}
+
+    def fit(self, X, y):
+        BalancedBaggingClassifier.fit(self, X, y)
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
+
+
+
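
Each ib_* wrapper above repeats the same feature-importance aggregation in fit. Extracted here for reference as a sketch (the ['classifier'] step name is the one the diff uses to index estimators_ entries; the zero-sum guard mirrors IBAda's, whereas the other wrappers divide unconditionally):

    import numpy as np

    def aggregate_feature_importances(estimators, n_features):
        # Sum importances over the bagged pipelines that expose them,
        # then normalize so the total mass is 1 when any were found.
        importances = np.zeros(n_features)
        for estim in estimators:
            if hasattr(estim['classifier'], 'feature_importances_'):
                importances += estim['classifier'].feature_importances_
        total = np.sum(importances)
        return importances / total if total != 0 else importances
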
diff --git a/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py b/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py
new file mode 100644
index 0000000000000000000000000000000000000000..095bc7dcd63864733d8b2ffa299378fc4f6646ca
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/ib_svm_rbf.py
@@ -0,0 +1,39 @@
+from imblearn.ensemble import BalancedBaggingClassifier
+import numpy as np
+from sklearn.svm import SVC
+
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.base import base_boosting_estimators
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+classifier_class_name = "IBSVMRBF"
+
+class IBSVMRBF(BaseMonoviewClassifier, BalancedBaggingClassifier):
+
+    def __init__(self, random_state=None, n_estimators=10,
+                 sampling_strategy="auto", base_estimator=SVC(kernel='rbf', gamma="scale"),
+                 replacement=False, **kwargs):
+        super(IBSVMRBF, self).__init__(random_state=random_state,
+                                       base_estimator=base_estimator,
+                                       n_estimators=n_estimators,
+                                       sampling_strategy=sampling_strategy,
+                                       replacement=replacement)
+
+        self.param_names = ["n_estimators", "sampling_strategy",
+                            "base_estimator__C", ]
+        self.distribs = [CustomRandint(low=1, high=50),
+                         ["auto"], CustomUniform(loc=0, state=1), ]
+        self.weird_strings = {}
+
+    def fit(self, X, y):
+        BalancedBaggingClassifier.fit(self, X, y)
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
+
+
+
diff --git a/summit/multiview_platform/monoview_classifiers/samba.py b/summit/multiview_platform/monoview_classifiers/samba.py
index a2ca6a306b466340741b19bd5d229c1b778a8e35..96a290115fe1ca426ef51d3eaa6731cbba735583 100644
--- a/summit/multiview_platform/monoview_classifiers/samba.py
+++ b/summit/multiview_platform/monoview_classifiers/samba.py
@@ -26,7 +26,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
                  estimator_params=tuple(),
                  relevance=ExpRelevance(),
                  distance=EuclidianDist(),
-                 train_weighting=ExpTrainWeighting(),
+                 difficulty=ExpTrainWeighting(),
                  keep_selected_features=True,
                  normalizer=RobustScaler(),
                  b=2, a=0.01,
@@ -50,7 +50,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
             estimator_params=estimator_params,
             relevance=relevance,
             distance=distance,
-            train_weighting=train_weighting,
+            difficulty=difficulty,
             keep_selected_features=keep_selected_features,
             normalizer=normalizer,
             forced_diversity=forced_diversity,
@@ -60,8 +60,8 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
 
 
         self.param_names = ["n_estimators", "relevance", "distance",
-                            "train_weighting", "b", "pred_train", "normalizer",
-                            "normalize_dists", "a", "class_weight", "base_estimator"]
+                            "difficulty", "b", "pred_train", "normalizer",
+                            "normalize_dists", "a", "class_weight", ]
         self.distribs = [CustomRandint(low=1, high=70),
                          [ExpRelevance()],
                          [EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()],
diff --git a/summit/multiview_platform/multiview/exec_multiview.py b/summit/multiview_platform/multiview/exec_multiview.py
index c2abe337012302af17f671a2602223bca2c2ff8b..3c3217d05fa6048f690c0d0fc7271fc73324cf8b 100644
--- a/summit/multiview_platform/multiview/exec_multiview.py
+++ b/summit/multiview_platform/multiview/exec_multiview.py
@@ -8,6 +8,8 @@ import numpy as np
 
 from .multiview_utils import MultiviewResult, MultiviewResultAnalyzer
 from .. import multiview_classifiers
+from .. import monoview_classifiers
+from ..monoview.exec_classif_mono_view import MonoViewExp
 from ..utils import hyper_parameter_search
 from ..utils.multiclass import get_mc_estim
 from ..utils.organization import secure_file_path
@@ -17,37 +19,26 @@ __author__ = "Baptiste Bauvin"
 __status__ = "Prototype"  # Production, Development, Prototype
 
 
-class MultiViewExp:
+class MultiViewExp(MonoViewExp):
 
     def __init__(self, classifier_name="decision_tree",
                  classifier_config={"depth":3}, view_names=[],
-                 view_indices=[0], nb_class=2, hps_kwargs={}, train_size=0.8,
+                 view_indices=[0], nb_class=2, hps_kwargs={}, train_size=0.8,
                  labels_dictionary={}, database_name="", hps_type="Random",
                  nb_cores=1, metrics={}, equivalent_draws=False):
-        self.classifier_name = classifier_name
-        self.classifier_config=classifier_config
-        self.view_names=view_names
-        self.view_indices=view_indices
-        self.nb_class=nb_class
-        self.hps_kwargs=hps_kwargs
-        self.train_size=train_size
-        self.labels_dictionary=labels_dictionary
-        self.directory=None
-        self.database_name=database_name
-        self.k_folds=None
-        self.split=None
-        self.hps_type = hps_type
-        self.nb_cores=nb_cores
-        self.metrics=metrics
-        self.equivalent_draws=equivalent_draws
-
-    def add_bootstrap_info(self, directory="", k_folds=[], splits=[],
-                           random_state=42):
-        self.directory = directory
-        self.k_folds=k_folds
-        self.splits=splits
-        self.random_state = random_state
+        MonoViewExp.__init__(self, classifier_name=classifier_name,
+                             classifier_config=classifier_config,
+                             nb_class=nb_class,
+                             hps_kwargs=hps_kwargs,
+                             train_size=train_size,
+                             labels_dictionary=labels_dictionary,
+                             database_name=database_name,
+                             hps_type=hps_type,
+                             nb_cores=nb_cores, metrics=metrics)
+        self.view_names = view_names
+        self.view_indices = view_indices
+        self.equivalent_draws = equivalent_draws
 
     def init_constants(self, dataset_var
                        ):
         """
@@ -146,6 +137,18 @@
                 images_analysis[image_name].savefig(
                     self.output_file_name + image_name + '.png', transparent=True)
 
+    def hdf5_to_monoview(self, dataset):
+        """Here, we concatenate the views for the asked samples """
+        monoview_data = np.concatenate(
+            [dataset.get_v(view_idx)
+             for index, view_idx
+             in enumerate(self.view_indices)], axis=1)
+        self.feature_ids = []
+        for view_idx in self.view_indices:
+            view_name = dataset.view_names[view_idx]
+            self.feature_ids += [view_name+"-"+feat_id for feat_id in dataset.feature_ids[view_idx]]
+        return monoview_data
+
     def exec(self, dataset_var, ):
         """Used to execute multiview classification and result analysis
 
@@ -204,59 +207,73 @@
         logging.info("Done:\t Getting train/test split")
 
         logging.info("Start:\t Getting classifiers modules")
-        classifier_module = getattr(multiview_classifiers, self.classifier_name)
-        classifier_name = classifier_module.classifier_class_name
-        logging.info("Done:\t Getting classifiers modules")
-        logging.info("Start:\t Optimizing hyperparameters")
-        hps_beg = time.monotonic()
-
-        if self.hps_type != "None":
-            hps_method_class = getattr(hyper_parameter_search, self.hps_type)
-            estimator = getattr(classifier_module, classifier_name)(
-                random_state=self.random_state,
-                **self.classifier_config)
-            estimator = get_mc_estim(estimator, self.random_state,
-                                     multiview=True,
-                                     y=dataset_var.get_labels()[
-                                         learning_indices])
-            hps = hps_method_class(estimator, scoring=self.metrics, cv=self.k_folds,
-                                   random_state=self.random_state,
-                                   framework="multiview",
-                                   n_jobs=self.nb_cores,
-                                   learning_indices=learning_indices,
-                                   view_indices=self.view_indices,
-                                   **self.hps_kwargs)
-            hps.fit(dataset_var, dataset_var.get_labels(), )
-            classifier_config = hps.get_best_params()
-            hps.gen_report(self.output_file_name)
-        hps_duration = time.monotonic() - hps_beg
-        self.classifier = get_mc_estim(
-            getattr(classifier_module, classifier_name)(
-                random_state=self.random_state,
-                **self.classifier_config),
-            self.random_state, multiview=True,
-            y=dataset_var.get_labels())
-        logging.info("Done:\t Optimizing hyperparameters")
-        logging.info("Start:\t Fitting classifier")
-        fit_beg = time.monotonic()
-
-        self.classifier.fit(dataset_var, dataset_var.get_labels(),
-                            train_indices=learning_indices,
-                            view_indices=self.view_indices)
-
-        fit_duration = time.monotonic() - fit_beg
-        logging.info("Done:\t Fitting classifier")
-
-        logging.info("Start:\t Predicting")
-        train_pred = self.classifier.predict(dataset_var,
-                                             sample_indices=learning_indices,
-                                             view_indices=self.view_indices)
-        pred_beg = time.monotonic()
-        test_pred = self.classifier.predict(dataset_var,
-                                            sample_indices=validation_indices,
-                                            view_indices=self.view_indices)
-        pred_duration = time.monotonic() - pred_beg
+        logging.info("Done:\t Getting classifiers modules")
+        if self.classifier_name.startswith("early_fusion_"):
+            mono_clf_name = self.classifier_name.split("early_fusion_")[1]
+            classifier_module = getattr(monoview_classifiers, mono_clf_name)
+            classifier_class_name = classifier_module.classifier_class_name
+            X = self.hdf5_to_monoview(dataset_var)
+            y = dataset_var.get_labels()
+            train_pred, test_pred, X_train, y_train, y_test, \
+                hps_duration, fit_duration, pred_duration = self.hp_train_test(X, y,
+                                                                               classifier_class_name,
+                                                                               classifier_module)
+            self.classifier.short_name = "EF_" + mono_clf_name
+        else:
+            self.feature_ids = dataset_var.feature_ids
+            classifier_module = getattr(multiview_classifiers,
+                                        self.classifier_name)
+            classifier_class_name = classifier_module.classifier_class_name
+            logging.info("Start:\t Optimizing hyperparameters")
+            hps_beg = time.monotonic()
+
+            if self.hps_type != "None":
+                hps_method_class = getattr(hyper_parameter_search, self.hps_type)
+                estimator = getattr(classifier_module, classifier_class_name)(
+                    random_state=self.random_state,
+                    **self.classifier_config)
+                estimator = get_mc_estim(estimator, self.random_state,
+                                         multiview=True,
+                                         y=dataset_var.get_labels()[
+                                             learning_indices])
+                hps = hps_method_class(estimator, scoring=self.metrics, cv=self.k_folds,
+                                       random_state=self.random_state,
+                                       framework="multiview",
+                                       n_jobs=self.nb_cores,
+                                       learning_indices=learning_indices,
+                                       view_indices=self.view_indices,
+                                       **self.hps_kwargs)
+                hps.fit(dataset_var, dataset_var.get_labels(), )
+                classifier_config = hps.get_best_params()
+                hps.gen_report(self.output_file_name)
+            hps_duration = time.monotonic() - hps_beg
+            self.classifier = get_mc_estim(
+                getattr(classifier_module, classifier_class_name)(
+                    random_state=self.random_state,
+                    **self.classifier_config),
+                self.random_state, multiview=True,
+                y=dataset_var.get_labels())
+            logging.info("Done:\t Optimizing hyperparameters")
+            logging.info("Start:\t Fitting classifier")
+            fit_beg = time.monotonic()
+
+            self.classifier.fit(dataset_var, dataset_var.get_labels(),
+                                train_indices=learning_indices,
+                                view_indices=self.view_indices)
+
+            fit_duration = time.monotonic() - fit_beg
+            logging.info("Done:\t Fitting classifier")
+
+            logging.info("Start:\t Predicting")
+            train_pred = self.classifier.predict(dataset_var,
+                                                 sample_indices=learning_indices,
+                                                 view_indices=self.view_indices)
+            pred_beg = time.monotonic()
+            test_pred = self.classifier.predict(dataset_var,
+                                                sample_indices=validation_indices,
+                                                view_indices=self.view_indices)
+            pred_duration = time.monotonic() - pred_beg
 
         full_pred = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
         full_pred[learning_indices] = train_pred
         full_pred[validation_indices] = test_pred
@@ -288,7 +305,7 @@
             database_name=dataset_var.get_name(),
             nb_cores=self.nb_cores,
             duration=whole_duration,
-            feature_ids=dataset_var.feature_ids)
+            feature_ids=self.feature_ids)
         string_analysis, images_analysis, metrics_scores, class_metrics_scores, \
             confusion_matrix = result_analyzer.analyze()
         logging.info("Done:\t Result Analysis for " + self.classifier_name)
diff --git a/summit/multiview_platform/multiview/multiview_utils.py b/summit/multiview_platform/multiview/multiview_utils.py
index 19fb7ee87a4c2e90fdd3b7be23b33486cc022aa8..3c2ca682ebba9159cd50d80135328e9582f3ad6d 100644
--- a/summit/multiview_platform/multiview/multiview_utils.py
+++ b/summit/multiview_platform/multiview/multiview_utils.py
@@ -213,3 +213,4 @@
 
     def get_view_specific_info(self):
         return "\t- Views : " + ', '.join(self.view_names) + "\n"
+
diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py b/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py
index 828b7155f7d55cda2f24dc81c2377345699752bf..b007a6d4cdebe786aebfbbcf9786a52c6aa1a61d 100644
--- a/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py
+++ b/summit/multiview_platform/multiview_classifiers/early_fusion_lasso.py
@@ -8,7 +8,8 @@ class EarlyFusionLasso(BaseEarlyFusion):
 
     def __init__(self, random_state=None, alpha=1.0,
                  max_iter=10, warm_start=False, **kwargs):
-        BaseEarlyFusion.__init__(self, random_state=None, alpha=alpha,
+        BaseEarlyFusion.__init__(self, random_state=random_state,
+                                 monoview_classifier="lasso", alpha=alpha,
                                  max_iter=max_iter, warm_start=warm_start,
                                  **kwargs)
         self.param_names = ["max_iter", "alpha", "random_state"]
diff --git a/summit/multiview_platform/utils/base.py b/summit/multiview_platform/utils/base.py
index 754b00f7415d3596cf24974e6b3843bf5dd80bad..c92a258defba09f1ffabc8af7c46665fb5094d69 100644
--- a/summit/multiview_platform/utils/base.py
+++ b/summit/multiview_platform/utils/base.py
@@ -163,6 +163,8 @@ def get_metric(metrics_dict):
             princ_metric_name = metric_name[:-1]
             princ_metric_kwargs = metric_kwargs
     metric_module = getattr(metrics, princ_metric_name)
+    if princ_metric_kwargs is None:
+        princ_metric_kwargs = {}
    return metric_module, princ_metric_kwargs
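
The base.py guard above covers metric entries configured without kwargs. A minimal illustration of the behavior it fixes (the metric name and the trailing-star marker for the principal metric follow the surrounding get_metric code; treat the concrete call as an assumption):

    # A metrics dict entry like {"accuracy_score*": None} previously left
    # princ_metric_kwargs as None, which breaks callers that unpack it with
    # **; it now falls back to an empty dict.
    metric_module, princ_metric_kwargs = get_metric({"accuracy_score*": None})
    assert princ_metric_kwargs == {}
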