diff --git a/summit/multiview_platform/multiview/exec_multiview.py b/summit/multiview_platform/multiview/exec_multiview.py index 254b11b3a676e0a82e3c7ef6abb6a4dc0a181753..d2fbf8efb6d3e3f813823bb3126613688ec1e451 100644 --- a/summit/multiview_platform/multiview/exec_multiview.py +++ b/summit/multiview_platform/multiview/exec_multiview.py @@ -5,6 +5,7 @@ import time import h5py import numpy as np +from matplotlib.style.core import available from .multiview_utils import MultiviewResult, MultiviewResultAnalyzer from .. import multiview_classifiers @@ -256,7 +257,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info("Info:\t Extraction duration " + str(extraction_time) + "s") logging.info("Start:\t Getting train/test split") - learning_indices, validation_indices = classification_indices + available_indices, validation_indices = classification_indices logging.info("Done:\t Getting train/test split") logging.info("Start:\t Getting classifiers modules") @@ -275,11 +276,11 @@ def exec_multiview(directory, dataset_var, name, classification_indices, **classifier_config) estimator = get_mc_estim(estimator, random_state, multiview=True, - y=dataset_var.get_labels()[learning_indices]) + y=dataset_var.get_labels()[available_indices]) hps = hps_method_class(estimator, scoring=metrics, cv=k_folds, random_state=random_state, framework="multiview", n_jobs=nb_cores, - learning_indices=learning_indices, + available_indices=available_indices, view_indices=views_indices, **hps_kwargs) hps.fit(dataset_var, dataset_var.get_labels(), ) classifier_config = hps.get_best_params() @@ -294,7 +295,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info("Start:\t Fitting classifier") fit_beg = time.monotonic() classifier.fit(dataset_var, dataset_var.get_labels(), - train_indices=learning_indices, + train_indices=available_indices, view_indices=views_indices) print("pou") fit_duration = time.monotonic() - fit_beg @@ -302,7 
+303,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info("Start:\t Predicting") train_pred = classifier.predict(dataset_var, - sample_indices=learning_indices, + sample_indices=available_indices, view_indices=views_indices) pred_beg = time.monotonic() test_pred = classifier.predict(dataset_var, @@ -310,7 +311,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, view_indices=views_indices) pred_duration = time.monotonic() - pred_beg full_pred = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100 - full_pred[learning_indices] = train_pred + full_pred[available_indices] = train_pred full_pred[validation_indices] = test_pred logging.info("Done:\t Pertidcting") diff --git a/summit/multiview_platform/utils/dataset.py b/summit/multiview_platform/utils/dataset.py index c474f8ad6c942cae74c4dd1eeae813ac641e7cc8..beaf5829d86231854630d0f935968b7e450e343a 100644 --- a/summit/multiview_platform/utils/dataset.py +++ b/summit/multiview_platform/utils/dataset.py @@ -215,11 +215,11 @@ class RAMDataset(Dataset): def get_label_names(self, sample_indices=None, decode=True): selected_labels = self.get_labels(sample_indices) if decode: - return [label_name.encode("utf-8") + return [label_name.encode('utf-8') for label, label_name in enumerate(self.labels_names) if label in selected_labels] else: - return [label_name.encode("utf-8") + return [label_name for label, label_name in enumerate(self.labels_names) if label in selected_labels] @@ -484,7 +484,7 @@ class HDF5Dataset(Dataset): "name"]] = view_index return view_dict - def get_label_names(self, decode=True, sample_indices=None): + def get_label_names(self, decode=False, sample_indices=None): """ Used to get the list of the label names for the given set of samples @@ -502,8 +502,10 @@ class HDF5Dataset(Dataset): seleted labels' names """ selected_labels = self.get_labels(sample_indices) + print("selected labels ", selected_labels) + print("self.dataset ", 
self.dataset["Labels"].attrs["names"]) if decode: - return [label_name + return [label_name.decode("utf-8") for label, label_name in enumerate(self.dataset["Labels"].attrs["names"]) if label in selected_labels] diff --git a/summit/multiview_platform/utils/hyper_parameter_search.py b/summit/multiview_platform/utils/hyper_parameter_search.py index c5cf33f1e12fd9f9250c2dc52b828d61024ff561..49ed36bcc2da21e77cc61126504e5e034859b26d 100644 --- a/summit/multiview_platform/utils/hyper_parameter_search.py +++ b/summit/multiview_platform/utils/hyper_parameter_search.py @@ -55,6 +55,7 @@ class HPSearch: return metric def fit_multiview(self, X, y, groups=None, **fit_params): + print(self.available_indices) n_splits = self.cv.get_n_splits(self.available_indices, y[self.available_indices]) folds = list( @@ -153,7 +154,7 @@ class HPSearch: class Random(RandomizedSearchCV, HPSearch): def __init__(self, estimator, param_distributions=None, n_iter=10, - refit=False, n_jobs=1, scoring=None, cv=None, learning_indices=None, + refit=False, n_jobs=1, scoring=None, cv=None, available_indices=None, random_state=None, view_indices=None, framework="monoview", equivalent_draws=True, track_tracebacks=True): @@ -166,7 +167,7 @@ class Random(RandomizedSearchCV, HPSearch): refit=refit, n_jobs=n_jobs, scoring=scoring, cv=cv, random_state=random_state) self.framework = framework - self.available_indices = learning_indices + self.available_indices = available_indices self.view_indices = view_indices self.equivalent_draws = equivalent_draws self.track_tracebacks = track_tracebacks @@ -211,14 +212,14 @@ class Grid(GridSearchCV, HPSearch): def __init__(self, estimator, param_grid={}, refit=False, n_jobs=1, scoring=None, cv=None, - learning_indices=None, view_indices=None, framework="monoview", + available_indices=None, view_indices=None, framework="monoview", random_state=None, track_tracebacks=True): scoring = HPSearch.get_scoring(self, scoring) GridSearchCV.__init__(self, estimator, param_grid, 
scoring=scoring, n_jobs=n_jobs, refit=refit, cv=cv) self.framework = framework - self.available_indices = learning_indices + self.available_indices = available_indices self.view_indices = view_indices self.track_tracebacks = track_tracebacks self.tracebacks = [] diff --git a/summit/multiview_platform/utils/multiclass.py b/summit/multiview_platform/utils/multiclass.py index 98c6c1d6bfcf9e3c8fcbeec5eb72f093a873f73d..2d7189e242b1c9d98d8619acb415bb03220546f7 100644 --- a/summit/multiview_platform/utils/multiclass.py +++ b/summit/multiview_platform/utils/multiclass.py @@ -217,7 +217,7 @@ def _multiview_fit_ovo_binary(estimator, X, y, i, j, train_indices, view_indices): cond = np.logical_or(y == i, y == j) # y = y[cond] - y_binary = np.empty(y.shape, np.int) + y_binary = np.empty(y.shape, np.int_) y_binary[y == i] = 0 y_binary[y == j] = 1 indcond = np.arange(X.get_nb_samples())[cond] @@ -230,6 +230,12 @@ def _multiview_fit_ovo_binary(estimator, X, y, i, j, train_indices, class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): + def get_tags(self): + """Get the estimator's tags; sklearn >= 1.6.0 removed the _pairwise attribute. """ + if hasattr(self.estimator, "get_tags"): + return self.estimator.get_tags() + return {"pairwise": False} + def fit(self, X, y, train_indices=None, view_indices=None): """Fit underlying estimators. 
@@ -265,7 +271,7 @@ class MultiviewOVOWrapper(MultiviewWrapper, OneVsOneClassifier): self.estimators_ = estimators_indices[0] self.pairwise_indices_ = ( - estimators_indices[1] if self._pairwise else None) + estimators_indices[1] if self.get_tags()["pairwise"] else None) return self diff --git a/summit/tests/test_multi_view/test_multiview_utils.py b/summit/tests/test_multi_view/test_multiview_utils.py index 99d725253c7a0341719913856259c00d65fba3a9..7ead1eb6bb4a174b75489f635e25a31b180a02c4 100644 --- a/summit/tests/test_multi_view/test_multiview_utils.py +++ b/summit/tests/test_multi_view/test_multiview_utils.py @@ -76,9 +76,11 @@ class TestFunctions(unittest.TestCase): self.assertEqual(avail, ['adaboost', 'decision_tree', 'gradient_boosting', + 'imbalance_bagging', 'knn', 'lasso', 'random_forest', + 'random_scm', 'scm', 'sgd', 'svm_linear', @@ -89,8 +91,10 @@ class TestFunctions(unittest.TestCase): self.assertEqual(avail, ['adaboost', 'decision_tree', 'gradient_boosting', + 'imbalance_bagging', 'knn', 'random_forest', + 'random_scm', 'scm', 'svm_linear', 'svm_poly', diff --git a/summit/tests/test_utils/test_GetMultiviewDB.py b/summit/tests/test_utils/test_GetMultiviewDB.py index 13cab526f46e079f89c9ca540a2d3d10e2373058..ce4392f6662075bfdc6974f5f5cbaad13721305a 100644 --- a/summit/tests/test_utils/test_GetMultiviewDB.py +++ b/summit/tests/test_utils/test_GetMultiviewDB.py @@ -53,6 +53,8 @@ class Test_get_classic_db_hdf5(unittest.TestCase): self.assertEqual(dataset.nb_view, 1) self.assertEqual(labels_dictionary, {0: "0", 1: "2", 2: "1"}) + print(labels_dictionary) + print(dataset.get_labels()) self.assertEqual(dataset.get_nb_samples(), 5) self.assertEqual(len(np.unique(dataset.get_labels())), 3) diff --git a/summit/tests/test_utils/test_dataset.py b/summit/tests/test_utils/test_dataset.py index 67750428c00bbe80f513c00fb35311c314900a3f..8536f90084493a8f8f7b7ec42d62f6010f1d2a61 100644 --- a/summit/tests/test_utils/test_dataset.py +++ 
b/summit/tests/test_utils/test_dataset.py @@ -162,7 +162,7 @@ class Test_Dataset(unittest.TestCase): decoded_label_names = dataset_object.get_label_names() restricted_label_names = dataset_object.get_label_names( sample_indices=[3, 4]) - self.assertEqual(raw_label_names, [b'0', b'1', b'2']) + self.assertEqual(raw_label_names, ['0', '1', '2']) self.assertEqual(decoded_label_names, ['0', '1', '2']) self.assertEqual(restricted_label_names, ['2']) @@ -319,7 +319,7 @@ class TestRAMDataset(unittest.TestCase): self.assertEqual(shape, ['0'.encode('utf-8'), '1'.encode('utf-8'), '2'.encode('utf-8')]) - shape = dataset_object.get_label_names(decode=False) + shape = dataset_object.get_label_names(decode=True) self.assertEqual(shape, ['0'.encode('utf-8'), '1'.encode('utf-8'), '2'.encode('utf-8')]) diff --git a/summit/tests/test_utils/test_hyper_parameter_search.py b/summit/tests/test_utils/test_hyper_parameter_search.py index 8a5c9ec3d6fdca8bf6e50e5a408ae7ebf484a52e..65381b95bab105f09697abb73ec6975f8b948de7 100644 --- a/summit/tests/test_utils/test_hyper_parameter_search.py +++ b/summit/tests/test_utils/test_hyper_parameter_search.py @@ -66,7 +66,7 @@ class Test_Random(unittest.TestCase): cls.scoring = make_scorer(accuracy_score, ) cls.cv = StratifiedKFold(n_splits=n_splits, ) cls.random_state = np.random.RandomState(42) - # cls.learning_indices = np.array([0, 1, 2, 3, 4, ]) + cls.available_indices = np.array([0, 1, 2, 3, 4, ]) cls.view_indices = None cls.framework = "monoview" cls.equivalent_draws = False @@ -78,7 +78,8 @@ class Test_Random(unittest.TestCase): self.estimator, self.param_distributions, n_iter=self.n_iter, refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, random_state=self.random_state, - view_indices=self.view_indices, + available_indices=self.available_indices, + view_indices=self.view_indices, framework=self.framework, equivalent_draws=self.equivalent_draws ) @@ -89,6 +90,7 @@ class Test_Random(unittest.TestCase): refit=self.refit, 
n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, random_state=self.random_state, + available_indices=self.available_indices, view_indices=self.view_indices, framework=self.framework, equivalent_draws=self.equivalent_draws @@ -105,6 +107,7 @@ class Test_Random(unittest.TestCase): refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, random_state=self.random_state, + available_indices=self.available_indices, view_indices=self.view_indices, framework="multiview", equivalent_draws=self.equivalent_draws @@ -120,6 +123,7 @@ class Test_Random(unittest.TestCase): cv=self.cv, random_state=self.random_state, view_indices=self.view_indices, + available_indices=self.available_indices, framework="multiview", equivalent_draws=True ) @@ -134,6 +138,7 @@ class Test_Random(unittest.TestCase): refit=self.refit, n_jobs=self.n_jobs, scoring=self.scoring, cv=self.cv, random_state=self.random_state, + available_indices=self.available_indices, view_indices=self.view_indices, framework="multiview", equivalent_draws=False @@ -200,7 +205,7 @@ class Test_Grid(unittest.TestCase): # "splitter": "best"} # cls.k_folds = StratifiedKFold(n_splits=3, random_state=cls.random_state, # shuffle=True) -# cls.learning_indices = np.array([1,2,3,4, 5,6,7,8,9]) +# cls.available_indices= np.array([1,2,3,4, 5,6,7,8,9]) # cls.dataset = HDF5Dataset(hdf5_file=cls.dataset_file) # # @classmethod