diff --git a/skluc/main/data/mldatasets/Dataset.py b/skluc/main/data/mldatasets/Dataset.py
index f211bb74a4e01b2122e017d0470a92a0d3fc0dd5..325f5775e029d2ec76865db910fabb5b95928af2 100644
--- a/skluc/main/data/mldatasets/Dataset.py
+++ b/skluc/main/data/mldatasets/Dataset.py
@@ -34,14 +34,6 @@ class Dataset(object):
         self.permuted_index_validation = None
         self.validation_size = validation_size
 
-    @property
-    def min(self):
-        return np.min(self.train.data)
-
-    @property
-    def max(self):
-        return np.max(self.train.data)
-
     def reduce_data_size(self, new_size):
         logger.info("Reducing datasize of dataset {} to .".format(self.s_name, new_size))
         kept_indices = self.get_uniform_class_rand_indices_train(new_size)
@@ -220,16 +212,19 @@ class Dataset(object):
         :return:
         """
         logger.info("Apply normalization to data from dataset {}.".format(self.s_name))
+        _min = np.min(self.train.data)
+        _max = np.max(self.train.data)
+        logger.debug(f"Minimum value of train set is {_min}; max is {_max}")
+
         for kw in self.data_groups_private:
+            logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name))
             datlab = getattr(self, kw)
             if len(datlab.labels) == 0:
                 continue
             data = datlab.data
-            _min = self.min
-            _max = self.max
-            logger.debug(f"Minimum value of train set is {_min}; max is {_max}")
+            logger.debug(f"Minimum value of {kw} set before normalization is {np.min(data)}; max is {np.max(data)}")
             data = (data - _min) / (_max - _min)
-            logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name))
+            logger.debug(f"Minimum value of {kw} set after normalization is {np.min(data)}; max is {np.max(data)}")
             setattr(self, kw, LabeledData(data, datlab.labels))
 
     def data_astype(self, _type):
diff --git a/skluc/test/test_data/test_mldatasets/TestDataset.py b/skluc/test/test_data/test_mldatasets/TestDataset.py
index a9c9e2e4a8e0a74dc318da02d9587ea0714f345d..2d97f22024ac157351538eab52d60d81c7b0351b 100644
--- a/skluc/test/test_data/test_mldatasets/TestDataset.py
+++ b/skluc/test/test_data/test_mldatasets/TestDataset.py
@@ -2,6 +2,7 @@ import unittest
 
 import numpy as np
 
+from skluc.main.data.mldatasets import Cifar100FineDataset, Cifar10Dataset, MnistDataset, SVHNDataset
 from skluc.main.data.mldatasets.Dataset import Dataset
 from skluc.main.utils import LabeledData
 
@@ -22,7 +23,11 @@ class FooDataset(Dataset):
 
 class TestDataset(unittest.TestCase):
     def setUp(self):
-        self.dataset_classes = [FooDataset]
+        self.dataset_classes = [FooDataset,
+                                Cifar100FineDataset,
+                                Cifar10Dataset,
+                                MnistDataset,
+                                SVHNDataset]
 
     def test_min_max(self):
         for d_class in self.dataset_classes: