From 074f3dae0b970c2bbfa9ca093930786d6886f8d8 Mon Sep 17 00:00:00 2001 From: Luc Giffon <luc.giffon@lis-lab.fr> Date: Tue, 30 Oct 2018 09:23:52 +0100 Subject: [PATCH] fix problem normalization in Dataset class + Test --- skluc/main/data/mldatasets/Dataset.py | 19 +++++++------------ .../test_data/test_mldatasets/TestDataset.py | 7 ++++++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/skluc/main/data/mldatasets/Dataset.py b/skluc/main/data/mldatasets/Dataset.py index f211bb7..325f577 100644 --- a/skluc/main/data/mldatasets/Dataset.py +++ b/skluc/main/data/mldatasets/Dataset.py @@ -34,14 +34,6 @@ class Dataset(object): self.permuted_index_validation = None self.validation_size = validation_size - @property - def min(self): - return np.min(self.train.data) - - @property - def max(self): - return np.max(self.train.data) - def reduce_data_size(self, new_size): logger.info("Reducing datasize of dataset {} to .".format(self.s_name, new_size)) kept_indices = self.get_uniform_class_rand_indices_train(new_size) @@ -220,16 +212,19 @@ class Dataset(object): :return: """ logger.info("Apply normalization to data from dataset {}.".format(self.s_name)) + _min = np.min(self.train.data) + _max = np.max(self.train.data) + logger.debug(f"Minimum value of train set is {_min}; max is {_max}") + for kw in self.data_groups_private: + logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name)) datlab = getattr(self, kw) if len(datlab.labels) == 0: continue data = datlab.data - _min = self.min - _max = self.max - logger.debug(f"Minimum value of train set is {_min}; max is {_max}") + logger.debug(f"Minimum value of {kw} set before normalization is {np.min(data)}; max is {np.max(data)}") data = (data - _min) / (_max - _min) - logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name)) + logger.debug(f"Minimum value of {kw} set after normalization is {np.min(data)}; max is {np.max(data)}") setattr(self, kw, LabeledData(data, datlab.labels)) def data_astype(self, _type): diff --git a/skluc/test/test_data/test_mldatasets/TestDataset.py b/skluc/test/test_data/test_mldatasets/TestDataset.py index a9c9e2e..2d97f22 100644 --- a/skluc/test/test_data/test_mldatasets/TestDataset.py +++ b/skluc/test/test_data/test_mldatasets/TestDataset.py @@ -2,6 +2,7 @@ import unittest import numpy as np +from skluc.main.data.mldatasets import Cifar100FineDataset, Cifar10Dataset, MnistDataset, SVHNDataset from skluc.main.data.mldatasets.Dataset import Dataset from skluc.main.utils import LabeledData @@ -22,7 +23,11 @@ class FooDataset(Dataset): class TestDataset(unittest.TestCase): def setUp(self): - self.dataset_classes = [FooDataset] + self.dataset_classes = [FooDataset, + Cifar100FineDataset, + Cifar10Dataset, + MnistDataset, + SVHNDataset] def test_min_max(self): for d_class in self.dataset_classes: -- GitLab