diff --git a/skluc/data/mldatasets/Dataset.py b/skluc/data/mldatasets/Dataset.py index 5e7f7ca396219daf603da90833fef594a6c84387..5604a753f148dd010b0b7849feeefe591846d77c 100644 --- a/skluc/data/mldatasets/Dataset.py +++ b/skluc/data/mldatasets/Dataset.py @@ -1,7 +1,6 @@ import os import numpy as np -from sklearn.cross_validation import train_test_split from sklearn.model_selection import StratifiedShuffleSplit from sklearn.preprocessing import LabelBinarizer diff --git a/skluc/data/mldatasets/ImageDataset.py b/skluc/data/mldatasets/ImageDataset.py index a8180c095ca12cac8c987bd0bc743811d0bd6562..b8f6ee6d0a3f10cee9ba659785ec4b4514d5809d 100644 --- a/skluc/data/mldatasets/ImageDataset.py +++ b/skluc/data/mldatasets/ImageDataset.py @@ -46,7 +46,7 @@ class ImageDataset(Dataset): # one need to apply it to the data # then to save the transformation logger.debug("Files {} don't exist or model md5 checksum doesn't match. Need to produce them".format(transform_filepaths)) - logger.info("Apply convolution of {} to dataset {}".format(transformer_name, self.s_name)) + logger.info("Apply transformation of {} to dataset {}".format(transformer_name, self.s_name)) for kw in self.data_groups_private: data, labels = getattr(self, kw) transformed_data, transformed_labels = transformer.transform(data, labels) diff --git a/skluc/data/transformation/ImageTransformer/RescaleTransformer.py b/skluc/data/transformation/RescaleTransformer.py similarity index 70% rename from skluc/data/transformation/ImageTransformer/RescaleTransformer.py rename to skluc/data/transformation/RescaleTransformer.py index f5f3ac64521e86610854caf562c97af853166ea0..f4f47909f01777e0f95b7fe3c8a8f3fb41dac468 100644 --- a/skluc/data/transformation/ImageTransformer/RescaleTransformer.py +++ b/skluc/data/transformation/RescaleTransformer.py @@ -1,4 +1,3 @@ -import os import tensorflow as tf import numpy as np @@ -7,13 +6,13 @@ from skluc.utils import logger, Singleton class RescaleTransformer(Transformer, 
metaclass=Singleton): - def __init__(self, scaling_factor): - self.rescale_factor = scaling_factor - self.__name = os.path.join("resize", "{}".format(str(scaling_factor).replace(".", "-"))) + def __init__(self, data_name, scaling_factor): + transformation_name = self.__class__.__name__ + "_" + "{}".format(str(scaling_factor).replace(".", "-")) + + super().__init__(data_name=data_name, + transformation_name=transformation_name) - @property - def name(self): - return self.__name + self.rescale_factor = scaling_factor def transform(self, data, labels): if len(data.shape) != 4: @@ -25,10 +24,12 @@ class RescaleTransformer(Transformer, metaclass=Singleton): sess = tf.InteractiveSession() images_mat = data - output_shape = np.multiply(images_mat.shape[1:-1], (self.rescale_factor, self.rescale_factor)) + float_output_shape = np.multiply(images_mat.shape[1:-1], (self.rescale_factor, self.rescale_factor)) + output_shape = float_output_shape.astype(int) labels = labels logger.debug("Expected output shape: {}".format((data.shape[0], *output_shape, data.shape[-1]))) new_images = tf.image.resize_images(images_mat, output_shape).eval() logger.debug("Shape of data after rescaling: {}".format(new_images.shape)) sess.close() + tf.reset_default_graph() return np.array(new_images), labels \ No newline at end of file diff --git a/skluc/data/transformation/ImageTransformer/ResizeTransformer.py b/skluc/data/transformation/ResizeTransformer.py similarity index 67% rename from skluc/data/transformation/ImageTransformer/ResizeTransformer.py rename to skluc/data/transformation/ResizeTransformer.py index 7be096fdbe3eefd26dedbde9fad1ba76fe19be77..02b9216231c6dfa48fbc4a9c2107139dc82a19d4 100644 --- a/skluc/data/transformation/ImageTransformer/ResizeTransformer.py +++ b/skluc/data/transformation/ResizeTransformer.py @@ -1,4 +1,3 @@ -import os import tensorflow as tf import numpy as np @@ -7,15 +6,15 @@ from skluc.utils import logger, Singleton class ResizeTransformer(Transformer, 
metaclass=Singleton): - def __init__(self, output_shape): + def __init__(self, data_name, output_shape): if len(output_shape) != 2: raise AssertionError("Output shape should be 2D and it is {}D: {}".format(len(output_shape), output_shape)) - self.output_shape = output_shape - self.__name = os.path.join("resize", "{}x{}".format(output_shape[0], output_shape[1])) - @property - def name(self): - return self.__name + transformation_name = self.__class__.__name__ + "_" + "{}x{}".format(output_shape[0], output_shape[1]) + + super().__init__(data_name=data_name, + transformation_name=transformation_name) + self.output_shape = output_shape def transform(self, data, labels): if len(data.shape) != 4: @@ -29,10 +28,10 @@ class ResizeTransformer(Transformer, metaclass=Singleton): sess = tf.InteractiveSession() images_mat = data labels = labels - lst_new_image = [] - for image_mat in images_mat: - new_image = tf.image.resize_images(image_mat, self.output_shape).eval() - lst_new_image.append(new_image) - logger.debug("Shape data after resize: {}".format(np.array(lst_new_image).shape)) + + new_images = tf.image.resize_images(images_mat, self.output_shape).eval() + + logger.debug("Shape data after resize: {}".format(new_images.shape)) sess.close() - return np.array(lst_new_image), labels + tf.reset_default_graph() + return np.array(new_images), labels diff --git a/skluc/test/test_transformation/TestRescaleTransformer.py b/skluc/test/test_transformation/TestRescaleTransformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8b01f4825c8c5fbaf0cba84ee250fdb2fd06a5c8 --- /dev/null +++ b/skluc/test/test_transformation/TestRescaleTransformer.py @@ -0,0 +1,47 @@ +import unittest + +from skluc.data.mldatasets import MnistDataset, Cifar10Dataset, Cifar100FineDataset, SVHNDataset +from skluc.data.transformation.RescaleTransformer import RescaleTransformer +from skluc.utils import logger + + +class TestResizeTransformer(unittest.TestCase): + def setUp(self): + 
self.dict_datasets = { + "mnist": MnistDataset, + "cifar10": Cifar10Dataset, + "cifar100": Cifar100FineDataset, + "svhn": SVHNDataset + } + self.lst_scales = [ + 0.5, + 0.7, + 1, + 2 + ] + + def test_transform(self): + valsize = 10000 + for data_name in self.dict_datasets: + logger.info("Testing dataset {}".format(data_name)) + for scale in self.lst_scales: + logger.info("Testing size {}".format(str(scale))) + dataset = self.dict_datasets[data_name] + d = dataset(validation_size=valsize) + d.load() + d.flatten() + d.to_image() + trans = RescaleTransformer(data_name=data_name, scaling_factor=scale) + d.apply_transformer(transformer=trans) + del trans + + def test_init(self): + for data_name in self.dict_datasets: + for scale in self.lst_scales: + logger.info("Testing size {}".format(str(scale))) + trans = RescaleTransformer(data_name=data_name, scaling_factor=scale) + del trans + + +if __name__ == '__main__': + unittest.main() diff --git a/skluc/test/test_transformation/TestResizeTransformer.py b/skluc/test/test_transformation/TestResizeTransformer.py new file mode 100644 index 0000000000000000000000000000000000000000..60ea6c000217edd2c4f26deec912feecdf4c95ad --- /dev/null +++ b/skluc/test/test_transformation/TestResizeTransformer.py @@ -0,0 +1,47 @@ +import unittest + +from skluc.data.mldatasets import MnistDataset, Cifar10Dataset, Cifar100FineDataset, SVHNDataset +from skluc.data.transformation.ResizeTransformer import ResizeTransformer +from skluc.utils import logger + + +class TestResizeTransformer(unittest.TestCase): + def setUp(self): + self.dict_datasets = { + "mnist": MnistDataset, + "cifar10": Cifar10Dataset, + "cifar100": Cifar100FineDataset, + "svhn": SVHNDataset + } + self.lst_sizes = [ + (28, 32), + (32, 32), + (28, 28), + (32, 28) + ] + + def test_transform(self): + valsize = 10000 + for data_name in self.dict_datasets: + logger.info("Testing dataset {}".format(data_name)) + for size in self.lst_sizes: + logger.info("Testing size {}".format(str(size))) 
+ dataset = self.dict_datasets[data_name] + d = dataset(validation_size=valsize) + d.load() + d.flatten() + d.to_image() + trans = ResizeTransformer(data_name=data_name, output_shape=size) + d.apply_transformer(transformer=trans) + del trans + + def test_init(self): + for data_name in self.dict_datasets: + for size in self.lst_sizes: + logger.info("Testing size {}".format(str(size))) + trans = ResizeTransformer(data_name=data_name, output_shape=size) + del trans + + +if __name__ == '__main__': + unittest.main()