diff --git a/skluc/data/mldatasets/Cifar10Dataset.py b/skluc/data/mldatasets/Cifar10Dataset.py
index 5e2292b58a52ec5b12b7bf82ae5a7c0f98f5b944..1f5468bf76b2b3d0581883fffc557dcc1570277b 100644
--- a/skluc/data/mldatasets/Cifar10Dataset.py
+++ b/skluc/data/mldatasets/Cifar10Dataset.py
@@ -7,6 +7,7 @@ import numpy as np
 from skluc.utils import LabeledData
 from skluc.data.mldatasets.ImageDataset import ImageDataset
 from skluc.utils import logger, check_files
+import matplotlib.pyplot as plt


 class Cifar10Dataset(ImageDataset):
@@ -87,4 +88,18 @@ class Cifar10Dataset(ImageDataset):
         self._test = LabeledData(*self.get_cifar10_data('test'))
         self.meta = self.get_meta()
-        self._check_validation_size(self._train[0].shape[0])
\ No newline at end of file
+        self._check_validation_size(self._train[0].shape[0])
+
+
+if __name__ == "__main__":
+    import time
+    d = Cifar10Dataset(validation_size=10000)
+    d.load()
+    d.to_image()
+    d.normalize()
+    for i, im in enumerate(d.train.data):
+        print(im.shape)
+        plt.imshow(im)
+        plt.show()
+        print(d.train.labels[i])
+        time.sleep(1)
\ No newline at end of file
diff --git a/skluc/data/mldatasets/Dataset.py b/skluc/data/mldatasets/Dataset.py
index f30550cec2e76a3e78a12c8b09258ea5abb1ed56..5e7f7ca396219daf603da90833fef594a6c84387 100644
--- a/skluc/data/mldatasets/Dataset.py
+++ b/skluc/data/mldatasets/Dataset.py
@@ -2,6 +2,7 @@ import os

 import numpy as np
 from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import StratifiedShuffleSplit
 from sklearn.preprocessing import LabelBinarizer

 from skluc.utils import LabeledData
@@ -15,7 +16,6 @@ class Dataset(object):
     # data_groups_private will be used to refer to the attributes self._train and self._test via their names
     # as strings. It is useful when the same operations must be performed on train and test sets
     data_groups_private = ["_train", "_test"]
-    # data_groups_public = ["train", "test", "validation"]

     def __init__(self, l_url, s_name, s_download_dir=os.path.join(os.path.expanduser("~"), "ml_datasets"),
                  validation_size=0, seed=None):
@@ -42,8 +42,8 @@ class Dataset(object):
     def get_uniform_class_rand_indices_train(self, size):
         try:
-            kept_indices, _ = train_test_split(np.arange(len(self.train.data)),
-                                               train_size=size, stratify=self.train.labels, random_state=self.seed)
+            sss = StratifiedShuffleSplit(n_splits=1, train_size=size, random_state=self.seed)
+            kept_indices, _ = next(sss.split(np.arange(len(self.train.data)), self.train.labels))
         except ValueError as e:
             logger.warning("In Dataset.get_uniform_class_rand_indices_train Handled exception: {}".format(str(e)))
             logger.debug("Use random indexes instead")
@@ -52,8 +52,8 @@ class Dataset(object):
     def get_uniform_class_rand_indices_validation(self, size):
         try:
-            kept_indices, _ = train_test_split(np.arange(len(self.validation.data)),
-                                               train_size=size, stratify=self.validation.labels, random_state=self.seed)
+            sss = StratifiedShuffleSplit(n_splits=1, train_size=size, random_state=self.seed)
+            kept_indices, _ = next(sss.split(np.arange(len(self.validation.data)), self.validation.labels))
         except ValueError as e:
             logger.warning("In Dataset.get_uniform_class_rand_indices_validation Handled exception: {}".format(str(e)))
             logger.debug("Use random indexes instead")
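
Note on the StratifiedShuffleSplit change above: split() returns a generator of
(train_indices, test_indices) pairs, one pair per split, even when n_splits=1,
which is why both call sites wrap it in next(). A minimal sketch (toy labels
assumed, not taken from the skluc codebase):

    import numpy as np
    from sklearn.model_selection import StratifiedShuffleSplit

    labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])  # three balanced classes
    sss = StratifiedShuffleSplit(n_splits=1, train_size=6, test_size=3, random_state=0)
    kept_indices, _ = next(sss.split(np.arange(len(labels)), labels))
    print(kept_indices)  # six indices, two per class

Unlike the removed train_test_split call, this uses the non-deprecated
sklearn.model_selection API instead of sklearn.cross_validation.
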
@@ -208,6 +208,29 @@ class Dataset(object):
         if self._train is None and self._test is None:
             raise Exception("No data loaded at the end of load method.")

+    def save_npz(self, npzdir_path=None):
+        if npzdir_path is None:
+            npzdir_path = os.path.join(self.s_download_dir, "npzfiles")
+        os.makedirs(npzdir_path, exist_ok=True)
+        for kw in self.data_groups_private:
+            data, labels = getattr(self, kw)
+            dict_attr = {kw + "_data": data, kw + "_labels": labels}
+            filepath = os.path.join(npzdir_path, kw + ".npz")
+            logger.debug("Shape of {} set: {}".format(kw, data.shape))
+            logger.debug("Saving {} data to {}".format(kw, filepath))
+            np.savez(filepath, **dict_attr)
+
+    def load_npz(self, npzdir_path=None):
+        if npzdir_path is None:
+            npzdir_path = os.path.join(self.s_download_dir, "npzfiles")
+        for kw in self.data_groups_private:
+            npzfile_path = os.path.join(npzdir_path, kw + ".npz")
+            logger.debug("Loading {}".format(npzfile_path))
+            npzfile = np.load(npzfile_path)
+            data = npzfile[kw + "_data"]
+            logger.debug("Shape of {} set: {}".format(kw, data.shape))
+            labels = npzfile[kw + "_labels"]
+            setattr(self, kw, LabeledData(data=data, labels=labels))

     # --- Abstract methods
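
The save_npz/load_npz pair gives each dataset a simple on-disk cache (the
stray print() in save_npz was dropped and the target directory is now created
before writing, since np.savez fails on a missing directory). A hypothetical
round trip, mirroring the __main__ block of Cifar10Dataset above:

    d = Cifar10Dataset(validation_size=10000)
    d.load()
    d.to_image()
    d.normalize()
    d.save_npz()    # writes <s_download_dir>/npzfiles/_train.npz and _test.npz

    d2 = Cifar10Dataset(validation_size=10000)
    d2.load_npz()   # restores _train and _test without re-reading the raw archives

Only the groups in data_groups_private ("_train", "_test") are saved; the
validation split is presumably re-derived from the train set.
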
diff --git a/skluc/data/mldatasets/ImageDataset.py b/skluc/data/mldatasets/ImageDataset.py
index 9232eb2465b4d91f9effbde7ad8d6d4c5f0b7b47..a8180c095ca12cac8c987bd0bc743811d0bd6562 100644
--- a/skluc/data/mldatasets/ImageDataset.py
+++ b/skluc/data/mldatasets/ImageDataset.py
@@ -19,14 +19,14 @@ class ImageDataset(Dataset):
         :param transformer: Transformer object (not a class)
         :return:
         """
-        logger.info("Apply transformation {} to data from dataset {}.".format(transformer.__class__.__name__, self.s_name))
         # todo: this function should work for any dataset (so it should be moved to the Dataset class)
-        transformer_name = transformer.NAME
+        transformer_name = transformer.name
+        logger.info("Applying transformation {} to data from dataset {} using transformer {}.".format(transformer.__class__.__name__, self.s_name, transformer_name))
         transform_path = os.path.join(self.s_download_dir, transformer_name)
         transform_filepaths = [os.path.join(transform_path, kw + ".npz")
                                for kw in self.data_groups_private]
         create_directory(transform_path)
-        if check_files(transform_filepaths) and transformer.check_model():
+        if check_files(transform_filepaths) and transformer.check():
             # in the case where the transformations already exist in npz files
             # and the model is the actual good model
             # but I have no guarantee the transformation has been obtained with the stored model though...
@@ -77,10 +77,8 @@ class ImageDataset(Dataset):
                 raise Exception("Dimensionality problem")
             else:
                 length_by_chanel = int(length_by_chanel)
-            images_mat = np.reshape(images_vec, (images_vec.shape[0], length_by_chanel, self.DEPTH),
-                                    order='F')
-            images = np.reshape(images_mat, (images_mat.shape[0], self.HEIGHT, self.WIDTH,
-                                             self.DEPTH), order='C')
+            images_mat = np.reshape(images_vec, (images_vec.shape[0], length_by_chanel, self.DEPTH), order='F')
+            images = np.reshape(images_mat, (images_mat.shape[0], self.HEIGHT, self.WIDTH, self.DEPTH), order='C')
             setattr(self, kw, LabeledData(images, labels))

     def flatten(self):
@@ -99,25 +97,58 @@ class ImageDataset(Dataset):
             data = datlab.data.reshape(datlab.data.shape[0], init_dim)
             setattr(self, kw, LabeledData(data=data, labels=datlab.labels))

-    def rescale(self, factor):
-        """
-        Rescale images by factor.
-
-        :param factor:
-        :return:
-        """
-        sess = tf.InteractiveSession()
-        for kw in self.data_groups_private:
-            datlab = getattr(self, kw)
-            images_mat = datlab.data
-            output_shape = np.multiply(images_mat.shape[1:-1], (factor, factor))
-            labels = datlab.labels
-            logger.debug("Shape of {} data before rescaling: {}".format(kw, images_mat.shape))
-            logger.debug("Excpected output shape for images: {}".format(output_shape))
-            new_image = tf.image.resize_images(images_mat, output_shape).eval()
-            logger.debug("Shape of {} data after rescaling: {}".format(kw, new_image.shape))
-            setattr(self, kw, LabeledData(new_image, labels))
-        sess.close()
+    # def rescale(self, factor):
+    #     """
+    #     Rescale images by factor.
+    #
+    #     :param factor:
+    #     :return:
+    #     """
+    #     sess = tf.InteractiveSession()
+    #     for kw in self.data_groups_private:
+    #         datlab = getattr(self, kw)
+    #         images_mat = datlab.data
+    #         output_shape = np.multiply(images_mat.shape[1:-1], (factor, factor))
+    #         labels = datlab.labels
+    #         logger.debug("Shape of {} data before rescaling: {}".format(kw, images_mat.shape))
+    #         logger.debug("Expected output shape for images: {}".format(output_shape))
+    #         new_image = tf.image.resize_images(images_mat, output_shape).eval()
+    #         logger.debug("Shape of {} data after rescaling: {}".format(kw, new_image.shape))
+    #         setattr(self, kw, LabeledData(new_image, labels))
+    #     sess.close()
+
+    # def resize(self, new_shape):
+    #     """
+    #     Resize images; the number of channels is preserved.
+    #
+    #     :param new_shape: 2D shape (x, y)
+    #     :return:
+    #     """
+    #     npzdir_path = os.path.join(self.s_download_dir, "npzfiles")
+    #     resize_npz_dir = os.path.join(npzdir_path, "resize_{}_{}".format(new_shape[0], new_shape[1]))
+    #     lst_npzfile_paths = [os.path.join(resize_npz_dir, kw + ".npz")
+    #                          for kw in self.data_groups_private]
+    #     create_directory(resize_npz_dir)
+    #     if check_files(lst_npzfile_paths):
+    #         # case: npz files already exist
+    #         logger.debug("Files {} already exist".format(lst_npzfile_paths))
+    #         logger.info("Loading resized data from files {}".format(lst_npzfile_paths))
+    #         self.load_npz(resize_npz_dir)
+    #     else:
+    #         sess = tf.InteractiveSession()
+    #         for kw in self.data_groups_private:
+    #             datlab = getattr(self, kw)
+    #             images_mat = datlab.data
+    #             labels = datlab.labels
+    #             logger.debug("Shape of {} data before resize: {}".format(kw, images_mat.shape))
+    #             logger.debug("Expected output shape for images: {}".format(new_shape))
+    #             lst_new_image = []
+    #             for image_mat in images_mat:
+    #                 new_image = tf.image.resize_images(image_mat, new_shape).eval()
+    #                 lst_new_image.append(new_image)
+    #             logger.debug("Shape of {} data after resize: {}".format(kw, np.array(lst_new_image).shape))
+    #             setattr(self, kw, LabeledData(np.array(lst_new_image), labels))
+    #         sess.close()

     def to_feature_vectors(self):
         """
@@ -136,3 +167,5 @@
             logger.debug("Shape of {} data after reshape: {}".format(kw, images_mat.shape))
             setattr(self, kw, LabeledData(images_mat, labels))

+    def is_image(self):
+        return len(self._train.data.shape) > 2 and len(self._test.data.shape) > 2
\ No newline at end of file
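
The two-step reshape in to_image is worth unpacking: CIFAR-style vectors are
channel-planar ([all R, all G, all B]), so the order='F' reshape separates the
channel planes while keeping the batch axis aligned, and the order='C' reshape
then restores the H x W grid. A toy check (sizes assumed):

    import numpy as np

    n, h, w, d = 1, 2, 2, 3
    vec = np.arange(n * h * w * d).reshape(n, h * w * d)  # R = 0..3, G = 4..7, B = 8..11
    mat = np.reshape(vec, (n, h * w, d), order='F')
    img = np.reshape(mat, (n, h, w, d), order='C')
    print(img[0, 0, 0])  # [0 4 8]: the first pixel, one value per channel
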
diff --git a/skluc/data/transformation/Transformer.py b/skluc/data/transformation/Transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..36f8855a64d534041d8583d4d998b61f2b7766e3
--- /dev/null
+++ b/skluc/data/transformation/Transformer.py
@@ -0,0 +1,25 @@
+from skluc.utils import Singleton
+
+
+class Transformer:
+    def transform(self, data, labels):
+        """
+        Apply the transformer to the data and labels.
+
+        :param data: the data to transform
+        :type data: np.ndarray
+        :param labels: the labels to transform
+        :type labels: np.ndarray
+        :return: np.ndarray, np.ndarray
+        """
+        raise NotImplementedError
+
+    def check(self):
+        return True
+
+    @property
+    def name(self):
+        raise NotImplementedError
\ No newline at end of file
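
The Transformer base class fixes the contract used by
ImageDataset.apply_transformer: transform() maps a (data, labels) pair to a
(data, labels) pair, check() reports whether cached artefacts (e.g. model
weights) are still valid, and name identifies the transformation for cache
paths. A hypothetical minimal subclass (not part of this changeset):

    import numpy as np
    from skluc.data.transformation.Transformer import Transformer

    class CenteringTransformer(Transformer):
        def transform(self, data, labels):
            # subtract the per-feature mean; labels pass through unchanged
            return data - data.mean(axis=0), labels

        @property
        def name(self):
            return "centering"
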
diff --git a/skluc/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py b/skluc/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py
new file mode 100644
index 0000000000000000000000000000000000000000..886edf880b50f4b4aa1b6497bd68506c6033478b
--- /dev/null
+++ b/skluc/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py
@@ -0,0 +1,98 @@
+import os
+
+import numpy as np
+from keras import Model
+from keras.models import load_model
+
+from skluc.data.transformation.VGG19Transformer import VGG19Transformer
+from skluc.utils import logger, create_directory, download_data, check_file_md5, deprecated
+
+
+# todo: check those deprecated things
+@deprecated
+class VGG19Cifar10CovAbs(VGG19Transformer):
+    """
+    Extend the VGG19Transformer class with weights learned on CIFAR10.
+    The covariance matrix is then computed on the transformed image data.
+    """
+    NAME = "vgg19_cifar10_cov"
+    MODEL_URL = "https://pageperso.lis-lab.fr/~luc.giffon/models/1522967518.1916964_vgg19_cifar10.h5"
+    MODEL_CHECKSUM = "0dbb4f02ceb1f4acb6e24831758106e5"
+    # todo: write a function that checks the checksum directly against the website?
+
+    def __init__(self):
+        super().__init__(name=self.NAME)
+
+    @staticmethod
+    def _compute_cov_matrix(data):
+        """
+        Compute the per-sample covariance matrix of the channel activations.
+
+        :param data: (b x W x H x D)
+        :type data: np.ndarray
+        :return:
+        """
+        data = data.reshape((data.shape[0], data.shape[1] * data.shape[2], data.shape[3]))
+        mean = np.mean(data, axis=1)
+        mean = mean.reshape((mean.shape[0], 1, mean.shape[-1]))
+        data_centered = data - mean
+
+        cov_mat = []
+        for i, mat in enumerate(data_centered):
+            cov_mat.append(mat.T.dot(mat))
+            if i % 1000 == 0:
+                logger.debug("Computing covariance matrix - step {}/{}".format(i, len(data_centered)))
+        cov_mat = 1. / data.shape[1] * np.array(cov_mat)
+        logger.debug("Final covariance matrix shape: {}".format(str(cov_mat.shape)))
+        return cov_mat
+
+    @staticmethod
+    def _compute_log_matrix(data):
+        # matrix logarithm via SVD: valid for symmetric positive-definite inputs
+        # such as the covariance matrices computed above
+        log_mat = []
+        for i, mat in enumerate(data):
+            U, S, V = np.linalg.svd(mat, full_matrices=False)
+            log_mat.append(U.dot(np.diag(np.log(S))).dot(V))
+            if i % 1000 == 0:
+                logger.debug("Computing log matrix - step {}/{}".format(i, len(data)))
+        log_mat = np.array(log_mat)
+        logger.debug("Final log matrix shape: {}".format(str(log_mat.shape)))
+        return log_mat
+
+    def load(self):
+        create_directory(self.s_download_dir)
+        s_model_path = download_data(self.MODEL_URL, self.s_download_dir)
+        check_file_md5(s_model_path, self.__class__.MODEL_CHECKSUM)
+        if self.vgg_conv_model is None:
+            logger.debug("Loading VGG19 model with cifar10 weights")
+            self.vgg_conv_model = load_model(s_model_path)
+            bloc3pool_layer = self.vgg_conv_model.get_layer('block3_pool')
+            # this is weird but the index is actually the index of the layer just before the pooling layer
+            # so this is what we want here: we don't want the pooling
+            index_bloc3pool_layer = self.vgg_conv_model.layers.index(bloc3pool_layer)
+            self.vgg_conv_model = Model(inputs=self.vgg_conv_model.input,
+                                        outputs=self.vgg_conv_model.get_layer(index=index_bloc3pool_layer).output)
+        else:
+            logger.debug("Skip loading VGG19 model with cifar10 weights. Already there.")
+
+    def transform(self, data, labels):
+        if len(data.shape) != 4:
+            raise AssertionError("Data shape should be of size 4 (image batch with channel dimension). "
+                                 "It is {}: {}. Did you forget to reshape the data to an image format?"
+                                 "".format(len(data.shape), data.shape))
+        self.load()
+        transformed_data, labels = super().transform(data, labels)
+        transformed_data = self._compute_cov_matrix(transformed_data)
+        return transformed_data, labels
+
+    def check_model(self):
+        name = os.path.basename(os.path.normpath(self.__class__.MODEL_URL))
+        s_file_path = os.path.join(self.s_download_dir, name)
+        if os.path.exists(s_file_path) and check_file_md5(s_file_path,
+                                                          self.__class__.MODEL_CHECKSUM,
+                                                          raise_=False):
+            return True
+        else:
+            return False
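
For reference, the per-sample loop in _compute_cov_matrix is the batched
covariance (1/n) M^T M over the n = W*H spatial positions of each image. A
sanity sketch (random shapes assumed) showing the loop agrees with a
vectorized einsum:

    import numpy as np

    b, n, d = 4, 9, 5                       # batch, spatial positions, channels
    data = np.random.rand(b, n, d)
    centered = data - data.mean(axis=1, keepdims=True)

    loop_cov = np.array([m.T.dot(m) for m in centered]) / n
    einsum_cov = np.einsum('bij,bik->bjk', centered, centered) / n
    assert np.allclose(loop_cov, einsum_cov)
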
+ """ + + MAP_DATA_MODEL = { + "svhn": DownloadableModel( + url="https://pageperso.lis-lab.fr/~luc.giffon/models/1529968150.5454917_vgg19_svhn.h5", + checksum="563a9ec2aad37459bd1ed0e329441b05"), + "cifar100": DownloadableModel( + url="https://pageperso.lis-lab.fr/~luc.giffon/models/1530965727.781668_vgg19_cifar100fine.h5", + checksum="edf43e263fec05e2c013dd5a2128fc38"), + "cifar10": DownloadableModel( + url="https://pageperso.lis-lab.fr/~luc.giffon/models/1522967518.1916964_vgg19_cifar10.h5", + checksum="0dbb4f02ceb1f4acb6e24831758106e5") + } + + def __init__(self, data_name, cut_layer_name=None, cut_layer_index=None, + download_dir=os.path.join(os.path.expanduser("~"), "ml_models")): + if data_name not in self.MAP_DATA_MODEL.keys(): + raise ValueError("Unknown data name. Can't load weights") + else: + self.data_name = data_name + self.vgg_conv_model = None + self.s_download_dir = os.path.join(download_dir, data_name) + super().__init__() + + if cut_layer_name is None: + cut_layer_name = "block5_pool" + self.__cut_layer_name = cut_layer_name + self.__cut_layer_index = cut_layer_index + if cut_layer_name is not None: + self.transformation_name = self.__class__.__name__ + "_" + str(cut_layer_name) + elif cut_layer_index is not None: + self.transformation_name = self.__class__.__name__\ + + "_" + str(cut_layer_index) + else: + raise AttributeError("Cut layer name or cut_layer index must be given to init VGG19Cifar10Transformer.") + + @property + def name(self): + return self.transformation_name + + def load(self): + create_directory(self.s_download_dir) + logger.debug(self.data_name) + s_model_path = download_data(self.MAP_DATA_MODEL[self.data_name].url, self.s_download_dir) + check_file_md5(s_model_path, self.MAP_DATA_MODEL[self.data_name].checksum) + if self.vgg_conv_model is None: + logger.debug("Loading VGG19 model for {} transformation".format(self.transformation_name)) + self.vgg_conv_model = load_model(s_model_path) + + if self.__cut_layer_name is not None: + self.vgg_conv_model = Model(inputs=self.vgg_conv_model.input, + outputs=self.vgg_conv_model.get_layer(name=self.__cut_layer_name).output) + elif self.__cut_layer_index is not None: + self.vgg_conv_model = Model(inputs=self.vgg_conv_model.input, + outputs=self.vgg_conv_model.get_layer(name=self.__cut_layer_index).output) + + else: + logger.debug("Skip loading model VGG19 for {} transformer. Already there.".format(self.__class__.__name__)) + + def transform(self, data, labels): + if len(data.shape) != 4: + raise AssertionError("Data shape should be of size 4 (image batch with channel dimension). " + "It is {}: {}. Maybe have you forgotten to reshape it to an image format?" 
+ "".format(len(data.shape), data.shape)) + self.load() + model = Model(inputs=self.vgg_conv_model.input, outputs=self.vgg_conv_model.output) + logger.debug("Type of data to transform: {}".format(type(data))) + logger.debug("Length of data to transform: {}".format(len(data))) + logger.debug("Transforming data using pretrained model") + transformed_data = np.array(model.predict(data)).reshape(-1, *model.output_shape[1:]) + logger.debug("Type of transformed data: {}".format(type(transformed_data))) + return transformed_data, labels + + def check(self): + return self.check_model() + + def check_model(self): + name = os.path.basename(os.path.normpath(self.MAP_DATA_MODEL[self.data_name].url)) + s_file_path = os.path.join(self.s_download_dir, name) + if os.path.exists(s_file_path) and check_file_md5(s_file_path, + self.MAP_DATA_MODEL[self.data_name].checksum, + raise_=False): + return True + else: + return False + + +if __name__ == '__main__': + valsize = 10000 + d = Cifar10Dataset(validation_size=valsize) + + d.load() + d.to_image() + trans = VGG19Transformer(data_name="cifar10", cut_layer_name="block5_pool") + d.apply_transformer(transformer=trans) diff --git a/skluc/data/transformation/__init__.py b/skluc/data/transformation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skluc/test/test_transformation.py b/skluc/test/test_transformation.py deleted file mode 100644 index 71800be10fa3e83ee565211cfe2a81bce00ad731..0000000000000000000000000000000000000000 --- a/skluc/test/test_transformation.py +++ /dev/null @@ -1,45 +0,0 @@ -import os -import unittest - -from skluc.data.mldatasets import Cifar10Dataset, SVHNDataset, Cifar100FineDataset -from skluc.data.transformation import VGG19Cifar10Transformer, VGG19Cifar100Transformer, VGG19SvhnTransformer - - -class TestVGG19Transformer(unittest.TestCase): - def setUp(self): - self.lst_name_cut_layers = [ - "block5_conv4", - "block4_conv4", - "block5_pool" - ] - self.lst_offspring_classes = [ - VGG19Cifar10Transformer, - VGG19Cifar100Transformer, - VGG19SvhnTransformer - ] - self.lst_dataset_classes = [ - Cifar10Dataset, - Cifar100FineDataset, - SVHNDataset - ] - - # self.lst_index_cut_layers = [ - # , - # - # ] - - def test_transform(self): - valsize = 10000 - for name_cut_layer in self.lst_name_cut_layers: - for idx_dataset, dataset in enumerate(self.lst_dataset_classes): - d = dataset(validation_size=valsize) - d.load() - d.flatten() - d.to_image() - trans = self.lst_offspring_classes[idx_dataset](cut_layer_name=name_cut_layer) - d.apply_transformer(transformer=trans) - del trans - - -if __name__ == '__main__': - unittest.main() diff --git a/skluc/test/test_transformation/TestVGG19Transformer.py b/skluc/test/test_transformation/TestVGG19Transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..16ba0f108c66db87eed62f22dcfa1db67c354d27 --- /dev/null +++ b/skluc/test/test_transformation/TestVGG19Transformer.py @@ -0,0 +1,40 @@ +import os +import unittest + +from skluc.data.mldatasets import Cifar10Dataset, SVHNDataset, Cifar100FineDataset +from skluc.data.transformation.VGG19Transformer import VGG19Transformer +from skluc.utils import logger + + +class TestVGG19Transformer(unittest.TestCase): + def setUp(self): + self.lst_name_cut_layers = [ + "block5_conv4", + "block4_conv4", + "block5_pool", + "block3_pool" + ] + self.dict_datasets = { + "cifar10": Cifar10Dataset, + "cifar100": Cifar100FineDataset, + "svhn": SVHNDataset + } + + def 
diff --git a/skluc/test/test_transformation/__init__.py b/skluc/test/test_transformation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/skluc/utils.py b/skluc/utils.py
index 42f9d43b9cebc368790d7273a1eaf7f493025ea3..aaace63b61deb0b3172135dd33b04e72e7d3cb03 100644
--- a/skluc/utils.py
+++ b/skluc/utils.py
@@ -1,5 +1,6 @@
 import errno
 import hashlib
+import pathlib
 import logging
 import os
 import time as t
@@ -69,21 +70,15 @@ def silentremove(filename):
         logger.debug("Directory or file {} doesn't exist".format(filename))


-def create_directory(_dir):
+def create_directory(_dir, parents=True, exist_ok=True):
     """
     Try to create the directory if it does not exist.

-    :param dir: the path to the directory to be created
+    :param _dir: the path to the directory to be created
+    :param parents: create missing parent directories as needed
+    :param exist_ok: do not raise if the directory already exists
     :return: None
     """
-    try:
-        logger.debug("Trying to create directory {}".format(_dir))
-        os.makedirs(_dir)
-        logger.debug("Directory {} has been created".format(_dir))
-    except OSError as e:
-        if e.errno != errno.EEXIST:
-            raise
-        logger.debug("Directory {} already exists".format(_dir))
+    logger.debug("Creating directory {} if needed".format(_dir))
+    pathlib.Path(_dir).mkdir(parents=parents, exist_ok=exist_ok)


 def download_data(url, directory, name=None):
@@ -297,8 +292,11 @@ def deprecated(msg=""):
         return new_func
     return inner

+
 LabeledData = collections.namedtuple("LabeledData", ["data", "labels"])

+DownloadableModel = collections.namedtuple("DownloadableModel", ["url", "checksum"])
+
 if __name__ == "__main__":
     a = np.identity(1000)
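
The rewritten create_directory delegates to pathlib, which already provides
the create-if-missing semantics the old try/except implemented by hand:

    import pathlib

    pathlib.Path("/tmp/skluc_demo/a/b").mkdir(parents=True, exist_ok=True)
    pathlib.Path("/tmp/skluc_demo/a/b").mkdir(parents=True, exist_ok=True)  # no error on re-run

DownloadableModel is a plain namedtuple pairing a model URL with its md5
checksum, as used by VGG19Transformer.MAP_DATA_MODEL (the values below are
placeholders, not a real entry):

    import collections

    DownloadableModel = collections.namedtuple("DownloadableModel", ["url", "checksum"])
    m = DownloadableModel(url="https://example.com/model.h5",
                          checksum="d41d8cd98f00b204e9800998ecf8427e")  # md5 of the empty string
    print(m.url, m.checksum)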