diff --git a/skluc/main/data/transformation/VGG19Transformer.py b/skluc/main/data/transformation/VGG19Transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skluc/main/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py b/skluc/main/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py deleted file mode 100644 index 170260bd5e0d95fe7d00665d2ff28caf46e8d140..0000000000000000000000000000000000000000 --- a/skluc/main/data/transformation/VGG19Transformer/VGG19Cifar10CovAbs.py +++ /dev/null @@ -1,98 +0,0 @@ -import os - -import numpy as np -from keras import Model -from keras.models import load_model - -from skluc.main.data import VGG19Transformer -from skluc.main.utils import logger, create_directory, download_data, check_file_md5, deprecated - - -# todo check those deprecated things -@deprecated -class VGG19Cifar10CovAbs(VGG19Transformer): - """ - Extend the vgg19transformer class with weights learned on CIFAR10. - The covariance matrix is then computed on the transformed data image. - """ - NAME = "vgg19_cifar10_cov" - MODEL_URL = "https://pageperso.lis-lab.fr/~luc.giffon/models/1522967518.1916964_vgg19_cifar10.h5" - MODEL_CHECKSUM = "0dbb4f02ceb1f4acb6e24831758106e5" - # todo faire une fonction qui regarde directement le checksum sur le site ? - - def __init__(self): - super().__init__(name=self.NAME) - - @staticmethod - def _compute_cov_matrix(data): - """ - - :param data: (b x W x H x D) - :type data: np.ndarray - :return: - """ - data = data.reshape((data.shape[0], data.shape[1] * data.shape[2], data.shape[3])) - mean = np.mean(data, axis=1) - mean = mean.reshape((mean.shape[0], 1, mean.shape[-1])) - data_centered = data - mean - - cov_mat = [] - for i, mat in enumerate(data_centered): - cov_mat.append(mat.T.dot(mat)) - if i % 1000 == 0: - logger.debug("Computing covariance matrix - step {}/{}".format(i, len(data_centered))) - cov_mat = 1. / data.shape[1] * np.array(cov_mat) - logger.debug("Final covariance matrix shape: {}".format(str(cov_mat.shape))) - return cov_mat - - @staticmethod - def _compute_log_matrix(data): - log_mat = [] - for i, mat in enumerate(data): - U, S, V = np.linalg.svd(mat, full_matrices=False) - log_mat.append(U.dot(np.diag(np.log(S))).dot(V)) - if i % 1000 == 0: - logger.debug("Computing log matrix - step {}/{}".format(i, len(data))) - log_mat = np.array(log_mat) - logger.debug("Final log matrix shape: {}".format(str(log_mat.shape))) - return log_mat - - def load(self): - create_directory(self.s_download_dir) - s_model_path = download_data(self.MODEL_URL, self.s_download_dir) - check_file_md5(s_model_path, self.__class__.MODEL_CHECKSUM) - if self.vgg_conv_model is None: - logger.debug("Loading VGG19 model with cifar10 weights") - self.vgg_conv_model = load_model(s_model_path) - bloc3pool_layer = self.vgg_conv_model.get_layer('block3_pool') - # this is weird but the index is actually the index of the layer just before the pooling layer - # so this is what we want here: we don't want the pooling - index_bloc3pool_layer = self.vgg_conv_model.layers.index(bloc3pool_layer) - self.vgg_conv_model = Model(inputs=self.vgg_conv_model.input, - outputs=self.vgg_conv_model.get_layer(index=index_bloc3pool_layer).output) - else: - logger.debug("Skip loading model VGG19 model with cifar10 weights. Already there.") - - def transform(self, data, labels): - if len(data.shape) != 4: - raise AssertionError("Data shape should be of size 4 (image batch with channel dimension). " - "It is {}: {}. Maybe have you forgotten to reshape it to an image format?" - "".format(len(data.shape), data.shape)) - self.load() - transformed_data, labels = super().transform(data, labels) - transformed_data = self._compute_cov_matrix(transformed_data) - return transformed_data, labels - - def check_model(self): - name = os.path.basename(os.path.normpath(self.__class__.MODEL_URL)) - s_file_path = os.path.join(self.s_download_dir, name) - if os.path.exists(s_file_path) and check_file_md5(s_file_path, - self.__class__.MODEL_CHECKSUM, - raise_=False): - return True - else: - return False - - - - diff --git a/skluc/main/data/transformation/VGG19Transformer/VGG19ImageNetTransformer.py b/skluc/main/data/transformation/VGG19Transformer/VGG19ImageNetTransformer.py deleted file mode 100644 index 4e3a4db28fa7c6059125fa73f27567ec67c3f4e4..0000000000000000000000000000000000000000 --- a/skluc/main/data/transformation/VGG19Transformer/VGG19ImageNetTransformer.py +++ /dev/null @@ -1,31 +0,0 @@ -from keras.applications import VGG19 - -from skluc.main.data import VGG19Transformer -from skluc.main.utils import logger, deprecated, Singleton - - -@deprecated -class VGG19ImagenetTransformer(VGG19Transformer, metaclass=Singleton): - """ - Extend the vgg19transformer class with convolutional wieghts learned on the imagenet dataset. - """ - NAME = "vgg19_imagenet" - - def __init__(self): - super().__init__(name=self.NAME) - - def load(self, input_shape): - if self.vgg_conv_model is None: - logger.debug("Loading VGG19 model with imagenet weights from keras") - self.vgg_conv_model = VGG19(include_top=False, weights='imagenet', input_shape=input_shape) - else: - logger.debug("Skip loading model VGG19 model with imagenet weights. Already there.") - - def transform(self, data, labels): - # todo trouver une solution pour ne pas avoir un copier collé entre cette classe et celle avec cifar - if len(data.shape) != 4: - raise AssertionError("Data shape should be of size 4 (image batch with channel dimension). " - "It is {}: {}. Maybe have you forgotten to reshape it to an image format?" - "".format(len(data.shape), data.shape)) - self.load(input_shape=data[0].shape) - return super().transform(data, labels) diff --git a/skluc/main/data/transformation/VGG19Transformer/__init__.py b/skluc/main/data/transformation/VGG19Transformer/__init__.py deleted file mode 100644 index cfbbdde0cea5d83c0b65aac458c84cfe5d2b03d8..0000000000000000000000000000000000000000 --- a/skluc/main/data/transformation/VGG19Transformer/__init__.py +++ /dev/null @@ -1,93 +0,0 @@ -from keras import Model -from keras.models import load_model - -from skluc.main.data.mldatasets.Cifar10Dataset import Cifar10Dataset -from skluc.main.data.transformation.KerasModelTransformer import KerasModelTransformer -from skluc.main.utils import logger, create_directory, download_data, check_file_md5, Singleton, DownloadableModel - - -class VGG19Transformer(KerasModelTransformer, metaclass=Singleton): - """ - Uses the vgg19 convolution network to transform data. - """ - - MAP_DATA_MODEL = { - "svhn": DownloadableModel( - url="https://pageperso.lis-lab.fr/~luc.giffon/models/1529968150.5454917_vgg19_svhn.h5", - checksum="563a9ec2aad37459bd1ed0e329441b05" - ), - "cifar100": DownloadableModel( - url="https://pageperso.lis-lab.fr/~luc.giffon/models/1530965727.781668_vgg19_cifar100fine.h5", - checksum="edf43e263fec05e2c013dd5a2128fc38" - ), - "cifar10": DownloadableModel( - url="https://pageperso.lis-lab.fr/~luc.giffon/models/1522967518.1916964_vgg19_cifar10.h5", - checksum="0dbb4f02ceb1f4acb6e24831758106e5" - ), - "siamese_omniglot_28x28": DownloadableModel( - url="https://pageperso.lis-lab.fr/~luc.giffon/models/1536244775.6502118_siamese_vgg19_omniglot_28x28_conv.h5", - checksum="90aec06e688ec3248ba89544a10c9f1f" - ), - "omniglot_28x28": DownloadableModel( - url="https://pageperso.lis-lab.fr/~luc.giffon/models/1536764034.66037_vgg19_omniglot.h5", - checksum="ef1272e9c7ce070e8f70889ec58d1c33" - ) - } - - def __init__(self, data_name, cut_layer_name=None, cut_layer_index=None): - if data_name not in self.MAP_DATA_MODEL.keys(): - raise ValueError("Unknown data name. Can't load weights") - - if cut_layer_name is None and cut_layer_index is None: - logger.warning( - "Cut layer chosen automatically but it eventually will lead to an error in future: block5_pool should be specified explicitly") - cut_layer_name = "block5_pool" - if cut_layer_name is not None: - transformation_name = str(data_name) + "_" + self.__class__.__name__ + "_" + str(cut_layer_name) - elif cut_layer_index is not None: - transformation_name = str(data_name) + "_" + self.__class__.__name__ \ - + "_" + str(cut_layer_index) - # todo sauvegarder index / nom dans le meme dossier si c'est les meme - else: - raise AttributeError("Cut layer name or cut_layer index must be given to init VGG19Transformer.") - self.__cut_layer_name = cut_layer_name - self.__cut_layer_index = cut_layer_index - - self.keras_model = None - - super().__init__(data_name=data_name, - transformation_name=transformation_name) - - def load(self): - create_directory(self.s_download_dir) - s_model_path = download_data(self.MAP_DATA_MODEL[self.data_name].url, self.s_download_dir) - check_file_md5(s_model_path, self.MAP_DATA_MODEL[self.data_name].checksum) - if self.keras_model is None: - logger.debug("Loading VGG19 model for {} transformation with {} weights".format(self.transformation_name, self.data_name)) - self.keras_model = load_model(s_model_path) - - logger.debug("Layers of model {}".format([l.name for l in self.keras_model.layers])) - - if self.__cut_layer_index is not None: - cut_layer = self.keras_model.layers[-1] - self.__cut_layer_name = cut_layer.name - logger.debug( - "Found associated layer {} to layer index {}".format(self.__cut_layer_name, self.__cut_layer_index)) - - self.keras_model = Model(inputs=self.keras_model.input, - outputs=self.keras_model.get_layer(name=self.__cut_layer_name).output) - - else: - logger.debug("Skip loading model VGG19 for {} transformation with {} weights. Already there.".format( - self.transformation_name, - self.data_name)) - - -if __name__ == '__main__': - valsize = 10000 - d = Cifar10Dataset(validation_size=valsize) - - d.load() - d.to_image() - trans = VGG19Transformer(data_name="cifar10", cut_layer_name="block5_pool") - d.apply_transformer(transformer=trans)