Commit 16c2c1cd authored by Luc Giffon's avatar Luc Giffon
add model functions + fix encoding in executioner and cluger + change default pattern in gather_results + add nystrom end-to-end layer class + add ParameterManager class etc.
parent 7707a0f8
import os
import pickle
import tarfile

import numpy as np

from skluc.main.data.mldatasets.ImageDataset import ImageDataset
from skluc.main.utils import LabeledData
from skluc.main.utils import logger, check_files


class Cifar100FineDataset(ImageDataset):
    HEIGHT = 32
    WIDTH = 32
    DEPTH = 3

    def __init__(self, validation_size=0, seed=None, s_download_dir=None):
        self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
        self.meta = None
        name = "cifar100fine"
        if s_download_dir is not None:
            super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed)
        else:
            super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed)

        self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-100-python")
        self.__extracted_files = [
            'train',
            'test',
            'meta'
        ]
        self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files]
    def get_cifar100_data(self, keyword):
        """
        Get data from the extracted files whose name contains the keyword.

        :param keyword: substring to look for in the extracted file names (e.g. 'train' or 'test')
        :return: tuple (data, labels) of numpy arrays
        """
        full_data = []
        full_labels = []
        for fpath in self.__extracted_file_paths:
            if keyword in fpath.split('/')[-1]:
                with open(fpath, 'rb') as f:
                    pckl_data = pickle.load(f, encoding='bytes')
                    full_data.append(pckl_data[b'data'])
                    full_labels.append(pckl_data[b'fine_labels'])
        final_data = np.vstack(full_data)
        final_label = np.hstack(full_labels)
        return final_data, final_label
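
    # Illustrative note, not in the original file: each row of final_data is a
    # 3072-long uint8 vector stored as three 1024-entry colour planes (R, G, B),
    # the standard CIFAR pickle layout, so images can be recovered with e.g.
    #   images = final_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)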
    def get_meta(self):
        """
        Get the fine label names of cifar100 from the extracted meta file.

        :return: numpy array of fine label names
        """
        for fpath in self.__extracted_file_paths:
            if 'meta' in fpath.split('/')[-1]:
                with open(fpath, 'rb') as f:
                    pckl_data = pickle.load(f, encoding='bytes')
                    meta = pckl_data[b'fine_label_names']
        return np.array(meta)
    def read(self):
        targz_file_path = self.l_filepaths[-1]
        if not check_files(self.__extracted_file_paths):
            logger.debug("Extracting {} ...".format(targz_file_path))
            with tarfile.open(targz_file_path, "r:gz") as tar:
                tar.extractall(path=self.s_download_dir)
        else:
            logger.debug("File {} has already been extracted".format(targz_file_path))

        logger.debug("Get training data of dataset {}".format(self.s_name))
        self._train = LabeledData(*self.get_cifar100_data('train'))
        logger.debug("Get testing data of dataset {}".format(self.s_name))
        self._test = LabeledData(*self.get_cifar100_data('test'))
        self.meta = self.get_meta()
        self._check_validation_size(self._train[0].shape[0])
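
# Hedged usage sketch, not part of the original file. It assumes the
# ImageDataset base class fetches the archive before read() is called (the
# read() method above only extracts and unpickles it); attribute names follow
# the code above.
if __name__ == "__main__":
    dataset = Cifar100FineDataset(validation_size=1000, seed=0)
    dataset.read()
    data, labels = dataset._train              # LabeledData namedtuple
    print(data.shape, labels.shape)            # expected: (50000, 3072) (50000,)
    print(dataset.meta[:3])                    # first three fine label names (bytes)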
...@@ -311,6 +311,120 @@ class DeepstromLayer(tf.keras.layers.Layer):
        return out
class DeepstromLayerEndToEnd(tf.keras.layers.Layer):
    def __init__(self,
                 subsample_size,
                 kernel_name,
                 out_dim=None,
                 activation=None,
                 sum_of_kernels=False,
                 stack_of_kernels=False,
                 kernel_dict=None
                 ):
        # avoid the mutable default argument pitfall: a {} default would be
        # shared across all instances of the layer
        if kernel_dict is None:
            kernel_dict = {}

        def init_kernel():
            if kernel_name == "rbf":
                kernel_fct = rbf_kernel
                tf_kernel_fct = tf_rbf_kernel
            elif kernel_name == "linear":
                kernel_fct = linear_kernel
                tf_kernel_fct = tf_linear_kernel
            elif kernel_name == "chi2_cpd":
                kernel_fct = additive_chi2_kernel
                tf_kernel_fct = tf_chi_square_CPD
            elif kernel_name == "chi2_exp_cpd":
                kernel_fct = chi2_kernel
                tf_kernel_fct = tf_chi_square_CPD_exp
            elif kernel_name == "chi2_pd":
                raise NotImplementedError("Check carefully that this code does not mess things up")
            elif kernel_name == "laplacian":
                tf_kernel_fct = tf_laplacian_kernel
                kernel_fct = laplacian_kernel
            else:
                raise ValueError("Unknown kernel name: {}".format(kernel_name))
            return kernel_name, kernel_fct, tf_kernel_fct, kernel_dict
        def init_output_dim(subsample_size):
            if out_dim is not None and out_dim > subsample_size:
                # raising is preferable to exit() inside a library layer
                raise ValueError("Output dim {} is greater than deepstrom subsample size {}."
                                 .format(out_dim, subsample_size))
            elif out_dim is None:
                return subsample_size
            else:
                return out_dim

        def init_activation():
            # accept both spellings; ParameterManager uses "tanh"
            if activation in ("tan", "tanh"):
                activation_fct = tf.nn.tanh
            elif activation == "relu":
                activation_fct = tf.nn.relu
            else:
                activation_fct = activation
            return activation_fct
        super().__init__()

        self.__subsample_size = subsample_size
        self.__sum_of_kernels = sum_of_kernels
        self.__stack_of_kernels = stack_of_kernels

        self.__kernel_name, self.__kernel_fct, self.__tf_kernel_fct, self.__kernel_dict = init_kernel()
        self.__output_dim = init_output_dim(self.__subsample_size)
        self.__activation = init_activation()

        self.__W_matrix = None

        logger.info("Selecting deepstrom layer function with "
                    "subsample size = {}, "
                    "output_dim = {}, "
                    "{} activation function "
                    "and kernel = {}"
                    .format(self.__subsample_size,
                            self.__output_dim,
                            "with" if self.__activation else "without",
                            self.__kernel_name))

    def build(self, input_shape):
        if self.__output_dim != 0:
            # output_dim == 0 means there is no W matrix and the kernel vector is directly added as input to
            # the next layer
            self.__W_matrix = self.add_variable(
                name="W_nystrom",
                shape=[self.__subsample_size, self.__output_dim],
                initializer=tf.random_normal_initializer(stddev=0.1),
                trainable=True
            )
    def call(self, inputs, **kwargs):
        if not isinstance(inputs, list):
            raise ValueError("Inputs of layer deepstrom should be a list")
        if len(inputs[0].shape) != 2:
            raise ValueError(f"Input x should be 2D but it is {len(inputs[0].shape)}D")
        if len(inputs[1].shape) != 2:
            raise ValueError(f"Input subsample should be 2D but it is {len(inputs[1].shape)}D")
        if inputs[1].shape[0] != self.__subsample_size:
            raise ValueError(f"Subsample should be of size {self.__subsample_size}")
        if inputs[0][0].shape[0] != inputs[1][0].shape[0]:
            raise ValueError("Input and subsample should have the same dimension")

        input_x = inputs[0]
        input_sub = inputs[1]
        with tf.name_scope("NystromLayer"):
            with tf.name_scope("kernel_vec"):
                kernel_vector = self.__tf_kernel_fct(input_x, input_sub, **self.__kernel_dict)
            if self.__output_dim != 0:
                out = tf.matmul(kernel_vector, self.__W_matrix)
            else:
                out = kernel_vector
        if self.__activation is not None:
            out = self.__activation(out)
        return out
if __name__ == '__main__':
    DeepstromLayerEndToEnd(subsample_size=64,
                           kernel_name='chi2_cpd',
                           kernel_dict={})
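
    # Hedged continuation sketch, not in the original commit: wiring the layer
    # into a TF-1.x-style graph, consistent with the add_variable API used in
    # build() above. Shapes and the placeholder usage are illustrative assumptions.
    layer = DeepstromLayerEndToEnd(subsample_size=64, kernel_name='chi2_cpd', out_dim=16)
    x = tf.placeholder(tf.float32, shape=[8, 128])     # batch of 8 examples, 128 features
    sub = tf.placeholder(tf.float32, shape=[64, 128])  # 64 landmark points, same features
    out = layer([x, sub])                              # k(x, sub) @ W -> shape (8, 16)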
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.initializers import he_normal


def build_lenet_model(input_shape):
    model = Sequential()
    model.add(
        Conv2D(6, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal(), input_shape=input_shape))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(16, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Flatten())
    return model
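
# Illustrative shape check, not in the original file: for 32x32 RGB inputs the
# two valid 5x5 convolutions and 2x2 poolings leave a 5x5x16 map, so
#   build_lenet_model(input_shape=(32, 32, 3)).summary()
# should report a flattened output of (None, 400). See also the demo block
# after build_vgg19_model below.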
def build_vgg19_model(input_shape, weight_decay=0.0001):
    model = Sequential()

    # Block 1
    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block1_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))

    # Block 2
    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block2_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block2_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))

    # Block 3
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool'))

    # Block 4
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool'))

    # Block 5
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool'))

    model.add(Flatten(name='flatten'))
    return model
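
# Hedged sanity check, not in the original file: with 32x32x3 inputs the five
# 2x2 poolings of the VGG19 stack reduce the map to 1x1x512, so the flattened
# feature vector should be (None, 512); the LeNet one should be (None, 400).
if __name__ == "__main__":
    build_lenet_model(input_shape=(32, 32, 3)).summary()
    build_vgg19_model(input_shape=(32, 32, 3)).summary()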
# -*- coding: utf-8 -*-
"""
Cluger
...

# -*- coding: utf-8 -*-
"""
The Executioner is a tool used for performing clean experiments by managing the output files generated by them.
...
...@@ -9,7 +9,7 @@ Usage:
Options:
  -h --help                 Show this screen.
  -i --input-dir=<IPATH>    Input directory where to find results
  -p --patern=regex         Specify the pattern of the files to be looked at [default: .+\_stdout.txt].
  -r --header               Indicates whether there is a header in the result files.
  -v --verbose              Print the lines of the final file
"""
...
...@@ -14,6 +14,7 @@ import psutil
import collections

from sklearn.metrics.pairwise import additive_chi2_kernel
import tensorflow as tf

daiquiri.setup(level=logging.DEBUG)
logger = daiquiri.getLogger()
...@@ -189,6 +190,7 @@ def compute_euristic_sigma(dataset_full, slice_size=1000):
    :return:
    """
    results = []
    dataset_full = np.reshape(dataset_full, (-1, 1))
    if slice_size > dataset_full.shape[0]:
        slice_size = dataset_full.shape[0]
    for i in range(dataset_full.shape[0] // slice_size):
...@@ -240,6 +242,7 @@ def compute_euristic_sigma_chi2(dataset_full, slice_size=100):
    :param dataset: The dataset on which to look for the best sigma
    :return:
    """
    dataset_full = np.reshape(dataset_full, (-1, 1))
    results = []
    if slice_size > dataset_full.shape[0]:
        slice_size = dataset_full.shape[0]
...@@ -297,7 +300,124 @@ LabeledData = collections.namedtuple("LabeledData", ["data", "labels"])
DownloadableModel = collections.namedtuple("DownloadableModel", ["url", "checksum"])
class DictManager(dict):
    pass
    # def __getattr__(self, item):
    #     return self[item]
    #
    # def __setattr__(self, key, value):
    #     self[key] = value
    #
    # def __delattr__(self, item):
    #     del self[item]

    # def __missing__(self, key):
    #     logger.warning(f"Call to missing key {key} in {self.__class__.__name__}. None value returned.")
    #     self[key] = None
    #     return self[key]
class ParameterManager(DictManager):
    def get_gamma_value(self, dat, chi2=False):
        if self["--gamma"] is None:
            logger.debug("Gamma argument is None. Need to compute it.")
            if chi2:
                gamma_value = 1. / compute_euristic_sigma_chi2(dat)
            else:
                gamma_value = 1. / compute_euristic_sigma(dat)
        else:
            gamma_value = eval(self["--gamma"])
        logger.debug("Gamma value is {}".format(gamma_value))
        return gamma_value

    def init_kernel(self):
        if self["--rbf-kernel"]:
            return "rbf"
        elif self["--linear-kernel"]:
            return "linear"
        elif self["--chi-square-kernel"]:
            return "chi2_cpd"
        elif self["--exp-chi-square-kernel"]:
            return "chi2_exp_cpd"
        elif self["--chi-square-PD-kernel"]:
            return "chi2_pd"
        elif self["--laplacian-kernel"]:
            return "laplacian"
        else:
            return None

    def init_network(self):
        if self["dense"]:
            return "dense"
        elif self["deepfriedconvnet"]:
            return "deepfriedconvnet"
        elif self["deepstrom"]:
            return "deepstrom"
        elif self["none"]:
            return "none"

    def init_non_linearity(self):
        if self["--non-linearity"] == "tanh":
            return tf.nn.tanh
        elif self["--non-linearity"] == "relu":
            return tf.nn.relu
        elif self["--non-linearity"] == "None":
            return None

    def init_dataset(self):
        if self["--cifar10"]:
            return "cifar10"
        if self["--cifar100"]:
            return "cifar100"
        if self["--mnist"]:
            return "mnist"
        if self["--svhn"]:
            return "svhn"
class ResultManager(DictManager):
    pass


class ResultPrinter:
    def __init__(self, *args, header=True):
        self.__dicts = []
        self.__dicts.extend(args)
        self.__header = header

    def _get_ordered_items(self):
        all_keys = []
        all_values = []
        for d in self.__dicts:
            keys, values = zip(*d.items())
            all_keys.extend(keys)
            all_values.extend(values)
        arr_keys, arr_values = np.array(all_keys), np.array(all_values)
        indexes_sort = np.argsort(arr_keys)
        return list(arr_keys[indexes_sort]), list(arr_values[indexes_sort])

    def _get_values_ordered_by_keys(self):
        _, values = self._get_ordered_items()
        return values

    def _get_ordered_keys(self):
        keys, _ = self._get_ordered_items()
        return keys

    def add(self, d):
        self.__dicts.append(d)

    def print(self):
        headers, values = self._get_ordered_items()
        headers = [str(h) for h in headers]
        values = [str(v) for v in values]
        if self.__header:
            print(",".join(headers))
        print(",".join(values))
if __name__ == "__main__":
    paraman = ParameterManager({"a": 4})
    resulman = ResultManager({"b": 2})
    resprinter = ResultPrinter(paraman)
    resprinter.add(resulman)
    resprinter.print()
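    # Expected stdout, deducible from _get_ordered_items() above (keys are
    # sorted alphabetically across both dicts before printing):
    #   a,b
    #   4,2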