diff --git a/skluc/main/keras_/kernel.py b/skluc/main/keras_/kernel.py
index 807a3352e8546beb58462d2db507e4875aee970d..9ce4d5e35d64d2ce8d254cd7e70b18bb5c3ca95a 100644
--- a/skluc/main/keras_/kernel.py
+++ b/skluc/main/keras_/kernel.py
@@ -1,48 +1,62 @@
-import tensorflow as tf
+# import tensorflow as tf
+import keras.backend as K
+from keras.activations import tanh
 
 from skluc.main.tensorflow_.utils import replace_nan
 
 
-def keras_linear_kernel(args):
+def keras_linear_kernel(args, normalize=True):
     X = args[0]
     Y = args[1]
-    X = tf.nn.l2_normalize(X, axis=-1)
-    Y = tf.nn.l2_normalize(Y, axis=-1)
-    return tf.matmul(X, tf.transpose(Y))
+    if normalize:
+        X = K.l2_normalize(X, axis=-1)
+        Y = K.l2_normalize(Y, axis=-1)
+    return K.dot(X, K.transpose(Y))
 
-def keras_chi_square_CPD(args):
+
+def keras_chi_square_CPD(args, epsilon=None, tanh_activation=True, normalize=True):
     X = args[0]
     Y = args[1]
-    # X = tf.nn.l2_normalize(X, axis=-1)
-    # Y = tf.nn.l2_normalize(Y, axis=-1)
+    if normalize:
+        X = K.l2_normalize(X, axis=-1)
+        Y = K.l2_normalize(Y, axis=-1)
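+    # Additive chi-square kernel: k(x, y) = - sum_i (x_i - y_i)^2 / (x_i + y_i),
+    # computed for every pair of rows of X and Y through the broadcasting below.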
     # the drawing of the matrix X expanded looks like a wall
-    wall = tf.expand_dims(X, axis=1)
+    wall = K.expand_dims(X, axis=1)
     # the drawing of the matrix Y expanded looks like a floor
-    floor = tf.expand_dims(Y, axis=0)
-    numerator = tf.square((wall - floor))
+    floor = K.expand_dims(Y, axis=0)
+    numerator = K.square((wall - floor))
     denominator = wall + floor
-    quotient = numerator / denominator
+    if epsilon is not None:
+        quotient = numerator / (denominator + epsilon)
+    else:
+        quotient = numerator / denominator
     quotient_without_nan = replace_nan(quotient)
-    K = - tf.reduce_sum(quotient_without_nan, axis=2)
-    return K
-
-def chi2_kernel(args):
-    x = args[0]
-    y = tf.concat(args[1:], 0)
-
-    x = tf.nn.l2_normalize(x, axis=-1)
-    y = tf.nn.l2_normalize(y, axis=-1)
-    # the drawing of the matrix X expanded looks like a wall
-    wall = tf.expand_dims(x, axis=1)
-    # the drawing of the matrix Y expanded looks like a floor
-    floor = tf.expand_dims(y, axis=0)
-    numerator = tf.square(tf.subtract(wall, floor))
-    denominator = tf.add(wall, floor) + 0.001
-    quotient = numerator / denominator
-    quotient_without_nan = quotient #replace_nan(quotient)
-
-    K = - tf.reduce_sum(quotient_without_nan, axis=2)
-    return tf.tanh(K)
-
-if __name__ == '__main__':
-    a = tf.Constant(value=0)
\ No newline at end of file
+    result = - K.sum(quotient_without_nan, axis=2)
+    if tanh_activation:
+        return tanh(result)
+    else:
+        return result
+
+
+def keras_chi_square_CPD_exp(X, Y, gamma):
+    # keras_chi_square_CPD expects its two inputs packed in a single sequence, and the
+    # raw (non-tanh) chi-square value is needed before the exponential is applied
+    kernel = keras_chi_square_CPD([X, Y], tanh_activation=False)
+    kernel *= gamma
+    return K.exp(kernel)
+
+# def chi2_kernel(args):
+#     x = args[0]
+#     y = tf.concat(args[1:], 0)
+#
+#     x = tf.nn.l2_normalize(x, axis=-1)
+#     y = tf.nn.l2_normalize(y, axis=-1)
+#     # the drawing of the matrix X expanded looks like a wall
+#     wall = tf.expand_dims(x, axis=1)
+#     # the drawing of the matrix Y expanded looks like a floor
+#     floor = tf.expand_dims(y, axis=0)
+#     numerator = tf.square(tf.subtract(wall, floor))
+#     denominator = tf.add(wall, floor) + 0.001
+#     quotient = numerator / denominator
+#     quotient_without_nan = quotient #replace_nan(quotient)
+#
+#     K = - tf.reduce_sum(quotient_without_nan, axis=2)
+#     return tf.tanh(K)
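+
+
+if __name__ == '__main__':
+    # Minimal sanity-check sketch (assumption: run as a standalone script, not part of
+    # the library API): evaluate the kernels on small random matrices via the backend.
+    import numpy as np
+
+    x = K.constant(np.random.rand(3, 5).astype(np.float32))
+    y = K.constant(np.random.rand(4, 5).astype(np.float32))
+    # Linear kernel: rows of x against rows of y -> (3, 4) Gram matrix.
+    print(K.eval(keras_linear_kernel([x, y])))
+    # Additive chi-square kernel, with a small epsilon to stabilise the division.
+    print(K.eval(keras_chi_square_CPD([x, y], epsilon=1e-8)))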
diff --git a/skluc/main/keras_/kernel_approximation/fastfood_layer.py b/skluc/main/keras_/kernel_approximation/fastfood_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f358bac19620b540bcc1979bab80fb279a17586
--- /dev/null
+++ b/skluc/main/keras_/kernel_approximation/fastfood_layer.py
@@ -0,0 +1,255 @@
+import keras
+import keras.backend as K
+import numpy as np
+import scipy.linalg
+import scipy.stats
+import tensorflow as tf
+import tensorflow.summary as tfsum
+
+
+# --- Fast Food Naive --- #
+
+
+def G_variable(shape, trainable=False):
+    """
+    Return a Gaussian Random matrix converted into Tensorflow Variable.
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :return: a K.variable (or K.constant if not trainable) containing the matrix, and the L2 norm of each row (np.array of float)
+    """
+    assert type(shape) == int or (type(shape) == tuple and len(shape) == 2)
+    G = np.random.normal(size=shape).astype(np.float32)
+    G_norms = np.linalg.norm(G, ord=2, axis=1)
+    if trainable:
+        return K.variable(G, name="G"), G_norms
+    else:
+        return K.constant(G, name="G"), G_norms
+
+
+def B_variable(shape, trainable=False):
+    """
+    Return a random matrix of -1 and 1 picked uniformly and converted into Tensorflow Variable.
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :return: a K.variable (or K.constant if not trainable) containing the matrix
+    """
+    assert type(shape) == int or (type(shape) == tuple and len(shape) == 2)
+    B = np.random.choice([-1, 1], size=shape, replace=True).astype(np.float32)
+    if trainable:
+        return K.variable(B, name="B")
+    else:
+        return K.constant(B, name="B")
+
+
+def P_variable(d, nbr_stack):
+    """
+    Return a permutation matrix converted into Tensorflow Variable.
+
+    :param d: The width of the matrix (dimension of the input space)
+    :type d: int
+    :param nbr_stack: The height of the matrix (nbr_stack x d is the dimension of the output space)
+    :type nbr_stack: int
+    :return: a K.constant containing the permutation matrix (not trainable)
+    """
+    idx = np.hstack([(i * d) + np.random.permutation(d) for i in range(nbr_stack)])
+    P = np.random.permutation(np.eye(N=nbr_stack * d))[idx].astype(np.float32)
+    return K.constant(P, name="P")
+
+
+def H_variable(d):
+    """
+    Return an Hadamard matrix converted into Tensorflow Variable.
+
+    d must be a power of two.
+
+    :param d: The size of the Hadamard matrix (dimension of the input space).
+    :type d: int
+    :return: a K.constant containing the Hadamard matrix (not trainable)
+    """
+    H = build_hadamard(d).astype(np.float32)
+    return K.constant(H, name="H")
+
+
+def S_variable(shape, G_norms, trainable=False):
+    """
+    Return a scaling matrix of random values picked from a chi distribution.
+
+    The values are re-scaled using the norm of the associated Gaussian random matrix G. The associated Gaussian
+    vectors are the ones generated by the `G_variable` function.
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :param G_norms: The norms of the associated Gaussian random matrices G.
+    :type G_norms: np.array of floats
+    :return: a K.variable (or K.constant if not trainable) containing the matrix.
+    """
+    S = np.multiply((1 / G_norms.reshape((-1, 1))), scipy.stats.chi.rvs(shape[1], size=shape).astype(np.float32))
+    if trainable:
+        return K.variable(S, name="S")
+    else:
+        return K.constant(S, name="S")
+
+
+def fastfood_layer(conv_out, sigma, nbr_stack=1, trainable=False):
+    """
+    Return a fastfood transform op compatible with tensorflow graph.
+
+    Implementation largely inspired from https://gist.github.com/dougalsutherland/1a3c70e57dd1f64010ab .
+
+    See:
+    "Fastfood | Approximating Kernel Expansions in Loglinear Time" by
+    Quoc Le, Tamas Sarl and Alex Smola.
+
+    :param conv_out: the input of the op
+    :param sigma: bandwidth of the Gaussian distribution
+    :param nbr_stack: number of fast food stacks
+    :param trainable: the diagonal matrices are trainable or not
+    :return: the output of the fastfood transform
+    """
+    raise NotImplementedError("fastfood_layer function is not yet correctly implemented in keras")
+    with K.name_scope("fastfood" + "_sigma-" + str(sigma)):
+        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
+        final_dim = int(dimensionality_constraints(init_dim))
+        padding = final_dim - init_dim
+        conv_out2 = K.reshape(conv_out, [-1, init_dim])
+        paddings = K.constant([[0, 0], [0, padding]])
+        conv_out2 = K.pad(conv_out2, paddings, "CONSTANT")
+
+        G, G_norm = G_variable((nbr_stack, final_dim), trainable=trainable)
+        tfsum.histogram("weights_G", G)
+        B = B_variable((nbr_stack, final_dim), trainable=trainable)
+        tfsum.histogram("weights_B", B)
+        H = H_variable(final_dim)
+        tfsum.histogram("weights_H", H)
+        P = P_variable(final_dim, nbr_stack)
+        tfsum.histogram("weights_P", P)
+        S = S_variable((nbr_stack, final_dim), G_norm, trainable=trainable)
+        tfsum.histogram("weights_S", S)
+
+        conv_out2 = K.reshape(conv_out2, (1, -1, 1, final_dim))
+        h_ff1 = conv_out2 * B
+        h_ff1 = K.reshape(h_ff1, (-1, final_dim))
+        h_ff2 = K.dot(h_ff1, H)
+        h_ff2 = K.reshape(h_ff2, (-1, final_dim * nbr_stack))
+        h_ff3 = K.dot(h_ff2, P)
+        h_ff4 = K.reshape(h_ff3, (-1, final_dim * nbr_stack)) * K.reshape(G, (-1, final_dim * nbr_stack))
+        h_ff4 = K.reshape(h_ff4, (-1, final_dim))
+        h_ff5 = K.dot(h_ff4, H)
+
+        h_ff6 = (1. / (sigma * np.sqrt(final_dim))) * K.reshape(h_ff5, (-1, final_dim * nbr_stack)) * K.reshape(S, (-1, final_dim * nbr_stack))
+        h_ff7_1 = K.cos(h_ff6)
+        h_ff7_2 = K.sin(h_ff6)
+        h_ff7 = K.sqrt(float(1 / final_dim)) * K.concatenate([h_ff7_1, h_ff7_2], axis=1)
+    return h_ff7
+
+
+# --- Hadamard utils --- #
+
+
+def dimensionality_constraints(d):
+    """
+    Enforce d to be a power of 2
+
+    :param d: the original dimension
+    :return: the final dimension
+    """
+    if not is_power_of_two(d):
+        # find d that fulfills 2^l
+        d = np.power(2, np.floor(np.log2(d)) + 1)
+    return d
+
+
+def is_power_of_two(input_integer):
+    """ Test if an integer is a power of two. """
+    if input_integer == 1:
+        return False
+    return input_integer != 0 and ((input_integer & (input_integer - 1)) == 0)
+
+
+def build_hadamard(n_neurons):
+    return scipy.linalg.hadamard(n_neurons)
+
+
+class FastFoodLayer(keras.layers.Layer):
+    def __init__(self, sigma, nbr_stack, trainable=True, **kwargs):
+        super().__init__(**kwargs)
+        self.__sigma = sigma
+        self.__nbr_stack = nbr_stack
+        self.__trainable = trainable
+
+        self.__init_dim = None
+        self.__final_dim = None
+
+    def build(self, input_shape):
+        with K.name_scope("fastfood" + "_sigma-" + str(self.__sigma)):
+            self.__init_dim = np.prod([s for s in input_shape if s is not None])
+            self.__final_dim = int(dimensionality_constraints(self.__init_dim))
+            self.num_outputs = None
+
+            G, G_norm = G_variable((self.__nbr_stack, self.__final_dim))
+            self.__G = self.add_weight(
+                name="G",
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: G,
+                trainable=self.__trainable
+            )
+
+            B = B_variable((self.__nbr_stack, self.__final_dim))
+            self.__B = self.add_weight(
+                name="B",
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: B,
+                trainable=self.__trainable
+            )
+
+            H = H_variable(self.__final_dim)
+            self.__H = self.add_weight(
+                name="H",
+                shape=(self.__final_dim, self.__final_dim),
+                initializer=lambda *args, **kwargs: H,
+                trainable=False
+            )
+
+            P = P_variable(self.__final_dim, self.__nbr_stack)
+            self.__P = self.add_weight(
+                name="P",
+                shape=(self.__final_dim * self.__nbr_stack, self.__final_dim * self.__nbr_stack),
+                initializer=lambda *args, **kwargs: P,
+                trainable=False
+            )
+
+            S = S_variable((self.__nbr_stack, self.__final_dim), G_norm)
+            self.__S = self.add_weight(
+                name="S",
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: S,
+                trainable=self.__trainable
+            )
+
+        self.num_outputs = self.__final_dim * self.__nbr_stack
+
+    def call(self, input, **kwargs):
+        padding = self.__final_dim - self.__init_dim
+        conv_out2 = K.reshape(input, [-1, self.__init_dim])
+        paddings = K.constant([[0, 0], [0, padding]], dtype=np.int32)
+        conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")
+        conv_out2 = K.reshape(conv_out2, (1, -1, 1, self.__final_dim))
+        h_ff1 = conv_out2 * self.__B
+        h_ff1 = K.reshape(h_ff1, (-1, self.__final_dim))
+        h_ff2 = K.dot(h_ff1, self.__H)
+        h_ff2 = K.reshape(h_ff2, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff3 = K.dot(h_ff2, self.__P)
+        h_ff4 = K.reshape(h_ff3, (-1, self.__final_dim * self.__nbr_stack)) * K.reshape(self.__G, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff4 = K.reshape(h_ff4, (-1, self.__final_dim))
+        h_ff5 = K.dot(h_ff4, self.__H)
+
+        h_ff6 = (1 / (self.__sigma * np.sqrt(self.__final_dim))) * K.reshape(h_ff5, (-1, self.__final_dim * self.__nbr_stack)) * K.reshape(self.__S, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff7_1 = K.cos(h_ff6)
+        h_ff7_2 = K.sin(h_ff6)
+        h_ff7 = np.sqrt(float(1 / self.__final_dim)) * K.concatenate([h_ff7_1, h_ff7_2], axis=1)
+        return h_ff7
+
+    def compute_output_shape(self, input_shape):
+        # the input is flattened and padded up to the next power of two before the transform
+        final_dim = int(dimensionality_constraints(np.prod([s for s in input_shape if s is not None])))
+        return (input_shape[0], 2 * final_dim * self.__nbr_stack)
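+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (assumptions: a standalone script, a 32-dimensional dense input
+    # and arbitrary hyper-parameters): plug the layer into a small functional model and
+    # check that the output dimension is 2 * final_dim * nbr_stack.
+    from keras.layers import Dense, Input
+    from keras.models import Model
+
+    inp = Input(shape=(30,))
+    hidden = Dense(32, activation="relu")(inp)
+    out = FastFoodLayer(sigma=1.0, nbr_stack=2, trainable=False)(hidden)
+    model = Model(inputs=inp, outputs=out)
+    model.summary()  # expected output shape: (None, 2 * 32 * 2) = (None, 128)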
diff --git a/skluc/main/keras_/kernel_approximation/nystrom_layer.py b/skluc/main/keras_/kernel_approximation/nystrom_layer.py
index 1c2458c32d19f30574e838a5fd18b95919ab9e77..8b4e646e3080c6f2405166f8dfe6574c2e926461 100644
--- a/skluc/main/keras_/kernel_approximation/nystrom_layer.py
+++ b/skluc/main/keras_/kernel_approximation/nystrom_layer.py
@@ -2,16 +2,16 @@
 Convnet with nystrom approximation of the feature map.
 
 """
-import time as t
 
-import numpy as np
 import keras
+import keras.backend as K
+import tensorflow.summary as tfsum
 from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
 
-import skluc.main.data.mldatasets as dataset
-from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, tf_laplacian_kernel, tf_linear_kernel
+from skluc.main.keras_.kernel import keras_chi_square_CPD, keras_linear_kernel
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_chi_square_CPD_exp, tf_laplacian_kernel
 from skluc.main.utils import logger
-import tensorflow as tf
+
 
 class DeepstromLayerEndToEnd(keras.layers.Layer):
     def __init__(self,
@@ -25,25 +25,30 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
                  ):
 
         def init_kernel():
-            if kernel_name == "rbf":
-                kernel_fct = rbf_kernel
-                tf_kernel_fct = tf_rbf_kernel
-            elif kernel_name == "linear":
-                kernel_fct = linear_kernel
-                tf_kernel_fct = tf_linear_kernel
-            elif kernel_name == "chi2_cpd":
-                kernel_fct = additive_chi2_kernel
-                tf_kernel_fct = tf_chi_square_CPD
-            elif kernel_name == "chi2_exp_cpd":
-                kernel_fct = chi2_kernel
-                tf_kernel_fct = tf_chi_square_CPD_exp
-            elif kernel_name == "chi2_pd":
-                raise NotImplementedError("Bien verifier que ce code ne fait pas bordel")
-            elif kernel_name == "laplacian":
-                tf_kernel_fct = tf_laplacian_kernel
-                kernel_fct = laplacian_kernel
-            else:
-                raise ValueError("Unknown kernel name: {}".format(kernel_name))
+            try:
+                if kernel_name == "rbf":
+                    raise NotImplementedError
+                    kernel_fct = rbf_kernel
+                    tf_kernel_fct = tf_rbf_kernel
+                elif kernel_name == "linear":
+                    kernel_fct = linear_kernel
+                    tf_kernel_fct = keras_linear_kernel
+                elif kernel_name == "chi2_cpd":
+                    kernel_fct = additive_chi2_kernel
+                    tf_kernel_fct = keras_chi_square_CPD
+                elif kernel_name == "chi2_exp_cpd":
+                    raise NotImplementedError
+                    kernel_fct = chi2_kernel
+                    tf_kernel_fct = tf_chi_square_CPD_exp
+                elif kernel_name == "chi2_pd":
+                    raise NotImplementedError  # todo: check carefully that this code does not cause a mess
+                elif kernel_name == "laplacian":
+                    tf_kernel_fct = tf_laplacian_kernel
+                    kernel_fct = laplacian_kernel
+                else:
+                    raise ValueError("Unknown kernel name: {}".format(kernel_name))
+            except NotImplementedError:
+                raise NotImplementedError(f"kernel {kernel_name} has not been implemented with keras yet")
             return kernel_name, kernel_fct, tf_kernel_fct, kernel_dict
 
         def init_output_dim(subsample_size):
@@ -57,9 +62,9 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
 
         def init_activation():
             if activation == "tan":
-                activation_fct = tf.nn.tanh
+                activation_fct = keras.activations.tanh
             elif activation == "relu":
-                activation_fct = tf.nn.relu
+                activation_fct = keras.activations.relu
             else:
                 activation_fct = activation
 
@@ -117,16 +122,16 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
 
         input_x = inputs[0]
         input_sub = inputs[1]
-        input_x = tf.nn.l2_normalize(input_x, axis=-1)
-        input_sub = tf.nn.l2_normalize(input_sub, axis=-1)
-        with tf.name_scope("NystromLayer"):
-            with tf.name_scope("kernel_vec"):
+        # input_x = K.l2_normalize(input_x, axis=-1)
+        # input_sub = K.l2_normalize(input_sub, axis=-1)
+        with K.name_scope("NystromLayer"):
+            with K.name_scope("kernel_vec"):
                 kernel_vector = self.__tf_kernel_fct(input_x, input_sub, **self.__kernel_dict)
-                tf.summary.histogram("kernel_vector", kernel_vector)
+                tfsum.histogram("kernel_vector", kernel_vector)
 
             if self.__output_dim != 0:
-                out = tf.matmul(kernel_vector, self.__W_matrix)
-                tf.summary.histogram("W_matrix", self.__W_matrix)
+                out = K.dot(kernel_vector, self.__W_matrix)
+                tfsum.histogram("W_matrix", self.__W_matrix)
             else:
                 out = kernel_vector
         if self.__activation is not None:
diff --git a/skluc/main/keras_/models.py b/skluc/main/keras_/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fc1f323ed971470a9d930f1ae00031a5b3a0535
--- /dev/null
+++ b/skluc/main/keras_/models.py
@@ -0,0 +1,205 @@
+from keras.initializers import he_normal
+from keras.layers import Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation
+from keras.models import Sequential
+from keras.regularizers import l2
+
+
+def build_lenet_model(input_shape):
+    model = Sequential()
+    model.add(
+        Conv2D(6, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal(), input_shape=input_shape))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+    model.add(Conv2D(16, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal()))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+    model.add(Flatten())
+    return model
+
+
+def build_steph_model(input_shape):
+    model = Sequential()
+    model.add(Conv2D(64, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None, input_shape=input_shape))
+    model.add(Activation('relu'))
+    model.add(Conv2D(128, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None))
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None))
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, kernel_size=(3, 3), strides=(1, 1), padding='same', activation=None))
+    model.add(BatchNormalization())
+    model.add(MaxPooling2D((2, 2)))
+    model.add(Activation('relu'))
+    model.add(Flatten())
+    return model
+
+
+def build_vgg19_model(input_shape, weight_decay=0.0001):
+    model = Sequential()
+
+    # Block 1
+    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block1_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))
+
+    # Block 2
+    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block2_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block2_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))
+
+    # Block 3
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block3_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block3_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block3_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block3_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool'))
+
+    # Block 4
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block4_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block4_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block4_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block4_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool'))
+
+    # Block 5
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block5_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block5_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block5_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     kernel_initializer=he_normal(), name='block5_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool'))
+
+    model.add(Flatten(name='flatten'))
+
+    return model
+
+
+def build_vgg19_model_glorot(input_shape, weight_decay=0.0001):
+    model = Sequential()
+
+    # Block 1
+    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block1_conv1', input_shape=input_shape))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block1_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))
+
+    # Block 2
+    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block2_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block2_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))
+
+    # Block 3
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block3_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block3_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block3_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block3_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool'))
+
+    # Block 4
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block4_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block4_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block4_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block4_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool'))
+
+    # Block 5
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block5_conv1'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block5_conv2'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block5_conv3'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
+                     name='block5_conv4'))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool'))
+
+    model.add(Flatten(name='flatten'))
+
+    return model
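+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (assumptions: 32x32 RGB inputs and 10 classes, e.g. CIFAR-10;
+    # data loading and training are out of scope here): attach a softmax head to the
+    # LeNet feature extractor and compile it.
+    from keras.layers import Dense
+
+    model = build_lenet_model(input_shape=(32, 32, 3))
+    model.add(Dense(10, activation='softmax'))
+    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
+    model.summary()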