diff --git a/skluc/main/keras_/kernel.py b/skluc/main/keras_/kernel.py
index 807a3352e8546beb58462d2db507e4875aee970d..9ce4d5e35d64d2ce8d254cd7e70b18bb5c3ca95a 100644
--- a/skluc/main/keras_/kernel.py
+++ b/skluc/main/keras_/kernel.py
@@ -1,48 +1,62 @@
-import tensorflow as tf
+# import tensorflow as tf
+import keras.backend as K
+from keras.activations import tanh
 from skluc.main.tensorflow_.utils import replace_nan
 
 
-def keras_linear_kernel(args):
+def keras_linear_kernel(args, normalize=True):
     X = args[0]
     Y = args[1]
-    X = tf.nn.l2_normalize(X, axis=-1)
-    Y = tf.nn.l2_normalize(Y, axis=-1)
-    return tf.matmul(X, tf.transpose(Y))
+    if normalize:
+        X = K.l2_normalize(X, axis=-1)
+        Y = K.l2_normalize(Y, axis=-1)
+    return K.dot(X, K.transpose(Y))
 
-def keras_chi_square_CPD(args):
+
+def keras_chi_square_CPD(args, epsilon=None, tanh_activation=True, normalize=True):
     X = args[0]
     Y = args[1]
-    # X = tf.nn.l2_normalize(X, axis=-1)
-    # Y = tf.nn.l2_normalize(Y, axis=-1)
+    if normalize:
+        X = K.l2_normalize(X, axis=-1)
+        Y = K.l2_normalize(Y, axis=-1)
     # the drawing of the matrix X expanded looks like a wall
-    wall = tf.expand_dims(X, axis=1)
+    wall = K.expand_dims(X, axis=1)
     # the drawing of the matrix Y expanded looks like a floor
-    floor = tf.expand_dims(Y, axis=0)
-    numerator = tf.square((wall - floor))
+    floor = K.expand_dims(Y, axis=0)
+    numerator = K.square((wall - floor))
     denominator = wall + floor
-    quotient = numerator / denominator
+    if epsilon is not None:
+        quotient = numerator / (denominator + epsilon)
+    else:
+        quotient = numerator / denominator
     quotient_without_nan = replace_nan(quotient)
-    K = - tf.reduce_sum(quotient_without_nan, axis=2)
-    return K
-
-def chi2_kernel(args):
-    x = args[0]
-    y = tf.concat(args[1:], 0)
-
-    x = tf.nn.l2_normalize(x, axis=-1)
-    y = tf.nn.l2_normalize(y, axis=-1)
-    # the drawing of the matrix X expanded looks like a wall
-    wall = tf.expand_dims(x, axis=1)
-    # the drawing of the matrix Y expanded looks like a floor
-    floor = tf.expand_dims(y, axis=0)
-    numerator = tf.square(tf.subtract(wall, floor))
-    denominator = tf.add(wall, floor) + 0.001
-    quotient = numerator / denominator
-    quotient_without_nan = quotient #replace_nan(quotient)
-
-    K = - tf.reduce_sum(quotient_without_nan, axis=2)
-    return tf.tanh(K)
-
-if __name__ == '__main__':
-    a = tf.Constant(value=0)
\ No newline at end of file
+    result = - K.sum(quotient_without_nan, axis=2)
+    if tanh_activation:
+        return tanh(result)
+    else:
+        return result
+
+
+def keras_chi_square_CPD_exp(X, Y, gamma):
+    result = keras_chi_square_CPD([X, Y], tanh_activation=False)
+    result *= gamma
+    return K.exp(result)
+
+# def chi2_kernel(args):
+#     x = args[0]
+#     y = tf.concat(args[1:], 0)
+#
+#     x = tf.nn.l2_normalize(x, axis=-1)
+#     y = tf.nn.l2_normalize(y, axis=-1)
+#     # the drawing of the matrix X expanded looks like a wall
+#     wall = tf.expand_dims(x, axis=1)
+#     # the drawing of the matrix Y expanded looks like a floor
+#     floor = tf.expand_dims(y, axis=0)
+#     numerator = tf.square(tf.subtract(wall, floor))
+#     denominator = tf.add(wall, floor) + 0.001
+#     quotient = numerator / denominator
+#     quotient_without_nan = quotient #replace_nan(quotient)
+#
+#     K = - tf.reduce_sum(quotient_without_nan, axis=2)
+#     return tf.tanh(K)
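For reference, with normalize=False and tanh_activation=False, keras_chi_square_CPD computes the same quantity as sklearn.metrics.pairwise.additive_chi2_kernel on non-negative inputs. A minimal sanity-check sketch (not part of the diff; the toy data below is illustrative):

import numpy as np
import keras.backend as K
from sklearn.metrics.pairwise import additive_chi2_kernel
from skluc.main.keras_.kernel import keras_chi_square_CPD

# small non-negative toy matrices (chi-square kernels expect non-negative features)
X = np.random.rand(5, 10).astype(np.float32)
Y = np.random.rand(3, 10).astype(np.float32)

# evaluate the symbolic kernel on constants and compare with the sklearn reference
K_keras = K.eval(keras_chi_square_CPD([K.constant(X), K.constant(Y)],
                                      tanh_activation=False, normalize=False))
K_sklearn = additive_chi2_kernel(X, Y)
print(np.allclose(K_keras, K_sklearn, atol=1e-5))  # expected to print True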
diff --git a/skluc/main/keras_/kernel_approximation/fastfood_layer.py b/skluc/main/keras_/kernel_approximation/fastfood_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f358bac19620b540bcc1979bab80fb279a17586
--- /dev/null
+++ b/skluc/main/keras_/kernel_approximation/fastfood_layer.py
@@ -0,0 +1,255 @@
+import keras
+import keras.backend as K
+import numpy as np
+import scipy.linalg
+import scipy.stats
+import tensorflow as tf
+import tensorflow.summary as tfsum
+
+
+# --- Fast Food Naive --- #
+
+
+def G_variable(shape, trainable=False):
+    """
+    Return a Gaussian random matrix as a Keras variable (or constant).
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :param trainable: If True, return a trainable K.variable, otherwise a K.constant.
+    :return: K.variable (or K.constant) object containing the matrix, the norm 2 of each line (np.array of float)
+    """
+    assert type(shape) == int or (type(shape) == tuple and len(shape) == 2)
+    G = np.random.normal(size=shape).astype(np.float32)
+    G_norms = np.linalg.norm(G, ord=2, axis=1)
+    if trainable:
+        return K.variable(G, name="G"), G_norms
+    else:
+        return K.constant(G, name="G"), G_norms
+
+
+def B_variable(shape, trainable=False):
+    """
+    Return a random matrix of -1 and 1 picked uniformly, as a Keras variable (or constant).
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :param trainable: If True, return a trainable K.variable, otherwise a K.constant.
+    :return: K.variable (or K.constant) object containing the matrix
+    """
+    assert type(shape) == int or (type(shape) == tuple and len(shape) == 2)
+    B = np.random.choice([-1, 1], size=shape, replace=True).astype(np.float32)
+    if trainable:
+        return K.variable(B, name="B")
+    else:
+        return K.constant(B, name="B")
+
+
+def P_variable(d, nbr_stack):
+    """
+    Return a permutation matrix as a Keras constant.
+
+    :param d: The width of the matrix (dimension of the input space)
+    :type d: int
+    :param nbr_stack: The height of the matrix (nbr_stack x d is the dimension of the output space)
+    :type nbr_stack: int
+    :return: K.constant object containing the matrix
+    """
+    idx = np.hstack([(i * d) + np.random.permutation(d) for i in range(nbr_stack)])
+    P = np.random.permutation(np.eye(N=nbr_stack * d))[idx].astype(np.float32)
+    return K.constant(P, name="P")
+
+
+def H_variable(d):
+    """
+    Return a Hadamard matrix as a Keras constant (not trainable).
+
+    d must be a power of two.
+
+    :param d: The size of the Hadamard matrix (dimension of the input space).
+    :type d: int
+    :return: K.constant object containing the Hadamard matrix
+    """
+    H = build_hadamard(d).astype(np.float32)
+    return K.constant(H, name="H")
+
+
+def S_variable(shape, G_norms, trainable=False):
+    """
+    Return a scaling matrix of random values picked from a chi distribution.
+
+    The values are re-scaled using the norm of the associated Gaussian random matrix G. The associated Gaussian
+    vectors are the ones generated by the `G_variable` function.
+
+    :param shape: The shape of the matrix (number of fastfood stacks (v), dimension of the input space (d))
+    :type shape: int or tuple of int (tuple size = 2)
+    :param G_norms: The norms of the associated Gaussian random matrices G.
+    :type G_norms: np.array of floats
+    :param trainable: If True, return a trainable K.variable, otherwise a K.constant.
+    :return: K.variable (or K.constant) object containing the matrix.
+    """
+    S = np.multiply((1 / G_norms.reshape((-1, 1))), scipy.stats.chi.rvs(shape[1], size=shape).astype(np.float32))
+    if trainable:
+        return K.variable(S, name="S")
+    else:
+        return K.constant(S, name="S")
+
+
+def fastfood_layer(conv_out, sigma, nbr_stack=1, trainable=False):
+    """
+    Return a fastfood transform op compatible with tensorflow graph.
+
+    Implementation largely inspired from https://gist.github.com/dougalsutherland/1a3c70e57dd1f64010ab .
+
+    See:
+    "Fastfood - Approximating Kernel Expansions in Loglinear Time" by
+    Quoc Le, Tamas Sarlos and Alex Smola.
+
+    :param conv_out: the input of the op
+    :param sigma: bandwidth of the Gaussian distribution
+    :param nbr_stack: number of fastfood stacks
+    :param trainable: whether the diagonal matrices are trainable or not
+    :return: the output of the fastfood transform
+    """
+    raise NotImplementedError("fastfood_layer function is not yet correctly implemented in keras")
+    with K.name_scope("fastfood" + "_sigma-" + str(sigma)):
+        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
+        final_dim = int(dimensionality_constraints(init_dim))
+        padding = final_dim - init_dim
+        conv_out2 = K.reshape(conv_out, [-1, init_dim])
+        paddings = K.constant([[0, 0], [0, padding]], dtype=np.int32)
+        # keras.backend has no pad op: use tf.pad directly
+        conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")
+
+        G, G_norm = G_variable((nbr_stack, final_dim), trainable=trainable)
+        tfsum.histogram("weights_G", G)
+        B = B_variable((nbr_stack, final_dim), trainable=trainable)
+        tfsum.histogram("weights_B", B)
+        H = H_variable(final_dim)
+        tfsum.histogram("weights_H", H)
+        P = P_variable(final_dim, nbr_stack)
+        tfsum.histogram("weights_P", P)
+        S = S_variable((nbr_stack, final_dim), G_norm, trainable=trainable)
+        tfsum.histogram("weights_S", S)
+
+        conv_out2 = K.reshape(conv_out2, (1, -1, 1, final_dim))
+        h_ff1 = conv_out2 * B
+        h_ff1 = K.reshape(h_ff1, (-1, final_dim))
+        h_ff2 = K.dot(h_ff1, H)
+        h_ff2 = K.reshape(h_ff2, (-1, final_dim * nbr_stack))
+        h_ff3 = K.dot(h_ff2, P)
+        h_ff4 = K.reshape(h_ff3, (-1, final_dim * nbr_stack)) * K.reshape(G, (-1, final_dim * nbr_stack))
+        h_ff4 = K.reshape(h_ff4, (-1, final_dim))
+        h_ff5 = K.dot(h_ff4, H)
+
+        h_ff6 = (1. / (sigma * np.sqrt(final_dim))) * K.reshape(h_ff5, (-1, final_dim * nbr_stack)) * K.reshape(S, (-1, final_dim * nbr_stack))
+        h_ff7_1 = K.cos(h_ff6)
+        h_ff7_2 = K.sin(h_ff6)
+        h_ff7 = K.sqrt(float(1 / final_dim)) * K.concatenate([h_ff7_1, h_ff7_2], axis=1)
+        return h_ff7
+
+
+# --- Hadamard utils --- #
+
+
+def dimensionality_constraints(d):
+    """
+    Enforce d to be a power of 2
+
+    :param d: the original dimension
+    :return: the final dimension
+    """
+    if not is_power_of_two(d):
+        # find d that fulfills 2^l
+        d = np.power(2, np.floor(np.log2(d)) + 1)
+    return d
+
+
+def is_power_of_two(input_integer):
+    """ Test if an integer is a power of two. """
+    if input_integer == 1:
+        return False
+    return input_integer != 0 and ((input_integer & (input_integer - 1)) == 0)
+
+
+def build_hadamard(n_neurons):
+    return scipy.linalg.hadamard(n_neurons)
+
+
+class FastFoodLayer(keras.layers.Layer):
+    def __init__(self, sigma, nbr_stack, trainable=True, **kwargs):
+        super().__init__(**kwargs)
+        self.__sigma = sigma
+        self.__nbr_stack = nbr_stack
+        self.__trainable = trainable
+
+        self.__init_dim = None
+        self.__final_dim = None
+
+    def build(self, input_shape):
+        with K.name_scope("fastfood" + "_sigma-" + str(self.__sigma)):
+            self.__init_dim = np.prod([s for s in input_shape if s is not None])
+            self.__final_dim = int(dimensionality_constraints(self.__init_dim))
+            self.num_outputs = None
+
+            G, G_norm = G_variable((self.__nbr_stack, self.__final_dim))
+            self.__G = self.add_weight(
+                name="G",
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: G,
+                trainable=self.__trainable
+            )
+
+            B = B_variable((self.__nbr_stack, self.__final_dim))
+            self.__B = self.add_weight(
+                name="B",
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: B,
+                trainable=self.__trainable
+            )
+
+            H = H_variable(self.__final_dim)
+            self.__H = self.add_weight(
+                name="H",
+                shape=(self.__final_dim, self.__final_dim),
+                initializer=lambda *args, **kwargs: H,
+                trainable=False
+            )
+
+            P = P_variable(self.__final_dim, self.__nbr_stack)
+            self.__P = self.add_weight(
+                name="P",
+                shape=(self.__final_dim * self.__nbr_stack, self.__final_dim * self.__nbr_stack),
+                initializer=lambda *args, **kwargs: P,
+                trainable=False
+            )
+
+            S = S_variable((self.__nbr_stack, self.__final_dim), G_norm)
+            self.__S = self.add_weight(
+                name="S",
+                # S has the same shape as G and B: one scaling vector per stack
+                shape=(self.__nbr_stack, self.__final_dim),
+                initializer=lambda *args, **kwargs: S,
+                trainable=self.__trainable
+            )
+
+            self.num_outputs = self.__final_dim * self.__nbr_stack
+
+    def call(self, input, **kwargs):
+        padding = self.__final_dim - self.__init_dim
+        conv_out2 = K.reshape(input, [-1, self.__init_dim])
+        paddings = K.constant([[0, 0], [0, padding]], dtype=np.int32)
+        conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")
+        conv_out2 = K.reshape(conv_out2, (1, -1, 1, self.__final_dim))
+        h_ff1 = conv_out2 * self.__B
+        h_ff1 = K.reshape(h_ff1, (-1, self.__final_dim))
+        h_ff2 = K.dot(h_ff1, self.__H)
+        h_ff2 = K.reshape(h_ff2, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff3 = K.dot(h_ff2, self.__P)
+        h_ff4 = K.reshape(h_ff3, (-1, self.__final_dim * self.__nbr_stack)) * K.reshape(self.__G, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff4 = K.reshape(h_ff4, (-1, self.__final_dim))
+        h_ff5 = K.dot(h_ff4, self.__H)
+
+        h_ff6 = (1 / (self.__sigma * np.sqrt(self.__final_dim))) * K.reshape(h_ff5, (-1, self.__final_dim * self.__nbr_stack)) * K.reshape(self.__S, (-1, self.__final_dim * self.__nbr_stack))
+        h_ff7_1 = K.cos(h_ff6)
+        h_ff7_2 = K.sin(h_ff6)
+        h_ff7 = np.sqrt(float(1 / self.__final_dim)) * K.concatenate([h_ff7_1, h_ff7_2], axis=1)
+        return h_ff7
+
+    def compute_output_shape(self, input_shape):
+        # the output size depends on the padded (power-of-two) dimension, not on the raw input dimension
+        return (input_shape[0], 2 * self.__final_dim * self.__nbr_stack)
diff --git a/skluc/main/keras_/kernel_approximation/nystrom_layer.py b/skluc/main/keras_/kernel_approximation/nystrom_layer.py
index 1c2458c32d19f30574e838a5fd18b95919ab9e77..8b4e646e3080c6f2405166f8dfe6574c2e926461 100644
--- a/skluc/main/keras_/kernel_approximation/nystrom_layer.py
+++ b/skluc/main/keras_/kernel_approximation/nystrom_layer.py
@@ -2,16 +2,16 @@
 Convnet with nystrom approximation of the feature map.
 """
-import time as t
-import numpy as np
 import keras
+import keras.backend as K
+import tensorflow.summary as tfsum
 from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
-import skluc.main.data.mldatasets as dataset
-from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, tf_laplacian_kernel, tf_linear_kernel
+from skluc.main.keras_.kernel import keras_chi_square_CPD, keras_linear_kernel
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_chi_square_CPD_exp, tf_laplacian_kernel
 from skluc.main.utils import logger
-import tensorflow as tf
+
 
 class DeepstromLayerEndToEnd(keras.layers.Layer):
     def __init__(self,
@@ -25,25 +25,30 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
                  ):
 
         def init_kernel():
-            if kernel_name == "rbf":
-                kernel_fct = rbf_kernel
-                tf_kernel_fct = tf_rbf_kernel
-            elif kernel_name == "linear":
-                kernel_fct = linear_kernel
-                tf_kernel_fct = tf_linear_kernel
-            elif kernel_name == "chi2_cpd":
-                kernel_fct = additive_chi2_kernel
-                tf_kernel_fct = tf_chi_square_CPD
-            elif kernel_name == "chi2_exp_cpd":
-                kernel_fct = chi2_kernel
-                tf_kernel_fct = tf_chi_square_CPD_exp
-            elif kernel_name == "chi2_pd":
-                raise NotImplementedError("Bien verifier que ce code ne fait pas bordel")
-            elif kernel_name == "laplacian":
-                tf_kernel_fct = tf_laplacian_kernel
-                kernel_fct = laplacian_kernel
-            else:
-                raise ValueError("Unknown kernel name: {}".format(kernel_name))
+            try:
+                if kernel_name == "rbf":
+                    raise NotImplementedError
+                    kernel_fct = rbf_kernel
+                    tf_kernel_fct = tf_rbf_kernel
+                elif kernel_name == "linear":
+                    kernel_fct = linear_kernel
+                    tf_kernel_fct = keras_linear_kernel
+                elif kernel_name == "chi2_cpd":
+                    kernel_fct = additive_chi2_kernel
+                    tf_kernel_fct = keras_chi_square_CPD
+                elif kernel_name == "chi2_exp_cpd":
+                    raise NotImplementedError
+                    kernel_fct = chi2_kernel
+                    tf_kernel_fct = tf_chi_square_CPD_exp
+                elif kernel_name == "chi2_pd":
+                    raise NotImplementedError  # todo: check carefully that this code does not cause a mess
+                elif kernel_name == "laplacian":
+                    tf_kernel_fct = tf_laplacian_kernel
+                    kernel_fct = laplacian_kernel
+                else:
+                    raise ValueError("Unknown kernel name: {}".format(kernel_name))
+            except NotImplementedError:
+                raise NotImplementedError(f"kernel {kernel_name} has not been implemented with keras yet")
             return kernel_name, kernel_fct, tf_kernel_fct, kernel_dict
 
         def init_output_dim(subsample_size):
@@ -57,9 +62,9 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
 
         def init_activation():
             if activation == "tan":
-                activation_fct = tf.nn.tanh
+                activation_fct = keras.activations.tanh
             elif activation == "relu":
-                activation_fct = tf.nn.relu
+                activation_fct = keras.activations.relu
             else:
                 activation_fct = activation
 
@@ -117,16 +122,16 @@ class DeepstromLayerEndToEnd(keras.layers.Layer):
 
         input_x = inputs[0]
        input_sub = inputs[1]
-        input_x = tf.nn.l2_normalize(input_x, axis=-1)
-        input_sub = tf.nn.l2_normalize(input_sub, axis=-1)
-        with tf.name_scope("NystromLayer"):
-            with tf.name_scope("kernel_vec"):
+        # input_x = K.l2_normalize(input_x, axis=-1)
+        # input_sub = K.l2_normalize(input_sub, axis=-1)
+        with K.name_scope("NystromLayer"):
+            with K.name_scope("kernel_vec"):
                 kernel_vector = self.__tf_kernel_fct(input_x, input_sub, **self.__kernel_dict)
-                tf.summary.histogram("kernel_vector", kernel_vector)
+                tfsum.histogram("kernel_vector", kernel_vector)
         if self.__output_dim != 0:
-            out = tf.matmul(kernel_vector, self.__W_matrix)
-
tf.summary.histogram("W_matrix", self.__W_matrix) + out = K.dot(kernel_vector, self.__W_matrix) + tfsum.histogram("W_matrix", self.__W_matrix) else: out = kernel_vector if self.__activation is not None: diff --git a/skluc/main/keras_/models.py b/skluc/main/keras_/models.py new file mode 100644 index 0000000000000000000000000000000000000000..3fc1f323ed971470a9d930f1ae00031a5b3a0535 --- /dev/null +++ b/skluc/main/keras_/models.py @@ -0,0 +1,205 @@ +from keras.initializers import he_normal +from keras.layers import Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation +from keras.models import Sequential +from keras.regularizers import l2 + + +def build_lenet_model(input_shape): + model = Sequential() + model.add( + Conv2D(6, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal(), input_shape=input_shape)) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + model.add(Conv2D(16, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal())) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + model.add(Flatten()) + return model + + +def build_steph_model(input_shape): + model = Sequential() + model.add(Conv2D(64, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None, input_shape=input_shape)) + model.add(Activation('relu')) + model.add(Conv2D(128, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None)) + model.add(Activation('relu')) + model.add(Conv2D(256, kernel_size=(4, 4), strides=(2, 2), padding='same', activation=None)) + model.add(Activation('relu')) + model.add(Conv2D(512, kernel_size=(3, 3), strides=(1, 1), padding='same', activation=None)) + model.add(BatchNormalization()) + model.add(MaxPooling2D((2, 2))) + model.add(Activation('relu')) + model.add(Flatten()) + return model + + +def build_vgg19_model(input_shape, weight_decay=0.0001): + model = Sequential() + + # Block 1 + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape)) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block1_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')) + + # Block 2 + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block2_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block2_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')) + + # Block 3 + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block3_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block3_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block3_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), 
padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block3_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')) + + # Block 4 + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block4_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block4_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block4_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block4_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')) + + # Block 5 + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block5_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block5_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block5_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + kernel_initializer=he_normal(), name='block5_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')) + + model.add(Flatten(name='flatten')) + + return model + + +def build_vgg19_model_glorot(input_shape, weight_decay=0.0001): + model = Sequential() + + # Block 1 + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block1_conv1', input_shape=input_shape)) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block1_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')) + + # Block 2 + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block2_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block2_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')) + + # Block 3 + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block3_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block3_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), padding='same', 
kernel_regularizer=l2(weight_decay), + name='block3_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block3_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')) + + # Block 4 + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block4_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block4_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block4_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block4_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')) + + # Block 5 + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block5_conv1')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block5_conv2')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block5_conv3')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay), + name='block5_conv4')) + model.add(BatchNormalization()) + model.add(Activation('relu')) + model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')) + + model.add(Flatten(name='flatten')) + + return model
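A possible way to assemble these pieces end to end, sketched under the assumption that FastFoodLayer is the intended entry point (the functional fastfood_layer still raises NotImplementedError in this diff); the head, the hyper-parameters and the input shape are illustrative choices, not values taken from the repository:

from keras.layers import Dense
from keras.models import Sequential

from skluc.main.keras_.models import build_lenet_model
from skluc.main.keras_.kernel_approximation.fastfood_layer import FastFoodLayer

model = Sequential()
model.add(build_lenet_model(input_shape=(28, 28, 1)))  # convolutional feature extractor
model.add(FastFoodLayer(sigma=1.0, nbr_stack=2))       # approximate RBF feature map
model.add(Dense(10, activation='softmax'))             # classification head
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()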