Commit fe0cda9b authored by Luc Giffon

move graph drawing by date + remove some useless scripts

parent 759abb39
Showing with 474 additions and 0 deletions
"""
Benchmark VGG: benchmark deepstrom against other architectures based on the VGG network.
Usage:
benchmark_vgg deepstrom [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n]
Options:
--help -h                              Display help and exit.
-e numepoch --num-epoch=numepoch       The number of epochs.
-s batchsize --batch-size=batchsize    The number of examples in each batch.
-v size --validation-size size         The size of the validation set [default: 10000]
-a value --seed value                  The seed value used for all randomization processes [default: 0]
-D reprdim --out-dim=reprdim           The dimension of the final representation.
-m size --nys-size size                The number of examples in the nystrom subsample.
-n --non-linear                        Tell Nystrom to use a non-linear activation function on its output.
-r --real-nystrom                      Use the real nystrom w matrix.
-g gammavalue --gamma gammavalue       The value of gamma for the rbf, chi2 or hyperbolic tangent kernel (deepstrom and deepfriedconvnet).
-c cvalue --intercept-constant cvalue  The value of the intercept constant for the hyperbolic tangent kernel.
-R --rbf-kernel                        Says if the rbf kernel should be used for nystrom.
-L --linear-kernel                     Says if the linear kernel should be used for nystrom.
-C --chi-square-kernel                 Says if the basic additive chi square kernel should be used for nystrom.
-E --exp-chi-square-kernel             Says if the exponential chi square kernel should be used for nystrom.
-P --chi-square-PD-kernel              Says if the positive definite version of the basic additive chi square kernel should be used for nystrom.
-S --sigmoid-kernel                    Says if the sigmoid kernel should be used for nystrom.
-A --laplacian-kernel                  Says if the laplacian kernel should be used for nystrom.
-T --stacked-kernel                    Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a stacked setting.
-M --sumed-kernel                      Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a summed setting.
"""
import sys
import os
import time as t
import numpy as np
import tensorflow as tf
import docopt
from keras import Model
from keras.preprocessing.image import ImageDataGenerator
import skluc.main.data.mldatasets as dataset
from skluc.main.tensorflow_.kernel_approximation import nystrom_layer
fct_deepstrom = nystrom_layer  # assumed alias: fct_deepstrom is called below but never defined; nystrom_layer is the imported deepstrom layer builder
from skluc.main.tensorflow_.utils import batch_generator, classification_cifar
from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
from skluc.main.utils import logger, log_memory_usage, compute_euristic_sigma, compute_euristic_sigma_chi2  # the two sigma heuristics, called below, are assumed to live in skluc.main.utils
import keras
from keras.models import Sequential, load_model
from keras.layers import Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.initializers import he_normal
from keras.layers.normalization import BatchNormalization
def VGG19(input_shape):
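    """Build the VGG19 convolutional stack (blocks 1 to 5, without the dense head), with batch
    normalization and l2 weight decay, to be used as a feature extractor."""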
# with tf.variable_scope("block1_conv1"):
# weights = tf.get_variable("weights", (3, 3, 3, 64), initializer=tf.random_normal_initializer(stddev=0.1), trainable=trainable)
# biases = tf.get_variable("biases", (64), initializer=tf.constant_initializer(0.0), trainable=trainable)
# regularizer = tf.contrib.layers.l2_regularizer(scale=0.1)
# conv = tf.nn.conv2d(input_, weights, strides=[1, 1, 1, 1], padding='SAME', kernel_regularizer=regularizer)
# batch_norm = tf.nn.batch_normalization(conv, variance_epsilon=1e-3)
# relu = tf.nn.relu(conv + biases)
# tf.summary.histogram("act", relu)
# in order to reduce dimensionality, use bigger pooling size
# pool = max_pool(relu, pool_size=pool_size)
# with tf.variable_scope("conv_pool_2"):
# conv2 = conv_relu_pool(conv1, [5, 5, 6, 16], [16], pool_size=2, trainable=trainable)
weight_decay = 0.0001
# build model
model = Sequential()
# Block 1
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block1_conv2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))
# Block 2
model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block2_conv1'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block2_conv2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))
#
# Block 3
model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv1'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv3'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv4'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool'))
#
# Block 4
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv1'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv3'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv4'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool'))
# Block 5
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv1'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv3'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv4'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool'))
return model
def VGG19_preload():
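    """Load a pre-trained VGG19 model from disk and return its convolutional part (up to block5_pool)."""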
logger.debug("filename: {}".format(os.path.abspath(__file__)))
model = load_model(os.path.join(os.path.dirname(os.path.abspath(__file__)), "1522967518.1916964_vgg19_cifar10.h5"))
vgg_conv_model = Model(inputs=model.input,
outputs=model.get_layer('block5_pool').output)
return vgg_conv_model
def get_gamma_value(arguments, dat, chi2=False):
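    """Return the gamma value given on the command line if any, else estimate it from the train data
    with the heuristic-sigma functions."""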
if arguments["--gamma"] is None:
logger.debug("Gamma arguments is None. Need to compute it.")
if chi2:
gamma_value = 1./compute_euristic_sigma_chi2(dat.train.data)
else:
gamma_value = 1./compute_euristic_sigma(dat.train.data)
else:
gamma_value = eval(arguments["--gamma"])
logger.debug("Gamma value is {}".format(gamma_value))
return gamma_value
def init_kernel():
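    """Select the kernel function, its display name and its extra parameters from the command line flags."""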
kernel_dict = {}
GAMMA = None
if RBF_KERNEL:
KERNEL = tf_rbf_kernel
KERNEL_NAME = "rbf"
GAMMA = get_gamma_value(arguments, data)
kernel_dict = {"gamma": GAMMA}
elif LINEAR_KERNEL:
KERNEL = tf_linear_kernel
KERNEL_NAME = "linear"
elif CHI2_KERNEL:
KERNEL = tf_chi_square_CPD
KERNEL_NAME = "chi2_cpd"
elif CHI2_EXP_KERNEL:
KERNEL = tf_chi_square_CPD_exp
KERNEL_NAME = "chi2_exp_cpd"
GAMMA = get_gamma_value(arguments, data, chi2=True)
kernel_dict = {"gamma": GAMMA}
elif CHI2_PD_KERNEL:
KERNEL = tf_chi_square_PD
KERNEL_NAME = "chi2_pd"
elif SIGMOID_KERNEL:
KERNEL = tf_sigmoid_kernel
KERNEL_NAME = "sigmoid"
GAMMA = get_gamma_value(arguments, data)
CONST = float(arguments["--intercept-constant"])
kernel_dict = {"gamma": GAMMA, "constant": CONST}
elif LAPLACIAN_KERNEL:
KERNEL = tf_laplacian_kernel
KERNEL_NAME = "laplacian"
GAMMA = get_gamma_value(arguments, data)
kernel_dict = {"gamma": np.sqrt(GAMMA)}
elif STACKED_KERNEL:
GAMMA = get_gamma_value(arguments, data)
def KERNEL(X, Y):
return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
[{"gamma": g_value} for g_value in GAMMA])
KERNEL_NAME = "stacked"
elif SUMED_KERNEL:
GAMMA = get_gamma_value(arguments, data)
def KERNEL(X, Y):
return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
[{"gamma": g_value} for g_value in GAMMA])
KERNEL_NAME = "summed"
else:
raise Exception("No kernel function specified for deepstrom")
return KERNEL_NAME, KERNEL, kernel_dict, GAMMA
if __name__ == '__main__':
arguments = docopt.docopt(__doc__)
NUM_EPOCH = int(arguments["--num-epoch"])
BATCH_SIZE = int(arguments["--batch-size"])
SEED_TRAIN_VALIDATION = 0
SEED = int(arguments["--seed"])
OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
VALIDATION_SIZE = int(arguments["--validation-size"])
NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
if OUT_DIM is None:
OUT_DIM = NYS_SUBSAMPLE_SIZE
KERNEL_NAME = None
GAMMA = None
CONST = None
REAL_NYSTROM = arguments["--real-nystrom"]
NON_LINEAR = tf.nn.relu if arguments["--non-linear"] else None
RBF_KERNEL = arguments["--rbf-kernel"]
LINEAR_KERNEL = arguments["--linear-kernel"]
CHI2_KERNEL = arguments["--chi-square-kernel"]
CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
STACKED_KERNEL = arguments["--stacked-kernel"]
SUMED_KERNEL = arguments["--sumed-kernel"]
data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
data.load()
data.normalize()
data.data_astype(np.float32)
data.labels_astype(np.float32)
data.to_image()
data.to_one_hot()
logger.debug("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
logger.debug("Using dataset {} with validation size {} and seed for spliting set {}.".format(data.s_name, data.validation_size, data.seed))
logger.debug("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
logger.debug("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
logger.debug("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
logger.debug("Sample of label: {}".format(data.train[1][0]))
KERNEL_NAME, KERNEL, kernel_dict, GAMMA = init_kernel()
input_dim, output_dim = data.train[0].shape[1:], data.train[1].shape[1]
with tf.Graph().as_default():
np.random.seed(SEED)
nys_subsample_index = np.random.permutation(data.train[0].shape[0])
nys_subsample = data.train[0][nys_subsample_index[:NYS_SUBSAMPLE_SIZE]]
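        # keep the Nystrom subsample in a non-trainable Variable so that it is stored along with the model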
nys_subsample_placeholder = tf.Variable(nys_subsample, dtype=tf.float32, name="nys_subsample", trainable=False)
x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x")
y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
# nys_subsample_placeholder = tf.placeholder(tf.float32, shape=[NYS_SUBSAMPLE_SIZE, *input_dim], name="nys_subsample")
# vgg_conv_model = VGG19_preload()
with tf.variable_scope("Convolution") as scope_convolution:
vgg_conv_model = VGG19(input_dim)
            vgg_conv_model.trainable = False
conv_x = vgg_conv_model(x)
tf.summary.histogram("convolution_x", conv_x)
vgg_conv_model_subsample = keras.Model(inputs=vgg_conv_model.inputs,
outputs=vgg_conv_model.outputs)
vgg_conv_model_subsample.trainable = False
conv_nys_subsample = vgg_conv_model_subsample(nys_subsample_placeholder)
logger.debug("Selecting deepstrom layer function with "
"subsample size = {}, "
"output_dim = {}, "
"{} activation function "
"and kernel = {}"
.format(NYS_SUBSAMPLE_SIZE,
OUT_DIM,
"with" if NON_LINEAR else "without",
KERNEL_NAME))
if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE:
logger.debug("Output dim is greater than deepstrom subsample size. Aborting.")
# todo change this because it is copy-pasted (use function instead)
global_acc_val = None
global_acc_test = None
training_time = None
printed_r_list = [str(global_acc_val),
str(global_acc_test),
str(training_time),
str(NUM_EPOCH),
str(BATCH_SIZE),
str(OUT_DIM),
str(KERNEL_NAME),
str(GAMMA),
str(CONST),
str(NYS_SUBSAMPLE_SIZE),
str(VALIDATION_SIZE),
str(SEED),
str(NON_LINEAR),
]
print(",".join(printed_r_list))
            sys.exit(0)
w_matrix = None
if REAL_NYSTROM:
            init_dim = np.prod([s.value for s in conv_x.shape[1:] if s.value is not None])
            h_conv_nystrom_subsample_flat = tf.reshape(conv_nys_subsample, [-1, init_dim])
            K_matrix = KERNEL(h_conv_nystrom_subsample_flat, h_conv_nystrom_subsample_flat, **kernel_dict)
            S, U, V = tf.svd(K_matrix)
            # K^{-1/2} = U . diag(1/sqrt(S)) . V^T: inverse square root of the subsample kernel matrix
            invert_root_K = tf.matmul(tf.matmul(U, tf.diag(1.0 / tf.sqrt(S))), tf.transpose(V))
            w_matrix = invert_root_K
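        # deepstrom layer: compare the convolutional features with the subsample through the kernel,
        # then project the result to an OUT_DIM-dimensional representation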
input_classif = fct_deepstrom(conv_x, OUT_DIM, conv_nys_subsample, KERNEL, kernel_dict, w_matrix=w_matrix, non_linearity=NON_LINEAR)
classif, keep_prob = classification_cifar(input_classif, output_dim)
        # compute the loss
with tf.name_scope("xent"):
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
name="xentropy_mean")
tf.summary.scalar('loss-xent', cross_entropy)
# todo learning rate as hyperparameter
        # compute the gradient
with tf.name_scope("train"):
global_step = tf.Variable(0, name="global_step", trainable=False)
train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
global_step=global_step)
        # compute the accuracy
with tf.name_scope("accuracy"):
predictions = tf.argmax(classif, 1)
correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy_op)
merged_summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
# Create a session for running Ops on the Graph.
# Instantiate a SummaryWriter to output summaries and the Graph.
# summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
saver = tf.train.Saver()
with tf.Session() as sess:
logger.debug("trainable variables are: {}".format(tf.trainable_variables()))
# summary_writer.add_graph(sess.graph)
# Initialize all Variable objects
datagen = ImageDataGenerator(horizontal_flip=True,
width_shift_range=0.125,
height_shift_range=0.125,
fill_mode='constant',
cval=0.)
datagen.fit(data.train[0])
sess.run(init)
# actual learning
# feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
global_start = t.time()
feed_dict = {nys_subsample_placeholder: nys_subsample}
feed_dict_val = {nys_subsample_placeholder: nys_subsample}
feed_dict_test = {nys_subsample_placeholder: nys_subsample}
start_time_int = int(t.time())
for i in range(NUM_EPOCH):
saver.save(sess, os.path.abspath('end_to_end_model'), global_step=start_time_int)
start = t.time()
# for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, True):
batchgen = datagen.flow(data.train[0], data.train[1], BATCH_SIZE, shuffle=False)
j = 0
log_memory_usage()
while j < len(batchgen):
X_batch, Y_batch = next(batchgen)
# batch_generator(data.train[0], data.train[1], BATCH_SIZE, True):
# X_batch = tf.map_fn(lambda img: datagen.random_transform(img), X_batch)
feed_dict.update({x: X_batch, y: Y_batch, keep_prob: 0.5})
_, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
if j % 100 == 0:
# summary_str = sess.run(merged_summary, feed_dict=feed_dict)
# summary_writer.add_summary(summary_str, j)
logger.debug("epoch: {}/{}; batch: {}/{}; loss: {}; acc: {}".format(i, NUM_EPOCH,
j, int(data.train[0].shape[0]/BATCH_SIZE),
loss, acc))
j += 1
training_time = t.time() - global_start
            accuracies_val = []
            for X_batch, Y_batch in batch_generator(data.validation[0], data.validation[1], 1000, False):
                feed_dict_val.update({x: X_batch, y: Y_batch, keep_prob: 1.0})
                accuracy = sess.run([accuracy_op], feed_dict=feed_dict_val)
                accuracies_val.append(accuracy[0])
            accuracies_test = []
            for X_batch, Y_batch in batch_generator(data.test[0], data.test[1], 1000, False):
                feed_dict_test.update({x: X_batch, y: Y_batch, keep_prob: 1.0})
                accuracy = sess.run([accuracy_op], feed_dict=feed_dict_test)
                accuracies_test.append(accuracy[0])
            # average the per-batch accuracies of each set over its own number of batches
            global_acc_val = sum(accuracies_val) / len(accuracies_val)
            global_acc_test = sum(accuracies_test) / len(accuracies_test)
printed_r_list = [str(global_acc_val),
str(global_acc_test),
str(training_time),
str(NUM_EPOCH),
str(BATCH_SIZE),
str(OUT_DIM),
str(KERNEL_NAME),
str(GAMMA),
str(CONST),
str(NYS_SUBSAMPLE_SIZE),
str(VALIDATION_SIZE),
str(SEED),
str(NON_LINEAR),
]
print(",".join(printed_r_list))