Commit 489ceb8f authored by Luc Giffon

remove old benchmark_vgg

parent 516c99fe
"""
Benchmark VGG: benchmark deepstrom against other architectures built on top of the VGG network.
Usage:
benchmark_vgg dense [-q] [--cifar100|--cifar|--mnist|--svhn] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l]
benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack]
benchmark_vgg deepstrom [-q] [--cifar100|--cifar|--mnist|--svhn] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n]
Options:
--help -h                               Display help and exit.
-q --quiet                              Set logging level to info.
-a value --seed value                   The seed value used for all randomization processes [default: 0]
-t --train-size size                    Size of the train set.
-v size --validation-size size          The size of the validation set [default: 10000]
-e numepoch --num-epoch=numepoch        The number of epochs.
-s batchsize --batch-size=batchsize     The number of examples in each batch.
-d --dropout val                        Keep probability of the neurons before classification [default: 1.0]
-D reprdim --out-dim=reprdim            The dimension of the final representation.
Dense:
-l --two-layers                         Use two dense layers instead of one (default: one layer).
Deepfried convnet:
-N nbstack --nb-stack nbstack           The number of fastfood stacks for deepfriedconvnet.
Deepstrom:
-r --real-nystrom                       Use the true K^(-1/2) matrix for the deepstrom projection.
-n --non-linear                         Tell Nystrom to use the non-linear activation function on its output.
-m size --nys-size size                 The number of examples in the Nystrom subsample.
Datasets:
--cifar                                 Use the cifar dataset.
--mnist                                 Use the mnist dataset.
--svhn                                  Use the svhn dataset.
--cifar100                              Use the cifar100 dataset.
Dataset related:
-B --bad-repr nb                        Use a degraded convolutional representation with the cifar10 dataset: remove nb convolutions from the head of the network.
Possible kernels:
-R --rbf-kernel                         Use the rbf kernel for nystrom.
-L --linear-kernel                      Use the linear kernel for nystrom.
-C --chi-square-kernel                  Use the basic additive chi square kernel for nystrom.
-E --exp-chi-square-kernel              Use the exponential chi square kernel for nystrom.
-P --chi-square-PD-kernel               Use the positive definite version of the basic additive chi square kernel for nystrom.
-S --sigmoid-kernel                     Use the sigmoid kernel for nystrom.
-A --laplacian-kernel                   Use the laplacian kernel for nystrom.
-T --stacked-kernel                     Use the laplacian, chi2 and rbf kernels in a stacked setting for nystrom.
-M --sumed-kernel                       Use the laplacian, chi2 and rbf kernels in a summed setting for nystrom.
Kernel related:
-g gammavalue --gamma gammavalue        The value of gamma for the rbf, chi2 or hyperbolic tangent kernel (deepstrom and deepfriedconvnet).
-c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.
"""
import logging
import sys
import time as t
import daiquiri
import numpy as np
import tensorflow as tf
import docopt
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
import skluc.data.mldatasets as dataset
from skluc.data.transformation import VGG19Cifar10Transformer, LecunMnistTransformer, VGG19Cifar10BadTransformer, \
VGG19Cifar10BadTransformerV2, VGG19Cifar10BadTransformerV3, VGG19Cifar10BadTransformerV4, VGG19SvhnTransformer, \
VGG19Cifar100Transformer
from skluc.tensorflow_.kernel_approximation import nystrom_layer, fastfood_layer
from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
from skluc.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
from skluc.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2
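# Print a single comma-separated result line (network type, validation and test accuracy,
# training time and the hyperparameters) to stdout, then exit.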
def print_result():
printed_r_list = [str(NETWORK),
str(global_acc_val),
str(global_acc_test),
str(training_time),
str(NUM_EPOCH),
str(BATCH_SIZE),
str(OUT_DIM),
str(TWO_LAYERS_DENSE),
str(KERNEL_NAME),
str(GAMMA),
str(CONST),
str(NB_STACK),
str(NYS_SUBSAMPLE_SIZE),
str(VALIDATION_SIZE),
str(SEED),
str(NON_LINEAR),
str(REAL_NYSTROM),
str(BAD_REPR),
str(TRAIN_SIZE),
str(DROPOUT),
str(DATASET)
]
print(",".join(printed_r_list))
exit()
def fct_dense(input_, out_dim, two_layers):
with tf.variable_scope("dense_layers"):
fc_1 = fully_connected(input_, out_dim, act=tf.nn.relu, variable_scope="fc1")
if two_layers:
fc_2 = fully_connected(fc_1, out_dim, act=tf.nn.relu, variable_scope="fc2")
else:
fc_2 = fc_1
out = fc_2
return out
def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, non_linearity):
"""
Wrap the computing of the deepstrom layer
:param input_:
:param out_dim:
:param subsample:
:param kernel:
:param kernel_params:
:return:
"""
out_fc = nystrom_layer(input_, subsample, W_matrix=w_matrix, output_dim=out_dim, kernel=kernel, output_act=non_linearity, **kernel_params)
return out_fc
def fct_deepfried(input_, nb_stack, sigma):
out_fc = fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=True)
return out_fc
def get_gamma_value(arguments, dat, chi2=False):
if arguments["--gamma"] is None:
logger.debug("Gamma arguments is None. Need to compute it.")
if chi2:
gamma_value = 1./compute_euristic_sigma_chi2(dat.train.data)
else:
gamma_value = 1./compute_euristic_sigma(dat.train.data)
else:
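# --gamma is eval'ed so that it can be either a single float or a Python list of floats
# (a list is used with the stacked and summed kernels).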
gamma_value = eval(arguments["--gamma"])
logger.debug("Gamma value is {}".format(gamma_value))
return gamma_value
if __name__ == '__main__':
# todo special treatment for each type of execution
arguments = docopt.docopt(__doc__)
if arguments["--quiet"]:
daiquiri.setup(level=logging.INFO)
NUM_EPOCH = int(arguments["--num-epoch"])
BATCH_SIZE = int(arguments["--batch-size"])
OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
TWO_LAYERS_DENSE = arguments["--two-layers"]
RBF_KERNEL = arguments["--rbf-kernel"]
LINEAR_KERNEL = arguments["--linear-kernel"]
CHI2_KERNEL = arguments["--chi-square-kernel"]
CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
STACKED_KERNEL = arguments["--stacked-kernel"]
SUMED_KERNEL = arguments["--sumed-kernel"]
VALIDATION_SIZE = int(arguments["--validation-size"])
REAL_NYSTROM = arguments["--real-nystrom"]
SEED = int(arguments["--seed"])  # The seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
NYS_SUBSAMPLE_SIZE = None
KERNEL_NAME = None
GAMMA = None
CONST = None
NB_STACK = None
NON_LINEAR = tf.nn.relu if arguments["--non-linear"] else None
kernel_dict = {}
CIFAR_DATASET = bool(arguments["--cifar"])
CIFAR100_DATASET = bool(arguments["--cifar100"])
MNIST_DATASET = bool(arguments["--mnist"])
SVHN_DATASET = bool(arguments["--svhn"])
if CIFAR_DATASET:
DATASET = "cifar"
elif MNIST_DATASET:
DATASET = "mnist"
elif SVHN_DATASET:
DATASET = "svhn"
elif CIFAR100_DATASET:
DATASET = "cifar100"
else:
raise ValueError("no know dataset specified")
BAD_REPR = arguments["--bad-repr"]
logger.debug("{}".format(arguments["--dropout"]))
DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
if arguments["--train-size"] is not None:
TRAIN_SIZE = int(arguments["--train-size"])
else:
TRAIN_SIZE = arguments["--train-size"]
global_acc_val = None
global_acc_test = None
training_time = None
SEED_TRAIN_VALIDATION = SEED
if CIFAR_DATASET:
data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
if BAD_REPR is None or int(BAD_REPR) == 0:
# todo do something about these "bad repr" cases
# bad representation parameter and a single class?
transformer = VGG19Cifar10Transformer
elif int(BAD_REPR) == 1:
transformer = VGG19Cifar10BadTransformer
elif int(BAD_REPR) == 2:
transformer = VGG19Cifar10BadTransformerV2
elif int(BAD_REPR) == 3:
transformer = VGG19Cifar10BadTransformerV3
elif int(BAD_REPR) == 4:
transformer = VGG19Cifar10BadTransformerV4
else:
raise ValueError("Not known transformer value: {}".format(BAD_REPR))
elif MNIST_DATASET:
data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
transformer = LecunMnistTransformer
elif SVHN_DATASET:
data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
transformer = VGG19SvhnTransformer
elif CIFAR100_DATASET:
data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
transformer = VGG19Cifar100Transformer
else:
raise ValueError("No dataset specified")
data.load()
data.to_image()  # todo handle the case where these are already images (flatten them in every case?)
data.data_astype(np.float32)
data.labels_astype(np.float32)
data.normalize()
logger.debug("train dataset shape: {}".format(data.train.data.shape))
data.apply_transformer(transformer)
data.normalize()
data.to_one_hot()
data.flatten()
if TRAIN_SIZE is not None:
data.reduce_data_size(int(TRAIN_SIZE))
logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
logger.info("Using dataset {} with validation size {} and seed for spliting set {}.".format(data.s_name, data.validation_size, data.seed))
logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
logger.debug("Sample of label: {}".format(data.train[1][0]))
# todo separate function for parameter parsing
if arguments["dense"]:
NETWORK = "dense"
elif arguments["deepstrom"]:
NETWORK = "deepstrom"
NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
if OUT_DIM is None:
OUT_DIM = NYS_SUBSAMPLE_SIZE
if RBF_KERNEL:
KERNEL = tf_rbf_kernel
KERNEL_NAME = "rbf"
GAMMA = get_gamma_value(arguments, data)
kernel_dict = {"gamma": GAMMA}
elif LINEAR_KERNEL:
KERNEL = tf_linear_kernel
KERNEL_NAME = "linear"
elif CHI2_KERNEL:
KERNEL = tf_chi_square_CPD
KERNEL_NAME = "chi2_cpd"
elif CHI2_EXP_KERNEL:
KERNEL = tf_chi_square_CPD_exp
KERNEL_NAME = "chi2_exp_cpd"
GAMMA = get_gamma_value(arguments, data, chi2=True)
kernel_dict = {"gamma": GAMMA}
elif CHI2_PD_KERNEL:
KERNEL = tf_chi_square_PD
KERNEL_NAME = "chi2_pd"
elif SIGMOID_KERNEL:
KERNEL = tf_sigmoid_kernel
KERNEL_NAME = "sigmoid"
GAMMA = get_gamma_value(arguments, data)
CONST = float(arguments["--intercept-constant"])
kernel_dict = {"gamma": GAMMA, "constant": CONST}
elif LAPLACIAN_KERNEL:
KERNEL = tf_laplacian_kernel
KERNEL_NAME = "laplacian"
GAMMA = get_gamma_value(arguments, data)
kernel_dict = {"gamma": np.sqrt(GAMMA)}
elif STACKED_KERNEL:
GAMMA = get_gamma_value(arguments, data)
def KERNEL(X, Y):
return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
[{"gamma": g_value} for g_value in GAMMA])
KERNEL_NAME = "stacked"
elif SUMED_KERNEL:
GAMMA = get_gamma_value(arguments, data)
def KERNEL(X, Y):
return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
[{"gamma": g_value} for g_value in GAMMA])
KERNEL_NAME = "summed"
else:
raise Exception("No kernel function specified for deepstrom")
elif arguments["deepfriedconvnet"]:
NETWORK = "deepfriedconvnet"
NB_STACK = int(arguments["--nb-stack"])
GAMMA = get_gamma_value(arguments, data)
SIGMA = 1 / GAMMA
else:
raise Exception("Not recognized network")
input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
if NETWORK == "dense":
logger.info("Selecting dense layer function with output dim = {} and {} layers".format(OUT_DIM, 2 if TWO_LAYERS_DENSE else 1))
input_classif = fct_dense(x, OUT_DIM, TWO_LAYERS_DENSE)
elif NETWORK == "deepstrom":
logger.info("Selecting {} deepstrom layer function with "
"subsample size = {}, "
"output_dim = {}, "
"{} activation function "
"and kernel = {}"
.format("real" if REAL_NYSTROM else "learned",
NYS_SUBSAMPLE_SIZE,
OUT_DIM,
"with" if NON_LINEAR else "without",
KERNEL_NAME))
# if TRAIN_SIZE < int(NYS_SUBSAMPLE_SIZE) + 10:
# logger.debug("Train size is {} and nys size is {}. not ok".format(TRAIN_SIZE, NYS_SUBSAMPLE_SIZE))
# print_result()
if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE:
logger.debug("Output dim is greater than deepstrom subsample size. Aborting.")
print_result()
if TRAIN_SIZE is not None:
subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
nys_subsample = data.validation.data[subsample_indexes]
else:
subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
nys_subsample = data.train.data[subsample_indexes]
logger.debug("Chosen subsample: {}".format(nys_subsample))
if REAL_NYSTROM:
logger.debug("Real nystrom asked: eg projection matrix has the vanilla formula")
if SUMED_KERNEL:
# here the K11 matrices are summed before doing the nystrom approximation
added_K11 = np.zeros((nys_subsample.shape[0], nys_subsample.shape[0]))
for g_value in GAMMA:
added_K11 = np.add(added_K11, rbf_kernel(nys_subsample, nys_subsample, gamma=g_value))
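# added_K11 is symmetric PSD, so the SVD below gives U diag(S) V with V = U^T, and
# U diag(1/sqrt(S)) V is (up to numerical precision) added_K11^(-1/2).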
U, S, V = np.linalg.svd(added_K11)
invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
w_matrix=invert_root_K11, non_linearity=NON_LINEAR)
elif STACKED_KERNEL:
# here nystrom approximations are stacked
lst_invert_root_K11 = []
for g_value in GAMMA:
K11 = rbf_kernel(nys_subsample, nys_subsample, gamma=g_value)
U, S, V = np.linalg.svd(K11)
invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
lst_invert_root_K11.append(invert_root_K11)
stack_K11 = np.vstack(lst_invert_root_K11)
input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
w_matrix=stack_K11, non_linearity=NON_LINEAR)
else:
if KERNEL_NAME == "rbf":
kernel_fct = rbf_kernel
elif KERNEL_NAME == "linear":
kernel_fct = linear_kernel
elif KERNEL_NAME == "chi2_cpd":
kernel_fct = additive_chi2_kernel
elif KERNEL_NAME == "chi2_exp_cpd":
kernel_fct = chi2_kernel
elif KERNEL_NAME == "chi2_pd":
raise NotImplementedError("Check carefully that this code does not mess things up")
elif KERNEL_NAME == "laplacian":
kernel_fct = laplacian_kernel
else:
raise ValueError("Unknown kernel name: {}".format(KERNEL_NAME))
K11 = kernel_fct(nys_subsample, nys_subsample, **kernel_dict)
U, S, V = np.linalg.svd(K11)
invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11, non_linearity=NON_LINEAR)
else:
input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
w_matrix=None, non_linearity=NON_LINEAR)
elif NETWORK == "deepfriedconvnet":
logger.debug("Selecting deepfriedconvnet layer function")
input_classif = fct_deepfried(x, NB_STACK, SIGMA)
else:
raise Exception("Not recognized network")
classif, keep_prob = classification_cifar(input_classif, output_dim)
# compute the loss
with tf.name_scope("xent"):
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
name="xentropy_mean")
tf.summary.scalar('loss-xent', cross_entropy)
# todo learning rate as hyperparameter
# compute the gradient
with tf.name_scope("train"):
global_step = tf.Variable(0, name="global_step", trainable=False)
train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
global_step=global_step)
# compute the accuracy
with tf.name_scope("accuracy"):
predictions = tf.argmax(classif, 1)
correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy_op)
merged_summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
# Create a session for running Ops on the Graph.
# Instantiate a SummaryWriter to output summaries and the Graph.
# summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
# Initialize all Variable objects
# actual learning
with tf.Session() as sess:
logger.info("Start training")
# summary_writer.add_graph(sess.graph)
# Initialize all Variable objects
sess.run(init)
# actual learning
# feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
global_start = t.time()
for i in range(NUM_EPOCH):
j = 0
start = t.time()
for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
_, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
if j % 100 == 0:
logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j+1, int(data.train[0].shape[0]/BATCH_SIZE)+1, X_batch.shape, loss, acc))
# summary_str = sess.run(merged_summary, feed_dict=feed_dict)
# summary_writer.add_summary(summary_str, j)
j += 1
logger.info("Evaluation on validation data")
training_time = t.time() - global_start
accuracies_val = []
i = 0
for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):
accuracy = sess.run([accuracy_op], feed_dict={
x: X_batch, y: Y_batch, keep_prob: 1.0})
accuracies_val.append(accuracy[0])
i += 1
global_acc_val = sum(accuracies_val) / i
logger.info("Evaluation on test data")
accuracies_test = []
i = 0
for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):
accuracy = sess.run([accuracy_op], feed_dict={
x: X_batch, y: Y_batch, keep_prob: 1.0})
accuracies_test.append(accuracy[0])
i += 1
global_acc_test = sum(accuracies_test) / i
print_result()