Commit 3fd01101 authored by Luc Giffon

now uses deepstromlayer class + tensorboard support

parent e8d3baa5
@@ -2,13 +2,14 @@
Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
Usage:
    benchmark_vgg dense [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size]
    benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z]
    benchmark_vgg deepstrom [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size]
    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V]
    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V]
    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V]
Options:
    --help -h                           Display help and exit.
    -q --quiet                          Set logging level to info.
    -V --tensorboard                    Write tensorboard logs.
    -a value --seed value               The seed value used for all randomization processes [default: 0]
    -t --train-size size                Size of train set.
    -v size --validation-size size      The size of the validation set [default: 10000]
@@ -31,13 +32,14 @@ Deepstrom:
    -n --non-linear                     Tell Nystrom to use the non-linear activation function on its output.
Datasets:
    --cifar                             Use cifar dataset
    --cifar10                           Use cifar10 dataset
    --mnist                             Use mnist dataset
    --svhn                              Use svhn dataset
    --cifar100                          Use cifar100 dataset
Dataset related:
    -B --cut-layer name                 The name of the last convolutional layer when loading VGG19Transformer.
    -W --weights name                   The name of the dataset used for weights.
Possible kernels:
    -R --rbf-kernel                     Says if the rbf kernel should be used for nystrom.
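For illustration, a plausible invocation of the renamed benchmark_classification entry point (the flag values below are invented for the example, not taken from the commit) would be:

    benchmark_classification deepstrom --cifar10 -e 100 -s 64 -m 500 -D 100 -R -g 0.001 -V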
@@ -63,19 +65,20 @@ import daiquiri
import numpy as np
import tensorflow as tf
import docopt
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
from tensorflow.python.keras.layers import Dense
import skluc.main.data.mldatasets as dataset
from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import nystrom_layer
from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import fastfood_layer
from skluc.main.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
from skluc.main.tensorflow_.utils import batch_generator
from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None):
def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None):
    printed_r_list = [str(NETWORK),
                      str(global_acc_val),
                      str(global_acc_test),
@@ -100,46 +103,16 @@ def print_result(global_acc_val=None, global_acc_test=None, training_time=None,
                      str(TRAIN_SIZE),
                      str(DROPOUT),
                      str(DATASET),
                      str(REAL_FASTFOOD)
                      str(REAL_FASTFOOD),
                      str(WEIGHTS)
                      ]
    print(",".join(printed_r_list))
    if error is None:
        exit()
def fct_dense(input_, out_dim, two_layers, activation_function=tf.nn.relu):
    with tf.variable_scope("dense_layers"):
        fc_1 = fully_connected(input_, out_dim, act=activation_function, variable_scope="fc1")
        if two_layers:
            fc_2 = fully_connected(fc_1, out_dim, act=activation_function, variable_scope="fc2")
        else:
            fc_2 = fc_1
        out = fc_2
    return out
def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, non_linearity):
    """
    Wrap the computing of the deepstrom layer

    :param input_:
    :param out_dim:
    :param subsample:
    :param kernel:
    :param kernel_params:
    :return:
    """
    out_fc = nystrom_layer(input_, subsample, W_matrix=w_matrix, output_dim=out_dim, kernel=kernel,
                           output_act=non_linearity, **kernel_params)
    return out_fc
    raise error
def fct_deepfried(input_, nb_stack, sigma, trainable=True):
    try:
        return fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=trainable)
    except Exception as e:
        logger.critical(e)
        print_result()
def get_gamma_value(arguments, dat, chi2=False):
    if arguments["--gamma"] is None:
@@ -156,7 +129,16 @@ def get_gamma_value(arguments, dat, chi2=False):
    return gamma_value
def get_input_classif_deepstrom(p_x):
def main():
    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
    tf.summary.histogram("convolved_examples", x)
    if NETWORK == "dense":
        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
    elif NETWORK == "deepstrom":
        logger.info("Selecting {} deepstrom layer function with "
                    "subsample size = {}, "
                    "output_dim = {}, "
@@ -167,15 +149,6 @@ def get_input_classif_deepstrom(p_x):
                    OUT_DIM,
                    "with" if NON_LINEAR else "without",
                    KERNEL_NAME))
    # if TRAIN_SIZE < int(NYS_SUBSAMPLE_SIZE) + 10:
    #     logger.debug("Train size is {} and nys size is {}. not ok".format(TRAIN_SIZE, NYS_SUBSAMPLE_SIZE))
    #     print_result()
    if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE:
        logger.debug("Output dim is greater than deepstrom subsample size. Aborting.")
        print_result()
    if TRAIN_SIZE is not None:
        subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
        nys_subsample = data.validation.data[subsample_indexes]
@@ -183,98 +156,36 @@ def get_input_classif_deepstrom(p_x):
        subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
        nys_subsample = data.train.data[subsample_indexes]
    logger.debug("Chosen subsample: {}".format(nys_subsample))
    if REAL_NYSTROM:
        logger.debug("Real Nystrom requested: i.e. the projection matrix uses the vanilla formula")
        if SUMED_KERNEL:
            # here the K11 matrices are added together before doing the Nystrom approximation
            added_K11 = np.zeros((nys_subsample.shape[0], nys_subsample.shape[0]))
            for g_value in GAMMA:
                added_K11 = np.add(added_K11, rbf_kernel(nys_subsample, nys_subsample, gamma=g_value))
            U, S, V = np.linalg.svd(added_K11)
            invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
                                          w_matrix=invert_root_K11, non_linearity=NON_LINEAR)
        elif STACKED_KERNEL:
            # here the Nystrom approximations are stacked
            lst_invert_root_K11 = []
            for g_value in GAMMA:
                K11 = rbf_kernel(nys_subsample, nys_subsample, gamma=g_value)
                U, S, V = np.linalg.svd(K11)
                invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
                lst_invert_root_K11.append(invert_root_K11)
            stack_K11 = np.vstack(lst_invert_root_K11)
            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
                                          w_matrix=stack_K11, non_linearity=NON_LINEAR)
        else:
            if KERNEL_NAME == "rbf":
                kernel_fct = rbf_kernel
            elif KERNEL_NAME == "linear":
                kernel_fct = linear_kernel
            elif KERNEL_NAME == "chi2_cpd":
                kernel_fct = additive_chi2_kernel
            elif KERNEL_NAME == "chi2_exp_cpd":
                kernel_fct = chi2_kernel
            elif KERNEL_NAME == "chi2_pd":
                raise NotImplementedError("Double-check that this code does not make a mess")
            elif KERNEL_NAME == "laplacian":
                kernel_fct = laplacian_kernel
            else:
                raise ValueError("Unknown kernel name: {}".format(KERNEL_NAME))
            K11 = kernel_fct(nys_subsample, nys_subsample, **kernel_dict)
            U, S, V = np.linalg.svd(K11)
            invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11,
                                          non_linearity=NON_LINEAR)
    else:
        input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
                                      w_matrix=None, non_linearity=NON_LINEAR)
    return input_classif
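# A minimal sketch of what the vanilla-Nystrom branch above computes (the toy
# data below stands in for the convolutional features; the gamma value is
# arbitrary):
#
#     import numpy as np
#     from sklearn.metrics.pairwise import rbf_kernel
#
#     rng = np.random.RandomState(0)
#     subsample = rng.randn(16, 8)                        # the m Nystrom anchor points
#     batch = rng.randn(4, 8)                             # a batch of inputs
#     K11 = rbf_kernel(subsample, subsample, gamma=0.1)   # m x m anchor Gram matrix
#     U, S, V = np.linalg.svd(K11)                        # K11 is symmetric PSD
#     invert_root_K11 = np.dot(U / np.sqrt(S), V)         # = K11^(-1/2)
#     phi = rbf_kernel(batch, subsample, gamma=0.1).dot(invert_root_K11)
#
# phi is the Nystrom feature map phi(x) = k(x, anchors) @ K11^(-1/2); the summed
# and stacked variants above respectively add the per-gamma K11 matrices before
# the SVD, or vstack the per-gamma K11^(-1/2) matrices.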
def get_input_classif_dense(p_x):
    logger.info("Selecting dense layer function with output dim = {} and activation function = {}".format(OUT_DIM, ACTIVATION_FUNCTION))
    # the two-layer case is handled outside of this function
    input_classif = fct_dense(p_x, OUT_DIM, two_layers=False, activation_function=ACTIVATION_FUNCTION)
    return input_classif
def get_input_classif_deepfriedconvnet(p_x):
    logger.debug("Selecting deepfriedconvnet layer function")
    input_classif = fct_deepfried(p_x, NB_STACK, SIGMA, trainable=not REAL_FASTFOOD)
    return input_classif
def main():
    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
    if NETWORK == "dense":
        input_classif = get_input_classif_dense(x)
    elif NETWORK == "deepstrom":
        input_classif = get_input_classif_deepstrom(x)
        representation_layer = DeepstromLayer(subsample=nys_subsample,
                                              out_dim=OUT_DIM,
                                              activation=ACTIVATION_FUNCTION,
                                              kernel_name=KERNEL_NAME,
                                              real_nystrom=REAL_NYSTROM,
                                              kernel_dict=kernel_dict)
elif NETWORK == "deepfriedconvnet":
input_classif = get_input_classif_deepfriedconvnet(x)
representation_layer = FastFoodLayer(sigma=SIGMA,
nbr_stack=NB_STACK,
trainable=not REAL_FASTFOOD)
else:
raise Exception("Not recognized network")
input_classif = representation_layer(x)
    if SIZE_SECOND_LAYER > 0:
        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
        with tf.variable_scope("second_layer"):
            input_classif_2nd_layer = fully_connected(input_classif, SIZE_SECOND_LAYER, act=ACTIVATION_FUNCTION,
                                                      variable_scope="fc")
            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
    else:
        logger.debug("No second layer")
        input_classif_2nd_layer = input_classif
logger.debug("Add softmax layer for classification")
classif, keep_prob = classification_cifar(input_classif_2nd_layer, output_dim)
with tf.variable_scope("classification"):
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
classif = Dense(output_dim)(input_drop)
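    # End to end, the head built above is:
    #     logits = Dense(output_dim)(dropout(Dense(SIZE_SECOND_LAYER)(representation_layer(x))))
    # with the second Dense skipped when SIZE_SECOND_LAYER == 0.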
    # compute the loss
    logger.debug("Add softmax layer for classification")
    with tf.name_scope("xent"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
@@ -295,18 +206,19 @@ def main():
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy_op)
    # merged_summary = tf.summary.merge_all()
    merged_summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    # Create a session for running Ops on the Graph.
    # Instantiate a SummaryWriter to output summaries and the Graph.
    # summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
    if TENSORBOARD:
        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
    # Initialize all Variable objects
    # actual learning
    with tf.Session() as sess:
        logger.info("Start training")
        # summary_writer.add_graph(sess.graph)
        if TENSORBOARD:
            summary_writer.add_graph(sess.graph)
        # Initialize all Variable objects
        sess.run(init)
        # actual learning
@@ -318,7 +230,7 @@ def main():
        start = t.time()
            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                if j % 100 == 0:
                    logger.info(
                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
@@ -326,8 +238,8 @@
                                                                                                0] / BATCH_SIZE) + 1,
                                                                                                X_batch.shape, loss,
                                                                                                acc))
                # summary_str = sess.run(merged_summary, feed_dict=feed_dict)
                # summary_writer.add_summary(summary_str, j)
                if TENSORBOARD:
                    summary_writer.add_summary(summary_str, (j+1)*(i+1))
                j += 1
logger.info("Evaluation on validation data")
@@ -384,13 +296,14 @@ if __name__ == '__main__':
    VALIDATION_SIZE = int(arguments["--validation-size"])
    REAL_NYSTROM = arguments["--real-nystrom"]
    SEED = int(arguments["--seed"])  # the seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
    TENSORBOARD = arguments["--tensorboard"]
    NYS_SUBSAMPLE_SIZE = None
    KERNEL_NAME = None
    GAMMA = None
    CONST = None
    NB_STACK = None
    kernel_dict = {}
    CIFAR_DATASET = bool(arguments["--cifar"])
    CIFAR_DATASET = bool(arguments["--cifar10"])
    CIFAR100_DATASET = bool(arguments["--cifar100"])
    MNIST_DATASET = bool(arguments["--mnist"])
    SVHN_DATASET = bool(arguments["--svhn"])
@@ -408,7 +321,7 @@ if __name__ == '__main__':
    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None
    if CIFAR_DATASET:
        DATASET = "cifar"
        DATASET = "cifar10"
    elif MNIST_DATASET:
        DATASET = "mnist"
    elif SVHN_DATASET:
@@ -418,6 +331,12 @@ if __name__ == '__main__':
    else:
        raise ValueError("no known dataset specified")
    CUT_LAYER = arguments["--cut-layer"]
    if arguments["--weights"] is None:
        WEIGHTS = DATASET
    else:
        WEIGHTS = arguments["--weights"]
    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
    if arguments["--train-size"] is not None:
@@ -431,17 +350,17 @@ if __name__ == '__main__':
    SEED_TRAIN_VALIDATION = SEED
    if CIFAR_DATASET:
        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="cifar10", cut_layer_name=CUT_LAYER)
        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    elif MNIST_DATASET:
        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        # todo make conv_pool_2 configurable
        transformer = LecunTransformer(data_name="mnist", cut_layer_name="conv_pool_2")
        transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2")
    elif SVHN_DATASET:
        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="svhn", cut_layer_name=CUT_LAYER)
        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    elif CIFAR100_DATASET:
        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="cifar100", cut_layer_name=CUT_LAYER)
        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    else:
        raise ValueError("No dataset specified")
@@ -541,5 +460,4 @@ if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        print_result()
        raise e
\ No newline at end of file
        print_result(error=e)
\ No newline at end of file
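When the new -V/--tensorboard flag is set, summaries are written to the debug_benchmark_classification directory created by tf.summary.FileWriter above; they can then be inspected with the standard TensorBoard command line:

    tensorboard --logdir debug_benchmark_classification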