# benchmark_classification.py

"""
Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.

Usage:
    benchmark_vgg dense [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size]
    benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z]
    benchmark_vgg deepstrom [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size]

Options:
    --help -h                               Display help and exit.
    -q --quiet                              Set logging level to info.
    -a value --seed value                   The seed value used for all randomization processes [default: 0]
    -t --train-size size                    Size of train set.
    -v size --validation-size size          The size of the validation set [default: 10000]
    -e numepoch --num-epoch=numepoch        The number of epoch.
    -s batchsize --batch-size=batchsize     The number of example in each batch
    -d --dropout val                        Keep probability of neurons before classif [default: 1.0]
    -D reprdim --out-dim=reprdim            The dimension of the final representation
    -f --non-linearity name                 Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]

Dense:
    -l --second-layer-size size             Says the size of the second non-linear layer [default: 0]

Deepfried convnet:
    -N nbstack --nb-stack nbstack           The number of fastfood stack for deepfriedconvnet
    -z --real-fastfood                      Tell fastfood layer to not update its weights

Deepstrom:
    -r --real-nystrom                       Says if the matrix for deepstrom should be K^(-1/2)
    -m size --nys-size size                 The number of example in the nystrom subsample.
    -n --non-linear                         Tell Nystrom to use the non linear activation function on its output.

Datasets:
    --cifar                                 Use cifar dataset
    --mnist                                 Use mnist dataset
    --svhn                                  Use svhn dataset
    --cifar100                              Use cifar100 dataset

Dataset related:
    -B --cut-layer name                     The name of the last convolutional layer when loading VGG19Transformer.

Possible kernels:
    -R --rbf-kernel                         Says if the rbf kernel should be used for nystrom.
    -L --linear-kernel                      Says if the linear kernel should be used for nystrom.
    -C --chi-square-kernel                  Says if the basic additive chi square kernel should be used for nystrom.
    -E --exp-chi-square-kernel              Says if the exponential chi square kernel should be used for nystrom.
    -P --chi-square-PD-kernel               Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
    -S --sigmoid-kernel                     Says if the sigmoid kernel should be used for nystrom.
    -A --laplacian-kernel                   Says if the laplacian kernel should be used for nystrom.
    -T --stacked-kernel                     Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
    -M --sumed-kernel                       Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.

Kernel related:
    -g gammavalue --gamma gammavalue        The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
    -c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.

"""
import logging
import sys
import time as t

import daiquiri
import numpy as np
import tensorflow as tf
import docopt
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
import skluc.main.data.mldatasets as dataset
from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import nystrom_layer
from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import fastfood_layer
from skluc.main.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage


def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None):
    """
    Print one comma-separated result line on stdout, then terminate the process.

    The line holds, in order: the network name, the five values received as arguments, and the
    module-level run settings (NUM_EPOCH, BATCH_SIZE, OUT_DIM, ..., DATASET, REAL_FASTFOOD).
    Every field is converted with ``str``, so a value that is not available shows up as ``None``.

    :param global_acc_val: accuracy on the validation set (None when the run did not get that far)
    :param global_acc_test: accuracy on the test set (None when the run did not get that far)
    :param training_time: duration of the training phase, as measured by the caller
    :param val_eval_time: duration of the evaluation on the validation set
    :param test_eval_time: duration of the evaluation on the test set
    :raises SystemExit: always, once the line has been printed
    """
    fields = (
        NETWORK,
        global_acc_val,
        global_acc_test,
        training_time,
        val_eval_time,
        test_eval_time,
        NUM_EPOCH,
        BATCH_SIZE,
        OUT_DIM,
        SIZE_SECOND_LAYER,
        KERNEL_NAME,
        GAMMA,
        CONST,
        NB_STACK,
        NYS_SUBSAMPLE_SIZE,
        VALIDATION_SIZE,
        SEED,
        ACTIVATION_FUNCTION,
        NON_LINEAR,
        REAL_NYSTROM,
        CUT_LAYER,
        TRAIN_SIZE,
        DROPOUT,
        DATASET,
        REAL_FASTFOOD,
    )
    print(",".join(str(field) for field in fields))
    # sys.exit() rather than the bare exit(): the latter is injected by the ``site`` module for
    # interactive use and is missing when the interpreter runs without it (python -S, frozen apps).
    sys.exit()


def fct_dense(input_, out_dim, two_layers, activation_function=tf.nn.relu):
    """
    Stack one or two fully connected layers under the "dense_layers" variable scope.

    :param input_: input of the first layer ("fc1")
    :param out_dim: output size used for every layer built here
    :param two_layers: when truthy, a second layer ("fc2") is applied on top of the first one
    :param activation_function: activation handed to each fully connected layer
    :return: output of the last layer that was built
    """
    with tf.variable_scope("dense_layers"):
        out = fully_connected(input_, out_dim, act=activation_function, variable_scope="fc1")
        if two_layers:
            out = fully_connected(out, out_dim, act=activation_function, variable_scope="fc2")
    return out


def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, non_linearity):
    """
    Build the deepstrom layer by delegating to ``nystrom_layer``.

    :param input_: forwarded as the first positional argument
    :param out_dim: forwarded as ``output_dim``
    :param subsample: forwarded as the second positional argument
    :param kernel: forwarded as ``kernel``
    :param kernel_params: mapping expanded into additional keyword arguments
    :param w_matrix: forwarded as ``W_matrix``
    :param non_linearity: forwarded as ``output_act``
    :return: the value returned by ``nystrom_layer``
    """
    return nystrom_layer(
        input_,
        subsample,
        W_matrix=w_matrix,
        output_dim=out_dim,
        kernel=kernel,
        output_act=non_linearity,
        **kernel_params
    )


def fct_deepfried(input_, nb_stack, sigma, trainable=True):
    """
    Build the fastfood layer; on any failure, log it and emit an empty result line.

    :param input_: forwarded as the first positional argument of ``fastfood_layer``
    :param nb_stack: forwarded as ``nbr_stack``
    :param sigma: forwarded as the second positional argument of ``fastfood_layer``
    :param trainable: forwarded as ``trainable``
    :return: the value returned by ``fastfood_layer``
    """
    try:
        layer = fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=trainable)
    except Exception as err:
        logger.critical(err)
        # print_result() with no argument prints the settings with None metrics.
        print_result()
    else:
        return layer


def get_gamma_value(arguments, dat, chi2=False):
    """
    Return the gamma to use: the one given on the command line, or a heuristic one.

    :param arguments: parsed command line; only the "--gamma" entry is read
    :param dat: dataset object; ``dat.train.data`` is used when gamma has to be computed
    :param chi2: select ``compute_euristic_sigma_chi2`` instead of ``compute_euristic_sigma``
    :return: the evaluated "--gamma" expression, or 1 / (heuristic sigma of the train data)
    """
    if arguments["--gamma"] is not None:
        # NOTE(review): eval() executes arbitrary code coming from the command line. It lets the
        # user pass any Python expression (the stacked/summed kernels iterate over GAMMA, so a
        # list is presumably expected there); do not expose this option to untrusted input.
        gamma_value = eval(arguments["--gamma"])
    else:
        logger.debug("Gamma arguments is None. Need to compute it.")
        sigma_heuristic = compute_euristic_sigma_chi2 if chi2 else compute_euristic_sigma
        gamma_value = 1./sigma_heuristic(dat.train.data)

    logger.debug("Gamma value is {}".format(gamma_value))
    return gamma_value


def _inverse_square_root(gram_matrix):
    """
    Return ``U . diag(1 / sqrt(S)) . V`` cast to float32, with ``U, S, V = np.linalg.svd(gram_matrix)``.
    """
    left, singular_values, right = np.linalg.svd(gram_matrix)
    return np.dot(left / np.sqrt(singular_values), right).astype(np.float32)


def get_input_classif_deepstrom(p_x):
    """
    Build the deepstrom representation layer on top of ``p_x``.

    The configuration is read from module-level names (REAL_NYSTROM, NYS_SUBSAMPLE_SIZE, OUT_DIM,
    NON_LINEAR, KERNEL, KERNEL_NAME, kernel_dict, GAMMA, SUMED_KERNEL, STACKED_KERNEL, TRAIN_SIZE)
    and from the global ``data`` object.

    The nystrom subsample is drawn from the validation set when TRAIN_SIZE is set, from the train
    set otherwise. When REAL_NYSTROM is falsy the layer receives ``w_matrix=None``; otherwise the
    matrix is computed here with numpy from the kernel matrix of the subsample.

    :param p_x: input of the layer (the ``x`` placeholder in ``main``)
    :return: the output of ``fct_deepstrom``
    :raises NotImplementedError: real nystrom requested with the "chi2_pd" kernel
    :raises ValueError: real nystrom requested with a kernel name that has no numpy counterpart here
    """
    logger.info("Selecting {} deepstrom layer function with "
                "subsample size = {}, "
                "output_dim = {}, "
                "{} activation function "
                "and kernel = {}"
                .format("real" if REAL_NYSTROM else "learned",
                        NYS_SUBSAMPLE_SIZE,
                        OUT_DIM,
                        "with" if NON_LINEAR else "without",
                        KERNEL_NAME))

    # Disabled guard, kept for reference:
    # if TRAIN_SIZE < int(NYS_SUBSAMPLE_SIZE) + 10:
    #     logger.debug("Train size is {} and nys size is {}. not ok".format(TRAIN_SIZE, NYS_SUBSAMPLE_SIZE))
    #     print_result()

    if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE:
        logger.debug("Output dim is greater than deepstrom subsample size. Aborting.")
        print_result()

    if TRAIN_SIZE is None:
        subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
        nys_subsample = data.train.data[subsample_indexes]
    else:
        subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
        nys_subsample = data.validation.data[subsample_indexes]
    logger.debug("Chosen subsample: {}".format(nys_subsample))

    if not REAL_NYSTROM:
        # Learned variant: no precomputed matrix is handed to the layer.
        return fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
                             w_matrix=None, non_linearity=NON_LINEAR)

    logger.debug("Real nystrom asked: eg projection matrix has the vanilla formula")
    if SUMED_KERNEL:
        # The rbf kernel matrices (one per gamma) are added before the nystrom approximation.
        nb_examples = nys_subsample.shape[0]
        summed_gram = np.zeros((nb_examples, nb_examples))
        for g_value in GAMMA:
            summed_gram = np.add(summed_gram, rbf_kernel(nys_subsample, nys_subsample, gamma=g_value))
        projection = _inverse_square_root(summed_gram)
    elif STACKED_KERNEL:
        # One nystrom matrix per gamma, stacked vertically.
        projection = np.vstack([
            _inverse_square_root(rbf_kernel(nys_subsample, nys_subsample, gamma=g_value))
            for g_value in GAMMA
        ])
    else:
        if KERNEL_NAME == "chi2_pd":
            raise NotImplementedError("Bien verifier que ce code ne fait pas bordel")
        numpy_kernels = {
            "rbf": rbf_kernel,
            "linear": linear_kernel,
            "chi2_cpd": additive_chi2_kernel,
            "chi2_exp_cpd": chi2_kernel,
            "laplacian": laplacian_kernel,
        }
        if KERNEL_NAME not in numpy_kernels:
            raise ValueError("Unknown kernel name: {}".format(KERNEL_NAME))
        kernel_fct = numpy_kernels[KERNEL_NAME]
        projection = _inverse_square_root(kernel_fct(nys_subsample, nys_subsample, **kernel_dict))

    return fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
                         w_matrix=projection, non_linearity=NON_LINEAR)


def get_input_classif_dense(p_x):
    """
    Build the single dense representation layer on top of ``p_x``.

    Uses the module-level OUT_DIM and ACTIVATION_FUNCTION. The optional second layer is not
    added here: ``main`` takes care of it.

    :param p_x: input of the dense layer
    :return: the output of ``fct_dense`` called with ``two_layers=False``
    """
    message = "Selecting dense layer function with output dim = {} and activation function = {}"
    logger.info(message.format(OUT_DIM, ACTIVATION_FUNCTION))
    return fct_dense(p_x, OUT_DIM, two_layers=False, activation_function=ACTIVATION_FUNCTION)


def get_input_classif_deepfriedconvnet(p_x):
    """
    Build the deepfriedconvnet representation layer on top of ``p_x``.

    Uses the module-level NB_STACK and SIGMA; the layer is trainable unless REAL_FASTFOOD is set.

    :param p_x: input of the fastfood layer
    :return: the output of ``fct_deepfried``
    """
    logger.debug("Selecting deepfriedconvnet layer function")
    is_trainable = not REAL_FASTFOOD
    return fct_deepfried(p_x, NB_STACK, SIGMA, trainable=is_trainable)


def _mean_accuracy(sess, accuracy_op, x, y, keep_prob, features, labels, batch_size=1000):
    """
    Return the accuracy over ``(features, labels)``, evaluated batch by batch.

    Each batch accuracy is weighted by the number of examples in the batch, so a smaller batch
    does not bias the result the way a plain mean of the per-batch accuracies would.

    :return: the weighted mean accuracy as a float, or None when no example was evaluated
    """
    weighted_sum = 0.0
    nb_examples = 0
    for X_batch, Y_batch in batch_generator(features, labels, batch_size, False):
        # keep_prob is fed with 1.0, as for every evaluation in this script.
        batch_accuracy = sess.run(accuracy_op, feed_dict={x: X_batch, y: Y_batch, keep_prob: 1.0})
        batch_len = X_batch.shape[0]
        weighted_sum += float(batch_accuracy) * batch_len
        nb_examples += batch_len
    return weighted_sum / nb_examples if nb_examples else None


def main():
    """
    Build the graph for the selected network, train it, evaluate it and print the result line.

    Relies on the module-level configuration set in the ``__main__`` section (NETWORK,
    SIZE_SECOND_LAYER, ACTIVATION_FUNCTION, NUM_EPOCH, BATCH_SIZE, DROPOUT) and on the global
    ``data`` object. Training minimizes the softmax cross entropy with Adam (learning rate 1e-4).
    The function ends with ``print_result``, which terminates the process.

    :raises Exception: when NETWORK is not one of "dense", "deepstrom", "deepfriedconvnet"
    """
    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]

    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")

    if NETWORK == "dense":
        input_classif = get_input_classif_dense(x)
    elif NETWORK == "deepstrom":
        input_classif = get_input_classif_deepstrom(x)
    elif NETWORK == "deepfriedconvnet":
        input_classif = get_input_classif_deepfriedconvnet(x)
    else:
        raise Exception("Not recognized network")

    if SIZE_SECOND_LAYER > 0:
        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
        with tf.variable_scope("second_layer"):
            input_classif_2nd_layer = fully_connected(input_classif, SIZE_SECOND_LAYER, act=ACTIVATION_FUNCTION,
                                                      variable_scope="fc")
    else:
        logger.debug("No second layer")
        input_classif_2nd_layer = input_classif

    logger.debug("Add softmax layer for classification")
    classif, keep_prob = classification_cifar(input_classif_2nd_layer, output_dim)

    # loss computation
    with tf.name_scope("xent"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
            name="xentropy_mean")
        tf.summary.scalar('loss-xent', cross_entropy)

    # todo learning rate as hyperparameter
    # gradient computation
    with tf.name_scope("train"):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
                                                                              global_step=global_step)

    # accuracy computation
    with tf.name_scope("accuracy"):
        predictions = tf.argmax(classif, 1)
        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy_op)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        logger.info("Start training")
        # Initialize all Variable objects
        sess.run(init)

        # Only used for the progress message; does not change during training.
        nb_batches = int(data.train[0].shape[0] / BATCH_SIZE) + 1
        global_start = t.time()
        for i in range(NUM_EPOCH):
            logger.debug(memory_usage())
            batches = batch_generator(data.train[0], data.train[1], BATCH_SIZE, False)
            for j, (X_batch, Y_batch) in enumerate(batches):
                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
                if j % 100 == 0:
                    logger.info(
                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(
                            i, NUM_EPOCH, j + 1, nb_batches, X_batch.shape, loss, acc))

        logger.info("Evaluation on validation data")
        training_time = t.time() - global_start
        val_eval_start = t.time()
        global_acc_val = _mean_accuracy(sess, accuracy_op, x, y, keep_prob,
                                        data.validation.data, data.validation.labels)
        val_eval_time = t.time() - val_eval_start

        logger.info("Evaluation on test data")
        test_eval_start = t.time()
        global_acc_test = _mean_accuracy(sess, accuracy_op, x, y, keep_prob,
                                         data.test.data, data.test.labels)
        test_eval_time = t.time() - test_eval_start

    print_result(global_acc_val=global_acc_val,
                 global_acc_test=global_acc_test,
                 training_time=training_time,
                 val_eval_time=val_eval_time,
                 test_eval_time=test_eval_time)

if __name__ == '__main__':
    # Script entry point: parse the command line into module-level settings, load and transform
    # the dataset, configure the selected network, then run main().
    logger.debug("Command line: {}".format(' '.join(sys.argv)))
    arguments = docopt.docopt(__doc__)
    logger.debug(arguments)
    if arguments["--quiet"]:
        daiquiri.setup(level=logging.INFO)

    # ---- settings read directly from the command line ----
    NUM_EPOCH = int(arguments["--num-epoch"])
    BATCH_SIZE = int(arguments["--batch-size"])
    OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
    SIZE_SECOND_LAYER = int(arguments["--second-layer-size"])
    RBF_KERNEL = arguments["--rbf-kernel"]
    LINEAR_KERNEL = arguments["--linear-kernel"]
    CHI2_KERNEL = arguments["--chi-square-kernel"]
    CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
    STACKED_KERNEL = arguments["--stacked-kernel"]
    SUMED_KERNEL = arguments["--sumed-kernel"]
    VALIDATION_SIZE = int(arguments["--validation-size"])
    REAL_NYSTROM = arguments["--real-nystrom"]
    SEED = int(arguments["--seed"])  # The seed change the data ordering in the dataset (so train/validation/test split may change with != seeds)
    # Network-specific settings: they stay None unless the selected network sets them below.
    NYS_SUBSAMPLE_SIZE = None
    KERNEL_NAME = None
    GAMMA = None
    CONST = None
    NB_STACK = None
    kernel_dict = {}
    CIFAR_DATASET = bool(arguments["--cifar"])
    CIFAR100_DATASET = bool(arguments["--cifar100"])
    MNIST_DATASET = bool(arguments["--mnist"])
    SVHN_DATASET = bool(arguments["--svhn"])
    REAL_FASTFOOD = bool(arguments["--real-fastfood"])
    test_eval_time = None
    val_eval_time = None
    if arguments["--non-linearity"] == "relu":
        ACTIVATION_FUNCTION = tf.nn.relu
    elif arguments["--non-linearity"] == "tanh":
        ACTIVATION_FUNCTION = tf.nn.tanh
    elif arguments["--non-linearity"] is None:
        ACTIVATION_FUNCTION = tf.nn.relu
    else:
        raise ValueError("Not known --non-linearity arg: {}".format(arguments["--non-linearity"]))
    # The deepstrom output activation is only applied when --non-linear is given.
    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None

    if CIFAR_DATASET:
        DATASET = "cifar"
    elif MNIST_DATASET:
        DATASET = "mnist"
    elif SVHN_DATASET:
        DATASET = "svhn"
    elif CIFAR100_DATASET:
        DATASET = "cifar100"
    else:
        raise ValueError("no know dataset specified")
    CUT_LAYER = arguments["--cut-layer"]
    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
    if arguments["--train-size"] is not None:
        TRAIN_SIZE = int(arguments["--train-size"])
    else:
        TRAIN_SIZE = arguments["--train-size"]
    global_acc_val = None
    global_acc_test = None
    training_time = None

    # ---- dataset and feature transformer ----
    SEED_TRAIN_VALIDATION = SEED
    if CIFAR_DATASET:
        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="cifar10", cut_layer_name=CUT_LAYER)
    elif MNIST_DATASET:
        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        # todo make conv_pool2 configurable
        transformer = LecunTransformer(data_name="mnist", cut_layer_name="conv_pool_2")
    elif SVHN_DATASET:
        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="svhn", cut_layer_name=CUT_LAYER)
    elif CIFAR100_DATASET:
        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        transformer = VGG19Transformer(data_name="cifar100", cut_layer_name=CUT_LAYER)
    else:
        raise ValueError("No dataset specified")

    data.load()  # todo handle the flatten bug
    if not data.is_image():
        data.to_image()  # todo handle the case where the data are already images (flatten them in every case?)
    data.data_astype(np.float32)
    data.labels_astype(np.float32)
    data.normalize()
    logger.debug("train dataset shape: {}".format(data.train.data.shape))
    data.apply_transformer(transformer)
    data.normalize()
    data.to_one_hot()
    data.flatten()
    data.data_astype(np.float32)
    # Builtin int: the np.int alias it replaces was removed from NumPy (1.24) and was the same type.
    data.labels_astype(int)
    if TRAIN_SIZE is not None:
        data.reduce_data_size(int(TRAIN_SIZE))

    logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
    logger.info("Using dataset {} with validation size {} and seed for spliting set {}.".format(data.s_name, data.validation_size, data.seed))
    logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
    logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
    logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
    logger.debug("Sample of label: {}".format(data.train[1][0]))
    # todo separated function for parameters parsing

    # ---- network-specific configuration ----
    if arguments["dense"]:
        NETWORK = "dense"
    elif arguments["deepstrom"]:
        NETWORK = "deepstrom"
        NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
        if OUT_DIM is None:
            OUT_DIM = NYS_SUBSAMPLE_SIZE
        # KERNEL is the tensorflow kernel given to the layer; kernel_dict holds its keyword arguments.
        if RBF_KERNEL:
            KERNEL = tf_rbf_kernel
            KERNEL_NAME = "rbf"
            GAMMA = get_gamma_value(arguments, data)
            kernel_dict = {"gamma": GAMMA}
        elif LINEAR_KERNEL:
            KERNEL = tf_linear_kernel
            KERNEL_NAME = "linear"
        elif CHI2_KERNEL:
            KERNEL = tf_chi_square_CPD
            KERNEL_NAME = "chi2_cpd"
        elif CHI2_EXP_KERNEL:
            KERNEL = tf_chi_square_CPD_exp
            KERNEL_NAME = "chi2_exp_cpd"
            GAMMA = get_gamma_value(arguments, data, chi2=True)
            kernel_dict = {"gamma": GAMMA}
        elif CHI2_PD_KERNEL:
            KERNEL = tf_chi_square_PD
            KERNEL_NAME = "chi2_pd"
        elif SIGMOID_KERNEL:
            KERNEL = tf_sigmoid_kernel
            KERNEL_NAME = "sigmoid"
            GAMMA = get_gamma_value(arguments, data)
            CONST = float(arguments["--intercept-constant"])
            kernel_dict = {"gamma": GAMMA, "constant": CONST}
        elif LAPLACIAN_KERNEL:
            KERNEL = tf_laplacian_kernel
            KERNEL_NAME = "laplacian"
            GAMMA = get_gamma_value(arguments, data)
            kernel_dict = {"gamma": np.sqrt(GAMMA)}
        elif STACKED_KERNEL:
            # GAMMA is iterated over below: one rbf kernel is built per value it yields.
            GAMMA = get_gamma_value(arguments, data)


            def KERNEL(X, Y):
                return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
                                           [{"gamma": g_value} for g_value in GAMMA])


            KERNEL_NAME = "stacked"

        elif SUMED_KERNEL:
            # Same as the stacked case, with the rbf kernels combined by tf_sum_of_kernels.
            GAMMA = get_gamma_value(arguments, data)


            def KERNEL(X, Y):
                return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
                                         [{"gamma": g_value} for g_value in GAMMA])


            KERNEL_NAME = "summed"
        else:
            raise Exception("No kernel function specified for deepstrom")

    elif arguments["deepfriedconvnet"]:
        NETWORK = "deepfriedconvnet"
        NB_STACK = int(arguments["--nb-stack"])
        GAMMA = get_gamma_value(arguments, data)
        SIGMA = 1 / GAMMA
    else:
        raise Exception("Not recognized network")

    try:
        main()
    except Exception as e:
        # print_result() terminates the process, so the failure is logged first: otherwise the
        # only trace of the error would be a result line filled with None.
        logger.critical("Benchmark failed: {!r}".format(e))
        print_result()
        raise