diff --git a/main/experiments/benchmark_vgg_multiview.py b/main/experiments/benchmark_vgg_multiview.py
index 99fa2b60e3c39fc0d0e3109a81799aa5dff8ca94..3af8145d69aa4943b2c67d6cdabf6ca46bbc259f 100644
--- a/main/experiments/benchmark_vgg_multiview.py
+++ b/main/experiments/benchmark_vgg_multiview.py
@@ -2,7 +2,8 @@
 Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
 
 Usage:
-    benchmark_vgg deepstrom -f name [-q] [--cifar100|--cifar|--mnist|--svhn] [--w-after] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n]
+    benchmark_vgg deepstrom -f name [-q] [--cifar100|--cifar|--mnist|--svhn] [--w-after] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size]
+    (-Z|-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-y value]
 
 Options:
     --help -h                               Display help and exit.
@@ -39,33 +40,37 @@ Possible kernels:
    -P --chi-square-PD-kernel               Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
    -S --sigmoid-kernel                     Says it the sigmoid kernel should be used for nystrom.
    -A --laplacian-kernel                   Says if the laplacian kernel should be used for nystrom.
+   -Z --polynomial-kernel                  Says if the polynomial kernel should be used for nystrom.
    -T --stacked-kernel                     Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
    -M --sumed-kernel                       Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
 
 Kernel related:
    -g gammavalue --gamma gammavalue        The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
    -c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.
+   -y value --degree value                 The value of the degree for polynomial kernel [default: 2]
 """
 import logging
 import sys
 import time as t
+
+import matplotlib.pyplot as plt
 import daiquiri
 import docopt
 import numpy as np
 import tensorflow as tf
-from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
+from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel, polynomial_kernel
 
-import skluc.data.mldatasets as dataset
-from skluc.data.transformation import VGG19Cifar10Transformer, LecunMnistTransformer, VGG19Cifar10BadTransformer, \
-    VGG19Cifar10BadTransformerV2, VGG19Cifar10BadTransformerV3, VGG19Cifar10BadTransformerV4, VGG19SvhnTransformer, \
-    VGG19Cifar100Transformer
-from skluc.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
-    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
-from skluc.tensorflow_.kernel_approximation import nystrom_layer, fastfood_layer
-from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
-from skluc.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2
+import skluc.main.data.mldatasets as dataset
+from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
+from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
+    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels, tf_polynomial_kernel
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import nystrom_layer
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import fastfood_layer
+from skluc.main.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
+from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2
 
 
 def print_result():
@@ -90,7 +95,8 @@ def print_result():
         str(DROPOUT),
         str(DATASET),
         str(WAFTER),
-        str(FUSING)
+        str(FUSING),
+        str(arguments["--degree"])
     ]
     print(",".join(printed_r_list))
     exit()
@@ -151,7 +157,7 @@ def get_gamma_value(arguments, dat, chi2=False):
 
 
 if __name__ == '__main__':
-    # todo special treat for each type of execution
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
    arguments = docopt.docopt(__doc__)
    if arguments["--quiet"]:
        daiquiri.setup(level=logging.INFO)
@@ -165,6 +171,7 @@
    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
+   POLYNOMIAL_KERNEL = arguments["--polynomial-kernel"]
    STACKED_KERNEL = arguments["--stacked-kernel"]
    SUMED_KERNEL = arguments["--sumed-kernel"]
    VALIDATION_SIZE = int(arguments["--validation-size"])
@@ -210,39 +217,32 @@
    SEED_TRAIN_VALIDATION = SEED
    if CIFAR_DATASET:
        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        if BAD_REPR is None or int(BAD_REPR) == 0:
-            # todo faire quelquechose pour ces "bad repr"
-            # parametre de bad representation et une seule classe?
-            transformer = VGG19Cifar10Transformer
-        elif int(BAD_REPR) == 1:
-            transformer = VGG19Cifar10BadTransformer
-        elif int(BAD_REPR) == 2:
-            transformer = VGG19Cifar10BadTransformerV2
-        elif int(BAD_REPR) == 3:
-            transformer = VGG19Cifar10BadTransformerV3
-        elif int(BAD_REPR) == 4:
-            transformer = VGG19Cifar10BadTransformerV4
-        else:
-            raise ValueError("Not known transformer value: {}".format(BAD_REPR))
+        transformer = VGG19Transformer(data_name="cifar", cut_layer_name=BAD_REPR)
    elif MNIST_DATASET:
        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        # todo make the cut layer (conv_pool_2) configurable
+        transformer = LecunTransformer(data_name="mnist", cut_layer_name="conv_pool_2")
    elif SVHN_DATASET:
        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19SvhnTransformer
+        transformer = VGG19Transformer(data_name="svhn", cut_layer_name=BAD_REPR)
    elif CIFAR100_DATASET:
        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Cifar100Transformer
+        transformer = VGG19Transformer(data_name="cifar100", cut_layer_name=BAD_REPR)
+
    else:
        raise ValueError("No dataset specified")
 
    data.load()
-    data.flatten()
    data.to_image()  # todo gérer le cas où ce sont déjà des images (les flatteniser dans tous les cas?)
    data.data_astype(np.float32)
    data.labels_astype(np.float32)
    data.normalize()
+    # plt.imshow(data.train.data[0])
+    # plt.show()
    logger.debug("train dataset shape: {}".format(data.train.data.shape))
+
+    # exit()
+
    data.apply_transformer(transformer)
    data.normalize()
    data.to_one_hot()
@@ -315,12 +315,13 @@
            x_slice = tf.expand_dims(x_slice, axis=-1)
            logger.debug("x slice shape: {}".format(x_slice.shape))
 
-            dataset_slice = data.train.data[:, :, :, i]
+            dataset_slice = data.train.data[:, :, :, i]  # type: np.ndarray
+            flattened_dataset_slice = dataset_slice.reshape(dataset_slice.shape[0], -1)
 
            if RBF_KERNEL:
                KERNEL = tf_rbf_kernel
                KERNEL_NAME = "rbf"
-                GAMMA = get_gamma_value(arguments, dataset_slice)
+                GAMMA = get_gamma_value(arguments, flattened_dataset_slice)
                kernel_dict = {"gamma": GAMMA}
            elif LINEAR_KERNEL:
                KERNEL = tf_linear_kernel
@@ -347,6 +348,15 @@
                KERNEL_NAME = "laplacian"
                GAMMA = get_gamma_value(arguments, dataset_slice)
                kernel_dict = {"gamma": np.sqrt(GAMMA)}
+            elif POLYNOMIAL_KERNEL:
+                KERNEL = tf_polynomial_kernel
+                KERNEL_NAME = "polynomial2"
+
+                kernel_dict = {
+                    "degree": int(arguments["--degree"]),
+                    "coef0": 1,
+                    "gamma": None
+                }  # beware: coef0 and gamma are hard-coded to match the defaults of scikit-learn's polynomial kernel
            elif STACKED_KERNEL:
                GAMMA = get_gamma_value(arguments, dataset_slice)
 
@@ -410,6 +420,8 @@
 
            elif KERNEL_NAME == "laplacian":
                kernel_fct = laplacian_kernel
+            elif KERNEL_NAME == "polynomial2":
+                kernel_fct = polynomial_kernel
            else:
                raise ValueError("Unknown kernel name: {}".format(KERNEL_NAME))
            K11 = kernel_fct(nys_subsample_slice, nys_subsample_slice, **kernel_dict)
@@ -461,7 +473,7 @@
        # calcul du gradient
        with tf.name_scope("train"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
-            train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+            train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cross_entropy,
                                                                                   global_step=global_step)
 
        # calcul de l'accuracy
@@ -494,7 +506,7 @@
            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
-                if j % 100 == 0:
+                if j % 1 == 0:
                    logger.info(
                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
                                                                                                int(data.train[0].shape[
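
Review note: for the new -Z path, K11 on the Nyström subsample is computed with scikit-learn's polynomial_kernel while the layer itself uses tf_polynomial_kernel with the same kernel_dict, so the two implementations must agree, including the gamma=None convention (scikit-learn then uses 1 / n_features). Below is a minimal sketch of a TF kernel with matching semantics plus a consistency check; the function name and the check are illustrative, not part of the patch or of skluc:

    import numpy as np
    import tensorflow as tf
    from sklearn.metrics.pairwise import polynomial_kernel

    def tf_polynomial_kernel_sketch(X, Y, degree=2, gamma=None, coef0=1):
        """Polynomial kernel with scikit-learn conventions: (gamma * <x, y> + coef0) ** degree."""
        if gamma is None:
            # scikit-learn defaults gamma to 1 / n_features when it is None
            gamma = 1.0 / int(X.shape[-1])
        return tf.pow(gamma * tf.matmul(X, Y, transpose_b=True) + coef0, degree)

    # Consistency check against scikit-learn on random data
    # (TF1-style session, matching the tf.train.AdamOptimizer usage in this script).
    x = np.random.rand(5, 8).astype(np.float32)
    with tf.Session() as sess:
        k_tf = sess.run(tf_polynomial_kernel_sketch(tf.constant(x), tf.constant(x), degree=2))
    k_sk = polynomial_kernel(x, x, degree=2, gamma=None, coef0=1)
    assert np.allclose(k_tf, k_sk, atol=1e-5)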
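To exercise the new code path, an illustrative invocation following the docstring's usage pattern (dataset choice and all size/epoch values here are placeholders, not recommended settings):

    python main/experiments/benchmark_vgg_multiview.py deepstrom -f poly_run --cifar -e 100 -s 64 -D 64 -m 500 -Z -y 3

Two consequences of the hard-coded kernel_dict are worth noting: -g/--gamma is ignored when -Z is selected, since gamma is fixed to None (scikit-learn's 1 / n_features convention), and omitting -y falls back to the default degree of 2, which is what the result name "polynomial2" refers to.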