diff --git a/main/experiments/benchmark_classification.py b/main/experiments/benchmark_classification.py
index a3f60e507214853f483d5fd677b33a1ab70d4c46..e41a45b8741a94e2b22129e460b05fab179975b6 100644
--- a/main/experiments/benchmark_classification.py
+++ b/main/experiments/benchmark_classification.py
@@ -2,13 +2,14 @@
 Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
 
 Usage:
-    benchmark_vgg dense [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size]
-    benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z]
-    benchmark_vgg deepstrom [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size]
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V]
 
 Options:
     --help -h                               Display help and exit.
     -q --quiet                              Set logging level to info.
+    -V --tensorboard                        Write tensorboard logs.
     -a value --seed value                   The seed value used for all randomization processed [default: 0]
     -t --train-size size                    Size of train set.
     -v size --validation-size size          The size of the validation set [default: 10000]
@@ -31,13 +32,14 @@ Deepstrom:
     -n --non-linear                         Tell Nystrom to use the non linear activation function on its output.
 
 Datasets:
-    --cifar                                 Use cifar dataset
+    --cifar10                               Use cifar10 dataset
     --mnist                                 Use mnist dataset
     --svhn                                  Use svhn dataset
     --cifar100                              Use cifar100 dataset
 
 Dataset related:
     -B --cut-layer name                     The name of the last convolutional layer when loading VVG19Transformer.
+    -W --weights name                       The name of the dataset whose pre-trained weights should be loaded.
 
 Possible kernels:
     -R --rbf-kernel                         Says if the rbf kernel should be used for nystrom.
@@ -63,19 +65,20 @@ import daiquiri
 import numpy as np
 import tensorflow as tf
 import docopt
-from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
+from tensorflow.python.keras.layers import Dense
+
 import skluc.main.data.mldatasets as dataset
 from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
 from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
-from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import nystrom_layer
-from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import fastfood_layer
-from skluc.main.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.tensorflow_.utils import batch_generator
 from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
     tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
 from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
 
 
-def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None):
+def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None):
     printed_r_list = [str(NETWORK),
                       str(global_acc_val),
                       str(global_acc_test),
@@ -100,45 +103,15 @@ def print_result(global_acc_val=None, global_acc_test=None, training_time=None,
                       str(TRAIN_SIZE),
                       str(DROPOUT),
                       str(DATASET),
-                      str(REAL_FASTFOOD)
+                      str(REAL_FASTFOOD),
+                      str(WEIGHTS)
                       ]
     print(",".join(printed_r_list))
-    exit()
-
-
-def fct_dense(input_, out_dim, two_layers, activation_function=tf.nn.relu):
-    with tf.variable_scope("dense_layers"):
-        fc_1 = fully_connected(input_, out_dim, act=activation_function, variable_scope="fc1")
-        if two_layers:
-            fc_2 = fully_connected(fc_1, out_dim, act=activation_function, variable_scope="fc2")
-        else:
-            fc_2 = fc_1
-        out = fc_2
-    return out
-
-
-def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, non_linearity):
-    """
-    Wrap the computing of the deepstrom layer
-
-    :param input_:
-    :param out_dim:
-    :param subsample:
-    :param kernel:
-    :param kernel_params:
-    :return:
-    """
-    out_fc = nystrom_layer(input_, subsample, W_matrix=w_matrix, output_dim=out_dim, kernel=kernel, output_act=non_linearity, **kernel_params)
-    return out_fc
-
-
-def fct_deepfried(input_, nb_stack, sigma, trainable=True):
+    if error is None:
+        exit()
+    else:
+        raise error
-    try:
-        return fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=trainable)
-    except Exception as e:
-        logger.critical(e)
-        print_result()
 
 
 def get_gamma_value(arguments, dat, chi2=False):
@@ -156,125 +129,63 @@ def get_gamma_value(arguments, dat, chi2=False):
     return gamma_value
 
 
-def get_input_classif_deepstrom(p_x):
-    logger.info("Selecting {} deepstrom layer function with "
-                "subsample size = {}, "
-                "output_dim = {}, "
-                "{} activation function "
-                "and kernel = {}"
-                .format("real" if REAL_NYSTROM else "learned",
-                        NYS_SUBSAMPLE_SIZE,
-                        OUT_DIM,
-                        "with" if NON_LINEAR else "without",
-                        KERNEL_NAME))
-
-    # if TRAIN_SIZE < int(NYS_SUBSAMPLE_SIZE) + 10:
-    #     logger.debug("Train size is {} and nys size is {}. not ok".format(TRAIN_SIZE, NYS_SUBSAMPLE_SIZE))
-    #     print_result()
-
-    if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE:
-        logger.debug("Output dim is greater than deepstrom subsample size. Aborting.")
-        print_result()
-
-    if TRAIN_SIZE is not None:
-        subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
-        nys_subsample = data.validation.data[subsample_indexes]
-    else:
-        subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
-        nys_subsample = data.train.data[subsample_indexes]
-    logger.debug("Chosen subsample: {}".format(nys_subsample))
-
-    if REAL_NYSTROM:
-        logger.debug("Real nystrom asked: eg projection matrix has the vanilla formula")
-        if SUMED_KERNEL:
-            # here K11 matrix are added before doing nystrom approximation
-            added_K11 = np.zeros((nys_subsample.shape[0], nys_subsample.shape[0]))
-            for g_value in GAMMA:
-                added_K11 = np.add(added_K11, rbf_kernel(nys_subsample, nys_subsample, gamma=g_value))
-            U, S, V = np.linalg.svd(added_K11)
-            invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
-            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
-                                          w_matrix=invert_root_K11, non_linearity=NON_LINEAR)
-        elif STACKED_KERNEL:
-            # here nystrom approximations are stacked
-            lst_invert_root_K11 = []
-            for g_value in GAMMA:
-                K11 = rbf_kernel(nys_subsample, nys_subsample, gamma=g_value)
-                U, S, V = np.linalg.svd(K11)
-                invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
-                lst_invert_root_K11.append(invert_root_K11)
-            stack_K11 = np.vstack(lst_invert_root_K11)
-            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
-                                          w_matrix=stack_K11, non_linearity=NON_LINEAR)
-
-        else:
-            if KERNEL_NAME == "rbf":
-                kernel_fct = rbf_kernel
-            elif KERNEL_NAME == "linear":
-                kernel_fct = linear_kernel
-            elif KERNEL_NAME == "chi2_cpd":
-                kernel_fct = additive_chi2_kernel
-            elif KERNEL_NAME == "chi2_exp_cpd":
-                kernel_fct = chi2_kernel
-            elif KERNEL_NAME == "chi2_pd":
-                raise NotImplementedError("Bien verifier que ce code ne fait pas bordel")
-
-            elif KERNEL_NAME == "laplacian":
-                kernel_fct = laplacian_kernel
-            else:
-                raise ValueError("Unknown kernel name: {}".format(KERNEL_NAME))
-            K11 = kernel_fct(nys_subsample, nys_subsample, **kernel_dict)
-            U, S, V = np.linalg.svd(K11)
-            invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32)
-            input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11,
-                                          non_linearity=NON_LINEAR)
-    else:
-        input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict,
-                                      w_matrix=None, non_linearity=NON_LINEAR)
-    return input_classif
-
-
-def get_input_classif_dense(p_x):
-    logger.info("Selecting dense layer function with output dim = {} and activation function = {}".format(OUT_DIM, ACTIVATION_FUNCTION))
-    # two layers is handled outside of here
-    input_classif = fct_dense(p_x, OUT_DIM, two_layers=False, activation_function=ACTIVATION_FUNCTION)
-    return input_classif
-
-
-def get_input_classif_deepfriedconvnet(p_x):
-    logger.debug("Selecting deepfriedconvnet layer function")
-    input_classif = fct_deepfried(p_x, NB_STACK, SIGMA, trainable=not REAL_FASTFOOD)
-    return input_classif
-
-
 def main():
     input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
     x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
     y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    tf.summary.histogram("convolved_examples", x)
 
     if NETWORK == "dense":
-        input_classif = get_input_classif_dense(x)
+        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
     elif NETWORK == "deepstrom":
-        input_classif = get_input_classif_deepstrom(x)
+        logger.info("Selecting {} deepstrom layer function with "
+                    "subsample size = {}, "
+                    "output_dim = {}, "
+                    "{} activation function "
+                    "and kernel = {}"
+                    .format("real" if REAL_NYSTROM else "learned",
+                            NYS_SUBSAMPLE_SIZE,
+                            OUT_DIM,
+                            "with" if NON_LINEAR else "without",
+                            KERNEL_NAME))
+        if TRAIN_SIZE is not None:
+            subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.validation.data[subsample_indexes]
+        else:
+            subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.train.data[subsample_indexes]
+        logger.debug("Chosen subsample: {}".format(nys_subsample))
+        representation_layer = DeepstromLayer(subsample=nys_subsample,
+                                              out_dim=OUT_DIM,
+                                              activation=NON_LINEAR,
+                                              kernel_name=KERNEL_NAME,
+                                              real_nystrom=REAL_NYSTROM,
+                                              kernel_dict=kernel_dict)
     elif NETWORK == "deepfriedconvnet":
-        input_classif = get_input_classif_deepfriedconvnet(x)
+        representation_layer = FastFoodLayer(sigma=SIGMA,
+                                             nbr_stack=NB_STACK,
+                                             trainable=not REAL_FASTFOOD)
     else:
         raise Exception("Not recognized network")
 
+    input_classif = representation_layer(x)
+
     if SIZE_SECOND_LAYER > 0:
         logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
         with tf.variable_scope("second_layer"):
-            input_classif_2nd_layer = fully_connected(input_classif, SIZE_SECOND_LAYER, act=ACTIVATION_FUNCTION,
-                                                      variable_scope="fc")
+            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
     else:
         logger.debug("No second layer")
         input_classif_2nd_layer = input_classif
 
-    logger.debug("Add softmax layer for classification")
-    classif, keep_prob = classification_cifar(input_classif_2nd_layer, output_dim)
+    with tf.variable_scope("classification"):
+        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+        input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
+        classif = Dense(output_dim)(input_drop)
 
     # calcul de la loss
+    logger.debug("Add softmax layer for classification")
     with tf.name_scope("xent"):
         cross_entropy = tf.reduce_mean(
             tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
@@ -295,18 +206,19 @@
         accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
         tf.summary.scalar("accuracy", accuracy_op)
 
-    # merged_summary = tf.summary.merge_all()
+    merged_summary = tf.summary.merge_all()
 
     init = tf.global_variables_initializer()
     # Create a session for running Ops on the Graph.
    # Instantiate a SummaryWriter to output summaries and the Graph.
-    # summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
+    if TENSORBOARD:
+        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
     # Initialize all Variable objects
     # actual learning
-
     with tf.Session() as sess:
         logger.info("Start training")
-        # summary_writer.add_graph(sess.graph)
+        if TENSORBOARD:
+            summary_writer.add_graph(sess.graph)
         # Initialize all Variable objects
         sess.run(init)
         # actual learning
@@ -318,7 +230,7 @@
            start = t.time()
            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
-                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                if j % 100 == 0:
                    logger.info(
                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
@@ -326,8 +238,8 @@
                                                                                                int(data.train[0].shape[
                                                                                                    0] / BATCH_SIZE) + 1,
                                                                                                X_batch.shape, loss, acc))
-                # summary_str = sess.run(merged_summary, feed_dict=feed_dict)
-                # summary_writer.add_summary(summary_str, j)
+                if TENSORBOARD:
+                    summary_writer.add_summary(summary_str, i * (int(data.train[0].shape[0] / BATCH_SIZE) + 1) + j)
                j += 1
 
        logger.info("Evaluation on validation data")
@@ -384,13 +296,14 @@ if __name__ == '__main__':
    VALIDATION_SIZE = int(arguments["--validation-size"])
    REAL_NYSTROM = arguments["--real-nystrom"]
    SEED = int(arguments["--seed"])  # The seed change the data ordering in the dataset (so train/validation/test split may change with != seeds)
+    TENSORBOARD = arguments["--tensorboard"]
    NYS_SUBSAMPLE_SIZE = None
    KERNEL_NAME = None
    GAMMA = None
    CONST = None
    NB_STACK = None
    kernel_dict = {}
-    CIFAR_DATASET = bool(arguments["--cifar"])
+    CIFAR_DATASET = bool(arguments["--cifar10"])
    CIFAR100_DATASET = bool(arguments["--cifar100"])
    MNIST_DATASET = bool(arguments["--mnist"])
    SVHN_DATASET = bool(arguments["--svhn"])
@@ -408,7 +321,7 @@
    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None
 
    if CIFAR_DATASET:
-        DATASET = "cifar"
+        DATASET = "cifar10"
    elif MNIST_DATASET:
        DATASET = "mnist"
    elif SVHN_DATASET:
@@ -418,6 +331,12 @@
    else:
        raise ValueError("no know dataset specified")
    CUT_LAYER = arguments["--cut-layer"]
+
+    if arguments["--weights"] is None:
+        WEIGHTS = DATASET
+    else:
+        WEIGHTS = arguments["--weights"]
+
    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
    if arguments["--train-size"] is not None:
@@ -431,17 +350,17 @@
    SEED_TRAIN_VALIDATION = SEED
    if CIFAR_DATASET:
        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Transformer(data_name="cifar10", cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    elif MNIST_DATASET:
        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
        # todo rendre conv_pool2 parametrable
-        transformer = LecunTransformer(data_name="mnist", cut_layer_name="conv_pool_2")
+        transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2")
    elif SVHN_DATASET:
        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Transformer(data_name="svhn", cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    elif CIFAR100_DATASET:
        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Transformer(data_name="cifar100", cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
    else:
        raise ValueError("No dataset specified")
 
@@ -541,5 +460,4 @@
    try:
        main()
    except Exception as e:
-        print_result()
-        raise e
\ No newline at end of file
+        print_result(error=e)
\ No newline at end of file
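
Example invocations of the renamed entry point may help review. The subcommands and flags below come from the docopt usage in the patched docstring; the concrete sizes, the cut-layer name and the choice of cifar100 weights are illustrative assumptions only:

    # deepstrom on cifar10 with an RBF kernel (-R), tensorboard logging (-V) and
    # convolutional weights taken from cifar100 (-W); the cut-layer name given to
    # -B is a placeholder for whatever VGG19Transformer actually expects
    python main/experiments/benchmark_classification.py deepstrom --cifar10 -R -V -W cifar100 -m 128 -D 64 -e 1 -s 64 -B block4_pool

    # dense baseline on mnist; -W is omitted, so WEIGHTS falls back to the dataset name
    python main/experiments/benchmark_classification.py dense --mnist -D 64 -e 1 -s 64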
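The "real Nystrom" projection that this patch removes from the script body (and that DeepstromLayer is now expected to encapsulate) is compact enough to state on its own. A minimal NumPy sketch of that formula, assuming a single RBF kernel and a well-conditioned K11 — nystrom_features and its arguments are hypothetical names, not part of skluc:

    import numpy as np
    from sklearn.metrics.pairwise import rbf_kernel

    def nystrom_features(X, subsample, gamma):
        # K11^(-1/2) via SVD, as the removed code computed invert_root_K11
        K11 = rbf_kernel(subsample, subsample, gamma=gamma)
        U, S, V = np.linalg.svd(K11)
        invert_root_K11 = np.dot(U / np.sqrt(S), V)  # assumes no vanishing singular values
        # Nystrom feature map: phi(x) = k(x, subsample) . K11^(-1/2)
        return np.dot(rbf_kernel(X, subsample, gamma=gamma), invert_root_K11)

    X = np.random.rand(100, 8)
    subsample = X[np.random.choice(len(X), 16, replace=False)]
    phi = nystrom_features(X, subsample, gamma=0.5)  # shape (100, 16)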