diff --git a/main/experiments/benchmark_vgg_tmp.py b/main/experiments/benchmark_vgg_tmp.py
index 89184f0515c64f542e9d3f9eb7e4c86daf24b8ef..a3f60e507214853f483d5fd677b33a1ab70d4c46 100644
--- a/main/experiments/benchmark_vgg_tmp.py
+++ b/main/experiments/benchmark_vgg_tmp.py
@@ -3,7 +3,7 @@ Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG netw
 
 Usage:
     benchmark_vgg dense [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size]
-    benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size]
+    benchmark_vgg deepfriedconvnet [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z]
     benchmark_vgg deepstrom [-q] [--cifar100|--cifar|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size]
 
 Options:
@@ -23,6 +23,7 @@ Dense:
 
 Deepfried convnet:
     -N nbstack --nb-stack nbstack          The number of fastfood stack for deepfriedconvnet
+    -z --real-fastfood                     Tell the fastfood layer not to update its weights
 
 Deepstrom:
     -r --real-nystrom                      Says if the matrix for deepstrom should be K^(-1/2)
@@ -63,23 +64,24 @@ import numpy as np
 import tensorflow as tf
 import docopt
 from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel
-import skluc.data.mldatasets as dataset
-from skluc.data.transformation import VGG19Cifar10Transformer, LecunMnistTransformer, \
-    VGG19SvhnTransformer, VGG19Cifar100Transformer
-from skluc.tensorflow_.kernel_approximation import nystrom_layer, fastfood_layer
-from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
-from skluc.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
+import skluc.main.data.mldatasets as dataset
+from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
+from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import nystrom_layer
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import fastfood_layer
+from skluc.main.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
     tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
-from skluc.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
+from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
 
 
-def print_result():
+def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None):
     printed_r_list = [str(NETWORK),
                       str(global_acc_val),
                       str(global_acc_test),
                       str(training_time),
-                      str(VAL_EVAL_TIME),
-                      str(TEST_EVAL_TIME),
+                      str(val_eval_time),
+                      str(test_eval_time),
                       str(NUM_EPOCH),
                       str(BATCH_SIZE),
                       str(OUT_DIM),
@@ -97,7 +99,8 @@ def print_result():
                       str(CUT_LAYER),
                       str(TRAIN_SIZE),
                       str(DROPOUT),
-                      str(DATASET)
+                      str(DATASET),
+                      str(REAL_FASTFOOD)
                       ]
print(",".join(printed_r_list)) exit() @@ -129,10 +132,10 @@ def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, n return out_fc -def fct_deepfried(input_, nb_stack, sigma): +def fct_deepfried(input_, nb_stack, sigma, trainable=True): try: - return fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=True) + return fastfood_layer(input_, sigma, nbr_stack=nb_stack, trainable=trainable) except Exception as e: logger.critical(e) print_result() @@ -153,7 +156,7 @@ def get_gamma_value(arguments, dat, chi2=False): return gamma_value -def get_input_classif_deepstrom(): +def get_input_classif_deepstrom(p_x): logger.info("Selecting {} deepstrom layer function with " "subsample size = {}, " "output_dim = {}, " @@ -190,7 +193,7 @@ def get_input_classif_deepstrom(): added_K11 = np.add(added_K11, rbf_kernel(nys_subsample, nys_subsample, gamma=g_value)) U, S, V = np.linalg.svd(added_K11) invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32) - input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, + input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11, non_linearity=NON_LINEAR) elif STACKED_KERNEL: # here nystrom approximations are stacked @@ -201,7 +204,7 @@ def get_input_classif_deepstrom(): invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32) lst_invert_root_K11.append(invert_root_K11) stack_K11 = np.vstack(lst_invert_root_K11) - input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, + input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=stack_K11, non_linearity=NON_LINEAR) else: @@ -223,29 +226,144 @@ def get_input_classif_deepstrom(): K11 = kernel_fct(nys_subsample, nys_subsample, **kernel_dict) U, S, V = np.linalg.svd(K11) invert_root_K11 = np.dot(U / np.sqrt(S), V).astype(np.float32) - input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11, + input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=invert_root_K11, non_linearity=NON_LINEAR) else: - input_classif = fct_deepstrom(x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, + input_classif = fct_deepstrom(p_x, OUT_DIM, nys_subsample, KERNEL, kernel_dict, w_matrix=None, non_linearity=NON_LINEAR) return input_classif -def get_input_classif_dense(): +def get_input_classif_dense(p_x): logger.info("Selecting dense layer function with output dim = {} and activation function = {}".format(OUT_DIM, ACTIVATION_FUNCTION)) # two layers is handled outside of here - input_classif = fct_dense(x, OUT_DIM, two_layers=False, activation_function=ACTIVATION_FUNCTION) + input_classif = fct_dense(p_x, OUT_DIM, two_layers=False, activation_function=ACTIVATION_FUNCTION) return input_classif -def get_input_classif_deepfriedconvnet(): +def get_input_classif_deepfriedconvnet(p_x): logger.debug("Selecting deepfriedconvnet layer function") - input_classif = fct_deepfried(x, NB_STACK, SIGMA) + input_classif = fct_deepfried(p_x, NB_STACK, SIGMA, trainable=not REAL_FASTFOOD) return input_classif +def main(): + input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1] + + x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x") + y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label") + + if NETWORK == "dense": + input_classif = get_input_classif_dense(x) + elif NETWORK == "deepstrom": + input_classif = get_input_classif_deepstrom(x) + elif NETWORK == "deepfriedconvnet": + 
+        input_classif = get_input_classif_deepfriedconvnet(x)
+    else:
+        raise Exception("Not recognized network")
+
+    if SIZE_SECOND_LAYER > 0:
+        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
+        with tf.variable_scope("second_layer"):
+            input_classif_2nd_layer = fully_connected(input_classif, SIZE_SECOND_LAYER, act=ACTIVATION_FUNCTION,
+                                                      variable_scope="fc")
+    else:
+        logger.debug("No second layer")
+        input_classif_2nd_layer = input_classif
+
+    logger.debug("Add softmax layer for classification")
+    classif, keep_prob = classification_cifar(input_classif_2nd_layer, output_dim)
+
+    # compute the loss
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # todo learning rate as hyperparameter
+    # compute the gradient
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # compute the accuracy
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    # merged_summary = tf.summary.merge_all()
+
+    init = tf.global_variables_initializer()
+    # Create a session for running Ops on the Graph.
+    # Instantiate a SummaryWriter to output summaries and the Graph.
+    # summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
+    # Initialize all Variable objects
+    # actual learning
+
+    with tf.Session() as sess:
+        logger.info("Start training")
+        # summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
+        global_start = t.time()
+        for i in range(NUM_EPOCH):
+            logger.debug(memory_usage())
+            j = 0
+            start = t.time()
+            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
+                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
+                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
+                if j % 100 == 0:
+                    logger.info(
+                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
+                                                                                                int(data.train[0].shape[0] / BATCH_SIZE) + 1,
+                                                                                                X_batch.shape, loss,
+                                                                                                acc))
+                # summary_str = sess.run(merged_summary, feed_dict=feed_dict)
+                # summary_writer.add_summary(summary_str, j)
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        accuracies_val = []
+        i = 0
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_val.append(accuracy[0])
+            i += 1
+        global_acc_val = sum(accuracies_val) / i
+        VAL_EVAL_TIME = t.time() - val_eval_start
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        i = 0
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_test.append(accuracy[0])
+            i += 1
+        global_acc_test = sum(accuracies_test) / i
+        TEST_EVAL_TIME = t.time() - test_eval_start
+
+    print_result(global_acc_val=global_acc_val,
+                 global_acc_test=global_acc_test,
+                 training_time=training_time,
+                 val_eval_time=VAL_EVAL_TIME,
+                 test_eval_time=TEST_EVAL_TIME)
+
+
 if __name__ == '__main__':
-    # todo special treat for each type of execution
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
     arguments = docopt.docopt(__doc__)
     logger.debug(arguments)
     if arguments["--quiet"]:
@@ -276,8 +394,9 @@ if __name__ == '__main__':
     CIFAR100_DATASET = bool(arguments["--cifar100"])
     MNIST_DATASET = bool(arguments["--mnist"])
     SVHN_DATASET = bool(arguments["--svhn"])
-    TEST_EVAL_TIME = None
-    VAL_EVAL_TIME = None
+    REAL_FASTFOOD = bool(arguments["--real-fastfood"])
+    test_eval_time = None
+    val_eval_time = None
     if arguments["--non-linearity"] == "relu":
         ACTIVATION_FUNCTION = tf.nn.relu
     elif arguments["--non-linearity"] == "tanh":
@@ -312,22 +431,23 @@ if __name__ == '__main__':
     SEED_TRAIN_VALIDATION = SEED
     if CIFAR_DATASET:
         data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Cifar10Transformer(cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name="cifar10", cut_layer_name=CUT_LAYER)
     elif MNIST_DATASET:
         data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = LecunMnistTransformer()
+        # todo make conv_pool_2 configurable
+        transformer = LecunTransformer(data_name="mnist", cut_layer_name="conv_pool_2")
     elif SVHN_DATASET:
         data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19SvhnTransformer(cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name="svhn", cut_layer_name=CUT_LAYER)
     elif CIFAR100_DATASET:
        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
-        transformer = VGG19Cifar100Transformer(cut_layer_name=CUT_LAYER)
+        transformer = VGG19Transformer(data_name="cifar100", cut_layer_name=CUT_LAYER)
     else:
         raise ValueError("No dataset specified")
 
-    data.load()
-    data.flatten()
-    data.to_image()  # todo gérer le cas où ce sont déjà des images (les flatteniser dans tous les cas?)
+    data.load()  # todo handle the flatten bug
+    if not data.is_image():
+        data.to_image()  # todo handle the case where the data are already images (flatten them in every case?)
     data.data_astype(np.float32)
     data.labels_astype(np.float32)
     data.normalize()
@@ -336,7 +456,8 @@ if __name__ == '__main__':
     data.normalize()
     data.to_one_hot()
     data.flatten()
-
+    data.data_astype(np.float32)
+    data.labels_astype(np.int)
 
     if TRAIN_SIZE is not None:
         data.reduce_data_size(int(TRAIN_SIZE))
@@ -417,107 +538,8 @@ if __name__ == '__main__':
     else:
         raise Exception("Not recognized network")
 
-    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
-
-    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
-    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
-
-    if NETWORK == "dense":
-        input_classif = get_input_classif_dense()
-    elif NETWORK == "deepstrom":
-        input_classif = get_input_classif_deepstrom()
-    elif NETWORK == "deepfriedconvnet":
-        input_classif = get_input_classif_deepfriedconvnet()
-    else:
-        raise Exception("Not recognized network")
-
-    if SIZE_SECOND_LAYER > 0:
-        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
-        with tf.variable_scope("second_layer"):
-            input_classif_2nd_layer = fully_connected(input_classif, SIZE_SECOND_LAYER, act=ACTIVATION_FUNCTION, variable_scope="fc")
-    else:
-        logger.debug("No second layer")
-        input_classif_2nd_layer = input_classif
-
-    logger.debug("Add softmax layer for classification")
-    classif, keep_prob = classification_cifar(input_classif_2nd_layer, output_dim)
-
-    # calcul de la loss
-    with tf.name_scope("xent"):
-        cross_entropy = tf.reduce_mean(
-            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
-            name="xentropy_mean")
-        tf.summary.scalar('loss-xent', cross_entropy)
-
-    # todo learning rate as hyperparameter
-    # calcul du gradient
-    with tf.name_scope("train"):
-        global_step = tf.Variable(0, name="global_step", trainable=False)
-        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
-                                                                              global_step=global_step)
-
-    # calcul de l'accuracy
-    with tf.name_scope("accuracy"):
-        predictions = tf.argmax(classif, 1)
-        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
-        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
-        tf.summary.scalar("accuracy", accuracy_op)
-
-    merged_summary = tf.summary.merge_all()
-
-    init = tf.global_variables_initializer()
-    # Create a session for running Ops on the Graph.
-    # Instantiate a SummaryWriter to output summaries and the Graph.
-    # summary_writer = tf.summary.FileWriter("debug_benchmark_vgg")
-    # Initialize all Variable objects
-    # actual learning
-
-    with tf.Session() as sess:
-        logger.info("Start training")
-        # summary_writer.add_graph(sess.graph)
-        # Initialize all Variable objects
-        sess.run(init)
-        # actual learning
-        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
-        global_start = t.time()
-        for i in range(NUM_EPOCH):
-            logger.debug(memory_usage())
-            j = 0
-            start = t.time()
-            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
-                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
-                _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict)
-                if j % 100 == 0:
-                    logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j+1, int(data.train[0].shape[0]/BATCH_SIZE)+1, X_batch.shape, loss, acc))
-                # summary_str = sess.run(merged_summary, feed_dict=feed_dict)
-                # summary_writer.add_summary(summary_str, j)
-                j += 1
-
-        logger.info("Evaluation on validation data")
-        training_time = t.time() - global_start
-        accuracies_val = []
-        i = 0
-        val_eval_start = t.time()
-        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):
-            accuracy = sess.run([accuracy_op], feed_dict={
-                x: X_batch, y: Y_batch, keep_prob: 1.0})
-            accuracies_val.append(accuracy[0])
-            i += 1
-        global_acc_val = sum(accuracies_val) / i
-        VAL_EVAL_TIME = t.time() - val_eval_start
-
-        logger.info("Evaluation on test data")
-        accuracies_test = []
-        i = 0
-        test_eval_start = t.time()
-        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):
-            accuracy = sess.run([accuracy_op], feed_dict={
-                x: X_batch, y: Y_batch, keep_prob: 1.0})
-            accuracies_test.append(accuracy[0])
-            i += 1
-        global_acc_test = sum(accuracies_test) / i
-        TEST_EVAL_TIME = t.time() - test_eval_start
-
-
-    print_result()
-
+    try:
+        main()
+    except Exception as e:
+        print_result()
+        raise e
\ No newline at end of file