diff --git a/main/experiments/benchmark_vgg_end_to_end.py b/main/experiments/benchmark_vgg_end_to_end.py deleted file mode 100644 index 8c897f10e48051135c3f8522b501f09bda093df5..0000000000000000000000000000000000000000 --- a/main/experiments/benchmark_vgg_end_to_end.py +++ /dev/null @@ -1,430 +0,0 @@ -""" -Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network. - -Usage: - benchmark_vgg deepstrom [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] - -Options: - --help -h Display help and exit. - -e numepoch --num-epoch=numepoch The number of epochs. - -s batchsize --batch-size=batchsize The number of examples in each batch - -v size --validation-size size The size of the validation set [default: 10000] - -a value --seed value The seed value used for all randomization processes [default: 0] - -D reprdim --out-dim=reprdim The dimension of the final representation - -m size --nys-size size The number of examples in the nystrom subsample. - -n --non-linear Tells Nystrom to use the non-linear activation function on its output. - -r --real-nystrom Use the real w matrix - -g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet) - -c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel. - -R --rbf-kernel Says if the rbf kernel should be used for nystrom. - -L --linear-kernel Says if the linear kernel should be used for nystrom. - -C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom. - -E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom. - -P --chi-square-PD-kernel Says if the positive definite version of the basic additive chi square kernel should be used for nystrom. - -S --sigmoid-kernel Says if the sigmoid kernel should be used for nystrom. - -A --laplacian-kernel Says if the laplacian, chi2 and rbf kernels should not be confused; says if the laplacian kernel should be used for nystrom. - -T --stacked-kernel Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a stacked setting. - -M --sumed-kernel Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a summed setting.
-""" -import sys -import os -import time as t -import numpy as np -import tensorflow as tf -import docopt -from keras import Model -from keras.preprocessing.image import ImageDataGenerator -from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, additive_chi2_kernel, chi2_kernel, laplacian_kernel -import skluc.data.mldatasets as dataset -from skluc.data.transformation import VGG19Cifar10Transformer -from skluc.tensorflow_.kernel_approximation import nystrom_layer, fastfood_layer -from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar, conv_relu_pool, conv2d, \ - max_pool -from skluc.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \ - tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels -from skluc.utils import logger, log_memory_usage -import keras -from keras.models import Sequential, load_model -from keras.layers import Activation -from keras.layers import Conv2D, MaxPooling2D -from keras.initializers import he_normal -from keras.layers.normalization import BatchNormalization - - -def VGG19(input_shape): - # with tf.variable_scope("block1_conv1"): - # weights = tf.get_variable("weights", (3, 3, 3, 64), initializer=tf.random_normal_initializer(stddev=0.1), trainable=trainable) - # biases = tf.get_variable("biases", (64), initializer=tf.constant_initializer(0.0), trainable=trainable) - # regularizer = tf.contrib.layers.l2_regularizer(scale=0.1) - # conv = tf.nn.conv2d(input_, weights, strides=[1, 1, 1, 1], padding='SAME', kernel_regularizer=regularizer) - # batch_norm = tf.nn.batch_normalization(conv, variance_epsilon=1e-3) - # relu = tf.nn.relu(conv + biases) - # tf.summary.histogram("act", relu) - # in order to reduce dimensionality, use bigger pooling size - # pool = max_pool(relu, pool_size=pool_size) - # with tf.variable_scope("conv_pool_2"): - # conv2 = conv_relu_pool(conv1, [5, 5, 6, 16], [16], pool_size=2, trainable=trainable) - weight_decay = 0.0001 - # build model - model = Sequential() - - # Block 1 - model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape)) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block1_conv2')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')) - - # Block 2 - model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block2_conv1')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block2_conv2')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')) - # - # Block 3 - model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv1')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), 
name='block3_conv2')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv3')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block3_conv4')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')) - # - # Block 4 - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv1')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv2')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv3')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block4_conv4')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')) - - # Block 5 - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv1')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv2')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv3')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer=he_normal(), name='block5_conv4')) - model.add(BatchNormalization()) - model.add(Activation('relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')) - - return model - - -def VGG19_preload(): - logger.debug("filename: {}".format(os.path.abspath(__file__))) - model = load_model(os.path.join(os.path.dirname(os.path.abspath(__file__)), "1522967518.1916964_vgg19_cifar10.h5")) - vgg_conv_model = Model(inputs=model.input, - outputs=model.get_layer('block5_pool').output) - return vgg_conv_model - - -def fct_deepstrom(input_, out_dim, subsample, kernel, kernel_params, w_matrix, non_linearity): - """ - Wrap the computing of the deepstrom layer - - :param input_: - :param out_dim: - :param subsample: - :param kernel: - :param kernel_params: - :return: - """ - out_fc = nystrom_layer(input_, subsample, W_matrix=w_matrix, output_dim=out_dim, kernel=kernel, output_act=non_linearity, **kernel_params) - return out_fc - - -if __name__ == '__main__': - - arguments = docopt.docopt(__doc__) - NUM_EPOCH = int(arguments["--num-epoch"]) - BATCH_SIZE = int(arguments["--batch-size"]) - SEED_TRAIN_VALIDATION = 0 - SEED = 
int(arguments["--seed"]) - OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None - VALIDATION_SIZE = int(arguments["--validation-size"]) - NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"]) - if OUT_DIM is None: - OUT_DIM = NYS_SUBSAMPLE_SIZE - KERNEL_NAME = None - GAMMA = None - CONST = None - REAL_NYSTROM = arguments["--real-nystrom"] - - NON_LINEAR = tf.nn.relu if arguments["--non-linear"] else None - - RBF_KERNEL = arguments["--rbf-kernel"] - LINEAR_KERNEL = arguments["--linear-kernel"] - CHI2_KERNEL = arguments["--chi-square-kernel"] - CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"] - CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"] - SIGMOID_KERNEL = arguments["--sigmoid-kernel"] - LAPLACIAN_KERNEL = arguments["--laplacian-kernel"] - STACKED_KERNEL = arguments["--stacked-kernel"] - SUMED_KERNEL = arguments["--sumed-kernel"] - - kernel_dict = {} - - data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION) - data.load() - data.normalize() - data.data_astype(np.float32) - data.labels_astype(np.float32) - data.to_image() - data.to_one_hot() - - logger.debug("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:]))) - logger.debug("Using dataset {} with validation size {} and seed for spliting set {}.".format(data.s_name, data.validation_size, data.seed)) - logger.debug("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape)) - logger.debug("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape)) - logger.debug("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape)) - logger.debug("Sample of label: {}".format(data.train[1][0])) - - if RBF_KERNEL: - KERNEL = tf_rbf_kernel - KERNEL_NAME = "rbf" - GAMMA = float(arguments["--gamma"]) - kernel_dict = {"gamma": GAMMA} - elif LINEAR_KERNEL: - KERNEL = tf_linear_kernel - KERNEL_NAME = "linear" - elif CHI2_KERNEL: - KERNEL = tf_chi_square_CPD - KERNEL_NAME = "chi2_cpd" - elif CHI2_EXP_KERNEL: - KERNEL = tf_chi_square_CPD_exp - KERNEL_NAME = "chi2_exp_cpd" - GAMMA = float(arguments["--gamma"]) - kernel_dict = {"gamma": GAMMA} - elif CHI2_PD_KERNEL: - KERNEL = tf_chi_square_PD - KERNEL_NAME = "chi2_pd" - elif SIGMOID_KERNEL: - KERNEL = tf_sigmoid_kernel - KERNEL_NAME = "sigmoid" - GAMMA = float(arguments["--gamma"]) - CONST = float(arguments["--intercept-constant"]) - kernel_dict = {"gamma": GAMMA, "constant": CONST} - elif LAPLACIAN_KERNEL: - KERNEL = tf_laplacian_kernel - KERNEL_NAME = "laplacian" - GAMMA = float(arguments["--gamma"]) - kernel_dict = {"gamma": np.sqrt(GAMMA)} - elif STACKED_KERNEL: - # todo it doesn't work - GAMMA = float(arguments["--gamma"]) - - def KERNEL(X, Y): - return tf_stack_of_kernels(X, Y, - [tf_laplacian_kernel, tf_rbf_kernel, tf_chi_square_CPD], - [{"gamma": GAMMA}, {"gamma": GAMMA}, {}]) - KERNEL_NAME = "stacked" - elif SUMED_KERNEL: - GAMMA = float(arguments["--gamma"]) - - def KERNEL(X, Y): - return tf_sum_of_kernels(X, Y, - [tf_laplacian_kernel, tf_rbf_kernel, tf_chi_square_CPD], - [{"gamma": GAMMA}, {"gamma": GAMMA}, {}]) - KERNEL_NAME = "summed" - else: - raise Exception("No kernel function specified for deepstrom") - - input_dim, output_dim = data.train[0].shape[1:], data.train[1].shape[1] - with tf.Graph().as_default(): - np.random.seed(SEED) - nys_subsample_index = np.random.permutation(data.train[0].shape[0]) - nys_subsample = 
data.train[0][nys_subsample_index[:NYS_SUBSAMPLE_SIZE]] - - nys_subsample_placeholder = tf.Variable(nys_subsample, dtype=tf.float32, name="nys_subsample", trainable=False) - - x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x") - y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label") - # nys_subsample_placeholder = tf.placeholder(tf.float32, shape=[NYS_SUBSAMPLE_SIZE, *input_dim], name="nys_subsample") - - # vgg_conv_model = VGG19_preload() - with tf.variable_scope("Convolution") as scope_convolution: - vgg_conv_model = VGG19(input_dim) - vgg_conv_model.trainable = False - conv_x = vgg_conv_model(x) - tf.summary.histogram("convolution_x", conv_x) - vgg_conv_model_subsample = keras.Model(inputs=vgg_conv_model.inputs, - outputs=vgg_conv_model.outputs) - vgg_conv_model_subsample.trainable = False - conv_nys_subsample = vgg_conv_model_subsample(nys_subsample_placeholder) - - logger.debug("Selecting deepstrom layer function with " - "subsample size = {}, " - "output_dim = {}, " - "{} activation function " - "and kernel = {}" - .format(NYS_SUBSAMPLE_SIZE, - OUT_DIM, - "with" if NON_LINEAR else "without", - KERNEL_NAME)) - if OUT_DIM is not None and OUT_DIM > NYS_SUBSAMPLE_SIZE: - logger.debug("Output dim is greater than deepstrom subsample size. Aborting.") - # todo change this because it is copy-pasted (use function instead) - - global_acc_val = None - global_acc_test = None - training_time = None - printed_r_list = [str(global_acc_val), - str(global_acc_test), - str(training_time), - str(NUM_EPOCH), - str(BATCH_SIZE), - str(OUT_DIM), - str(KERNEL_NAME), - str(GAMMA), - str(CONST), - str(NYS_SUBSAMPLE_SIZE), - str(VALIDATION_SIZE), - str(SEED), - str(NON_LINEAR), - ] - print(",".join(printed_r_list)) - exit() - w_matrix = None - if REAL_NYSTROM: - init_dim = np.prod([s.value for s in conv_x.shape[1:] if s.value is not None]) - h_conv_nystrom_subsample_flat = tf.reshape(conv_nys_subsample, [conv_nys_subsample.shape[0], init_dim]) - - K_matrix = KERNEL(h_conv_nystrom_subsample_flat, h_conv_nystrom_subsample_flat, **kernel_dict) - S, U, V = tf.svd(K_matrix) - # K^(-1/2) = U diag(1/sqrt(S)) V^T for a symmetric PSD kernel matrix - invert_root_K = tf.matmul(tf.matmul(U, tf.diag(1.0 / tf.sqrt(S))), tf.transpose(V)) - w_matrix = invert_root_K - - input_classif = fct_deepstrom(conv_x, OUT_DIM, conv_nys_subsample, KERNEL, kernel_dict, w_matrix=w_matrix, non_linearity=NON_LINEAR) - - classif, keep_prob = classification_cifar(input_classif, output_dim) - - # compute the loss - with tf.name_scope("xent"): - cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"), - name="xentropy_mean") - tf.summary.scalar('loss-xent', cross_entropy) - - # todo learning rate as hyperparameter - # compute the gradient - with tf.name_scope("train"): - global_step = tf.Variable(0, name="global_step", trainable=False) - train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy, - global_step=global_step) - - # compute the accuracy - with tf.name_scope("accuracy"): - predictions = tf.argmax(classif, 1) - correct_prediction = tf.equal(predictions, tf.argmax(y, 1)) - accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - tf.summary.scalar("accuracy", accuracy_op) - - merged_summary = tf.summary.merge_all() - - init = tf.global_variables_initializer() - # Create a session for running Ops on the Graph. - # Instantiate a SummaryWriter to output summaries and the Graph.
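For reference, the --real-nystrom branch above builds the Deepström weight matrix as the inverse square root of the subsample kernel matrix: for a symmetric PSD matrix K = U diag(s) U^T, one has K^(-1/2) = U diag(1/sqrt(s)) U^T. A minimal NumPy sketch of the same computation; the eps clamp on near-zero eigenvalues is an assumption, not in the original:

    import numpy as np

    def inv_sqrt_psd(K, eps=1e-8):
        # Eigendecomposition of the symmetric PSD subsample kernel matrix: K = U diag(s) U^T.
        s, U = np.linalg.eigh(K)
        # Clamp near-zero eigenvalues before taking the inverse square root.
        s = np.maximum(s, eps)
        # U diag(1/sqrt(s)) U^T  ==  K^(-1/2)
        return (U * (1.0 / np.sqrt(s))) @ U.T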
- # summary_writer = tf.summary.FileWriter("debug_benchmark_vgg") - # Initialize all Variable objects - # actual learning - saver = tf.train.Saver() - - with tf.Session() as sess: - logger.debug("trainable variables are: {}".format(tf.trainable_variables())) - # summary_writer.add_graph(sess.graph) - # Initialize all Variable objects - datagen = ImageDataGenerator(horizontal_flip=True, - width_shift_range=0.125, - height_shift_range=0.125, - fill_mode='constant', - cval=0.) - datagen.fit(data.train[0]) - sess.run(init) - # actual learning - # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0} - global_start = t.time() - feed_dict = {nys_subsample_placeholder: nys_subsample} - feed_dict_val = {nys_subsample_placeholder: nys_subsample} - feed_dict_test = {nys_subsample_placeholder: nys_subsample} - start_time_int = int(t.time()) - for i in range(NUM_EPOCH): - saver.save(sess, os.path.abspath('end_to_end_model'), global_step=start_time_int) - start = t.time() - # for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, True): - batchgen = datagen.flow(data.train[0], data.train[1], BATCH_SIZE, shuffle=False) - j = 0 - log_memory_usage() - while j < len(batchgen): - X_batch, Y_batch = next(batchgen) - # batch_generator(data.train[0], data.train[1], BATCH_SIZE, True): - # X_batch = tf.map_fn(lambda img: datagen.random_transform(img), X_batch) - feed_dict.update({x: X_batch, y: Y_batch, keep_prob: 0.5}) - _, loss, acc = sess.run([train_optimizer, cross_entropy, accuracy_op], feed_dict=feed_dict) - if j % 100 == 0: - # summary_str = sess.run(merged_summary, feed_dict=feed_dict) - # summary_writer.add_summary(summary_str, j) - logger.debug("epoch: {}/{}; batch: {}/{}; loss: {}; acc: {}".format(i, NUM_EPOCH, - j, int(data.train[0].shape[0]/BATCH_SIZE), - loss, acc)) - j += 1 - - training_time = t.time() - global_start - accuracies_val = [] - i = 0 - for X_batch, Y_batch in batch_generator(data.validation[0], data.validation[1], 1000, False): - feed_dict_val.update({x: X_batch, y: Y_batch, keep_prob: 1.0}) - accuracy = sess.run([accuracy_op], feed_dict=feed_dict_val) - accuracies_val.append(accuracy[0]) - i += 1 - - accuracies_test = [] - i = 0 - for X_batch, Y_batch in batch_generator(data.test[0], data.test[1], 1000, False): - feed_dict_test.update({x: X_batch, y: Y_batch, keep_prob: 1.0}) - accuracy = sess.run([accuracy_op], feed_dict=feed_dict_test) - accuracies_test.append(accuracy[0]) - i += 1 - - global_acc_val = sum(accuracies_val) / i - global_acc_test = sum(accuracies_test) / i - printed_r_list = [str(global_acc_val), - str(global_acc_test), - str(training_time), - str(NUM_EPOCH), - str(BATCH_SIZE), - str(OUT_DIM), - str(KERNEL_NAME), - str(GAMMA), - str(CONST), - str(NYS_SUBSAMPLE_SIZE), - str(VALIDATION_SIZE), - str(SEED), - str(NON_LINEAR), - ] - print(",".join(printed_r_list)) - - diff --git a/main/experiments/deepstrom_classif_end_to_end_mnist.ipynb b/main/experiments/deepstrom_classif_end_to_end_mnist.ipynb deleted file mode 100644 index 01c10b25b30536ce51dd9b5d827ca16f8237e65a..0000000000000000000000000000000000000000 --- a/main/experiments/deepstrom_classif_end_to_end_mnist.ipynb +++ /dev/null @@ -1,34 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": 
"ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/main/experiments/graph_drawing/till_october_2018/transfert_few_data_batchnorm/vgg_deepstrom_few_data_batchnorm.py b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_batchnorm/vgg_deepstrom_few_data_batchnorm.py new file mode 100644 index 0000000000000000000000000000000000000000..3ca6a3592a699ad5824b15fc5536fa28f3f9da71 --- /dev/null +++ b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_batchnorm/vgg_deepstrom_few_data_batchnorm.py @@ -0,0 +1,255 @@ +import os + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pathlib +from skluc.main.utils import logger + +matplotlib.rcParams.update({'font.size': 14}) + +# pd.set_option('display.width', 1000) +pd.set_option('display.expand_frame_repr', False) + +# DAT = ["SVHN"] +# DIR = ["/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data_cifar100_from_cifar10"] + + +DATANAME = "CIFAR100" +DIRNAME = "/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data" + +FILENAME = "gathered_results.csv" + +min_acc = 0.00 +max_acc = 1.05 +# max_acc = 1.0 +linewidth = 0.9 +output_conv_dim = 512 +nb_classes = 10 + +real_nys_marker = "s" + +learned_nys_marker = "x" + +linearity_color = "g" + +dense_marker = "v" +dense_color = "r" + +deepfried_marker = "8" +deepfried_color = "b" + +d_translate_kernel = { + "linear": "Linear", + "chi2_cpd": "Chi2", + "rbf": "Gaussian" +} + +if __name__ == '__main__': + filepath = os.path.join(DIRNAME, FILENAME) + field_names = ["method_name", + "accuracy_val", + "accuracy_test", + "runtime_train", + "runtime_val", + "runtime_test", + "number_epoch", + "batch_size", + "repr_dim", + "second_layer_size", + "kernel_deepstrom", + "gamma_kernel", + "constante_sigmoid", + "nb_layer_deepfried", + "subsample_size", + "validation_size", + "seed", + "act", + "non_linearity", + "real_nystrom", + "repr_quality", + "train_size", + "dropout", + "dataset", + "real_deepfried", + "weights" + ] + + df = pd.read_csv(filepath, names=field_names) + df = df[df["accuracy_val"] != 'None'] + df = df.apply(pd.to_numeric, errors="ignore") + df = df.drop_duplicates() + method_names = set(df["method_name"].values) + kernel_names = set(df["kernel_deepstrom"].values) + kernel_names.remove("None") + # kernel_names.remove("laplacian") + repr_dim = set(df["repr_dim"].values) + repr_dim.remove("None") # dtype: str + # repr_dim.remove("16") + nys_size = set(df["subsample_size"].values) + nys_size.remove("None") + nb_layers_deepfried = set(df["nb_layer_deepfried"].values) + nb_layers_deepfried.remove("None") + seed_values = set(df["seed"].values) + batch_size = 128 + train_sizes = set(df["train_size"]) + + cut_layers = set(df["repr_quality"].values) + + logger.debug("Nystrom possible sizes are: {}".format(nys_size)) + logger.debug("Kernel functions are: {}".format(kernel_names)) + logger.debug("Compared network types are: {}".format(method_names)) + logger.debug("Tested representation dimension are: {}".format(repr_dim)) + + means_deepstrom = {} + + for t_size in sorted(list(train_sizes)): + df_tsize = df[df["train_size"] == t_size] + + for cut_layer in cut_layers: + df_cut_layer = df_tsize[df_tsize["repr_quality"] == cut_layer] + + # plot deepstrom + # ============== + df_deepstrom = 
df_cut_layer[df_cut_layer["method_name"] == "deepstrom"] + df_deepstrom["subsample_size"] = df_deepstrom["subsample_size"].astype(np.int) + df_deepstrom_sort = df_deepstrom.sort_values(by=["subsample_size"]) + for k_name in sorted(kernel_names): + df_deepstrom_kernel = df_deepstrom_sort[df_deepstrom_sort["kernel_deepstrom"] == k_name] + + f, ax = plt.subplots() + + # get the results of learned nystrom + df_deepstrom_kernel_w = df_deepstrom_kernel[df_deepstrom_kernel["real_nystrom"] == False] + all_accs_w = np.array([ + list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"]) for + seed_v in seed_values + ]) + np_deepstrom_kernel_w_mean_accuracy_test = np.mean(all_accs_w, axis=0) + np_deepstrom_kernel_w_std_accuracy_test = np.std(all_accs_w, axis=0) + np_param_nbr_deepstrom_kernel_w = ( + np.square(np.array(sorted(set(df_deepstrom_kernel_w["subsample_size"])))) + # m x m + np.array( + sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dim + # m x d + np.array( + sorted(list(set(df_deepstrom_kernel_w["subsample_size"])))) * nb_classes) # m x c + + ax.errorbar(np_param_nbr_deepstrom_kernel_w, + np_deepstrom_kernel_w_mean_accuracy_test, + np_deepstrom_kernel_w_std_accuracy_test, + marker=learned_nys_marker, color=linearity_color, + label="Adaptative Deepström", + capsize=3) + + # get the results of vanilla nystrom + df_deepstrom_kernel_k = df_deepstrom_kernel[df_deepstrom_kernel["real_nystrom"]] + if len(df_deepstrom_kernel_k): + all_accs_k = np.array([ + list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"]) for + seed_v in seed_values + ]) + np_deepstrom_kernel_k_mean_accuracy_test = np.mean(all_accs_k, axis=0) + np_deepstrom_kernel_k_std_accuracy_test = np.std(all_accs_k, axis=0) + + np_param_nbr_deepstrom_kernel_k = ( + np.square(np.array(sorted(set(df_deepstrom_kernel_k["subsample_size"])))) + # m x m + np.array(sorted( + set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dim + # m x d + np.array(sorted( + list(set(df_deepstrom_kernel_k["subsample_size"])))) * nb_classes) # m x c + + ax.errorbar(np_param_nbr_deepstrom_kernel_k, + np_deepstrom_kernel_k_mean_accuracy_test, + np_deepstrom_kernel_k_std_accuracy_test, + marker=real_nys_marker, color=linearity_color, + label="Deepström", + capsize=3) + + # plot dense + # ========== + df_dense = df_cut_layer[df_cut_layer["method_name"] == "dense"] + df_dense = df_dense[df_dense["train_size"] == t_size] + df_dense["repr_dim"] = df_dense["repr_dim"].astype(np.int) + df_dense = df_dense.sort_values(by=["repr_dim"]) + np_dense_mean_accuracy_test = np.mean( + np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in + seed_values]), axis=0) + np_dense_std_accuracy_test = np.std( + np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in + seed_values]), axis=0) + ax.errorbar( + np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dim + + np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes, + np_dense_mean_accuracy_test, + np_dense_std_accuracy_test, + color=dense_color, + marker=dense_marker, + label="Fully Connected", capsize=3) + + # # plot deepfried + # # ============== + df_deepfried = df_cut_layer[df_cut_layer["method_name"] == "deepfriedconvnet"] + np_deepfried_mean_accuracy_test = [] + np_deepfried_std_accuracy_test = [] + for l_nb in sorted(nb_layers_deepfried): + df_deepfried_stack = df_deepfried[df_deepfried["nb_layer_deepfried"] == l_nb] + if 
len(df_deepfried_stack): + np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"])) + np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"])) + + nb_param_vals = [(output_conv_dim * 3 + output_conv_dim * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))] + ax.errorbar(nb_param_vals, + np_deepfried_mean_accuracy_test, + np_deepfried_std_accuracy_test, + color=deepfried_color, + marker=deepfried_marker, + label="Adaptative DeepFriedConvnet", capsize=3) + + + ax.set_ylim(min_acc, max_acc) + ax.set_ylabel("Accuracy") + ax.set_xticks([1e4, 1e5, 1e6]) + # if i == 2: + # ax.set_xlabel("# Parameters") + ax.set_xlabel("# Parameters") + ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=2) + ax.set_xticklabels([1e4, 1e5, 1e6]) + # else: + # ax.set_xticklabels([]) + ax.set_xscale("symlog") + + ax_twin = ax.twiny() + ax_twin.set_xscale("symlog") + ax_twin.set_xlim(ax.get_xlim()) + ax_twin.set_xticks(np_param_nbr_deepstrom_kernel_w) + + # if i == 0: + ax_twin.set_xlabel("Subsample Size") + ax.set_title( + "{} Kernel - {} - Train size: {}".format(d_translate_kernel[k_name], DATANAME, t_size), + y=1.2) + ax_twin.set_xticklabels(sorted(set(df_deepstrom_kernel_w["subsample_size"]))) + # else: + # ax.set_title("Noyau {} - {} - Train size: {}".format(d_translate_kernel[k_name], DATANAME, t_size)) + # ax_twin.set_xticklabels([]) + + f.set_size_inches(8, 6) + f.tight_layout() + f.subplots_adjust(bottom=0.3) + # f.show() + # exit() + # learnable: change legend + # ODIR = [ + # "/home/luc/PycharmProjects/deepFriedConvnets/main/experiments/graph_drawing/paper/svhn/few_data/parameters/dropout_{}".format( + # str(drop_val).replace(".", "-"))] + # out_dir_path = ODIR[h] + + + out_name = "acc_param_tsize_{}_{}_{}".format(t_size, cut_layer, k_name) + + base_out_dir = os.path.join(os.path.abspath(__file__.split(".")[0]), "images") + pathlib.Path(base_out_dir).mkdir(parents=True, exist_ok=True) + out_path = os.path.join(base_out_dir, out_name) + logger.debug(out_path) + f.savefig(out_path) diff --git a/main/experiments/parameter_files/october_2018/lazyfile_classif_end_to_end.yml b/main/experiments/parameter_files/october_2018/lazyfile_classif_end_to_end.yml new file mode 100644 index 0000000000000000000000000000000000000000..738deb29e4ab5c9374c5b0ae5df44a986bd0d68a --- /dev/null +++ b/main/experiments/parameter_files/october_2018/lazyfile_classif_end_to_end.yml @@ -0,0 +1,29 @@ +all: + dense: +# deepfried: + deepstrom: + +base: + epoch_numbers: {"-e": [200]} + batch_sizes: {"-s": [64]} + val_size: {"-v": [10000]} + seed: {"-a": "range(1)"} + quiet: ["-q"] + dataset: ["--mnist", "--cifar10", "--cifar100", "--svhn"] + +dense: + network: ["dense"] + base: + repr_dim: {"-D": [16, 64, 128, 1024]} + +deepfried: + network: ["deepfriedconvnet"] + base: + nbstacks: {"-N": [1, 3, 5, 7]} + +deepstrom: + network: ["deepstrom"] + base: + nys_size: {"-m": [4, 8, 16, 64, 128, 256, 512]} + kernel: ["-C", "-L"] + diff --git a/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_batchnorm.yml b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_batchnorm.yml new file mode 100644 index 0000000000000000000000000000000000000000..658b8ec2b08cac4944819a7c6e62e0c1454dc638 --- /dev/null +++ b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_batchnorm.yml @@ -0,0 +1,34 @@ +all: + dense: + deepfried: + deepstrom: + +base: + epoch_numbers: {"-e": [100]} + batch_sizes: {"-s": 
[64]} + val_size: {"-v": [10000]} + seed: {"-a": "range(1)"} + quiet: ["-q"] + data_size: {"-t":[50, 100, 200]} + dataset: ["--svhn"] + weights: {"-W": ["cifar100"]} + batchnorm: ["-b"] + cut_layer: {"-B": ["block3_pool"]} + +dense: + network: ["dense"] + base: + repr_dim: {"-D": [16, 64, 128, 1024]} + +deepfried: + network: ["deepfriedconvnet"] + base: + nbstacks: {"-N": [1, 3, 5, 7]} + +deepstrom: + network: ["deepstrom"] + base: + real_nys: ["-r", ""] + nys_size: {"-m": [16, 64, 256, 512]} + kernel: ["-L", "-R", "-C"] + diff --git a/main/experiments/scripts/__init__.py b/main/experiments/scripts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/main/experiments/scripts/until_october_2018/__init__.py b/main/experiments/scripts/until_october_2018/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/main/experiments/benchmark_tcnn.py b/main/experiments/scripts/until_october_2018/benchmark_tcnn.py similarity index 100% rename from main/experiments/benchmark_tcnn.py rename to main/experiments/scripts/until_october_2018/benchmark_tcnn.py diff --git a/main/experiments/benchmark_vgg_cov_kernel.py b/main/experiments/scripts/until_october_2018/benchmark_vgg_cov_kernel.py similarity index 100% rename from main/experiments/benchmark_vgg_cov_kernel.py rename to main/experiments/scripts/until_october_2018/benchmark_vgg_cov_kernel.py diff --git a/main/experiments/benchmark_vgg_multiview.py b/main/experiments/scripts/until_october_2018/benchmark_vgg_multiview.py similarity index 100% rename from main/experiments/benchmark_vgg_multiview.py rename to main/experiments/scripts/until_october_2018/benchmark_vgg_multiview.py diff --git a/main/experiments/scripts/until_october_2018/end_to_end/__init__.py b/main/experiments/scripts/until_october_2018/end_to_end/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/main/experiments/benchmark_vgg_end_to_end_new.py b/main/experiments/scripts/until_october_2018/end_to_end/benchmark_vgg_end_to_end.py similarity index 100% rename from main/experiments/benchmark_vgg_end_to_end_new.py rename to main/experiments/scripts/until_october_2018/end_to_end/benchmark_vgg_end_to_end.py diff --git a/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end.py b/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end.py new file mode 100644 index 0000000000000000000000000000000000000000..598fd6b07ff55d43df68569120bb3681d0044d16 --- /dev/null +++ b/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end.py @@ -0,0 +1,319 @@ +""" +Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network. 
+ + Usage: + benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-V] + benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-V] + benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V] + +Options: + --help -h Display help and exit. + -q --quiet Set logging level to info. + -V --tensorboard Write tensorboard logs. + -a --seed value The seed value used for all randomization processes [default: 0] + -t --train-size size Size of train set. + -v --validation-size size The size of the validation set [default: 10000] + -e --num-epoch=numepoch The number of epochs. + -s --batch-size=batchsize The number of examples in each batch + -d --dropout val Keep probability of neurons before the classifier [default: 1.0] + -D reprdim --out-dim=reprdim The dimension of the final representation + -f --non-linearity name Tells the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu] + +Dense: + -l --second-layer-size size Gives the size of the second non-linear layer [default: 0] + +Deepfried convnet: + -N nbstack --nb-stack nbstack The number of fastfood stacks for deepfriedconvnet + -z --real-fastfood Tells the fastfood layer not to update its weights + +Deepstrom: + -r --real-nystrom Says if the matrix for deepstrom should be K^(-1/2) + -m size --nys-size size The number of examples in the nystrom subsample. + -n --non-linear Tells Nystrom to use the non-linear activation function on its output. + +Datasets: + --cifar10 Use the cifar10 dataset + --mnist Use the mnist dataset + --svhn Use the svhn dataset + --cifar100 Use the cifar100 dataset + +Possible kernels: + -R --rbf-kernel Says if the rbf kernel should be used for nystrom. + -L --linear-kernel Says if the linear kernel should be used for nystrom. + -C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom. + -E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom. + -P --chi-square-PD-kernel Says if the positive definite version of the basic additive chi square kernel should be used for nystrom. + -S --sigmoid-kernel Says if the sigmoid kernel should be used for nystrom. + -A --laplacian-kernel Says if the laplacian kernel should be used for nystrom. + -T --stacked-kernel Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a stacked setting. + -M --sumed-kernel Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a summed setting. + +Kernel related: + -g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet) + -c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel.
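To make the interface above concrete, here is a hedged sketch of how one deepstrom run would be parsed by docopt; the flag values are purely illustrative:

    import docopt

    # Illustrative command line matching the 'deepstrom' usage pattern above.
    argv = "deepstrom --cifar10 -e 100 -s 64 -m 64 -R -g 0.01".split()
    arguments = docopt.docopt(__doc__, argv=argv)
    # docopt returns strings for valued options and booleans for flags,
    # e.g. arguments["--nys-size"] == "64" and arguments["--rbf-kernel"] is True.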
+ +""" + + +import skluc.main.data.mldatasets as dataset +import numpy as np +import tensorflow as tf +from tensorflow.python.keras.layers import Dense +from tensorflow.python.keras.models import Sequential + +from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer +from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd +from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model +from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter +from skluc.main.tensorflow_.utils import batch_generator +import time as t +import docopt + + +class ParameterManagerMain(ParameterManager): + + def __init__(self, docopt_dict): + super().__init__(docopt_dict) + + self["--out-dim"] = int(self["--out-dim"]) if eval(str(self["--out-dim"])) is not None else None + self["kernel"] = self.init_kernel() + self["network"] = self.init_network() + self["activation_function"] = self.init_non_linearity() + self["dataset"] = self.init_dataset() + self["--nb-stack"] = int(self["--nb-stack"]) if self["--nb-stack"] is not None else None + self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None + self["--num-epoch"] = int(self["--num-epoch"]) + self["--validation-size"] = int(self["--validation-size"]) + self["--seed"] = int(self["--seed"]) + self["--batch-size"] = int(self["--batch-size"]) + self["deepstrom_activation"] = self.init_deepstrom_activation() + + self.__kernel_dict = None + + def init_deepstrom_activation(self): + if not self["deepstrom"]: + return None + + if self["--non-linear"]: + return self["--non-linearity"] + else: + return None + + def init_kernel_dict(self, data): + if self["kernel"] == "rbf": + GAMMA = self.get_gamma_value(data) + self["--gamma"] = GAMMA + self.__kernel_dict = {"gamma": GAMMA} + elif self["kernel"] == "chi2_exp_cpd": + GAMMA = self.get_gamma_value(data, chi2=True) + self["--gamma"] = GAMMA + self.__kernel_dict = {"gamma": GAMMA} + elif self["kernel"] == "laplacian": + GAMMA = self.get_gamma_value(data) + self["--gamma"] = GAMMA + self.__kernel_dict = {"gamma": np.sqrt(GAMMA)} + else: + # GAMMA = self.get_gamma_value(data) + # self["--gamma"] = GAMMA + self.__kernel_dict = {} + + def __getitem__(self, item): + if item == "kernel_dict": + return self.__kernel_dict + else: + return super().__getitem__(item) + + +class ResultManagerMain(ResultManager): + def __init__(self): + super().__init__() + self["training_time"] = None + self["val_eval_time"] = None + self["val_acc"] = None + self["test_acc"] = None + self["test_eval_time"] = None + +def main(paraman, resman, printman): + if paraman["dataset"] == "mnist": + data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"]) + convmodel_func = build_lenet_model + elif paraman["dataset"] == "cifar10": + data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"]) + convmodel_func = build_vgg19_model + elif paraman["dataset"] == "cifar100": + data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"]) + convmodel_func = build_vgg19_model + elif paraman["dataset"] == "svhn": + data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"]) + convmodel_func = build_vgg19_model + else: + raise ValueError("Unknown dataset") + + data.load() + data.to_one_hot() + if not data.is_image(): + data.to_image() + data.data_astype(np.float32) + 
data.labels_astype(np.float32) + data.normalize() + + X_train, y_train = data.train.data, data.train.labels + X_test, y_test = data.test.data, data.test.labels + X_val, y_val = data.validation.data, data.validation.labels + + paraman.init_kernel_dict(X_train) + + # Model definition + + input_dim = X_train.shape[1:] + output_dim = y_train.shape[1] + + x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x") + y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label") + subs = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *input_dim], name="subsample") + + convnet_model = convmodel_func(x.shape[1:]) + + repr_x = convnet_model(x) + repr_sub = convnet_model(subs) + + logger.debug(paraman["kernel_dict"]) + + input_classifier = None + + if paraman["network"] == "deepstrom": + deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"], + kernel_name=paraman["kernel"], + kernel_dict=paraman["kernel_dict"], + activation=paraman["deepstrom_activation"], + out_dim=paraman["--out-dim"]) + + input_classifier = deepstrom_layer([repr_x, repr_sub]) + + subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"]) + nys_subsample = data.validation.data[subsample_indexes] + + elif paraman["network"] == "dense": + dense_layer = Dense(paraman["--out-dim"], activation=paraman["activation_function"]) + input_classifier = dense_layer(repr_x) + elif paraman["network"] == "deepfriedconvnet": + deepfried_layer = FastFoodLayer(sigma=1/paraman["--gamma"], nbr_stack=paraman["--nb-stack"], trainable=not paraman["--real-fastfood"]) + input_classifier = deepfried_layer(repr_x) + else: + raise ValueError(f"Unrecognized network {paraman['network']}") + + with tf.variable_scope("classification"): + classif = Dense(output_dim)(input_classifier) + + # compute the loss + with tf.name_scope("xent"): + cross_entropy = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"), + name="xentropy_mean") + tf.summary.scalar('loss-xent', cross_entropy) + + # compute the gradient + with tf.name_scope("train"): + global_step = tf.Variable(0, name="global_step", trainable=False) + train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy, + global_step=global_step) + + # compute the accuracy + with tf.name_scope("accuracy"): + predictions = tf.argmax(classif, 1) + correct_prediction = tf.equal(predictions, tf.argmax(y, 1)) + accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + tf.summary.scalar("accuracy", accuracy_op) + + merged_summary = tf.summary.merge_all() + + init = tf.global_variables_initializer() + + summary_writer = None + if paraman["--tensorboard"]: + summary_writer = tf.summary.FileWriter("debug_classification_end_to_end") + + with tf.Session() as sess: + logger.info("Start training") + if paraman["--tensorboard"]: + summary_writer.add_graph(sess.graph) + # Initialize all Variable objects + sess.run(init) + # actual learning + global_start = t.time() + j = 0 + for i in range(paraman["--num-epoch"]): + logger.debug(memory_usage()) + + for X_batch, Y_batch in batch_generator(X_train, y_train, paraman["--batch-size"], False): + if paraman["network"] == "deepstrom": + feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample} + else: + feed_dict = {x: X_batch, y: Y_batch} + _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict) + if j % 100 == 0: + logger.info(
"epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"], j + 1, + int(data.train[0].shape[ + 0] / paraman["--batch-size"]) + 1, + X_batch.shape, loss, + acc)) + if paraman["--tensorboard"]: + summary_writer.add_summary(summary_str, j) + j += 1 + + logger.info("Evaluation on validation data") + training_time = t.time() - global_start + resman["training_time"] = training_time + accuracies_val = [] + i = 0 + val_eval_start = t.time() + for X_batch, Y_batch in batch_generator(X_val, y_val, 1000, False): + if paraman["network"] == "deepstrom": + feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample} + else: + feed_dict = {x: X_batch, y: Y_batch} + accuracy = sess.run([accuracy_op], feed_dict=feed_dict) + accuracies_val.append(accuracy[0]) + i += 1 + global_acc_val = sum(accuracies_val) / i + + VAL_EVAL_TIME = t.time() - val_eval_start + resman["val_eval_time"] = VAL_EVAL_TIME + resman["val_acc"] = global_acc_val + + logger.info("Evaluation on test data") + accuracies_test = [] + i = 0 + test_eval_start = t.time() + for X_batch, Y_batch in batch_generator(X_test, y_test, 1000, False): + if paraman["network"] == "deepstrom": + feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample} + else: + feed_dict = {x: X_batch, y: Y_batch} + accuracy = sess.run([accuracy_op], feed_dict=feed_dict) + accuracies_test.append(accuracy[0]) + i += 1 + global_acc_test = sum(accuracies_test) / i + TEST_EVAL_TIME = t.time() - test_eval_start + resman["test_acc"] = global_acc_test + resman["test_eval_time"] = TEST_EVAL_TIME + printman.print() + + +if __name__ == "__main__": + paraman_obj = ParameterManagerMain(docopt.docopt(__doc__)) + resman_obj = ResultManagerMain() + printman_obj = ResultPrinter(paraman_obj, resman_obj) + + try: + main(paraman_obj, resman_obj, printman_obj) + except Exception as e: + printman_obj.print() + raise e + diff --git a/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end_mnist.ipynb b/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end_mnist.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0ebad13afe0b406631772a32c155504e52c4f680 --- /dev/null +++ b/main/experiments/scripts/until_october_2018/end_to_end/deepstrom_classif_end_to_end_mnist.ipynb @@ -0,0 +1,411 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:27,779 [21033] DEBUG matplotlib: $HOME=/home/luc\n", + "2018-10-19 13:26:27,780 [21033] DEBUG matplotlib: matplotlib data path /home/luc/anaconda3/envs/ml/lib/python3.6/site-packages/matplotlib/mpl-data\n", + "2018-10-19 13:26:27,785 [21033] DEBUG matplotlib: loaded rc file /home/luc/anaconda3/envs/ml/lib/python3.6/site-packages/matplotlib/mpl-data/matplotlibrc\n", + "2018-10-19 13:26:27,787 [21033] DEBUG matplotlib: matplotlib version 2.2.3\n", + "2018-10-19 13:26:27,788 [21033] DEBUG matplotlib: interactive is False\n", + "2018-10-19 13:26:27,789 [21033] DEBUG matplotlib: platform is linux\n", + "2018-10-19 13:26:27,790 [21033] DEBUG matplotlib: loaded modules: ['builtins', 'sys', '_frozen_importlib', '_imp', '_warnings', '_thread', '_weakref', '_frozen_importlib_external', '_io', 'marshal', 'posix', 'zipimport', 'encodings', 'codecs', '_codecs', 'encodings.aliases', 'encodings.utf_8', '_signal', '__main__', 'encodings.latin_1', 'io', 'abc', '_weakrefset', '_bootlocale', '_locale', 'site', 'os', 'errno', 'stat', 
... (remainder of matplotlib's DEBUG "loaded modules" listing, several hundred module names, elided) ...
'scipy.sparse.csc', 'scipy.sparse.lil', 'scipy.sparse._csparsetools', 'scipy.sparse.dok', 'scipy.sparse.coo', 'scipy.sparse.bsr', 'scipy.sparse.construct', 'scipy.sparse.extract', 'scipy.sparse._matrix_io', 'scipy.sparse.csgraph', 'scipy.sparse.csgraph._laplacian', '_cython_0_28_3', 'scipy.sparse.csgraph._shortest_path', 'scipy.sparse.csgraph._validation', 'scipy.sparse.csgraph._tools', 'scipy.sparse.csgraph._traversal', 'scipy.sparse.csgraph._min_spanning_tree', 'scipy.sparse.csgraph._reordering', 'sklearn.externals', 'sklearn.externals.six', 'sklearn.externals.six.moves', 'sklearn.externals.six.moves.urllib_parse', 'sklearn.externals.six.moves.urllib.parse', 'sklearn.externals.six.moves.urllib_error', 'sklearn.externals.six.moves.urllib.error', 'sklearn.externals.six.moves.urllib_request', 'sklearn.externals.six.moves.urllib.request', 'sklearn.externals.six.moves.urllib_response', 'sklearn.externals.six.moves.urllib.response', 'sklearn.externals.six.moves.urllib_robotparser', 'sklearn.externals.six.moves.urllib.robotparser', 'sklearn.externals.six.moves.urllib', 'sklearn.utils', 'sklearn.utils.murmurhash', 'sklearn.utils.validation', 'sklearn.utils.fixes', 'scipy.sparse.linalg', 'scipy.sparse.linalg.isolve', 'scipy.sparse.linalg.isolve.iterative', 'scipy.sparse.linalg.isolve._iterative', 'scipy.sparse.linalg.interface', 'scipy._lib.decorator', 'scipy.sparse.linalg.isolve.utils', 'scipy._lib._threadsafety', 'scipy.sparse.linalg.isolve.minres', 'scipy.sparse.linalg.isolve.lgmres', 'scipy.linalg', 'scipy.linalg.linalg_version', 'scipy.linalg.misc', 'scipy.linalg.blas', 'scipy.linalg._fblas', 'scipy.linalg.lapack', 'scipy.linalg._flapack', 'scipy.linalg.basic', 'scipy.linalg.flinalg', 'scipy.linalg._flinalg', 'scipy.linalg.decomp', 'scipy.linalg.decomp_svd', 'scipy.linalg._solve_toeplitz', 'scipy.linalg.decomp_lu', 'scipy.linalg._decomp_ldl', 'scipy.linalg.decomp_cholesky', 'scipy.linalg.decomp_qr', 'scipy.linalg._decomp_qz', 'scipy.linalg.decomp_schur', 'scipy.linalg._decomp_polar', 'scipy.linalg.matfuncs', 'scipy.linalg.special_matrices', 'scipy.linalg._expm_frechet', 'scipy.linalg._matfuncs_sqrtm', 'scipy.linalg._solvers', 'scipy.linalg._procrustes', 'scipy.linalg._decomp_update', 'scipy.linalg.cython_blas', 'scipy.linalg.cython_lapack', 'scipy.linalg._sketches', 'numpy.dual', 'scipy.sparse.linalg.isolve._gcrotmk', 'scipy.sparse.linalg.isolve.lsqr', 'scipy.sparse.linalg.isolve.lsmr', 'scipy.sparse.linalg.dsolve', 'scipy.sparse.linalg.dsolve.linsolve', 'scipy.sparse.linalg.dsolve._superlu', 'scipy.sparse.linalg.dsolve._add_newdocs', 'scipy.sparse.linalg.eigen', 'scipy.sparse.linalg.eigen.arpack', 'scipy.sparse.linalg.eigen.arpack.arpack', 'scipy.sparse.linalg.eigen.arpack._arpack', 'scipy.sparse.linalg.eigen.lobpcg', 'scipy.sparse.linalg.eigen.lobpcg.lobpcg', 'scipy.sparse.linalg.matfuncs', 'scipy.special', 'scipy.special.sf_error', 'scipy.special._ufuncs', 'scipy.special._ufuncs_cxx', 'scipy.special.basic', 'scipy.special.specfun', 'scipy.special.orthogonal', 'scipy.special._comb', 'scipy.special._logsumexp', 'scipy.special.spfun_stats', 'scipy.special._ellip_harm', 'scipy.special._ellip_harm_2', 'scipy.special.lambertw', 'scipy.special._spherical_bessel', 'scipy.sparse.linalg._onenormest', 'scipy.sparse.linalg._norm', 'scipy.sparse.linalg._expm_multiply', 'sklearn.exceptions', 'sklearn.externals.joblib', 'sklearn.externals.joblib.memory', 'sklearn.externals.joblib.hashing', 'sklearn.externals.joblib._compat', 'sklearn.externals.joblib.func_inspect', 'sklearn.externals.joblib.logger', 
'sklearn.externals.joblib.disk', 'sklearn.externals.joblib._memory_helpers', 'sklearn.externals.joblib.numpy_pickle', 'sklearn.externals.joblib.numpy_pickle_utils', 'gzip', 'sklearn.externals.joblib.numpy_pickle_compat', 'sklearn.externals.joblib.backports', 'distutils', 'distutils.version', 'sklearn.externals.joblib.parallel', 'sklearn.externals.joblib._multiprocessing_helpers', 'multiprocessing.synchronize', 'sklearn.externals.joblib.format_stack', 'sklearn.externals.joblib.my_exceptions', 'sklearn.externals.joblib._parallel_backends', 'sklearn.externals.joblib.pool', 'mmap', 'multiprocessing.pool', 'sklearn.utils.class_weight', 'sklearn.utils.deprecation', 'sklearn.preprocessing', 'sklearn.preprocessing._function_transformer', 'sklearn.preprocessing.data', 'scipy.stats', 'scipy.stats.stats', 'scipy.stats.distributions', 'scipy.stats._distn_infrastructure', 'scipy.misc', 'scipy.misc.doccer', 'scipy.misc.common', 'scipy.interpolate', 'scipy.interpolate.interpolate', 'scipy.interpolate.fitpack', 'scipy.interpolate._fitpack_impl', 'scipy.interpolate._fitpack', 'scipy.interpolate.dfitpack', 'scipy.interpolate._bsplines', 'scipy.interpolate._bspl', 'scipy.interpolate.polyint', 'scipy.interpolate._ppoly', 'scipy.interpolate.fitpack2', 'scipy.interpolate.interpnd', 'scipy.spatial', 'scipy.spatial.kdtree', 'scipy.spatial.ckdtree', 'scipy.spatial.qhull', 'scipy._lib.messagestream', 'scipy.spatial._spherical_voronoi', 'numpy.matlib', 'scipy.spatial._voronoi', 'scipy.spatial.distance', 'scipy.spatial._distance_wrap', 'scipy.spatial._hausdorff', 'scipy.spatial._plotutils', 'scipy.spatial._procrustes', 'scipy.interpolate.rbf', 'scipy.interpolate._cubic', 'scipy.interpolate.ndgriddata', 'scipy.interpolate._pade', 'scipy.misc.pilutil', 'PIL', 'PIL._version', 'PIL.Image', 'PIL._util', 'PIL._imaging', 'PIL.ImageMode', 'PIL._binary', 'cffi', 'cffi.api', 'cffi.lock', 'cffi.error', 'cffi.model', 'PIL.ImageFilter', 'scipy.stats._distr_params', 'scipy.optimize', 'scipy.optimize.optimize', 'scipy.optimize.linesearch', 'scipy.optimize.minpack2', 'scipy.optimize._minimize', 'scipy.optimize._trustregion_dogleg', 'scipy.optimize._trustregion', 'scipy.optimize._trustregion_ncg', 'scipy.optimize._trustregion_krylov', 'scipy.optimize._trlib', 'scipy.optimize._trlib._trlib', 'scipy.optimize._trustregion_exact', 'scipy.optimize._trustregion_constr', 'scipy.optimize._trustregion_constr.minimize_trustregion_constr', 'scipy.optimize._differentiable_functions', 'scipy.optimize._numdiff', 'scipy.optimize._group_columns', 'scipy.optimize._hessian_update_strategy', 'scipy.optimize._constraints', 'scipy.optimize._trustregion_constr.equality_constrained_sqp', 'scipy.optimize._trustregion_constr.projections', 'scipy.optimize._trustregion_constr.qp_subproblem', 'scipy.optimize._trustregion_constr.canonical_constraint', 'scipy.optimize._trustregion_constr.tr_interior_point', 'scipy.optimize._trustregion_constr.report', 'scipy.optimize.lbfgsb', 'scipy.optimize._lbfgsb', 'scipy.optimize.tnc', 'scipy.optimize.moduleTNC', 'scipy.optimize.cobyla', 'scipy.optimize._cobyla', 'scipy.optimize.slsqp', 'scipy.optimize._slsqp', 'scipy.optimize._root', 'scipy.optimize.minpack', 'scipy.optimize._minpack', 'scipy.optimize._lsq', 'scipy.optimize._lsq.least_squares', 'scipy.optimize._lsq.trf', 'scipy.optimize._lsq.common', 'scipy.optimize._lsq.dogbox', 'scipy.optimize._lsq.lsq_linear', 'scipy.optimize._lsq.trf_linear', 'scipy.optimize._lsq.givens_elimination', 'scipy.optimize._lsq.bvls', 'scipy.optimize._spectral', 'scipy.optimize.nonlin', 
'scipy.optimize.zeros', 'scipy.optimize._zeros', 'scipy.optimize.nnls', 'scipy.optimize._nnls', 'scipy.optimize._basinhopping', 'scipy.optimize._linprog', 'scipy.optimize._linprog_ip', 'scipy.optimize._remove_redundancy', 'scipy.optimize._hungarian', 'scipy.optimize._differentialevolution', 'scipy.integrate', 'scipy.integrate.quadrature', 'scipy.integrate.odepack', 'scipy.integrate._odepack', 'scipy.integrate.quadpack', 'scipy.integrate._quadpack', 'scipy.integrate._ode', 'scipy.integrate.vode', 'scipy.integrate._dop', 'scipy.integrate.lsoda', 'scipy.integrate._bvp', 'scipy.integrate._ivp', 'scipy.integrate._ivp.ivp', 'scipy.integrate._ivp.bdf', 'scipy.integrate._ivp.common', 'scipy.integrate._ivp.base', 'scipy.integrate._ivp.radau', 'scipy.integrate._ivp.rk', 'scipy.integrate._ivp.lsoda', 'scipy.stats._constants', 'scipy.stats._continuous_distns', 'scipy.stats._stats', 'scipy.stats._tukeylambda_stats', 'scipy.stats._discrete_distns', 'scipy.stats.mstats_basic', 'scipy.stats._stats_mstats_common', 'scipy.stats.morestats', 'scipy.stats.statlib', 'scipy.stats.contingency', 'scipy.stats._binned_statistic', 'scipy.stats.kde', 'scipy.stats.mvn', 'scipy.stats.mstats', 'scipy.stats.mstats_extras', 'scipy.stats._multivariate', 'sklearn.utils.extmath', 'sklearn.utils._logistic_sigmoid', 'sklearn.utils.sparsefuncs_fast', 'sklearn.utils.sparsefuncs', 'sklearn.preprocessing.label', 'sklearn.utils.multiclass', 'sklearn.preprocessing.imputation', 'skluc.main.utils', 'daiquiri', 'logging.config', 'socketserver', 'daiquiri.output', 'syslog', 'daiquiri.formatter', 'daiquiri.handlers', 'psutil', 'psutil._common', 'psutil._compat', 'psutil._exceptions', 'psutil._pslinux', 'psutil._psposix', 'psutil._psutil_linux', 'psutil._psutil_posix', 'sklearn.metrics', 'sklearn.metrics.ranking', 'sklearn.metrics.base', 'sklearn.metrics.classification', 'sklearn.metrics.cluster', 'sklearn.metrics.cluster.supervised', 'sklearn.metrics.cluster.expected_mutual_info_fast', 'sklearn.utils.lgamma', 'sklearn.metrics.cluster.unsupervised', 'sklearn.metrics.pairwise', 'sklearn.metrics.pairwise_fast', 'sklearn.metrics.cluster.bicluster', 'sklearn.utils.linear_assignment_', 'sklearn.metrics.regression', 'sklearn.metrics.scorer', 'skluc.main.data.mldatasets.Cifar10Dataset', 'matplotlib', 'matplotlib.cbook', 'matplotlib.cbook.deprecation', 'matplotlib.cbook._backports', 'matplotlib.compat', 'matplotlib.compat.subprocess', 'matplotlib.rcsetup', 'matplotlib.testing', 'matplotlib.fontconfig_pattern', 'pyparsing', 'matplotlib.colors', 'matplotlib._color_data', 'cycler', 'six.moves.urllib', 'six.moves.urllib.request', 'matplotlib._version']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:27,853 [21033] DEBUG matplotlib: CACHEDIR=/home/luc/.cache/matplotlib\n", + "2018-10-19 13:26:27,857 [21033] DEBUG matplotlib.font_manager: Using fontManager instance from /home/luc/.cache/matplotlib/fontList.json\n", + "2018-10-19 13:26:27,952 [21033] DEBUG matplotlib.backends: backend module://ipykernel.pylab.backend_inline version unknown\n", + "2018-10-19 13:26:28,922 [21033] DEBUG matplotlib.backends: backend module://ipykernel.pylab.backend_inline version unknown\n" + ] + } + ], + "source": [ + "import skluc.main.data.mldatasets as dataset\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense\n", + "from tensorflow.keras.models import Sequential\n", + "from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import 
DeepstromLayerEndToEnd\n", + "from skluc.main.utils import logger, memory_usage\n", + "from skluc.main.tensorflow_.utils import batch_generator\n", + "import time as t\n", + "\n", + "NUM_EPOCH = 10\n", + "BATCH_SIZE = 128\n", + "VALIDATION_SIZE = 10000\n", + "SEED_TRAIN_VALIDATION = 0\n", + "SUBSAMPLE_SIZE = 64\n", + "KERNEL_NAME = 'chi2_cpd'\n", + "kernel_dict = {}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data loading" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:28,933 [21033] INFO root: Loading dataset mnist\n", + "2018-10-19 13:26:28,935 [21033] DEBUG root: Creating directory /home/luc/ml_datasets/mnist if needed\n", + "2018-10-19 13:26:28,936 [21033] DEBUG root: Check existence of files ['/home/luc/ml_datasets/mnist/train-images-idx3-ubyte.gz', '/home/luc/ml_datasets/mnist/train-labels-idx1-ubyte.gz', '/home/luc/ml_datasets/mnist/t10k-images-idx3-ubyte.gz', '/home/luc/ml_datasets/mnist/t10k-labels-idx1-ubyte.gz']\n", + "2018-10-19 13:26:28,937 [21033] DEBUG root: Files ['/home/luc/ml_datasets/mnist/train-images-idx3-ubyte.gz', '/home/luc/ml_datasets/mnist/train-labels-idx1-ubyte.gz', '/home/luc/ml_datasets/mnist/t10k-images-idx3-ubyte.gz', '/home/luc/ml_datasets/mnist/t10k-labels-idx1-ubyte.gz'] already exist\n", + "2018-10-19 13:26:28,938 [21033] INFO root: Read gziped ubyte file /home/luc/ml_datasets/mnist/train-images-idx3-ubyte.gz\n", + "2018-10-19 13:26:28,940 [21033] INFO root: Read gziped ubyte file /home/luc/ml_datasets/mnist/train-labels-idx1-ubyte.gz\n", + "2018-10-19 13:26:29,250 [21033] INFO root: Read gziped ubyte file /home/luc/ml_datasets/mnist/t10k-images-idx3-ubyte.gz\n", + "2018-10-19 13:26:29,251 [21033] INFO root: Read gziped ubyte file /home/luc/ml_datasets/mnist/t10k-labels-idx1-ubyte.gz\n", + "2018-10-19 13:26:29,293 [21033] DEBUG root: Validation size < data length (10000 < 60000)\n", + "2018-10-19 13:26:29,294 [21033] DEBUG root: Construction of random train indexes (seed: 0)\n", + "2018-10-19 13:26:29,295 [21033] DEBUG root: Construction of random test indexes (seed: 0)\n", + "2018-10-19 13:26:29,296 [21033] DEBUG root: Dataset size: 60000\n", + "2018-10-19 13:26:29,301 [21033] INFO root: Apply one hot encoding to dataset mnist.\n", + "2018-10-19 13:26:29,302 [21033] DEBUG root: Apply one hot encoding to _train data of mnist dataset\n", + "2018-10-19 13:26:29,310 [21033] DEBUG root: Apply one hot encoding to _test data of mnist dataset\n", + "2018-10-19 13:26:29,314 [21033] DEBUG root: Images vec shape: (60000, 784)\n", + "2018-10-19 13:26:29,315 [21033] DEBUG root: Images mat shape: (60000, 28, 28, 1)\n", + "2018-10-19 13:26:29,319 [21033] DEBUG root: Images vec shape: (10000, 784)\n", + "2018-10-19 13:26:29,320 [21033] DEBUG root: Images mat shape: (10000, 28, 28, 1)\n", + "2018-10-19 13:26:29,324 [21033] INFO root: Change type of data to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,324 [21033] DEBUG root: Change type of _train data to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,328 [21033] DEBUG root: _train data was of type uint8\n", + "2018-10-19 13:26:29,397 [21033] DEBUG root: _train data is now of type float32\n", + "2018-10-19 13:26:29,401 [21033] DEBUG root: Change type of _test data to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,404 [21033] DEBUG root: _test data was of type uint8\n", + "2018-10-19 
13:26:29,416 [21033] DEBUG root: _test data is now of type float32\n", + "2018-10-19 13:26:29,417 [21033] INFO root: Change type of labels to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,421 [21033] DEBUG root: Change type of _train labels to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,422 [21033] DEBUG root: _train labels were of type int64\n", + "2018-10-19 13:26:29,424 [21033] DEBUG root: _train labels are now of type float32\n", + "2018-10-19 13:26:29,426 [21033] DEBUG root: Change type of _test labels to <class 'numpy.float32'> in the dataset mnist.\n", + "2018-10-19 13:26:29,431 [21033] DEBUG root: _test labels were of type int64\n", + "2018-10-19 13:26:29,431 [21033] DEBUG root: _test labels are now of type float32\n", + "2018-10-19 13:26:29,433 [21033] INFO root: Apply normalization to data from dataset mnist.\n", + "2018-10-19 13:26:29,569 [21033] DEBUG root: Apply normalization to _train data of mnist dataset.\n", + "2018-10-19 13:26:29,600 [21033] DEBUG root: Apply normalization to _test data of mnist dataset.\n" + ] + } + ], + "source": [ + "data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)\n", + "data.load()\n", + "data.to_one_hot()\n", + "data.to_image()\n", + "data.data_astype(np.float32)\n", + "data.labels_astype(np.float32)\n", + "data.normalize()\n", + "\n", + "X_train, y_train = data.train.data, data.train.labels\n", + "X_test, y_test = data.test.data, data.test.labels\n", + "X_val, y_val = data.validation.data, data.validation.labels" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(50000, 10)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model definition" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def build_lenet_model(input_shape):\n", + " model = Sequential()\n", + " model.add(\n", + " Conv2D(6, (5, 5), padding='valid', activation='relu', kernel_initializer='he_normal', input_shape=input_shape))\n", + " model.add(MaxPooling2D((2, 2), strides=(2, 2)))\n", + " model.add(Conv2D(16, (5, 5), padding='valid', activation='relu', kernel_initializer='he_normal'))\n", + " model.add(MaxPooling2D((2, 2), strides=(2, 2)))\n", + " model.add(Flatten())\n", + " return model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:29,979 [21033] INFO root: Selecting deepstrom layer function with subsample size = 64, output_dim = 64, without activation function and kernel = chi2_cpd\n" + ] + } + ], + "source": [ + "input_dim = X_train.shape[1:]\n", + "output_dim = y_train.shape[1]\n", + "\n", + "x = tf.placeholder(tf.float32, shape=[None, *input_dim], name=\"x\")\n", + "y = tf.placeholder(tf.float32, shape=[None, output_dim], name=\"label\")\n", + "subs = tf.placeholder(tf.float32, shape=[SUBSAMPLE_SIZE, *input_dim], name=\"subsample\")\n", + "\n", + "convnet_model = build_lenet_model(x.shape[1:])\n", + "\n", + "repr_x = convnet_model(x)\n", + "repr_sub = convnet_model(subs)\n", + "\n", + "deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=SUBSAMPLE_SIZE,\n", + " kernel_name=KERNEL_NAME)\n", + "\n", + "deepstrom_output = deepstrom_layer([repr_x, repr_sub])\n", + 
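"# both the batch x and the Nystrom subsample pass through the same convnet, so the\n",
+ "# kernel inside the deepstrom layer is computed on learned representations and the\n",
+ "# whole pipeline (convnet + Nystrom layer + classifier) trains end to end\n",
+ 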
"\n", + "with tf.variable_scope(\"classification\"):\n", + " classif = Dense(output_dim)(deepstrom_output)\n", + "\n", + "# calcul de la loss\n", + "with tf.name_scope(\"xent\"):\n", + " cross_entropy = tf.reduce_mean(\n", + " tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name=\"xentropy\"),\n", + " name=\"xentropy_mean\")\n", + " tf.summary.scalar('loss-xent', cross_entropy)\n", + "\n", + "# todo learning rate as hyperparameter\n", + "# calcul du gradient\n", + "with tf.name_scope(\"train\"):\n", + " global_step = tf.Variable(0, name=\"global_step\", trainable=False)\n", + " train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,\n", + " global_step=global_step)\n", + "\n", + "# calcul de l'accuracy\n", + "with tf.name_scope(\"accuracy\"):\n", + " predictions = tf.argmax(classif, 1)\n", + " correct_prediction = tf.equal(predictions, tf.argmax(y, 1))\n", + " accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", + " tf.summary.scalar(\"accuracy\", accuracy_op)\n", + "\n", + "merged_summary = tf.summary.merge_all()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:30,242 [21033] DEBUG root: Start finding subset indices of size 64 with uniform distribution of labels\n", + "2018-10-19 13:26:30,266 [21033] DEBUG root: Need 6 (+/- 1) example by label\n", + "2018-10-19 13:26:30,273 [21033] DEBUG root: After finding equal number (6) for example by labels (total = 60), need to find more examples to reach size 64\n" + ] + } + ], + "source": [ + "subsample_indexes = data.get_uniform_class_rand_indices_validation(SUBSAMPLE_SIZE)\n", + "nys_subsample = data.validation.data[subsample_indexes]\n", + "init = tf.global_variables_initializer()\n", + "summary_writer = tf.summary.FileWriter(\"debug_classification_end_to_end\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-10-19 13:26:30,308 [21033] INFO root: Start training\n", + "2018-10-19 13:26:30,390 [21033] DEBUG root: process = 2.669268992 total = 16.697475072 available = 6.650212352 used = 8.531468288 free = 2.257145856\n", + "2018-10-19 13:26:30,786 [21033] INFO root: epoch: 0/10; batch: 1/391; batch_shape: (128, 28, 28, 1); loss: 135.15185546875; acc: 0.09375\n", + "2018-10-19 13:26:40,643 [21033] INFO root: epoch: 0/10; batch: 101/391; batch_shape: (128, 28, 28, 1); loss: 2.3022894859313965; acc: 0.1640625\n", + "2018-10-19 13:26:50,082 [21033] INFO root: epoch: 0/10; batch: 201/391; batch_shape: (128, 28, 28, 1); loss: 2.302400827407837; acc: 0.125\n", + "2018-10-19 13:26:59,323 [21033] INFO root: epoch: 0/10; batch: 301/391; batch_shape: (128, 28, 28, 1); loss: 2.303340435028076; acc: 0.0546875\n", + "2018-10-19 13:27:07,533 [21033] DEBUG root: process = 3.064786944 total = 16.697475072 available = 6.555471872 used = 8.631795712 free = 2.150834176\n", + "2018-10-19 13:27:07,692 [21033] INFO root: epoch: 1/10; batch: 1/391; batch_shape: (128, 28, 28, 1); loss: 2.301776885986328; acc: 0.1328125\n", + "2018-10-19 13:27:17,066 [21033] INFO root: epoch: 1/10; batch: 101/391; batch_shape: (128, 28, 28, 1); loss: 2.301180601119995; acc: 0.1640625\n", + "2018-10-19 13:27:26,329 [21033] INFO root: epoch: 1/10; batch: 201/391; batch_shape: (128, 28, 28, 1); loss: 2.3020567893981934; acc: 0.125\n", + "2018-10-19 13:27:35,530 [21033] INFO root: 
epoch: 1/10; batch: 301/391; batch_shape: (128, 28, 28, 1); loss: 2.3043129444122314; acc: 0.0546875\n",
+ "... (epochs 2 to 8 and the per-epoch memory DEBUG lines elided: loss stays flat around 2.30 and accuracy near chance for the whole run) ...\n",
+ "2018-10-19 13:32:47,503 [21033] INFO root: epoch: 9/10; batch: 301/391; batch_shape: (128, 28, 28, 1); loss: 2.309267520904541; acc: 0.0546875\n",
+ "2018-10-19 13:32:56,475 [21033] INFO root: Evaluation on validation data\n" + ] + }, + { + "ename": "InvalidArgumentError",
+ "evalue": "You must feed a value for placeholder tensor 'x_1' with dtype float and shape [64,28,28,1]\n\t [[Node: x_1 = Placeholder[dtype=DT_FLOAT, shape=[64,28,28,1], _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"]()]]\n\nCaused by op 'x_1', defined at:\n ... (IPython, tornado and TensorFlow framework frames elided) ...\n File \"<ipython-input-5-068e95f301b0>\", line 6, in <module>\n subs = tf.placeholder(tf.float32, shape=[SUBSAMPLE_SIZE, *input_dim], name=\"x\")\n\nInvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'x_1' with dtype float and shape [64,28,28,1]\n\t [[Node: x_1 = Placeholder[dtype=DT_FLOAT, shape=[64,28,28,1], _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"]()]]\n",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
+ "... (coloured traceback frames elided; they duplicate the plain-text traceback above) ..."
+ ], + "output_type": "error" + } + ],
+ "source": [
+ "with tf.Session() as sess:\n",
+ "    logger.info(\"Start training\")\n",
+ "    summary_writer.add_graph(sess.graph)\n",
+ "    # Initialize all Variable objects\n",
+ "    sess.run(init)\n",
+ "    # actual learning\n",
+ "    nb_batch_per_epoch = int(data.train[0].shape[0] / BATCH_SIZE) + 1\n",
+ "    global_start = t.time()\n",
+ "    for i in range(NUM_EPOCH):\n",
+ "        logger.debug(memory_usage())\n",
+ "        j = 0\n",
+ "        for X_batch, Y_batch in batch_generator(X_train, y_train, BATCH_SIZE, False):\n",
+ "            feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}\n",
+ "            _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)\n",
+ "            if j % 100 == 0:\n",
+ "                logger.info(\"epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}\".format(\n",
+ "                    i, NUM_EPOCH, j + 1, nb_batch_per_epoch, X_batch.shape, loss, acc))\n",
+ "            # use a monotonically increasing step so tensorboard summaries do not collide\n",
+ "            summary_writer.add_summary(summary_str, i * nb_batch_per_epoch + j)\n",
+ "            j += 1\n",
+ "\n",
+ "    logger.info(\"Evaluation on validation data\")\n",
+ "    training_time = t.time() - global_start\n",
+ "    accuracies_val = []\n",
+ "    i = 0\n",
+ "    val_eval_start = t.time()\n",
+ "    # the Nystrom subsample must be fed at evaluation time too: leaving it out of the\n",
+ "    # feed keeps the 'subs' placeholder unfed and raises the InvalidArgumentError recorded above\n",
+ "    for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):\n",
+ "        accuracy = sess.run([accuracy_op], feed_dict={\n",
+ "            x: X_batch, y: Y_batch, subs: nys_subsample})\n",
+ "        accuracies_val.append(accuracy[0])\n",
+ "        i += 1\n",
+ "    global_acc_val = sum(accuracies_val) / i\n",
+ "    VAL_EVAL_TIME = t.time() - val_eval_start\n",
+ "\n",
+ "    logger.info(\"Evaluation on test data\")\n",
+ "    accuracies_test = []\n",
+ "    i = 0\n",
+ "    test_eval_start = t.time()\n",
+ "    for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):\n",
+ "        accuracy = sess.run([accuracy_op], feed_dict={\n",
+ "            x: X_batch, y: Y_batch, subs: nys_subsample})\n",
+ "        accuracies_test.append(accuracy[0])\n",
+ "        i += 1\n",
+ "    global_acc_test = sum(accuracies_test) / i\n",
+ "    TEST_EVAL_TIME = t.time() - test_eval_start\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data/__init__.py b/main/experiments/scripts/until_october_2018/transfert_few_data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data/benchmark_classification.py b/main/experiments/scripts/until_october_2018/transfert_few_data/benchmark_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..eb835c154495316bcaaaaad7462063a8f0faa725 --- /dev/null +++ b/main/experiments/scripts/until_october_2018/transfert_few_data/benchmark_classification.py @@ -0,0 +1,462 @@ +""" +Benchmark VGG: Benchmarking deepstrom versus other 
architectures of the VGG network.
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V]
+
+Options:
+    --help -h                               Display help and exit.
+    -q --quiet                              Set logging level to info.
+    -V --tensorboard                        Write tensorboard logs.
+    -a value --seed value                   The seed value used for all randomization processes [default: 0]
+    -t --train-size size                    Size of train set.
+    -v size --validation-size size          The size of the validation set [default: 10000]
+    -e numepoch --num-epoch=numepoch        The number of epochs.
+    -s batchsize --batch-size=batchsize     The number of examples in each batch
+    -d --dropout val                        Keep probability of neurons before the classification layer [default: 1.0]
+    -D reprdim --out-dim=reprdim            The dimension of the final representation
+    -f --non-linearity name                 Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
+
+Dense:
+    -l --second-layer-size size             Says the size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack           The number of fastfood stacks for deepfriedconvnet
+    -z --real-fastfood                      Tell the fastfood layer to not update its weights
+
+Deepstrom:
+    -r --real-nystrom                       Says if the matrix for deepstrom should be K^(-1/2)
+    -m size --nys-size size                 The number of examples in the nystrom subsample.
+    -n --non-linear                         Tell Nystrom to use the non linear activation function on its output.
+
+Datasets:
+    --cifar10                               Use cifar10 dataset
+    --mnist                                 Use mnist dataset
+    --svhn                                  Use svhn dataset
+    --cifar100                              Use cifar100 dataset
+
+Dataset related:
+    -B --cut-layer name                     The name of the last convolutional layer when loading VGG19Transformer.
+    -W --weights name                       The name of the dataset used for weights.
+
+Possible kernels:
+    -R --rbf-kernel                         Says if the rbf kernel should be used for nystrom.
+    -L --linear-kernel                      Says if the linear kernel should be used for nystrom.
+    -C --chi-square-kernel                  Says if the basic additive chi square kernel should be used for nystrom.
+    -E --exp-chi-square-kernel              Says if the exponential chi square kernel should be used for nystrom.
+    -P --chi-square-PD-kernel               Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
+    -S --sigmoid-kernel                     Says if the sigmoid kernel should be used for nystrom.
+    -A --laplacian-kernel                   Says if the laplacian kernel should be used for nystrom.
+    -T --stacked-kernel                     Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
+    -M --sumed-kernel                       Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue        The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
+    -c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.
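+
+Example:
+    An illustrative invocation (assembled from the usage patterns above, not taken from a recorded run),
+    training the deepstrom variant on mnist with an RBF kernel, 10 epochs, batch size 128 and a
+    64-example Nystrom subsample:
+        benchmark_classification deepstrom --mnist -e 10 -s 128 -D 64 -m 64 -R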
+ +""" +import logging +import sys +import time as t + +import daiquiri +import numpy as np +import tensorflow as tf +import docopt +from tensorflow.python.keras.layers import Dense + +import skluc.main.data.mldatasets as dataset +from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer +from skluc.main.data.transformation.LeCunTransformer import LecunTransformer +from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer +from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer +from skluc.main.tensorflow_.utils import batch_generator +from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \ + tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels +from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage + + +def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None): + printed_r_list = [str(NETWORK), + str(global_acc_val), + str(global_acc_test), + str(training_time), + str(val_eval_time), + str(test_eval_time), + str(NUM_EPOCH), + str(BATCH_SIZE), + str(OUT_DIM), + str(SIZE_SECOND_LAYER), + str(KERNEL_NAME), + str(GAMMA), + str(CONST), + str(NB_STACK), + str(NYS_SUBSAMPLE_SIZE), + str(VALIDATION_SIZE), + str(SEED), + str(ACTIVATION_FUNCTION), + str(NON_LINEAR), + str(REAL_NYSTROM), + str(CUT_LAYER), + str(TRAIN_SIZE), + str(DROPOUT), + str(DATASET), + str(REAL_FASTFOOD), + str(WEIGHTS) + ] + print(",".join(printed_r_list)) + if error is None: + exit() + else: + raise error + + +def get_gamma_value(arguments, dat, chi2=False): + if arguments["--gamma"] is None: + logger.debug("Gamma arguments is None. 
+        if chi2:
+            gamma_value = 1./compute_euristic_sigma_chi2(dat.train.data)
+        else:
+            gamma_value = 1./compute_euristic_sigma(dat.train.data)
+    else:
+        # the --gamma string is eval'ed, so plain floats and Python expressions
+        # such as 1/256 are both accepted
+        gamma_value = eval(arguments["--gamma"])
+
+    logger.debug("Gamma value is {}".format(gamma_value))
+    return gamma_value
+
+
+def main():
+    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
+
+    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
+    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    tf.summary.histogram("convolved_examples", x)
+
+    if NETWORK == "dense":
+        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
+    elif NETWORK == "deepstrom":
+        logger.info("Selecting {} deepstrom layer function with "
+                    "subsample size = {}, "
+                    "output_dim = {}, "
+                    "{} activation function "
+                    "and kernel = {}"
+                    .format("real" if REAL_NYSTROM else "learned",
+                            NYS_SUBSAMPLE_SIZE,
+                            OUT_DIM,
+                            "with" if NON_LINEAR else "without",
+                            KERNEL_NAME))
+        if TRAIN_SIZE is not None:
+            subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.validation.data[subsample_indexes]
+        else:
+            subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.train.data[subsample_indexes]
+        logger.debug("Chosen subsample: {}".format(nys_subsample))
+        representation_layer = DeepstromLayer(subsample=nys_subsample,
+                                              out_dim=OUT_DIM,
+                                              activation=ACTIVATION_FUNCTION,
+                                              kernel_name=KERNEL_NAME,
+                                              real_nystrom=REAL_NYSTROM,
+                                              kernel_dict=kernel_dict)
+    elif NETWORK == "deepfriedconvnet":
+        representation_layer = FastFoodLayer(sigma=SIGMA,
+                                             nbr_stack=NB_STACK,
+                                             trainable=not REAL_FASTFOOD)
+    else:
+        raise Exception("Unrecognized network")
+
+    input_classif = representation_layer(x)
+
+    if SIZE_SECOND_LAYER > 0:
+        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
+        with tf.variable_scope("second_layer"):
+            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
+    else:
+        logger.debug("No second layer")
+        input_classif_2nd_layer = input_classif
+
+    with tf.variable_scope("classification"):
+        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+        input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
+        classif = Dense(output_dim)(input_drop)
+
+    # compute the loss
+    logger.debug("Add softmax layer for classification")
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # todo learning rate as hyperparameter
+    # compute the gradient
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # compute the accuracy
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    merged_summary = tf.summary.merge_all()
+
+    init = tf.global_variables_initializer()
+    # Create a session for running Ops on the Graph.
+    # Instantiate a SummaryWriter to output summaries and the Graph.
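+    # Recap of the graph built above (a sketch; the deepstrom formula is inferred
+    # from the -r flag documentation, not from the DeepstromLayer source):
+    #   x -> representation layer:
+    #          dense:            Dense(OUT_DIM) with relu/tanh
+    #          deepstrom:        activation(k(x, subsample) @ W), W fixed to
+    #                            K_subsample^(-1/2) with --real-nystrom, learned otherwise
+    #          deepfriedconvnet: FastFood random-feature stacks
+    #     -> optional second Dense layer -> dropout -> Dense softmax classifier,
+    #   trained with Adam(lr=1e-4) on the softmax cross-entropy.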
+    if TENSORBOARD:
+        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
+    with tf.Session() as sess:
+        logger.info("Start training")
+        if TENSORBOARD:
+            summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
+        global_start = t.time()
+        n_batches = int(data.train[0].shape[0] / BATCH_SIZE) + 1
+        for i in range(NUM_EPOCH):
+            logger.debug(memory_usage())
+            j = 0
+            start = t.time()
+            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
+                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
+                if j % 100 == 0:
+                    logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}"
+                                .format(i, NUM_EPOCH, j + 1, n_batches, X_batch.shape, loss, acc))
+                if TENSORBOARD:
+                    # summary step increases monotonically across epochs
+                    summary_writer.add_summary(summary_str, i * n_batches + j)
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        accuracies_val = []
+        i = 0
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_val.append(accuracy[0])
+            i += 1
+        global_acc_val = sum(accuracies_val) / i
+        VAL_EVAL_TIME = t.time() - val_eval_start
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        i = 0
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_test.append(accuracy[0])
+            i += 1
+        global_acc_test = sum(accuracies_test) / i
+        TEST_EVAL_TIME = t.time() - test_eval_start
+
+    print_result(global_acc_val=global_acc_val,
+                 global_acc_test=global_acc_test,
+                 training_time=training_time,
+                 val_eval_time=VAL_EVAL_TIME,
+                 test_eval_time=TEST_EVAL_TIME)
+
+
+if __name__ == '__main__':
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
+    arguments = docopt.docopt(__doc__)
+    logger.debug(arguments)
+    if arguments["--quiet"]:
+        daiquiri.setup(level=logging.INFO)
+    NUM_EPOCH = int(arguments["--num-epoch"])
+    BATCH_SIZE = int(arguments["--batch-size"])
+    OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
+    SIZE_SECOND_LAYER = int(arguments["--second-layer-size"])
+    RBF_KERNEL = arguments["--rbf-kernel"]
+    LINEAR_KERNEL = arguments["--linear-kernel"]
+    CHI2_KERNEL = arguments["--chi-square-kernel"]
+    CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
+    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
+    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
+    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
+    STACKED_KERNEL = arguments["--stacked-kernel"]
+    SUMED_KERNEL = arguments["--sumed-kernel"]
+    VALIDATION_SIZE = int(arguments["--validation-size"])
+    REAL_NYSTROM = arguments["--real-nystrom"]
+    SEED = int(arguments["--seed"])  # The seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
+    TENSORBOARD = arguments["--tensorboard"]
+    NYS_SUBSAMPLE_SIZE = None
+    KERNEL_NAME = None
+    GAMMA = None
+    CONST = None
+    NB_STACK = None
+    kernel_dict = {}
+    CIFAR_DATASET = bool(arguments["--cifar10"])
+    CIFAR100_DATASET = bool(arguments["--cifar100"])
bool(arguments["--cifar100"]) + MNIST_DATASET = bool(arguments["--mnist"]) + SVHN_DATASET = bool(arguments["--svhn"]) + REAL_FASTFOOD = bool(arguments["--real-fastfood"]) + test_eval_time = None + val_eval_time = None + if arguments["--non-linearity"] == "relu": + ACTIVATION_FUNCTION = tf.nn.relu + elif arguments["--non-linearity"] == "tanh": + ACTIVATION_FUNCTION = tf.nn.tanh + elif arguments["--non-linearity"] is None: + ACTIVATION_FUNCTION = tf.nn.relu + else: + raise ValueError("Not known --non-linearity arg: {}".format(arguments["--non-linearity"])) + NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None + + if CIFAR_DATASET: + DATASET = "cifar10" + elif MNIST_DATASET: + DATASET = "mnist" + elif SVHN_DATASET: + DATASET = "svhn" + elif CIFAR100_DATASET: + DATASET = "cifar100" + else: + raise ValueError("no know dataset specified") + CUT_LAYER = arguments["--cut-layer"] + + if arguments["--weights"] is None: + WEIGHTS = DATASET + else: + WEIGHTS = arguments["--weights"] + + DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None + logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT))) + if arguments["--train-size"] is not None: + TRAIN_SIZE = int(arguments["--train-size"]) + else: + TRAIN_SIZE = arguments["--train-size"] + global_acc_val = None + global_acc_test = None + training_time = None + + SEED_TRAIN_VALIDATION = SEED + if CIFAR_DATASET: + data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION) + transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER) + elif MNIST_DATASET: + data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION) + # todo rendre conv_pool2 parametrable + transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2") + elif SVHN_DATASET: + data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION) + transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER) + elif CIFAR100_DATASET: + data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION) + transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER) + else: + raise ValueError("No dataset specified") + + data.load() # todo gérer le bug flatten + if not data.is_image(): + data.to_image() # todo gérer le cas où ce sont déjà des images (les flatteniser dans tous les cas?) 
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+    logger.debug("train dataset shape: {}".format(data.train.data.shape))
+    data.apply_transformer(transformer)
+    data.normalize()
+    data.to_one_hot()
+    data.flatten()
+    data.data_astype(np.float32)
+    data.labels_astype(np.int)
+    if TRAIN_SIZE is not None:
+        data.reduce_data_size(int(TRAIN_SIZE))
+
+    logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
+    logger.info("Using dataset {} with validation size {} and seed for splitting set {}.".format(data.s_name, data.validation_size, data.seed))
+    logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
+    logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
+    logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
+    logger.debug("Sample of label: {}".format(data.train[1][0]))
+    # todo separate function for parameter parsing
+
+    if arguments["dense"]:
+        NETWORK = "dense"
+    elif arguments["deepstrom"]:
+        NETWORK = "deepstrom"
+        NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
+        if OUT_DIM is None:
+            OUT_DIM = NYS_SUBSAMPLE_SIZE
+        if RBF_KERNEL:
+            KERNEL = tf_rbf_kernel
+            KERNEL_NAME = "rbf"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": GAMMA}
+        elif LINEAR_KERNEL:
+            KERNEL = tf_linear_kernel
+            KERNEL_NAME = "linear"
+        elif CHI2_KERNEL:
+            KERNEL = tf_chi_square_CPD
+            KERNEL_NAME = "chi2_cpd"
+        elif CHI2_EXP_KERNEL:
+            KERNEL = tf_chi_square_CPD_exp
+            KERNEL_NAME = "chi2_exp_cpd"
+            GAMMA = get_gamma_value(arguments, data, chi2=True)
+            kernel_dict = {"gamma": GAMMA}
+        elif CHI2_PD_KERNEL:
+            KERNEL = tf_chi_square_PD
+            KERNEL_NAME = "chi2_pd"
+        elif SIGMOID_KERNEL:
+            KERNEL = tf_sigmoid_kernel
+            KERNEL_NAME = "sigmoid"
+            GAMMA = get_gamma_value(arguments, data)
+            CONST = float(arguments["--intercept-constant"])
+            kernel_dict = {"gamma": GAMMA, "constant": CONST}
+        elif LAPLACIAN_KERNEL:
+            KERNEL = tf_laplacian_kernel
+            KERNEL_NAME = "laplacian"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": np.sqrt(GAMMA)}
+        elif STACKED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+            def KERNEL(X, Y):
+                return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                           [{"gamma": g_value} for g_value in GAMMA])
+
+            KERNEL_NAME = "stacked"
+        elif SUMED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+            def KERNEL(X, Y):
+                return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                         [{"gamma": g_value} for g_value in GAMMA])
+
+            KERNEL_NAME = "summed"
+        else:
+            raise Exception("No kernel function specified for deepstrom")
+    elif arguments["deepfriedconvnet"]:
+        NETWORK = "deepfriedconvnet"
+        NB_STACK = int(arguments["--nb-stack"])
+        GAMMA = get_gamma_value(arguments, data)
+        SIGMA = 1 / GAMMA
+    else:
+        raise Exception("Unrecognized network")
+
+    try:
+        main()
+    except Exception as e:
+        print_result(error=e)
\ No newline at end of file
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_batchnorm/__init__.py b/main/experiments/scripts/until_october_2018/transfert_few_data_batchnorm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_batchnorm/benchmark_classification.py b/main/experiments/scripts/until_october_2018/transfert_few_data_batchnorm/benchmark_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea56aaba6bf324b2cdd368dff65e548a5a726dea
--- /dev/null
+++ b/main/experiments/scripts/until_october_2018/transfert_few_data_batchnorm/benchmark_classification.py
@@ -0,0 +1,467 @@
+"""
+Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V] [-b]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V] [-b]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V] [-b]
+
+Options:
+    --help -h                               Display help and exit.
+    -q --quiet                              Set logging level to info.
+    -V --tensorboard                        Write tensorboard logs.
+    -a value --seed value                   The seed value used for all randomization processes [default: 0]
+    -t --train-size size                    The size of the train set.
+    -v size --validation-size size          The size of the validation set [default: 10000]
+    -e numepoch --num-epoch=numepoch        The number of epochs.
+    -s batchsize --batch-size=batchsize     The number of examples in each batch.
+    -d --dropout val                        Keep probability of the neurons before the classification layer [default: 1.0]
+    -b --batchnorm                          Apply batch normalization before the softmax layer.
+    -D reprdim --out-dim=reprdim            The dimension of the final representation.
+    -f --non-linearity name                 Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
+
+Dense:
+    -l --second-layer-size size             The size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack           The number of fastfood stacks for deepfriedconvnet.
+    -z --real-fastfood                      Tell the fastfood layer not to update its weights.
+
+Deepstrom:
+    -r --real-nystrom                       Says if the matrix for deepstrom should be K^(-1/2).
+    -m size --nys-size size                 The number of examples in the nystrom subsample.
+    -n --non-linear                         Tell Nystrom to use the non-linear activation function on its output.
+
+Datasets:
+    --cifar10                               Use the cifar10 dataset.
+    --mnist                                 Use the mnist dataset.
+    --svhn                                  Use the svhn dataset.
+    --cifar100                              Use the cifar100 dataset.
+
+Dataset related:
+    -B --cut-layer name                     The name of the last convolutional layer kept when loading VGG19Transformer.
+    -W --weights name                       The name of the dataset used for the weights.
+
+Possible kernels:
+    -R --rbf-kernel                         Says if the rbf kernel should be used for nystrom.
+    -L --linear-kernel                      Says if the linear kernel should be used for nystrom.
+    -C --chi-square-kernel                  Says if the basic additive chi square kernel should be used for nystrom.
+    -E --exp-chi-square-kernel              Says if the exponential chi square kernel should be used for nystrom.
+    -P --chi-square-PD-kernel               Says if the positive definite version of the basic additive chi square kernel should be used for nystrom.
+    -S --sigmoid-kernel                     Says if the sigmoid kernel should be used for nystrom.
+    -A --laplacian-kernel                   Says if the laplacian kernel should be used for nystrom.
+    -T --stacked-kernel                     Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a stacked setting.
+    -M --sumed-kernel                       Says if the laplacian, chi2 and rbf kernels should be used for nystrom in a summed setting.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue        The value of gamma for the rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet).
+    -c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.
+
+"""
+import logging
+import sys
+import time as t
+
+import daiquiri
+import numpy as np
+import tensorflow as tf
+import docopt
+from tensorflow.python.keras.layers import Dense, BatchNormalization
+
+import skluc.main.data.mldatasets as dataset
+from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
+from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.tensorflow_.utils import batch_generator
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
+    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
+from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
+
+
+def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None):
+    printed_r_list = [str(NETWORK),
+                      str(global_acc_val),
+                      str(global_acc_test),
+                      str(training_time),
+                      str(val_eval_time),
+                      str(test_eval_time),
+                      str(NUM_EPOCH),
+                      str(BATCH_SIZE),
+                      str(OUT_DIM),
+                      str(SIZE_SECOND_LAYER),
+                      str(KERNEL_NAME),
+                      str(GAMMA),
+                      str(CONST),
+                      str(NB_STACK),
+                      str(NYS_SUBSAMPLE_SIZE),
+                      str(VALIDATION_SIZE),
+                      str(SEED),
+                      str(ACTIVATION_FUNCTION),
+                      str(NON_LINEAR),
+                      str(REAL_NYSTROM),
+                      str(CUT_LAYER),
+                      str(TRAIN_SIZE),
+                      str(DROPOUT),
+                      str(DATASET),
+                      str(REAL_FASTFOOD),
+                      str(WEIGHTS)
+                      ]
+    print(",".join(printed_r_list))
+    if error is None:
+        sys.exit()
+    else:
+        raise error
+
+
+def get_gamma_value(arguments, dat, chi2=False):
+    if arguments["--gamma"] is None:
+        logger.debug("Gamma argument is None. Need to compute it.")
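+        # Heuristic fallback: gamma = 1/sigma, where sigma is skluc's heuristic
+        # estimate computed on the train data (presumably a pairwise-distance
+        # statistic; the exact formula lives in compute_euristic_sigma and
+        # compute_euristic_sigma_chi2).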
+        if chi2:
+            gamma_value = 1./compute_euristic_sigma_chi2(dat.train.data)
+        else:
+            gamma_value = 1./compute_euristic_sigma(dat.train.data)
+    else:
+        # the --gamma string is eval'ed, so plain floats and Python expressions
+        # such as 1/256 are both accepted
+        gamma_value = eval(arguments["--gamma"])
+
+    logger.debug("Gamma value is {}".format(gamma_value))
+    return gamma_value
+
+
+def main():
+    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
+
+    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
+    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    tf.summary.histogram("convolved_examples", x)
+
+    if NETWORK == "dense":
+        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
+    elif NETWORK == "deepstrom":
+        logger.info("Selecting {} deepstrom layer function with "
+                    "subsample size = {}, "
+                    "output_dim = {}, "
+                    "{} activation function "
+                    "and kernel = {}"
+                    .format("real" if REAL_NYSTROM else "learned",
+                            NYS_SUBSAMPLE_SIZE,
+                            OUT_DIM,
+                            "with" if NON_LINEAR else "without",
+                            KERNEL_NAME))
+        if TRAIN_SIZE is not None:
+            subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.validation.data[subsample_indexes]
+        else:
+            subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.train.data[subsample_indexes]
+        logger.debug("Chosen subsample: {}".format(nys_subsample))
+        representation_layer = DeepstromLayer(subsample=nys_subsample,
+                                              out_dim=OUT_DIM,
+                                              activation=ACTIVATION_FUNCTION,
+                                              kernel_name=KERNEL_NAME,
+                                              real_nystrom=REAL_NYSTROM,
+                                              kernel_dict=kernel_dict)
+    elif NETWORK == "deepfriedconvnet":
+        representation_layer = FastFoodLayer(sigma=SIGMA,
+                                             nbr_stack=NB_STACK,
+                                             trainable=not REAL_FASTFOOD)
+    else:
+        raise Exception("Unrecognized network")
+
+    input_classif = representation_layer(x)
+
+    if SIZE_SECOND_LAYER > 0:
+        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
+        with tf.variable_scope("second_layer"):
+            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
+    else:
+        logger.debug("No second layer")
+        input_classif_2nd_layer = input_classif
+
+    with tf.variable_scope("classification"):
+        if BATCHNORM:
+            bn = BatchNormalization()
+            input_classif_2nd_layer = bn(input_classif_2nd_layer)
+        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+        input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
+        classif = Dense(output_dim)(input_drop)
+
+    # compute the loss
+    logger.debug("Add softmax layer for classification")
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # todo learning rate as hyperparameter
+    # compute the gradient
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # compute the accuracy
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    merged_summary = tf.summary.merge_all()
+
+    init = tf.global_variables_initializer()
+    # Create a session for running Ops on the Graph.
+    # Instantiate a SummaryWriter to output summaries and the Graph.
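+    # Recap of the graph built above (a sketch; the deepstrom formula is inferred
+    # from the -r flag documentation, not from the DeepstromLayer source):
+    #   x -> representation layer:
+    #          dense:            Dense(OUT_DIM) with relu/tanh
+    #          deepstrom:        activation(k(x, subsample) @ W), W fixed to
+    #                            K_subsample^(-1/2) with --real-nystrom, learned otherwise
+    #          deepfriedconvnet: FastFood random-feature stacks
+    #     -> optional second Dense layer [-> batch normalization with -b] -> dropout
+    #     -> Dense softmax classifier, trained with Adam(lr=1e-4) on cross-entropy.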
+    if TENSORBOARD:
+        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
+    with tf.Session() as sess:
+        logger.info("Start training")
+        if TENSORBOARD:
+            summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
+        global_start = t.time()
+        n_batches = int(data.train[0].shape[0] / BATCH_SIZE) + 1
+        for i in range(NUM_EPOCH):
+            logger.debug(memory_usage())
+            j = 0
+            start = t.time()
+            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
+                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
+                if j % 100 == 0:
+                    logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}"
+                                .format(i, NUM_EPOCH, j + 1, n_batches, X_batch.shape, loss, acc))
+                if TENSORBOARD:
+                    # summary step increases monotonically across epochs
+                    summary_writer.add_summary(summary_str, i * n_batches + j)
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        accuracies_val = []
+        i = 0
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_val.append(accuracy[0])
+            i += 1
+        global_acc_val = sum(accuracies_val) / i
+        VAL_EVAL_TIME = t.time() - val_eval_start
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        i = 0
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, 1000, False):
+            accuracy = sess.run([accuracy_op], feed_dict={x: X_batch, y: Y_batch, keep_prob: 1.0})
+            accuracies_test.append(accuracy[0])
+            i += 1
+        global_acc_test = sum(accuracies_test) / i
+        TEST_EVAL_TIME = t.time() - test_eval_start
+
+    print_result(global_acc_val=global_acc_val,
+                 global_acc_test=global_acc_test,
+                 training_time=training_time,
+                 val_eval_time=VAL_EVAL_TIME,
+                 test_eval_time=TEST_EVAL_TIME)
+
+
+if __name__ == '__main__':
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
+    arguments = docopt.docopt(__doc__)
+    logger.debug(arguments)
+    if arguments["--quiet"]:
+        daiquiri.setup(level=logging.INFO)
+    NUM_EPOCH = int(arguments["--num-epoch"])
+    BATCH_SIZE = int(arguments["--batch-size"])
+    OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
+    SIZE_SECOND_LAYER = int(arguments["--second-layer-size"])
+    RBF_KERNEL = arguments["--rbf-kernel"]
+    LINEAR_KERNEL = arguments["--linear-kernel"]
+    CHI2_KERNEL = arguments["--chi-square-kernel"]
+    CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
+    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
+    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
+    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
+    STACKED_KERNEL = arguments["--stacked-kernel"]
+    SUMED_KERNEL = arguments["--sumed-kernel"]
+    VALIDATION_SIZE = int(arguments["--validation-size"])
+    REAL_NYSTROM = arguments["--real-nystrom"]
+    SEED = int(arguments["--seed"])  # The seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
+    TENSORBOARD = arguments["--tensorboard"]
+    NYS_SUBSAMPLE_SIZE = None
+    KERNEL_NAME = None
+    GAMMA = None
+    CONST = None
+    NB_STACK = None
+    kernel_dict = {}
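+    # kernel_dict collects the hyperparameters of the chosen kernel (filled below,
+    # e.g. {"gamma": GAMMA} for rbf, {"gamma": GAMMA, "constant": CONST} for
+    # sigmoid) and is handed to DeepstromLayer unchanged.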
+    CIFAR_DATASET = bool(arguments["--cifar10"])
+    CIFAR100_DATASET = bool(arguments["--cifar100"])
+    MNIST_DATASET = bool(arguments["--mnist"])
+    SVHN_DATASET = bool(arguments["--svhn"])
+    REAL_FASTFOOD = bool(arguments["--real-fastfood"])
+    BATCHNORM = bool(arguments["--batchnorm"])
+    test_eval_time = None
+    val_eval_time = None
+    if arguments["--non-linearity"] == "relu":
+        ACTIVATION_FUNCTION = tf.nn.relu
+    elif arguments["--non-linearity"] == "tanh":
+        ACTIVATION_FUNCTION = tf.nn.tanh
+    elif arguments["--non-linearity"] is None:
+        ACTIVATION_FUNCTION = tf.nn.relu
+    else:
+        raise ValueError("Unknown --non-linearity arg: {}".format(arguments["--non-linearity"]))
+    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None
+
+    if CIFAR_DATASET:
+        DATASET = "cifar10"
+    elif MNIST_DATASET:
+        DATASET = "mnist"
+    elif SVHN_DATASET:
+        DATASET = "svhn"
+    elif CIFAR100_DATASET:
+        DATASET = "cifar100"
+    else:
+        raise ValueError("No known dataset specified")
+    CUT_LAYER = arguments["--cut-layer"]
+
+    if arguments["--weights"] is None:
+        WEIGHTS = DATASET
+    else:
+        WEIGHTS = arguments["--weights"]
+
+    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
+    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
+    if arguments["--train-size"] is not None:
+        TRAIN_SIZE = int(arguments["--train-size"])
+    else:
+        TRAIN_SIZE = arguments["--train-size"]
+    global_acc_val = None
+    global_acc_test = None
+    training_time = None
+
+    SEED_TRAIN_VALIDATION = SEED
+    if CIFAR_DATASET:
+        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif MNIST_DATASET:
+        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        # todo make conv_pool_2 configurable
+        transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2")
+    elif SVHN_DATASET:
+        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif CIFAR100_DATASET:
+        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    else:
+        raise ValueError("No dataset specified")
+
+    data.load()  # todo handle the flatten bug
+    if not data.is_image():
+        data.to_image()  # todo handle the case where the data are already images (flatten them in all cases?)
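+    # Preprocessing pipeline: cast and normalize the raw images, push them through
+    # the frozen convolutional transformer (features of the chosen cut layer), then
+    # renormalize, one-hot encode the labels and flatten the feature maps so the
+    # graph only ever sees fixed-size vectors.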
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+    logger.debug("train dataset shape: {}".format(data.train.data.shape))
+    data.apply_transformer(transformer)
+    data.normalize()
+    data.to_one_hot()
+    data.flatten()
+    data.data_astype(np.float32)
+    data.labels_astype(np.int)
+    if TRAIN_SIZE is not None:
+        data.reduce_data_size(int(TRAIN_SIZE))
+
+    logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
+    logger.info("Using dataset {} with validation size {} and seed for splitting set {}.".format(data.s_name, data.validation_size, data.seed))
+    logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
+    logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
+    logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
+    logger.debug("Sample of label: {}".format(data.train[1][0]))
+    # todo separate function for parameter parsing
+
+    if arguments["dense"]:
+        NETWORK = "dense"
+    elif arguments["deepstrom"]:
+        NETWORK = "deepstrom"
+        NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
+        if OUT_DIM is None:
+            OUT_DIM = NYS_SUBSAMPLE_SIZE
+        if RBF_KERNEL:
+            KERNEL = tf_rbf_kernel
+            KERNEL_NAME = "rbf"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": GAMMA}
+        elif LINEAR_KERNEL:
+            KERNEL = tf_linear_kernel
+            KERNEL_NAME = "linear"
+        elif CHI2_KERNEL:
+            KERNEL = tf_chi_square_CPD
+            KERNEL_NAME = "chi2_cpd"
+        elif CHI2_EXP_KERNEL:
+            KERNEL = tf_chi_square_CPD_exp
+            KERNEL_NAME = "chi2_exp_cpd"
+            GAMMA = get_gamma_value(arguments, data, chi2=True)
+            kernel_dict = {"gamma": GAMMA}
+        elif CHI2_PD_KERNEL:
+            KERNEL = tf_chi_square_PD
+            KERNEL_NAME = "chi2_pd"
+        elif SIGMOID_KERNEL:
+            KERNEL = tf_sigmoid_kernel
+            KERNEL_NAME = "sigmoid"
+            GAMMA = get_gamma_value(arguments, data)
+            CONST = float(arguments["--intercept-constant"])
+            kernel_dict = {"gamma": GAMMA, "constant": CONST}
+        elif LAPLACIAN_KERNEL:
+            KERNEL = tf_laplacian_kernel
+            KERNEL_NAME = "laplacian"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": np.sqrt(GAMMA)}
+        elif STACKED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+            def KERNEL(X, Y):
+                return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                           [{"gamma": g_value} for g_value in GAMMA])
+
+            KERNEL_NAME = "stacked"
+        elif SUMED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+            def KERNEL(X, Y):
+                return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                         [{"gamma": g_value} for g_value in GAMMA])
+
+            KERNEL_NAME = "summed"
+        else:
+            raise Exception("No kernel function specified for deepstrom")
+    elif arguments["deepfriedconvnet"]:
+        NETWORK = "deepfriedconvnet"
+        NB_STACK = int(arguments["--nb-stack"])
+        GAMMA = get_gamma_value(arguments, data)
+        SIGMA = 1 / GAMMA
+    else:
+        raise Exception("Unrecognized network")
+
+    try:
+        main()
+    except Exception as e:
+        print_result(error=e)
\ No newline at end of file