diff --git a/main/experiments/scripts/november_2018/end_to_end_with_2_layers_only_dense_with_augment/__init__.py b/main/experiments/scripts/november_2018/end_to_end_with_2_layers_only_dense_with_augment/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/november_2018/end_to_end_with_2_layers_only_dense_with_augment/deepstrom_classif_end_to_end.py b/main/experiments/scripts/november_2018/end_to_end_with_2_layers_only_dense_with_augment/deepstrom_classif_end_to_end.py
new file mode 100644
index 0000000000000000000000000000000000000000..60f8e3f602b1a721f35ce56ed409572a2aa2fbf2
--- /dev/null
+++ b/main/experiments/scripts/november_2018/end_to_end_with_2_layers_only_dense_with_augment/deepstrom_classif_end_to_end.py
@@ -0,0 +1,328 @@
+"""
+Benchmark VGG: benchmarking Deepstrom against other architectures built on the VGG network.
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-V]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-V]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V]
+
+Options:
+    --help -h                               Display help and exit.
+    -q --quiet                              Set logging level to info.
+    -V --tensorboard                        Write tensorboard logs.
+    -a --seed value                         The seed value used for all random processes [default: 0]
+    -t --train-size size                    Size of the train set.
+    -v --validation-size size               The size of the validation set [default: 10000]
+    -e --num-epoch=numepoch                 The number of epochs.
+    -s --batch-size=batchsize               The number of examples in each batch.
+    -d --dropout val                        Keep probability of neurons before the classification layer [default: 1.0]
+    -D reprdim --out-dim=reprdim            The dimension of the final representation.
+    -f --non-linearity name                 The non-linearity to use when one is needed (possible values: "relu", "tanh") [default: relu]
+
+Dense:
+    -l --second-layer-size size             The size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack           The number of fastfood stacks for deepfriedconvnet.
+    -z --real-fastfood                      Tell the fastfood layer not to update its weights.
+
+Deepstrom:
+    -r --real-nystrom                       Use the true K^(-1/2) matrix in deepstrom.
+    -m size --nys-size size                 The number of examples in the Nystrom subsample.
+    -n --non-linear                         Tell Nystrom to apply the non-linear activation function to its output.
+
+Datasets:
+    --cifar10                               Use the cifar10 dataset.
+    --mnist                                 Use the mnist dataset.
+    --svhn                                  Use the svhn dataset.
+    --cifar100                              Use the cifar100 dataset.
+
+Possible kernels:
+    -R --rbf-kernel                         Use the rbf kernel for Nystrom.
+    -L --linear-kernel                      Use the linear kernel for Nystrom.
+    -C --chi-square-kernel                  Use the basic additive chi-square kernel for Nystrom.
+    -E --exp-chi-square-kernel              Use the exponential chi-square kernel for Nystrom.
+    -P --chi-square-PD-kernel               Use the positive-definite version of the basic additive chi-square kernel for Nystrom.
+    -S --sigmoid-kernel                     Use the sigmoid kernel for Nystrom.
+    -A --laplacian-kernel                   Use the laplacian kernel for Nystrom.
+    -T --stacked-kernel                     Use the laplacian, chi2 and rbf kernels in a stacked setting for Nystrom.
+    -M --sumed-kernel                       Use the laplacian, chi2 and rbf kernels in a summed setting for Nystrom.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue                The value of gamma for the rbf, chi2 or hyperbolic tangent kernel (deepstrom and deepfriedconvnet).
+    -c cvalue --intercept-constant cvalue           The value of the intercept constant for the hyperbolic tangent kernel.
+
+"""
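+# Example invocations (illustrative only; any flag combination accepted by the
+# usage patterns above will do):
+#   python deepstrom_classif_end_to_end.py deepstrom --cifar10 -e 100 -s 128 -m 64 -R -D 64
+#   python deepstrom_classif_end_to_end.py dense --mnist -e 50 -s 64 -D 128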
+
+import time as t
+
+import docopt
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.keras.initializers import he_normal
+from tensorflow.python.keras.layers import Dense
+from tensorflow.python.keras.regularizers import l2
+
+import skluc.main.data.mldatasets as dataset
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
+from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model
+from skluc.main.tensorflow_.utils import batch_generator
+from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
+
+
+class ParameterManagerMain(ParameterManager):
+
+    def __init__(self, docopt_dict):
+        super().__init__(docopt_dict)
+
+        # docopt yields None (or the string "None") when an option is absent:
+        # test for that directly instead of calling eval() on user input
+        self["--out-dim"] = int(self["--out-dim"]) if self["--out-dim"] not in (None, "None") else None
+        self["kernel"] = self.init_kernel()
+        self["network"] = self.init_network()
+        self["activation_function"] = self.init_non_linearity()
+        self["dataset"] = self.init_dataset()
+        self["--nb-stack"] = int(self["--nb-stack"]) if self["--nb-stack"] is not None else None
+        self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None
+        self["--num-epoch"] = int(self["--num-epoch"])
+        self["--validation-size"] = int(self["--validation-size"])
+        self["--seed"] = int(self["--seed"])
+        self["--batch-size"] = int(self["--batch-size"])
+        self["deepstrom_activation"] = self.init_deepstrom_activation()
+
+        self.__kernel_dict = None
+
+    def init_deepstrom_activation(self):
+        if not self["deepstrom"]:
+            return None
+        if self["--non-linear"]:
+            return self["--non-linearity"]
+        return None
+
+    def init_kernel_dict(self, data):
+        if self["kernel"] == "rbf":
+            gamma = self.get_gamma_value(data)
+            self["--gamma"] = gamma
+            self.__kernel_dict = {"gamma": gamma}
+        elif self["kernel"] == "chi2_exp_cpd":
+            gamma = self.get_gamma_value(data, chi2=True)
+            self["--gamma"] = gamma
+            self.__kernel_dict = {"gamma": gamma}
+        elif self["kernel"] == "laplacian":
+            gamma = self.get_gamma_value(data)
+            self["--gamma"] = gamma
+            # the laplacian kernel reuses the rbf gamma heuristic, rescaled by sqrt
+            self.__kernel_dict = {"gamma": np.sqrt(gamma)}
+        else:
+            self.__kernel_dict = {}
+
+    def __getitem__(self, item):
+        if item == "kernel_dict":
+            return self.__kernel_dict
+        return super().__getitem__(item)
+
+
+class ResultManagerMain(ResultManager):
+    def __init__(self):
+        super().__init__()
+        self["training_time"] = None
+        self["val_eval_time"] = None
+        self["val_acc"] = None
+        self["test_acc"] = None
+        self["test_eval_time"] = None
+
+
+def cifar100_extended_convmodel_func(input_shape, weight_decay=0.0001):
+    # VGG19 feature extractor extended with one wide fully-connected layer for cifar100
+    vgg19_convolution_model = build_vgg19_model(input_shape, weight_decay)
+    vgg19_convolution_model.add(Dense(4096, use_bias=True,
+                                      kernel_regularizer=l2(weight_decay), kernel_initializer=he_normal(),
+                                      name='fc_cifar100'))
+    return vgg19_convolution_model
+
+
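+# The three networks below differ only in the layer that maps the convolutional
+# representation to the classifier input:
+#   dense:            repr_x -> Dense(out_dim)
+#   deepfriedconvnet: repr_x -> FastFoodLayer (adaptive random feature map)
+#   deepstrom:        (repr_x, repr_sub) -> DeepstromLayerEndToEnd (Nystrom approximation)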
+def main(paraman, resman, printman):
+    if paraman["dataset"] == "mnist":
+        data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_lenet_model
+    elif paraman["dataset"] == "cifar10":
+        data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_vgg19_model
+    elif paraman["dataset"] == "cifar100":
+        data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = cifar100_extended_convmodel_func
+    elif paraman["dataset"] == "svhn":
+        data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_vgg19_model
+    else:
+        raise ValueError("Unknown dataset")
+
+    data.load()
+    data.to_one_hot()
+    if not data.is_image():
+        data.to_image()
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+
+    X_train, y_train = data.train.data, data.train.labels
+    X_test, y_test = data.test.data, data.test.labels
+    X_val, y_val = data.validation.data, data.validation.labels
+
+    paraman.init_kernel_dict(X_train)
+
+    # Model definition
+
+    input_dim = X_train.shape[1:]
+    output_dim = y_train.shape[1]
+
+    x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x")
+    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    subs = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *input_dim], name="subsample")
+
+    # the same convolutional model is applied to the input batch and to the
+    # Nystrom subsample, so both representations share weights
+    convnet_model = convmodel_func(x.shape[1:])
+    repr_x = convnet_model(x)
+    repr_sub = convnet_model(subs)
+
+    logger.debug(paraman["kernel_dict"])
+
+    input_classifier = None
+
+    if paraman["network"] == "deepstrom":
+        deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"],
+                                                 kernel_name=paraman["kernel"],
+                                                 kernel_dict=paraman["kernel_dict"],
+                                                 activation=paraman["deepstrom_activation"],
+                                                 out_dim=paraman["--out-dim"])
+        input_classifier = deepstrom_layer([repr_x, repr_sub])
+
+        # draw the Nystrom subsample once, class-balanced, from the validation set
+        subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
+        nys_subsample = data.validation.data[subsample_indexes]
+    elif paraman["network"] == "dense":
+        dense_layer = Dense(paraman["--out-dim"], activation=paraman["activation_function"])
+        input_classifier = dense_layer(repr_x)
+    elif paraman["network"] == "deepfriedconvnet":
+        deepfried_layer = FastFoodLayer(sigma=1 / paraman["--gamma"], nbr_stack=paraman["--nb-stack"],
+                                        trainable=not paraman["--real-fastfood"])
+        input_classifier = deepfried_layer(repr_x)
+    else:
+        raise ValueError(f"Unrecognized network {paraman['network']}")
+
+    with tf.variable_scope("classification"):
+        classif = Dense(output_dim)(input_classifier)
+
+    # loss computation
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # training operator
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # accuracy computation
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    merged_summary = tf.summary.merge_all()
+
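+    # Note: for deepstrom, the fixed Nystrom subsample is fed through its own
+    # placeholder at every sess.run; its representation repr_sub is therefore
+    # recomputed as the shared convolutional weights evolve during training.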
+    init = tf.global_variables_initializer()
+
+    summary_writer = None
+    if paraman["--tensorboard"]:
+        summary_writer = tf.summary.FileWriter("debug_classification_end_to_end")
+
+    with tf.Session() as sess:
+        logger.info("Start training")
+        if paraman["--tensorboard"]:
+            summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        global_start = t.time()
+        j = 0
+        for i in range(paraman["--num-epoch"]):
+            logger.debug(memory_usage())
+            for X_batch, Y_batch in batch_generator(X_train, y_train, paraman["--batch-size"], False):
+                if paraman["network"] == "deepstrom":
+                    feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
+                else:
+                    feed_dict = {x: X_batch, y: Y_batch}
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary],
+                                                     feed_dict=feed_dict)
+                if j % 100 == 0:
+                    nb_batches = int(data.train[0].shape[0] / paraman["--batch-size"]) + 1
+                    logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}"
+                                .format(i, paraman["--num-epoch"], j + 1, nb_batches, X_batch.shape, loss, acc))
+                if paraman["--tensorboard"]:
+                    summary_writer.add_summary(summary_str, j)
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        resman["training_time"] = training_time
+        accuracies_val = []
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(X_val, y_val, 1000, False):
+            if paraman["network"] == "deepstrom":
+                feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
+            else:
+                feed_dict = {x: X_batch, y: Y_batch}
+            accuracies_val.append(sess.run(accuracy_op, feed_dict=feed_dict))
+        # mean of per-batch accuracies (slightly biased if the last batch is smaller)
+        global_acc_val = sum(accuracies_val) / len(accuracies_val)
+        resman["val_eval_time"] = t.time() - val_eval_start
+        resman["val_acc"] = global_acc_val
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(X_test, y_test, 1000, False):
+            if paraman["network"] == "deepstrom":
+                feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
+            else:
+                feed_dict = {x: X_batch, y: Y_batch}
+            accuracies_test.append(sess.run(accuracy_op, feed_dict=feed_dict))
+        global_acc_test = sum(accuracies_test) / len(accuracies_test)
+        resman["test_acc"] = global_acc_test
+        resman["test_eval_time"] = t.time() - test_eval_start
+        printman.print()
+
+
+if __name__ == "__main__":
+    paraman_obj = ParameterManagerMain(docopt.docopt(__doc__))
+    resman_obj = ResultManagerMain()
+    printman_obj = ResultPrinter(paraman_obj, resman_obj)
+
+    try:
+        main(paraman_obj, resman_obj, printman_obj)
+    except Exception:
+        # print whatever results were collected, then re-raise with the
+        # original traceback
+        printman_obj.print()
+        raise
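A quick way to sanity-check the CLI contract without launching a training run (a
sketch only; `docopt.docopt` accepts an explicit `argv` list, and `__doc__` here
stands for the module docstring above, e.g. when run from within the module):

    import docopt

    # parse an illustrative deepstrom command line against the usage string
    argv = "deepstrom --cifar10 -e 1 -s 8 -m 16 -R".split()
    args = docopt.docopt(__doc__, argv=argv)
    assert args["deepstrom"] and args["--rbf-kernel"]
    print(args["--num-epoch"], args["--nys-size"])  # -> '1' '16'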