Commit 7755e275 authored by Luc Giffon

expe /end_to_end_subsample_conv_hand_with_augment script + lazyfile

parent 35c14b5b
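# Lazyfile for the experiment runner: each entry maps a CLI flag of the script
# below to the values to sweep. "base" gathers the flags shared by every run;
# the "deepstrom" block holds the flags specific to the deepstrom network.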
all:
  deepstrom:
base:
  epoch_numbers: {"-e": [200]}
  batch_sizes: {"-s": [64]}
  val_size: {"-v": [10000]}
  seed: {"-a": "range(1)"}
  quiet: ["-q"]
  dataset: ["--mnist", "--cifar10", "--svhn"]
  subs_every: {"-l": [50]}
deepstrom:
  network: ["deepstrom"]
  base:
    nys_size: {"-m": [4, 8, 16, 64, 128, 256, 512]}
    kernel: ["-C"]

"""
Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
Usage:
benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V]
Options:
--help -h Display help and exit.
-q --quiet Set logging level to info.
-V --tensorboard Write tensorboard logs.
-a --seed value The seed value used for all randomization processed [default: 0]
-t --train-size size Size of train set.
-v --validation-size size The size of the validation set [default: 10000]
-e --num-epoch=numepoch The number of epoch.
-s --batch-size=batchsize The number of example in each batch
-d --dropout val Keep probability of neurons before classif [default: 1.0]
-D reprdim --out-dim=reprdim The dimension of the final representation
-f --non-linearity name Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
Deepstrom:
-r --real-nystrom Says if the matrix for deepstrom should be K^(-1/2)
-m size --nys-size size The number of example in the nystrom subsample.
-n --non-linear Tell Nystrom to use the non linear activation function on its output.
-l --subs-every number Tell the number of step between each pass of the subsample in convolution layers [default: 1]
Datasets:
--cifar10 Use cifar dataset
--mnist Use mnist dataset
--svhn Use svhn dataset
--cifar100 Use cifar100 dataset
Possible kernels:
-R --rbf-kernel Says if the rbf kernel should be used for nystrom.
-L --linear-kernel Says if the linear kernel should be used for nystrom.
-C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom.
-E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom.
-P --chi-square-PD-kernel Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
-S --sigmoid-kernel Says it the sigmoid kernel should be used for nystrom.
-A --laplacian-kernel Says if the laplacian kernel should be used for nystrom.
-T --stacked-kernel Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
-M --sumed-kernel Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
Kernel related:
-g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
-c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel.
"""
import skluc.main.data.mldatasets as dataset
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model
from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
from skluc.main.tensorflow_.utils import batch_generator
import time as t
import docopt

class ParameterManagerMain(ParameterManager):
    def __init__(self, docopt_dict):
        super().__init__(docopt_dict)
        # --out-dim may be missing (None) or a string; eval turns "None" into None.
        self["--out-dim"] = int(self["--out-dim"]) if eval(str(self["--out-dim"])) is not None else None
        self["kernel"] = self.init_kernel()
        self["activation_function"] = self.init_non_linearity()
        self["dataset"] = self.init_dataset()
        self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None
        self["--num-epoch"] = int(self["--num-epoch"])
        self["--validation-size"] = int(self["--validation-size"])
        self["--seed"] = int(self["--seed"])
        self["--batch-size"] = int(self["--batch-size"])
        self["deepstrom_activation"] = self.init_deepstrom_activation()
        self["--subs-every"] = int(self["--subs-every"])
        self.__kernel_dict = None

    def init_deepstrom_activation(self):
        if not self["deepstrom"]:
            return None
        if self["--non-linear"]:
            return self["--non-linearity"]
        else:
            return None

    def init_kernel_dict(self, data):
        if self["kernel"] == "rbf":
            GAMMA = self.get_gamma_value(data)
            self["--gamma"] = GAMMA
            self.__kernel_dict = {"gamma": GAMMA}
        elif self["kernel"] == "chi2_exp_cpd":
            GAMMA = self.get_gamma_value(data, chi2=True)
            self["--gamma"] = GAMMA
            self.__kernel_dict = {"gamma": GAMMA}
        elif self["kernel"] == "laplacian":
            GAMMA = self.get_gamma_value(data)
            self["--gamma"] = GAMMA
            self.__kernel_dict = {"gamma": np.sqrt(GAMMA)}
        else:
            # Kernels without a gamma parameter need no extra arguments.
            self.__kernel_dict = {}

    def __getitem__(self, item):
        if item == "kernel_dict":
            return self.__kernel_dict
        else:
            return super().__getitem__(item)

class ResultManagerMain(ResultManager):
    def __init__(self):
        super().__init__()
        self["training_time"] = None
        self["val_eval_time"] = None
        self["val_acc"] = None
        self["test_acc"] = None
        self["test_eval_time"] = None

def main(paraman, resman, printman):
    if paraman["dataset"] == "mnist":
        data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_lenet_model
    elif paraman["dataset"] == "cifar10":
        data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model
    elif paraman["dataset"] == "cifar100":
        data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model
    elif paraman["dataset"] == "svhn":
        data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model
    else:
        raise ValueError("Unknown dataset")

    data.load()
    data.to_one_hot()
    if not data.is_image():
        data.to_image()
    data.data_astype(np.float32)
    data.labels_astype(np.float32)
    data.normalize()

    X_train, y_train = data.train.data, data.train.labels
    X_test, y_test = data.test.data, data.test.labels
    X_val, y_val = data.validation.data, data.validation.labels
    paraman.init_kernel_dict(X_train)
    # Data augmentation on the training set.
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)
    datagen.fit(X_train)

    subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
    nys_subsample = data.validation.data[subsample_indexes]
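    # The Nystrom landmarks are drawn once, class-uniformly, from the validation
    # set; their convolutional representation is refreshed every --subs-every
    # training steps (see the training loop below).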
    # Model definition
    input_dim = X_train.shape[1:]
    output_dim = y_train.shape[1]
    x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x")
    tf.summary.image("input_images", x)
    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
    convnet_model = convmodel_func(x.shape[1:])
    repr_x = convnet_model(x)
    tf.summary.histogram("convolved_examples", repr_x)
    # Placeholder for the convolved representation of the Nystrom subsample.
    repr_sub = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *repr_x.shape[1:]], name="subsample_conv_input")
    deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"],
                                             kernel_name=paraman["kernel"],
                                             kernel_dict=paraman["kernel_dict"],
                                             activation=paraman["deepstrom_activation"],
                                             out_dim=paraman["--out-dim"])
    input_classifier = deepstrom_layer([repr_x, repr_sub])
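    # Reading of the layer's role here (not its exact implementation): the
    # deepstrom layer builds a Nystrom feature map from the convolved batch and
    # the convolved landmarks S, roughly act(k(phi(x), phi(S)) . W), where W is
    # K(S, S)^(-1/2) when --real-nystrom is set (see the -r option above).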
    with tf.variable_scope("classification"):
        classif_layer = Dense(output_dim)
        classif = classif_layer(input_classifier)

    # Loss computation
    with tf.name_scope("xent"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
            name="xentropy_mean")
        tf.summary.scalar('loss-xent', cross_entropy)

    # Gradient computation
    with tf.name_scope("train"):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        lr = 1e-4
        train_optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cross_entropy,
                                                                            global_step=global_step)

    # Accuracy computation
    with tf.name_scope("accuracy"):
        predictions = tf.argmax(classif, 1)
        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy_op)

    merged_summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    summary_writer = None
    if paraman["--tensorboard"]:
        summary_writer = tf.summary.FileWriter(f"log/{int(t.time())}/{paraman['dataset']}/nys_size_{paraman['--nys-size']}/subs_every_{paraman['--subs-every']}")
    with tf.Session() as sess:
        logger.info("Start training")
        if paraman["--tensorboard"]:
            summary_writer.add_graph(sess.graph)
        # Initialize all Variable objects
        sess.run(init)
        # actual learning
        global_start = t.time()
        j = 0
        for i in range(paraman["--num-epoch"]):
            logger.debug(memory_usage())
            for k, (X_batch, Y_batch) in enumerate(datagen.flow(X_train, y_train, batch_size=paraman["--batch-size"])):
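                # Refreshing the convolved landmarks costs one extra forward pass,
                # so it is only done every --subs-every steps; in between, the
                # deepstrom layer reuses a slightly stale landmark representation.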
                if j % paraman["--subs-every"] == 0:
                    feed_dict = {x: nys_subsample}
                    computed_repr_sub, = sess.run([repr_x], feed_dict=feed_dict)
                try:
                    # computed_repr_sub is set on the first step (j == 0), so this
                    # guard only fires if the refresh logic above is changed.
                    feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub}
                except NameError as e:
                    logger.error("A representation of the subsample must have been computed at least once for it to be used in the deepstrom")
                    raise e
                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                if j % 100 == 0:
                    logger.info("epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(
                        i, paraman["--num-epoch"],
                        k, int(data.train[0].shape[0] / paraman["--batch-size"]) + 1,
                        X_batch.shape, loss, acc))
                    if paraman["--tensorboard"]:
                        summary_writer.add_summary(summary_str, j)
                j += 1
                # datagen.flow loops forever: stop after one pass over the train set.
                if k > int(data.train[0].shape[0] / paraman["--batch-size"]):
                    break
logger.info("Evaluation on validation data")
training_time = t.time() - global_start
resman["training_time"] = training_time
accuracies_val = []
i = 0
val_eval_start = t.time()
for X_batch, Y_batch in batch_generator(X_val, y_val, 1000, False):
feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub}
accuracy = sess.run([accuracy_op], feed_dict=feed_dict)
accuracies_val.append(accuracy[0])
i += 1
global_acc_val = sum(accuracies_val) / i
VAL_EVAL_TIME = t.time() - val_eval_start
resman["val_eval_time"] = VAL_EVAL_TIME
resman["val_acc"] = global_acc_val
logger.info("Evaluation on test data")
accuracies_test = []
i = 0
test_eval_start = t.time()
for X_batch, Y_batch in batch_generator(X_test, y_test, 1000, False):
feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub}
accuracy = sess.run([accuracy_op], feed_dict=feed_dict)
accuracies_test.append(accuracy[0])
i += 1
global_acc_test = sum(accuracies_test) / i
TEST_EVAL_TIME = t.time() - test_eval_start
resman["test_acc"] = global_acc_test
resman["test_eval_time"] = TEST_EVAL_TIME
printman.print()

if __name__ == "__main__":
    tf.reset_default_graph()
    paraman_obj = ParameterManagerMain(docopt.docopt(__doc__))
    resman_obj = ResultManagerMain()
    printman_obj = ResultPrinter(paraman_obj, resman_obj)
    try:
        main(paraman_obj, resman_obj, printman_obj)
    except Exception as e:
        # Print whatever results were collected before re-raising.
        printman_obj.print()
        raise e