Commit d02959cf authored by Luc Giffon

Script for the vgg19/cifar10 experiment with the chi2 kernel + pre-transformation of cifar10

parent 0bc7471d
@@ -3,8 +3,8 @@ Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network
Usage:
benchmark_vgg dense [-e numepoch] [-s batchsize] [-D reprdim] [-l]
benchmark_vgg deepfriedconvnet [-e numepoch] [-s batchsize] [-S sigmavalue] [-N nbstack]
benchmark_vgg deepstrom [-e numepoch] [-s batchsize] [-D reprdim] [-m size] [-R|-L] [-g gammavalue]
benchmark_vgg deepfriedconvnet [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack]
benchmark_vgg deepstrom [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S) [-g gammavalue] [-c cvalue]
Options:
--help -h Display help and exit.
@@ -15,8 +15,12 @@ Options:
-m size --nys-size size The number of examples in the nystrom subsample.
-R --rbf-kernel Says if the rbf kernel should be used for nystrom.
-L --linear-kernel Says if the linear kernel should be used for nystrom.
-g gammavalue --gamma gammavalue The value of gamma for rbf kernel (deepstrom)
-S sigmavalue --sigma sigmavalue The value of sigma for rbf kernel (deepfriedconvnet)
-C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom.
-E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom.
-P --chi-square-PD-kernel Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
-S --sigmoid-kernel Says if the sigmoid kernel should be used for nystrom.
-c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel.
-g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
-N nbstack --nb-stack nbstack The number of fastfood stack for deepfriedconvnet
--time Says if the training time should be computed.
--test Says if the accuracy performance on test set should be computed.
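The new -C, -E, -P and -S options select chi-square and sigmoid kernels for the nystrom subsample. Their implementations live in skluc.tensorflow_.kernel and are not shown in this diff; the NumPy sketch below only illustrates the standard formulas these names usually denote (the function names and the eps guard are illustrative, not the repository's code):

import numpy as np

def chi_square_cpd(X, Y, eps=1e-8):
    # Conditionally positive definite additive chi2: -sum_i (x_i - y_i)^2 / (x_i + y_i)
    # (assumes non-negative inputs, e.g. ReLU feature maps or histograms)
    diff = X[:, None, :] - Y[None, :, :]
    summ = X[:, None, :] + Y[None, :, :] + eps
    return -np.sum(diff ** 2 / summ, axis=-1)

def chi_square_cpd_exp(X, Y, gamma):
    # Exponentiated chi2: exp(gamma * k_cpd(x, y)) = exp(-gamma * chi2_distance(x, y))
    return np.exp(gamma * chi_square_cpd(X, Y))

def chi_square_pd(X, Y, eps=1e-8):
    # Positive definite additive chi2: sum_i 2 * x_i * y_i / (x_i + y_i)
    prod = X[:, None, :] * Y[None, :, :]
    summ = X[:, None, :] + Y[None, :, :] + eps
    return np.sum(2.0 * prod / summ, axis=-1)

def sigmoid_kernel(X, Y, gamma, constant):
    # Hyperbolic tangent kernel: tanh(gamma * <x, y> + c), c being --intercept-constant
    return np.tanh(gamma * (X @ Y.T) + constant)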
@@ -26,9 +30,11 @@ import numpy as np
import tensorflow as tf
import docopt
import skluc.mldatasets as dataset
from skluc.mldatasets import VGG19Cifar10Transformer
from skluc.tensorflow_.kernel_approximation import nystrom_layer, fastfood_layer
from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar, tf_rbf_kernel, \
tf_linear_kernel
from skluc.tensorflow_.utils import fully_connected, batch_generator, classification_cifar
from skluc.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
tf_chi_square_PD, tf_sigmoid_kernel
def fct_dense(input_, out_dim, two_layers):
@@ -60,8 +66,16 @@ if __name__ == '__main__':
TWO_LAYERS_DENSE = arguments["--two-layers"]
RBF_KERNEL = arguments["--rbf-kernel"]
LINEAR_KERNEL = arguments["--linear-kernel"]
CHI2_KERNEL = arguments["--chi-square-kernel"]
CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
NYS_SUBSAMPLE_SIZE = None
KERNEL_NAME = None
GAMMA = None
CONST = None
NB_STACK = None
kernel_dict = {}
if arguments["dense"]:
NETWORK = "dense"
elif arguments["deepstrom"]:
@@ -72,25 +86,43 @@ if __name__ == '__main__':
KERNEL_NAME = "rbf"
GAMMA = float(arguments["--gamma"])
kernel_dict = {"gamma": GAMMA}
else:
elif LINEAR_KERNEL:
KERNEL = tf_linear_kernel
KERNEL_NAME = "linear"
kernel_dict = {}
elif CHI2_KERNEL:
KERNEL = tf_chi_square_CPD
KERNEL_NAME = "chi2_cpd"
elif CHI2_EXP_KERNEL:
KERNEL = tf_chi_square_CPD_exp
KERNEL_NAME = "chi2_exp_cpd"
GAMMA = float(arguments["--gamma"])
kernel_dict = {"gamma": GAMMA}
elif CHI2_PD_KERNEL:
KERNEL = tf_chi_square_PD
KERNEL_NAME = "chi2_pd"
elif SIGMOID_KERNEL:
KERNEL = tf_sigmoid_kernel
KERNEL_NAME = "sigmoid"
GAMMA = float(arguments["--gamma"])
CONST = float(arguments["--intercept-constant"])
kernel_dict = {"gamma": GAMMA, "constant": CONST}
else:
raise Exception("No kernel function specified for deepstrom")
elif arguments["deepfriedconvnet"]:
NETWORK = "deepfriedconvnet"
NB_STACK = int(arguments["--nb-stack"])
GAMMA = float(arguments["--sigma"])
GAMMA = float(arguments["--gamma"])
SIGMA = 1 / GAMMA
else:
raise Exception("Not recognized network")
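# The KERNEL and kernel_dict chosen above feed the Nystrom approximation built by the
# deepstrom branch. The following is only a rough generic sketch of that feature map
# (NumPy); the actual nystrom_layer in skluc.tensorflow_.kernel_approximation is a
# TensorFlow layer and may differ in details such as regularization of K_mm.
import numpy as np

def nystrom_features(X, subsample, kernel_fct, **kernel_params):
    # phi(X) ~= K(X, S) @ K(S, S)^(-1/2), with S the Nystrom subsample
    K_nm = kernel_fct(X, subsample, **kernel_params)          # (n, m) cross kernel
    K_mm = kernel_fct(subsample, subsample, **kernel_params)  # (m, m) subsample kernel
    U, s, Vt = np.linalg.svd(K_mm)
    K_mm_inv_sqrt = U @ np.diag(1.0 / np.sqrt(np.maximum(s, 1e-12))) @ Vt
    return K_nm @ K_mm_inv_sqrt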
data = dataset.Cifar10Dataset()
data = dataset.Cifar10Dataset(validation_size=5000)
data.load()
data.normalize()
data.data_astype(np.float32)
data.labels_astype(np.float32)
data.apply_vgg19()
data.apply_transformer(VGG19Cifar10Transformer)
data.to_one_hot()
data.flatten()
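VGG19Cifar10Transformer comes from skluc.mldatasets and is not part of this diff. Given the commit message ("pre-transformation of cifar10") and the 512-dimensional conv output assumed by the plotting script below (output_conv_dim = 512), it presumably runs the CIFAR-10 images through the convolutional part of a pretrained VGG19. A hypothetical sketch with tf.keras (the real transformer may use different weights, preprocessing and pooling):

import numpy as np
import tensorflow as tf

def vgg19_conv_features(images):
    # images: (N, 32, 32, 3) uint8 CIFAR-10 batch -> (N, 512) pooled conv features
    images = tf.keras.applications.vgg19.preprocess_input(images.astype(np.float32))
    base = tf.keras.applications.VGG19(include_top=False, weights="imagenet",
                                       input_shape=(32, 32, 3), pooling="avg")
    return base.predict(images)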
@@ -156,14 +188,6 @@ if __name__ == '__main__':
feed_dict = {x: X_batch, y: Y_batch, keep_prob: 0.5}
_, loss = sess.run([train_optimizer, cross_entropy], feed_dict=feed_dict)
# if j % 100 == 0:
# print('epoch {}/{}, batch {}/{}, loss {} (with dropout), {:.2f}s / batch'
# .format(i+1, NUM_EPOCH, j+1, int(data.train[0].shape[0]/BATCH_SIZE), loss,
# (t.time() - start) / 100))
# r_accuracy = sess.run([accuracy_op], feed_dict=feed_dict_val)
# summary_str = sess.run(merged_summary, feed_dict=feed_dict)
# summary_writer.add_summary(summary_str, (j + 1) * (i + 1))
# start = t.time()
j += 1
training_time = t.time() - global_start
@@ -185,7 +209,10 @@ if __name__ == '__main__':
str(TWO_LAYERS_DENSE),
str(KERNEL_NAME),
str(GAMMA),
str(NB_STACK)]
str(CONST),
str(NB_STACK),
str(NYS_SUBSAMPLE_SIZE)
]
print(",".join(printed_r_list))
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
pd.set_option('display.width', 1000)
DIRNAME = "/home/luc/Resultats/Deepstrom/CIFAR10/vgg19/big_grid_vgg_cifar100_main"
FILENAME = "gathered_results.csv"
GAMMA_BEST = 0.1
min_acc = 0.55
max_acc = 0.9
linewidth = 0.9
output_conv_dim = 512
kernel_marker = {
"chi2_cpd": "x",
"linear": "o",
"chi2_pd": "v",
"rbf": "d",
"chi2_exp_cpd": "h"
}
kernel_color = {
"chi2_cpd": "b",
"linear": "g",
"chi2_pd": "r",
"rbf": "c",
"chi2_exp_cpd": "m"
}
dense_marker = {
"True": "+",
"False": "*"
}
dense_color = {
"True": "y",
"False": "k"
}
deepfried_marker = {
"1": "8",
"2": "s"
}
deepfried_color = {
"1": "#F289E7",
"2": "#A2EF3E"
}
if __name__ == '__main__':
filepath = os.path.join(DIRNAME, FILENAME)
field_names = ["method_name",
"accuracy",
"runtime",
"number_epoch",
"batch_size",
"repr_dim",
"two_layers_dense",
"kernel_deepstrom",
"gamma_kernel",
"constante_sigmoid",
"nb_layer_deepfried",
"subsample_size"
]
df = pd.read_csv(filepath, names=field_names)
truth = (df["gamma_kernel"] == str(GAMMA_BEST)) | (df["gamma_kernel"] == str(None))
df = df.apply(pd.to_numeric, errors="ignore")
df = df[truth]
df = df[(df["kernel_deepstrom"]) != "sigmoid"]
# df[df["repr_dim"] != str(None)]["repr_dim"] = df[df["repr_dim"] != str(None)]["repr_dim"].astype(np.int)
# df[df["subsample_size"] != str(None)]["subsample_size"] = df[df["subsample_size"] != str(None)]["subsample_size"].astype(np.int)
batch_sizes = set(df["batch_size"].values)
nys_sizes = set(df["subsample_size"].values)
nys_sizes.remove("None")
# nys_sizes = [int(r) for r in nys_sizes]
method_names = set(df["method_name"].values)
kernel_names = set(df["kernel_deepstrom"].values)
kernel_names.remove("None")
repr_dim = set(df["repr_dim"].values)
repr_dim.remove("None")
# repr_dim = [int(r) for r in repr_dim]
print(kernel_names)
print(method_names)
print(nys_sizes)
print(repr_dim)
for b_size in batch_sizes:
# acc/nb subsample
df_batch = df[df["batch_size"] == b_size]
for r_dim in repr_dim:
df_batch_repr = df_batch[(df_batch["repr_dim"] == r_dim) | (df_batch["repr_dim"] == str(None))]
df_deepstrom = df_batch_repr[df_batch_repr["method_name"] == "deepstrom"]
df_deepstrom["subsample_size"] = df_deepstrom["subsample_size"].astype(np.int)
df_deepstrom_sort = df_deepstrom.sort_values(by=["subsample_size"])
for k_name in kernel_names:
df_deepstrom_kernel = df_deepstrom[df_deepstrom["kernel_deepstrom"] == k_name]
plt.scatter(list(df_deepstrom_kernel["subsample_size"]), list(df_deepstrom_kernel["accuracy"]),
marker=kernel_marker[k_name], color=kernel_color[k_name], label=k_name)
df_dense = df_batch_repr[df_batch_repr["method_name"] == "dense"]
acc_val_dense_1 = float(df_dense[df_dense["two_layers_dense"] == False]["accuracy"])
acc_val_dense_2 = float(df_dense[df_dense["two_layers_dense"] == True]["accuracy"])
plt.plot(sorted([int(n) for n in nys_sizes]), [acc_val_dense_1 for _ in nys_sizes], color=dense_color["False"], linewidth=linewidth, label="1 layer")
plt.plot(sorted([int(n) for n in nys_sizes]), [acc_val_dense_2 for _ in nys_sizes], color=dense_color["True"], linewidth=linewidth, label="2 layer")
df_deepfried = df_batch_repr[df_batch_repr["method_name"] == "deepfriedconvnet"]
acc_val_deepfried_1 = float(df_deepfried[df_deepfried["nb_layer_deepfried"] == "1"]["accuracy"])
acc_val_deepfried_2 = float(df_deepfried[df_deepfried["nb_layer_deepfried"] == "2"]["accuracy"])
plt.plot(sorted([int(n) for n in nys_sizes]), [acc_val_deepfried_1 for _ in nys_sizes],
color=deepfried_color["1"], linewidth=linewidth, label="1 stack")
plt.plot(sorted([int(n) for n in nys_sizes]), [acc_val_deepfried_2 for _ in nys_sizes],
color=deepfried_color["2"], linewidth=linewidth, label="2 stack")
plt.title("Accuracy by number of subsample for \n batch size = {} and representation dim = {}".format(b_size, r_dim))
plt.ylabel("Accuracy")
plt.xlabel("log(Subsample size)")
plt.xscale("log")
plt.legend()
plt.ylim(min_acc, max_acc)
plt.xticks(sorted([int(n) for n in nys_sizes]))
plt.show()
# acc/nbparam
# acc/trainingtime
df_deepstrom = df_batch[df_batch["method_name"] == "deepstrom"]
for k_name in kernel_names:
df_deepstrom_kernel = df_deepstrom[df_deepstrom["kernel_deepstrom"] == k_name]
accuracies = []
nb_param = []
for r_dim in repr_dim:
df_deepstrom_rdim = df_deepstrom_kernel[df_deepstrom_kernel["repr_dim"] == r_dim]
print(df_deepstrom_rdim)
df_deepstrom_rdim["repr_dim"] = df_deepstrom_rdim["repr_dim"].astype(np.int)
df_deepstrom_rdim["subsample_size"] = df_deepstrom_rdim["subsample_size"].astype(np.int)
for n_size in nys_sizes:
print(n_size)
nb_param.append(int(r_dim) * int(n_size))
accuracies.append(float(df_deepstrom_rdim[df_deepstrom_rdim["subsample_size"] == int(n_size)]["accuracy"]))
plt.scatter(nb_param, accuracies,
marker=kernel_marker[k_name], color=kernel_color[k_name], label=k_name)
df_dense = df_batch[df_batch["method_name"] == "dense"]
df_dense["repr_dim"] = df_dense["repr_dim"].astype(np.int)
df_dense_1 = df_dense[df_dense["two_layers_dense"] == False]
df_dense_2 = df_dense[df_dense["two_layers_dense"] == True]
accuracies_1 = []
nb_param_1 = []
accuracies_2 = []
nb_param_2 = []
for r_dim in repr_dim:
accuracies_1.append(float(df_dense_1[df_dense_1["repr_dim"] == int(r_dim)]["accuracy"]))
nb_param_1.append(int(r_dim) * output_conv_dim)
accuracies_2.append(float(df_dense_2[df_dense_2["repr_dim"] == int(r_dim)]["accuracy"]))
nb_param_2.append(int(r_dim) * int(r_dim) + int(r_dim) * output_conv_dim)
plt.scatter(nb_param_1, accuracies_1, color=dense_color["False"],
linewidth=linewidth, label="1 layer")
plt.scatter(nb_param_2, accuracies_2, color=dense_color["True"],
linewidth=linewidth, label="2 layer")
df_deepfried = df_batch[df_batch["method_name"] == "deepfriedconvnet"]
acc_val_deepfried_1 = float(df_deepfried[df_deepfried["nb_layer_deepfried"] == "1"]["accuracy"])
acc_val_deepfried_2 = float(df_deepfried[df_deepfried["nb_layer_deepfried"] == "2"]["accuracy"])
plt.scatter(output_conv_dim * 3, acc_val_deepfried_1,
color=deepfried_color["1"], linewidth=linewidth, label="1 stack")
plt.scatter(output_conv_dim * 3 * 2, acc_val_deepfried_2,
color=deepfried_color["2"], linewidth=linewidth, label="2 stack")
plt.title("Accuracy by number of parameters for batch size = {}".format(b_size))
plt.ylabel("Accuracy")
plt.xlabel("log(Nb_param)")
plt.xscale("log")
plt.legend()
plt.ylim(min_acc, max_acc)
# plt.xticks(sorted([int(n) for n in nys_sizes]))
plt.show()
# print(df_deepstrom)
\ No newline at end of file
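For reference, the parameter counts scattered in the second plot follow directly from the code above: a deepstrom layer with subsample size m and representation dimension D is counted as m*D parameters, one dense layer as 512*D (output_conv_dim = 512), two dense layers as D*D + 512*D, and a deepfried stack as 512*3 per stack. A worked example with arbitrary, hypothetical sizes:

output_conv_dim = 512
m, D = 500, 64                          # hypothetical subsample size and repr dim
print(m * D)                            # deepstrom: 32000
print(output_conv_dim * D)              # dense, 1 layer: 32768
print(D * D + output_conv_dim * D)      # dense, 2 layers: 36864
print(output_conv_dim * 3)              # deepfriedconvnet, 1 stack: 1536
print(output_conv_dim * 3 * 2)          # deepfriedconvnet, 2 stacks: 3072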