diff --git a/main/experiments/benchmark_classification.py b/main/experiments/benchmark_classification.py
index e41a45b8741a94e2b22129e460b05fab179975b6..e6c9271fa2273889375160694fd34d2c19685238 100644
--- a/main/experiments/benchmark_classification.py
+++ b/main/experiments/benchmark_classification.py
@@ -59,22 +59,22 @@ Kernel related:
 """
 import logging
 import sys
-import time as t
 import daiquiri
+import docopt
 import numpy as np
 import tensorflow as tf
-import docopt
+import time as t
 from tensorflow.python.keras.layers import Dense
 
 import skluc.main.data.mldatasets as dataset
-from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
 from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
-from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
-from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
-from skluc.main.tensorflow_.utils import batch_generator
+from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
 from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
     tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
+from skluc.main.tensorflow_.utils import batch_generator
 from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
 
 
@@ -113,7 +113,6 @@ def print_result(global_acc_val=None, global_acc_test=None, training_time=None,
         raise error
 
 
-
 def get_gamma_value(arguments, dat, chi2=False):
     if arguments["--gamma"] is None:
         logger.debug("Gamma arguments is None. Need to compute it.")
Need to compute it.") diff --git a/main/experiments/graph_drawing/till_october_2018/transfert_few_data/vgg_svhn_from_cifar100_deepstrom_few_data.py b/main/experiments/graph_drawing/till_october_2018/transfert_few_data/vgg_svhn_from_cifar100_deepstrom_few_data.py index 978978b94b39a2c7bdeb2509350692fed240b60e..0fbc414990a4b2bf406e15e57fd0eff84c0f14af 100644 --- a/main/experiments/graph_drawing/till_october_2018/transfert_few_data/vgg_svhn_from_cifar100_deepstrom_few_data.py +++ b/main/experiments/graph_drawing/till_october_2018/transfert_few_data/vgg_svhn_from_cifar100_deepstrom_few_data.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd import pathlib + from skluc.main.utils import logger matplotlib.rcParams.update({'font.size': 14}) @@ -25,7 +26,7 @@ min_acc = 0.00 max_acc = 1.05 # max_acc = 1.0 linewidth = 0.9 -output_conv_dim = 512 +output_conv_dims = {'block3_pool': 256 * 16, 'block5_conv4': 512 * 4, 'block5_pool': 512} nb_classes = 10 real_nys_marker = "s" @@ -129,8 +130,8 @@ if __name__ == '__main__': np_deepstrom_kernel_w_std_accuracy_test = np.std(all_accs_w, axis=0) np_param_nbr_deepstrom_kernel_w = ( np.square(np.array(sorted(set(df_deepstrom_kernel_w["subsample_size"])))) + # m x m - np.array( - sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dim + # m x d + # np.array( + # sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dims[cut_layer] + # m x d np.array( sorted(list(set(df_deepstrom_kernel_w["subsample_size"])))) * nb_classes) # m x c @@ -153,8 +154,8 @@ if __name__ == '__main__': np_param_nbr_deepstrom_kernel_k = ( np.square(np.array(sorted(set(df_deepstrom_kernel_k["subsample_size"])))) + # m x m - np.array(sorted( - set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dim + # m x d + # np.array(sorted( + # set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dims[cut_layer] + # m x d np.array(sorted( list(set(df_deepstrom_kernel_k["subsample_size"])))) * nb_classes) # m x c @@ -178,7 +179,7 @@ if __name__ == '__main__': np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in seed_values]), axis=0) ax.errorbar( - np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dim + + np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dims[cut_layer] + np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes, np_dense_mean_accuracy_test, np_dense_std_accuracy_test, @@ -197,7 +198,7 @@ if __name__ == '__main__': np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"])) np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"])) - nb_param_vals = [(output_conv_dim * 3 + output_conv_dim * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))] + nb_param_vals = [(output_conv_dims[cut_layer] * 3 + output_conv_dims[cut_layer] * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))] ax.errorbar(nb_param_vals, np_deepfried_mean_accuracy_test, np_deepfried_std_accuracy_test, @@ -211,7 +212,7 @@ if __name__ == '__main__': ax.set_xticks([1e4, 1e5, 1e6]) # if i == 2: # ax.set_xlabel("# Parameters") - ax.set_xlabel("# Parameters") + ax.set_xlabel("# Learnable Parameters") ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=2) ax.set_xticklabels([1e4, 1e5, 1e6]) # else: diff --git 
index fba390c65d174a48f00143f9e6b568b65eeede9e..eac3d46e2f5bb48a450b9ac117d7421fe3d1745f 100644
--- a/main/experiments/graph_drawing/till_october_2018/transfert_few_data_batchnorm/vgg_deepstrom_few_data_batchnorm.py
+++ b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_batchnorm/vgg_deepstrom_few_data_batchnorm.py
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import pathlib
+
 from skluc.main.utils import logger
 
 matplotlib.rcParams.update({'font.size': 14})
@@ -25,7 +26,7 @@ min_acc = 0.00
 max_acc = 1.05
 # max_acc = 1.0
 linewidth = 0.9
-output_conv_dim = 512
+output_conv_dims = {'block3_pool': 256 * 16, 'block5_conv4': 512 * 4, 'block5_pool': 512}
 nb_classes = 10
 
 real_nys_marker = "s"
@@ -130,8 +131,8 @@ if __name__ == '__main__':
             np_deepstrom_kernel_w_std_accuracy_test = np.std(all_accs_w, axis=0)
             np_param_nbr_deepstrom_kernel_w = (
                     np.square(np.array(sorted(set(df_deepstrom_kernel_w["subsample_size"])))) +  # m x m
-                    np.array(
-                        sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dim +  # m x d
+                    # np.array(
+                    #     sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dims[cut_layer] +  # m x d
                     np.array(
                         sorted(list(set(df_deepstrom_kernel_w["subsample_size"])))) * nb_classes)  # m x c
 
@@ -154,8 +155,8 @@ if __name__ == '__main__':
 
                 np_param_nbr_deepstrom_kernel_k = (
                         np.square(np.array(sorted(set(df_deepstrom_kernel_k["subsample_size"])))) +  # m x m
-                        np.array(sorted(
-                            set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dim +  # m x d
+                        # np.array(sorted(
+                        #     set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dims[cut_layer] +  # m x d
                         np.array(sorted(
                             list(set(df_deepstrom_kernel_k["subsample_size"])))) * nb_classes)  # m x c
 
@@ -179,7 +180,7 @@ if __name__ == '__main__':
                 np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in
                           seed_values]), axis=0)
             ax.errorbar(
-                np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dim +
+                np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dims[cut_layer] +
                 np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes,
                 np_dense_mean_accuracy_test,
                 np_dense_std_accuracy_test,
@@ -198,7 +199,7 @@ if __name__ == '__main__':
                     np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"]))
                     np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"]))
 
-            nb_param_vals = [(output_conv_dim * 3 + output_conv_dim * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))]
+            nb_param_vals = [(output_conv_dims[cut_layer] * 3 + output_conv_dims[cut_layer] * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))]
             ax.errorbar(nb_param_vals,
                         np_deepfried_mean_accuracy_test,
                         np_deepfried_std_accuracy_test,
@@ -212,7 +213,7 @@ if __name__ == '__main__':
             ax.set_xticks([1e4, 1e5, 1e6])
             # if i == 2:
             #     ax.set_xlabel("# Parameters")
-            ax.set_xlabel("# Parameters")
+            ax.set_xlabel("# Learnable Parameters")
             ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=2)
             ax.set_xticklabels([1e4, 1e5, 1e6])
             # else:
diff --git a/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar100_from_cifar10/vgg_deepstrom_few_data_cifar100_from_cifar10.py b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar100_from_cifar10/vgg_deepstrom_few_data_cifar100_from_cifar10.py
index 3ca6a3592a699ad5824b15fc5536fa28f3f9da71..7b307345ece3147d0d61849c82b69f609d063e30 100644
--- a/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar100_from_cifar10/vgg_deepstrom_few_data_cifar100_from_cifar10.py
+++ b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar100_from_cifar10/vgg_deepstrom_few_data_cifar100_from_cifar10.py
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import pathlib
+
 from skluc.main.utils import logger
 
 matplotlib.rcParams.update({'font.size': 14})
@@ -17,7 +18,7 @@ pd.set_option('display.expand_frame_repr', False)
 
 DATANAME = "CIFAR100"
 
-DIRNAME = "/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data"
+DIRNAME = "/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data_cifar100_from_cifar10"
 
 FILENAME = "gathered_results.csv"
diff --git a/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar10_from_cifar100/vgg_deepstrom_few_data_cifar10_from_cifar100.py b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar10_from_cifar100/vgg_deepstrom_few_data_cifar10_from_cifar100.py
new file mode 100644
index 0000000000000000000000000000000000000000..a751e4a69298de40b1b783c8d7c9e727725db8d1
--- /dev/null
+++ b/main/experiments/graph_drawing/till_october_2018/transfert_few_data_cifar10_from_cifar100/vgg_deepstrom_few_data_cifar10_from_cifar100.py
@@ -0,0 +1,259 @@
+import os
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import pathlib
+
+from skluc.main.utils import logger
+
+matplotlib.rcParams.update({'font.size': 14})
+
+# pd.set_option('display.width', 1000)
+pd.set_option('display.expand_frame_repr', False)
+
+# DAT = ["SVHN"]
+# DIR = ["/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data_cifar100_from_cifar10"]
+
+
+DATANAME = "CIFAR10"
+DIRNAME = "/home/luc/Resultats/Deepstrom/october_2018/transfert_few_data_cifar10_from_cifar100"
+
+FILENAME = "gathered_results.csv"
+
+min_acc = 0.00
+max_acc = 1.05
+# max_acc = 1.0
+linewidth = 0.9
+output_conv_dims = {'block3_pool': 256 * 16, 'block5_conv4': 512 * 4, 'block5_pool': 512}
+nb_classes = 10
+
+real_nys_marker = "s"
+
+learned_nys_marker = "x"
+
+linearity_color = "g"
+
+dense_marker = "v"
+dense_color = "r"
+
+deepfried_marker = "8"
+deepfried_color = "b"
+
+d_translate_kernel = {
+    "linear": "Linear",
+    "chi2_cpd": "Chi2",
+    "rbf": "Gaussian"
+}
+
+if __name__ == '__main__':
+    filepath = os.path.join(DIRNAME, FILENAME)
+    field_names = ["method_name",
+                   "accuracy_val",
+                   "accuracy_test",
+                   "runtime_train",
+                   "runtime_val",
+                   "runtime_test",
+                   "number_epoch",
+                   "batch_size",
+                   "repr_dim",
+                   "second_layer_size",
+                   "kernel_deepstrom",
+                   "gamma_kernel",
+                   "constante_sigmoid",
+                   "nb_layer_deepfried",
+                   "subsample_size",
+                   "validation_size",
+                   "seed",
+                   "act",
+                   "non_linearity",
+                   "real_nystrom",
+                   "repr_quality",
+                   "train_size",
+                   "dropout",
+                   "dataset",
+                   "real_deepfried",
+                   "weights"
+                   ]
+
+    df = pd.read_csv(filepath, names=field_names)
+    df = df[df["accuracy_val"] != 'None']
+    df = df.apply(pd.to_numeric, errors="ignore")
+    df = df.drop_duplicates()
+    method_names = set(df["method_name"].values)
+    kernel_names = set(df["kernel_deepstrom"].values)
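+    # note: the gathered results CSV encodes missing values as the string "None";
+    # such entries are removed before the remaining values are used below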
kernel_names.remove("None") + # kernel_names.remove("laplacian") + repr_dim = set(df["repr_dim"].values) + repr_dim.remove("None") # dtype: str + # repr_dim.remove("16") + nys_size = set(df["subsample_size"].values) + nys_size.remove("None") + nb_layers_deepfried = set(df["nb_layer_deepfried"].values) + nb_layers_deepfried.remove("None") + seed_values = set(df["seed"].values) + batch_size = 128 + train_sizes = set(df["train_size"]) + + cut_layers = set(df["repr_quality"].values) + + weights = set(df["weights"].values) + + logger.debug("Nystrom possible sizes are: {}".format(nys_size)) + logger.debug("Kernel functions are: {}".format(kernel_names)) + logger.debug("Compared network types are: {}".format(method_names)) + logger.debug("Tested representation dimension are: {}".format(repr_dim)) + logger.debug(f"Tested cut layers: {cut_layers}") + + means_deepstrom = {} + + for weight in weights: + df_weight = df[df["weights"] == weight] + for t_size in sorted(list(train_sizes)): + df_tsize = df_weight[df_weight["train_size"] == t_size] + + for cut_layer in cut_layers: + df_cut_layer = df_tsize[df_tsize["repr_quality"] == cut_layer] + + # plot deepstrom + # ============== + df_deepstrom = df_cut_layer[df_cut_layer["method_name"] == "deepstrom"] + df_deepstrom["subsample_size"] = df_deepstrom["subsample_size"].astype(np.int) + df_deepstrom_sort = df_deepstrom.sort_values(by=["subsample_size"]) + for k_name in sorted(kernel_names): + df_deepstrom_kernel = df_deepstrom_sort[df_deepstrom_sort["kernel_deepstrom"] == k_name] + + f, ax = plt.subplots() + + # get the results of learned nystrom + df_deepstrom_kernel_w = df_deepstrom_kernel[df_deepstrom_kernel["real_nystrom"] == False] + all_accs_w = np.array([ + list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"]) for + seed_v in seed_values + ]) + np_deepstrom_kernel_w_mean_accuracy_test = np.mean(all_accs_w, axis=0) + np_deepstrom_kernel_w_std_accuracy_test = np.std(all_accs_w, axis=0) + np_param_nbr_deepstrom_kernel_w = ( + np.square(np.array(sorted(set(df_deepstrom_kernel_w["subsample_size"])))) + # m x m + # np.array( + # sorted(set(df_deepstrom_kernel_w["subsample_size"]))) * output_conv_dims[cut_layer] + # m x d + np.array( + sorted(list(set(df_deepstrom_kernel_w["subsample_size"])))) * nb_classes) # m x c + + ax.errorbar(np_param_nbr_deepstrom_kernel_w, + np_deepstrom_kernel_w_mean_accuracy_test, + np_deepstrom_kernel_w_std_accuracy_test, + marker=learned_nys_marker, color=linearity_color, + label="Adaptative Deepström", + capsize=3) + + # get the results of vanilla nystrom + df_deepstrom_kernel_k = df_deepstrom_kernel[df_deepstrom_kernel["real_nystrom"]] + if len(df_deepstrom_kernel_k): + all_accs_k = np.array([ + list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"]) for + seed_v in seed_values + ]) + np_deepstrom_kernel_k_mean_accuracy_test = np.mean(all_accs_k, axis=0) + np_deepstrom_kernel_k_std_accuracy_test = np.std(all_accs_k, axis=0) + + np_param_nbr_deepstrom_kernel_k = ( + np.square(np.array(sorted(set(df_deepstrom_kernel_k["subsample_size"])))) + # m x m + # np.array(sorted( + # set(df_deepstrom_kernel_k["subsample_size"]))) * output_conv_dims[cut_layer] + # m x d + np.array(sorted( + list(set(df_deepstrom_kernel_k["subsample_size"])))) * nb_classes) # m x c + + ax.errorbar(np_param_nbr_deepstrom_kernel_k, + np_deepstrom_kernel_k_mean_accuracy_test, + np_deepstrom_kernel_k_std_accuracy_test, + marker=real_nys_marker, color=linearity_color, + label="Deepström", + 
+
+                    # plot dense
+                    # ==========
+                    df_dense = df_cut_layer[df_cut_layer["method_name"] == "dense"]
+                    df_dense = df_dense[df_dense["train_size"] == t_size]
+                    df_dense["repr_dim"] = df_dense["repr_dim"].astype(np.int)
+                    df_dense = df_dense.sort_values(by=["repr_dim"])
+                    np_dense_mean_accuracy_test = np.mean(
+                        np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in
+                                  seed_values]), axis=0)
+                    np_dense_std_accuracy_test = np.std(
+                        np.array([list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"]) for seed_v in
+                                  seed_values]), axis=0)
+                    ax.errorbar(
+                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dims[cut_layer] +
+                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes,
+                        np_dense_mean_accuracy_test,
+                        np_dense_std_accuracy_test,
+                        color=dense_color,
+                        marker=dense_marker,
+                        label="Fully Connected", capsize=3)
+
+                    # # plot deepfried
+                    # # ==============
+                    df_deepfried = df_cut_layer[df_cut_layer["method_name"] == "deepfriedconvnet"]
+                    np_deepfried_mean_accuracy_test = []
+                    np_deepfried_std_accuracy_test = []
+                    for l_nb in sorted(nb_layers_deepfried):
+                        df_deepfried_stack = df_deepfried[df_deepfried["nb_layer_deepfried"] == l_nb]
+                        if len(df_deepfried_stack):
+                            np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"]))
+                            np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"]))
+
+                    nb_param_vals = [(output_conv_dims[cut_layer] * 3 + output_conv_dims[cut_layer] * nb_classes) * int(i) for i in sorted(set(df_deepfried["nb_layer_deepfried"].values))]
+                    ax.errorbar(nb_param_vals,
+                                np_deepfried_mean_accuracy_test,
+                                np_deepfried_std_accuracy_test,
+                                color=deepfried_color,
+                                marker=deepfried_marker,
+                                label="Adaptive DeepFriedConvnet", capsize=3)
+
+                    ax.set_ylim(min_acc, max_acc)
+                    ax.set_ylabel("Accuracy")
+                    ax.set_xticks([1e4, 1e5, 1e6])
+                    # if i == 2:
+                    #     ax.set_xlabel("# Parameters")
+                    ax.set_xlabel("# Learnable Parameters")
+                    ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=2)
+                    ax.set_xticklabels([1e4, 1e5, 1e6])
+                    # else:
+                    #     ax.set_xticklabels([])
+                    ax.set_xscale("symlog")
+
+                    ax_twin = ax.twiny()
+                    ax_twin.set_xscale("symlog")
+                    ax_twin.set_xlim(ax.get_xlim())
+                    ax_twin.set_xticks(np_param_nbr_deepstrom_kernel_w)
+
+                    # if i == 0:
+                    ax_twin.set_xlabel("Subsample Size")
+                    ax.set_title(
+                        "{} Kernel - {} - Train size: {} - weights: {}".format(d_translate_kernel[k_name], DATANAME, t_size, weight),
+                        y=1.2)
+                    ax_twin.set_xticklabels(sorted(set(df_deepstrom_kernel_w["subsample_size"])))
+                    # else:
+                    #     ax.set_title("{} Kernel - {} - Train size: {}".format(d_translate_kernel[k_name], DATANAME, t_size))
+                    #     ax_twin.set_xticklabels([])
+
+                    f.set_size_inches(8, 6)
+                    f.tight_layout()
+                    f.subplots_adjust(bottom=0.3)
+                    # f.show()
+                    # exit()
+                    # learnable: change legend
+                    # ODIR = [
+                    #     "/home/luc/PycharmProjects/deepFriedConvnets/main/experiments/graph_drawing/paper/svhn/few_data/parameters/dropout_{}".format(
+                    #         str(drop_val).replace(".", "-"))]
+                    # out_dir_path = ODIR[h]
+
+                    out_name = "acc_param_tsize_{}_{}_{}".format(t_size, cut_layer, k_name)
+
+                    base_out_dir = os.path.join(os.path.abspath(__file__.split(".")[0]), "images", f"{weight}", f"{cut_layer}")
+                    pathlib.Path(base_out_dir).mkdir(parents=True, exist_ok=True)
+                    out_path = os.path.join(base_out_dir, out_name)
+                    logger.debug(out_path)
+                    f.savefig(out_path)
diff --git a/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_big_subsample_chi2.yml b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_big_subsample_chi2.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3621fb5707a156c49ca06a1459f653a14aafc51e
--- /dev/null
+++ b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_big_subsample_chi2.yml
@@ -0,0 +1,21 @@
+all:
+  deepstrom:
+
+base:
+  epoch_numbers: {"-e": [100]}
+  batch_sizes: {"-s": [64]}
+  val_size: {"-v": [10000]}
+  seed: {"-a": "range(10)"}
+  quiet: ["-q"]
+  data_size: {"-t":[20, 50, 100, 200, 500, 1000, 2000]}
+  dataset: ["--svhn", "--cifar10"]
+  weights: {"-W": ["cifar100"]}
+  cut_layer: {"-B": ["block3_pool"]}
+
+deepstrom:
+  network: ["deepstrom"]
+  base:
+  real_nys: ["-r", ""]
+  nys_size: {"-m": [256, 512, 1024]}
+  kernel: ["-C"]
+
diff --git a/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_cifar10_from_cifar100_and_cifar10.yml b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_cifar10_from_cifar100_and_cifar10.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d97c21e185e6eed9cab934f31abc54f15993ffc8
--- /dev/null
+++ b/main/experiments/parameter_files/october_2018/lazyfile_transfert_few_data_cifar10_from_cifar100_and_cifar10.yml
@@ -0,0 +1,33 @@
+all:
+  dense:
+  deepfried:
+  deepstrom:
+
+base:
+  epoch_numbers: {"-e": [100]}
+  batch_sizes: {"-s": [64]}
+  val_size: {"-v": [10000]}
+  seed: {"-a": "range(5)"}
+  quiet: ["-q"]
+  data_size: {"-t":[50, 250, 500, 1000, 2000]}
+  dataset: ["--cifar10"]
+  weights: {"-W": ["cifar100", "cifar10"]}
+  cut_layer: {"-B": ["block3_pool", "block5_conv4", "block5_pool"]}
+
+dense:
+  network: ["dense"]
+  base:
+  repr_dim: {"-D": [16, 64, 128, 1024]}
+
+deepfried:
+  network: ["deepfriedconvnet"]
+  base:
+  nbstacks: {"-N": [1, 3, 5, 7]}
+
+deepstrom:
+  network: ["deepstrom"]
+  base:
+  real_nys: ["-r", ""]
+  nys_size: {"-m": [16, 64, 128, 256, 512, 1024]}
+  kernel: ["-C", "-L", "-R"]
+
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_big_subsample_chi2/__init__.py b/main/experiments/scripts/until_october_2018/transfert_few_data_big_subsample_chi2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_big_subsample_chi2/benchmark_classification.py b/main/experiments/scripts/until_october_2018/transfert_few_data_big_subsample_chi2/benchmark_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..27efb7a97d65d2d545fd74d2de2ef2a97c78ecdf
--- /dev/null
+++ b/main/experiments/scripts/until_october_2018/transfert_few_data_big_subsample_chi2/benchmark_classification.py
@@ -0,0 +1,467 @@
+"""
+Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V] [-b]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V] [-b]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V] [-b]
+
+Options:
+    --help -h                              Display help and exit.
+    -q --quiet                             Set logging level to info.
+    -V --tensorboard                       Write tensorboard logs.
+    -a value --seed value                  The seed value used for all randomization processes [default: 0]
+    -t --train-size size                   Size of the train set.
+    -v size --validation-size size         The size of the validation set [default: 10000]
+    -e numepoch --num-epoch=numepoch       The number of epochs.
+    -s batchsize --batch-size=batchsize    The number of examples in each batch
+    -d --dropout val                       Keep probability of neurons before classif [default: 1.0]
+    -b --batchnorm                         Apply batch normalization before the softmax layer
+    -D reprdim --out-dim=reprdim           The dimension of the final representation
+    -f --non-linearity name                Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
+
+Dense:
+    -l --second-layer-size size            Says the size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack          The number of fastfood stacks for deepfriedconvnet
+    -z --real-fastfood                     Tell the fastfood layer not to update its weights
+
+Deepstrom:
+    -r --real-nystrom                      Says if the matrix for deepstrom should be K^(-1/2)
+    -m size --nys-size size                The number of examples in the nystrom subsample.
+    -n --non-linear                        Tell Nystrom to use the non-linear activation function on its output.
+
+Datasets:
+    --cifar10                              Use cifar10 dataset
+    --mnist                                Use mnist dataset
+    --svhn                                 Use svhn dataset
+    --cifar100                             Use cifar100 dataset
+
+Dataset related:
+    -B --cut-layer name                    The name of the last convolutional layer when loading VGG19Transformer.
+    -W --weights name                      The name of the dataset used for weights.
+
+Possible kernels:
+    -R --rbf-kernel                        Says if the rbf kernel should be used for nystrom.
+    -L --linear-kernel                     Says if the linear kernel should be used for nystrom.
+    -C --chi-square-kernel                 Says if the basic additive chi square kernel should be used for nystrom.
+    -E --exp-chi-square-kernel             Says if the exponential chi square kernel should be used for nystrom.
+    -P --chi-square-PD-kernel              Says if the positive definite version of the basic additive chi square kernel should be used for nystrom.
+    -S --sigmoid-kernel                    Says if the sigmoid kernel should be used for nystrom.
+    -A --laplacian-kernel                  Says if the laplacian kernel should be used for nystrom.
+    -T --stacked-kernel                    Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
+    -M --sumed-kernel                      Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue       The value of gamma for the rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
+    -c cvalue --intercept-constant cvalue  The value of the intercept constant for the hyperbolic tangent kernel.
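+
+Example:
+    An illustrative invocation only (the flag values below mirror the lazyfile
+    parameter file earlier in this patch; adapt them to your own runs):
+    benchmark_classification deepstrom --cifar10 -W cifar100 -B block3_pool -t 500 -v 10000 -e 100 -s 64 -m 256 -r -C -q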
+ +""" +import logging +import sys + +import daiquiri +import docopt +import numpy as np +import tensorflow as tf +import time as t +from tensorflow.python.keras.layers import Dense, BatchNormalization + +import skluc.main.data.mldatasets as dataset +from skluc.main.data.transformation.LeCunTransformer import LecunTransformer +from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer +from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \ + tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels +from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer +from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer +from skluc.main.tensorflow_.utils import batch_generator +from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage + +run_opts = tf.RunOptions(report_tensor_allocations_upon_oom=True) + + +def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None): + printed_r_list = [str(NETWORK), + str(global_acc_val), + str(global_acc_test), + str(training_time), + str(val_eval_time), + str(test_eval_time), + str(NUM_EPOCH), + str(BATCH_SIZE), + str(OUT_DIM), + str(SIZE_SECOND_LAYER), + str(KERNEL_NAME), + str(GAMMA), + str(CONST), + str(NB_STACK), + str(NYS_SUBSAMPLE_SIZE), + str(VALIDATION_SIZE), + str(SEED), + str(ACTIVATION_FUNCTION), + str(NON_LINEAR), + str(REAL_NYSTROM), + str(CUT_LAYER), + str(TRAIN_SIZE), + str(DROPOUT), + str(DATASET), + str(REAL_FASTFOOD), + str(WEIGHTS) + ] + print(",".join(printed_r_list)) + if error is None: + exit() + else: + raise error + + +def get_gamma_value(arguments, dat, chi2=False): + if arguments["--gamma"] is None: + logger.debug("Gamma arguments is None. Need to compute it.") + if chi2: + gamma_value = 1. / compute_euristic_sigma_chi2(dat.train.data) + + else: + gamma_value = 1. 
+    else:
+        gamma_value = eval(arguments["--gamma"])
+
+    logger.debug("Gamma value is {}".format(gamma_value))
+    return gamma_value
+
+
+def main():
+    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
+
+    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
+    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    tf.summary.histogram("convolved_examples", x)
+
+    if NETWORK == "dense":
+        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
+    elif NETWORK == "deepstrom":
+        logger.info("Selecting {} deepstrom layer function with "
+                    "subsample size = {}, "
+                    "output_dim = {}, "
+                    "{} activation function "
+                    "and kernel = {}"
+                    .format("real" if REAL_NYSTROM else "learned",
+                            NYS_SUBSAMPLE_SIZE,
+                            OUT_DIM,
+                            "with" if NON_LINEAR else "without",
+                            KERNEL_NAME))
+        if TRAIN_SIZE is not None:
+            subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.validation.data[subsample_indexes]
+        else:
+            subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.train.data[subsample_indexes]
+        logger.debug("Chosen subsample: {}".format(nys_subsample))
+        representation_layer = DeepstromLayer(subsample=nys_subsample,
+                                              out_dim=OUT_DIM,
+                                              activation=ACTIVATION_FUNCTION,
+                                              kernel_name=KERNEL_NAME,
+                                              real_nystrom=not REAL_NYSTROM,
+                                              kernel_dict=kernel_dict)
+    elif NETWORK == "deepfriedconvnet":
+        representation_layer = FastFoodLayer(sigma=SIGMA,
+                                             nbr_stack=NB_STACK,
+                                             trainable=not REAL_FASTFOOD)
+    else:
+        raise Exception("Not recognized network")
+
+    input_classif = representation_layer(x)
+
+    if SIZE_SECOND_LAYER > 0:
+        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
+        with tf.variable_scope("second_layer"):
+            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
+    else:
+        logger.debug("No second layer")
+        input_classif_2nd_layer = input_classif
+
+    with tf.variable_scope("classification"):
+        if BATCHNORM:
+            bn = BatchNormalization()
+            input_classif_2nd_layer = bn(input_classif_2nd_layer)
+        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+        input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
+        classif = Dense(output_dim)(input_drop)
+
+    # compute the loss
+    logger.debug("Add softmax layer for classification")
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # todo learning rate as hyperparameter
+    # compute the gradient
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # compute the accuracy
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    merged_summary = tf.summary.merge_all()
+
+    init = tf.global_variables_initializer()
+    # Create a session for running Ops on the Graph.
+    # Instantiate a SummaryWriter to output summaries and the Graph.
+    if TENSORBOARD:
+        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
+    # Initialize all Variable objects
+    # actual learning
+    with tf.Session() as sess:
+        logger.info("Start training")
+        if TENSORBOARD:
+            summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
+        global_start = t.time()
+        for i in range(NUM_EPOCH):
+            logger.debug(memory_usage())
+            j = 0
+            start = t.time()
+            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
+                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict, options=run_opts)
+                if j % 100 == 0:
+                    logger.info(
+                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
+                                                                                                int(data.train[0].shape[
+                                                                                                        0] / BATCH_SIZE) + 1,
+                                                                                                X_batch.shape, loss,
+                                                                                                acc))
+                    if TENSORBOARD:
+                        summary_writer.add_summary(summary_str, (j + 1) * (i + 1))
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        accuracies_val = []
+        i = 0
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, BATCH_SIZE, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0}, options=run_opts)
+            accuracies_val.append(accuracy[0])
+            i += 1
+        global_acc_val = sum(accuracies_val) / i
+        VAL_EVAL_TIME = t.time() - val_eval_start
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        i = 0
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, BATCH_SIZE, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0}, options=run_opts)
+            accuracies_test.append(accuracy[0])
+            i += 1
+        global_acc_test = sum(accuracies_test) / i
+        TEST_EVAL_TIME = t.time() - test_eval_start
+
+        print_result(global_acc_val=global_acc_val,
+                     global_acc_test=global_acc_test,
+                     training_time=training_time,
+                     val_eval_time=VAL_EVAL_TIME,
+                     test_eval_time=TEST_EVAL_TIME)
+
+
+if __name__ == '__main__':
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
+    arguments = docopt.docopt(__doc__)
+    logger.debug(arguments)
+    if arguments["--quiet"]:
+        daiquiri.setup(level=logging.INFO)
+    NUM_EPOCH = int(arguments["--num-epoch"])
+    BATCH_SIZE = int(arguments["--batch-size"])
+    OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
+    SIZE_SECOND_LAYER = int(arguments["--second-layer-size"])
+    RBF_KERNEL = arguments["--rbf-kernel"]
+    LINEAR_KERNEL = arguments["--linear-kernel"]
+    CHI2_KERNEL = arguments["--chi-square-kernel"]
+    CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
+    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
+    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
+    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
+    STACKED_KERNEL = arguments["--stacked-kernel"]
+    SUMED_KERNEL = arguments["--sumed-kernel"]
+    VALIDATION_SIZE = int(arguments["--validation-size"])
+    REAL_NYSTROM = arguments["--real-nystrom"]
+    SEED = int(arguments["--seed"])  # the seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
+    TENSORBOARD = arguments["--tensorboard"]
+    NYS_SUBSAMPLE_SIZE = None
+    KERNEL_NAME = None
+    GAMMA = None
+    CONST = None
+    NB_STACK = None
+    kernel_dict = {}
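+    # the dataset flags read below are mutually exclusive, as enforced by the
+    # docopt usage patterns at the top of this file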
+    CIFAR_DATASET = bool(arguments["--cifar10"])
+    CIFAR100_DATASET = bool(arguments["--cifar100"])
+    MNIST_DATASET = bool(arguments["--mnist"])
+    SVHN_DATASET = bool(arguments["--svhn"])
+    REAL_FASTFOOD = bool(arguments["--real-fastfood"])
+    BATCHNORM = bool(arguments["--batchnorm"])
+    test_eval_time = None
+    val_eval_time = None
+    if arguments["--non-linearity"] == "relu":
+        ACTIVATION_FUNCTION = tf.nn.relu
+    elif arguments["--non-linearity"] == "tanh":
+        ACTIVATION_FUNCTION = tf.nn.tanh
+    elif arguments["--non-linearity"] is None:
+        ACTIVATION_FUNCTION = tf.nn.relu
+    else:
+        raise ValueError("Not known --non-linearity arg: {}".format(arguments["--non-linearity"]))
+    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None
+
+    if CIFAR_DATASET:
+        DATASET = "cifar10"
+    elif MNIST_DATASET:
+        DATASET = "mnist"
+    elif SVHN_DATASET:
+        DATASET = "svhn"
+    elif CIFAR100_DATASET:
+        DATASET = "cifar100"
+    else:
+        raise ValueError("no known dataset specified")
+    CUT_LAYER = arguments["--cut-layer"]
+
+    WEIGHTS = arguments["--weights"]
+
+    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
+    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
+    if arguments["--train-size"] is not None:
+        TRAIN_SIZE = int(arguments["--train-size"])
+    else:
+        TRAIN_SIZE = arguments["--train-size"]
+    global_acc_val = None
+    global_acc_test = None
+    training_time = None
+
+    SEED_TRAIN_VALIDATION = SEED
+    if CIFAR_DATASET:
+        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif MNIST_DATASET:
+        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        # todo make conv_pool_2 configurable
+        transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2")
+    elif SVHN_DATASET:
+        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif CIFAR100_DATASET:
+        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    else:
+        raise ValueError("No dataset specified")
+
+    data.load()  # todo handle the flatten bug
+    if not data.is_image():
+        data.to_image()  # todo handle the case where these are already images (flatten them in every case?)
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+
+    logger.debug("train dataset shape: {}".format(data.train.data.shape))
+    data.apply_transformer(transformer)
+    data.normalize()
+    data.to_one_hot()
+    data.flatten()
+    data.data_astype(np.float32)
+    data.labels_astype(np.int)
+    if TRAIN_SIZE is not None:
+        data.reduce_data_size(int(TRAIN_SIZE))
+
+    logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
+    logger.info("Using dataset {} with validation size {} and seed for splitting set {}.".format(data.s_name, data.validation_size, data.seed))
+    logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
+    logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
+    logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
+    logger.debug("Sample of label: {}".format(data.train[1][0]))
+    # todo separate function for parameter parsing
+
+    if arguments["dense"]:
+        NETWORK = "dense"
+    elif arguments["deepstrom"]:
+        NETWORK = "deepstrom"
+        NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
+        if OUT_DIM is None:
+            OUT_DIM = NYS_SUBSAMPLE_SIZE
+        if RBF_KERNEL:
+            KERNEL = tf_rbf_kernel
+            KERNEL_NAME = "rbf"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": GAMMA}
+        elif LINEAR_KERNEL:
+            KERNEL = tf_linear_kernel
+            KERNEL_NAME = "linear"
+        elif CHI2_KERNEL:
+            KERNEL = tf_chi_square_CPD
+            KERNEL_NAME = "chi2_cpd"
+        elif CHI2_EXP_KERNEL:
+            KERNEL = tf_chi_square_CPD_exp
+            KERNEL_NAME = "chi2_exp_cpd"
+            GAMMA = get_gamma_value(arguments, data, chi2=True)
+            kernel_dict = {"gamma": GAMMA}
+        elif CHI2_PD_KERNEL:
+            KERNEL = tf_chi_square_PD
+            KERNEL_NAME = "chi2_pd"
+        elif SIGMOID_KERNEL:
+            KERNEL = tf_sigmoid_kernel
+            KERNEL_NAME = "sigmoid"
+            GAMMA = get_gamma_value(arguments, data)
+            CONST = float(arguments["--intercept-constant"])
+            kernel_dict = {"gamma": GAMMA, "constant": CONST}
+        elif LAPLACIAN_KERNEL:
+            KERNEL = tf_laplacian_kernel
+            KERNEL_NAME = "laplacian"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": np.sqrt(GAMMA)}
+        elif STACKED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+
+            def KERNEL(X, Y):
+                return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                           [{"gamma": g_value} for g_value in GAMMA])
+
+
+            KERNEL_NAME = "stacked"
+
+        elif SUMED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+
+            def KERNEL(X, Y):
+                return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                         [{"gamma": g_value} for g_value in GAMMA])
+
+
+            KERNEL_NAME = "summed"
+        else:
+            raise Exception("No kernel function specified for deepstrom")
+
+    elif arguments["deepfriedconvnet"]:
+        NETWORK = "deepfriedconvnet"
+        NB_STACK = int(arguments["--nb-stack"])
+        GAMMA = get_gamma_value(arguments, data)
+        SIGMA = 1 / GAMMA
+    else:
+        raise Exception("Not recognized network")
+
+    try:
+        main()
+    except Exception as e:
+        print_result(error=e)
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_cifar10_from_cifar100/__init__.py b/main/experiments/scripts/until_october_2018/transfert_few_data_cifar10_from_cifar100/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/until_october_2018/transfert_few_data_cifar10_from_cifar100/benchmark_classification.py b/main/experiments/scripts/until_october_2018/transfert_few_data_cifar10_from_cifar100/benchmark_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..27efb7a97d65d2d545fd74d2de2ef2a97c78ecdf
--- /dev/null
+++ b/main/experiments/scripts/until_october_2018/transfert_few_data_cifar10_from_cifar100/benchmark_classification.py
@@ -0,0 +1,467 @@
+"""
+Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-W name] [-V] [-b]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-W name] [-V] [-b]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-d val] [-B nb] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-W name] [-V] [-b]
+
+Options:
+    --help -h                              Display help and exit.
+    -q --quiet                             Set logging level to info.
+    -V --tensorboard                       Write tensorboard logs.
+    -a value --seed value                  The seed value used for all randomization processes [default: 0]
+    -t --train-size size                   Size of the train set.
+    -v size --validation-size size         The size of the validation set [default: 10000]
+    -e numepoch --num-epoch=numepoch       The number of epochs.
+    -s batchsize --batch-size=batchsize    The number of examples in each batch
+    -d --dropout val                       Keep probability of neurons before classif [default: 1.0]
+    -b --batchnorm                         Apply batch normalization before the softmax layer
+    -D reprdim --out-dim=reprdim           The dimension of the final representation
+    -f --non-linearity name                Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
+
+Dense:
+    -l --second-layer-size size            Says the size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack          The number of fastfood stacks for deepfriedconvnet
+    -z --real-fastfood                     Tell the fastfood layer not to update its weights
+
+Deepstrom:
+    -r --real-nystrom                      Says if the matrix for deepstrom should be K^(-1/2)
+    -m size --nys-size size                The number of examples in the nystrom subsample.
+    -n --non-linear                        Tell Nystrom to use the non-linear activation function on its output.
+
+Datasets:
+    --cifar10                              Use cifar10 dataset
+    --mnist                                Use mnist dataset
+    --svhn                                 Use svhn dataset
+    --cifar100                             Use cifar100 dataset
+
+Dataset related:
+    -B --cut-layer name                    The name of the last convolutional layer when loading VGG19Transformer.
+    -W --weights name                      The name of the dataset used for weights.
+
+Possible kernels:
+    -R --rbf-kernel                        Says if the rbf kernel should be used for nystrom.
+    -L --linear-kernel                     Says if the linear kernel should be used for nystrom.
+    -C --chi-square-kernel                 Says if the basic additive chi square kernel should be used for nystrom.
+    -E --exp-chi-square-kernel             Says if the exponential chi square kernel should be used for nystrom.
+    -P --chi-square-PD-kernel              Says if the positive definite version of the basic additive chi square kernel should be used for nystrom.
+    -S --sigmoid-kernel                    Says if the sigmoid kernel should be used for nystrom.
+    -A --laplacian-kernel                  Says if the laplacian kernel should be used for nystrom.
+    -T --stacked-kernel                    Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
+    -M --sumed-kernel                      Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue       The value of gamma for the rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
+    -c cvalue --intercept-constant cvalue  The value of the intercept constant for the hyperbolic tangent kernel.
+
+"""
+import logging
+import sys
+
+import daiquiri
+import docopt
+import numpy as np
+import tensorflow as tf
+import time as t
+from tensorflow.python.keras.layers import Dense, BatchNormalization
+
+import skluc.main.data.mldatasets as dataset
+from skluc.main.data.transformation.LeCunTransformer import LecunTransformer
+from skluc.main.data.transformation.VGG19Transformer import VGG19Transformer
+from skluc.main.tensorflow_.kernel import tf_rbf_kernel, tf_linear_kernel, tf_chi_square_CPD, tf_chi_square_CPD_exp, \
+    tf_chi_square_PD, tf_sigmoid_kernel, tf_laplacian_kernel, tf_stack_of_kernels, tf_sum_of_kernels
+from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayer
+from skluc.main.tensorflow_.utils import batch_generator
+from skluc.main.utils import logger, compute_euristic_sigma, compute_euristic_sigma_chi2, memory_usage
+
+run_opts = tf.RunOptions(report_tensor_allocations_upon_oom=True)
+
+
+def print_result(global_acc_val=None, global_acc_test=None, training_time=None, val_eval_time=None, test_eval_time=None, error=None):
+    printed_r_list = [str(NETWORK),
+                      str(global_acc_val),
+                      str(global_acc_test),
+                      str(training_time),
+                      str(val_eval_time),
+                      str(test_eval_time),
+                      str(NUM_EPOCH),
+                      str(BATCH_SIZE),
+                      str(OUT_DIM),
+                      str(SIZE_SECOND_LAYER),
+                      str(KERNEL_NAME),
+                      str(GAMMA),
+                      str(CONST),
+                      str(NB_STACK),
+                      str(NYS_SUBSAMPLE_SIZE),
+                      str(VALIDATION_SIZE),
+                      str(SEED),
+                      str(ACTIVATION_FUNCTION),
+                      str(NON_LINEAR),
+                      str(REAL_NYSTROM),
+                      str(CUT_LAYER),
+                      str(TRAIN_SIZE),
+                      str(DROPOUT),
+                      str(DATASET),
+                      str(REAL_FASTFOOD),
+                      str(WEIGHTS)
+                      ]
+    print(",".join(printed_r_list))
+    if error is None:
+        exit()
+    else:
+        raise error
+
+
+def get_gamma_value(arguments, dat, chi2=False):
+    if arguments["--gamma"] is None:
+        logger.debug("Gamma arguments is None. Need to compute it.")
+        if chi2:
+            gamma_value = 1. / compute_euristic_sigma_chi2(dat.train.data)
+
+        else:
+            gamma_value = 1. / compute_euristic_sigma(dat.train.data)
+    else:
+        gamma_value = eval(arguments["--gamma"])
+
+    logger.debug("Gamma value is {}".format(gamma_value))
+    return gamma_value
+
+
+def main():
+    input_dim, output_dim = data.train[0].shape[1], data.train[1].shape[1]
+
+    x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
+    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
+    tf.summary.histogram("convolved_examples", x)
+
+    if NETWORK == "dense":
+        representation_layer = Dense(OUT_DIM, activation=ACTIVATION_FUNCTION)
+    elif NETWORK == "deepstrom":
+        logger.info("Selecting {} deepstrom layer function with "
+                    "subsample size = {}, "
+                    "output_dim = {}, "
+                    "{} activation function "
+                    "and kernel = {}"
+                    .format("real" if REAL_NYSTROM else "learned",
+                            NYS_SUBSAMPLE_SIZE,
+                            OUT_DIM,
+                            "with" if NON_LINEAR else "without",
+                            KERNEL_NAME))
+        if TRAIN_SIZE is not None:
+            subsample_indexes = data.get_uniform_class_rand_indices_validation(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.validation.data[subsample_indexes]
+        else:
+            subsample_indexes = data.get_uniform_class_rand_indices_train(NYS_SUBSAMPLE_SIZE)
+            nys_subsample = data.train.data[subsample_indexes]
+        logger.debug("Chosen subsample: {}".format(nys_subsample))
+        representation_layer = DeepstromLayer(subsample=nys_subsample,
+                                              out_dim=OUT_DIM,
+                                              activation=ACTIVATION_FUNCTION,
+                                              kernel_name=KERNEL_NAME,
+                                              real_nystrom=not REAL_NYSTROM,
+                                              kernel_dict=kernel_dict)
+    elif NETWORK == "deepfriedconvnet":
+        representation_layer = FastFoodLayer(sigma=SIGMA,
+                                             nbr_stack=NB_STACK,
+                                             trainable=not REAL_FASTFOOD)
+    else:
+        raise Exception("Not recognized network")
+
+    input_classif = representation_layer(x)
+
+    if SIZE_SECOND_LAYER > 0:
+        logger.debug("Add second layer of size: {} and activation {}".format(SIZE_SECOND_LAYER, ACTIVATION_FUNCTION))
+        with tf.variable_scope("second_layer"):
+            input_classif_2nd_layer = Dense(SIZE_SECOND_LAYER, activation=ACTIVATION_FUNCTION)(input_classif)
+    else:
+        logger.debug("No second layer")
+        input_classif_2nd_layer = input_classif
+
+    with tf.variable_scope("classification"):
+        if BATCHNORM:
+            bn = BatchNormalization()
+            input_classif_2nd_layer = bn(input_classif_2nd_layer)
+        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+        input_drop = tf.nn.dropout(input_classif_2nd_layer, keep_prob)
+        classif = Dense(output_dim)(input_drop)
+
+    # compute the loss
+    logger.debug("Add softmax layer for classification")
+    with tf.name_scope("xent"):
+        cross_entropy = tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
+            name="xentropy_mean")
+        tf.summary.scalar('loss-xent', cross_entropy)
+
+    # todo learning rate as hyperparameter
+    # compute the gradient
+    with tf.name_scope("train"):
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
+                                                                              global_step=global_step)
+
+    # compute the accuracy
+    with tf.name_scope("accuracy"):
+        predictions = tf.argmax(classif, 1)
+        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
+        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+        tf.summary.scalar("accuracy", accuracy_op)
+
+    merged_summary = tf.summary.merge_all()
+
+    init = tf.global_variables_initializer()
+    # Create a session for running Ops on the Graph.
+    # Instantiate a SummaryWriter to output summaries and the Graph.
+    if TENSORBOARD:
+        summary_writer = tf.summary.FileWriter("debug_benchmark_classification")
+    # Initialize all Variable objects
+    # actual learning
+    with tf.Session() as sess:
+        logger.info("Start training")
+        if TENSORBOARD:
+            summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        # feed_dict_val = {x: data.validation[0], y: data.validation[1], keep_prob: 1.0}
+        global_start = t.time()
+        for i in range(NUM_EPOCH):
+            logger.debug(memory_usage())
+            j = 0
+            start = t.time()
+            for X_batch, Y_batch in batch_generator(data.train[0], data.train[1], BATCH_SIZE, False):
+                feed_dict = {x: X_batch, y: Y_batch, keep_prob: DROPOUT}
+                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict, options=run_opts)
+                if j % 100 == 0:
+                    logger.info(
+                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, NUM_EPOCH, j + 1,
+                                                                                                int(data.train[0].shape[
+                                                                                                        0] / BATCH_SIZE) + 1,
+                                                                                                X_batch.shape, loss,
+                                                                                                acc))
+                    if TENSORBOARD:
+                        summary_writer.add_summary(summary_str, (j + 1) * (i + 1))
+                j += 1
+
+        logger.info("Evaluation on validation data")
+        training_time = t.time() - global_start
+        accuracies_val = []
+        i = 0
+        val_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.validation.data, data.validation.labels, BATCH_SIZE, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0}, options=run_opts)
+            accuracies_val.append(accuracy[0])
+            i += 1
+        global_acc_val = sum(accuracies_val) / i
+        VAL_EVAL_TIME = t.time() - val_eval_start
+
+        logger.info("Evaluation on test data")
+        accuracies_test = []
+        i = 0
+        test_eval_start = t.time()
+        for X_batch, Y_batch in batch_generator(data.test.data, data.test.labels, BATCH_SIZE, False):
+            accuracy = sess.run([accuracy_op], feed_dict={
+                x: X_batch, y: Y_batch, keep_prob: 1.0}, options=run_opts)
+            accuracies_test.append(accuracy[0])
+            i += 1
+        global_acc_test = sum(accuracies_test) / i
+        TEST_EVAL_TIME = t.time() - test_eval_start
+
+        print_result(global_acc_val=global_acc_val,
+                     global_acc_test=global_acc_test,
+                     training_time=training_time,
+                     val_eval_time=VAL_EVAL_TIME,
+                     test_eval_time=TEST_EVAL_TIME)
+
+
+if __name__ == '__main__':
+    logger.debug("Command line: {}".format(' '.join(sys.argv)))
+    arguments = docopt.docopt(__doc__)
+    logger.debug(arguments)
+    if arguments["--quiet"]:
+        daiquiri.setup(level=logging.INFO)
+    NUM_EPOCH = int(arguments["--num-epoch"])
+    BATCH_SIZE = int(arguments["--batch-size"])
+    OUT_DIM = int(arguments["--out-dim"]) if arguments["--out-dim"] is not None else None
+    SIZE_SECOND_LAYER = int(arguments["--second-layer-size"])
+    RBF_KERNEL = arguments["--rbf-kernel"]
+    LINEAR_KERNEL = arguments["--linear-kernel"]
+    CHI2_KERNEL = arguments["--chi-square-kernel"]
+    CHI2_EXP_KERNEL = arguments["--exp-chi-square-kernel"]
+    CHI2_PD_KERNEL = arguments["--chi-square-PD-kernel"]
+    SIGMOID_KERNEL = arguments["--sigmoid-kernel"]
+    LAPLACIAN_KERNEL = arguments["--laplacian-kernel"]
+    STACKED_KERNEL = arguments["--stacked-kernel"]
+    SUMED_KERNEL = arguments["--sumed-kernel"]
+    VALIDATION_SIZE = int(arguments["--validation-size"])
+    REAL_NYSTROM = arguments["--real-nystrom"]
+    SEED = int(arguments["--seed"])  # the seed changes the data ordering in the dataset (so the train/validation/test split may change with different seeds)
+    TENSORBOARD = arguments["--tensorboard"]
+    NYS_SUBSAMPLE_SIZE = None
+    KERNEL_NAME = None
+    GAMMA = None
+    CONST = None
+    NB_STACK = None
+    kernel_dict = {}
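+    # the dataset flags read below are mutually exclusive, as enforced by the
+    # docopt usage patterns at the top of this file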
+    CIFAR_DATASET = bool(arguments["--cifar10"])
+    CIFAR100_DATASET = bool(arguments["--cifar100"])
+    MNIST_DATASET = bool(arguments["--mnist"])
+    SVHN_DATASET = bool(arguments["--svhn"])
+    REAL_FASTFOOD = bool(arguments["--real-fastfood"])
+    BATCHNORM = bool(arguments["--batchnorm"])
+    test_eval_time = None
+    val_eval_time = None
+    if arguments["--non-linearity"] == "relu":
+        ACTIVATION_FUNCTION = tf.nn.relu
+    elif arguments["--non-linearity"] == "tanh":
+        ACTIVATION_FUNCTION = tf.nn.tanh
+    elif arguments["--non-linearity"] is None:
+        ACTIVATION_FUNCTION = tf.nn.relu
+    else:
+        raise ValueError("Not known --non-linearity arg: {}".format(arguments["--non-linearity"]))
+    NON_LINEAR = ACTIVATION_FUNCTION if arguments["--non-linear"] else None
+
+    if CIFAR_DATASET:
+        DATASET = "cifar10"
+    elif MNIST_DATASET:
+        DATASET = "mnist"
+    elif SVHN_DATASET:
+        DATASET = "svhn"
+    elif CIFAR100_DATASET:
+        DATASET = "cifar100"
+    else:
+        raise ValueError("no known dataset specified")
+    CUT_LAYER = arguments["--cut-layer"]
+
+    WEIGHTS = arguments["--weights"]
+
+    DROPOUT = float(arguments["--dropout"]) if arguments["--dropout"] is not None else None
+    logger.debug("DROPOUT value is {} and type {}".format(DROPOUT, type(DROPOUT)))
+    if arguments["--train-size"] is not None:
+        TRAIN_SIZE = int(arguments["--train-size"])
+    else:
+        TRAIN_SIZE = arguments["--train-size"]
+    global_acc_val = None
+    global_acc_test = None
+    training_time = None
+
+    SEED_TRAIN_VALIDATION = SEED
+    if CIFAR_DATASET:
+        data = dataset.Cifar10Dataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif MNIST_DATASET:
+        data = dataset.MnistDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        # todo make conv_pool_2 configurable
+        transformer = LecunTransformer(data_name=WEIGHTS, cut_layer_name="conv_pool_2")
+    elif SVHN_DATASET:
+        data = dataset.SVHNDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    elif CIFAR100_DATASET:
+        data = dataset.Cifar100FineDataset(validation_size=VALIDATION_SIZE, seed=SEED_TRAIN_VALIDATION)
+        transformer = VGG19Transformer(data_name=WEIGHTS, cut_layer_name=CUT_LAYER)
+    else:
+        raise ValueError("No dataset specified")
+
+    data.load()  # todo handle the flatten bug
+    if not data.is_image():
+        data.to_image()  # todo handle the case where these are already images (flatten them in every case?)
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+
+    logger.debug("train dataset shape: {}".format(data.train.data.shape))
+    data.apply_transformer(transformer)
+    data.normalize()
+    data.to_one_hot()
+    data.flatten()
+    data.data_astype(np.float32)
+    data.labels_astype(np.int)
+    if TRAIN_SIZE is not None:
+        data.reduce_data_size(int(TRAIN_SIZE))
+
+    logger.info("Start benchmark with parameters: {}".format(" ".join(sys.argv[1:])))
+    logger.info("Using dataset {} with validation size {} and seed for splitting set {}.".format(data.s_name, data.validation_size, data.seed))
+    logger.info("Shape of train set data: {}; shape of train set labels: {}".format(data.train[0].shape, data.train[1].shape))
+    logger.info("Shape of validation set data: {}; shape of validation set labels: {}".format(data.validation[0].shape, data.validation[1].shape))
+    logger.info("Shape of test set data: {}; shape of test set labels: {}".format(data.test[0].shape, data.test[1].shape))
+    logger.debug("Sample of label: {}".format(data.train[1][0]))
+    # todo separate function for parameter parsing
+
+    if arguments["dense"]:
+        NETWORK = "dense"
+    elif arguments["deepstrom"]:
+        NETWORK = "deepstrom"
+        NYS_SUBSAMPLE_SIZE = int(arguments["--nys-size"])
+        if OUT_DIM is None:
+            OUT_DIM = NYS_SUBSAMPLE_SIZE
+        if RBF_KERNEL:
+            KERNEL = tf_rbf_kernel
+            KERNEL_NAME = "rbf"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": GAMMA}
+        elif LINEAR_KERNEL:
+            KERNEL = tf_linear_kernel
+            KERNEL_NAME = "linear"
+        elif CHI2_KERNEL:
+            KERNEL = tf_chi_square_CPD
+            KERNEL_NAME = "chi2_cpd"
+        elif CHI2_EXP_KERNEL:
+            KERNEL = tf_chi_square_CPD_exp
+            KERNEL_NAME = "chi2_exp_cpd"
+            GAMMA = get_gamma_value(arguments, data, chi2=True)
+            kernel_dict = {"gamma": GAMMA}
+        elif CHI2_PD_KERNEL:
+            KERNEL = tf_chi_square_PD
+            KERNEL_NAME = "chi2_pd"
+        elif SIGMOID_KERNEL:
+            KERNEL = tf_sigmoid_kernel
+            KERNEL_NAME = "sigmoid"
+            GAMMA = get_gamma_value(arguments, data)
+            CONST = float(arguments["--intercept-constant"])
+            kernel_dict = {"gamma": GAMMA, "constant": CONST}
+        elif LAPLACIAN_KERNEL:
+            KERNEL = tf_laplacian_kernel
+            KERNEL_NAME = "laplacian"
+            GAMMA = get_gamma_value(arguments, data)
+            kernel_dict = {"gamma": np.sqrt(GAMMA)}
+        elif STACKED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+
+            def KERNEL(X, Y):
+                return tf_stack_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                           [{"gamma": g_value} for g_value in GAMMA])
+
+
+            KERNEL_NAME = "stacked"
+
+        elif SUMED_KERNEL:
+            GAMMA = get_gamma_value(arguments, data)
+
+
+            def KERNEL(X, Y):
+                return tf_sum_of_kernels(X, Y, [tf_rbf_kernel for _ in GAMMA],
+                                         [{"gamma": g_value} for g_value in GAMMA])
+
+
+            KERNEL_NAME = "summed"
+        else:
+            raise Exception("No kernel function specified for deepstrom")
+
+    elif arguments["deepfriedconvnet"]:
+        NETWORK = "deepfriedconvnet"
+        NB_STACK = int(arguments["--nb-stack"])
+        GAMMA = get_gamma_value(arguments, data)
+        SIGMA = 1 / GAMMA
+    else:
+        raise Exception("Not recognized network")
+
+    try:
+        main()
+    except Exception as e:
+        print_result(error=e)
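+
+# Note: the comma-separated line emitted by print_result above is one row of the
+# "gathered_results.csv" files consumed by the graph_drawing scripts; the order of
+# the values matches the field_names list declared there.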