diff --git a/main/experiments/parameter_files/december_2018/lazyfile_classif_end_to_end_subsample_conv_hand_with_augment_keras.yml b/main/experiments/parameter_files/december_2018/lazyfile_classif_end_to_end_subsample_conv_hand_with_augment_keras.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6098aeba6f12fc6db97676f6fbb5413119047573
--- /dev/null
+++ b/main/experiments/parameter_files/december_2018/lazyfile_classif_end_to_end_subsample_conv_hand_with_augment_keras.yml
@@ -0,0 +1,30 @@
+all:
+  deepstrom_no_gamma:
+  deepstrom_gamma:
+
+base:
+  epoch_numbers: {"-e": [200]}
+  batch_sizes: {"-s": [64]}
+  val_size: {"-v": [10000]}
+  seed: {"-a": "range(1)"}
+  quiet: ["-q"]
+  dataset: ["--mnist", "--cifar10", "--svhn"]
+  subs_every: {"-Z": [1, 10, 50, 100, -1]}
+
+
+gamma:
+  gamma: {"-g": [0.001, 0.005, 0.01, 0.05, 0.1]}
+
+deepstrom:
+  network: ["deepstrom"]
+  base:
+  nys_size: {"-m": [16]}
+
+deepstrom_no_gamma:
+  deepstrom:
+  kernel: ["-C", "-L"]
+
+deepstrom_gamma:
+  deepstrom:
+  gamma:
+  kernel: ["-R"]
\ No newline at end of file
diff --git a/main/experiments/scripts/december_2018/__init__.py b/main/experiments/scripts/december_2018/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/december_2018/keras_end_to_end_subsample_conv_hand/__init__.py b/main/experiments/scripts/december_2018/keras_end_to_end_subsample_conv_hand/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/experiments/scripts/december_2018/keras_end_to_end_subsample_conv_hand/deepstrom_classif_end_to_end.py b/main/experiments/scripts/december_2018/keras_end_to_end_subsample_conv_hand/deepstrom_classif_end_to_end.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d52027f4b02f058d8b500f9d0f8b3df7790f96b
--- /dev/null
+++ b/main/experiments/scripts/december_2018/keras_end_to_end_subsample_conv_hand/deepstrom_classif_end_to_end.py
@@ -0,0 +1,389 @@
+"""
+Benchmark VGG: benchmark the deepstrom architecture against other classification heads on top of a convolutional network (LeNet for mnist, VGG19 otherwise).
+
+Usage:
+    benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-V]
+    benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-V]
+    benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V] [-Z number]
+
+Options:
+    --help -h                               Display help and exit.
+    -q --quiet                              Set logging level to info.
+    -V --tensorboard                        Write tensorboard logs.
+    -a --seed value                         The seed value used for all randomization processes [default: 0]
+    -t --train-size size                    Size of the train set.
+    -v --validation-size size               The size of the validation set [default: 10000]
+    -e --num-epoch=numepoch                 The number of epochs.
+    -s --batch-size=batchsize               The number of examples in each batch.
+    -d --dropout val                        Keep probability of neurons before the classification layer [default: 1.0]
+    -D reprdim --out-dim=reprdim            The dimension of the final representation.
+    -f --non-linearity name                 The non-linearity to use when one is needed (possible values: "relu", "tanh") [default: relu]
+    -l --second-layer-size size             The size of the second non-linear layer [default: 0]
+
+Deepfried convnet:
+    -N nbstack --nb-stack nbstack           The number of fastfood stacks for deepfriedconvnet.
+    -z --real-fastfood                      Tell the fastfood layer not to update its weights.
+
+Deepstrom:
+    -r --real-nystrom                       Use K^(-1/2) as the weight matrix of the deepstrom layer instead of learning it (not implemented).
+    -m size --nys-size size                 The number of examples in the nystrom subsample.
+    -n --non-linear                         Tell Nystrom to use the non-linear activation function on its output.
+    -Z --subs-every number                  The number of steps (batches) between two passes of the subsample through the convolution layers [default: 1]
+
+Datasets:
+    --cifar10                               Use the cifar10 dataset.
+    --mnist                                 Use the mnist dataset.
+    --svhn                                  Use the svhn dataset.
+    --cifar100                              Use the cifar100 dataset.
+
+Possible kernels:
+    -R --rbf-kernel                         Use the rbf kernel for nystrom.
+    -L --linear-kernel                      Use the linear kernel for nystrom.
+    -C --chi-square-kernel                  Use the basic additive chi square kernel for nystrom.
+    -E --exp-chi-square-kernel              Use the exponential chi square kernel for nystrom.
+    -P --chi-square-PD-kernel               Use the positive definite version of the basic additive chi square kernel for nystrom.
+    -S --sigmoid-kernel                     Use the sigmoid kernel for nystrom.
+    -A --laplacian-kernel                   Use the laplacian kernel for nystrom.
+    -T --stacked-kernel                     Use the laplacian, chi2 and rbf kernels in a stacked setting for nystrom.
+    -M --sumed-kernel                       Use the laplacian, chi2 and rbf kernels in a summed setting for nystrom.
+
+Kernel related:
+    -g gammavalue --gamma gammavalue        The value of gamma for the rbf, chi2 or hyperbolic tangent kernel (deepstrom and deepfriedconvnet).
+    -c cvalue --intercept-constant cvalue   The value of the intercept constant for the hyperbolic tangent kernel.
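+
+Example:
+    A sample deepstrom run on mnist, mirroring one point of the grid defined in
+    the accompanying parameter file (epochs, batch size, validation size,
+    subsample size and gamma are taken from that file, not privileged defaults):
+
+        benchmark_classification deepstrom -q --mnist -e 200 -s 64 -v 10000 -a 0 -m 16 -R -g 0.01 -Z 1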
+
+"""
+import time as t
+
+import docopt
+import keras
+import keras.backend as K
+import numpy as np
+from keras.layers import Dense, Input, Lambda, concatenate
+from keras.models import Model
+from keras.optimizers import Adam
+from keras.preprocessing.image import ImageDataGenerator
+
+import skluc.main.data.mldatasets as dataset
+from skluc.main.keras_.kernel import map_kernel_name_function
+# from skluc.main.keras_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
+from skluc.main.keras_.kernel_approximation.fastfood_layer import FastFoodLayer
+from skluc.main.keras_.models import build_lenet_model, build_vgg19_model_glorot
+from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
+
+
+# import tensorflow as tf
+
+
+def evaluation_function(x_data, y_data, model, list_repr_subsample_bases, datagen_eval, paraman, message):
+    logger.info(f"Evaluation on {message} data")
+    accuracies_val = []
+    i = 0
+    eval_start = t.time()
+    for X_batch, Y_batch in datagen_eval.flow(x_data, y_data, batch_size=paraman["--batch-size"]):
+        # incomplete trailing batches cannot be fed to the fixed batch_shape inputs
+        if X_batch.shape[0] != paraman["--batch-size"]:
+            break
+        if paraman["network"] == "deepstrom":
+            loss, acc = model.evaluate([X_batch] + list_repr_subsample_bases, [Y_batch], verbose=0)
+        else:
+            loss, acc = model.evaluate([X_batch], [Y_batch], verbose=0)
+
+        accuracies_val += [acc]
+        i += 1
+        if i > int(x_data.shape[0] // paraman["--batch-size"]):
+            break
+
+    eval_acc = sum(accuracies_val) / i
+    eval_time = t.time() - eval_start
+    return eval_acc, eval_time
+
+
+class ParameterManagerMain(ParameterManager):
+
+    def __init__(self, docopt_dict):
+        super().__init__(docopt_dict)
+
+        self["--out-dim"] = int(self["--out-dim"]) if eval(str(self["--out-dim"])) is not None else None
+        self["kernel"] = self.init_kernel()
+        self["network"] = self.init_network()
+        self["activation_function"] = self.init_non_linearity()
+        self["dataset"] = self.init_dataset()
+        self["--nb-stack"] = int(self["--nb-stack"]) if self["--nb-stack"] is not None else None
+        self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None
+        self["--num-epoch"] = int(self["--num-epoch"])
+        self["--validation-size"] = int(self["--validation-size"])
+        self["--seed"] = int(self["--seed"])
+        self["--batch-size"] = int(self["--batch-size"])
+        self["deepstrom_activation"] = self.init_deepstrom_activation()
+        self["--train-size"] = int(self["--train-size"]) if self["--train-size"] is not None else None
+        self["--second-layer-size"] = int(self["--second-layer-size"])
+        self["--subs-every"] = int(self["--subs-every"]) if self["--subs-every"] is not None else None
+
+        self.__kernel_dict = None
+
+        self["nb_subsample_bases"], self["zero_padding_base"] = self.init_number_subsample_bases()
+
+    def init_non_linearity(self):
+        if self["--non-linearity"] == "tanh":
+            return keras.activations.tanh
+        elif self["--non-linearity"] == "relu":
+            return keras.activations.relu
+        elif self["--non-linearity"] == "None":
+            return keras.activations.linear
+
+    def init_number_subsample_bases(self):
+        if self["network"] == "deepstrom":
+            remaining = self["--nys-size"] % self["--batch-size"]
+            quotient = self["--nys-size"] // self["--batch-size"]
+            if remaining == 0:
+                return quotient, remaining
+            else:
+                padding = self["--batch-size"] - remaining
+                logger.warning(f"Subsample size {self['--nys-size']} is not a multiple of batch size {self['--batch-size']}, padding with {padding} zero_like samples")
+                return quotient + 1, padding
+        else:
+            return None, None
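+    # Worked example of the padding arithmetic above (hypothetical values, not
+    # taken from the parameter file): with --nys-size 100 and --batch-size 64,
+    # quotient = 1 and remaining = 36, so the subsample is padded with
+    # 64 - 36 = 28 zero_like samples up to 128 = 2 * 64 and is later encoded
+    # by the convolutional network as 2 full batches.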
+    def init_deepstrom_activation(self):
+        if not self["deepstrom"]:
+            return None
+
+        if self["--non-linear"]:
+            return self["--non-linearity"]
+        else:
+            return None
+
+    def init_kernel_dict(self, data):
+        if self["kernel"] == "rbf" or self["network"] == "deepfriedconvnet":
+            GAMMA = self.get_gamma_value(data)
+            self["--gamma"] = GAMMA
+            self.__kernel_dict = {"gamma": GAMMA}
+        elif self["kernel"] == "chi2_exp_cpd":
+            GAMMA = self.get_gamma_value(data, chi2=True)
+            self["--gamma"] = GAMMA
+            self.__kernel_dict = {"gamma": GAMMA}
+        elif self["kernel"] == "laplacian":
+            GAMMA = self.get_gamma_value(data)
+            self["--gamma"] = GAMMA
+            self.__kernel_dict = {"gamma": np.sqrt(GAMMA)}
+        else:
+            self.__kernel_dict = {}
+
+    def __getitem__(self, item):
+        if item == "kernel_dict":
+            return self.__kernel_dict
+        else:
+            return super().__getitem__(item)
+
+
+class ResultManagerMain(ResultManager):
+    def __init__(self):
+        super().__init__()
+        self["training_time"] = None
+        self["val_eval_time"] = None
+        self["val_acc"] = None
+        self["test_acc"] = None
+        self["test_eval_time"] = None
+
+
+def main(paraman: ParameterManagerMain, resman, printman):
+    if paraman["dataset"] == "mnist":
+        data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_lenet_model
+        datagen_train = ImageDataGenerator(
+            rotation_range=20,
+            width_shift_range=0.2,
+            height_shift_range=0.2,
+            horizontal_flip=False)
+    elif paraman["dataset"] == "cifar10":
+        data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_vgg19_model_glorot
+        datagen_train = ImageDataGenerator(
+            rotation_range=20,
+            width_shift_range=0.2,
+            height_shift_range=0.2,
+            horizontal_flip=True)
+    elif paraman["dataset"] == "cifar100":
+        data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_vgg19_model_glorot
+        datagen_train = ImageDataGenerator(
+            rotation_range=20,
+            width_shift_range=0.2,
+            height_shift_range=0.2,
+            horizontal_flip=True)
+    elif paraman["dataset"] == "svhn":
+        data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
+        convmodel_func = build_vgg19_model_glorot
+        datagen_train = ImageDataGenerator(
+            rotation_range=20,
+            width_shift_range=0.2,
+            height_shift_range=0.2,
+            horizontal_flip=False)
+    else:
+        raise ValueError("Unknown dataset")
+
+    datagen_eval = ImageDataGenerator()
+
+    data.load()
+    data.to_one_hot()
+    if not data.is_image():
+        data.to_image()
+    data.data_astype(np.float32)
+    data.labels_astype(np.float32)
+    data.normalize()
+
+    if paraman["--train-size"] is not None:
+        X_train, y_train = data.train.data[:paraman["--train-size"]], data.train.labels[:paraman["--train-size"]]
+    else:
+        X_train, y_train = data.train.data, data.train.labels
+    X_test, y_test = data.test.data, data.test.labels
+    X_val, y_val = data.validation.data, data.validation.labels
+
+    paraman.init_kernel_dict(X_train)
+
+    # # Model definition
+
+    input_dim = X_train.shape[1:]
+    output_dim = y_train.shape[1]
+
+    convnet_model = convmodel_func(input_dim)
+
+    input_x = Input(shape=input_dim, name="x")
+
+    repr_x = convnet_model(input_x)
+
+    feature_model = Model(input_x, repr_x)
+
+    logger.debug(paraman["kernel_dict"])
+
+    list_repr_subsample_bases = []
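+    # Sketch of the deepstrom head built below (our reading of the code, not an
+    # official description): for a convolutional representation repr_x and a
+    # subsample z_1, ..., z_m with m = --nys-size, the kernel layer computes the
+    # vector [k(repr_x, z_1), ..., k(repr_x, z_m)] and the bias-free Dense layer
+    # then learns an m x m mixing matrix, the part that exact Nystrom would fix
+    # to K^(-1/2) (cf. the unimplemented --real-nystrom option).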
+    if paraman["network"] == "deepstrom":
+        # the subsample representations enter through fixed-size inputs, one per full batch of the (padded) subsample
+        input_repr_subsample = [Input(batch_shape=(paraman["--batch-size"], feature_model.output_shape[1])) for _ in range(paraman["nb_subsample_bases"])]
+        if paraman["nb_subsample_bases"] > 1:
+            input_subsample_concat = concatenate(input_repr_subsample, axis=0)
+        else:
+            input_subsample_concat = input_repr_subsample[0]
+
+        # drop the zero padding so only the actual subsample enters the kernel computation
+        slice_layer = Lambda(lambda input: input[:paraman["--nys-size"]],
+                             output_shape=lambda shape: (paraman["--nys-size"], *shape[1:]))
+        input_subsample_concat = slice_layer(input_subsample_concat)
+
+        if paraman["kernel"] == "linear":
+            kernel_function = lambda *args, **kwargs: map_kernel_name_function["linear"](*args, **kwargs, normalize=True, **paraman["kernel_dict"])
+        elif paraman["kernel"] == "rbf":
+            kernel_function = lambda *args, **kwargs: map_kernel_name_function["rbf"](*args, **kwargs, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
+        elif paraman["kernel"] == "chi2_cpd":
+            kernel_function = lambda *args, **kwargs: map_kernel_name_function["chi2_cpd"](*args, **kwargs, epsilon=1e-8, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
+        elif paraman["kernel"] == "chi2_exp_cpd":
+            kernel_function = lambda *args, **kwargs: map_kernel_name_function["chi2_exp_cpd"](*args, **kwargs, epsilon=1e-8, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
+        else:
+            raise NotImplementedError(f"unknown kernel function {paraman['kernel']}")
+
+        kernel_layer = Lambda(kernel_function, output_shape=lambda shapes: (shapes[0][0], paraman["--nys-size"]))
+        kernel_vector = kernel_layer([repr_x, input_subsample_concat])
+
+        # bias-free linear layer of size --nys-size playing the role of K^(-1/2)
+        input_classifier = Dense(paraman["--nys-size"], use_bias=False, activation='linear')(kernel_vector)
+
+        if paraman["--non-linear"]:
+            # wrap in an Activation layer so the functional graph stays traceable
+            input_classifier = keras.layers.Activation(paraman["activation_function"])(input_classifier)
+
+        subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
+        nys_subsample = data.validation.data[subsample_indexes]
+        zero_padding_subsample = np.zeros((paraman["zero_padding_base"], *nys_subsample.shape[1:]))
+        nys_subsample = np.vstack([nys_subsample, zero_padding_subsample])
+        list_subsample_bases = [nys_subsample[i * paraman["--batch-size"]:(i + 1) * paraman["--batch-size"]] for i in range(paraman["nb_subsample_bases"])]
+
+    elif paraman["network"] == "dense":
+        dense_layer = Dense(paraman["--out-dim"], activation=paraman["activation_function"])
+        input_classifier = dense_layer(repr_x)
+
+    elif paraman["network"] == "deepfriedconvnet":
+        deepfried_layer = FastFoodLayer(sigma=1 / paraman["--gamma"], nbr_stack=paraman["--nb-stack"], trainable=not paraman["--real-fastfood"])
+        input_classifier = deepfried_layer(repr_x)
+    else:
+        raise ValueError(f"Not recognized network {paraman['network']}")
+
+    if paraman["--second-layer-size"] > 0:
+        dense_layer2 = Dense(paraman["--second-layer-size"], activation=paraman["activation_function"])
+        input_classifier = dense_layer2(input_classifier)
+
+    with K.name_scope("classification"):
+        classif = Dense(output_dim, activation="softmax")(input_classifier)
+
+    if paraman["network"] == "deepstrom":
+        model = Model([input_x] + input_repr_subsample, [classif])
+    else:
+        model = Model([input_x], [classif])
+    opt = Adam(1e-4)
+    model.compile(loss=['categorical_crossentropy'], optimizer=opt, metrics=['accuracy'])
+
+    if paraman["--tensorboard"]:
+        tensorboard_log_file = f"log/{int(t.time())}/{paraman['dataset']}/nys_size_{paraman['--nys-size']}/"
+        # note: train_on_batch bypasses Keras callbacks, so this callback is
+        # built but never invoked by the manual training loop below
+        keras.callbacks.TensorBoard(log_dir=tensorboard_log_file,
+                                    batch_size=paraman["--batch-size"],
+                                    write_graph=True,
+                                    write_grads=True,
+                                    write_images=True)
+
+    # actual learning
+    global_start = t.time()
+    j = 0
+
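+    # Re-encoding schedule for the Nystrom subsample: the convolutional weights
+    # move during training, so the stored subsample representations go stale;
+    # they are recomputed every --subs-every batches, or once per epoch when
+    # --subs-every is -1.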
+    for i in range(paraman["--num-epoch"]):
+        # logger.debug(memory_usage())
+        total_loss = 0
+        total_acc = 0
+        for k, (X_batch, Y_batch) in enumerate(datagen_train.flow(X_train, y_train, batch_size=paraman["--batch-size"])):
+            if X_batch.shape[0] != paraman["--batch-size"]:
+                continue
+            if paraman["network"] == "deepstrom":
+                if paraman["--subs-every"] == -1:
+                    if k == 0:  # case 1 time / epoch
+                        list_repr_subsample_bases = [feature_model.predict(subs_base) for subs_base in list_subsample_bases]
+                elif j % paraman["--subs-every"] == 0:
+                    list_repr_subsample_bases = [feature_model.predict(subs_base) for subs_base in list_subsample_bases]
+
+            if paraman["network"] == "deepstrom":
+                loss, acc = model.train_on_batch([X_batch] + list_repr_subsample_bases, [Y_batch])
+            else:
+                loss, acc = model.train_on_batch([X_batch], [Y_batch])
+
+            if j % 100 == 0:
+                logger.info(
+                    "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"],
+                                                                                            k, int(X_train.shape[0] / paraman["--batch-size"]) + 1,
+                                                                                            X_batch.shape, loss,
+                                                                                            acc))
+            total_loss += loss
+            total_acc += acc
+            j += 1
+            if k > int(X_train.shape[0] // paraman["--batch-size"]):
+                break
+        logger.info(
+            "epoch: {}/{}; loss: {}; acc: {}".format(i, paraman["--num-epoch"], total_loss / k, total_acc / k))
+        logger.debug("memory usage: " + memory_usage())
+
+    if paraman["network"] == "deepstrom":
+        # refresh the representations one last time with the final convolutional weights
+        list_repr_subsample_bases = [feature_model.predict(subs_base) for subs_base in list_subsample_bases]
+
+    training_time = t.time() - global_start
+    resman["training_time"] = training_time
+
+    acc_val, time_val = evaluation_function(X_val, y_val, model, list_repr_subsample_bases, datagen_eval, paraman, message="validation")
+    resman["val_acc"] = acc_val
+    resman["val_eval_time"] = time_val
+
+    acc_test, time_test = evaluation_function(X_test, y_test, model, list_repr_subsample_bases, datagen_eval, paraman, message="test")
+    resman["test_acc"] = acc_test
+    resman["test_eval_time"] = time_test
+
+    printman.print()
+
+
+if __name__ == "__main__":
+    paraman_obj = ParameterManagerMain(docopt.docopt(__doc__))
+    resman_obj = ResultManagerMain()
+    printman_obj = ResultPrinter(paraman_obj, resman_obj)
+
+    try:
+        main(paraman_obj, resman_obj, printman_obj)
+    except Exception:
+        # print whatever results were gathered, then re-raise with the original traceback
+        printman_obj.print()
+        raise