Skip to content
Snippets Groups Projects
Commit fcbb18ce authored by Luc Giffon's avatar Luc Giffon
Browse files

expe keras subsample conv hand

lazyfile
script
parent 72da0ff8
No related branches found
No related tags found
No related merge requests found
# Lazyfile experiment grid (indentation reconstructed — the scraped copy had
# lost it). Each top-level key names a parameter set; a nested bare key
# references another set, and a leaf entry maps a name to the CLI flag and
# the list of values to sweep over.
all:
  deepstrom_no_gamma:
  deepstrom_gamma:
base:
  epoch_numbers: {"-e": [200]}
  batch_sizes: {"-s": [64]}
  val_size: {"-v": [10000]}
  seed: {"-a": "range(1)"}
  quiet: ["-q"]
  dataset: ["--mnist", "--cifar10", "--svhn"]
  # NOTE(review): the benchmark script documents "-Z" as the subsample
  # interval and "-l" as the second-layer size — confirm "-l" is intended here.
  subs_every: {"-l": [1, 10, 50, 100, -1]}
gamma:
  gamma: {"-g": [0.001, 0.005, 0.01, 0.05, 0.1]}
deepstrom:
  network: ["deepstrom"]
  base:
  nys_size: {"-m": [16]}
deepstrom_no_gamma:
  deepstrom:
  kernel: ["-C", "-L"]
deepstrom_gamma:
  deepstrom:
  gamma:
  kernel: ["-R"]
\ No newline at end of file
"""
Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
Usage:
benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-V]
benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-V]
benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V] [-Z number]
Options:
--help -h Display help and exit.
-q --quiet Set logging level to info.
-V --tensorboard Write tensorboard logs.
-a --seed value The seed value used for all randomization processed [default: 0]
-t --train-size size Size of train set.
-v --validation-size size The size of the validation set [default: 10000]
-e --num-epoch=numepoch The number of epoch.
-s --batch-size=batchsize The number of example in each batch
-d --dropout val Keep probability of neurons before classif [default: 1.0]
-D reprdim --out-dim=reprdim The dimension of the final representation
-f --non-linearity name Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
-l --second-layer-size size Says the size of the second non-linear layer [default: 0]
Deepfried convnet:
-N nbstack --nb-stack nbstack The number of fastfood stack for deepfriedconvnet
-z --real-fastfood Tell fastfood layer to not update its weights
Deepstrom:
-r --real-nystrom Says if the matrix for deepstrom should be K^(-1/2) (not implemented)
-m size --nys-size size The number of example in the nystrom subsample.
-n --non-linear Tell Nystrom to use the non linear activation function on its output.
-Z --subs-every number Tell the number of step (batch) between each pass of the subsample in convolution layers [default: 1]
Datasets:
--cifar10 Use cifar dataset
--mnist Use mnist dataset
--svhn Use svhn dataset
--cifar100 Use cifar100 dataset
Possible kernels:
-R --rbf-kernel Says if the rbf kernel should be used for nystrom.
-L --linear-kernel Says if the linear kernel should be used for nystrom.
-C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom.
-E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom.
-P --chi-square-PD-kernel Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
-S --sigmoid-kernel Says it the sigmoid kernel should be used for nystrom.
-A --laplacian-kernel Says if the laplacian kernel should be used for nystrom.
-T --stacked-kernel Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
-M --sumed-kernel Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
Kernel related:
-g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
-c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel.
"""
import time as t
import docopt
import keras
import keras.backend as K
import numpy as np
from keras.layers import Dense, Input, Lambda, concatenate
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
import skluc.main.data.mldatasets as dataset
from skluc.main.keras_.kernel import map_kernel_name_function
# from skluc.main.keras_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
from skluc.main.keras_.kernel_approximation.fastfood_layer import FastFoodLayer
from skluc.main.keras_.models import build_lenet_model, build_vgg19_model_glorot
from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
# import tensorflow as tf
def evaluation_function(x_data, y_data, model, list_subsample_bases, datagen_eval, paraman, message):
    """Evaluate `model` accuracy on (x_data, y_data), one full batch at a time.

    Args:
        x_data, y_data: evaluation examples and their one-hot labels.
        model: compiled keras model; a deepstrom model additionally expects
            the subsample representation bases as inputs.
        list_subsample_bases: pre-computed subsample representation batches,
            only used when paraman["network"] == "deepstrom".
        datagen_eval: keras ImageDataGenerator used to iterate over the data.
        paraman: parameter mapping providing "--batch-size" and "network".
        message: label for the log line (e.g. "validation" or "test").

    Returns:
        (mean accuracy over full batches, wall-clock evaluation time in seconds).
        The accuracy is 0.0 when no full batch could be evaluated.
    """
    logger.info(f"Evaluation on {message} data")
    accuracies_val = []
    i = 0
    eval_start = t.time()
    # datagen.flow loops forever, so we stop explicitly after one pass.
    for X_batch, Y_batch in datagen_eval.flow(x_data, y_data, batch_size=paraman["--batch-size"]):
        # Skip the trailing partial batch: deepstrom inputs have a fixed batch shape.
        if X_batch.shape[0] != paraman["--batch-size"]:
            break
        if paraman["network"] == "deepstrom":
            loss, acc = model.evaluate([X_batch] + list_subsample_bases, [Y_batch], verbose=0)
        else:
            loss, acc = model.evaluate([X_batch], [Y_batch], verbose=0)
        accuracies_val += [acc]
        i += 1
        if i > int(x_data.shape[0] // paraman["--batch-size"]):
            break
    # Bug fix: guard the division when no full batch was evaluated (e.g. the
    # dataset holds fewer examples than one batch) — was a ZeroDivisionError.
    eval_acc = sum(accuracies_val) / i if i else 0.0
    eval_time = t.time() - eval_start
    return eval_acc, eval_time
class ParameterManagerMain(ParameterManager):
    """Parameter manager normalizing the docopt arguments of this script.

    Casts numeric options to int, resolves kernel/network/dataset names and
    pre-computes how many batch-sized subsample bases the deepstrom network
    needs (plus the zero padding required to fill the last one).
    """

    def __init__(self, docopt_dict):
        super().__init__(docopt_dict)
        # "--out-dim" may be missing (None) or the literal string "None".
        # The original detected this with eval() on CLI input, which is
        # unsafe; an explicit comparison is equivalent.
        out_dim = self["--out-dim"]
        self["--out-dim"] = int(out_dim) if out_dim not in (None, "None") else None
        self["kernel"] = self.init_kernel()
        self["network"] = self.init_network()
        self["activation_function"] = self.init_non_linearity()
        self["dataset"] = self.init_dataset()
        self["--nb-stack"] = int(self["--nb-stack"]) if self["--nb-stack"] is not None else None
        self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None
        self["--num-epoch"] = int(self["--num-epoch"])
        self["--validation-size"] = int(self["--validation-size"])
        self["--seed"] = int(self["--seed"])
        self["--batch-size"] = int(self["--batch-size"])
        self["deepstrom_activation"] = self.init_deepstrom_activation()
        self["--train-size"] = int(self["--train-size"]) if self["--train-size"] is not None else None
        self["--second-layer-size"] = int(self["--second-layer-size"])
        self["--subs-every"] = int(self["--subs-every"]) if self["--subs-every"] is not None else None
        self.__kernel_dict = None
        self["nb_subsample_bases"], self["zero_padding_base"] = self.init_number_subsample_bases()

    def init_non_linearity(self):
        """Map the --non-linearity option to the keras activation function."""
        if self["--non-linearity"] == "tanh":
            return keras.activations.tanh
        elif self["--non-linearity"] == "relu":
            return keras.activations.relu
        elif self["--non-linearity"] == "None":
            return keras.activations.linear
        # Fail loudly: the original silently returned None for unknown
        # values, which only blew up much later in model construction.
        raise ValueError(f"Unknown non-linearity {self['--non-linearity']}")

    def init_number_subsample_bases(self):
        """Return (nb of batch-sized subsample bases, nb of zero-padding rows).

        The deepstrom subsample is fed in batch-sized chunks; when
        --nys-size is not a multiple of --batch-size the subsample is padded
        with zero rows up to the next multiple. Returns (None, None) for
        non-deepstrom networks.
        """
        if self["network"] != "deepstrom":
            return None, None
        remaining = self["--nys-size"] % self["--batch-size"]
        quotient = self["--nys-size"] // self["--batch-size"]
        if remaining == 0:
            return quotient, remaining
        # Bug fix: reaching the next multiple of the batch size requires
        # (batch_size - remaining) padding rows; the original computed
        # batch_size * remaining, massively over-padding the subsample
        # (and logging the wrong count).
        nb_padding = self["--batch-size"] - remaining
        logger.warning(f"Subsample size {self['--nys-size']} is not multiple of batch size {self['--batch-size']}, padding with {nb_padding} zero_like samples")
        return quotient + 1, nb_padding

    def init_deepstrom_activation(self):
        """Return the non-linearity name for the deepstrom output, or None."""
        if not self["deepstrom"]:
            return None
        if self["--non-linear"]:
            return self["--non-linearity"]
        else:
            return None

    def init_kernel_dict(self, data):
        """Compute the kernel hyper-parameter dict (gamma) from `data`.

        Must be called before reading self["kernel_dict"].
        """
        if self["kernel"] == "rbf" or self["network"] == "deepfriedconvnet":
            GAMMA = self.get_gamma_value(data)
            self["--gamma"] = GAMMA
            self.__kernel_dict = {"gamma": GAMMA}
        elif self["kernel"] == "chi2_exp_cpd":
            GAMMA = self.get_gamma_value(data, chi2=True)
            self["--gamma"] = GAMMA
            self.__kernel_dict = {"gamma": GAMMA}
        elif self["kernel"] == "laplacian":
            GAMMA = self.get_gamma_value(data)
            self["--gamma"] = GAMMA
            # The laplacian kernel takes the square root of the rbf gamma.
            self.__kernel_dict = {"gamma": np.sqrt(GAMMA)}
        else:
            self.__kernel_dict = {}

    def __getitem__(self, item):
        # Expose the privately-held kernel dict through the same mapping
        # interface as every other parameter.
        if item == "kernel_dict":
            return self.__kernel_dict
        else:
            return super().__getitem__(item)
class ResultManagerMain(ResultManager):
    """Result container for one run: timings and accuracies."""

    def __init__(self):
        super().__init__()
        # Pre-register every metric key so the result printer always finds
        # them, even when a run aborts before they are filled in.
        for metric in ("training_time", "val_eval_time", "val_acc",
                       "test_acc", "test_eval_time"):
            self[metric] = None
def main(paraman: ParameterManagerMain, resman, printman):
    """Build, train and evaluate the requested architecture on the requested dataset.

    Fills `resman` with the training time, validation/test accuracies and
    evaluation times, then prints everything through `printman`.

    Raises:
        ValueError: unknown dataset or network name.
        NotImplementedError: unsupported kernel for the deepstrom network.
    """
    # --- dataset selection (also fixes the conv backbone and augmentation) ---
    if paraman["dataset"] == "mnist":
        data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_lenet_model
        datagen_train = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=False)
    elif paraman["dataset"] == "cifar10":
        data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model_glorot
        datagen_train = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)
    elif paraman["dataset"] == "cifar100":
        data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model_glorot
        datagen_train = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)
    elif paraman["dataset"] == "svhn":
        data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model_glorot
        datagen_train = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=False)
    else:
        raise ValueError("Unknown dataset")
    datagen_eval = ImageDataGenerator()  # evaluation uses un-augmented data

    # --- data preparation -----------------------------------------------
    data.load()
    data.to_one_hot()
    if not data.is_image():
        data.to_image()
    data.data_astype(np.float32)
    data.labels_astype(np.float32)
    data.normalize()
    if paraman["--train-size"] is not None:
        X_train, y_train = data.train.data[:paraman["--train-size"]], data.train.labels[:paraman["--train-size"]]
    else:
        X_train, y_train = data.train.data, data.train.labels
    X_test, y_test = data.test.data, data.test.labels
    X_val, y_val = data.validation.data, data.validation.labels
    paraman.init_kernel_dict(X_train)

    # # Model definition
    input_dim = X_train.shape[1:]
    output_dim = y_train.shape[1]
    convnet_model = convmodel_func(input_dim)
    input_x = Input(shape=input_dim, name="x")
    repr_x = convnet_model(input_x)
    feature_model = Model(input_x, repr_x)
    logger.debug(paraman["kernel_dict"])
    list_repr_subsample_bases = []
    list_subsample_bases = []  # only populated for the deepstrom network
    if paraman["network"] == "deepstrom":
        # The Nystrom subsample enters the graph as extra fixed-batch inputs.
        input_repr_subsample = [Input(batch_shape=(paraman["--batch-size"], feature_model.output_shape[1])) for _ in range(paraman["nb_subsample_bases"])]  # todo
        if paraman["nb_subsample_bases"] > 1:
            input_subsample_concat = concatenate(input_repr_subsample, axis=0)
        else:
            input_subsample_concat = input_repr_subsample[0]
        # Drop the zero-padding rows: keep only the first --nys-size samples.
        slice_layer = Lambda(lambda tens: tens[:paraman["--nys-size"]],
                             output_shape=lambda shape: (paraman["--nys-size"], *shape[1:]))
        input_subsample_concat = slice_layer(input_subsample_concat)
        if paraman["kernel"] == "linear":
            kernel_function = lambda *args, **kwargs: map_kernel_name_function["linear"](*args, **kwargs, normalize=True, **paraman["kernel_dict"])
        elif paraman["kernel"] == "rbf":
            kernel_function = lambda *args, **kwargs: map_kernel_name_function["rbf"](*args, **kwargs, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
        elif paraman["kernel"] == "chi2_cpd":
            kernel_function = lambda *args, **kwargs: map_kernel_name_function["chi2_cpd"](*args, **kwargs, epsilon=1e-8, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
        elif paraman["kernel"] == "chi2_exp_cpd":
            kernel_function = lambda *args, **kwargs: map_kernel_name_function["chi2_exp_cpd"](*args, **kwargs, epsilon=1e-8, tanh_activation=True, normalize=True, **paraman["kernel_dict"])
        else:
            raise NotImplementedError(f"unknown kernel function {paraman['kernel']}")
        kernel_layer = Lambda(kernel_function, output_shape=lambda shapes: (shapes[0][0], paraman["--nys-size"]))
        kernel_vector = kernel_layer([repr_x, input_subsample_concat])
        input_classifier = Dense(paraman["--nys-size"], use_bias=False, activation='linear')(kernel_vector)  # 512 is the output dim of convolutional layers
        if paraman["--non-linear"]:
            input_classifier = paraman["activation_function"](input_classifier)
        subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
        nys_subsample = data.validation.data[subsample_indexes]
        # Pad the subsample with zero rows up to a multiple of the batch size.
        zero_padding_subsample = np.zeros((paraman["zero_padding_base"], *nys_subsample.shape[1:]))
        nys_subsample = np.vstack([nys_subsample, zero_padding_subsample])
        list_subsample_bases = [nys_subsample[i * paraman["--batch-size"]:(i + 1) * paraman["--batch-size"]] for i in range(paraman["nb_subsample_bases"])]
    elif paraman["network"] == "dense":
        dense_layer = Dense(paraman["--out-dim"], activation=paraman["activation_function"])
        input_classifier = dense_layer(repr_x)
    elif paraman["network"] == "deepfriedconvnet":
        deepfried_layer = FastFoodLayer(sigma=1 / paraman["--gamma"], nbr_stack=paraman["--nb-stack"], trainable=not paraman["--real-fastfood"])
        input_classifier = deepfried_layer(repr_x)
    else:
        raise ValueError(f"Not recognized network {paraman['network']}")
    if paraman["--second-layer-size"] > 0:
        dense_layer2 = Dense(paraman["--second-layer-size"], activation=paraman["activation_function"])
        input_classifier = dense_layer2(input_classifier)
    with K.name_scope("classification"):
        classif = Dense(output_dim, activation="softmax")(input_classifier)
    if paraman["network"] == "deepstrom":
        model = Model([input_x] + input_repr_subsample, [classif])
    else:
        model = Model([input_x], [classif])
    opt = Adam(1e-4)
    model.compile(loss=['categorical_crossentropy'], optimizer=opt, metrics=['accuracy'])
    if paraman["--tensorboard"]:
        tensorboard_log_file = f"log/{int(t.time())}/{paraman['dataset']}/nys_size_{paraman['--nys-size']}/"
        # NOTE(review): this callback is built but never handed to the
        # training loop (train_on_batch takes no callbacks), so it has no
        # effect; kept as-is to preserve the original behavior.
        keras.callbacks.TensorBoard(log_dir=tensorboard_log_file,
                                    batch_size=paraman["--batch-size"],
                                    write_graph=True,
                                    write_grads=True,
                                    write_images=True)

    # actual learning
    global_start = t.time()
    j = 0  # global batch counter across epochs
    for i in range(paraman["--num-epoch"]):
        # logger.debug(memory_usage())
        total_loss = 0
        total_acc = 0
        for k, (X_batch, Y_batch) in enumerate(datagen_train.flow(X_train, y_train, batch_size=paraman["--batch-size"])):
            if X_batch.shape[0] != paraman["--batch-size"]:
                continue
            if paraman["network"] == "deepstrom":
                # Refresh the subsample representations every --subs-every
                # batches, or once per epoch when --subs-every is -1.
                # Bug fix: the original tested `j % subs_every == 0` first,
                # which in Python is always true for subs_every == -1 (so the
                # once-per-epoch mode refreshed on every batch and the elif
                # was dead), and it also dereferenced list_subsample_bases
                # for non-deepstrom networks, raising a NameError.
                if paraman["--subs-every"] == -1:
                    refresh_subsample = (k == 0)
                else:
                    refresh_subsample = (j % paraman["--subs-every"] == 0)
                if refresh_subsample:
                    list_repr_subsample_bases = [feature_model.predict(subs_base) for subs_base in list_subsample_bases]
                loss, acc = model.train_on_batch([X_batch] + list_repr_subsample_bases, [Y_batch])
            else:
                loss, acc = model.train_on_batch([X_batch], [Y_batch])
            if j % 100 == 0:
                logger.info(
                    "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"],
                                                                                            k, int(X_train.shape[0] / paraman["--batch-size"]) + 1,
                                                                                            X_batch.shape, loss,
                                                                                            acc))
            total_loss += loss
            total_acc += acc
            j += 1
            # datagen.flow loops forever; stop after one pass over the data.
            if k > int(X_train.shape[0] // paraman["--batch-size"]):
                break
        # NOTE(review): dividing by the last enumerate index `k` slightly
        # misestimates the epoch mean when partial batches were skipped;
        # kept as-is to preserve the reported metric.
        logger.info(
            "epoch: {}/{}; loss: {}; acc: {}".format(i, paraman["--num-epoch"], total_loss / k, total_acc / k))
        logger.debug("memory usage: " + memory_usage())

    if paraman["network"] == "deepstrom":
        # Recompute with the final (trained) feature model for evaluation.
        list_repr_subsample_bases = [feature_model.predict(subs_base) for subs_base in list_subsample_bases]
    training_time = t.time() - global_start
    resman["training_time"] = training_time
    acc_val, time_val = evaluation_function(X_val, y_val, model, list_repr_subsample_bases, datagen_eval, paraman, message="validation")
    resman["val_acc"] = acc_val
    resman["val_eval_time"] = time_val
    acc_test, time_test = evaluation_function(X_test, y_test, model, list_repr_subsample_bases, datagen_eval, paraman, message="test")
    resman["test_acc"] = acc_test
    resman["test_eval_time"] = time_test
    printman.print()
if __name__ == "__main__":
    paraman_obj = ParameterManagerMain(docopt.docopt(__doc__))
    resman_obj = ResultManagerMain()
    printman_obj = ResultPrinter(paraman_obj, resman_obj)
    try:
        main(paraman_obj, resman_obj, printman_obj)
    except Exception:
        # Print whatever results were collected before propagating the error.
        printman_obj.print()
        # Idiom fix: bare `raise` re-raises with the original traceback
        # (the original `raise e` is the less idiomatic equivalent).
        raise
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment