Skip to content
Snippets Groups Projects
Commit c9460a19 authored by Luc Giffon's avatar Luc Giffon
Browse files

init expe /end_to_end_with_augment script

parent e0fe50d4
No related branches found
No related tags found
No related merge requests found
"""
Benchmark VGG: Benchmarking deepstrom versus other architectures of the VGG network.
Usage:
benchmark_classification dense [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-l size] [-V]
benchmark_classification deepfriedconvnet [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-a value] [-v size] [-e numepoch] [-s batchsize] [-g gammavalue] [-N nbstack] [-l size] [-z] [-V]
benchmark_classification deepstrom [-q] [--cifar100|--cifar10|--mnist|--svhn] [-f name] [-t size] [-r] [-a value] [-v size] [-e numepoch] [-s batchsize] [-D reprdim] [-m size] (-R|-L|-C|-E|-P|-S|-A|-T|-M) [-g gammavalue] [-c cvalue] [-n] [-l size] [-V]
Options:
--help -h Display help and exit.
-q --quiet Set logging level to info.
-V --tensorboard Write tensorboard logs.
-a --seed value The seed value used for all randomization processed [default: 0]
-t --train-size size Size of train set.
-v --validation-size size The size of the validation set [default: 10000]
-e --num-epoch=numepoch The number of epoch.
-s --batch-size=batchsize The number of example in each batch
-d --dropout val Keep probability of neurons before classif [default: 1.0]
-D reprdim --out-dim=reprdim The dimension of the final representation
-f --non-linearity name Tell the model which non-linearity to use when necessary (possible values: "relu", "tanh") [default: relu]
Dense:
-l --second-layer-size size Says the size of the second non-linear layer [default: 0]
Deepfried convnet:
-N nbstack --nb-stack nbstack The number of fastfood stack for deepfriedconvnet
-z --real-fastfood Tell fastfood layer to not update its weights
Deepstrom:
-r --real-nystrom Says if the matrix for deepstrom should be K^(-1/2)
-m size --nys-size size The number of example in the nystrom subsample.
-n --non-linear Tell Nystrom to use the non linear activation function on its output.
Datasets:
--cifar10 Use cifar dataset
--mnist Use mnist dataset
--svhn Use svhn dataset
--cifar100 Use cifar100 dataset
Possible kernels:
-R --rbf-kernel Says if the rbf kernel should be used for nystrom.
-L --linear-kernel Says if the linear kernel should be used for nystrom.
-C --chi-square-kernel Says if the basic additive chi square kernel should be used for nystrom.
-E --exp-chi-square-kernel Says if the exponential chi square kernel should be used for nystrom.
-P --chi-square-PD-kernel Says if the Positive definite version of the basic additive chi square kernel should be used for nystrom.
-S --sigmoid-kernel Says it the sigmoid kernel should be used for nystrom.
-A --laplacian-kernel Says if the laplacian kernel should be used for nystrom.
-T --stacked-kernel Says if the kernels laplacian, chi2 and rbf in a stacked setting should be used for nystrom.
-M --sumed-kernel Says if the kernels laplacian, chi2 and rbf in a summed setting should be used for nystrom.
Kernel related:
-g gammavalue --gamma gammavalue The value of gamma for rbf, chi or hyperbolic tangent kernel (deepstrom and deepfriedconvnet)
-c cvalue --intercept-constant cvalue The value of the intercept constant for the hyperbolic tangent kernel.
"""
import skluc.main.data.mldatasets as dataset
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.initializers import he_normal
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model
from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
from skluc.main.tensorflow_.utils import batch_generator
import time as t
import docopt
class ParameterManagerMain(ParameterManager):
    """Parse and normalize the docopt command-line arguments for this script.

    Casts the raw docopt string values to their proper types and derives
    higher-level settings (kernel name, network kind, dataset, activation).
    """

    def __init__(self, docopt_dict):
        super().__init__(docopt_dict)
        # "--out-dim" arrives from docopt as a string or None; treat the
        # literal string "None" as missing.  (Bug fix: the original used
        # eval() on the raw CLI value, which executed arbitrary input.)
        raw_out_dim = self["--out-dim"]
        if raw_out_dim is None or str(raw_out_dim) == "None":
            self["--out-dim"] = None
        else:
            self["--out-dim"] = int(raw_out_dim)
        self["kernel"] = self.init_kernel()
        self["network"] = self.init_network()
        self["activation_function"] = self.init_non_linearity()
        self["dataset"] = self.init_dataset()
        self["--nb-stack"] = int(self["--nb-stack"]) if self["--nb-stack"] is not None else None
        self["--nys-size"] = int(self["--nys-size"]) if self["--nys-size"] is not None else None
        self["--num-epoch"] = int(self["--num-epoch"])
        self["--validation-size"] = int(self["--validation-size"])
        self["--seed"] = int(self["--seed"])
        self["--batch-size"] = int(self["--batch-size"])
        self["deepstrom_activation"] = self.init_deepstrom_activation()
        # Filled lazily by init_kernel_dict once the training data is known.
        self.__kernel_dict = None

    def init_deepstrom_activation(self):
        """Return the activation name for the deepstrom layer, or None."""
        if not self["deepstrom"]:
            return None
        # Only apply a non-linearity when explicitly requested with -n.
        if self["--non-linear"]:
            return self["--non-linearity"]
        return None

    def init_kernel_dict(self, data):
        """Build the kernel parameter dict, deriving gamma from `data` when needed."""
        if self["kernel"] == "rbf":
            gamma = self.get_gamma_value(data)
            self["--gamma"] = gamma
            self.__kernel_dict = {"gamma": gamma}
        elif self["kernel"] == "chi2_exp_cpd":
            gamma = self.get_gamma_value(data, chi2=True)
            self["--gamma"] = gamma
            self.__kernel_dict = {"gamma": gamma}
        elif self["kernel"] == "laplacian":
            gamma = self.get_gamma_value(data)
            self["--gamma"] = gamma
            # The laplacian kernel is parameterized by sqrt(gamma) here.
            self.__kernel_dict = {"gamma": np.sqrt(gamma)}
        else:
            self.__kernel_dict = {}

    def __getitem__(self, item):
        # Expose the private kernel dict under the "kernel_dict" pseudo-key.
        if item == "kernel_dict":
            return self.__kernel_dict
        return super().__getitem__(item)
class ResultManagerMain(ResultManager):
    """Result container pre-populated with the metrics this script records."""

    def __init__(self):
        super().__init__()
        # Initialize every tracked metric so downstream printing always
        # finds the keys, even if training aborts early.
        for metric in ("training_time", "val_eval_time", "val_acc",
                       "test_acc", "test_eval_time"):
            self[metric] = None
def cifar100_extended_convmodel_func(input_shape, weight_decay=0.0001):
    """Build a VGG19 convolution model extended with a 4096-unit dense layer.

    Used for cifar100, where the feature extractor gets one extra
    fully-connected layer on top of the stock VGG19 stack.
    """
    model = build_vgg19_model(input_shape, weight_decay)
    extra_fc = Dense(
        4096,
        use_bias=True,
        kernel_regularizer=l2(weight_decay),
        kernel_initializer=he_normal(),
        name='fc_cifar100',
    )
    model.add(extra_fc)
    return model
def main(paraman, resman, printman):
    """Train and evaluate the selected architecture on the selected dataset.

    Builds the augmented data pipeline, the convolutional feature extractor,
    the chosen top layer (dense / deepstrom / deepfriedconvnet) and a softmax
    classifier, trains with Adam, then records training time plus validation
    and test accuracy into `resman` and prints them via `printman`.
    """
    # --- dataset and feature-extractor selection ---------------------------
    if paraman["dataset"] == "mnist":
        data = dataset.MnistDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_lenet_model
    elif paraman["dataset"] == "cifar10":
        data = dataset.Cifar10Dataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model
    elif paraman["dataset"] == "cifar100":
        data = dataset.Cifar100FineDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = cifar100_extended_convmodel_func
    elif paraman["dataset"] == "svhn":
        data = dataset.SVHNDataset(validation_size=paraman["--validation-size"], seed=paraman["--seed"])
        convmodel_func = build_vgg19_model
    else:
        raise ValueError("Unknown dataset")

    data.load()
    data.to_one_hot()
    if not data.is_image():
        data.to_image()
    data.data_astype(np.float32)
    data.labels_astype(np.float32)
    data.normalize()

    X_train, y_train = data.train.data, data.train.labels
    X_test, y_test = data.test.data, data.test.labels
    X_val, y_val = data.validation.data, data.validation.labels

    # Data augmentation, applied to training batches only.
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)
    datagen.fit(X_train)

    paraman.init_kernel_dict(X_train)

    # # Model definition
    input_dim = X_train.shape[1:]
    output_dim = y_train.shape[1]
    x = tf.placeholder(tf.float32, shape=[None, *input_dim], name="x")
    y = tf.placeholder(tf.float32, shape=[None, output_dim], name="label")
    # Nystrom subsample placeholder; only fed for deepstrom runs
    # ("--nys-size" is None otherwise, giving a free leading dimension).
    subs = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *input_dim], name="subsample")

    # The same convolutional model processes both the inputs and the subsample.
    convnet_model = convmodel_func(x.shape[1:])
    repr_x = convnet_model(x)
    repr_sub = convnet_model(subs)

    logger.debug(paraman["kernel_dict"])
    if paraman["network"] == "deepstrom":
        deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"],
                                                 kernel_name=paraman["kernel"],
                                                 kernel_dict=paraman["kernel_dict"],
                                                 activation=paraman["deepstrom_activation"],
                                                 out_dim=paraman["--out-dim"])
        input_classifier = deepstrom_layer([repr_x, repr_sub])
        # Class-balanced subsample drawn from the validation set.
        subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
        nys_subsample = data.validation.data[subsample_indexes]
    elif paraman["network"] == "dense":
        dense_layer = Dense(paraman["--out-dim"], activation=paraman["activation_function"])
        input_classifier = dense_layer(repr_x)
    elif paraman["network"] == "deepfriedconvnet":
        # NOTE(review): "--gamma" is only cast to a number by init_kernel_dict
        # for some kernels — confirm 1/gamma cannot receive a raw string here.
        deepfried_layer = FastFoodLayer(sigma=1/paraman["--gamma"], nbr_stack=paraman["--nb-stack"], trainable=not paraman["--real-fastfood"])
        input_classifier = deepfried_layer(repr_x)
    else:
        raise ValueError(f"Not recognized network {paraman['network']}")

    with tf.variable_scope("classification"):
        classif = Dense(output_dim)(input_classifier)

    # Loss computation.
    with tf.name_scope("xent"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=classif, name="xentropy"),
            name="xentropy_mean")
        tf.summary.scalar('loss-xent', cross_entropy)

    # Gradient computation.
    with tf.name_scope("train"):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
                                                                              global_step=global_step)

    # Accuracy computation.
    with tf.name_scope("accuracy"):
        predictions = tf.argmax(classif, 1)
        correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy_op)

    merged_summary = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    summary_writer = None
    if paraman["--tensorboard"]:
        summary_writer = tf.summary.FileWriter("debug_classification_end_to_end")

    with tf.Session() as sess:
        logger.info("Start training")
        if paraman["--tensorboard"]:
            summary_writer.add_graph(sess.graph)
        # Initialize all Variable objects
        sess.run(init)
        # actual learning
        global_start = t.time()
        j = 0
        # datagen.flow() yields batches forever, so each epoch must be capped
        # explicitly.  (Bug fix: the original inner loop had no break and
        # never finished the first epoch.)
        steps_per_epoch = int(data.train[0].shape[0] / paraman["--batch-size"]) + 1
        for i in range(paraman["--num-epoch"]):
            logger.debug(memory_usage())
            k = 0
            for X_batch, Y_batch in datagen.flow(X_train, y_train, batch_size=paraman["--batch-size"]):
                if k >= steps_per_epoch:
                    break
                if paraman["network"] == "deepstrom":
                    feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
                else:
                    feed_dict = {x: X_batch, y: Y_batch}
                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                if j % 100 == 0:
                    logger.info(
                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"],
                                                                                                k, steps_per_epoch,
                                                                                                X_batch.shape, loss,
                                                                                                acc))
                    if paraman["--tensorboard"]:
                        summary_writer.add_summary(summary_str, j)
                k += 1
                j += 1

        logger.info("Evaluation on validation data")
        training_time = t.time() - global_start
        resman["training_time"] = training_time
        accuracies_val = []
        i = 0
        val_eval_start = t.time()
        for X_batch, Y_batch in batch_generator(X_val, y_val, 1000, False):
            if paraman["network"] == "deepstrom":
                feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
            else:
                feed_dict = {x: X_batch, y: Y_batch}
            accuracy = sess.run([accuracy_op], feed_dict=feed_dict)
            accuracies_val.append(accuracy[0])
            i += 1
        # Mean of per-batch accuracies (batches are equal-sized except the last).
        global_acc_val = sum(accuracies_val) / i
        VAL_EVAL_TIME = t.time() - val_eval_start
        resman["val_eval_time"] = VAL_EVAL_TIME
        resman["val_acc"] = global_acc_val

        logger.info("Evaluation on test data")
        accuracies_test = []
        i = 0
        test_eval_start = t.time()
        for X_batch, Y_batch in batch_generator(X_test, y_test, 1000, False):
            if paraman["network"] == "deepstrom":
                feed_dict = {x: X_batch, y: Y_batch, subs: nys_subsample}
            else:
                feed_dict = {x: X_batch, y: Y_batch}
            accuracy = sess.run([accuracy_op], feed_dict=feed_dict)
            accuracies_test.append(accuracy[0])
            i += 1
        global_acc_test = sum(accuracies_test) / i
        TEST_EVAL_TIME = t.time() - test_eval_start
        resman["test_acc"] = global_acc_test
        resman["test_eval_time"] = TEST_EVAL_TIME

        printman.print()
if __name__ == "__main__":
    paraman_obj = ParameterManagerMain(docopt.docopt(__doc__))
    resman_obj = ResultManagerMain()
    printman_obj = ResultPrinter(paraman_obj, resman_obj)
    try:
        main(paraman_obj, resman_obj, printman_obj)
    except Exception:
        # Print whatever partial results were collected before propagating.
        # (Fix: bare `raise` preserves the original traceback; `raise e`
        # added a redundant frame.)
        printman_obj.print()
        raise
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment