Commit 612baedb authored by Luc Giffon

exp lazyfile_classif_end_to_end_subsample_conv_hand script + param

parent 68059037
@@ -1,5 +1,3 @@
 all:
-  dense:
-  # deepfried:
   deepstrom:
 base:
@@ -9,21 +7,11 @@ base:
   val_size: {"-v": [10000]}
   seed: {"-a": "range(1)"}
   quiet: ["-q"]
-  dataset: ["--mnist", "--cifar10", "--cifar100", "--svhn"]
-
-dense:
-  network: ["dense"]
-  base:
-    repr_dim: {"-D": [16, 64, 128, 1024]}
-
-deepfried:
-  network: ["deepfriedconvnet"]
-  base:
-    nbstacks: {"-N": [1, 3, 5, 7]}
+  dataset: ["--mnist", "--cifar10", "--svhn"]
 
 deepstrom:
   network: ["deepstrom"]
   base:
     nys_size: {"-m": [4, 8, 16, 64, 128, 256, 512]}
-    kernel: ["-C", "-L"]
+    kernel: ["-C"]
@@ -54,7 +54,7 @@ from tensorflow.python.keras.layers import Dense
 from tensorflow.python.keras.models import Sequential
 from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
-from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd, KernelLayerEndToEnd
 from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model
 from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
 from skluc.main.tensorflow_.utils import batch_generator
@@ -155,7 +155,14 @@ def main(paraman, resman, printman):
     subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
     nys_subsample = data.validation.data[subsample_indexes]
+    y_subsample = data.validation.labels[subsample_indexes]
+    y_train_y_subsample = np.zeros((X_train.shape[0], paraman["--nys-size"]))  # the matrix that, for each training example, flags which subsample examples share its class
+    for i in range(X_train.shape[0]):
+        y_train_y_subsample[i][np.where(np.argmax(y_subsample, axis=1) == np.argmax(y_train[i]))[0]] = 1.
+    y_test_y_subsample = np.zeros((X_test.shape[0], paraman["--nys-size"]))  # the matrix that, for each test example, flags which subsample examples share its class
+    for i in range(X_test.shape[0]):
+        y_test_y_subsample[i][np.where(np.argmax(y_subsample, axis=1) == np.argmax(y_test[i]))[0]] = 1.
 
     # # Model definition
     input_dim = X_train.shape[1:]
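The two loops added above build binary membership matrices: entry (i, m) is 1. when example i belongs to the same class as subsample example m. Assuming one-hot labels throughout (as the np.argmax calls suggest), the same matrices can be built without Python loops via broadcasting; a sketch:

import numpy as np

def same_class_matrix(y_examples, y_subsample):
    # y_examples: (n, n_classes) one-hot labels; y_subsample: (m, n_classes).
    # Returns an (n, m) matrix with 1. where the argmax classes match.
    classes_ex = np.argmax(y_examples, axis=1)    # (n,)
    classes_sub = np.argmax(y_subsample, axis=1)  # (m,)
    return (classes_ex[:, None] == classes_sub[None, :]).astype(np.float64)

# e.g. y_train_y_subsample = same_class_matrix(y_train, y_subsample)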
@@ -169,14 +176,33 @@ def main(paraman, resman, printman):
     repr_x = convnet_model(x)
     repr_sub = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *repr_x.shape[1:]], name="subsample_conv_input")
+    y_sub = tf.placeholder(tf.float32, shape=[None, paraman["--nys-size"]], name="subsample_labels")
 
-    deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"],
-                                             kernel_name=paraman["kernel"],
-                                             kernel_dict=paraman["kernel_dict"],
-                                             activation=paraman["deepstrom_activation"],
-                                             out_dim=paraman["--out-dim"])
+    kernel_layer = KernelLayerEndToEnd(subsample_size=paraman["--nys-size"],
+                                       kernel_name=paraman["kernel"],
+                                       kernel_dict=paraman["kernel_dict"]
+                                       )
 
-    input_classifier = deepstrom_layer([repr_x, repr_sub])
+    kernel_vec = kernel_layer([repr_x, repr_sub])
+
+    with tf.name_scope("mse"):
+        mse_kernel_vec = tf.losses.mean_squared_error(predictions=kernel_vec, labels=y_sub)
+        tf.summary.scalar('mse_kernel_vec', mse_kernel_vec)
+
+    if paraman["deepstrom_activation"] == "tan":
+        activation_fct = tf.nn.tanh
+    elif paraman["deepstrom_activation"] == "relu":
+        activation_fct = tf.nn.relu
+    else:
+        activation_fct = None
+
+    if paraman["--out-dim"] is not None and paraman["--out-dim"] > paraman["--nys-size"]:
+        raise ValueError("Output dim {} is greater than deepstrom subsample size {}. Aborting.".format(paraman["--out-dim"], paraman["--nys-size"]))
+    elif paraman["--out-dim"] is None:
+        output_dim_w_nys = paraman["--nys-size"]
+    else:
+        output_dim_w_nys = paraman["--out-dim"]
+
+    input_classifier = Dense(output_dim_w_nys, activation=activation_fct)(kernel_vec)
 
     with tf.variable_scope("classification"):
         classif_layer = Dense(output_dim)
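The replaced block splits the old DeepstromLayerEndToEnd into explicit stages: KernelLayerEndToEnd produces, for each input representation, the vector of kernel evaluations against the m subsample representations (trained here to match the class-membership targets via the MSE), and a Dense layer then projects that vector to output_dim_w_nys features for the classifier. As a rough picture of the kernel stage only, here is an illustrative Keras-style layer for an RBF kernel; the real layer lives in skluc, its kernel is selected by paraman["kernel"], and flattened representations of size d are assumed:

import tensorflow as tf
from tensorflow.python.keras.layers import Layer

class RBFKernelLayer(Layer):
    # Computes k(x_i, s_j) between a batch of inputs and a fixed-size subsample.
    def __init__(self, gamma=0.1, **kwargs):
        super().__init__(**kwargs)
        self.gamma = gamma

    def call(self, inputs):
        x, sub = inputs  # x: (batch, d), sub: (m, d)
        sq_dists = tf.reduce_sum(
            tf.square(tf.expand_dims(x, 1) - tf.expand_dims(sub, 0)), axis=-1)
        return tf.exp(-self.gamma * sq_dists)  # (batch, m)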
@@ -193,8 +219,11 @@ def main(paraman, resman, printman):
     with tf.name_scope("train"):
         global_step = tf.Variable(0, name="global_step", trainable=False)
         lr = 1e-4
-        train_optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cross_entropy,
-                                                                            global_step=global_step)
+        train_optimizer_classif = tf.train.AdamOptimizer(learning_rate=lr).minimize(cross_entropy,
+                                                                                    global_step=global_step)
+        train_optimizer_kernel_vec = tf.train.AdamOptimizer(learning_rate=lr).minimize(mse_kernel_vec,
+                                                                                       global_step=global_step)
+        train_op = tf.group(train_optimizer_classif, train_optimizer_kernel_vec)
 
     # compute the accuracy
     with tf.name_scope("accuracy"):
@@ -228,28 +257,35 @@ def main(paraman, resman, printman):
         j = 0
         for i in range(paraman["--num-epoch"]):
             logger.debug(memory_usage())
-            for X_batch, Y_batch in batch_generator(X_train, y_train, paraman["--batch-size"], False):
+            k = 0
+            for i_batch, (X_batch, Y_batch) in enumerate(batch_generator(X_train, y_train, paraman["--batch-size"], False)):
+                idx_start_batch = i_batch * paraman["--batch-size"]
+                idx_stop_batch = (i_batch + 1) * paraman["--batch-size"]
+                y_train_y_subsample_batch = y_train_y_subsample[idx_start_batch:idx_stop_batch]
+                print(np.argmax(Y_batch, axis=1))
+                print(np.argmax(y_subsample, axis=1))
+                print(y_train_y_subsample_batch)
+                print()
                 if j % paraman["--subs-every"] == 0:
                     feed_dict = {x: nys_subsample}
                     computed_repr_sub, = sess.run([repr_x], feed_dict=feed_dict)
                 try:
-                    feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub}
+                    feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub, y_sub: y_train_y_subsample_batch}
                 except NameError as e:
                     logger.error("A representation of the subsample must have been computed at least once for it to be used in the deepstrom")
                     raise e
-                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
+                _, loss, acc, summary_str = sess.run([train_op, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                 if j % 100 == 0:
                     logger.info(
-                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"], j + 1,
-                                                                                                int(data.train[0].shape[0] / paraman["--batch-size"]) + 1,
+                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"],
+                                                                                                k + 1, int(data.train[0].shape[0] / paraman["--batch-size"]) + 1,
                                                                                                 X_batch.shape, loss,
                                                                                                 acc))
                     if paraman["--tensorboard"]:
                         summary_writer.add_summary(summary_str, j)
                 j += 1
+                k += 1
 
         logger.info("Evaluation on validation data")
         training_time = t.time() - global_start
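The idx_start_batch/idx_stop_batch slicing stays aligned with y_train_y_subsample only because batch_generator is called with False, i.e. batches are drawn in array order. If shuffling were enabled, a safe pattern is to permute shared indices and slice every array with them; a sketch with a hypothetical aligned_batches helper:

import numpy as np

def aligned_batches(X, y, y_membership, batch_size, shuffle=True, seed=0):
    # Yields (X, y, membership) batches that stay aligned even under shuffling,
    # by permuting a shared index array before slicing.
    order = np.arange(X.shape[0])
    if shuffle:
        np.random.RandomState(seed).shuffle(order)
    for start in range(0, len(order), batch_size):
        idx = order[start:start + batch_size]
        yield X[idx], y[idx], y_membership[idx]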
...