Commit 0fcadc6c authored by Luc Giffon
deepfriedconvnet now uses the batch generator + slight changes in fastfood layer variable names

parent 6d849225
@@ -13,7 +13,8 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
 import tensorflow as tf
 import numpy as np
 import skluc.mldatasets as dataset
-from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2, get_next_batch
+from skluc.neural_networks import convolution_mnist, classification_mnist, batch_generator
+from fasfood_layer import fast_food
 tf.logging.set_verbosity(tf.logging.ERROR)
@@ -43,73 +44,6 @@ X_test = X_test.astype(np.float32)
 Y_train = Y_train.astype(np.float32)
 Y_test = Y_test.astype(np.float32)
-from fasfood_layer import fast_food
-
-
-# todo: use the functions adapted/defined for these convolution layers
-def convolution_mnist(input):
-    with tf.name_scope("conv_pool_1"):
-        # 20 is the number of filters we'll use, i.e. the number of different
-        # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 20])
-        tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([20])
-        tf.summary.histogram("biases conv1", b_conv1)
-        # -1 is here to keep the total size constant (784)
-        h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
-        tf.summary.histogram("act conv1", h_conv1)
-        h_pool1 = max_pool_2x2(h_conv1)
-
-    with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 20, 50])
-        tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([50])
-        tf.summary.histogram("biases conv2", b_conv2)
-        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-        tf.summary.histogram("act conv2", h_conv2)
-        h_pool2 = max_pool_2x2(h_conv2)
-
-    return h_pool2
-
-
-# --- Random Fourier Features --- #
-
-def random_variable(shape, sigma):
-    W = np.random.normal(size=shape, scale=sigma).astype(np.float32)
-    return tf.Variable(W, name="random_Weights", trainable=False)
-
-
-def random_biases(shape):
-    b = np.random.uniform(0, 2 * np.pi, size=shape).astype(np.float32)
-    return tf.Variable(b, name="random_biase", trainable=False)
-
-
-# --- Representation Layer --- #
-
-def random_features(conv_out, sigma):
-    with tf.name_scope("random_features"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        conv_out2 = tf.reshape(conv_out, [-1, init_dim])
-        W = random_variable((init_dim, init_dim), sigma)
-        b = random_biases(init_dim)
-        h1 = tf.matmul(conv_out2, W, name="Wx") + b
-        h1_cos = tf.cos(h1)
-        h1_final = tf.scalar_mul(np.sqrt(2.0 / init_dim).astype(np.float32), h1_cos)
-    return h1_final
-
-
-def fully_connected(conv_out):
-    with tf.name_scope("fc_1"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
-        W_fc1 = weight_variable([init_dim, 4096 * 2])
-        b_fc1 = bias_variable([4096 * 2])
-        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
-        tf.summary.histogram("weights", W_fc1)
-        tf.summary.histogram("biases", b_fc1)
-    return h_fc1
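For context, the removed `random_features` block is the standard random Fourier features map: z(x) = sqrt(2/D) * cos(Wx + b) with Gaussian W and uniform b, so that z(x)·z(y) approximates a Gaussian kernel whose bandwidth is governed by sigma. A minimal NumPy sketch of the same mapping, outside the diff (the names `rff_map` and `D` are illustrative, not from the repository):

```python
import numpy as np

def rff_map(X, sigma, D, seed=0):
    """Random Fourier features: z(x) = sqrt(2/D) * cos(Wx + b).

    Mirrors the removed random_features layer: W ~ N(0, sigma^2),
    b ~ U(0, 2*pi), both fixed (non-trainable).
    """
    rng = np.random.RandomState(seed)
    d = X.shape[1]
    W = rng.normal(scale=sigma, size=(d, D)).astype(np.float32)
    b = rng.uniform(0, 2 * np.pi, size=D).astype(np.float32)
    return np.sqrt(2.0 / D).astype(np.float32) * np.cos(X @ W + b)

# z(x) . z(y) approximates the shift-invariant kernel induced by W's law
X = np.random.randn(5, 16).astype(np.float32)
Z = rff_map(X, sigma=5.0, D=1024)
K_approx = Z @ Z.T  # approximate Gram matrix
```

The removed layer is the square case D = init_dim; the sketch simply exposes D as a parameter.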
 if __name__ == '__main__':
     SIGMA = 5.0
@@ -128,26 +62,16 @@ if __name__ == '__main__':
     # Representation layer
     h_conv = convolution_mnist(x_image)
-    # h_conv = x
     # out_fc = fully_connected(h_conv)  # 95% accuracy
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=1))  # 83% accuracy (conv) | 56% accuracy (noconv)
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2, trainable=True))
-    out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
-    # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+    # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+    out_fc = fast_food(h_conv, SIGMA, nbr_stack=1, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
     # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
     # classification
-    with tf.name_scope("fc_2"):
-        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
-        h_fc1_drop = tf.nn.dropout(out_fc, keep_prob)
-        dim = np.prod([s.value for s in h_fc1_drop.shape if s.value is not None])
-        W_fc2 = weight_variable([dim, output_dim])
-        b_fc2 = bias_variable([output_dim])
-        tf.summary.histogram("weights", W_fc2)
-        tf.summary.histogram("biases", b_fc2)
-        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+    y_conv, keep_prob = classification_mnist(out_fc, output_dim)
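The removed `fc_2` block (dropout followed by a dense read-out layer) is presumably what `classification_mnist` now wraps in skluc. A sketch consistent with the removed code, assuming the helper keeps the placeholder-based dropout and skluc's `weight_variable`/`bias_variable` helpers (this body is reconstructed from the deleted lines, not the actual skluc implementation):

```python
def classification_mnist(input_op, output_dim):
    """Dropout + dense read-out layer, as in the removed fc_2 block."""
    with tf.name_scope("fc_2"):
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        drop = tf.nn.dropout(input_op, keep_prob)
        dim = np.prod([s.value for s in drop.shape if s.value is not None])
        W = weight_variable([dim, output_dim])
        b = bias_variable([output_dim])
        tf.summary.histogram("weights", W)
        tf.summary.histogram("biases", b)
        y = tf.matmul(drop, W) + b
    return y, keep_prob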
     # loss computation
     with tf.name_scope("xent"):
@@ -181,19 +105,20 @@ if __name__ == '__main__':
         # actual learning
         started = t.time()
         feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
-        for i in range(1100):
-            X_batch = get_next_batch(X_train, i, 64)
-            Y_batch = get_next_batch(Y_train, i, 64)
+        for _ in range(1):
+            i = 0
+            for X_batch, Y_batch in batch_generator(X_train, Y_train, 64, circle=True):
                 feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
                 # the _ captures the return value of "train_optimizer", which must be run
                 # to compute the gradient, even though we do not care about its output
                 _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
                 if i % 100 == 0:
                     print('step {}, loss {} (with dropout)'.format(i, loss))
                     r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
                     print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
                     summary_str = sess.run(merged_summary, feed_dict=feed_dict)
                     summary_writer.add_summary(summary_str, i)
+                i += 1
         stoped = t.time()
         accuracy, preds = sess.run([accuracy, predictions], feed_dict={
...
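The rewritten loop hands batching to skluc's `batch_generator` instead of index arithmetic with `get_next_batch`. Its implementation is not part of this commit; a plausible minimal version, assuming `circle=True` means the final partial batch is completed by wrapping around to the start of the data (the semantics are inferred from the call site and may differ from skluc's actual code):

```python
def batch_generator(X, Y, batch_size, circle=False):
    """Yield (X_batch, Y_batch) pairs covering one pass over the data.

    With circle=True, the last batch is padded with examples taken
    from the beginning instead of being yielded truncated.
    """
    n = X.shape[0]
    for start in range(0, n, batch_size):
        end = start + batch_size
        if end <= n or not circle:
            yield X[start:end], Y[start:end]
        else:
            # wrap around so every batch has exactly batch_size rows
            pad = end - n
            yield (np.concatenate([X[start:], X[:pad]]),
                   np.concatenate([Y[start:], Y[:pad]]))
```

Under this reading, the outer `for _ in range(1):` is an epoch counter, and the manually incremented `i` replaces the old step index for logging and summaries.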
fasfood_layer.py
@@ -105,15 +105,15 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False):
     conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")

     G, G_norm = G_variable((nbr_stack, final_dim), trainable=trainable)
-    tf.summary.histogram("weights G", G)
+    tf.summary.histogram("weights_G", G)
     B = B_variable((nbr_stack, final_dim), trainable=trainable)
-    tf.summary.histogram("weights B", B)
+    tf.summary.histogram("weights_B", B)
     H = H_variable(final_dim)
-    tf.summary.histogram("weights H", H)
+    tf.summary.histogram("weights_H", H)
     P = P_variable(final_dim, nbr_stack)
-    tf.summary.histogram("weights P", P)
+    tf.summary.histogram("weights_P", P)
     S = S_variable((nbr_stack, final_dim), G_norm, trainable=trainable)
-    tf.summary.histogram("weights S", S)
+    tf.summary.histogram("weights_S", S)
     conv_out2 = tf.reshape(conv_out2, (1, -1, 1, final_dim))
     h_ff1 = tf.multiply(conv_out2, B, name="Bx")
@@ -131,9 +131,10 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False):
     h_ff7 = tf.scalar_mul(tf.sqrt(float(1 / final_dim)), tf.concat([h_ff7_1, h_ff7_2], axis=1))
     return h_ff7
+# todo: add this module to scikit-luc

 # --- Hadamard utils --- #

 def dimensionality_constraints(d):
     """
     Enforce d to be a power of 2
...
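For reference, the `B`, `H`, `G`, `P`, `S` variables renamed in this commit implement the Fastfood transform of Le, Sarlós, and Smola, which replaces the dense Gaussian matrix of random Fourier features by the structured product V = (1/(sigma*sqrt(d))) * S·H·G·P·H·B, computable in O(d log d) time via the Walsh-Hadamard transform. A dense NumPy sketch of a single stack (illustrative only: the layer above builds these factors as TensorFlow variables, applies H implicitly, and the exact placement of sigma and of the S scaling may differ):

```python
import numpy as np
from scipy.linalg import hadamard

def fastfood_matrix(d, sigma, seed=0):
    """Return V = (1/(sigma*sqrt(d))) * S H G P H B as a dense d x d matrix.

    d must be a power of 2 (hence dimensionality_constraints above).
    B: random signs; G: Gaussian diagonal; P: random permutation;
    S: rescaling so row norms match those of a Gaussian matrix;
    H: Walsh-Hadamard matrix.
    """
    rng = np.random.RandomState(seed)
    H = hadamard(d).astype(np.float64)
    B = np.diag(rng.choice([-1.0, 1.0], size=d))
    G = np.diag(rng.normal(size=d))
    P = np.eye(d)[rng.permutation(d)]
    # one common choice: chi-distributed lengths divided by ||G||_Frobenius
    s = np.sqrt(rng.chisquare(df=d, size=d)) / np.linalg.norm(np.diag(G))
    S = np.diag(s)
    return (1.0 / (sigma * np.sqrt(d))) * S @ H @ G @ P @ H @ B
```

The cos/sin concatenation at the end of the layer (`h_ff7`, scaled by 1/sqrt(d)) then turns Vx into the usual Fourier feature pair, analogous to the removed `random_features` but without ever materializing a dense d x d Gaussian matrix.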