Commit 64ca4df5 authored by Luc Giffon

build stacked fastfood - need to remove diagonal fastfood and do the stacks implicitly

parent dc036659
@@ -42,9 +42,9 @@ def convolution(input):
     with tf.name_scope("conv_pool_1"):
         # 32 is the number of filters we'll use, i.e. the number of different
         # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 32])
+        W_conv1 = weight_variable([5, 5, 1, 20])
         tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([32])
+        b_conv1 = bias_variable([20])
         tf.summary.histogram("biases conv1", b_conv1)
         # -1 is here to keep the total size constant (784)
         h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
@@ -52,9 +52,9 @@ def convolution(input):
         h_pool1 = max_pool_2x2(h_conv1)
     with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 32, 64])
+        W_conv2 = weight_variable([5, 5, 20, 50])
         tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([64])
+        b_conv2 = bias_variable([50])
         tf.summary.histogram("biases conv2", b_conv2)
         h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
         tf.summary.histogram("act conv2", h_conv2)
@@ -77,7 +77,7 @@ def random_biases(shape):
 # --- Fast Food Naive --- #
-def G_variable(d, diag=True):
+def G_variable(d, diag=True, trainable=False):
     """
     Return a Gaussian random diagonal matrix converted into a TensorFlow Variable.
......@@ -92,11 +92,10 @@ def G_variable(d, diag=True):
else:
G = np.random.normal(size=d).astype(np.float32)
G_norm = np.linalg.norm(G, ord=2)
print("Norm of G is: {}".format(G_norm))
return tf.Variable(G, name="G", trainable=False), G_norm
return tf.Variable(G, name="G", trainable=trainable), G_norm
def B_variable(d, diag=True):
def B_variable(d, diag=True, trainable=False):
"""
Return a random diagonal matrix of -1 and 1 picked uniformly into Tensorflow Variable.
@@ -108,7 +107,7 @@ def B_variable(d, diag=True):
         B = np.diag(np.random.choice([-1, 1], size=d, replace=True)).astype(np.float32)
     else:
         B = np.random.choice([-1, 1], size=d, replace=True).astype(np.float32)
-    return tf.Variable(B, name="B", trainable=False)
+    return tf.Variable(B, name="B", trainable=trainable)


 def P_variable(d):
@@ -138,7 +137,7 @@ def H_variable(d):
     return tf.Variable(H, name="H", trainable=False)


-def S_variable(d, G_norm, diag=True):
+def S_variable(d, G_norm, diag=True, trainable=False):
     """
     Return a scaling diagonal matrix of random values picked from a chi distribution.
@@ -154,7 +153,7 @@ def S_variable(d, G_norm, diag=True):
         S = np.diag((1 / G_norm) * scipy.stats.chi.rvs(d, size=d)).astype(np.float32)
     else:
         S = (1 / G_norm) * scipy.stats.chi.rvs(d, size=d).astype(np.float32)
-    return tf.Variable(S, name="S", trainable=False)
+    return tf.Variable(S, name="S", trainable=trainable)


 # --- Hadamard utils --- #
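[Editor's note: taken together, B, H, P (a permutation), G and S are the five factors of the Fastfood transform (Le et al., 2013), which approximates multiplication by a dense d x d Gaussian matrix as V = (1 / (sigma * sqrt(d))) * S H G P H B, costing O(d log d) instead of O(d^2). A minimal NumPy sketch of that product, assuming d is a power of two; fastfood_project is an illustrative name, not from this repo:

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    def fastfood_project(x, sigma):
        # x: vector of length d, with d a power of two
        d = x.shape[0]
        H = hadamard(d).astype(np.float32)       # Walsh-Hadamard matrix
        B = np.random.choice([-1, 1], size=d)    # random signs
        P = np.random.permutation(d)             # random permutation
        G = np.random.normal(size=d)             # Gaussian weights
        S = scipy.stats.chi.rvs(d, size=d) / np.linalg.norm(G)  # chi rescaling
        # V x = (1 / (sigma * sqrt(d))) * S H G P H B x, applied right to left
        out = H @ (B * x)
        out = out[P]
        out = H @ (G * out)
        return S * out / (sigma * np.sqrt(d))

A kernel feature map (cosines, or the ReLU used in main below) would then be applied on top of this projection.]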
@@ -198,9 +197,8 @@ def random_features(conv_out, sigma):
     return h1_final


-def fast_food(conv_out, sigma, diag=True, trainable=False):
-    # todo use the trainable parameter
-    with tf.name_scope("fastfood"):
+def fast_food(conv_out, sigma, nbr_stack=1, diag=True, trainable=False, name="fastfood"):
+    with tf.name_scope(name + "_diag=" + str(diag) + "_sigma=" + str(sigma)):
         init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
         final_dim = int(dimensionality_constraints(init_dim))
         padding = final_dim - init_dim
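[Editor's note: dimensionality_constraints is also outside this diff. Since the Walsh-Hadamard transform only exists for power-of-two sizes, it plausibly rounds init_dim up to the next power of two, which is what the zero-padding below compensates for. A hypothetical reconstruction:

    import numpy as np

    def dimensionality_constraints(d):
        # round d up to the next power of two, as required by the
        # Walsh-Hadamard matrix (assumed; the real body is outside this diff)
        return 2 ** int(np.ceil(np.log2(d)))
]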
@@ -208,15 +206,15 @@ def fast_food(conv_out, sigma, diag=True, trainable=False):
         paddings = tf.constant([[0, 0], [0, padding]])
         conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")

-        G, G_norm = G_variable(final_dim, diag=diag)
+        G, G_norm = G_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights G", G)
-        B = B_variable(final_dim, diag=diag)
+        B = B_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights B", B)
         H = H_variable(final_dim)
         tf.summary.histogram("weights H", H)
         P = P_variable(final_dim)
         tf.summary.histogram("weights P", P)
-        S = S_variable(final_dim, G_norm, diag=diag)
+        S = S_variable(final_dim, G_norm, diag=diag, trainable=trainable)
         tf.summary.histogram("weights S", S)

         if diag:
@@ -257,8 +255,17 @@ def fully_connected(conv_out):
     return h_fc1


+def stacked_fastfood(input_, nbr, sigma, diag=False, trainable=False):
+    l_outputs = []
+    for i in range(nbr):
+        output = fast_food(input_, sigma, diag=diag, trainable=trainable, name="fastfood" + str(i))
+        l_outputs.append(output)
+    outputs_stacked = tf.concat(l_outputs, axis=1)
+    return outputs_stacked
+
+
 if __name__ == '__main__':
-    SIGMA = 100.0
+    SIGMA = 5.0
     print("Sigma = {}".format(SIGMA))

     with tf.Graph().as_default():
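[Editor's note: stacked_fastfood builds nbr independent Fastfood blocks over the same input and concatenates them along the feature axis, so the output width is nbr times the padded dimension. The commit message's "do the stacks implicitly" presumably means drawing all the random vectors at once and batching the Hadamard multiplies instead of duplicating graph ops. One possible reading, sketched in NumPy (illustrative, not the author's implementation):

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    def implicit_stacked_fastfood(x, nbr_stack, sigma):
        # Draw the random vectors for all stacks at once ([nbr_stack, d] arrays)
        # and batch the Hadamard multiplies, rather than building nbr_stack
        # separate fast_food subgraphs and concatenating their outputs.
        d = x.shape[0]  # d must be a power of two
        H = hadamard(d).astype(np.float32)
        B = np.random.choice([-1, 1], size=(nbr_stack, d))
        P = np.array([np.random.permutation(d) for _ in range(nbr_stack)])
        G = np.random.normal(size=(nbr_stack, d))
        S = scipy.stats.chi.rvs(d, size=(nbr_stack, d)) / np.linalg.norm(G, axis=1, keepdims=True)
        out = (B * x) @ H.T                      # [nbr_stack, d]
        out = np.take_along_axis(out, P, axis=1)
        out = (G * out) @ H.T
        return (S * out / (sigma * np.sqrt(d))).reshape(-1)  # [nbr_stack * d]
]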
@@ -274,9 +281,14 @@ if __name__ == '__main__':
         h_conv = convolution(x_image)
         # h_conv = x
         # out_fc = fully_connected(h_conv)  # 95% accuracy
-        # out_fc = fast_food(h_conv, SIGMA)  # 83% accuracy (conv) | 56% accuracy (noconv)
-        # out_fc = fast_food(h_conv, SIGMA, diag=False)  # 84% accuracy (conv) | 59% accuracy (noconv)
-        out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA))  # 83% accuracy (conv) | 56% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # todo: write a less naive implementation: blocks are only needed in tf when I use
+        # diagonal matrices; otherwise I only need a few rows of the Hadamard matrix
+        out_fc = tf.nn.relu(stacked_fastfood(h_conv, 2, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)

         # classification
         with tf.name_scope("fc_2"):
@@ -315,15 +327,14 @@ if __name__ == '__main__':
         # Create a session for running Ops on the Graph.
         sess = tf.Session()
         # Instantiate a SummaryWriter to output summaries and the Graph.
-        summary_writer = tf.summary.FileWriter("results_deepfried")
+        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
         summary_writer.add_graph(sess.graph)
         # Initialize all Variable objects
         sess.run(init)
         # actual learning
         started = t.time()
-        for i in range(500):
-            batch = mnist.train.next_batch(50)
+        for i in range(2000):
+            batch = mnist.train.next_batch(64)
             feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5}
             # the _ is there to capture the return value of "train_optimizer", which must
             # be run to compute the gradient but whose output we are not interested in