From 64ca4df57e3fad4a1c6737669e5f2170d5628a5f Mon Sep 17 00:00:00 2001
From: Luc Giffon <luc.giffon@lif.univ-mrs.fr>
Date: Wed, 29 Nov 2017 14:51:04 +0100
Subject: [PATCH] build stacked fastfood - still need to remove the diagonal
 fastfood path and do the stacking implicitly

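Each fastfood block approximates a Gaussian random feature map through the
factorization V = (1/(sigma*sqrt(d))) * S H G P H B of Le et al. (2013):
B a random +/-1 diagonal, H the Walsh-Hadamard matrix, P a random
permutation, G a Gaussian diagonal and S a chi-distributed rescaling.
With diag=True these factors are materialized as dense d x d matrices,
which is the path to remove. For now stacked_fastfood builds the stacks
explicitly by concatenating nbr independent blocks along the feature
axis; the new nbr_stack parameter of fast_food is accepted but unused.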
---
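Note (dropped by git-am): below is a minimal NumPy sketch of the intended
implicit stacking, assuming the standard factorization above; the helper
name implicit_stacked_fastfood is made up for illustration and is not part
of this patch. It draws one (nbr_stack, d) array per diagonal and applies
a single batched Hadamard multiply instead of looping over blocks:

    import numpy as np
    import scipy.linalg
    import scipy.stats

    def implicit_stacked_fastfood(x, nbr_stack, sigma, seed=0):
        # x: (batch, d) with d a power of two; returns (batch, nbr_stack * d)
        rng = np.random.RandomState(seed)
        d = x.shape[1]
        H = scipy.linalg.hadamard(d).astype(np.float32)
        # one (nbr_stack, d) draw per diagonal instead of a loop over blocks
        B = rng.choice([-1.0, 1.0], size=(nbr_stack, d)).astype(np.float32)
        G = rng.normal(size=(nbr_stack, d)).astype(np.float32)
        P = np.stack([rng.permutation(d) for _ in range(nbr_stack)])
        S = (scipy.stats.chi.rvs(d, size=(nbr_stack, d), random_state=rng)
             / np.linalg.norm(G, axis=1, keepdims=True)).astype(np.float32)
        out = x[:, None, :] * B                               # B x, broadcast to (batch, nbr_stack, d)
        out = out @ H                                         # H B x (H is symmetric)
        out = np.take_along_axis(out, P[None, :, :], axis=2)  # P H B x
        out = out * G                                         # G P H B x
        out = out @ H                                         # H G P H B x
        out = out * S / (sigma * np.sqrt(d))                  # (1/(sigma*sqrt(d))) S H G P H B x
        return out.reshape(x.shape[0], nbr_stack * d)         # implicit concat of the stacks

The output shape matches what stacked_fastfood produces with tf.concat,
e.g. implicit_stacked_fastfood(np.random.rand(64, 1024).astype(np.float32),
nbr_stack=2, sigma=5.0) has shape (64, 2048).
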
 main/convnet_random.py | 65 ++++++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/main/convnet_random.py b/main/convnet_random.py
index 397461a..b6b11c8 100644
--- a/main/convnet_random.py
+++ b/main/convnet_random.py
@@ -42,9 +42,9 @@ def convolution(input):
     with tf.name_scope("conv_pool_1"):
-        # 32 is the number of filter we'll use. e.g. the number of different
-        # shapes this layer is able to recognize
+        # 20 is the number of filters we'll use, i.e. the number of different
+        # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 32])
+        W_conv1 = weight_variable([5, 5, 1, 20])
         tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([32])
+        b_conv1 = bias_variable([20])
         tf.summary.histogram("biases conv1", b_conv1)
         # -1 is here to keep the total size constant (784)
         h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
@@ -52,9 +52,9 @@ def convolution(input):
         h_pool1 = max_pool_2x2(h_conv1)
 
     with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 32, 64])
+        W_conv2 = weight_variable([5, 5, 20, 50])
         tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([64])
+        b_conv2 = bias_variable([50])
         tf.summary.histogram("biases conv2", b_conv2)
         h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
         tf.summary.histogram("act conv2", h_conv2)
@@ -77,7 +77,7 @@ def random_biases(shape):
 
 # --- Fast Food Naive --- #
 
-def G_variable(d, diag=True):
+def G_variable(d, diag=True, trainable=False):
     """
     Return a Gaussian random diagonal matrix as a TensorFlow Variable.
 
@@ -92,11 +92,10 @@ def G_variable(d, diag=True):
     else:
         G = np.random.normal(size=d).astype(np.float32)
         G_norm = np.linalg.norm(G, ord=2)
-    print("Norm of G is: {}".format(G_norm))
-    return tf.Variable(G, name="G", trainable=False), G_norm
+    return tf.Variable(G, name="G", trainable=trainable), G_norm
 
 
-def B_variable(d, diag=True):
+def B_variable(d, diag=True, trainable=False):
     """
     Return a random diagonal matrix of -1s and 1s, picked uniformly, as a TensorFlow Variable.
 
@@ -108,7 +107,7 @@ def B_variable(d, diag=True):
         B = np.diag(np.random.choice([-1, 1], size=d, replace=True)).astype(np.float32)
     else:
         B = np.random.choice([-1, 1], size=d, replace=True).astype(np.float32)
-    return tf.Variable(B, name="B", trainable=False)
+    return tf.Variable(B, name="B", trainable=trainable)
 
 
 def P_variable(d):
@@ -138,7 +137,7 @@ def H_variable(d):
     return tf.Variable(H, name="H", trainable=False)
 
 
-def S_variable(d, G_norm, diag=True):
+def S_variable(d, G_norm, diag=True, trainable=False):
     """
     Return a scaling diagonal matrix of random values picked from a chi distribution.
 
@@ -154,7 +153,7 @@ def S_variable(d, G_norm, diag=True):
         S = np.diag((1 / G_norm) * scipy.stats.chi.rvs(d, size=d)).astype(np.float32)
     else:
         S = (1 / G_norm) * scipy.stats.chi.rvs(d, size=d).astype(np.float32)
-    return tf.Variable(S, name="S", trainable=False)
+    return tf.Variable(S, name="S", trainable=trainable)
 
 
 # --- Hadamard utils --- #
@@ -198,9 +197,8 @@ def random_features(conv_out, sigma):
         return h1_final
 
 
-def fast_food(conv_out, sigma, diag=True, trainable=False):
-    # todo use te trainable parameter
-    with tf.name_scope("fastfood"):
+def fast_food(conv_out, sigma, nbr_stack=1, diag=True, trainable=False, name="fastfood"):
+    with tf.name_scope(name + "_diag-" + str(diag) + "_sigma-" + str(sigma)):  # "=" is not a valid character in a tf scope name
         init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
         final_dim = int(dimensionality_constraints(init_dim))
         padding = final_dim - init_dim
@@ -208,15 +206,15 @@ def fast_food(conv_out, sigma, diag=True, trainable=False):
         paddings = tf.constant([[0, 0], [0, padding]])
         conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")
 
-        G, G_norm = G_variable(final_dim, diag=diag)
+        G, G_norm = G_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights G", G)
-        B = B_variable(final_dim, diag=diag)
+        B = B_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights B", B)
         H = H_variable(final_dim)
         tf.summary.histogram("weights H", H)
         P = P_variable(final_dim)
         tf.summary.histogram("weights P", P)
-        S = S_variable(final_dim, G_norm, diag=diag)
+        S = S_variable(final_dim, G_norm, diag=diag, trainable=trainable)
         tf.summary.histogram("weights S", S)
 
         if diag:
@@ -257,8 +255,17 @@ def fully_connected(conv_out):
     return h_fc1
 
 
+def stacked_fastfood(input_, nbr, sigma, diag=False, trainable=False):
+    l_outputs = []  # outputs of the nbr independent fastfood blocks
+    for i in range(nbr):
+        output = fast_food(input_, sigma, diag=diag, trainable=trainable, name="fastfood" + str(i))
+        l_outputs.append(output)
+    outputs_stacked = tf.concat(l_outputs, axis=1)  # explicit stacking along the feature axis
+    return outputs_stacked
+
+
 if __name__ == '__main__':
-    SIGMA = 100.0
+    SIGMA = 5.0
     print("Sigma = {}".format(SIGMA))
 
     with tf.Graph().as_default():
@@ -274,9 +281,14 @@ if __name__ == '__main__':
         h_conv = convolution(x_image)
         # h_conv = x
         # out_fc = fully_connected(h_conv)  # 95% accuracy
-        # out_fc = fast_food(h_conv, SIGMA)  # 83% accuracy (conv) | 56% accuracy (noconv)
-        # out_fc = fast_food(h_conv, SIGMA, diag=False)  # 84% accuracy (conv) | 59% accuracy (noconv)
-        out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA))  # 83% accuracy (conv) | 56% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # todo: write a less naive implementation: blocks in tf are only needed when I use diagonal
+        # matrices; otherwise I only need several rows for the hadamard matrix
+        out_fc = tf.nn.relu(stacked_fastfood(h_conv, 2, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
 
         # classification
         with tf.name_scope("fc_2"):
@@ -315,15 +327,14 @@ if __name__ == '__main__':
         # Create a session for running Ops on the Graph.
         sess = tf.Session()
         # Instantiate a SummaryWriter to output summaries and the Graph.
-        summary_writer = tf.summary.FileWriter("results_deepfried")
+        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
         summary_writer.add_graph(sess.graph)
         # Initialize all Variable objects
         sess.run(init)
-
         # actual learning
         started = t.time()
-        for i in range(500):
-            batch = mnist.train.next_batch(50)
+        for i in range(2000):
+            batch = mnist.train.next_batch(64)
             feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5}
             # the _ captures the return value of "train_optimizer", which must be called
             # to compute the gradient but whose output we don't care about
-- 
GitLab