Commit 64ca4df5 authored by Luc Giffon

build stacked fastfood - need to remove diagonal fastfood and do the stacks implicitly

parent dc036659
@@ -42,9 +42,9 @@ def convolution(input):
     with tf.name_scope("conv_pool_1"):
         # 32 is the number of filters we'll use, i.e. the number of different
         # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 32])
+        W_conv1 = weight_variable([5, 5, 1, 20])
         tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([32])
+        b_conv1 = bias_variable([20])
         tf.summary.histogram("biases conv1", b_conv1)
         # -1 is here to keep the total size constant (784)
         h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
@@ -52,9 +52,9 @@ def convolution(input):
         h_pool1 = max_pool_2x2(h_conv1)
     with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 32, 64])
+        W_conv2 = weight_variable([5, 5, 20, 50])
         tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([64])
+        b_conv2 = bias_variable([50])
         tf.summary.histogram("biases conv2", b_conv2)
         h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
         tf.summary.histogram("act conv2", h_conv2)
@@ -77,7 +77,7 @@ def random_biases(shape):
 # --- Fast Food Naive --- #
-def G_variable(d, diag=True):
+def G_variable(d, diag=True, trainable=False):
     """
     Return a Gaussian random diagonal matrix converted into a TensorFlow Variable.
......@@ -92,11 +92,10 @@ def G_variable(d, diag=True):
else:
G = np.random.normal(size=d).astype(np.float32)
G_norm = np.linalg.norm(G, ord=2)
print("Norm of G is: {}".format(G_norm))
return tf.Variable(G, name="G", trainable=False), G_norm
return tf.Variable(G, name="G", trainable=trainable), G_norm
def B_variable(d, diag=True):
def B_variable(d, diag=True, trainable=False):
"""
Return a random diagonal matrix of -1 and 1 picked uniformly into Tensorflow Variable.
@@ -108,7 +107,7 @@ def B_variable(d, diag=True):
         B = np.diag(np.random.choice([-1, 1], size=d, replace=True)).astype(np.float32)
     else:
         B = np.random.choice([-1, 1], size=d, replace=True).astype(np.float32)
-    return tf.Variable(B, name="B", trainable=False)
+    return tf.Variable(B, name="B", trainable=trainable)


 def P_variable(d):
@@ -138,7 +137,7 @@ def H_variable(d):
     return tf.Variable(H, name="H", trainable=False)


-def S_variable(d, G_norm, diag=True):
+def S_variable(d, G_norm, diag=True, trainable=False):
     """
     Return a scaling diagonal matrix of random values picked from a chi distribution.
@@ -154,7 +153,7 @@ def S_variable(d, G_norm, diag=True):
         S = np.diag((1 / G_norm) * scipy.stats.chi.rvs(d, size=d)).astype(np.float32)
     else:
         S = (1 / G_norm) * scipy.stats.chi.rvs(d, size=d).astype(np.float32)
-    return tf.Variable(S, name="S", trainable=False)
+    return tf.Variable(S, name="S", trainable=trainable)


 # --- Hadamard utils --- #
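[Editor's note: taken together, B, H, P (a permutation), G and S are the five factors of the Fastfood transform (Le et al., 2013), which approximates multiplication by a dense d x d Gaussian matrix as V = (1 / (sigma * sqrt(d))) * S H G P H B, costing O(d log d) instead of O(d^2). A minimal NumPy sketch of that product, assuming d is a power of two; fastfood_project is an illustrative name, not from this repo:

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    def fastfood_project(x, sigma):
        # x: vector of length d, with d a power of two
        d = x.shape[0]
        H = hadamard(d).astype(np.float32)       # Walsh-Hadamard matrix
        B = np.random.choice([-1, 1], size=d)    # random signs
        P = np.random.permutation(d)             # random permutation
        G = np.random.normal(size=d)             # Gaussian weights
        S = scipy.stats.chi.rvs(d, size=d) / np.linalg.norm(G)  # chi rescaling
        # V x = (1 / (sigma * sqrt(d))) * S H G P H B x, applied right to left
        out = H @ (B * x)
        out = out[P]
        out = H @ (G * out)
        return S * out / (sigma * np.sqrt(d))

A kernel feature map (cosines, or the ReLU used in main below) would then be applied on top of this projection.]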
@@ -198,9 +197,8 @@ def random_features(conv_out, sigma):
     return h1_final


-def fast_food(conv_out, sigma, diag=True, trainable=False):
-    # todo use the trainable parameter
-    with tf.name_scope("fastfood"):
+def fast_food(conv_out, sigma, nbr_stack=1, diag=True, trainable=False, name="fastfood"):
+    with tf.name_scope(name + "_diag=" + str(diag) + "_sigma=" + str(sigma)):
         init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
         final_dim = int(dimensionality_constraints(init_dim))
         padding = final_dim - init_dim
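[Editor's note: dimensionality_constraints is also outside this diff. Since the Walsh-Hadamard transform only exists for power-of-two sizes, it plausibly rounds init_dim up to the next power of two, which is what the zero-padding below compensates for. A hypothetical reconstruction:

    import numpy as np

    def dimensionality_constraints(d):
        # round d up to the next power of two, as required by the
        # Walsh-Hadamard matrix (assumed; the real body is outside this diff)
        return 2 ** int(np.ceil(np.log2(d)))
]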
@@ -208,15 +206,15 @@ def fast_food(conv_out, sigma, diag=True, trainable=False):
         paddings = tf.constant([[0, 0], [0, padding]])
         conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")

-        G, G_norm = G_variable(final_dim, diag=diag)
+        G, G_norm = G_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights G", G)
-        B = B_variable(final_dim, diag=diag)
+        B = B_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights B", B)
         H = H_variable(final_dim)
         tf.summary.histogram("weights H", H)
         P = P_variable(final_dim)
         tf.summary.histogram("weights P", P)
-        S = S_variable(final_dim, G_norm, diag=diag)
+        S = S_variable(final_dim, G_norm, diag=diag, trainable=trainable)
         tf.summary.histogram("weights S", S)

         if diag:
@@ -257,8 +255,17 @@ def fully_connected(conv_out):
     return h_fc1


+def stacked_fastfood(input_, nbr, sigma, diag=False, trainable=False):
+    l_outputs = []
+    for i in range(nbr):
+        output = fast_food(input_, sigma, diag=diag, trainable=trainable, name="fastfood" + str(i))
+        l_outputs.append(output)
+    outputs_stacked = tf.concat(l_outputs, axis=1)
+    return outputs_stacked
+
+
 if __name__ == '__main__':
-    SIGMA = 100.0
+    SIGMA = 5.0
     print("Sigma = {}".format(SIGMA))

     with tf.Graph().as_default():
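[Editor's note: stacked_fastfood builds nbr independent Fastfood blocks over the same input and concatenates them along the feature axis, so the output width is nbr times the padded dimension. The commit message's "do the stacks implicitly" presumably means drawing all the random vectors at once and batching the Hadamard multiplies instead of duplicating graph ops. One possible reading, sketched in NumPy (illustrative, not the author's implementation):

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    def implicit_stacked_fastfood(x, nbr_stack, sigma):
        # Draw the random vectors for all stacks at once ([nbr_stack, d] arrays)
        # and batch the Hadamard multiplies, rather than building nbr_stack
        # separate fast_food subgraphs and concatenating their outputs.
        d = x.shape[0]  # d must be a power of two
        H = hadamard(d).astype(np.float32)
        B = np.random.choice([-1, 1], size=(nbr_stack, d))
        P = np.array([np.random.permutation(d) for _ in range(nbr_stack)])
        G = np.random.normal(size=(nbr_stack, d))
        S = scipy.stats.chi.rvs(d, size=(nbr_stack, d)) / np.linalg.norm(G, axis=1, keepdims=True)
        out = (B * x) @ H.T                      # [nbr_stack, d]
        out = np.take_along_axis(out, P, axis=1)
        out = (G * out) @ H.T
        return (S * out / (sigma * np.sqrt(d))).reshape(-1)  # [nbr_stack * d]
]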
@@ -274,9 +281,14 @@ if __name__ == '__main__':
         h_conv = convolution(x_image)
         # h_conv = x
         # out_fc = fully_connected(h_conv)  # 95% accuracy
-        # out_fc = fast_food(h_conv, SIGMA)  # 83% accuracy (conv) | 56% accuracy (noconv)
-        # out_fc = fast_food(h_conv, SIGMA, diag=False)  # 84% accuracy (conv) | 59% accuracy (noconv)
-        out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA))  # 83% accuracy (conv) | 56% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # todo: write a less naive implementation: blocks are only needed in tf when I use
+        # diagonal matrices; otherwise I only need a few rows of the Hadamard matrix
+        out_fc = tf.nn.relu(stacked_fastfood(h_conv, 2, SIGMA, diag=False, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)

         # classification
         with tf.name_scope("fc_2"):
@@ -315,15 +327,14 @@ if __name__ == '__main__':
         # Create a session for running Ops on the Graph.
         sess = tf.Session()
         # Instantiate a SummaryWriter to output summaries and the Graph.
-        summary_writer = tf.summary.FileWriter("results_deepfried")
+        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
         summary_writer.add_graph(sess.graph)
         # Initialize all Variable objects
         sess.run(init)
         # actual learning
         started = t.time()
-        for i in range(500):
-            batch = mnist.train.next_batch(50)
+        for i in range(2000):
+            batch = mnist.train.next_batch(64)
             feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5}
             # the _ is there to capture the return value of "train_optimizer", which must
             # be run to compute the gradient but whose output we are not interested in