Commit 0fcadc6c authored by Luc Giffon
deepfriedconvnet now uses the batch generator + slight changes in fastfood layer variable names

parent 6d849225
@@ -13,7 +13,8 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
 import tensorflow as tf
 import numpy as np
 import skluc.mldatasets as dataset
-from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2, get_next_batch
+from skluc.neural_networks import convolution_mnist, classification_mnist, batch_generator
+from fasfood_layer import fast_food
 tf.logging.set_verbosity(tf.logging.ERROR)
@@ -43,73 +44,6 @@ X_test = X_test.astype(np.float32)
 Y_train = Y_train.astype(np.float32)
 Y_test = Y_test.astype(np.float32)
-from fasfood_layer import fast_food
-
-
-# todo: use the functions adapted/defined for these convolution layers
-def convolution_mnist(input):
-    with tf.name_scope("conv_pool_1"):
-        # 20 is the number of filters we'll use, i.e. the number of different
-        # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 20])
-        tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([20])
-        tf.summary.histogram("biases conv1", b_conv1)
-        # -1 is here to keep the total size constant (784)
-        h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
-        tf.summary.histogram("act conv1", h_conv1)
-        h_pool1 = max_pool_2x2(h_conv1)
-
-    with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 20, 50])
-        tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([50])
-        tf.summary.histogram("biases conv2", b_conv2)
-        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-        tf.summary.histogram("act conv2", h_conv2)
-        h_pool2 = max_pool_2x2(h_conv2)
-
-    return h_pool2
-
-
-# --- Random Fourier Features --- #
-
-def random_variable(shape, sigma):
-    W = np.random.normal(size=shape, scale=sigma).astype(np.float32)
-    return tf.Variable(W, name="random_Weights", trainable=False)
-
-
-def random_biases(shape):
-    b = np.random.uniform(0, 2 * np.pi, size=shape).astype(np.float32)
-    return tf.Variable(b, name="random_biase", trainable=False)
-
-
-# --- Representation Layer --- #
-
-def random_features(conv_out, sigma):
-    with tf.name_scope("random_features"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        conv_out2 = tf.reshape(conv_out, [-1, init_dim])
-        W = random_variable((init_dim, init_dim), sigma)
-        b = random_biases(init_dim)
-        h1 = tf.matmul(conv_out2, W, name="Wx") + b
-        h1_cos = tf.cos(h1)
-        h1_final = tf.scalar_mul(np.sqrt(2.0 / init_dim).astype(np.float32), h1_cos)
-    return h1_final
-
-
-def fully_connected(conv_out):
-    with tf.name_scope("fc_1"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
-        W_fc1 = weight_variable([init_dim, 4096 * 2])
-        b_fc1 = bias_variable([4096 * 2])
-        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
-        tf.summary.histogram("weights", W_fc1)
-        tf.summary.histogram("biases", b_fc1)
-    return h_fc1
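For context, the removed `random_features` block is the standard random Fourier features map: z(x) = sqrt(2/D) * cos(Wx + b) with Gaussian W and uniform b, so that z(x)·z(y) approximates a Gaussian kernel whose bandwidth is governed by sigma. A minimal NumPy sketch of the same mapping, outside the diff (the names `rff_map` and `D` are illustrative, not from the repository):

```python
import numpy as np

def rff_map(X, sigma, D, seed=0):
    """Random Fourier features: z(x) = sqrt(2/D) * cos(Wx + b).

    Mirrors the removed random_features layer: W ~ N(0, sigma^2),
    b ~ U(0, 2*pi), both fixed (non-trainable).
    """
    rng = np.random.RandomState(seed)
    d = X.shape[1]
    W = rng.normal(scale=sigma, size=(d, D)).astype(np.float32)
    b = rng.uniform(0, 2 * np.pi, size=D).astype(np.float32)
    return np.sqrt(2.0 / D).astype(np.float32) * np.cos(X @ W + b)

# z(x) . z(y) approximates the shift-invariant kernel induced by W's law
X = np.random.randn(5, 16).astype(np.float32)
Z = rff_map(X, sigma=5.0, D=1024)
K_approx = Z @ Z.T  # approximate Gram matrix
```

The removed layer is the square case D = init_dim; the sketch simply exposes D as a parameter.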
 if __name__ == '__main__':
     SIGMA = 5.0
@@ -128,26 +62,16 @@ if __name__ == '__main__':
     # Representation layer
     h_conv = convolution_mnist(x_image)
-    # h_conv = x
     # out_fc = fully_connected(h_conv)  # 95% accuracy
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=1))  # 83% accuracy (conv) | 56% accuracy (noconv)
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2, trainable=True))
-    out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
-    # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+    # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+    out_fc = fast_food(h_conv, SIGMA, nbr_stack=1, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
     # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
     # classification
-    with tf.name_scope("fc_2"):
-        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
-        h_fc1_drop = tf.nn.dropout(out_fc, keep_prob)
-        dim = np.prod([s.value for s in h_fc1_drop.shape if s.value is not None])
-        W_fc2 = weight_variable([dim, output_dim])
-        b_fc2 = bias_variable([output_dim])
-        tf.summary.histogram("weights", W_fc2)
-        tf.summary.histogram("biases", b_fc2)
-        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+    y_conv, keep_prob = classification_mnist(out_fc, output_dim)
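The removed `fc_2` block (dropout followed by a dense read-out layer) is presumably what `classification_mnist` now wraps in skluc. A sketch consistent with the removed code, assuming the helper keeps the placeholder-based dropout and skluc's `weight_variable`/`bias_variable` helpers (this body is reconstructed from the deleted lines, not the actual skluc implementation):

```python
def classification_mnist(input_op, output_dim):
    """Dropout + dense read-out layer, as in the removed fc_2 block."""
    with tf.name_scope("fc_2"):
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        drop = tf.nn.dropout(input_op, keep_prob)
        dim = np.prod([s.value for s in drop.shape if s.value is not None])
        W = weight_variable([dim, output_dim])
        b = bias_variable([output_dim])
        tf.summary.histogram("weights", W)
        tf.summary.histogram("biases", b)
        y = tf.matmul(drop, W) + b
    return y, keep_prob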
     # loss computation
     with tf.name_scope("xent"):
@@ -181,19 +105,20 @@ if __name__ == '__main__':
         # actual learning
         started = t.time()
         feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
-        for i in range(1100):
-            X_batch = get_next_batch(X_train, i, 64)
-            Y_batch = get_next_batch(Y_train, i, 64)
+        for _ in range(1):
+            i = 0
+            for X_batch, Y_batch in batch_generator(X_train, Y_train, 64, circle=True):
                 feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
                 # the _ captures the return value of "train_optimizer", which must be run
                 # to compute the gradient, even though we do not care about its output
                 _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
                 if i % 100 == 0:
                     print('step {}, loss {} (with dropout)'.format(i, loss))
                     r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
                     print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
                     summary_str = sess.run(merged_summary, feed_dict=feed_dict)
                     summary_writer.add_summary(summary_str, i)
+                i += 1
         stoped = t.time()
         accuracy, preds = sess.run([accuracy, predictions], feed_dict={
...
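The rewritten loop hands batching to skluc's `batch_generator` instead of index arithmetic with `get_next_batch`. Its implementation is not part of this commit; a plausible minimal version, assuming `circle=True` means the final partial batch is completed by wrapping around to the start of the data (the semantics are inferred from the call site and may differ from skluc's actual code):

```python
def batch_generator(X, Y, batch_size, circle=False):
    """Yield (X_batch, Y_batch) pairs covering one pass over the data.

    With circle=True, the last batch is padded with examples taken
    from the beginning instead of being yielded truncated.
    """
    n = X.shape[0]
    for start in range(0, n, batch_size):
        end = start + batch_size
        if end <= n or not circle:
            yield X[start:end], Y[start:end]
        else:
            # wrap around so every batch has exactly batch_size rows
            pad = end - n
            yield (np.concatenate([X[start:], X[:pad]]),
                   np.concatenate([Y[start:], Y[:pad]]))
```

Under this reading, the outer `for _ in range(1):` is an epoch counter, and the manually incremented `i` replaces the old step index for logging and summaries.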
fasfood_layer.py
@@ -105,15 +105,15 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False):
     conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")

     G, G_norm = G_variable((nbr_stack, final_dim), trainable=trainable)
-    tf.summary.histogram("weights G", G)
+    tf.summary.histogram("weights_G", G)
     B = B_variable((nbr_stack, final_dim), trainable=trainable)
-    tf.summary.histogram("weights B", B)
+    tf.summary.histogram("weights_B", B)
     H = H_variable(final_dim)
-    tf.summary.histogram("weights H", H)
+    tf.summary.histogram("weights_H", H)
     P = P_variable(final_dim, nbr_stack)
-    tf.summary.histogram("weights P", P)
+    tf.summary.histogram("weights_P", P)
     S = S_variable((nbr_stack, final_dim), G_norm, trainable=trainable)
-    tf.summary.histogram("weights S", S)
+    tf.summary.histogram("weights_S", S)
     conv_out2 = tf.reshape(conv_out2, (1, -1, 1, final_dim))
     h_ff1 = tf.multiply(conv_out2, B, name="Bx")
@@ -131,9 +131,10 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False):
     h_ff7 = tf.scalar_mul(tf.sqrt(float(1 / final_dim)), tf.concat([h_ff7_1, h_ff7_2], axis=1))
     return h_ff7
+# todo: add this module to scikit-luc

 # --- Hadamard utils --- #

 def dimensionality_constraints(d):
     """
     Enforce d to be a power of 2
...
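For reference, the `B`, `H`, `G`, `P`, `S` variables renamed in this commit implement the Fastfood transform of Le, Sarlós, and Smola, which replaces the dense Gaussian matrix of random Fourier features by the structured product V = (1/(sigma*sqrt(d))) * S·H·G·P·H·B, computable in O(d log d) time via the Walsh-Hadamard transform. A dense NumPy sketch of a single stack (illustrative only: the layer above builds these factors as TensorFlow variables, applies H implicitly, and the exact placement of sigma and of the S scaling may differ):

```python
import numpy as np
from scipy.linalg import hadamard

def fastfood_matrix(d, sigma, seed=0):
    """Return V = (1/(sigma*sqrt(d))) * S H G P H B as a dense d x d matrix.

    d must be a power of 2 (hence dimensionality_constraints above).
    B: random signs; G: Gaussian diagonal; P: random permutation;
    S: rescaling so row norms match those of a Gaussian matrix;
    H: Walsh-Hadamard matrix.
    """
    rng = np.random.RandomState(seed)
    H = hadamard(d).astype(np.float64)
    B = np.diag(rng.choice([-1.0, 1.0], size=d))
    G = np.diag(rng.normal(size=d))
    P = np.eye(d)[rng.permutation(d)]
    # one common choice: chi-distributed lengths divided by ||G||_Frobenius
    s = np.sqrt(rng.chisquare(df=d, size=d)) / np.linalg.norm(np.diag(G))
    S = np.diag(s)
    return (1.0 / (sigma * np.sqrt(d))) * S @ H @ G @ P @ H @ B
```

The cos/sin concatenation at the end of the layer (`h_ff7`, scaled by 1/sqrt(d)) then turns Vx into the usual Fourier feature pair, analogous to the removed `random_features` but without ever materializing a dense d x d Gaussian matrix.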