Mise à jour de la version pour MNIST, correction de quelques bugs : fonctionnel

02ce1166 · Luc Giffon · 0d9edeb1 · 02ce1166
Commit 02ce1166 authored 7 years ago by Luc Giffon
--- a/main/deepfriedConvnetMnist.py
+++ b/main/deepfriedConvnetMnist.py
@@ -13,6 +13,7 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
 import tensorflow as tf
 import numpy as np
 import skluc.mldatasets as dataset
+from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2

 tf.logging.set_verbosity(tf.logging.ERROR)

@@ -30,33 +31,20 @@ Y_train = np.array(enc.transform(Y_train))
 X_test, Y_test = mnist["test"]
 X_test = np.array(X_test / 255)
 Y_test = np.array(enc.transform(Y_test))
+
 X_train = X_train.astype(np.float32)
+permut = np.random.permutation(X_train.shape[0])
+val_size = 5000
+X_val = X_train[permut[:val_size]]
+X_train = X_train[permut[val_size:]]
+Y_val = Y_train[permut[:val_size]]
+Y_train = Y_train[permut[val_size:]]
 X_test = X_test.astype(np.float32)
 Y_train = Y_train.astype(np.float32)
 Y_test = Y_test.astype(np.float32)

 from fasfood_layer import fast_food

-# --- Usual functions --- #
-
-def weight_variable(shape):
-    initial = tf.truncated_normal(shape, stddev=0.1)
-    return tf.Variable(initial, name="weights")
-
-
-def bias_variable(shape):
-    initial = tf.constant(0.1, shape=shape)
-    return tf.Variable(initial, name="biases")
-
-
-def conv2d(x, W):
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
-
-
-def max_pool_2x2(x):
-    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
-                          strides=[1, 2, 2, 1], padding='SAME')
-

 def convolution_mnist(input):
    with tf.name_scope("conv_pool_1"):
@@ -112,8 +100,9 @@ def random_features(conv_out, sigma):

 def fully_connected(conv_out):
    with tf.name_scope("fc_1"):
-        h_pool2_flat = tf.reshape(conv_out, [-1, 7 * 7 * 64])
-        W_fc1 = weight_variable([7 * 7 * 64, 4096*2])
+        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
+        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
+        W_fc1 = weight_variable([init_dim, 4096*2])
        b_fc1 = bias_variable([4096*2])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        tf.summary.histogram("weights", W_fc1)
@@ -148,7 +137,6 @@ if __name__ == '__main__':
    print("Sigma = {}".format(SIGMA))

    with tf.Graph().as_default():
-        # todo parametrize dataset
        input_dim, output_dim = X_train.shape[1], Y_train.shape[1]

        x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
@@ -162,9 +150,9 @@ if __name__ == '__main__':
        # Representation layer
        h_conv = convolution_mnist(x_image)
        # h_conv = x
-        # out_fc = fully_connected(h_conv)  # 95% accuracy
+        out_fc = fully_connected(h_conv)  # 95% accuracy
        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=1))  # 83% accuracy (conv) | 56% accuracy (noconv)
-        out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2, trainable=True))
        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
        # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
@@ -213,15 +201,18 @@ if __name__ == '__main__':
        sess.run(init)
        # actual learning
        started = t.time()
+        feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
        for i in range(1100):
            X_batch = get_next_batch(X_train, i, 64)
            Y_batch = get_next_batch(Y_train, i, 64)
            feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
            # the _ captures the return value of "train_optimizer", which must be run
            # to compute the gradient but whose output is of no interest to us
-            _, loss = sess.run([train_optimizer, cross_entropy], feed_dict=feed_dict)
+            _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
            if i % 100 == 0:
                print('step {}, loss {} (with dropout)'.format(i, loss))
+                r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
+                print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
                summary_str = sess.run(merged_summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, i)