diff --git a/main/deepfriedConvnetMnist.py b/main/deepfriedConvnetMnist.py
index 2ebe4f62f8c344086ca96aadcaa7038defe6b1a1..8b5ecb05fd2774f0122100635862e241685661b8 100644
--- a/main/deepfriedConvnetMnist.py
+++ b/main/deepfriedConvnetMnist.py
@@ -13,6 +13,7 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
 import tensorflow as tf
 import numpy as np
 import skluc.mldatasets as dataset
+from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2
 
 tf.logging.set_verbosity(tf.logging.ERROR)
 
@@ -30,33 +31,20 @@ Y_train = np.array(enc.transform(Y_train))
 X_test, Y_test = mnist["test"]
 X_test = np.array(X_test / 255)
 Y_test = np.array(enc.transform(Y_test))
+
 X_train = X_train.astype(np.float32)
+permut = np.random.permutation(X_train.shape[0])
+val_size = 5000
+X_val = X_train[permut[:val_size]]
+X_train = X_train[permut[val_size:]]
+Y_val = Y_train[permut[:val_size]]
+Y_train = Y_train[permut[val_size:]]
 X_test = X_test.astype(np.float32)
 Y_train = Y_train.astype(np.float32)
 Y_test = Y_test.astype(np.float32)
 
 from fasfood_layer import fast_food
 
-# --- Usual functions --- #
-
-def weight_variable(shape):
-    initial = tf.truncated_normal(shape, stddev=0.1)
-    return tf.Variable(initial, name="weights")
-
-
-def bias_variable(shape):
-    initial = tf.constant(0.1, shape=shape)
-    return tf.Variable(initial, name="biases")
-
-
-def conv2d(x, W):
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
-
-
-def max_pool_2x2(x):
-    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
-                          strides=[1, 2, 2, 1], padding='SAME')
-
 
 def convolution_mnist(input):
     with tf.name_scope("conv_pool_1"):
@@ -112,8 +100,9 @@ def random_features(conv_out, sigma):
 
 def fully_connected(conv_out):
     with tf.name_scope("fc_1"):
-        h_pool2_flat = tf.reshape(conv_out, [-1, 7 * 7 * 64])
-        W_fc1 = weight_variable([7 * 7 * 64, 4096*2])
+        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
+        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
+        W_fc1 = weight_variable([init_dim, 4096*2])
         b_fc1 = bias_variable([4096*2])
         h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
         tf.summary.histogram("weights", W_fc1)
@@ -148,7 +137,6 @@ if __name__ == '__main__':
     print("Sigma = {}".format(SIGMA))
 
     with tf.Graph().as_default():
-        # todo parametrize dataset
        input_dim, output_dim = X_train.shape[1], Y_train.shape[1]
 
        x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
@@ -162,9 +150,9 @@ if __name__ == '__main__':
         # Representation layer
         h_conv = convolution_mnist(x_image)
         # h_conv = x
-        # out_fc = fully_connected(h_conv)  # 95% accuracy
+        out_fc = fully_connected(h_conv)  # 95% accuracy
         # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=1))  # 83% accuracy (conv) | 56% accuracy (noconv)
-        out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
         # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2, trainable=True))
         # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
         # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
@@ -213,15 +201,18 @@ if __name__ == '__main__':
         sess.run(init)
         # actual learning
         started = t.time()
+        feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
         for i in range(1100):
             X_batch = get_next_batch(X_train, i, 64)
             Y_batch = get_next_batch(Y_train, i, 64)
             feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
             # the _ captures the return value of "train_optimizer", which must be run
             # to compute the gradient but whose output is of no interest to us
-            _, loss = sess.run([train_optimizer, cross_entropy], feed_dict=feed_dict)
+            _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
             if i % 100 == 0:
                 print('step {}, loss {} (with dropout)'.format(i, loss))
+                r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
+                print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
             summary_str = sess.run(merged_summary, feed_dict=feed_dict)
             summary_writer.add_summary(summary_str, i)