diff --git a/main/deepfriedConvnetMnist.py b/main/deepfriedConvnetMnist.py
index 5a80f2496c7a53f595cd9f4d716168992e2c889e..ed03ef4306fe3d3568afe6d500abeeedd8cf4f4b 100644
--- a/main/deepfriedConvnetMnist.py
+++ b/main/deepfriedConvnetMnist.py
@@ -13,7 +13,8 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
 import tensorflow as tf
 import numpy as np
 import skluc.mldatasets as dataset
-from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2, get_next_batch
+from skluc.neural_networks import convolution_mnist, classification_mnist, batch_generator
+from fasfood_layer import fast_food
 
 tf.logging.set_verbosity(tf.logging.ERROR)
 
@@ -43,73 +44,6 @@ X_test = X_test.astype(np.float32)
 Y_train = Y_train.astype(np.float32)
 Y_test = Y_test.astype(np.float32)
 
-from fasfood_layer import fast_food
-
-# todo utiliser les fonctions adapate/definies pour ces couches de convolution
-def convolution_mnist(input):
-    with tf.name_scope("conv_pool_1"):
-        # 32 is the number of filter we'll use. e.g. the number of different
-        # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 20])
-        tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([20])
-        tf.summary.histogram("biases conv1", b_conv1)
-        # -1 is here to keep the total size constant (784)
-        h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
-        tf.summary.histogram("act conv1", h_conv1)
-        h_pool1 = max_pool_2x2(h_conv1)
-
-    with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 20, 50])
-        tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([50])
-        tf.summary.histogram("biases conv2", b_conv2)
-        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-        tf.summary.histogram("act conv2", h_conv2)
-        h_pool2 = max_pool_2x2(h_conv2)
-
-    return h_pool2
-
-
-# --- Random Fourier Features --- #
-
-def random_variable(shape, sigma):
-    W = np.random.normal(size=shape, scale=sigma).astype(np.float32)
-    return tf.Variable(W, name="random_Weights", trainable=False)
-
-
-def random_biases(shape):
-    b = np.random.uniform(0, 2 * np.pi, size=shape).astype(np.float32)
-    return tf.Variable(b, name="random_biase", trainable=False)
-
-
-# --- Representation Layer --- #
-
-def random_features(conv_out, sigma):
-    with tf.name_scope("random_features"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        conv_out2 = tf.reshape(conv_out, [-1, init_dim])
-
-        W = random_variable((init_dim, init_dim), sigma)
-        b = random_biases(init_dim)
-        h1 = tf.matmul(conv_out2, W, name="Wx") + b
-        h1_cos = tf.cos(h1)
-        h1_final = tf.scalar_mul(np.sqrt(2.0 / init_dim).astype(np.float32), h1_cos)
-        return h1_final
-
-
-def fully_connected(conv_out):
-    with tf.name_scope("fc_1"):
-        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
-        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
-        W_fc1 = weight_variable([init_dim, 4096*2])
-        b_fc1 = bias_variable([4096*2])
-        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
-        tf.summary.histogram("weights", W_fc1)
-        tf.summary.histogram("biases", b_fc1)
-
-        return h_fc1
-
 
 if __name__ == '__main__':
     SIGMA = 5.0
@@ -128,26 +62,16 @@ if __name__ == '__main__':
 
     # Representation layer
     h_conv = convolution_mnist(x_image)
-    # h_conv = x
     # out_fc = fully_connected(h_conv)  # 95% accuracy
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=1))  # 83% accuracy (conv) | 56% accuracy (noconv)
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2))
     # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, nbr_stack=2, trainable=True))
-    out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
-    # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
+    # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, trainable=True))  # 84% accuracy (conv) | 59% accuracy (noconv)
+    out_fc = fast_food(h_conv, SIGMA, nbr_stack=1, trainable=True)  # 84% accuracy (conv) | 59% accuracy (noconv)
     # out_fc = random_features(h_conv, SIGMA)  # 82% accuracy (conv) | 47% accuracy (noconv)
 
     # classification
-    with tf.name_scope("fc_2"):
-        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
-        h_fc1_drop = tf.nn.dropout(out_fc, keep_prob)
-        dim = np.prod([s.value for s in h_fc1_drop.shape if s.value is not None])
-        W_fc2 = weight_variable([dim, output_dim])
-        b_fc2 = bias_variable([output_dim])
-        tf.summary.histogram("weights", W_fc2)
-        tf.summary.histogram("biases", b_fc2)
-
-        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+    y_conv, keep_prob = classification_mnist(out_fc, output_dim)
 
     # calcul de la loss
     with tf.name_scope("xent"):
@@ -181,19 +105,20 @@ if __name__ == '__main__':
         # actual learning
         started = t.time()
         feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
-        for i in range(1100):
-            X_batch = get_next_batch(X_train, i, 64)
-            Y_batch = get_next_batch(Y_train, i, 64)
-            feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
-            # le _ est pour capturer le retour de "train_optimizer" qu'il faut appeler
-            # pour calculer le gradient mais dont l'output ne nous interesse pas
-            _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
-            if i % 100 == 0:
-                print('step {}, loss {} (with dropout)'.format(i, loss))
-                r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
-                print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
-            summary_str = sess.run(merged_summary, feed_dict=feed_dict)
-            summary_writer.add_summary(summary_str, i)
+        for _ in range(1):
+            i = 0
+            for X_batch, Y_batch in batch_generator(X_train, Y_train, 64, circle=True):
+                feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
+                # the _ captures the return value of "train_optimizer", which must be run
+                # to compute the gradient but whose output is not needed here
+                _, loss, y_result, x_exp = sess.run([train_optimizer, cross_entropy, y_conv, x_image], feed_dict=feed_dict)
+                if i % 100 == 0:
+                    print('step {}, loss {} (with dropout)'.format(i, loss))
+                    r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
+                    print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
+                summary_str = sess.run(merged_summary, feed_dict=feed_dict)
+                summary_writer.add_summary(summary_str, i)
+                i += 1
 
         stoped = t.time()
         accuracy, preds = sess.run([accuracy, predictions], feed_dict={
diff --git a/main/fasfood_layer.py b/main/fasfood_layer.py
index f45769ae50b42a536585b8cb88a398d30a524e19..e22002d8ebecee107e1b1a4d531a1e9c9e2e3a49 100644
--- a/main/fasfood_layer.py
+++ b/main/fasfood_layer.py
@@ -105,15 +105,15 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False):
         conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")
 
         G, G_norm = G_variable((nbr_stack, final_dim), trainable=trainable)
-        tf.summary.histogram("weights G", G)
+        tf.summary.histogram("weights_G", G)
         B = B_variable((nbr_stack, final_dim), trainable=trainable)
-        tf.summary.histogram("weights B", B)
+        tf.summary.histogram("weights_B", B)
         H = H_variable(final_dim)
tf.summary.histogram("weights H", H) + tf.summary.histogram("weights_H", H) P = P_variable(final_dim, nbr_stack) - tf.summary.histogram("weights P", P) + tf.summary.histogram("weights_P", P) S = S_variable((nbr_stack, final_dim), G_norm, trainable=trainable) - tf.summary.histogram("weights S", S) + tf.summary.histogram("weights_S", S) conv_out2 = tf.reshape(conv_out2, (1, -1, 1, final_dim)) h_ff1 = tf.multiply(conv_out2, B, name="Bx") @@ -131,9 +131,10 @@ def fast_food(conv_out, sigma, nbr_stack=1, trainable=False): h_ff7 = tf.scalar_mul(tf.sqrt(float(1 / final_dim)), tf.concat([h_ff7_1, h_ff7_2], axis=1)) return h_ff7 - +# todo ajouter ce module a scikit-luc # --- Hadamard utils --- # + def dimensionality_constraints(d): """ Enforce d to be a power of 2