From 64ca4df57e3fad4a1c6737669e5f2170d5628a5f Mon Sep 17 00:00:00 2001
From: Luc Giffon <luc.giffon@lif.univ-mrs.fr>
Date: Wed, 29 Nov 2017 14:51:04 +0100
Subject: [PATCH] build stacked fastfood - need to remove diagonal fastfood
 and do the stacks implicitly

---
 main/convnet_random.py | 61 +++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/main/convnet_random.py b/main/convnet_random.py
index 397461a..b6b11c8 100644
--- a/main/convnet_random.py
+++ b/main/convnet_random.py
@@ -42,9 +42,9 @@ def convolution(input):
     with tf.name_scope("conv_pool_1"):
         # 32 is the number of filters we'll use, i.e. the number of different
         # shapes this layer is able to recognize
-        W_conv1 = weight_variable([5, 5, 1, 32])
+        W_conv1 = weight_variable([5, 5, 1, 20])
         tf.summary.histogram("weights conv1", W_conv1)
-        b_conv1 = bias_variable([32])
+        b_conv1 = bias_variable([20])
         tf.summary.histogram("biases conv1", b_conv1)
         # -1 is here to keep the total size constant (784)
         h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
@@ -52,9 +52,9 @@ def convolution(input):
         h_pool1 = max_pool_2x2(h_conv1)

     with tf.name_scope("conv_pool_2"):
-        W_conv2 = weight_variable([5, 5, 32, 64])
+        W_conv2 = weight_variable([5, 5, 20, 50])
         tf.summary.histogram("weights conv2", W_conv2)
-        b_conv2 = bias_variable([64])
+        b_conv2 = bias_variable([50])
         tf.summary.histogram("biases conv2", b_conv2)
         h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
         tf.summary.histogram("act conv2", h_conv2)
@@ -77,7 +77,7 @@ def random_biases(shape):

 # --- Fast Food Naive --- #

-def G_variable(d, diag=True):
+def G_variable(d, diag=True, trainable=False):
     """
     Return a Gaussian random diagonal matrix converted into a TensorFlow Variable.
@@ -92,11 +92,10 @@ def G_variable(d, diag=True):
     else:
         G = np.random.normal(size=d).astype(np.float32)
     G_norm = np.linalg.norm(G, ord=2)
-    print("Norm of G is: {}".format(G_norm))
-    return tf.Variable(G, name="G", trainable=False), G_norm
+    return tf.Variable(G, name="G", trainable=trainable), G_norm


-def B_variable(d, diag=True):
+def B_variable(d, diag=True, trainable=False):
     """
     Return a random diagonal matrix of -1 and 1 values picked uniformly, as a TensorFlow Variable.
@@ -108,7 +107,7 @@
         B = np.diag(np.random.choice([-1, 1], size=d, replace=True)).astype(np.float32)
     else:
         B = np.random.choice([-1, 1], size=d, replace=True).astype(np.float32)
-    return tf.Variable(B, name="B", trainable=False)
+    return tf.Variable(B, name="B", trainable=trainable)


 def P_variable(d):
@@ -138,7 +137,7 @@ def H_variable(d):
     return tf.Variable(H, name="H", trainable=False)


-def S_variable(d, G_norm, diag=True):
+def S_variable(d, G_norm, diag=True, trainable=False):
     """
     Return a scaling diagonal matrix of random values drawn from a chi distribution.
@@ -154,7 +153,7 @@ def S_variable(d, G_norm, diag=True):
         S = np.diag((1 / G_norm) * scipy.stats.chi.rvs(d, size=d)).astype(np.float32)
     else:
         S = (1 / G_norm) * scipy.stats.chi.rvs(d, size=d).astype(np.float32)
-    return tf.Variable(S, name="S", trainable=False)
+    return tf.Variable(S, name="S", trainable=trainable)


 # --- Hadamard utils --- #
@@ -198,9 +197,8 @@ def random_features(conv_out, sigma):
     return h1_final


-def fast_food(conv_out, sigma, diag=True, trainable=False):
-    # todo use te trainable parameter
-    with tf.name_scope("fastfood"):
+def fast_food(conv_out, sigma, nbr_stack=1, diag=True, trainable=False, name="fastfood"):
+    with tf.name_scope(name + "_diag=" + str(diag) + "_sigma=" + str(sigma)):
         init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
         final_dim = int(dimensionality_constraints(init_dim))
         padding = final_dim - init_dim
@@ -208,15 +206,15 @@ def fast_food(conv_out, sigma, diag=True, trainable=False):
         paddings = tf.constant([[0, 0], [0, padding]])
         conv_out2 = tf.pad(conv_out2, paddings, "CONSTANT")

-        G, G_norm = G_variable(final_dim, diag=diag)
+        G, G_norm = G_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights G", G)
-        B = B_variable(final_dim, diag=diag)
+        B = B_variable(final_dim, diag=diag, trainable=trainable)
         tf.summary.histogram("weights B", B)
         H = H_variable(final_dim)
         tf.summary.histogram("weights H", H)
         P = P_variable(final_dim)
         tf.summary.histogram("weights P", P)
-        S = S_variable(final_dim, G_norm, diag=diag)
+        S = S_variable(final_dim, G_norm, diag=diag, trainable=trainable)
         tf.summary.histogram("weights S", S)

         if diag:
@@ -257,8 +255,17 @@ def fully_connected(conv_out):
     return h_fc1


+def stacked_fastfood(input_, nbr, sigma, diag=False, trainable=False):
+    l_outputs = []
+    for i in range(nbr):
+        output = fast_food(input_, sigma, diag=diag, trainable=trainable, name="fastfood" + str(i))
+        l_outputs.append(output)
+    outputs_stacked = tf.concat(l_outputs, axis=1)
+    return outputs_stacked
+
+
 if __name__ == '__main__':
-    SIGMA = 100.0
+    SIGMA = 5.0
     print("Sigma = {}".format(SIGMA))

     with tf.Graph().as_default():
@@ -274,9 +281,14 @@ if __name__ == '__main__':
         h_conv = convolution(x_image)
         # h_conv = x
         # out_fc = fully_connected(h_conv) # 95% accuracy
-        # out_fc = fast_food(h_conv, SIGMA) # 83% accuracy (conv) | 56% accuracy (noconv)
-        # out_fc = fast_food(h_conv, SIGMA, diag=False) # 84% accuracy (conv) | 59% accuracy (noconv)
-        out_fc = random_features(h_conv, SIGMA) # 82% accuracy (conv) | 47% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA)) # 83% accuracy (conv) | 56% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False)) # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False, trainable=True)) # 84% accuracy (conv) | 59% accuracy (noconv)
+        # todo: write a less naive implementation: tf blocks are only needed when I use diagonal
+        # matrices; otherwise I only need a few extra rows for the Hadamard matrix
+        out_fc = tf.nn.relu(stacked_fastfood(h_conv, 2, SIGMA, diag=False, trainable=True)) # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True) # 84% accuracy (conv) | 59% accuracy (noconv)
+        # out_fc = random_features(h_conv, SIGMA) # 82% accuracy (conv) | 47% accuracy (noconv)

         # classification
         with tf.name_scope("fc_2"):
@@ -315,15 +327,14 @@ if __name__ == '__main__':
         # Create a session for running Ops on the Graph.
         sess = tf.Session()
         # Instantiate a SummaryWriter to output summaries and the Graph.
-        summary_writer = tf.summary.FileWriter("results_deepfried")
+        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
         summary_writer.add_graph(sess.graph)
         # Initialize all Variable objects
         sess.run(init)
-        # actual learning
         started = t.time()
-        for i in range(500):
-            batch = mnist.train.next_batch(50)
+        for i in range(2000):
+            batch = mnist.train.next_batch(64)
             feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5}
             # the _ captures the return value of "train_optimizer", which must be called
             # to compute the gradient but whose output we don't care about
--
GitLab
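
Note on the Fastfood factors touched above: G_variable, B_variable, P_variable, H_variable and S_variable build the five factors of the Fastfood transform of Le, Sarlos and Smola (2013), V = (1 / (sigma * sqrt(d))) * S H G P H B, which emulates a dense Gaussian random projection while every factor is diagonal, a permutation, or the fixed Hadamard matrix. Below is a minimal dense NumPy sketch of that composition, assuming d is a power of two as dimensionality_constraints enforces; all names in it are illustrative, not taken from the patch.

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    d = 64                                                  # must be a power of two
    sigma = 5.0

    B = np.diag(np.random.choice([-1.0, 1.0], size=d))      # random sign flips
    G = np.random.normal(size=d)                            # Gaussian diagonal, kept as a vector
    P = np.eye(d)[np.random.permutation(d)]                 # random permutation matrix
    H = hadamard(d).astype(np.float64)                      # Walsh-Hadamard matrix
    S = np.diag(scipy.stats.chi.rvs(d, size=d) / np.linalg.norm(G, ord=2))  # chi rescaling

    # V emulates a d x d Gaussian random matrix; a real implementation never
    # materializes it and applies H with a fast transform in O(d log d)
    V = (1.0 / (sigma * np.sqrt(d))) * S @ H @ np.diag(G) @ P @ H @ B

    x = np.random.rand(d)
    z = V @ x   # the random projection that fast_food computes per input

The dense matmuls are only for clarity: the point of Fastfood is that H never needs to be stored, and the remaining factors cost O(d) each.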
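
One possible reading of the "do the stacks implicitly" TODO, sketched under assumptions rather than taken from the patch: instead of building nbr separate fast_food subgraphs the way stacked_fastfood does, draw the B, G, P, S randomness for all stacks at once as (nbr, d) arrays and share a single Hadamard matrix. The NumPy function below (its name and signature are hypothetical) shows the idea.

    import numpy as np
    import scipy.stats
    from scipy.linalg import hadamard

    def implicit_stacked_fastfood(x, nbr, sigma, seed=0):
        """Stacked Fastfood with one shared Hadamard matrix and per-stack
        diagonals stored as (nbr, d) arrays instead of nbr separate graphs."""
        rng = np.random.RandomState(seed)
        d = x.shape[0]                       # assumed a power of two already
        H = hadamard(d).astype(np.float64)   # symmetric, so x @ H == H @ x

        B = rng.choice([-1.0, 1.0], size=(nbr, d))              # sign flips
        G = rng.normal(size=(nbr, d))                           # Gaussian diagonals
        P = np.array([rng.permutation(d) for _ in range(nbr)])  # permutations
        S = scipy.stats.chi.rvs(d, size=(nbr, d), random_state=rng) \
            / np.linalg.norm(G, axis=1, keepdims=True)          # chi rescaling

        out = (B * x) @ H                              # H B x for every stack at once
        out = G * np.take_along_axis(out, P, axis=1)   # G P H B x
        out = out @ H                                  # H G P H B x
        out = S * out / (sigma * np.sqrt(d))           # final scaling
        return out.reshape(-1)                         # concatenated stacks

With d = 64 and nbr = 2 this yields 128 features, the same shape tf.concat(l_outputs, axis=1) produces in stacked_fastfood above, but with a single Hadamard matrix and one batched pass over the diagonals.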