diff --git a/main/nystrom/__init__.py b/main/nystrom/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/main/nystrom/nystrom_approx.py b/main/nystrom/nystrom_approx.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a3939edc8f5a95feb5491fc7abd249a55198d64
--- /dev/null
+++ b/main/nystrom/nystrom_approx.py
@@ -0,0 +1,201 @@
+"""
+Convnet with Nyström approximation of the feature map.
+"""
+import sys
+import time as t
+
+import tensorflow as tf
+import numpy as np
+from sklearn.preprocessing import LabelBinarizer
+
+import skluc.mldatasets as dataset
+from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2, get_next_batch
+
+tf.logging.set_verbosity(tf.logging.ERROR)
+
+# Load MNIST, scale pixels to [0, 1] and one-hot encode the labels.
+enc = LabelBinarizer()
+mnist = dataset.MnistDataset()
+mnist = mnist.load()
+X_train, Y_train = mnist["train"]
+X_train = np.array(X_train / 255)
+enc.fit(Y_train)
+Y_train = np.array(enc.transform(Y_train))
+X_test, Y_test = mnist["test"]
+X_test = np.array(X_test / 255)
+Y_test = np.array(enc.transform(Y_test))
+
+# Hold out a validation set from the training data.
+X_train = X_train.astype(np.float32)
+permut = np.random.permutation(X_train.shape[0])
+val_size = 5000
+
+X_val = X_train[permut[:val_size]]
+Y_val = Y_train[permut[:val_size]]
+X_train = X_train[permut[val_size:]]
+Y_train = Y_train[permut[val_size:]]
+X_test = X_test.astype(np.float32)
+Y_train = Y_train.astype(np.float32)
+Y_test = Y_test.astype(np.float32)
+
+NYSTROM_SAMPLE_SIZE = 500
+# Draw the landmark points from the whole training set; indexing with
+# np.random.permutation(NYSTROM_SAMPLE_SIZE) would only shuffle the first
+# NYSTROM_SAMPLE_SIZE rows.
+X_nystrom = X_train[np.random.choice(X_train.shape[0], NYSTROM_SAMPLE_SIZE, replace=False)]
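+
+# Note on the Nyström approximation (comment added for context): with
+# m = NYSTROM_SAMPLE_SIZE landmark points L, the full kernel matrix is
+# approximated as K ≈ k(., L) k(L, L)^{-1} k(L, .), so each example only needs
+# its m kernel evaluations against the landmarks rather than against the whole
+# training set, bringing the kernel layer cost down from O(n^2) to O(n * m)
+# for n training points.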
+
+
+def convolution_mnist(input, trainable=True):
+    with tf.name_scope("conv_pool_1"):
+        # 20 is the number of filters we'll use, e.g. the number of different
+        # shapes this layer is able to recognize
+        W_conv1 = weight_variable([5, 5, 1, 20], trainable=trainable)
+        tf.summary.histogram("weights conv1", W_conv1)
+        b_conv1 = bias_variable([20], trainable=trainable)
+        tf.summary.histogram("biases conv1", b_conv1)
+        h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
+        tf.summary.histogram("act conv1", h_conv1)
+        h_pool1 = max_pool_2x2(h_conv1)
+
+    with tf.name_scope("conv_pool_2"):
+        W_conv2 = weight_variable([5, 5, 20, 50], trainable=trainable)
+        tf.summary.histogram("weights conv2", W_conv2)
+        b_conv2 = bias_variable([50], trainable=trainable)
+        tf.summary.histogram("biases conv2", b_conv2)
+        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
+        tf.summary.histogram("act conv2", h_conv2)
+        h_pool2 = max_pool_2x2(h_conv2)
+
+    return h_pool2
+
+
+def fully_connected(conv_out):
+    # Dense baseline layer; kept for reference, not used by main().
+    with tf.name_scope("fc_1"):
+        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
+        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
+        W_fc1 = weight_variable([init_dim, 4096*2])
+        b_fc1 = bias_variable([4096*2])
+        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
+        tf.summary.histogram("weights", W_fc1)
+        tf.summary.histogram("biases", b_fc1)
+
+    return h_fc1
+
+
+def tf_rbf_kernel(X, Y, gamma):
+    """Return the RBF kernel matrix exp(-gamma * ||x - y||^2) between the rows
+    of X and the rows of Y, using ||x - y||^2 = ||x||^2 - 2 <x, y> + ||y||^2."""
+    r1 = tf.reduce_sum(X * X, axis=1)
+    r1 = tf.reshape(r1, [-1, 1])
+    r2 = tf.reduce_sum(Y * Y, axis=1)
+    r2 = tf.reshape(r2, [1, -1])
+    K = tf.matmul(X, tf.transpose(Y))
+    K = r1 - 2 * K + r2
+    K *= -gamma
+    K = tf.exp(K)
+    return K
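+
+# A minimal sanity check for tf_rbf_kernel (added sketch; this mirrors what
+# main/test/test_nystromtf.py verifies against scikit-learn):
+#
+#     from sklearn.metrics.pairwise import rbf_kernel
+#     X = np.random.rand(10, 784).astype(np.float32)
+#     with tf.Session() as s:
+#         K_tf = s.run(tf_rbf_kernel(tf.constant(X), tf.constant(X), gamma=0.01))
+#     assert np.allclose(K_tf, rbf_kernel(X, X, gamma=0.01), atol=1e-5)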
with tf.name_scope("xent"): + cross_entropy = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv, name="xentropy"), + name="xentropy_mean") + tf.summary.scalar('loss-xent', cross_entropy) + + # # calcul du gradient + with tf.name_scope("train"): + global_step = tf.Variable(0, name="global_step", trainable=False) + train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy, global_step=global_step) + + # # calcul de l'accuracy + with tf.name_scope("accuracy"): + predictions = tf.argmax(y_conv, 1) + correct_prediction = tf.equal(predictions, tf.argmax(y_, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + tf.summary.scalar("accuracy", accuracy) + + merged_summary = tf.summary.merge_all() + + init = tf.global_variables_initializer() + # Create a session for running Ops on the Graph. + sess = tf.Session() + # Instantiate a SummaryWriter to output summaries and the Graph. + summary_writer = tf.summary.FileWriter("results_deepfried_stacked") + summary_writer.add_graph(sess.graph) + # Initialize all Variable objects + sess.run(init) + # actual learning + started = t.time() + feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0} + for i in range(10000): + X_batch = get_next_batch(X_train, i, 64) + Y_batch = get_next_batch(Y_train, i, 64) + feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5} + # le _ est pour capturer le retour de "train_optimizer" qu'il faut appeler + # pour calculer le gradient mais dont l'output ne nous interesse pas + _, loss, y_result, x_exp, k_vec, eigenvec = sess.run([train_optimizer, cross_entropy, y_conv, x_image, kernel_vector, V], feed_dict=feed_dict) + if i % 100 == 0: + print(k_vec[0]) + print("Difference with identity:", np.linalg.norm(eigenvec - np.eye(*eigenvec.shape))) + print('step {}, loss {} (with dropout)'.format(i, loss)) + r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val) + print("accuracy: {} on validation set (without dropout).".format(r_accuracy)) + summary_str = sess.run(merged_summary, feed_dict=feed_dict) + summary_writer.add_summary(summary_str, i) + + stoped = t.time() + accuracy, preds = sess.run([accuracy, predictions], feed_dict={ + x: X_test, y_: Y_test, keep_prob: 1.0}) + print('test accuracy %g' % accuracy) + np.set_printoptions(threshold=np.nan) + print("Prediction sample: " + str(preds[:50])) + print("Actual values: " + str(np.argmax(Y_test[:50], axis=1))) + print("Elapsed time: %.4f s" % (stoped - started)) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/main/test/test_nystromtf.py b/main/test/test_nystromtf.py new file mode 100644 index 0000000000000000000000000000000000000000..f2970a074553d90a4664a6c4f2a46dd6ea745037 --- /dev/null +++ b/main/test/test_nystromtf.py @@ -0,0 +1,66 @@ +import unittest +import tensorflow as tf +import numpy as np +from sklearn.metrics.pairwise import rbf_kernel + +import skluc.mldatasets as dataset + +from main.nystrom.nystrom_approx import tf_rbf_kernel + + +class TestNystrom(unittest.TestCase): + def setUp(self): + mnist = dataset.MnistDataset() + mnist = mnist.load() + X_train, Y_train = mnist["train"] + X_train = np.array(X_train / 255) + X_test, Y_test = mnist["test"] + X_test = np.array(X_test / 255) + X_train = X_train.astype(np.float32) + permut = np.random.permutation(X_train.shape[0]) + val_size = 5000 + X_val = X_train[permut[:val_size]] + X_train = X_train[permut[val_size:]] + Y_val = Y_train[permut[:val_size]] + Y_train = Y_train[permut[val_size:]] + X_test = 
+
+        # classification layer
+        with tf.name_scope("fc_2"):
+            keep_prob = tf.placeholder(tf.float32, name="keep_prob")
+            h_fc1_drop = tf.nn.dropout(out_fc, keep_prob)
+            dim = np.prod([s.value for s in h_fc1_drop.shape if s.value is not None])
+            W_fc2 = weight_variable([dim, output_dim])
+            b_fc2 = bias_variable([output_dim])
+            tf.summary.histogram("weights", W_fc2)
+            tf.summary.histogram("biases", b_fc2)
+
+            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+
+        # loss computation
+        with tf.name_scope("xent"):
+            cross_entropy = tf.reduce_mean(
+                tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv, name="xentropy"),
+                name="xentropy_mean")
+            tf.summary.scalar('loss-xent', cross_entropy)
+
+        # gradient computation
+        with tf.name_scope("train"):
+            global_step = tf.Variable(0, name="global_step", trainable=False)
+            train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy, global_step=global_step)
+
+        # accuracy computation
+        with tf.name_scope("accuracy"):
+            predictions = tf.argmax(y_conv, 1)
+            correct_prediction = tf.equal(predictions, tf.argmax(y_, 1))
+            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+            tf.summary.scalar("accuracy", accuracy)
+
+        merged_summary = tf.summary.merge_all()
+
+        init = tf.global_variables_initializer()
+        # Create a session for running Ops on the Graph.
+        sess = tf.Session()
+        # Instantiate a SummaryWriter to output summaries and the Graph.
+        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
+        summary_writer.add_graph(sess.graph)
+        # Initialize all Variable objects
+        sess.run(init)
+        # actual learning
+        started = t.time()
+        feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
+        for i in range(10000):
+            X_batch = get_next_batch(X_train, i, 64)
+            Y_batch = get_next_batch(Y_train, i, 64)
+            feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
+            # the _ captures the return value of "train_optimizer", which must be
+            # run to apply the gradient step but whose output we do not need
+            _, loss, y_result, x_exp, k_vec, eigenvec = sess.run([train_optimizer, cross_entropy, y_conv, x_image, kernel_vector, V], feed_dict=feed_dict)
+            if i % 100 == 0:
+                print(k_vec[0])
+                print("Difference with identity:", np.linalg.norm(eigenvec - np.eye(*eigenvec.shape)))
+                print('step {}, loss {} (with dropout)'.format(i, loss))
+                r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
+                print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
+                summary_str = sess.run(merged_summary, feed_dict=feed_dict)
+                summary_writer.add_summary(summary_str, i)
+
+        stopped = t.time()
+        # Use a distinct name so the accuracy tensor is not shadowed by its value.
+        test_accuracy, preds = sess.run([accuracy, predictions], feed_dict={
+            x: X_test, y_: Y_test, keep_prob: 1.0})
+        print('test accuracy %g' % test_accuracy)
+        np.set_printoptions(threshold=sys.maxsize)
+        print("Prediction sample: " + str(preds[:50]))
+        print("Actual values: " + str(np.argmax(Y_test[:50], axis=1)))
+        print("Elapsed time: %.4f s" % (stopped - started))
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/main/test/test_nystromtf.py b/main/test/test_nystromtf.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2970a074553d90a4664a6c4f2a46dd6ea745037
--- /dev/null
+++ b/main/test/test_nystromtf.py
@@ -0,0 +1,66 @@
+import unittest
+import tensorflow as tf
+import numpy as np
+from sklearn.metrics.pairwise import rbf_kernel
+
+import skluc.mldatasets as dataset
+
+from main.nystrom.nystrom_approx import tf_rbf_kernel
+
+
+class TestNystrom(unittest.TestCase):
+    def setUp(self):
+        mnist = dataset.MnistDataset()
+        mnist = mnist.load()
+        X_train, Y_train = mnist["train"]
+        X_train = np.array(X_train / 255)
+        X_test, Y_test = mnist["test"]
+        X_test = np.array(X_test / 255)
+        X_train = X_train.astype(np.float32)
+        permut = np.random.permutation(X_train.shape[0])
+        val_size = 5000
+        X_val = X_train[permut[:val_size]]
+        X_train = X_train[permut[val_size:]]
+        Y_val = Y_train[permut[:val_size]]
+        Y_train = Y_train[permut[val_size:]]
+        X_test = X_test.astype(np.float32)
+        Y_train = Y_train.astype(np.float32)
+        Y_test = Y_test.astype(np.float32)
+
+        self.X_val = X_val
+        self.Y_val = Y_val
+        self.X_train = X_train
+        self.Y_train = Y_train
+        self.X_test = X_test
+        self.Y_test = Y_test
+
+        # TODO: remove this truncation (kept to make the test fast)
+        self.X_val = self.X_val[:100]
+
+        self.sess = tf.InteractiveSession()
+
+    def test_tf_rbf_kernel(self):
+        gamma = 0.01
+        # Full kernel matrix against scikit-learn's reference implementation.
+        expected_rbf_kernel = rbf_kernel(self.X_val, self.X_val, gamma=gamma)
+        obtained_rbf_kernel = tf_rbf_kernel(self.X_val, self.X_val, gamma=gamma).eval()
+        difference_rbf_kernel = np.linalg.norm(expected_rbf_kernel - obtained_rbf_kernel)
+        self.assertAlmostEqual(difference_rbf_kernel, 0, delta=1e-5)
+
+        # Single kernel value between two examples.
+        example1 = self.X_val[0].reshape((1, -1))
+        example2 = self.X_val[1].reshape((1, -1))
+        expected_rbf_kernel_value = rbf_kernel(example1, example2, gamma=gamma)
+        obtained_rbf_kernel_value = tf_rbf_kernel(example1, example2, gamma=gamma).eval()
+        difference_rbf_kernel_value = np.linalg.norm(expected_rbf_kernel_value - obtained_rbf_kernel_value)
+        self.assertAlmostEqual(difference_rbf_kernel_value, 0, delta=1e-5)
+
+        # Kernel vector between one example and the whole sample.
+        expected_rbf_kernel_vector = rbf_kernel(example1, self.X_val, gamma=gamma)
+        obtained_rbf_kernel_vector = tf_rbf_kernel(example1, self.X_val, gamma=gamma).eval()
+        difference_rbf_kernel_vector = np.linalg.norm(expected_rbf_kernel_vector - obtained_rbf_kernel_vector)
+        self.assertAlmostEqual(difference_rbf_kernel_vector, 0, delta=1e-5)
+
+    def tearDown(self):
+        self.sess.close()
+
+
+if __name__ == '__main__':
+    unittest.main()