Commit 5a9c44bd authored by Luc Giffon

first try for nystrom layer

parent 9dd0f2c9
"""
Convnet with Nyström approximation of the feature map.
"""
import time as t

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer

import skluc.mldatasets as dataset
from skluc.neural_networks import bias_variable, weight_variable, conv2d, max_pool_2x2, get_next_batch

tf.logging.set_verbosity(tf.logging.ERROR)

enc = LabelBinarizer()
mnist = dataset.MnistDataset()
mnist = mnist.load()

X_train, Y_train = mnist["train"]
X_train = np.array(X_train / 255)
enc.fit(Y_train)
Y_train = np.array(enc.transform(Y_train))

X_test, Y_test = mnist["test"]
X_test = np.array(X_test / 255)
Y_test = np.array(enc.transform(Y_test))

X_train = X_train.astype(np.float32)
permut = np.random.permutation(X_train.shape[0])
val_size = 5000
X_val = X_train[permut[:val_size]]
Y_val = Y_train[permut[:val_size]]
X_train = X_train[permut[val_size:]]
Y_train = Y_train[permut[val_size:]]

X_test = X_test.astype(np.float32)
Y_train = Y_train.astype(np.float32)
Y_test = Y_test.astype(np.float32)

NYSTROM_SAMPLE_SIZE = 500
# X_train is already shuffled by permut above, so this picks
# NYSTROM_SAMPLE_SIZE random training examples as landmarks
X_nystrom = X_train[np.random.permutation(NYSTROM_SAMPLE_SIZE)]
def convolution_mnist(input, trainable=True):
    with tf.name_scope("conv_pool_1"):
        # 20 is the number of filters we'll use, i.e. the number of different
        # shapes this layer is able to recognize
        W_conv1 = weight_variable([5, 5, 1, 20], trainable=trainable)
        tf.summary.histogram("weights conv1", W_conv1)
        b_conv1 = bias_variable([20], trainable=trainable)
        tf.summary.histogram("biases conv1", b_conv1)
        h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
        tf.summary.histogram("act conv1", h_conv1)
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope("conv_pool_2"):
        W_conv2 = weight_variable([5, 5, 20, 50], trainable=trainable)
        tf.summary.histogram("weights conv2", W_conv2)
        b_conv2 = bias_variable([50], trainable=trainable)
        tf.summary.histogram("biases conv2", b_conv2)
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        tf.summary.histogram("act conv2", h_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
    return h_pool2
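
# Note: main() applies convolution_mnist twice inside one variable scope, first
# to the input batch and then to the Nyström landmarks, calling
# reuse_variables() in between so that both passes use the same convolutional
# weights (the landmark pass is built with trainable=False).
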
def fully_connected(conv_out):
    # dense baseline layer (not used by main())
    with tf.name_scope("fc_1"):
        init_dim = np.prod([s.value for s in conv_out.shape if s.value is not None])
        h_pool2_flat = tf.reshape(conv_out, [-1, init_dim])
        W_fc1 = weight_variable([init_dim, 4096 * 2])
        b_fc1 = bias_variable([4096 * 2])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        tf.summary.histogram("weights", W_fc1)
        tf.summary.histogram("biases", b_fc1)
    return h_fc1
def tf_rbf_kernel(X, Y, gamma):
    # pairwise squared distances via ||x - y||^2 = ||x||^2 - 2 <x, y> + ||y||^2,
    # then K[i, j] = exp(-gamma * ||X_i - Y_j||^2) as in sklearn's rbf_kernel
    r1 = tf.reduce_sum(X * X, axis=1)
    r1 = tf.reshape(r1, [-1, 1])
    r2 = tf.reduce_sum(Y * Y, axis=1)
    r2 = tf.reshape(r2, [1, -1])
    K = tf.matmul(X, tf.transpose(Y))
    K = r1 - 2 * K + r2
    K *= -gamma
    K = tf.exp(K)
    return K
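
# Usage sketch (feature_batch and landmark_feats are illustrative names, not
# variables defined in this file):
#
#   K = tf_rbf_kernel(feature_batch, landmark_feats, gamma=0.001)
#
# K has shape [n_examples, n_landmarks] and matches sklearn's
# rbf_kernel(feature_batch, landmark_feats, gamma=0.001) up to float32
# precision (checked by the unit tests below).
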
def main():
    GAMMA = 0.001
    print("Gamma = {}".format(GAMMA))
    with tf.Graph().as_default():
        input_dim, output_dim = X_train.shape[1], Y_train.shape[1]
        x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
        x_nystrom = tf.Variable(X_nystrom, name="nystrom_subsample", trainable=False)
        y_ = tf.placeholder(tf.float32, shape=[None, output_dim], name="labels")

        # side_size is the width (= height) of the square input images
        side_size = int(np.sqrt(input_dim))
        # -1 lets TensorFlow infer the batch dimension (each image keeps its 784 pixels)
        x_image = tf.reshape(x, [-1, side_size, side_size, 1])
        x_nystrom_image = tf.reshape(x_nystrom, [NYSTROM_SAMPLE_SIZE, side_size, side_size, 1])
        tf.summary.image("digit", x_image, max_outputs=3)

        # Representation layer
        with tf.variable_scope("convolution_mnist") as scope_conv_mnist:
            h_conv = convolution_mnist(x_image)
            scope_conv_mnist.reuse_variables()
            h_conv_nystrom_subsample = convolution_mnist(x_nystrom_image, trainable=False)

        init_dim = np.prod([s.value for s in h_conv.shape[1:] if s.value is not None])
        h_conv_flat = tf.reshape(h_conv, [-1, init_dim])
        h_conv_nystrom_subsample_flat = tf.reshape(h_conv_nystrom_subsample, [NYSTROM_SAMPLE_SIZE, init_dim])
        # Nystrom layer: kernel evaluations against the landmarks followed by a
        # learned symmetric factor V diag(D) V^T
        with tf.name_scope("kernel_vec"):
            kernel_vector = tf_rbf_kernel(h_conv_flat, h_conv_nystrom_subsample_flat, gamma=GAMMA)
            D = weight_variable((NYSTROM_SAMPLE_SIZE,))
            V = weight_variable((NYSTROM_SAMPLE_SIZE, NYSTROM_SAMPLE_SIZE))
            # tf.multiply(D, V) scales the columns of V by D, so out_fc is
            # kernel_vector @ (V diag(D) V^T)
            out_fc = tf.matmul(kernel_vector, tf.matmul(tf.multiply(D, V), tf.transpose(V)))
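
        # For reference, the closed-form Nyström factor could be computed outside
        # the graph from an eigendecomposition of the landmark Gram matrix.
        # Illustrative NumPy sketch (landmark_feats standing for the evaluated
        # h_conv_nystrom_subsample_flat; not code used by this model):
        #
        #   K_mm = rbf_kernel(landmark_feats, landmark_feats, gamma=GAMMA)
        #   lam, U = np.linalg.eigh(K_mm)            # K_mm = U diag(lam) U^T
        #   W_ref = U @ np.diag(1.0 / np.sqrt(np.maximum(lam, 1e-12))) @ U.T
        #   phi = k_vec @ W_ref                      # classical Nystrom features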
        # classification
        with tf.name_scope("fc_2"):
            keep_prob = tf.placeholder(tf.float32, name="keep_prob")
            h_fc1_drop = tf.nn.dropout(out_fc, keep_prob)
            dim = np.prod([s.value for s in h_fc1_drop.shape if s.value is not None])
            W_fc2 = weight_variable([dim, output_dim])
            b_fc2 = bias_variable([output_dim])
            tf.summary.histogram("weights", W_fc2)
            tf.summary.histogram("biases", b_fc2)
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        # loss computation
        with tf.name_scope("xent"):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv, name="xentropy"),
                name="xentropy_mean")
            tf.summary.scalar('loss-xent', cross_entropy)

        # gradient computation
        with tf.name_scope("train"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy, global_step=global_step)

        # accuracy computation
        with tf.name_scope("accuracy"):
            predictions = tf.argmax(y_conv, 1)
            correct_prediction = tf.equal(predictions, tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar("accuracy", accuracy)

        merged_summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter("results_deepfried_stacked")
        summary_writer.add_graph(sess.graph)
        # Initialize all Variable objects
        sess.run(init)
        # actual learning
        started = t.time()
        feed_dict_val = {x: X_val, y_: Y_val, keep_prob: 1.0}
        for i in range(10000):
            X_batch = get_next_batch(X_train, i, 64)
            Y_batch = get_next_batch(Y_train, i, 64)
            feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
            # the _ captures the return value of "train_optimizer", which must be
            # run to apply the gradient step but whose output is of no interest
            _, loss, y_result, x_exp, k_vec, eigenvec = sess.run(
                [train_optimizer, cross_entropy, y_conv, x_image, kernel_vector, V],
                feed_dict=feed_dict)
            if i % 100 == 0:
                print(k_vec[0])
                print("Difference with identity:", np.linalg.norm(eigenvec - np.eye(*eigenvec.shape)))
                print('step {}, loss {} (with dropout)'.format(i, loss))
                r_accuracy = sess.run([accuracy], feed_dict=feed_dict_val)
                print("accuracy: {} on validation set (without dropout).".format(r_accuracy))
                summary_str = sess.run(merged_summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, i)
        stopped = t.time()

        accuracy, preds = sess.run([accuracy, predictions],
                                   feed_dict={x: X_test, y_: Y_test, keep_prob: 1.0})
        print('test accuracy %g' % accuracy)
        np.set_printoptions(threshold=np.nan)
        print("Prediction sample: " + str(preds[:50]))
        print("Actual values: " + str(np.argmax(Y_test[:50], axis=1)))
        print("Elapsed time: %.4f s" % (stopped - started))


if __name__ == '__main__':
    main()
import unittest

import numpy as np
import tensorflow as tf
from sklearn.metrics.pairwise import rbf_kernel

import skluc.mldatasets as dataset
from main.nystrom.nystrom_approx import tf_rbf_kernel


class TestNystrom(unittest.TestCase):
    def setUp(self):
        mnist = dataset.MnistDataset()
        mnist = mnist.load()
        X_train, Y_train = mnist["train"]
        X_train = np.array(X_train / 255)
        X_test, Y_test = mnist["test"]
        X_test = np.array(X_test / 255)
        X_train = X_train.astype(np.float32)
        permut = np.random.permutation(X_train.shape[0])
        val_size = 5000
        X_val = X_train[permut[:val_size]]
        X_train = X_train[permut[val_size:]]
        Y_val = Y_train[permut[:val_size]]
        Y_train = Y_train[permut[val_size:]]
        X_test = X_test.astype(np.float32)
        Y_train = Y_train.astype(np.float32)
        Y_test = Y_test.astype(np.float32)
        self.X_val = X_val
        self.Y_val = Y_val
        self.X_train = X_train
        self.Y_train = Y_train
        self.X_test = X_test
        self.Y_test = Y_test
        # TODO: remove this truncation
        self.X_val = self.X_val[:100]
        self.sess = tf.InteractiveSession()
    def test_tf_rbf_kernel(self):
        gamma = 0.01
        # full Gram matrix of the validation set against itself
        expected_rbf_kernel = rbf_kernel(self.X_val, self.X_val, gamma=gamma)
        obtained_rbf_kernel = tf_rbf_kernel(self.X_val, self.X_val, gamma=gamma).eval()
        difference_rbf_kernel = np.linalg.norm(expected_rbf_kernel - obtained_rbf_kernel)
        self.assertAlmostEqual(difference_rbf_kernel, 0, delta=1e-5)

        # single kernel value between two examples
        example1 = self.X_val[0].reshape((1, -1))
        example2 = self.X_val[1].reshape((1, -1))
        expected_rbf_kernel_value = rbf_kernel(example1, example2, gamma=gamma)
        obtained_rbf_kernel_value = tf_rbf_kernel(example1, example2, gamma=gamma).eval()
        difference_rbf_kernel_value = np.linalg.norm(expected_rbf_kernel_value - obtained_rbf_kernel_value)
        self.assertAlmostEqual(difference_rbf_kernel_value, 0, delta=1e-5)

        # kernel vector between one example and the whole validation set
        expected_rbf_kernel_vector = rbf_kernel(example1, self.X_val, gamma=gamma)
        obtained_rbf_kernel_vector = tf_rbf_kernel(example1, self.X_val, gamma=gamma).eval()
        difference_rbf_kernel_vector = np.linalg.norm(expected_rbf_kernel_vector - obtained_rbf_kernel_vector)
        self.assertAlmostEqual(difference_rbf_kernel_vector, 0, delta=1e-5)
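
    def test_tf_rbf_kernel_gram_properties(self):
        # Complementary sanity check (sketch): the RBF Gram matrix of a set with
        # itself should be symmetric with ones on its diagonal, since
        # k(x, x) = exp(0) = 1.
        gamma = 0.01
        K = tf_rbf_kernel(self.X_val, self.X_val, gamma=gamma).eval()
        self.assertTrue(np.allclose(K, K.T, atol=1e-5))
        self.assertTrue(np.allclose(np.diag(K), np.ones(K.shape[0]), atol=1e-5))
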
    def tearDown(self):
        self.sess.close()


if __name__ == '__main__':
    unittest.main()