Skip to content
Snippets Groups Projects
Commit 0d9edeb1 authored by Luc Giffon's avatar Luc Giffon
Browse files

support of the mnist dataset comming from scikit-luc

parent 1aabf81f
No related branches found
No related tags found
No related merge requests found
...@@ -12,11 +12,28 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son ...@@ -12,11 +12,28 @@ Zichao Yang, Marcin Moczulski, Misha Denil, Nando de Freitas, Alex Smola, Le Son
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import skluc.mldatasets as dataset
tf.logging.set_verbosity(tf.logging.ERROR)
import time as t import time as t
from tensorflow.examples.tutorials.mnist import input_data from sklearn.preprocessing import LabelBinarizer
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
enc = LabelBinarizer()
mnist = dataset.MnistDataset()
mnist = mnist.load()
X_train, Y_train = mnist["train"]
X_train = np.array(X_train / 255)
enc.fit(Y_train)
Y_train = np.array(enc.transform(Y_train))
X_test, Y_test = mnist["test"]
X_test = np.array(X_test / 255)
Y_test = np.array(enc.transform(Y_test))
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
Y_train = Y_train.astype(np.float32)
Y_test = Y_test.astype(np.float32)
from fasfood_layer import fast_food from fasfood_layer import fast_food
...@@ -105,10 +122,25 @@ def fully_connected(conv_out): ...@@ -105,10 +122,25 @@ def fully_connected(conv_out):
return h_fc1 return h_fc1
def mnist_dims(): def get_next_batch(full_set, batch_nbr, batch_size):
input_dim = int(mnist.train.images.shape[1]) """
output_dim = int(mnist.train.labels.shape[1]) Return the next batch of a dataset.
return input_dim, output_dim
This function assumes that all the previous batches of this dataset have been taken with the same size.
:param full_set: the full dataset from which the batch will be taken
:param batch_nbr: the number of the batch
:param batch_size: the size of the batch
:return:
"""
index_start = (batch_nbr * batch_size) % full_set.shape[0]
index_stop = ((batch_nbr + 1) * batch_size) % full_set.shape[0]
if index_stop > index_start:
return full_set[index_start:index_stop]
else:
part1 = full_set[index_start:]
part2 = full_set[:index_stop]
return np.vstack((part1, part2))
if __name__ == '__main__': if __name__ == '__main__':
...@@ -116,8 +148,8 @@ if __name__ == '__main__': ...@@ -116,8 +148,8 @@ if __name__ == '__main__':
print("Sigma = {}".format(SIGMA)) print("Sigma = {}".format(SIGMA))
with tf.Graph().as_default(): with tf.Graph().as_default():
# todo parametrize datset # todo parametrize dataset
input_dim, output_dim = mnist_dims() input_dim, output_dim = X_train.shape[1], Y_train.shape[1]
x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x") x = tf.placeholder(tf.float32, shape=[None, input_dim], name="x")
y_ = tf.placeholder(tf.float32, shape=[None, output_dim], name="labels") y_ = tf.placeholder(tf.float32, shape=[None, output_dim], name="labels")
...@@ -181,9 +213,10 @@ if __name__ == '__main__': ...@@ -181,9 +213,10 @@ if __name__ == '__main__':
sess.run(init) sess.run(init)
# actual learning # actual learning
started = t.time() started = t.time()
for i in range(20000): for i in range(1100):
batch = mnist.train.next_batch(64) X_batch = get_next_batch(X_train, i, 64)
feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5} Y_batch = get_next_batch(Y_train, i, 64)
feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
# le _ est pour capturer le retour de "train_optimizer" qu'il faut appeler # le _ est pour capturer le retour de "train_optimizer" qu'il faut appeler
# pour calculer le gradient mais dont l'output ne nous interesse pas # pour calculer le gradient mais dont l'output ne nous interesse pas
_, loss = sess.run([train_optimizer, cross_entropy], feed_dict=feed_dict) _, loss = sess.run([train_optimizer, cross_entropy], feed_dict=feed_dict)
...@@ -191,12 +224,12 @@ if __name__ == '__main__': ...@@ -191,12 +224,12 @@ if __name__ == '__main__':
print('step {}, loss {} (with dropout)'.format(i, loss)) print('step {}, loss {} (with dropout)'.format(i, loss))
summary_str = sess.run(merged_summary, feed_dict=feed_dict) summary_str = sess.run(merged_summary, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, i) summary_writer.add_summary(summary_str, i)
stoped = t.time()
stoped = t.time()
accuracy, preds = sess.run([accuracy, predictions], feed_dict={ accuracy, preds = sess.run([accuracy, predictions], feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}) x: X_test, y_: Y_test, keep_prob: 1.0})
print('test accuracy %g' % accuracy) print('test accuracy %g' % accuracy)
np.set_printoptions(threshold=np.nan) np.set_printoptions(threshold=np.nan)
print("Prediction sample: " + str(preds[:50])) print("Prediction sample: " + str(preds[:50]))
print("Actual values: " + str(np.argmax(mnist.test.labels[:50], 1))) print("Actual values: " + str(np.argmax(Y_test[:50], axis=1)))
print("Elapsed time: %.4f s" % (stoped - started)) print("Elapsed time: %.4f s" % (stoped - started))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment