"""
nystrom_vs_deepstrom: Compute accuracy efficiency of the nystrom method vs deepstrom.

Usage:
    nystrom_vs_deepstrom (--nystroem | --deepstrom) [-e numepoch -s batchsize -G gammavalue -m subsamplesize]
    nystrom_vs_deepstrom -h | --help

Options:
    -h --help                                               Show this screen.
    --nystroem                                              Run the nystroem version.
    --deepstrom                                             Run the deepstrom version.
    -G gammavalue --gamma-nystrom=gammavalue                The gamma value used in nystrom.
    -m subsamplesize --subsample-size-nystrom=subsamplesize The subsample size for nystrom.
    -e numepoch --num-epoch=numepoch                        The number of epoch. [default: 1]
    -s batchsize --batch-size=batchsize                     The number of example in each batch [default: 50]
"""

import tensorflow as tf
import numpy as np
import skluc.mldatasets as dataset
from sklearn.kernel_approximation import Nystroem
from sklearn.svm import SVC

from skluc.tensorflow.kernel_approximation.nystrom_approx import nystrom_layer
from skluc.tensorflow.utils import batch_generator, classification_mnist

# Restrict TensorFlow's logger to ERROR-level messages so that lower-severity
# log lines are not emitted alongside the comma-separated result line printed below.
tf.logging.set_verbosity(tf.logging.ERROR)

import docopt


def deepstrom_classif(X_train,
                      Y_train,
                      X_nystrom,
                      batch_size,
                      num_epoch,
                      dataset_cycling,
                      gamma,
                      data_shape,
                      output_dim,
                      output_nystrom_layer,
                      X_test=None,
                      Y_test=None):
    """Train a network built on ``nystrom_layer`` and print one CSV result line.

    A fresh TensorFlow graph is built in which the input goes through
    ``nystrom_layer`` (fed with the fixed, non-trainable ``X_nystrom``
    subsample) and then through ``classification_mnist``. The network is
    trained with Adam (learning rate 1e-4) on a softmax cross-entropy loss.

    The line printed to stdout is
    ``accuracy,subsample_size,gamma,batch_size,num_epoch``. When no test data
    is supplied the accuracy field is the string ``None``.

    :param X_train: training inputs, fed to a float32 placeholder of shape
        ``[None, *data_shape]``.
    :param Y_train: training labels, fed to a float32 placeholder of shape
        ``[None, output_dim]`` (one row of class scores per example).
    :param X_nystrom: initial value of the non-trainable subsample variable
        handed to ``nystrom_layer``.
    :param batch_size: batch size forwarded to ``batch_generator``.
    :param num_epoch: number of passes over ``batch_generator``'s output.
    :param dataset_cycling: forwarded unchanged to ``batch_generator``.
    :param gamma: forwarded unchanged to ``nystrom_layer``.
    :param data_shape: shape of a single input example (without batch axis).
    :param output_dim: width of the label placeholder and of the classifier
        output.
    :param output_nystrom_layer: ``output_dim`` argument of ``nystrom_layer``.
    :param X_test: optional test inputs; evaluation is skipped unless both
        ``X_test`` and ``Y_test`` are given.
    :param Y_test: optional test labels, same layout as ``Y_train``.
    :return: None; the result is only printed.
    """
    with tf.Graph().as_default():

        x = tf.placeholder(tf.float32, shape=[None, *data_shape], name="x")
        y_ = tf.placeholder(tf.float32, shape=[None, output_dim], name="labels")
        x_nystrom = tf.Variable(X_nystrom, name="nystrom_subsample", trainable=False)

        out_fc = nystrom_layer(x, x_nystrom, gamma, output_dim=output_nystrom_layer)
        # keep_prob is a tensor that must be fed on every run; presumably the
        # dropout keep probability of the classification head -- confirm
        # against skluc.tensorflow.utils.classification_mnist.
        y_conv, keep_prob = classification_mnist(out_fc, output_dim=output_dim)

        # loss computation
        with tf.name_scope("xent"):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv, name="xentropy"),
                name="xentropy_mean")

        # gradient computation / training step
        with tf.name_scope("train"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy,
                                                                                  global_step=global_step)

        # accuracy computation
        with tf.name_scope("accuracy"):
            predictions = tf.argmax(y_conv, 1)
            correct_prediction = tf.equal(predictions, tf.argmax(y_, 1))
            accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        init = tf.global_variables_initializer()
        # Create a session for running Ops on the Graph.
        with tf.Session() as sess:
            # Initialize all Variable objects
            sess.run(init)
            # actual learning
            for _ in range(num_epoch):
                for X_batch, Y_batch in batch_generator(X_train, Y_train, batch_size, dataset_cycling):
                    feed_dict = {x: X_batch, y_: Y_batch, keep_prob: 0.5}
                    # The loss value was fetched but never used; running the
                    # training op alone still computes it internally.
                    sess.run(train_optimizer, feed_dict=feed_dict)

            # Testing is optional: accuracy stays None when no test set is
            # given, and is then reported as "None" instead of crashing on
            # an attempt to index None.
            accuracy = None
            if X_test is not None and Y_test is not None:
                accuracy = sess.run(accuracy_op, feed_dict={
                    x: X_test, y_: Y_test, keep_prob: 1.0})

            lst_output = [str(accuracy), str(x_nystrom.shape[0]), str(gamma), str(batch_size), str(num_epoch)]
            print(",".join(lst_output))


def nystroem_classif(X_train, Y_train, X_test, Y_test, subsample, gamma):
    """Fit a linear SVC on Nystroem RBF features and print one CSV result line.

    The ``Nystroem`` transformer is fitted on ``subsample`` with
    ``len(subsample)`` components and the given ``gamma``. Train and test
    inputs are then mapped through it, a linear-kernel ``SVC`` is fitted on
    the transformed training data and scored on the transformed test data.

    The line printed to stdout is ``score,subsample_size,gamma``.

    :param X_train: training inputs.
    :param Y_train: training labels passed to ``SVC.fit``.
    :param X_test: test inputs.
    :param Y_test: test labels passed to ``SVC.score``.
    :param subsample: examples the Nystroem transformer is fitted on.
    :param gamma: ``gamma`` parameter of the RBF kernel.
    :return: None; the result is only printed.
    """
    n_landmarks = len(subsample)

    feature_map = Nystroem(kernel="rbf", gamma=gamma, n_components=n_landmarks)
    feature_map.fit(subsample)
    train_features = feature_map.transform(X_train)
    test_features = feature_map.transform(X_test)

    classifier = SVC(kernel="linear")
    classifier.fit(train_features, Y_train)
    test_score = classifier.score(test_features, Y_test)

    print(",".join(str(field) for field in (test_score, n_landmarks, gamma)))


# Script entry point: parse the command line, load MNIST, draw the Nystrom
# subsample and run the requested experiment (each prints one CSV line).
if __name__ == "__main__":
    arguments = docopt.docopt(__doc__)

    # -m and -G are optional in the usage pattern and declare no default, so
    # docopt reports them as None when omitted. Both experiments need them:
    # fail with an explicit message rather than a TypeError from int()/float().
    missing_options = [option
                       for option in ("--subsample-size-nystrom", "--gamma-nystrom")
                       if arguments[option] is None]
    if missing_options:
        raise SystemExit("Missing required option(s): {}".format(", ".join(missing_options)))

    SUBSAMPLE_SIZE = int(arguments["--subsample-size-nystrom"])
    gamma = float(arguments["--gamma-nystrom"])
    nystroem = arguments["--nystroem"]
    deepstrom = arguments["--deepstrom"]
    # Going through float() also accepts values written like "1e1" or "10.0".
    num_epoch = int(float(arguments["--num-epoch"]))
    batch_size = int(arguments["--batch-size"])

    mnist = dataset.MnistDataset()
    mnist.load()
    mnist.normalize()
    # Fixed seed: both experiments draw the same subsample indexes.
    np.random.seed(0)
    # NOTE(review): 60000 is presumably the number of MNIST training
    # examples -- confirm against skluc.mldatasets.MnistDataset.
    indexes_nystrom = np.random.permutation(60000)[:SUBSAMPLE_SIZE]

    if nystroem:
        X_train, Y_train = mnist.train
        X_test, Y_test = mnist.test
        X_subsample = X_train[indexes_nystrom]
        nystroem_classif(X_train=X_train,
                         Y_train=Y_train,
                         X_test=X_test,
                         Y_test=Y_test,
                         subsample=X_subsample,
                         gamma=gamma)
    elif deepstrom:
        # The TensorFlow model feeds float32 placeholders and reads the
        # number of classes from the second axis of the label array.
        mnist.to_one_hot()
        mnist.data_astype(np.float32)
        mnist.labels_astype(np.float32)
        X_train, Y_train = mnist.train
        X_test, Y_test = mnist.test
        X_subsample = X_train[indexes_nystrom]
        deepstrom_classif(X_train=X_train,
                          Y_train=Y_train,
                          X_test=X_test,
                          Y_test=Y_test,
                          gamma=gamma,
                          data_shape=X_train.shape[1:],
                          output_dim=Y_train.shape[1],
                          dataset_cycling=False,
                          num_epoch=num_epoch,
                          output_nystrom_layer=SUBSAMPLE_SIZE,
                          X_nystrom=X_subsample,
                          batch_size=batch_size)