diff --git a/main/experiments/parameter_files/november_2018/lazyfile_classif_end_to_end_subsample_conv_hand.yml b/main/experiments/parameter_files/november_2018/lazyfile_classif_end_to_end_subsample_conv_hand.yml
index 738deb29e4ab5c9374c5b0ae5df44a986bd0d68a..4be003dc5a4eff16caeb0f4016013f4eca268343 100644
--- a/main/experiments/parameter_files/november_2018/lazyfile_classif_end_to_end_subsample_conv_hand.yml
+++ b/main/experiments/parameter_files/november_2018/lazyfile_classif_end_to_end_subsample_conv_hand.yml
@@ -1,6 +1,4 @@
 all:
-  dense:
-# deepfried:
   deepstrom:
 
 base:
@@ -9,21 +7,11 @@ base:
   val_size: {"-v": [10000]}
   seed: {"-a": "range(1)"}
   quiet: ["-q"]
-  dataset: ["--mnist", "--cifar10", "--cifar100", "--svhn"]
-
-dense:
-  network: ["dense"]
-  base:
-    repr_dim: {"-D": [16, 64, 128, 1024]}
-
-deepfried:
-  network: ["deepfriedconvnet"]
-  base:
-    nbstacks: {"-N": [1, 3, 5, 7]}
+  dataset: ["--mnist", "--cifar10", "--svhn"]
 
 deepstrom:
   network: ["deepstrom"]
   base:
     nys_size: {"-m": [4, 8, 16, 64, 128, 256, 512]}
-    kernel: ["-C", "-L"]
+    kernel: ["-C"]
 
diff --git a/main/experiments/scripts/november_2018/end_to_end_subsample_conv_hand_kernel_loss/deepstrom_classif_end_to_end.py b/main/experiments/scripts/november_2018/end_to_end_subsample_conv_hand_kernel_loss/deepstrom_classif_end_to_end.py
index 30b5b4b08d6b0c1e4e7d6f88f08e5ed8cb0c1ea1..12bc54a2de4270fed00f94ad6fe35b0520610109 100644
--- a/main/experiments/scripts/november_2018/end_to_end_subsample_conv_hand_kernel_loss/deepstrom_classif_end_to_end.py
+++ b/main/experiments/scripts/november_2018/end_to_end_subsample_conv_hand_kernel_loss/deepstrom_classif_end_to_end.py
@@ -54,7 +54,7 @@ from tensorflow.python.keras.layers import Dense
 from tensorflow.python.keras.models import Sequential
 
 from skluc.main.tensorflow_.kernel_approximation.fastfood_layer import FastFoodLayer
-from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd
+from skluc.main.tensorflow_.kernel_approximation.nystrom_layer import DeepstromLayerEndToEnd, KernelLayerEndToEnd
 from skluc.main.tensorflow_.models import build_lenet_model, build_vgg19_model
 from skluc.main.utils import logger, memory_usage, ParameterManager, ResultManager, ResultPrinter
 from skluc.main.tensorflow_.utils import batch_generator
@@ -155,7 +155,14 @@ def main(paraman, resman, printman):
     subsample_indexes = data.get_uniform_class_rand_indices_validation(paraman["--nys-size"])
     nys_subsample = data.validation.data[subsample_indexes]
-
+    y_subsample = data.validation.labels[subsample_indexes]
+
+    y_train_y_subsample = np.zeros((X_train.shape[0], paraman["--nys-size"]))  # for each train example, marks the subsample examples of the same class
+    for i in range(X_train.shape[0]):
+        y_train_y_subsample[i][np.where(np.argmax(y_subsample, axis=1) == np.argmax(y_train[i]))[0]] = 1.
+    y_test_y_subsample = np.zeros((X_test.shape[0], paraman["--nys-size"]))  # for each test example, marks the subsample examples of the same class
+    for i in range(X_test.shape[0]):
+        y_test_y_subsample[i][np.where(np.argmax(y_subsample, axis=1) == np.argmax(y_test[i]))[0]] = 1.
 
     #
     # Model definition
     input_dim = X_train.shape[1:]
@@ -169,14 +176,33 @@ def main(paraman, resman, printman):
 
     repr_x = convnet_model(x)
     repr_sub = tf.placeholder(tf.float32, shape=[paraman["--nys-size"], *repr_x.shape[1:]], name="subsample_conv_input")
+    y_sub = tf.placeholder(tf.float32, shape=[None, paraman["--nys-size"]], name="subsample_labels")
 
-    deepstrom_layer = DeepstromLayerEndToEnd(subsample_size=paraman["--nys-size"],
+    kernel_layer = KernelLayerEndToEnd(subsample_size=paraman["--nys-size"],
                                        kernel_name=paraman["kernel"],
-                                       kernel_dict=paraman["kernel_dict"],
-                                       activation=paraman["deepstrom_activation"],
-                                       out_dim=paraman["--out-dim"])
+                                       kernel_dict=paraman["kernel_dict"]
+                                       )
+
+    kernel_vec = kernel_layer([repr_x, repr_sub])
+
+    with tf.name_scope("mse"):
+        mse_kernel_vec = tf.losses.mean_squared_error(predictions=kernel_vec, labels=y_sub)  # drives the kernel vector toward the class-match targets
+        tf.summary.scalar('mse_kernel_vec', mse_kernel_vec)
+
+    if paraman["deepstrom_activation"] == "tan":
+        activation_fct = tf.nn.tanh
+    elif paraman["deepstrom_activation"] == "relu":
+        activation_fct = tf.nn.relu
+    else:
+        activation_fct = None
 
-    input_classifier = deepstrom_layer([repr_x, repr_sub])
+    if paraman["--out-dim"] is not None and paraman["--out-dim"] > paraman["--nys-size"]:
+        raise ValueError("Output dim {} is greater than deepstrom subsample size {}. Aborting.".format(paraman["--out-dim"], paraman["--nys-size"]))
+    elif paraman["--out-dim"] is None:
+        output_dim_w_nys = paraman["--nys-size"]
+    else:
+        output_dim_w_nys = paraman["--out-dim"]
+    input_classifier = Dense(output_dim_w_nys, activation=activation_fct)(kernel_vec)
 
     with tf.variable_scope("classification"):
         classif_layer = Dense(output_dim)
@@ -193,8 +219,11 @@ def main(paraman, resman, printman):
     with tf.name_scope("train"):
         global_step = tf.Variable(0, name="global_step", trainable=False)
         lr = 1e-4
-        train_optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cross_entropy,
+        train_optimizer_classif = tf.train.AdamOptimizer(learning_rate=lr).minimize(cross_entropy,
                                                                             global_step=global_step)
+        train_optimizer_kernel_vec = tf.train.AdamOptimizer(learning_rate=lr).minimize(mse_kernel_vec,
+                                                                                       global_step=global_step)
+        train_op = tf.group(train_optimizer_classif, train_optimizer_kernel_vec)
 
     # compute the accuracy
     with tf.name_scope("accuracy"):
@@ -228,28 +257,34 @@ def main(paraman, resman, printman):
 
         j = 0
         for i in range(paraman["--num-epoch"]):
            logger.debug(memory_usage())
-
-            for X_batch, Y_batch in batch_generator(X_train, y_train, paraman["--batch-size"], False):
+            k = 0
+            for i_batch, (X_batch, Y_batch) in enumerate(batch_generator(X_train, y_train, paraman["--batch-size"], False)):
+                idx_start_batch = i_batch * paraman["--batch-size"]
+                idx_stop_batch = (i_batch + 1) * paraman["--batch-size"]
+                y_train_y_subsample_batch = y_train_y_subsample[idx_start_batch:idx_stop_batch]  # stays aligned with X_batch because batch_generator does not shuffle here
+                logger.debug(np.argmax(Y_batch, axis=1))
+                logger.debug(np.argmax(y_subsample, axis=1))
+                logger.debug(y_train_y_subsample_batch)
                 if j % paraman["--subs-every"] == 0:
                     feed_dict = {x: nys_subsample}
                     computed_repr_sub, = sess.run([repr_x], feed_dict=feed_dict)
                 try:
-                    feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub}
+                    feed_dict = {x: X_batch, y: Y_batch, repr_sub: computed_repr_sub, y_sub: y_train_y_subsample_batch}
                 except NameError as e:
                     logger.error("A representation of the subsample must have been computed at least once for it to be used in the deepstrom")
                     raise e
-                _, loss, acc, summary_str = sess.run([train_optimizer, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
+                _, loss, acc, summary_str = sess.run([train_op, cross_entropy, accuracy_op, merged_summary], feed_dict=feed_dict)
                 if j % 100 == 0:
                     logger.info(
-                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"], j + 1,
-                                                                                                int(data.train[0].shape[
-                                                                                                    0] / paraman["--batch-size"]) + 1,
+                        "epoch: {}/{}; batch: {}/{}; batch_shape: {}; loss: {}; acc: {}".format(i, paraman["--num-epoch"],
+                                                                                                k + 1, int(data.train[0].shape[0] / paraman["--batch-size"]) + 1,
                                                                                                 X_batch.shape, loss, acc))
                 if paraman["--tensorboard"]:
                     summary_writer.add_summary(summary_str, j)
                 j += 1
+                k += 1
 
 
         logger.info("Evaluation on validation data")
         training_time = t.time() - global_start
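
Note on the class-match matrices: the two Python loops added in the @@ -155 hunk can be
replaced by a single vectorized comparison. A minimal NumPy sketch, assuming one-hot
labels as elsewhere in the script (the loop version above is what the patch actually does):

    import numpy as np

    sub_classes = np.argmax(y_subsample, axis=1)    # class index of each subsample point, shape (m,)
    train_classes = np.argmax(y_train, axis=1)      # class index of each train example, shape (n,)
    # Entry (i, j) is 1.0 iff train example i and subsample point j share a class,
    # i.e. the same semantics as y_train_y_subsample built by the loop.
    y_train_y_subsample = (train_classes[:, None] == sub_classes[None, :]).astype(np.float64)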
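
Note on the grouped optimizers: both minimize() calls in the @@ -193 hunk are wired to
the same global_step, so grouping them makes the step counter advance by two per batch.
If a single increment per batch is preferred, a hypothetical alternative (not what this
patch does) is to optimize the summed loss with one op:

    # One optimizer over the combined objective; global_step now advances once per batch.
    total_loss = cross_entropy + mse_kernel_vec
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(total_loss,
                                                                 global_step=global_step)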
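
Note on batch/target alignment: slicing y_train_y_subsample with idx_start_batch and
idx_stop_batch is only valid if batch_generator yields batches in order, which the
trailing False argument appears to guarantee (assuming that flag disables shuffling).
A shuffle-proof sketch would derive the targets from Y_batch inside the loop instead
(hypothetical, same semantics as the precomputed slice):

    # Recompute the per-batch class-match targets from the batch labels themselves,
    # so the result is correct even if the generator shuffles the training set.
    y_train_y_subsample_batch = (np.argmax(Y_batch, axis=1)[:, None]
                                 == np.argmax(y_subsample, axis=1)[None, :]).astype(np.float64)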