Commit 16c2c1cd authored by Luc Giffon's avatar Luc Giffon
add model functions + fix encoding in executioner and cluger + change default pattern in gather_results + add nystrom end-to-end layer class + add ParameterManager class etc.
parent 7707a0f8
import os
import pickle
import tarfile

import numpy as np

from skluc.main.data.mldatasets.ImageDataset import ImageDataset
from skluc.main.utils import LabeledData
from skluc.main.utils import logger, check_files


class Cifar100FineDataset(ImageDataset):
    HEIGHT = 32
    WIDTH = 32
    DEPTH = 3

    def __init__(self, validation_size=0, seed=None, s_download_dir=None):
        self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
        self.meta = None
        name = "cifar100fine"
        if s_download_dir is not None:
            super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed)
        else:
            super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed)

        self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-100-python")
        self.__extracted_files = [
            'train',
            'test',
            'meta'
        ]
        self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files]
    def get_cifar100_data(self, keyword):
        """
        Get data from the extracted files whose name contains the keyword.

        :param keyword: substring to look for in the extracted file names (e.g. 'train' or 'test')
        :return: tuple (data, labels) of numpy arrays
        """
        full_data = []
        full_labels = []
        for fpath in self.__extracted_file_paths:
            if keyword in fpath.split('/')[-1]:
                with open(fpath, 'rb') as f:
                    pckl_data = pickle.load(f, encoding='bytes')
                    full_data.append(pckl_data[b'data'])
                    full_labels.append(pckl_data[b'fine_labels'])
        final_data = np.vstack(full_data)
        final_label = np.hstack(full_labels)
        return final_data, final_label
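
    # Illustrative note, not in the original file: each row of final_data is a
    # 3072-long uint8 vector stored as three 1024-entry colour planes (R, G, B),
    # the standard CIFAR pickle layout, so images can be recovered with e.g.
    #   images = final_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)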
    def get_meta(self):
        """
        Get the fine label names of cifar100 from the extracted meta file.

        :return: numpy array of fine label names
        """
        for fpath in self.__extracted_file_paths:
            if 'meta' in fpath.split('/')[-1]:
                with open(fpath, 'rb') as f:
                    pckl_data = pickle.load(f, encoding='bytes')
                    meta = pckl_data[b'fine_label_names']
        return np.array(meta)
    def read(self):
        targz_file_path = self.l_filepaths[-1]
        if not check_files(self.__extracted_file_paths):
            logger.debug("Extracting {} ...".format(targz_file_path))
            with tarfile.open(targz_file_path, "r:gz") as tar:
                tar.extractall(path=self.s_download_dir)
        else:
            logger.debug("File {} has already been extracted".format(targz_file_path))

        logger.debug("Get training data of dataset {}".format(self.s_name))
        self._train = LabeledData(*self.get_cifar100_data('train'))
        logger.debug("Get testing data of dataset {}".format(self.s_name))
        self._test = LabeledData(*self.get_cifar100_data('test'))
        self.meta = self.get_meta()
        self._check_validation_size(self._train[0].shape[0])
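
# Hedged usage sketch, not part of the original file. It assumes the
# ImageDataset base class fetches the archive before read() is called (the
# read() method above only extracts and unpickles it); attribute names follow
# the code above.
if __name__ == "__main__":
    dataset = Cifar100FineDataset(validation_size=1000, seed=0)
    dataset.read()
    data, labels = dataset._train              # LabeledData namedtuple
    print(data.shape, labels.shape)            # expected: (50000, 3072) (50000,)
    print(dataset.meta[:3])                    # first three fine label names (bytes)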
...@@ -311,6 +311,120 @@ class DeepstromLayer(tf.keras.layers.Layer):
        return out
class DeepstromLayerEndToEnd(tf.keras.layers.Layer):
    def __init__(self,
                 subsample_size,
                 kernel_name,
                 out_dim=None,
                 activation=None,
                 sum_of_kernels=False,
                 stack_of_kernels=False,
                 kernel_dict=None
                 ):
        # avoid the mutable default argument pitfall: a {} default would be
        # shared across all instances of the layer
        if kernel_dict is None:
            kernel_dict = {}

        def init_kernel():
            if kernel_name == "rbf":
                kernel_fct = rbf_kernel
                tf_kernel_fct = tf_rbf_kernel
            elif kernel_name == "linear":
                kernel_fct = linear_kernel
                tf_kernel_fct = tf_linear_kernel
            elif kernel_name == "chi2_cpd":
                kernel_fct = additive_chi2_kernel
                tf_kernel_fct = tf_chi_square_CPD
            elif kernel_name == "chi2_exp_cpd":
                kernel_fct = chi2_kernel
                tf_kernel_fct = tf_chi_square_CPD_exp
            elif kernel_name == "chi2_pd":
                raise NotImplementedError("Check carefully that this code does not mess things up")
            elif kernel_name == "laplacian":
                tf_kernel_fct = tf_laplacian_kernel
                kernel_fct = laplacian_kernel
            else:
                raise ValueError("Unknown kernel name: {}".format(kernel_name))
            return kernel_name, kernel_fct, tf_kernel_fct, kernel_dict
        def init_output_dim(subsample_size):
            if out_dim is not None and out_dim > subsample_size:
                # raising is preferable to exit() inside a library layer
                raise ValueError("Output dim {} is greater than deepstrom subsample size {}."
                                 .format(out_dim, subsample_size))
            elif out_dim is None:
                return subsample_size
            else:
                return out_dim

        def init_activation():
            # accept both spellings; ParameterManager uses "tanh"
            if activation in ("tan", "tanh"):
                activation_fct = tf.nn.tanh
            elif activation == "relu":
                activation_fct = tf.nn.relu
            else:
                activation_fct = activation
            return activation_fct
        super().__init__()

        self.__subsample_size = subsample_size
        self.__sum_of_kernels = sum_of_kernels
        self.__stack_of_kernels = stack_of_kernels

        self.__kernel_name, self.__kernel_fct, self.__tf_kernel_fct, self.__kernel_dict = init_kernel()
        self.__output_dim = init_output_dim(self.__subsample_size)
        self.__activation = init_activation()

        self.__W_matrix = None

        logger.info("Selecting deepstrom layer function with "
                    "subsample size = {}, "
                    "output_dim = {}, "
                    "{} activation function "
                    "and kernel = {}"
                    .format(self.__subsample_size,
                            self.__output_dim,
                            "with" if self.__activation else "without",
                            self.__kernel_name))

    def build(self, input_shape):
        if self.__output_dim != 0:
            # output_dim == 0 means there is no W matrix and the kernel vector is directly added as input to
            # the next layer
            self.__W_matrix = self.add_variable(
                name="W_nystrom",
                shape=[self.__subsample_size, self.__output_dim],
                initializer=tf.random_normal_initializer(stddev=0.1),
                trainable=True
            )
    def call(self, inputs, **kwargs):
        if not isinstance(inputs, list):
            raise ValueError("Inputs of layer deepstrom should be a list")
        if len(inputs[0].shape) != 2:
            raise ValueError(f"Input x should be 2D but it is {len(inputs[0].shape)}D")
        if len(inputs[1].shape) != 2:
            raise ValueError(f"Input subsample should be 2D but it is {len(inputs[1].shape)}D")
        if inputs[1].shape[0] != self.__subsample_size:
            raise ValueError(f"Subsample should be of size {self.__subsample_size}")
        if inputs[0][0].shape[0] != inputs[1][0].shape[0]:
            raise ValueError("Input and subsample should have the same dimension")

        input_x = inputs[0]
        input_sub = inputs[1]
        with tf.name_scope("NystromLayer"):
            with tf.name_scope("kernel_vec"):
                kernel_vector = self.__tf_kernel_fct(input_x, input_sub, **self.__kernel_dict)
            if self.__output_dim != 0:
                out = tf.matmul(kernel_vector, self.__W_matrix)
            else:
                out = kernel_vector
        if self.__activation is not None:
            out = self.__activation(out)
        return out
if __name__ == '__main__':
    DeepstromLayerEndToEnd(subsample_size=64,
                           kernel_name='chi2_cpd',
                           kernel_dict={})
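
    # Hedged continuation sketch, not in the original commit: wiring the layer
    # into a TF-1.x-style graph, consistent with the add_variable API used in
    # build() above. Shapes and the placeholder usage are illustrative assumptions.
    layer = DeepstromLayerEndToEnd(subsample_size=64, kernel_name='chi2_cpd', out_dim=16)
    x = tf.placeholder(tf.float32, shape=[8, 128])     # batch of 8 examples, 128 features
    sub = tf.placeholder(tf.float32, shape=[64, 128])  # 64 landmark points, same features
    out = layer([x, sub])                              # k(x, sub) @ W -> shape (8, 16)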
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.initializers import he_normal


def build_lenet_model(input_shape):
    model = Sequential()
    model.add(
        Conv2D(6, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal(), input_shape=input_shape))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(16, (5, 5), padding='valid', activation='relu', kernel_initializer=he_normal()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Flatten())
    return model
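
# Illustrative shape check, not in the original file: for 32x32 RGB inputs the
# two valid 5x5 convolutions and 2x2 poolings leave a 5x5x16 map, so
#   build_lenet_model(input_shape=(32, 32, 3)).summary()
# should report a flattened output of (None, 400). See also the demo block
# after build_vgg19_model below.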
def build_vgg19_model(input_shape, weight_decay=0.0001):
    model = Sequential()

    # Block 1
    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block1_conv1', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block1_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))

    # Block 2
    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block2_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block2_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))

    # Block 3
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block3_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool'))

    # Block 4
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block4_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool'))

    # Block 5
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv1'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv2'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv3'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, (3, 3), padding='same', kernel_regularizer=l2(weight_decay),
                     kernel_initializer=he_normal(), name='block5_conv4'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool'))

    model.add(Flatten(name='flatten'))
    return model
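
# Hedged sanity check, not in the original file: with 32x32x3 inputs the five
# 2x2 poolings of the VGG19 stack reduce the map to 1x1x512, so the flattened
# feature vector should be (None, 512); the LeNet one should be (None, 400).
if __name__ == "__main__":
    build_lenet_model(input_shape=(32, 32, 3)).summary()
    build_vgg19_model(input_shape=(32, 32, 3)).summary()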
# -*- coding: utf-8 -*-
"""
Cluger
...

# -*- coding: utf-8 -*-
"""
The Executioner is a tool used for performing clean experiments by managing the output files generated by them.
...
...@@ -9,7 +9,7 @@ Usage:
Options:
  -h --help                 Show this screen.
  -i --input-dir=<IPATH>    Input directory where to find results
  -p --patern=regex         Specify the pattern of the files to be looked at [default: .+\_stdout.txt].
  -r --header               Indicates whether there is a header in the result files.
  -v --verbose              Print the lines of the final file
"""
...
...@@ -14,6 +14,7 @@ import psutil
import collections

from sklearn.metrics.pairwise import additive_chi2_kernel
import tensorflow as tf

daiquiri.setup(level=logging.DEBUG)
logger = daiquiri.getLogger()
...@@ -189,6 +190,7 @@ def compute_euristic_sigma(dataset_full, slice_size=1000):
    :return:
    """
    results = []
    dataset_full = np.reshape(dataset_full, (-1, 1))
    if slice_size > dataset_full.shape[0]:
        slice_size = dataset_full.shape[0]
    for i in range(dataset_full.shape[0] // slice_size):
...@@ -240,6 +242,7 @@ def compute_euristic_sigma_chi2(dataset_full, slice_size=100):
    :param dataset: The dataset on which to look for the best sigma
    :return:
    """
    dataset_full = np.reshape(dataset_full, (-1, 1))
    results = []
    if slice_size > dataset_full.shape[0]:
        slice_size = dataset_full.shape[0]
...@@ -297,7 +300,124 @@ LabeledData = collections.namedtuple("LabeledData", ["data", "labels"])
DownloadableModel = collections.namedtuple("DownloadableModel", ["url", "checksum"])
class DictManager(dict):
    pass
    # def __getattr__(self, item):
    #     return self[item]
    #
    # def __setattr__(self, key, value):
    #     self[key] = value
    #
    # def __delattr__(self, item):
    #     del self[item]

    # def __missing__(self, key):
    #     logger.warning(f"Call to missing key {key} in {self.__class__.__name__}. None value returned.")
    #     self[key] = None
    #     return self[key]
class ParameterManager(DictManager):
    def get_gamma_value(self, dat, chi2=False):
        if self["--gamma"] is None:
            logger.debug("Gamma argument is None. Need to compute it.")
            if chi2:
                gamma_value = 1. / compute_euristic_sigma_chi2(dat)
            else:
                gamma_value = 1. / compute_euristic_sigma(dat)
        else:
            gamma_value = eval(self["--gamma"])
        logger.debug("Gamma value is {}".format(gamma_value))
        return gamma_value

    def init_kernel(self):
        if self["--rbf-kernel"]:
            return "rbf"
        elif self["--linear-kernel"]:
            return "linear"
        elif self["--chi-square-kernel"]:
            return "chi2_cpd"
        elif self["--exp-chi-square-kernel"]:
            return "chi2_exp_cpd"
        elif self["--chi-square-PD-kernel"]:
            return "chi2_pd"
        elif self["--laplacian-kernel"]:
            return "laplacian"
        else:
            return None

    def init_network(self):
        if self["dense"]:
            return "dense"
        elif self["deepfriedconvnet"]:
            return "deepfriedconvnet"
        elif self["deepstrom"]:
            return "deepstrom"
        elif self["none"]:
            return "none"

    def init_non_linearity(self):
        if self["--non-linearity"] == "tanh":
            return tf.nn.tanh
        elif self["--non-linearity"] == "relu":
            return tf.nn.relu
        elif self["--non-linearity"] == "None":
            return None

    def init_dataset(self):
        if self["--cifar10"]:
            return "cifar10"
        if self["--cifar100"]:
            return "cifar100"
        if self["--mnist"]:
            return "mnist"
        if self["--svhn"]:
            return "svhn"
class ResultManager(DictManager):
    pass


class ResultPrinter:
    def __init__(self, *args, header=True):
        self.__dicts = []
        self.__dicts.extend(args)
        self.__header = header

    def _get_ordered_items(self):
        all_keys = []
        all_values = []
        for d in self.__dicts:
            keys, values = zip(*d.items())
            all_keys.extend(keys)
            all_values.extend(values)
        arr_keys, arr_values = np.array(all_keys), np.array(all_values)
        indexes_sort = np.argsort(arr_keys)
        return list(arr_keys[indexes_sort]), list(arr_values[indexes_sort])

    def _get_values_ordered_by_keys(self):
        _, values = self._get_ordered_items()
        return values

    def _get_ordered_keys(self):
        keys, _ = self._get_ordered_items()
        return keys

    def add(self, d):
        self.__dicts.append(d)

    def print(self):
        headers, values = self._get_ordered_items()
        headers = [str(h) for h in headers]
        values = [str(v) for v in values]
        if self.__header:
            print(",".join(headers))
        print(",".join(values))
if __name__ == "__main__":
    paraman = ParameterManager({"a": 4})
    resulman = ResultManager({"b": 2})
    resprinter = ResultPrinter(paraman)
    resprinter.add(resulman)
    resprinter.print()
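    # Expected stdout, deducible from _get_ordered_items() above (keys are
    # sorted alphabetically across both dicts before printing):
    #   a,b
    #   4,2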