diff --git a/skluc/data/mldatasets.py b/skluc/data/mldatasets.py deleted file mode 100644 index 14e424d9a75407d986c635764357e38dcfeb472c..0000000000000000000000000000000000000000 --- a/skluc/data/mldatasets.py +++ /dev/null @@ -1,854 +0,0 @@ -""" -This module defines the Dataset classes usefull for downloading and loading datasets as numpy.ndarrays. - -The currently implemented datasets are: - - mnist - - cifar10 -""" - -import collections -import gzip -import os -import pickle -import re -import struct -import tarfile - -import numpy as np -# --- installed packages -import scipy.io as sio -import tensorflow as tf -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelBinarizer - -from skluc.utils import silentremove, download_data, check_files, create_directory, logger - -__all__ = ['MnistDataset', 'Cifar10Dataset', 'MovieReviewV1Dataset', 'Cifar100FineDataset'] - -LabeledData = collections.namedtuple("LabeledData", ["data", "labels"]) - - -class Dataset(object): - """ - Abstract class implementing basic methods for Dataset retrieval. - """ - # data_groups_private will be used to refer to the attributes self._train and self._test via their names - # as stringes. It is usefull when the same operations must be performed on train and test set - data_groups_private = ["_train", "_test"] - # data_groups_public = ["train", "test", "validation"] - - def __init__(self, l_url, s_name, s_download_dir=os.path.join(os.path.expanduser("~"), "ml_datasets"), - validation_size=0, seed=None): - self.l_url = l_url - self.l_filenames = [] - for url in self.l_url: - splitted_url = url.split("/") - self.l_filenames.append(splitted_url[-1]) - self.s_name = s_name - self.s_download_dir = os.path.join(s_download_dir, self.s_name) - self.l_filepaths = [os.path.join(self.s_download_dir, fname) for fname in self.l_filenames] - self._train = None - self._test = None - self.seed = seed - self.permuted_index_train = None - self.permuted_index_test = None - self.permuted_index_validation = None - self.validation_size = validation_size - - def reduce_data_size(self, new_size): - logger.info("Reducing datasize of dataset {} to .".format(self.s_name, new_size)) - kept_indices = self.get_uniform_class_rand_indices_train(new_size) - self.permuted_index_train = self.permuted_index_train[kept_indices] - - def get_uniform_class_rand_indices_train(self, size): - try: - kept_indices, _ = train_test_split(np.arange(len(self.train.data)), - train_size=size, stratify=self.train.labels, random_state=self.seed) - except ValueError as e: - logger.warning("In Dataset.get_uniform_class_rand_indices_train Handled exception: {}".format(str(e))) - logger.debug("Use random indexes instead") - kept_indices = np.random.permutation(len(self.train.data))[:size] - return kept_indices - - def get_uniform_class_rand_indices_validation(self, size): - try: - kept_indices, _ = train_test_split(np.arange(len(self.validation.data)), - train_size=size, stratify=self.validation.labels, random_state=self.seed) - except ValueError as e: - logger.warning("In Dataset.get_uniform_class_rand_indices_validation Handled exception: {}".format(str(e))) - logger.debug("Use random indexes instead") - kept_indices = np.random.permutation(len(self.validation.data))[:size] - return kept_indices - - @property - def train(self): - return LabeledData(data=self._train.data[self.permuted_index_train], - labels=self._train.labels[self.permuted_index_train]) - - @property - def test(self): - return 
LabeledData(data=self._test.data[self.permuted_index_test], - labels=self._test.labels[self.permuted_index_test]) - - @property - def validation(self): - return LabeledData(data=self._train.data[self.permuted_index_validation], - labels=self._train.labels[self.permuted_index_validation]) - - def download(self): - """ - Download the dataset. - - :return: None - """ - self.create_directory_tree() - if not check_files(self.l_filepaths): - logger.debug("Files need to be downloaded") - for s_fname in self.l_filepaths: - silentremove(s_fname) - for s_url in self.l_url: - logger.debug("Downloading file at url: {}".format(s_url)) - s_file_name = s_url.split("/")[-1] - download_data(s_url, self.s_download_dir, s_file_name) - else: - logger.debug("Files {} already exist".format(self.l_filepaths)) - - def create_directory_tree(self): - """ - Create the target directory tree - - :return: None - """ - create_directory(self.s_download_dir) - - def _check_validation_size(self, data_length): - if self.validation_size > data_length: - raise ValueError("The validation set size ({}) is higher than the train set size ({}). " \ - "Please choose a little validation set size".format(self.validation_size, data_length)) - logger.debug("Validation size < data length ({} < {})".format(self.validation_size, data_length)) - - def to_one_hot(self): - """ - Convert categorical labels to one hot encoding - - :return: - """ - enc = LabelBinarizer() - enc.fit(self._train.labels) - logger.info("Apply one hot encoding to dataset {}.".format(self.s_name)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if len(datlab.labels) == 0: - logger.debug("No labels found in {} data of {} dataset".format(kw, self.s_name)) - continue - logger.debug("Apply one hot encoding to {} data of {} dataset".format(kw, self.s_name)) - labels = np.array(enc.transform(datlab.labels)) - data = datlab.data - setattr(self, kw, LabeledData(data, labels)) - - def revert_one_hot(self): - logger.info("Revert one hot encoding to dataset {}.".format(self.s_name)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if len(datlab.labels) == 0: - logger.debug("No labels found in {} data of {} dataset".format(kw, self.s_name)) - continue - logger.debug("Apply one hot encoding to {} data of {} dataset".format(kw, self.s_name)) - labels = np.argmax(datlab.labels, axis=1) - data = datlab.data - setattr(self, kw, LabeledData(data, labels)) - - def normalize(self): - """ - Normalize data. - - Feature scaling normalization. 
- - :return: - """ - logger.info("Apply normalization to data from dataset {}.".format(self.s_name)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if len(datlab.labels) == 0: - continue - data = datlab.data - _min = data.min() - _max = data.max() - data = (data - _min) / (_max - _min) - logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name)) - setattr(self, kw, LabeledData(data, datlab.labels)) - - def data_astype(self, _type): - logger.info("Change type of data to {} in the dataset {}.".format(str(_type), self.s_name)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if len(datlab.labels) == 0: - continue - logger.debug("Change type of {} data to {} in the dataset {}.".format(kw, str(_type), self.s_name)) - data = datlab.data - logger.debug("{} data was of type {}".format(kw, data.dtype)) - data = data.astype(_type) - logger.debug("{} data is now of type {}".format(kw, data.dtype)) - setattr(self, kw, LabeledData(data, datlab.labels)) - - def labels_astype(self, _type): - logger.info("Change type of labels to {} in the dataset {}.".format(str(_type), self.s_name)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if len(datlab.labels) == 0: - continue - labels = datlab.labels - logger.debug("Change type of {} labels to {} in the dataset {}.".format(kw, str(_type), self.s_name)) - logger.debug("{} labels were of type {}".format(kw, labels.dtype)) - labels = labels.astype(_type) - logger.debug("{} labels are now of type {}".format(kw, labels.dtype)) - setattr(self, kw, LabeledData(datlab.data, labels)) - - def load(self): - # todo faire une verification generique que le jeu de donné à été chargé lorsque des opérations - # sont appliquées aux données - logger.info("Loading dataset {}".format(self.s_name)) - self.download() - self.read() - if self._train is not None: - logger.debug("Construction of random train indexes (seed: {})".format(self.seed)) - np.random.seed(self.seed) - permut = np.random.permutation(self._train[0].shape[0]) - if self.validation_size > 0: - self.permuted_index_train = permut[:-self.validation_size] - self.permuted_index_validation = permut[-self.validation_size:] - else: - self.permuted_index_train = permut - self.permuted_index_validation = np.array([]) - if self._test is not None: - logger.debug("Construction of random test indexes (seed: {})".format(self.seed)) - logger.debug("Dataset size: {}".format(self._train[0].shape[0])) - np.random.seed(self.seed) - self.permuted_index_test = np.random.permutation(self._test[0].shape[0]) - if self._train is None and self._test is None: - raise Exception("No data loaded at the end of load method.") - - - # --- Abstract methods - - def read(self): - """ - This method should load dataset in _train and _test attributes. 
- :return: - """ - raise NotImplementedError - - -class MovieReviewV1Dataset(Dataset): - data_groups_private = ["_train"] - TRAIN_SIZE = 9000 - - def apply_transformer(self, transformer_class): - # todo, cette fonction devrait marcher pour tout dataset (donc il faudrait la mettre dans la classe Dataset) - transformer = transformer_class() - transformer_name = transformer.__class__.__name__ - transform_path = os.path.join(self.s_download_dir, transformer_name) - transform_filepaths = [os.path.join(transform_path, kw + ".npz") - for kw in self.data_groups_private] - create_directory(transform_path) - if check_files(transform_filepaths) and transformer.check_model(): - # in the case where the transformations already exist in npz files - # and the model is the actual good model - # but I have no guarantee the transformation has been obtained with the stored model though... - # todo make the npz files to store the md5 checksum of the model that has produced them - logger.debug("Files {} already exists".format(transform_filepaths)) - logger.debug("Now load data of files {}".format(transform_filepaths)) - for kw in self.data_groups_private: - npzfile_path = os.path.join(transform_path, kw + ".npz") - logger.debug("Loading {}".format(npzfile_path)) - npzfile = np.load(npzfile_path) - data = npzfile[kw + "_data"] - labels = npzfile[kw + "_labels"] - setattr(self, kw, LabeledData(data=data, labels=labels)) - # todo être plus intelligent avec le mode debug du logger. Pour l'instant je met tout en debug - else: - # in the case the transformations doesn't yet exist - # one nead to apply it to the data - # then to save the transformation - logger.debug("Files {} don't exist or model md5 checksum doesn't match. Need to produce them".format(transform_filepaths)) - logger.info("Apply convolution of {} to dataset {}".format(transformer_name, self.s_name)) - for kw in self.data_groups_private: - data, labels = getattr(self, kw) - transformed_data, transformed_labels = transformer.transform(data, labels) - setattr(self, kw, LabeledData(data=transformed_data, labels=transformed_labels)) - dict_attr = {kw + "_data": transformed_data, kw + "_labels": transformed_labels} - filepath = os.path.join(transform_path, kw + ".npz") - - logger.debug("Saving transformed {} data to {}".format(kw, filepath)) - np.savez(filepath, **dict_attr) - - def __init__(self, validation_size=0, seed=0, s_download_dir=None): - self.__s_url = "http://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz" - - if s_download_dir is not None: - super().__init__([self.__s_url], "moviereview", s_download_dir, validation_size=validation_size, seed=seed) - else: - super().__init__([self.__s_url], "moviereview", validation_size=validation_size, seed=seed) - - self.__extracted_files = [ - 'rt-polarity.pos', - 'rt-polarity.neg' - ] - self.__extracted_dirname = os.path.join(self.s_download_dir, "rt-polaritydata") - - self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] - - self.__counter = 1 - self.__vocab = {"<pad>": 0} - self.__reversed_vocab = {0: "<pad>"} - - @property - def vocab(self): - return self.__vocab - - @property - def vocab_inv(self): - return self.__reversed_vocab - - def read(self): - # todo faire une fonction d'extraction commune? 
- targz_file_path = self.l_filepaths[-1] - if not check_files(self.__extracted_file_paths): - logger.debug("Extracting {} ...".format(targz_file_path)) - tar = tarfile.open(targz_file_path, "r:gz") - tar.extractall(path=self.s_download_dir) - else: - logger.debug("File {} has already been extracted".format(targz_file_path)) - - data_labeled = MovieReviewV1Dataset.load_data_and_labels(self.__extracted_file_paths[0], - self.__extracted_file_paths[1], - encoding="ISO-8859-1") - - max_ = -1 - for l in data_labeled[0]: - max_ = max(max_, len(l.strip().split())) - - lst_arr_ex = [] - for ex in data_labeled[0]: - splitted_ex = ex.strip().split() - splitted_ex_nbr = [] - for wrd in splitted_ex: - if wrd not in self.__vocab: - self.__vocab[wrd] = self.__counter - self.__reversed_vocab[self.__counter] = wrd - self.__counter += 1 - splitted_ex_nbr.append(self.__vocab[wrd]) - arr_splitted_ex_nbr = np.pad(splitted_ex_nbr, (0, max_-len(splitted_ex_nbr)), 'constant', - constant_values=self.__vocab["<pad>"]) - lst_arr_ex.append(np.reshape(arr_splitted_ex_nbr, (1, -1))) - X = np.concatenate(lst_arr_ex, axis=0) - - self._train = LabeledData(data=X, - labels=data_labeled[1]) - - @property - def train(self): - indexes = self.permuted_index_train[:self.TRAIN_SIZE - self.validation_size] - return LabeledData(data=self._train.data[indexes], - labels=self._train.labels[indexes]) - - @property - def test(self): - indexes = self.permuted_index_train[self.TRAIN_SIZE:] - return LabeledData(data=self._train.data[indexes], - labels=self._train.labels[indexes]) - - @property - def validation(self): - indexes = self.permuted_index_train[(self.TRAIN_SIZE - self.validation_size):self.TRAIN_SIZE] - return LabeledData(data=self._train.data[indexes], - labels=self._train.labels[indexes]) - - @property - def vocabulary_length(self): - return len(self.__vocab) - - @staticmethod - def clean_str(string): - """ - Tokenization/string cleaning for all datasets except for SST. - Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py - - Credit to: https://github.com/dennybritz/cnn-text-classification-tf/blob/master/data_helpers.py - """ - string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string) - string = re.sub(r"\'s", " \'s", string) - string = re.sub(r"\'ve", " \'ve", string) - string = re.sub(r"n\'t", " n\'t", string) - string = re.sub(r"\'re", " \'re", string) - string = re.sub(r"\'d", " \'d", string) - string = re.sub(r"\'ll", " \'ll", string) - string = re.sub(r",", " , ", string) - string = re.sub(r"!", " ! ", string) - string = re.sub(r"\(", " \( ", string) - string = re.sub(r"\)", " \) ", string) - string = re.sub(r"\?", " \? ", string) - string = re.sub(r"\s{2,}", " ", string) - return string.strip().lower() - - @staticmethod - def load_data_and_labels(positive_data_file, negative_data_file, encoding='utf-8'): - """ - Loads MR polarity data from files, splits the data into words and generates labels. - Returns split sentences and labels. 
- - Credit to: https://github.com/dennybritz/cnn-text-classification-tf/blob/master/data_helpers.py - """ - # Load data from files - positive_examples = list(open(positive_data_file, "r", encoding=encoding).readlines()) - positive_examples = [s.strip() for s in positive_examples] - negative_examples = list(open(negative_data_file, "r", encoding=encoding).readlines()) - negative_examples = [s.strip() for s in negative_examples] - # Split by words - x_text = positive_examples + negative_examples - x_text = [MovieReviewV1Dataset.clean_str(sent) for sent in x_text] - # Generate labels - positive_labels = [[0, 1] for _ in positive_examples] - negative_labels = [[1, 0] for _ in negative_examples] - y = np.concatenate([positive_labels, negative_labels], 0) - return LabeledData(data=x_text, labels=y) - - # todo not yet sure the following is usefull - # @staticmethod - # def batch_iter(data, batch_size, num_epochs, shuffle=True): - # """ - # Generates a batch iterator for a dataset. - # """ - # data = np.array(data) - # data_size = len(data) - # num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1 - # for epoch in range(num_epochs): - # # Shuffle the data at each epoch - # if shuffle: - # shuffle_indices = np.random.permutation(np.arange(data_size)) - # shuffled_data = data[shuffle_indices] - # else: - # shuffled_data = data - # for batch_num in range(num_batches_per_epoch): - # start_index = batch_num * batch_size - # end_index = min((batch_num + 1) * batch_size, data_size) - # - # yield shuffled_data[start_index:end_index] - - -class ImageDataset(Dataset): - HEIGHT = -1 - WIDTH = -1 - DEPTH = -1 - - def apply_transformer(self, transformer): - """ - - :param transformer: Transformer object (not a class) - :return: - """ - logger.info("Apply transformation {} to data from dataset {}.".format(transformer.__class__.__name__, self.s_name)) - # todo, cette fonction devrait marcher pour tout dataset (donc il faudrait la mettre dans la classe Dataset) - transformer_name = transformer.NAME - transform_path = os.path.join(self.s_download_dir, transformer_name) - transform_filepaths = [os.path.join(transform_path, kw + ".npz") - for kw in self.data_groups_private] - create_directory(transform_path) - if check_files(transform_filepaths) and transformer.check_model(): - # in the case where the transformations already exist in npz files - # and the model is the actual good model - # but I have no guarantee the transformation has been obtained with the stored model though... - # todo make the npz files to store the md5 checksum of the model that has produced them - logger.debug("Files {} already exists".format(transform_filepaths)) - logger.info("Loading transformed data from files {}".format(transform_filepaths)) - for kw in self.data_groups_private: - npzfile_path = os.path.join(transform_path, kw + ".npz") - logger.debug("Loading {}".format(npzfile_path)) - npzfile = np.load(npzfile_path) - data = npzfile[kw + "_data"] - logger.debug("Shape of {} set: {}".format(kw, data.shape)) - labels = npzfile[kw + "_labels"] - setattr(self, kw, LabeledData(data=data, labels=labels)) - else: - # in the case the transformations doesn't yet exist - # one need to apply it to the data - # then to save the transformation - logger.debug("Files {} don't exist or model md5 checksum doesn't match. 
Need to produce them".format(transform_filepaths)) - logger.info("Apply convolution of {} to dataset {}".format(transformer_name, self.s_name)) - for kw in self.data_groups_private: - data, labels = getattr(self, kw) - transformed_data, transformed_labels = transformer.transform(data, labels) - setattr(self, kw, LabeledData(data=transformed_data, labels=transformed_labels)) - dict_attr = {kw + "_data": transformed_data, kw + "_labels": transformed_labels} - filepath = os.path.join(transform_path, kw + ".npz") - logger.debug("Shape of {} set: {}".format(kw, transformed_data.shape)) - logger.debug("Saving transformed {} data to {}".format(kw, filepath)) - np.savez(filepath, **dict_attr) - - def to_image(self): - """ - Modify data to present it like images (matrices) instead of vectors. - - :return: The modified data. - """ - if self.HEIGHT == -1 or self.WIDTH == -1 or self.DEPTH == -1: - raise ValueError("Height, width and depth static attributes of class {} should be set.".format(self.__class__)) - for kw in self.data_groups_private: - datlab = getattr(self, kw) - if datlab is None: - continue - images_vec = datlab.data - labels = datlab.labels - length_by_chanel = images_vec.shape[1]/self.DEPTH - logger.debug("Images vec shape: {}".format(images_vec.shape)) - if int(length_by_chanel) != length_by_chanel: - raise Exception("Dimensionality problem") - else: - length_by_chanel = int(length_by_chanel) - images_mat = np.reshape(images_vec, (images_vec.shape[0], length_by_chanel, self.DEPTH), - order='F') - images = np.reshape(images_mat, (images_mat.shape[0], self.HEIGHT, self.WIDTH, - self.DEPTH), order='C') - setattr(self, kw, LabeledData(images, labels)) - - def flatten(self): - """ - Flatten all the datasets (matrices to vectors) - - :return: - """ - logger.info("Apply flattening to dataset {}.".format(self.s_name)) - for kw in self.data_groups_private: - logger.debug("Flattening data {} of dataset {}".format(kw, self.s_name)) - datlab = getattr(self, kw) - init_dim = np.prod([s for s in datlab.data.shape[1:]]) - logger.debug("Shape of {} data: {}".format(kw, datlab.data.shape)) - logger.debug("Number of features in {} data: {}".format(kw, init_dim)) - data = datlab.data.reshape(datlab.data.shape[0], init_dim) - setattr(self, kw, LabeledData(data=data, labels=datlab.labels)) - - def rescale(self, factor): - """ - Rescale images by factor. - - :param factor: - :return: - """ - sess = tf.InteractiveSession() - for kw in self.data_groups_private: - datlab = getattr(self, kw) - images_mat = datlab.data - output_shape = np.multiply(images_mat.shape[1:-1], (factor, factor)) - labels = datlab.labels - logger.debug("Shape of {} data before rescaling: {}".format(kw, images_mat.shape)) - logger.debug("Excpected output shape for images: {}".format(output_shape)) - new_image = tf.image.resize_images(images_mat, output_shape).eval() - logger.debug("Shape of {} data after rescaling: {}".format(kw, new_image.shape)) - setattr(self, kw, LabeledData(new_image, labels)) - sess.close() - - def to_feature_vectors(self): - """ - From a feature representation (W x H x D) of the images, gives the feature vector representation of - dimension (N x D) with N being W x H. 
- - :return: - """ - for kw in self.data_groups_private: - datlab = getattr(self, kw) - images_mat = datlab.data - labels = datlab.labels - - logger.debug("Shape of {} data before reshape: {}".format(kw, images_mat.shape)) - images_mat = images_mat.reshape(images_mat.shape[0], -1, images_mat.shape[-1]) - logger.debug("Shape of {} data after reshape: {}".format(kw, images_mat.shape)) - setattr(self, kw, LabeledData(images_mat, labels)) - - -class MnistDataset(ImageDataset): - - HEIGHT = 28 - WIDTH = 28 - DEPTH = 1 - - def __init__(self, validation_size=0, seed=0, s_download_dir=None): - self.__s_root_url = "http://yann.lecun.com/exdb/mnist/" - self.__d_leaf_url = { - "train_data": "train-images-idx3-ubyte.gz", - "train_label": "train-labels-idx1-ubyte.gz", - "test_data": "t10k-images-idx3-ubyte.gz", - "test_label": "t10k-labels-idx1-ubyte.gz" - } - - l_url = [self.__s_root_url + leaf_url for leaf_url in self.__d_leaf_url.values()] - if s_download_dir is not None: - super().__init__(l_url, "mnist", s_download_dir, validation_size=validation_size, seed=seed) - else: - super().__init__(l_url, "mnist", validation_size=validation_size, seed=seed) - - @staticmethod - def read_gziped_ubyte(fname_img=None, fname_lbl=None): - """ - loosely copied on https://gist.github.com/akesling/5358964 - - Python function for importing the MNIST data set. It returns an iterator - of 2-tuples with the first element being the label and the second element - being a numpy.uint8 2D array of pixel data for the given image. - """ - # Load everything in some numpy arrays - logger.info("Read gziped ubyte file {}".format(fname_img)) - with gzip.open(fname_lbl, 'rb') as flbl: - magic, num = struct.unpack(">II", flbl.read(8)) - lbl = np.fromstring(flbl.read(), dtype=np.int8) - - logger.info("Read gziped ubyte file {}".format(fname_lbl)) - with gzip.open(fname_img, 'rb') as fimg: - magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16)) - img = np.fromstring(fimg.read(), dtype=np.uint8) - img = img.reshape(len(lbl), -1) - - return img, lbl - - def read(self): - """ - Return a dict of data where, for each key is associated a (data, label) tuple. - - The values of the tuple are np.ndarray. 
- - :return: dict - """ - # todo add possibility to provide percentage for validation set instead of size - self._train = LabeledData( - *self.read_gziped_ubyte(os.path.join(self.s_download_dir, self.__d_leaf_url["train_data"]), - os.path.join(self.s_download_dir, self.__d_leaf_url["train_label"])) - ) - - self._test = LabeledData( - *self.read_gziped_ubyte(os.path.join(self.s_download_dir, self.__d_leaf_url["test_data"]), - os.path.join(self.s_download_dir, self.__d_leaf_url["test_label"])) - ) - - self._check_validation_size(self._train[0].shape[0]) - - -class Cifar10Dataset(ImageDataset): - - HEIGHT = 32 - WIDTH = 32 - DEPTH = 3 - - def __init__(self, validation_size=0, seed=None, s_download_dir=None): - self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" - self.meta = None - name = "cifar10" - if s_download_dir is not None: - super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed) - else: - super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed) - - self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-10-batches-py") - self.__extracted_files =[ - 'batches.meta', - 'data_batch_1', - 'data_batch_2', - 'data_batch_3', - 'data_batch_4', - 'data_batch_5', - 'readme.html', - 'test_batch' - ] - - self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] - - def get_cifar10_data(self, keyword): - """ - Get data from the files containing the keyword in their name. - - :param keyword: - :return: - """ - full_data = [] - full_labels = [] - for fpath in self.__extracted_file_paths: - if keyword in fpath.split('/')[-1]: - with open(fpath, 'rb') as f: - pckl_data = pickle.load(f, encoding='bytes') - full_data.append(pckl_data[b'data']) - full_labels.append(pckl_data[b'labels']) - final_data = np.vstack(full_data) - final_label = np.hstack(full_labels) - - return final_data, final_label - - def get_meta(self): - """ - Get meta data about cifar10 from file. 
- - :return: - """ - for fpath in self.__extracted_file_paths: - if 'meta' in fpath.split('/')[-1]: - with open(fpath, 'rb') as f: - pckl_data = pickle.load(f, encoding='bytes') - meta = pckl_data[b'label_names'] - return np.array(meta) - - def read(self): - targz_file_path = self.l_filepaths[-1] - if not check_files(self.__extracted_file_paths): - logger.debug("Extracting {} ...".format(targz_file_path)) - tar = tarfile.open(targz_file_path, "r:gz") - tar.extractall(path=self.s_download_dir) - else: - logger.debug("File {} has already been extracted".format(targz_file_path)) - - logger.debug("Get training data of dataset {}".format(self.s_name)) - self._train = LabeledData(*self.get_cifar10_data('data')) - - logger.debug("Get testing data of dataset {}".format(self.s_name)) - self._test = LabeledData(*self.get_cifar10_data('test')) - self.meta = self.get_meta() - - self._check_validation_size(self._train[0].shape[0]) - - -class Cifar100FineDataset(ImageDataset): - HEIGHT = 32 - WIDTH = 32 - DEPTH = 3 - - def __init__(self, validation_size=0, seed=None, s_download_dir=None): - self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" - self.meta = None - name = "cifar100fine" - if s_download_dir is not None: - super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed) - else: - super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed) - - self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-100-python") - self.__extracted_files = [ - 'train', - 'test', - 'meta' - ] - - self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] - - def get_cifar100_data(self, keyword): - """ - Get data from the files containing the keyword in their name. - - :param keyword: - :return: - """ - full_data = [] - full_labels = [] - for fpath in self.__extracted_file_paths: - if keyword in fpath.split('/')[-1]: - with open(fpath, 'rb') as f: - pckl_data = pickle.load(f, encoding='bytes') - full_data.append(pckl_data[b'data']) - full_labels.append(pckl_data[b'fine_labels']) - final_data = np.vstack(full_data) - final_label = np.hstack(full_labels) - - return final_data, final_label - - def get_meta(self): - """ - Get meta data about cifar10 from file. 
- - :return: - """ - for fpath in self.__extracted_file_paths: - if 'meta' in fpath.split('/')[-1]: - with open(fpath, 'rb') as f: - pckl_data = pickle.load(f, encoding='bytes') - meta = pckl_data[b'fine_label_names'] - return np.array(meta) - - def read(self): - targz_file_path = self.l_filepaths[-1] - if not check_files(self.__extracted_file_paths): - logger.debug("Extracting {} ...".format(targz_file_path)) - tar = tarfile.open(targz_file_path, "r:gz") - tar.extractall(path=self.s_download_dir) - else: - logger.debug("File {} has already been extracted".format(targz_file_path)) - - logger.debug("Get training data of dataset {}".format(self.s_name)) - self._train = LabeledData(*self.get_cifar100_data('train')) - - logger.debug("Get testing data of dataset {}".format(self.s_name)) - self._test = LabeledData(*self.get_cifar100_data('test')) - self.meta = self.get_meta() - - self._check_validation_size(self._train[0].shape[0]) - - -class SVHNDataset(ImageDataset): - - HEIGHT = 32 - WIDTH = 32 - DEPTH = 3 - - def __init__(self, validation_size=0, seed=0, s_download_dir=None): - self.__s_root_url = "http://ufldl.stanford.edu/housenumbers/" - self.__d_leaf_url = { - "train_data": "train_32x32.mat", - "test_data": "test_32x32.mat", - } - - l_url = [self.__s_root_url + leaf_url for leaf_url in self.__d_leaf_url.values()] - if s_download_dir is not None: - super().__init__(l_url, "svhn", s_download_dir, validation_size=validation_size, seed=seed) - else: - super().__init__(l_url, "svhn", validation_size=validation_size, seed=seed) - - @staticmethod - def read_mat(fname): - """ - loosely copied on https://stackoverflow.com/questions/29185493/read-svhn-dataset-in-python - - Python function for importing the SVHN data set. - """ - # Load everything in some numpy arrays - logger.info("Read mat file {}".format(fname)) - data = sio.loadmat(fname) - img = np.moveaxis(data['X'], -1, 0) - lbl = data['y'] - return img, lbl - - def read(self): - """ - Return a dict of data where, for each key is associated a (data, label) tuple. - - The values of the tuple are np.ndarray. 
- - :return: dict - """ - # todo add possibility to provide percentage for validation set instead of size - self._train = LabeledData( - *self.read_mat(os.path.join(self.s_download_dir, self.__d_leaf_url["train_data"])) - ) - - self._test = LabeledData( - *self.read_mat(os.path.join(self.s_download_dir, self.__d_leaf_url["test_data"])) - ) - - self._check_validation_size(self._train[0].shape[0]) - - -if __name__ == "__main__": - d = Cifar100FineDataset(validation_size=10000) - d.load() - print("Before preprocessing") - print(d.train.data.shape, d.train.labels.shape) - print(d.validation.data.shape, d.validation.labels.shape) - print(d.test.data.shape, d.test.labels.shape) - # d.apply_transformer(VGG19SvhnTransformer) - # print("After vgg19 preprocessing") - # print(d.train.data.shape, d.train.labels.shape) - # print(d.validation.data.shape, d.validation.labels.shape) - # print(d.test.data.shape, d.test.labels.shape) diff --git a/skluc/data/mldatasets/Cifar100FineDataset.py b/skluc/data/mldatasets/Cifar100FineDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..83b6747e04d779e0ae1425256a4834caac037548 --- /dev/null +++ b/skluc/data/mldatasets/Cifar100FineDataset.py @@ -0,0 +1,84 @@ +import os +import pickle +import tarfile + +import numpy as np + +from skluc.data.mldatasets import LabeledData +from skluc.data.mldatasets.ImageDataset import ImageDataset +from skluc.utils import logger, check_files + + +class Cifar100FineDataset(ImageDataset): + HEIGHT = 32 + WIDTH = 32 + DEPTH = 3 + + def __init__(self, validation_size=0, seed=None, s_download_dir=None): + self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" + self.meta = None + name = "cifar100fine" + if s_download_dir is not None: + super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed) + else: + super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed) + + self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-100-python") + self.__extracted_files = [ + 'train', + 'test', + 'meta' + ] + + self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] + + def get_cifar100_data(self, keyword): + """ + Get data from the files containing the keyword in their name. + + :param keyword: + :return: + """ + full_data = [] + full_labels = [] + for fpath in self.__extracted_file_paths: + if keyword in fpath.split('/')[-1]: + with open(fpath, 'rb') as f: + pckl_data = pickle.load(f, encoding='bytes') + full_data.append(pckl_data[b'data']) + full_labels.append(pckl_data[b'fine_labels']) + final_data = np.vstack(full_data) + final_label = np.hstack(full_labels) + + return final_data, final_label + + def get_meta(self): + """ + Get meta data about cifar10 from file. 
+ + :return: + """ + for fpath in self.__extracted_file_paths: + if 'meta' in fpath.split('/')[-1]: + with open(fpath, 'rb') as f: + pckl_data = pickle.load(f, encoding='bytes') + meta = pckl_data[b'fine_label_names'] + return np.array(meta) + + def read(self): + targz_file_path = self.l_filepaths[-1] + if not check_files(self.__extracted_file_paths): + logger.debug("Extracting {} ...".format(targz_file_path)) + tar = tarfile.open(targz_file_path, "r:gz") + tar.extractall(path=self.s_download_dir) + else: + logger.debug("File {} has already been extracted".format(targz_file_path)) + + logger.debug("Get training data of dataset {}".format(self.s_name)) + self._train = LabeledData(*self.get_cifar100_data('train')) + + logger.debug("Get testing data of dataset {}".format(self.s_name)) + self._test = LabeledData(*self.get_cifar100_data('test')) + self.meta = self.get_meta() + + self._check_validation_size(self._train[0].shape[0]) diff --git a/skluc/data/mldatasets/Cifar10Dataset.py b/skluc/data/mldatasets/Cifar10Dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c48f8364efd6fa04fe79c1e5c823e460499a395f --- /dev/null +++ b/skluc/data/mldatasets/Cifar10Dataset.py @@ -0,0 +1,90 @@ +import os +import pickle +import tarfile + +import numpy as np + +from skluc.data.mldatasets import LabeledData +from skluc.data.mldatasets.ImageDataset import ImageDataset +from skluc.utils import logger, check_files + + +class Cifar10Dataset(ImageDataset): + + HEIGHT = 32 + WIDTH = 32 + DEPTH = 3 + + def __init__(self, validation_size=0, seed=None, s_download_dir=None): + self.__s_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" + self.meta = None + name = "cifar10" + if s_download_dir is not None: + super().__init__([self.__s_url], name, s_download_dir, validation_size=validation_size, seed=seed) + else: + super().__init__([self.__s_url], name, validation_size=validation_size, seed=seed) + + self.__extracted_dirname = os.path.join(self.s_download_dir, "cifar-10-batches-py") + self.__extracted_files =[ + 'batches.meta', + 'data_batch_1', + 'data_batch_2', + 'data_batch_3', + 'data_batch_4', + 'data_batch_5', + 'readme.html', + 'test_batch' + ] + + self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] + + def get_cifar10_data(self, keyword): + """ + Get data from the files containing the keyword in their name. + + :param keyword: + :return: + """ + full_data = [] + full_labels = [] + for fpath in self.__extracted_file_paths: + if keyword in fpath.split('/')[-1]: + with open(fpath, 'rb') as f: + pckl_data = pickle.load(f, encoding='bytes') + full_data.append(pckl_data[b'data']) + full_labels.append(pckl_data[b'labels']) + final_data = np.vstack(full_data) + final_label = np.hstack(full_labels) + + return final_data, final_label + + def get_meta(self): + """ + Get meta data about cifar10 from file. 
+
+        :return:
+        """
+        for fpath in self.__extracted_file_paths:
+            if 'meta' in fpath.split('/')[-1]:
+                with open(fpath, 'rb') as f:
+                    pckl_data = pickle.load(f, encoding='bytes')
+                    meta = pckl_data[b'label_names']
+        return np.array(meta)
+
+    def read(self):
+        targz_file_path = self.l_filepaths[-1]
+        if not check_files(self.__extracted_file_paths):
+            logger.debug("Extracting {} ...".format(targz_file_path))
+            tar = tarfile.open(targz_file_path, "r:gz")
+            tar.extractall(path=self.s_download_dir)
+        else:
+            logger.debug("File {} has already been extracted".format(targz_file_path))
+
+        logger.debug("Get training data of dataset {}".format(self.s_name))
+        self._train = LabeledData(*self.get_cifar10_data('data'))
+
+        logger.debug("Get testing data of dataset {}".format(self.s_name))
+        self._test = LabeledData(*self.get_cifar10_data('test'))
+        self.meta = self.get_meta()
+
+        self._check_validation_size(self._train[0].shape[0])
\ No newline at end of file
diff --git a/skluc/data/mldatasets/Dataset.py b/skluc/data/mldatasets/Dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..67a071a07d036883b8ef80b1162425130d082591
--- /dev/null
+++ b/skluc/data/mldatasets/Dataset.py
@@ -0,0 +1,219 @@
+import os
+
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelBinarizer
+
+from skluc.data.mldatasets import LabeledData
+from skluc.utils import logger, check_files, silentremove, download_data, create_directory
+
+
+class Dataset(object):
+    """
+    Abstract class implementing basic methods for Dataset retrieval.
+    """
+    # data_groups_private will be used to refer to the attributes self._train and self._test via their names
+    # as strings. It is useful when the same operations must be performed on train and test set
+    data_groups_private = ["_train", "_test"]
+    # data_groups_public = ["train", "test", "validation"]
+
+    def __init__(self, l_url, s_name, s_download_dir=os.path.join(os.path.expanduser("~"), "ml_datasets"),
+                 validation_size=0, seed=None):
+        self.l_url = l_url
+        self.l_filenames = []
+        for url in self.l_url:
+            splitted_url = url.split("/")
+            self.l_filenames.append(splitted_url[-1])
+        self.s_name = s_name
+        self.s_download_dir = os.path.join(s_download_dir, self.s_name)
+        self.l_filepaths = [os.path.join(self.s_download_dir, fname) for fname in self.l_filenames]
+        self._train = None
+        self._test = None
+        self.seed = seed
+        self.permuted_index_train = None
+        self.permuted_index_test = None
+        self.permuted_index_validation = None
+        self.validation_size = validation_size
+
+    def reduce_data_size(self, new_size):
+        logger.info("Reducing datasize of dataset {} to {}.".format(self.s_name, new_size))
+        kept_indices = self.get_uniform_class_rand_indices_train(new_size)
+        self.permuted_index_train = self.permuted_index_train[kept_indices]
+
+    def get_uniform_class_rand_indices_train(self, size):
+        try:
+            kept_indices, _ = train_test_split(np.arange(len(self.train.data)),
+                                               train_size=size, stratify=self.train.labels, random_state=self.seed)
+        except ValueError as e:
+            logger.warning("In Dataset.get_uniform_class_rand_indices_train Handled exception: {}".format(str(e)))
+            logger.debug("Use random indexes instead")
+            kept_indices = np.random.permutation(len(self.train.data))[:size]
+        return kept_indices
+
+    def get_uniform_class_rand_indices_validation(self, size):
+        try:
+            kept_indices, _ = train_test_split(np.arange(len(self.validation.data)),
+                                               train_size=size, stratify=self.validation.labels, random_state=self.seed)
+        except ValueError as e:
+            logger.warning("In Dataset.get_uniform_class_rand_indices_validation Handled exception: {}".format(str(e)))
+            logger.debug("Use random indexes instead")
+            kept_indices = np.random.permutation(len(self.validation.data))[:size]
+        return kept_indices
+
+    @property
+    def train(self):
+        return LabeledData(data=self._train.data[self.permuted_index_train],
+                           labels=self._train.labels[self.permuted_index_train])
+
+    @property
+    def test(self):
+        return LabeledData(data=self._test.data[self.permuted_index_test],
+                           labels=self._test.labels[self.permuted_index_test])
+
+    @property
+    def validation(self):
+        return LabeledData(data=self._train.data[self.permuted_index_validation],
+                           labels=self._train.labels[self.permuted_index_validation])
+
+    def download(self):
+        """
+        Download the dataset.
+
+        :return: None
+        """
+        self.create_directory_tree()
+        if not check_files(self.l_filepaths):
+            logger.debug("Files need to be downloaded")
+            for s_fname in self.l_filepaths:
+                silentremove(s_fname)
+            for s_url in self.l_url:
+                logger.debug("Downloading file at url: {}".format(s_url))
+                s_file_name = s_url.split("/")[-1]
+                download_data(s_url, self.s_download_dir, s_file_name)
+        else:
+            logger.debug("Files {} already exist".format(self.l_filepaths))
+
+    def create_directory_tree(self):
+        """
+        Create the target directory tree
+
+        :return: None
+        """
+        create_directory(self.s_download_dir)
+
+    def _check_validation_size(self, data_length):
+        if self.validation_size > data_length:
+            raise ValueError("The validation set size ({}) is higher than the train set size ({}). " \
+                             "Please choose a smaller validation set size".format(self.validation_size, data_length))
+        logger.debug("Validation size < data length ({} < {})".format(self.validation_size, data_length))
+
+    def to_one_hot(self):
+        """
+        Convert categorical labels to one hot encoding
+
+        :return:
+        """
+        enc = LabelBinarizer()
+        enc.fit(self._train.labels)
+        logger.info("Apply one hot encoding to dataset {}.".format(self.s_name))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if len(datlab.labels) == 0:
+                logger.debug("No labels found in {} data of {} dataset".format(kw, self.s_name))
+                continue
+            logger.debug("Apply one hot encoding to {} data of {} dataset".format(kw, self.s_name))
+            labels = np.array(enc.transform(datlab.labels))
+            data = datlab.data
+            setattr(self, kw, LabeledData(data, labels))
+
+    def revert_one_hot(self):
+        logger.info("Revert one hot encoding to dataset {}.".format(self.s_name))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if len(datlab.labels) == 0:
+                logger.debug("No labels found in {} data of {} dataset".format(kw, self.s_name))
+                continue
+            logger.debug("Revert one hot encoding in {} data of {} dataset".format(kw, self.s_name))
+            labels = np.argmax(datlab.labels, axis=1)
+            data = datlab.data
+            setattr(self, kw, LabeledData(data, labels))
+
+    def normalize(self):
+        """
+        Normalize data.
+
+        Feature scaling normalization.
+
+        :return:
+        """
+        logger.info("Apply normalization to data from dataset {}.".format(self.s_name))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if len(datlab.labels) == 0:
+                continue
+            data = datlab.data
+            _min = data.min()
+            _max = data.max()
+            data = (data - _min) / (_max - _min)
+            logger.debug("Apply normalization to {} data of {} dataset.".format(kw, self.s_name))
+            setattr(self, kw, LabeledData(data, datlab.labels))
+
+    def data_astype(self, _type):
+        logger.info("Change type of data to {} in the dataset {}.".format(str(_type), self.s_name))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if len(datlab.labels) == 0:
+                continue
+            logger.debug("Change type of {} data to {} in the dataset {}.".format(kw, str(_type), self.s_name))
+            data = datlab.data
+            logger.debug("{} data was of type {}".format(kw, data.dtype))
+            data = data.astype(_type)
+            logger.debug("{} data is now of type {}".format(kw, data.dtype))
+            setattr(self, kw, LabeledData(data, datlab.labels))
+
+    def labels_astype(self, _type):
+        logger.info("Change type of labels to {} in the dataset {}.".format(str(_type), self.s_name))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if len(datlab.labels) == 0:
+                continue
+            labels = datlab.labels
+            logger.debug("Change type of {} labels to {} in the dataset {}.".format(kw, str(_type), self.s_name))
+            logger.debug("{} labels were of type {}".format(kw, labels.dtype))
+            labels = labels.astype(_type)
+            logger.debug("{} labels are now of type {}".format(kw, labels.dtype))
+            setattr(self, kw, LabeledData(datlab.data, labels))
+
+    def load(self):
+        # todo add a generic check that the dataset has actually been loaded before
+        # operations are applied to the data
+        logger.info("Loading dataset {}".format(self.s_name))
+        self.download()
+        self.read()
+        if self._train is not None:
+            logger.debug("Construction of random train indexes (seed: {})".format(self.seed))
+            np.random.seed(self.seed)
+            permut = np.random.permutation(self._train[0].shape[0])
+            if self.validation_size > 0:
+                self.permuted_index_train = permut[:-self.validation_size]
+                self.permuted_index_validation = permut[-self.validation_size:]
+            else:
+                self.permuted_index_train = permut
+                self.permuted_index_validation = np.array([])
+        if self._test is not None:
+            logger.debug("Construction of random test indexes (seed: {})".format(self.seed))
+            logger.debug("Dataset size: {}".format(self._train[0].shape[0]))
+            np.random.seed(self.seed)
+            self.permuted_index_test = np.random.permutation(self._test[0].shape[0])
+        if self._train is None and self._test is None:
+            raise Exception("No data loaded at the end of load method.")
+
+
+    # --- Abstract methods
+
+    def read(self):
+        """
+        This method should load dataset in _train and _test attributes.
+        :return:
+        """
+        raise NotImplementedError
diff --git a/skluc/data/mldatasets/ImageDataset.py b/skluc/data/mldatasets/ImageDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..383950e171d3d07dd4be038ebe807bf0188ba9ae
--- /dev/null
+++ b/skluc/data/mldatasets/ImageDataset.py
@@ -0,0 +1,138 @@
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from skluc.data.mldatasets import LabeledData
+from skluc.data.mldatasets.Dataset import Dataset
+from skluc.utils import logger, create_directory, check_files
+
+
+class ImageDataset(Dataset):
+    HEIGHT = -1
+    WIDTH = -1
+    DEPTH = -1
+
+    def apply_transformer(self, transformer):
+        """
+
+        :param transformer: Transformer object (not a class)
+        :return:
+        """
+        logger.info("Apply transformation {} to data from dataset {}.".format(transformer.__class__.__name__, self.s_name))
+        # todo this function should work for any dataset, so it should be moved to the Dataset class
+        transformer_name = transformer.NAME
+        transform_path = os.path.join(self.s_download_dir, transformer_name)
+        transform_filepaths = [os.path.join(transform_path, kw + ".npz")
+                               for kw in self.data_groups_private]
+        create_directory(transform_path)
+        if check_files(transform_filepaths) and transformer.check_model():
+            # in the case where the transformations already exist in npz files
+            # and the model is the actual good model
+            # but I have no guarantee the transformation has been obtained with the stored model though...
+            # todo make the npz files store the md5 checksum of the model that has produced them
+            logger.debug("Files {} already exist".format(transform_filepaths))
+            logger.info("Loading transformed data from files {}".format(transform_filepaths))
+            for kw in self.data_groups_private:
+                npzfile_path = os.path.join(transform_path, kw + ".npz")
+                logger.debug("Loading {}".format(npzfile_path))
+                npzfile = np.load(npzfile_path)
+                data = npzfile[kw + "_data"]
+                logger.debug("Shape of {} set: {}".format(kw, data.shape))
+                labels = npzfile[kw + "_labels"]
+                setattr(self, kw, LabeledData(data=data, labels=labels))
+        else:
+            # in the case the transformations don't exist yet
+            # one needs to apply them to the data
+            # then to save the transformation
+            logger.debug("Files {} don't exist or model md5 checksum doesn't match. Need to produce them".format(transform_filepaths))
+            logger.info("Apply convolution of {} to dataset {}".format(transformer_name, self.s_name))
+            for kw in self.data_groups_private:
+                data, labels = getattr(self, kw)
+                transformed_data, transformed_labels = transformer.transform(data, labels)
+                setattr(self, kw, LabeledData(data=transformed_data, labels=transformed_labels))
+                dict_attr = {kw + "_data": transformed_data, kw + "_labels": transformed_labels}
+                filepath = os.path.join(transform_path, kw + ".npz")
+                logger.debug("Shape of {} set: {}".format(kw, transformed_data.shape))
+                logger.debug("Saving transformed {} data to {}".format(kw, filepath))
+                np.savez(filepath, **dict_attr)
+
+    def to_image(self):
+        """
+        Modify data to present it like images (matrices) instead of vectors.
+
+        :return: The modified data.
+        """
+        if self.HEIGHT == -1 or self.WIDTH == -1 or self.DEPTH == -1:
+            raise ValueError("Height, width and depth static attributes of class {} should be set.".format(self.__class__))
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            if datlab is None:
+                continue
+            images_vec = datlab.data
+            labels = datlab.labels
+            length_by_chanel = images_vec.shape[1]/self.DEPTH
+            logger.debug("Images vec shape: {}".format(images_vec.shape))
+            if int(length_by_chanel) != length_by_chanel:
+                raise Exception("Dimensionality problem")
+            else:
+                length_by_chanel = int(length_by_chanel)
+            images_mat = np.reshape(images_vec, (images_vec.shape[0], length_by_chanel, self.DEPTH),
+                                    order='F')
+            images = np.reshape(images_mat, (images_mat.shape[0], self.HEIGHT, self.WIDTH,
+                                             self.DEPTH), order='C')
+            setattr(self, kw, LabeledData(images, labels))
+
+    def flatten(self):
+        """
+        Flatten all the datasets (matrices to vectors)
+
+        :return:
+        """
+        logger.info("Apply flattening to dataset {}.".format(self.s_name))
+        for kw in self.data_groups_private:
+            logger.debug("Flattening data {} of dataset {}".format(kw, self.s_name))
+            datlab = getattr(self, kw)
+            init_dim = np.prod([s for s in datlab.data.shape[1:]])
+            logger.debug("Shape of {} data: {}".format(kw, datlab.data.shape))
+            logger.debug("Number of features in {} data: {}".format(kw, init_dim))
+            data = datlab.data.reshape(datlab.data.shape[0], init_dim)
+            setattr(self, kw, LabeledData(data=data, labels=datlab.labels))
+
+    def rescale(self, factor):
+        """
+        Rescale images by factor.
+
+        :param factor:
+        :return:
+        """
+        sess = tf.InteractiveSession()
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            images_mat = datlab.data
+            output_shape = np.multiply(images_mat.shape[1:-1], (factor, factor))
+            labels = datlab.labels
+            logger.debug("Shape of {} data before rescaling: {}".format(kw, images_mat.shape))
+            logger.debug("Expected output shape for images: {}".format(output_shape))
+            new_image = tf.image.resize_images(images_mat, output_shape).eval()
+            logger.debug("Shape of {} data after rescaling: {}".format(kw, new_image.shape))
+            setattr(self, kw, LabeledData(new_image, labels))
+        sess.close()
+
+    def to_feature_vectors(self):
+        """
+        From a feature representation (W x H x D) of the images, gives the feature vector representation of
+        dimension (N x D) with N being W x H.
+
+        :return:
+        """
+        for kw in self.data_groups_private:
+            datlab = getattr(self, kw)
+            images_mat = datlab.data
+            labels = datlab.labels
+
+            logger.debug("Shape of {} data before reshape: {}".format(kw, images_mat.shape))
+            images_mat = images_mat.reshape(images_mat.shape[0], -1, images_mat.shape[-1])
+            logger.debug("Shape of {} data after reshape: {}".format(kw, images_mat.shape))
+            setattr(self, kw, LabeledData(images_mat, labels))
+
diff --git a/skluc/data/mldatasets/MnistDataset.py b/skluc/data/mldatasets/MnistDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..0882c48b5f6763167c630e14f5055f23cf66ee3e
--- /dev/null
+++ b/skluc/data/mldatasets/MnistDataset.py
@@ -0,0 +1,76 @@
+import gzip
+import os
+import struct
+
+import numpy as np
+
+from skluc.data.mldatasets import LabeledData
+from skluc.data.mldatasets.ImageDataset import ImageDataset
+from skluc.utils import logger
+
+
+class MnistDataset(ImageDataset):
+
+    HEIGHT = 28
+    WIDTH = 28
+    DEPTH = 1
+
+    def __init__(self, validation_size=0, seed=0, s_download_dir=None):
+        self.__s_root_url = "http://yann.lecun.com/exdb/mnist/"
+        self.__d_leaf_url = {
+            "train_data": "train-images-idx3-ubyte.gz",
+            "train_label": "train-labels-idx1-ubyte.gz",
+            "test_data": "t10k-images-idx3-ubyte.gz",
+            "test_label": "t10k-labels-idx1-ubyte.gz"
+        }
+
+        l_url = [self.__s_root_url + leaf_url for leaf_url in self.__d_leaf_url.values()]
+        if s_download_dir is not None:
+            super().__init__(l_url, "mnist", s_download_dir, validation_size=validation_size, seed=seed)
+        else:
+            super().__init__(l_url, "mnist", validation_size=validation_size, seed=seed)
+
+    @staticmethod
+    def read_gziped_ubyte(fname_img=None, fname_lbl=None):
+        """
+        loosely copied from https://gist.github.com/akesling/5358964
+
+        Python function for importing the MNIST data set. It returns the images as a
+        numpy.uint8 2D array of pixel data (one row per image) and the labels as a
+        numpy.int8 1D array.
+        """
+        # Load everything in some numpy arrays
+        logger.info("Read gziped ubyte file {}".format(fname_lbl))
+        with gzip.open(fname_lbl, 'rb') as flbl:
+            magic, num = struct.unpack(">II", flbl.read(8))
+            lbl = np.fromstring(flbl.read(), dtype=np.int8)
+
+        logger.info("Read gziped ubyte file {}".format(fname_img))
+        with gzip.open(fname_img, 'rb') as fimg:
+            magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
+            img = np.fromstring(fimg.read(), dtype=np.uint8)
+            img = img.reshape(len(lbl), -1)
+
+        return img, lbl
+
+    def read(self):
+        """
+        Return a dict of data where, for each key is associated a (data, label) tuple.
+
+        The values of the tuple are np.ndarray.
+ + :return: dict + """ + # todo add possibility to provide percentage for validation set instead of size + self._train = LabeledData( + *self.read_gziped_ubyte(os.path.join(self.s_download_dir, self.__d_leaf_url["train_data"]), + os.path.join(self.s_download_dir, self.__d_leaf_url["train_label"])) + ) + + self._test = LabeledData( + *self.read_gziped_ubyte(os.path.join(self.s_download_dir, self.__d_leaf_url["test_data"]), + os.path.join(self.s_download_dir, self.__d_leaf_url["test_label"])) + ) + + self._check_validation_size(self._train[0].shape[0]) + diff --git a/skluc/data/mldatasets/MovieReviewDataset.py b/skluc/data/mldatasets/MovieReviewDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..10eb0618c763e5d68df0241410ac26b63d2fd23c --- /dev/null +++ b/skluc/data/mldatasets/MovieReviewDataset.py @@ -0,0 +1,206 @@ +import os +import re +import tarfile + +import numpy as np + +from skluc.data.mldatasets import LabeledData +from skluc.data.mldatasets.Dataset import Dataset +from skluc.utils import create_directory, check_files, logger + + +class MovieReviewV1Dataset(Dataset): + data_groups_private = ["_train"] + TRAIN_SIZE = 9000 + + def apply_transformer(self, transformer_class): + # todo, cette fonction devrait marcher pour tout dataset (donc il faudrait la mettre dans la classe Dataset) + transformer = transformer_class() + transformer_name = transformer.__class__.__name__ + transform_path = os.path.join(self.s_download_dir, transformer_name) + transform_filepaths = [os.path.join(transform_path, kw + ".npz") + for kw in self.data_groups_private] + create_directory(transform_path) + if check_files(transform_filepaths) and transformer.check_model(): + # in the case where the transformations already exist in npz files + # and the model is the actual good model + # but I have no guarantee the transformation has been obtained with the stored model though... + # todo make the npz files to store the md5 checksum of the model that has produced them + logger.debug("Files {} already exists".format(transform_filepaths)) + logger.debug("Now load data of files {}".format(transform_filepaths)) + for kw in self.data_groups_private: + npzfile_path = os.path.join(transform_path, kw + ".npz") + logger.debug("Loading {}".format(npzfile_path)) + npzfile = np.load(npzfile_path) + data = npzfile[kw + "_data"] + labels = npzfile[kw + "_labels"] + setattr(self, kw, LabeledData(data=data, labels=labels)) + # todo être plus intelligent avec le mode debug du logger. Pour l'instant je met tout en debug + else: + # in the case the transformations doesn't yet exist + # one nead to apply it to the data + # then to save the transformation + logger.debug("Files {} don't exist or model md5 checksum doesn't match. 
Need to produce them".format(transform_filepaths)) + logger.info("Apply convolution of {} to dataset {}".format(transformer_name, self.s_name)) + for kw in self.data_groups_private: + data, labels = getattr(self, kw) + transformed_data, transformed_labels = transformer.transform(data, labels) + setattr(self, kw, LabeledData(data=transformed_data, labels=transformed_labels)) + dict_attr = {kw + "_data": transformed_data, kw + "_labels": transformed_labels} + filepath = os.path.join(transform_path, kw + ".npz") + + logger.debug("Saving transformed {} data to {}".format(kw, filepath)) + np.savez(filepath, **dict_attr) + + def __init__(self, validation_size=0, seed=0, s_download_dir=None): + self.__s_url = "http://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz" + + if s_download_dir is not None: + super().__init__([self.__s_url], "moviereview", s_download_dir, validation_size=validation_size, seed=seed) + else: + super().__init__([self.__s_url], "moviereview", validation_size=validation_size, seed=seed) + + self.__extracted_files = [ + 'rt-polarity.pos', + 'rt-polarity.neg' + ] + self.__extracted_dirname = os.path.join(self.s_download_dir, "rt-polaritydata") + + self.__extracted_file_paths = [os.path.join(self.__extracted_dirname, file) for file in self.__extracted_files] + + self.__counter = 1 + self.__vocab = {"<pad>": 0} + self.__reversed_vocab = {0: "<pad>"} + + @property + def vocab(self): + return self.__vocab + + @property + def vocab_inv(self): + return self.__reversed_vocab + + def read(self): + # todo faire une fonction d'extraction commune? + targz_file_path = self.l_filepaths[-1] + if not check_files(self.__extracted_file_paths): + logger.debug("Extracting {} ...".format(targz_file_path)) + tar = tarfile.open(targz_file_path, "r:gz") + tar.extractall(path=self.s_download_dir) + else: + logger.debug("File {} has already been extracted".format(targz_file_path)) + + data_labeled = MovieReviewV1Dataset.load_data_and_labels(self.__extracted_file_paths[0], + self.__extracted_file_paths[1], + encoding="ISO-8859-1") + + max_ = -1 + for l in data_labeled[0]: + max_ = max(max_, len(l.strip().split())) + + lst_arr_ex = [] + for ex in data_labeled[0]: + splitted_ex = ex.strip().split() + splitted_ex_nbr = [] + for wrd in splitted_ex: + if wrd not in self.__vocab: + self.__vocab[wrd] = self.__counter + self.__reversed_vocab[self.__counter] = wrd + self.__counter += 1 + splitted_ex_nbr.append(self.__vocab[wrd]) + arr_splitted_ex_nbr = np.pad(splitted_ex_nbr, (0, max_-len(splitted_ex_nbr)), 'constant', + constant_values=self.__vocab["<pad>"]) + lst_arr_ex.append(np.reshape(arr_splitted_ex_nbr, (1, -1))) + X = np.concatenate(lst_arr_ex, axis=0) + + self._train = LabeledData(data=X, + labels=data_labeled[1]) + + @property + def train(self): + indexes = self.permuted_index_train[:self.TRAIN_SIZE - self.validation_size] + return LabeledData(data=self._train.data[indexes], + labels=self._train.labels[indexes]) + + @property + def test(self): + indexes = self.permuted_index_train[self.TRAIN_SIZE:] + return LabeledData(data=self._train.data[indexes], + labels=self._train.labels[indexes]) + + @property + def validation(self): + indexes = self.permuted_index_train[(self.TRAIN_SIZE - self.validation_size):self.TRAIN_SIZE] + return LabeledData(data=self._train.data[indexes], + labels=self._train.labels[indexes]) + + @property + def vocabulary_length(self): + return len(self.__vocab) + + @staticmethod + def clean_str(string): + """ + Tokenization/string cleaning for all datasets 
+    @staticmethod
+    def clean_str(string):
+        """
+        Tokenization/string cleaning for all datasets except for SST.
+        Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
+
+        Credit to: https://github.com/dennybritz/cnn-text-classification-tf/blob/master/data_helpers.py
+        """
+        string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
+        string = re.sub(r"\'s", " \'s", string)
+        string = re.sub(r"\'ve", " \'ve", string)
+        string = re.sub(r"n\'t", " n\'t", string)
+        string = re.sub(r"\'re", " \'re", string)
+        string = re.sub(r"\'d", " \'d", string)
+        string = re.sub(r"\'ll", " \'ll", string)
+        string = re.sub(r",", " , ", string)
+        string = re.sub(r"!", " ! ", string)
+        string = re.sub(r"\(", " \( ", string)
+        string = re.sub(r"\)", " \) ", string)
+        string = re.sub(r"\?", " \? ", string)
+        string = re.sub(r"\s{2,}", " ", string)
+        return string.strip().lower()
+
+    @staticmethod
+    def load_data_and_labels(positive_data_file, negative_data_file, encoding='utf-8'):
+        """
+        Loads MR polarity data from files, splits the data into words and generates labels.
+        Returns split sentences and labels.
+
+        Credit to: https://github.com/dennybritz/cnn-text-classification-tf/blob/master/data_helpers.py
+        """
+        # Load data from files
+        positive_examples = list(open(positive_data_file, "r", encoding=encoding).readlines())
+        positive_examples = [s.strip() for s in positive_examples]
+        negative_examples = list(open(negative_data_file, "r", encoding=encoding).readlines())
+        negative_examples = [s.strip() for s in negative_examples]
+        # Split by words
+        x_text = positive_examples + negative_examples
+        x_text = [MovieReviewV1Dataset.clean_str(sent) for sent in x_text]
+        # Generate labels
+        positive_labels = [[0, 1] for _ in positive_examples]
+        negative_labels = [[1, 0] for _ in negative_examples]
+        y = np.concatenate([positive_labels, negative_labels], 0)
+        return LabeledData(data=x_text, labels=y)
+
+    # todo not yet sure the following is useful
+    # @staticmethod
+    # def batch_iter(data, batch_size, num_epochs, shuffle=True):
+    #     """
+    #     Generates a batch iterator for a dataset.
+    #     """
+    #     data = np.array(data)
+    #     data_size = len(data)
+    #     num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
+    #     for epoch in range(num_epochs):
+    #         # Shuffle the data at each epoch
+    #         if shuffle:
+    #             shuffle_indices = np.random.permutation(np.arange(data_size))
+    #             shuffled_data = data[shuffle_indices]
+    #         else:
+    #             shuffled_data = data
+    #         for batch_num in range(num_batches_per_epoch):
+    #             start_index = batch_num * batch_size
+    #             end_index = min((batch_num + 1) * batch_size, data_size)
+    #
+    #             yield shuffled_data[start_index:end_index]
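A minimal usage sketch for the class above, assuming the generic load() pipeline
(download, read, shuffle) that the other Dataset subclasses rely on; the sizes
printed here depend on the downloaded corpus and on the chosen validation_size:

    from skluc.data.mldatasets import MovieReviewV1Dataset

    mr = MovieReviewV1Dataset(validation_size=500, seed=0)
    mr.load()                      # fetches and extracts rt-polaritydata.tar.gz, then encodes it
    print(mr.train.data.shape)     # (TRAIN_SIZE - validation_size, max_sentence_length)
    print(mr.vocabulary_length)    # size of the word -> id mapping, "<pad>" included
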
diff --git a/skluc/data/mldatasets/SVHNDataset.py b/skluc/data/mldatasets/SVHNDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb15a84ced44bc16b9edac0525944c0474e4f2f6
--- /dev/null
+++ b/skluc/data/mldatasets/SVHNDataset.py
@@ -0,0 +1,61 @@
+import os
+
+import numpy as np
+import scipy.io as sio
+
+from skluc.data.mldatasets import LabeledData
+from skluc.data.mldatasets.ImageDataset import ImageDataset
+from skluc.utils import logger
+
+
+class SVHNDataset(ImageDataset):
+
+    HEIGHT = 32
+    WIDTH = 32
+    DEPTH = 3
+
+    def __init__(self, validation_size=0, seed=0, s_download_dir=None):
+        self.__s_root_url = "http://ufldl.stanford.edu/housenumbers/"
+        self.__d_leaf_url = {
+            "train_data": "train_32x32.mat",
+            "test_data": "test_32x32.mat",
+        }
+
+        l_url = [self.__s_root_url + leaf_url for leaf_url in self.__d_leaf_url.values()]
+        if s_download_dir is not None:
+            super().__init__(l_url, "svhn", s_download_dir, validation_size=validation_size, seed=seed)
+        else:
+            super().__init__(l_url, "svhn", validation_size=validation_size, seed=seed)
+
+    @staticmethod
+    def read_mat(fname):
+        """
+        Loosely adapted from https://stackoverflow.com/questions/29185493/read-svhn-dataset-in-python
+
+        Python function for importing the SVHN data set.
+        """
+        # Load everything into numpy arrays; labels keep the SVHN convention (1 to 10, where 10 stands for the digit 0)
+        logger.info("Read mat file {}".format(fname))
+        data = sio.loadmat(fname)
+        img = np.moveaxis(data['X'], -1, 0)
+        lbl = data['y']
+        return img, lbl
+
+    def read(self):
+        """
+        Load the train and test sets as (data, labels) tuples of np.ndarray and store them on the instance.
+
+        :return: None
+        """
+        # todo add possibility to provide percentage for validation set instead of size
+        self._train = LabeledData(
+            *self.read_mat(os.path.join(self.s_download_dir, self.__d_leaf_url["train_data"]))
+        )
+
+        self._test = LabeledData(
+            *self.read_mat(os.path.join(self.s_download_dir, self.__d_leaf_url["test_data"]))
+        )
+
+        self._check_validation_size(self._train[0].shape[0])
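Note that the raw SVHN labels read above range from 1 to 10, with 10 denoting the
digit 0. A short sketch of the remapping a consumer of read_mat() would typically
apply (the .mat path is hypothetical and assumes the file has already been downloaded):

    import numpy as np
    from skluc.data.mldatasets import SVHNDataset

    img, lbl = SVHNDataset.read_mat("train_32x32.mat")
    lbl = lbl.flatten()
    lbl[lbl == 10] = 0                       # SVHN stores the digit 0 as label 10
    assert img.shape[1:] == (32, 32, 3)      # N x HEIGHT x WIDTH x DEPTH after moveaxis
    assert lbl.min() == 0 and lbl.max() == 9
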
diff --git a/skluc/data/mldatasets/__init__.py b/skluc/data/mldatasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed52441e6cf97325e41a7230f2ee02c83fa0a942
--- /dev/null
+++ b/skluc/data/mldatasets/__init__.py
@@ -0,0 +1,34 @@
+"""
+This module defines the Dataset classes useful for downloading and loading datasets as numpy.ndarrays.
+
+The currently implemented datasets are:
+    - mnist
+    - cifar10
+    - cifar100
+    - svhn
+    - moviereview
+"""
+
+import collections
+
+# LabeledData must be defined before the dataset modules are imported below, because
+# those modules import it back from this package; defining it afterwards would make
+# the package fail at import time with a circular-import error.
+LabeledData = collections.namedtuple("LabeledData", ["data", "labels"])
+
+from skluc.data.mldatasets.Cifar100FineDataset import Cifar100FineDataset
+from skluc.data.mldatasets.Cifar10Dataset import Cifar10Dataset
+from skluc.data.mldatasets.MnistDataset import MnistDataset
+from skluc.data.mldatasets.MovieReviewDataset import MovieReviewV1Dataset
+from skluc.data.mldatasets.SVHNDataset import SVHNDataset
+
+
+if __name__ == "__main__":
+    d = Cifar100FineDataset(validation_size=10000)
+    d.load()
+    print("Before preprocessing")
+    print(d.train.data.shape, d.train.labels.shape)
+    print(d.validation.data.shape, d.validation.labels.shape)
+    print(d.test.data.shape, d.test.labels.shape)
+    # d.apply_transformer(VGG19SvhnTransformer)
+    # print("After vgg19 preprocessing")
+    # print(d.train.data.shape, d.train.labels.shape)
+    # print(d.validation.data.shape, d.validation.labels.shape)
+    # print(d.test.data.shape, d.test.labels.shape)
diff --git a/skluc/test/test_mldatasets.py b/skluc/test/test_mldatasets.py
index 8be185edd94e83b21aec99cf875762896badadea..66b77b8486fd4a0b6f16ab91e16105ccd29d6e9c 100644
--- a/skluc/test/test_mldatasets.py
+++ b/skluc/test/test_mldatasets.py
@@ -2,6 +2,7 @@ import os
 import unittest
 
 import skluc.data.mldatasets as dataset
+from skluc.utils import silentremove
 
 
 class TestMnistDataset(unittest.TestCase):
@@ -28,7 +29,7 @@ class TestMnistDataset(unittest.TestCase):
 
     def tearDown(self):
         for name in self.full_mnist_names:
-            dataset.silentremove(name)
+            silentremove(name)
 
 
 class TestCifar10Dataset(unittest.TestCase):
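A possible companion test for the new SVHN loader, following the structure of the
existing test classes; exact set sizes are not asserted because they depend on the
downloaded .mat files, and load() is assumed to perform the usual download, read
and shuffle steps:

    import unittest

    import skluc.data.mldatasets as dataset


    class TestSVHNDataset(unittest.TestCase):
        def setUp(self):
            self.data = dataset.SVHNDataset(validation_size=1000)
            self.data.load()

        def test_shapes(self):
            # data and labels must stay aligned, and the class constants must describe 32x32 RGB images
            self.assertEqual(self.data.train.data.shape[0], self.data.train.labels.shape[0])
            self.assertEqual((dataset.SVHNDataset.HEIGHT, dataset.SVHNDataset.WIDTH, dataset.SVHNDataset.DEPTH),
                             (32, 32, 3))


    if __name__ == "__main__":
        unittest.main()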