diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py
index 5772286a7c562fed60466478956d673d4bd04145..c4afa957f446c3094b8dfe6a85ed83cc8048fc2b 100644
--- a/multiview_platform/execute.py
+++ b/multiview_platform/execute.py
@@ -2,7 +2,7 @@
 def execute():
-    import multiview_platform.versions as vs
+    from multiview_platform import versions as vs
     vs.test_versions()
     import sys
diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index c411fc8b1d0d3916b11262eb00ddfd2f4ab702fd..cba482d1ab1051ee28e8702bf12f550e04043a71 100644
--- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py
+++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
@@ -15,7 +15,7 @@ from . import monoview_classifiers
 from . import multiview_classifiers
 from .multiview.exec_multiview import exec_multiview, exec_multiview_multicore
 from .monoview.exec_classif_mono_view import exec_monoview, exec_monoview_multicore
-from .utils import get_multiview_db as DB
+from .utils.dataset import delete_HDF5
 from .result_analysis import get_results
 from .result_analysis import plot_results_noise
 # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes,
@@ -607,14 +607,14 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
 
 def exec_benchmark(nb_cores, stats_iter, nb_multiclass,
-                       benchmark_arguments_dictionaries, classification_indices,
-                       directories,
-                       directory, multi_class_labels, metrics, labels_dictionary,
-                       nb_labels, dataset_var,
-                       exec_one_benchmark=exec_one_benchmark,
-                       exec_one_benchmark_multicore=exec_one_benchmark_multicore,
-                       exec_one_benchmark_mono_core=exec_one_benchmark_mono_core,
-                       get_results=get_results, delete=DB.deleteHDF5):
+                   benchmark_arguments_dictionaries, classification_indices,
+                   directories,
+                   directory, multi_class_labels, metrics, labels_dictionary,
+                   nb_labels, dataset_var,
+                   exec_one_benchmark=exec_one_benchmark,
+                   exec_one_benchmark_multicore=exec_one_benchmark_multicore,
+                   exec_one_benchmark_mono_core=exec_one_benchmark_mono_core,
+                   get_results=get_results, delete=delete_HDF5):
     r"""Used to execute the needed benchmark(s) on multicore or mono-core functions.
 
     Parameters
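Review note: `exec_benchmark` receives all of its collaborators, including the cleanup function, as overridable default arguments, so this hunk only swaps the default from `DB.deleteHDF5` to the relocated `delete_HDF5`. A minimal sketch of that injection pattern, with illustrative names not taken from the codebase:

```python
# Default-argument dependency injection, as used by exec_benchmark.
# The names run_benchmark/real_delete are illustrative only.
def real_delete(path):
    print("deleting", path)


def run_benchmark(dataset_path, delete=real_delete):
    # ... run the benchmark ...
    delete(dataset_path)  # cleanup hook, overridable in tests


run_benchmark("demo0.hdf5")                         # production: real cleanup
run_benchmark("demo0.hdf5", delete=lambda p: None)  # test: no-op stub
```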
@@ -742,11 +742,11 @@ def exec_classif(arguments):
                                                      noise_std)
         args["Base"]["name"] = datasetname
 
-        splits = execution.gen_splits(dataset_var.get("Labels").value, args["Classification"]["split"],
+        splits = execution.gen_splits(dataset_var.get_labels(), args["Classification"]["split"],
                                       stats_iter_random_states)
 
         multiclass_labels, labels_combinations, indices_multiclass = multiclass.gen_multiclass_labels(
-            dataset_var.get("Labels").value, multiclass_method, splits)
+            dataset_var.get_labels(), multiclass_method, splits)
 
         k_folds = execution.gen_k_folds(stats_iter, args["Classification"]["nb_folds"],
                                         stats_iter_random_states)
@@ -757,7 +757,7 @@ def exec_classif(arguments):
         views, views_indices, all_views = execution.init_views(dataset_var, args["Base"]["views"])
         views_dictionary = gen_views_dictionnary(dataset_var, views)
         nb_views = len(views)
-        nb_class = dataset_var.get("Metadata").attrs["nbClass"]
+        nb_class = dataset_var.get_nb_class()
         metrics = [metric.split(":") for metric in args["Classification"]["metrics"]]
         if metrics == [["all"]]:
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
index c889c251b82f904655f90d7565fd84dc805dc74f..402f7263354f21c63df59ff3f173c0e94944ca42 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py
@@ -2,18 +2,71 @@ import logging
 import os
 import select
 import sys
+import errno
 
 import h5py
 import numpy as np
 from scipy import sparse
 
-from . import get_multiview_db as DB
+# from . import get_multiview_db as DB
 
 
 class Dataset():
 
-    def __init__(self, dataset):
-        self.dataset = dataset
+    def __init__(self, views=None, labels=None, are_sparse=False,
+                 file_name="dataset.hdf5", view_names=None, path="",
+                 hdf5_file=None, labels_names=None):
+        if hdf5_file is not None:
+            self.dataset = hdf5_file
+        else:
+            if not os.path.exists(os.path.dirname(os.path.join(path, file_name))):
+                try:
+                    os.makedirs(os.path.dirname(os.path.join(path, file_name)))
+                except OSError as exc:
+                    if exc.errno != errno.EEXIST:
+                        raise
+            dataset_file = h5py.File(os.path.join(path, file_name), "w")
+            if view_names is None:
+                view_names = ["View" + str(index) for index in range(len(views))]
+            if isinstance(are_sparse, bool):
+                are_sparse = [are_sparse for _ in views]
+            for view_index, (view_name, view, is_sparse) in enumerate(
+                    zip(view_names, views, are_sparse)):
+                view_dataset = dataset_file.create_dataset("View" + str(view_index),
+                                                           view.shape,
+                                                           data=view)
+                view_dataset.attrs["name"] = view_name
+                view_dataset.attrs["sparse"] = is_sparse
+            labels_dataset = dataset_file.create_dataset("Labels",
+                                                         shape=labels.shape,
+                                                         data=labels)
+            if labels_names is None:
+                labels_names = [str(index) for index in np.unique(labels)]
+            labels_dataset.attrs["names"] = [label_name.encode()
+                                             if not isinstance(label_name, bytes)
+                                             else label_name
+                                             for label_name in labels_names]
+            meta_data_grp = dataset_file.create_group("Metadata")
+            meta_data_grp.attrs["nbView"] = len(views)
+            meta_data_grp.attrs["nbClass"] = len(np.unique(labels))
+            meta_data_grp.attrs["datasetLength"] = len(labels)
+            dataset_file.close()
+            dataset_file = h5py.File(os.path.join(path, file_name), "r")
+            self.dataset = dataset_file
+        self.nb_view = self.dataset.get("Metadata").attrs["nbView"]
+        self.view_dict = self.get_view_dict()
+
+    def get_view_dict(self):
+        view_dict = {}
+        for view_index in range(self.nb_view):
+            view_dict[self.dataset.get(
+                "View" + str(view_index)).attrs["name"]] = view_index
+        return view_dict
+
+    def get_label_names(self, decode=True):
+        if decode:
+            return [label_name.decode("utf-8")
+                    for label_name in self.dataset.get("Labels").attrs["names"]]
+        else:
+            return self.dataset.get("Labels").attrs["names"]
 
     def init_example_indces(self, example_indices=None):
         if example_indices is None:
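Review note: the two construction paths of the new `Dataset` class are easiest to see side by side. A hedged usage sketch, not part of the patch; the file name and path are illustrative:

```python
import h5py
import numpy as np

from multiview_platform.mono_multi_view_classifiers.utils.dataset import Dataset

rs = np.random.RandomState(42)
views = [rs.randint(0, 10, size=(5, 7)) for _ in range(3)]
labels = rs.randint(0, 3, 5)

# Path 1: build a new HDF5 file from numpy views (written, then reopened "r").
ds = Dataset(views=views, labels=labels, view_names=["a", "b", "c"],
             labels_names=["no", "yes", "maybe"],
             file_name="demo.hdf5", path="/tmp/")
print(ds.get_nb_class())   # 3
print(ds.get_view_dict())  # {'a': 0, 'b': 1, 'c': 2}

# Path 2: wrap an HDF5 file that already follows the platform's layout.
ds2 = Dataset(hdf5_file=h5py.File("/tmp/demo.hdf5", "r"))
```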
@@ -44,10 +97,43 @@
 
         return sparse_mat
 
+    # def copy(self, examples_indices, views_indices, target_dataset):
+    #     new_dataset = Dataset(views=,
+    #                           labels=,
+    #                           are_sparse=,
+    #                           file_name=,
+    #                           view_names=,
+    #                           path=,
+    #                           labels_names=)
+    #     return self.dataset.copy(part_name, target_dataset)
+
     def get_nb_class(self, example_indices=None):
         example_indices = self.init_example_indces(example_indices)
         return len(np.unique(self.dataset.get("Labels").value[example_indices]))
 
+    def get_labels(self, example_indices=None):
+        example_indices = self.init_example_indces(example_indices)
+        return self.dataset.get("Labels").value[example_indices]
+
+    def copy_view(self, target_dataset=None, source_view_name=None,
+                  target_view_name=None, example_indices=None):
+        example_indices = self.init_example_indces(example_indices)
+        new_d_set = target_dataset.create_dataset(
+            target_view_name,
+            data=self.get_v(self.view_dict[source_view_name],
+                            example_indices=example_indices))
+        # Copy the attributes from the source HDF5 dataset (a numpy view has
+        # no attrs, so they must be read from the underlying h5py object).
+        source_dset = self.dataset.get("View" + str(self.view_dict[source_view_name]))
+        for key, value in source_dset.attrs.items():
+            new_d_set.attrs[key] = value
+
+
+def datasets_already_exist(pathF, name, nbCores):
+    """Used to check if it's necessary to copy datasets"""
+    allDatasetExist = True
+    for coreIndex in range(nbCores):
+        allDatasetExist *= os.path.isfile(
+            pathF + name + str(coreIndex) + ".hdf5")
+    return allDatasetExist
+
 
 def get_v(dataset, view_index, used_indices=None):
     """Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset"""
@@ -137,7 +223,7 @@ def init_multiple_datasets(path_f, name, nb_cores):
         Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark.
     """
     if nb_cores > 1:
-        if DB.datasetsAlreadyExist(path_f, name, nb_cores):
+        if datasets_already_exist(path_f, name, nb_cores):
             logging.debug(
                 "Info:\t Enough copies of the dataset are already available")
             pass
@@ -152,11 +238,36 @@ def init_multiple_datasets(path_f, name, nb_cores):
             if not confirmation:
                 sys.exit(0)
             else:
-                dataset_files = DB.copyHDF5(path_f, name, nb_cores)
+                dataset_files = copy_hdf5(path_f, name, nb_cores)
                 logging.debug("Start:\t Creating datasets for multiprocessing")
                 return dataset_files
 
 
+def copy_hdf5(pathF, name, nbCores):
+    """Used to copy a HDF5 database in case of multicore computing"""
+    datasetFile = h5py.File(pathF + name + ".hdf5", "r")
+    for coreIndex in range(nbCores):
+        newDataSet = h5py.File(pathF + name + str(coreIndex) + ".hdf5", "w")
+        for dataset in datasetFile:
+            datasetFile.copy("/" + dataset, newDataSet["/"])
+        newDataSet.close()
+
+
+def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, DATASET):
+    """Used to delete temporary copies at the end of the benchmark"""
+    if nbCores > 1:
+        logging.debug("Start:\t Deleting " + str(
+            nbCores) + " temporary datasets for multiprocessing")
+        args = benchmarkArgumentsDictionaries[0]["args"]
+        logging.debug("Start:\t Deleting datasets for multiprocessing")
+
+        for coreIndex in range(nbCores):
+            os.remove(args["Base"]["pathf"] + args["Base"]["name"] + str(coreIndex) + ".hdf5")
+    filename = DATASET.filename
+    DATASET.close()
+    if "_temp_" in filename:
+        os.remove(filename)
+
+
 def confirm(resp=True, timeout=15):
     """Used to process answer"""
     ans = input_(timeout)
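Review note: `copy_hdf5` and `delete_HDF5` moved here from `get_multiview_db` and together implement the per-core dataset lifecycle. A hedged sketch of the intended flow; the `/tmp/demo` paths are illustrative and `demo.hdf5` is assumed to already exist:

```python
import h5py

from multiview_platform.mono_multi_view_classifiers.utils import dataset as dataset_utils

nb_cores = 2
# Fan out: writes /tmp/demo0.hdf5 and /tmp/demo1.hdf5 from /tmp/demo.hdf5.
dataset_utils.copy_hdf5("/tmp/", "demo", nb_cores)
# ... each worker then opens its own copy read-only ...
# Clean up: removes the per-core copies and closes the source handle; the
# source file itself is only removed if its name contains "_temp_".
benchmark_args = [{"args": {"Base": {"pathf": "/tmp/", "name": "demo"}}}]
dataset_utils.delete_HDF5(benchmark_args, nb_cores,
                          h5py.File("/tmp/demo.hdf5", "r"))
```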
""" if nb_cores > 1: - if DB.datasetsAlreadyExist(path_f, name, nb_cores): + if datasets_already_exist(path_f, name, nb_cores): logging.debug( "Info:\t Enough copies of the dataset are already available") pass @@ -152,11 +238,36 @@ def init_multiple_datasets(path_f, name, nb_cores): if not confirmation: sys.exit(0) else: - dataset_files = DB.copyHDF5(path_f, name, nb_cores) + dataset_files = copy_hdf5(path_f, name, nb_cores) logging.debug("Start:\t Creating datasets for multiprocessing") return dataset_files +def copy_hdf5(pathF, name, nbCores): + """Used to copy a HDF5 database in case of multicore computing""" + datasetFile = h5py.File(pathF + name + ".hdf5", "r") + for coreIndex in range(nbCores): + newDataSet = h5py.File(pathF + name + str(coreIndex) + ".hdf5", "w") + for dataset in datasetFile: + datasetFile.copy("/" + dataset, newDataSet["/"]) + newDataSet.close() + +def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, DATASET): + """Used to delete temporary copies at the end of the benchmark""" + if nbCores > 1: + logging.debug("Start:\t Deleting " + str( + nbCores) + " temporary datasets for multiprocessing") + args = benchmarkArgumentsDictionaries[0]["args"] + logging.debug("Start:\t Deleting datasets for multiprocessing") + + for coreIndex in range(nbCores): + os.remove(args["Base"]["pathf"] + args["Base"]["name"] + str(coreIndex) + ".hdf5") + filename = DATASET.filename + DATASET.close() + if "_temp_" in filename: + os.remove(filename) + + def confirm(resp=True, timeout=15): """Used to process answer""" ans = input_(timeout) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 3b7e1f09483af6587e1398d90bdba495e0e3411c..be069e57ba8f6d4d00ce1e5201ca5651ebc4d2ae 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -7,242 +7,118 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_array +from ..utils.dataset import Dataset, copy_hdf5 + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def copyHDF5(pathF, name, nbCores): - """Used to copy a HDF5 database in case of multicore computing""" - datasetFile = h5py.File(pathF + name + ".hdf5", "r") - for coreIndex in range(nbCores): - newDataSet = h5py.File(pathF + name + str(coreIndex) + ".hdf5", "w") - for dataset in datasetFile: - datasetFile.copy("/" + dataset, newDataSet["/"]) - newDataSet.close() - - -class TanhNormalizer(BaseEstimator, TransformerMixin): - """Normalize data using a tanh function. This is the normalizer used in the so-called "Never-ending paper". - It remains here for reproduceability purposes, but you should use Scikit-Learn normalizers instead! 
-
-    """
-
-    def __init__(self):
-        self.mean = None
-        self.std = None
-
-    def fit(self, X, y=None, **fit_params):
-        X = check_array(X)
-        self.mean = X.mean(0)
-        self.mean.shape = (1, len(self.mean))
-        self.std = X.std(0)
-        self.std[self.std == 0] = 1
-        self.std.shape = (1, len(self.std))
-        return self
-
-    def transform(self, X):
-        return np.tanh((X - self.mean) / self.std)
-
-    def fit_transform(self, X, y=None, **fit_params):
-        self.fit(X, **fit_params)
-        return self.transform(X)
-
-
-def datasetsAlreadyExist(pathF, name, nbCores):
-    """Used to check if it's necessary to copy datasets"""
-    allDatasetExist = True
-    for coreIndex in range(nbCores):
-        import os.path
-        allDatasetExist *= os.path.isfile(
-            pathF + name + str(coreIndex) + ".hdf5")
-    return allDatasetExist
-
-
-def deleteHDF5(benchmarkArgumentsDictionaries, nbCores, DATASET):
-    """Used to delete temporary copies at the end of the benchmark"""
-    if nbCores > 1:
-        logging.debug("Start:\t Deleting " + str(
-            nbCores) + " temporary datasets for multiprocessing")
-        args = benchmarkArgumentsDictionaries[0]["args"]
-        logging.debug("Start:\t Deleting datasets for multiprocessing")
-
-        for coreIndex in range(nbCores):
-            os.remove(args["Base"]["pathf"] + args["Base"]["name"] + str(coreIndex) + ".hdf5")
-    filename = DATASET.filename
-    DATASET.close()
-    if "_temp_" in filename:
-        os.remove(filename)
-
-
-def makeMeNoisy(viewData, random_state, percentage=5):
+def make_me_noisy(view_data, random_state, percentage=5):
     """used to introduce some noise in the generated data"""
-    viewData = viewData.astype(bool)
-    nbNoisyCoord = int(
-        percentage / 100.0 * viewData.shape[0] * viewData.shape[1])
-    rows = range(viewData.shape[0])
-    cols = range(viewData.shape[1])
-    for _ in range(nbNoisyCoord):
-        rowIdx = random_state.choice(rows)
-        colIdx = random_state.choice(cols)
-        viewData[rowIdx, colIdx] = 0
-    noisyViewData = viewData.astype(np.uint8)
-    return noisyViewData
-
-
-def get_plausible_db_hdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="",
-                          random_state=None, full=True, add_noise=False,
-                          noise_std=0.15, nbView=3,
-                          nbClass=2, datasetLength=100, randomStateInt=42, nbFeatures = 10):
+    view_data = view_data.astype(bool)
+    nb_noisy_coord = int(
+        percentage / 100.0 * view_data.shape[0] * view_data.shape[1])
+    rows = range(view_data.shape[0])
+    cols = range(view_data.shape[1])
+    for _ in range(nb_noisy_coord):
+        row_idx = random_state.choice(rows)
+        col_idx = random_state.choice(cols)
+        view_data[row_idx, col_idx] = 0
+    noisy_view_data = view_data.astype(np.uint8)
+    return noisy_view_data
+
+
+def get_plausible_db_hdf5(features, path, file_name, nb_class=3,
+                          label_names=["No".encode(), "Yes".encode(),
+                                       "Maybe".encode()],
+                          random_state=None, full=True, add_noise=False,
+                          noise_std=0.15, nb_view=3, nb_examples=100,
+                          nb_features=10):
     """Used to generate a plausible dataset to test the algorithms"""
-    if not os.path.exists(os.path.dirname(pathF + "Plausible.hdf5")):
-        try:
-            os.makedirs(os.path.dirname(pathF + "Plausible.hdf5"))
+    if not os.path.exists(os.path.dirname(path + "Plausible.hdf5")):
+        try:
+            os.makedirs(os.path.dirname(path + "Plausible.hdf5"))
         except OSError as exc:
             if exc.errno != errno.EEXIST:
                 raise
-    datasetFile = h5py.File(pathF + "/Plausible.hdf5", "w")
-    if NB_CLASS == 2:
-        CLASS_LABELS = np.array(
-            [0 for _ in range(int(datasetLength / 2))] + [1 for _ in range(
-                datasetLength - int(datasetLength / 2))])
-        for viewIndex in range(nbView):
-            viewData = np.array(
-                [np.zeros(nbFeatures) for _ in range(int(datasetLength / 2))] +
-                [np.ones(nbFeatures) for _ in
-                 range(datasetLength - int(datasetLength / 2))])
-            fakeOneIndices = random_state.randint(0, int(datasetLength / 2),
-                                                  int(datasetLength / 12))
-            fakeZeroIndices = random_state.randint(int(datasetLength / 2),
-                                                   datasetLength,
-                                                   int(datasetLength / 12))
-
-            viewData[fakeOneIndices] = np.ones(
-                (len(fakeOneIndices), nbFeatures))
-            viewData[fakeZeroIndices] = np.zeros(
-                (len(fakeZeroIndices), nbFeatures))
-            viewData = makeMeNoisy(viewData, random_state)
-            viewDset = datasetFile.create_dataset("View" + str(viewIndex),
-                                                  viewData.shape,
-                                                  data=viewData.astype(
-                                                      np.uint8))
-            viewDset.attrs["name"] = "ViewNumber" + str(viewIndex)
-            viewDset.attrs["sparse"] = False
-        labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape)
-        labelsDset[...] = CLASS_LABELS
-        labelsDset.attrs["name"] = "Labels"
-        labelsDset.attrs["names"] = ["No".encode(), "Yes".encode()]
-        metaDataGrp = datasetFile.create_group("Metadata")
-        metaDataGrp.attrs["nbView"] = nbView
-        metaDataGrp.attrs["nbClass"] = 2
-        metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS)
-        datasetFile.close()
-        datasetFile = h5py.File(pathF + "Plausible.hdf5", "r")
+    views = []
+    view_names = []
+    are_sparse = []
+    if nb_class == 2:
+        labels = np.array(
+            [0 for _ in range(int(nb_examples / 2))] + [1 for _ in range(
+                nb_examples - int(nb_examples / 2))])
+        label_names = ["No".encode(), "Yes".encode()]
+        for view_index in range(nb_view):
+            view_data = np.array(
+                [np.zeros(nb_features) for _ in range(int(nb_examples / 2))] +
+                [np.ones(nb_features) for _ in
+                 range(nb_examples - int(nb_examples / 2))])
+            fake_one_indices = random_state.randint(0, int(nb_examples / 2),
+                                                    int(nb_examples / 12))
+            fake_zero_indices = random_state.randint(int(nb_examples / 2),
+                                                     nb_examples,
+                                                     int(nb_examples / 12))
+
+            view_data[fake_one_indices] = np.ones(
+                (len(fake_one_indices), nb_features))
+            view_data[fake_zero_indices] = np.zeros(
+                (len(fake_zero_indices), nb_features))
+            view_data = make_me_noisy(view_data, random_state)
+            views.append(view_data)
+            view_names.append("ViewNumber" + str(view_index))
+            are_sparse.append(False)
+
+        dataset = Dataset(views=views, labels=labels,
+                          labels_names=label_names, view_names=view_names,
+                          are_sparse=are_sparse, file_name="Plausible.hdf5",
+                          path=path)
         labels_dictionary = {0: "No", 1: "Yes"}
-        return datasetFile, labels_dictionary, "Plausible"
-    elif NB_CLASS >= 3:
-        firstBound = int(datasetLength / 3)
-        rest = datasetLength - 2 * int(datasetLength / 3)
-        scndBound = 2 * int(datasetLength / 3)
-        thrdBound = datasetLength
-        CLASS_LABELS = np.array(
-            [0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + [2
-                                                                               for
-                                                                               _
-                                                                               in
-                                                                               range(
-                                                                                   rest)])
-        for viewIndex in range(nbView):
-            viewData = np.array(
-                [np.zeros(nbFeatures) for _ in range(firstBound)] +
-                [np.ones(nbFeatures) for _ in range(firstBound)] +
-                [np.ones(nbFeatures) + 1 for _ in range(rest)])
-            fakeOneIndices = random_state.randint(0, firstBound,
-                                                  int(datasetLength / 12))
+        return dataset, labels_dictionary, "Plausible"
+    elif nb_class >= 3:
+        firstBound = int(nb_examples / 3)
+        rest = nb_examples - 2 * int(nb_examples / 3)
+        scndBound = 2 * int(nb_examples / 3)
+        thrdBound = nb_examples
+        labels = np.array(
+            [0 for _ in range(firstBound)] +
+            [1 for _ in range(firstBound)] +
+            [2 for _ in range(rest)]
+        )
+        for view_index in range(nb_view):
+            view_data = np.array(
+                [np.zeros(nb_features) for _ in range(firstBound)] +
+                [np.ones(nb_features) for _ in range(firstBound)] +
+                [np.ones(nb_features) + 1 for _ in range(rest)])
+            fake_one_indices = random_state.randint(0, firstBound,
+                                                    int(nb_examples / 12))
             fakeTwoIndices = random_state.randint(firstBound, scndBound,
-                                                  int(datasetLength / 12))
-            fakeZeroIndices = random_state.randint(scndBound, thrdBound,
-                                                   int(datasetLength / 12))
-
-            viewData[fakeOneIndices] = np.ones(
-                (len(fakeOneIndices), nbFeatures))
-            viewData[fakeZeroIndices] = np.zeros(
-                (len(fakeZeroIndices), nbFeatures))
-            viewData[fakeTwoIndices] = np.ones(
-                (len(fakeTwoIndices), nbFeatures)) + 1
-            viewData = makeMeNoisy(viewData, random_state)
-            viewDset = datasetFile.create_dataset("View" + str(viewIndex),
-                                                  viewData.shape,
-                                                  data=viewData.astype(
-                                                      np.uint8))
-            viewDset.attrs["name"] = "ViewNumber" + str(viewIndex)
-            viewDset.attrs["sparse"] = False
-        labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape)
-        labelsDset[...] = CLASS_LABELS
-        labelsDset.attrs["name"] = "Labels"
-        labelsDset.attrs["names"] = ["No".encode(), "Yes".encode(),
-                                     "Maybe".encode()]
-        metaDataGrp = datasetFile.create_group("Metadata")
-        metaDataGrp.attrs["nbView"] = nbView
-        metaDataGrp.attrs["nbClass"] = 3
-        metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS)
-        datasetFile.close()
-        datasetFile = h5py.File(pathF + "Plausible.hdf5", "r")
+                                                  int(nb_examples / 12))
+            fake_zero_indices = random_state.randint(scndBound, thrdBound,
+                                                     int(nb_examples / 12))
+
+            view_data[fake_one_indices] = np.ones(
+                (len(fake_one_indices), nb_features))
+            view_data[fake_zero_indices] = np.zeros(
+                (len(fake_zero_indices), nb_features))
+            view_data[fakeTwoIndices] = np.ones(
+                (len(fakeTwoIndices), nb_features)) + 1
+            view_data = make_me_noisy(view_data, random_state)
+            views.append(view_data)
+            view_names.append("ViewNumber" + str(view_index))
+            are_sparse.append(False)
+        dataset = Dataset(views=views, labels=labels,
+                          labels_names=label_names, view_names=view_names,
+                          are_sparse=are_sparse,
+                          file_name="Plausible.hdf5",
+                          path=path)
         labels_dictionary = {0: "No", 1: "Yes", 2: "Maybe"}
-        return datasetFile, labels_dictionary, "Plausible"
-
-
-# def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, random_state):
-#     """Was used to generateafake dataset to run tests"""
-#     NB_VIEW = 4
-#     DATASET_LENGTH = 30
-#     NB_CLASS = 2
-#     VIEW_DIMENSIONS = random_state.random_integers(5, 20, NB_VIEW)
-#
-#     DATA = dict((indx,
-#                  np.array([
-#                      random_state.normal(0.0, 2, viewDimension)
-#                      for i in np.arange(DATASET_LENGTH)]))
-#                 for indx, viewDimension in enumerate(VIEW_DIMENSIONS))
-#
-#     CLASS_LABELS = random_state.random_integers(0, NB_CLASS - 1, DATASET_LENGTH)
-#     datasetFile = h5py.File(pathF + "Fake.hdf5", "w")
-#     for index, viewData in enumerate(DATA.values()):
-#         if index == 0:
-#             viewData = random_state.randint(0, 1, (DATASET_LENGTH, 300)).astype(
-#                 np.uint8)
-#             # np.zeros(viewData.shape, dtype=bool)+np.ones((viewData.shape[0], viewData.shape[1]/2), dtype=bool)
-#             viewDset = datasetFile.create_dataset("View" + str(index), viewData.shape)
-#             viewDset[...] = viewData
-#             viewDset.attrs["name"] = "View" + str(index)
-#             viewDset.attrs["sparse"] = False
-#         elif index == 1:
-#             viewData = sparse.csr_matrix(viewData)
-#             viewGrp = datasetFile.create_group("View" + str(index))
-#             dataDset = viewGrp.create_dataset("data", viewData.data.shape, data=viewData.data)
-#             indicesDset = viewGrp.create_dataset("indices", viewData.indices.shape, data=viewData.indices)
-#             indptrDset = viewGrp.create_dataset("indptr", viewData.indptr.shape, data=viewData.indptr)
-#             viewGrp.attrs["name"] = "View" + str(index)
-#             viewGrp.attrs["sparse"] = True
-#             viewGrp.attrs["shape"] = viewData.shape
-#         else:
-#             viewDset = datasetFile.create_dataset("View" + str(index), viewData.shape)
-#             viewDset[...] = viewData
-#             viewDset.attrs["name"] = "View" + str(index)
-#             viewDset.attrs["sparse"] = False
-#     labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape)
-#     labelsDset[...] = CLASS_LABELS
-#     labelsDset.attrs["name"] = "Labels"
-#
-#     metaDataGrp = datasetFile.create_group("Metadata")
-#     metaDataGrp.attrs["nbView"] = NB_VIEW
-#     metaDataGrp.attrs["nbClass"] = NB_CLASS
-#     metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS)
-#     labels_dictionary = {0: "No", 1: "Yes"}
-#     datasetFile.close()
-#     datasetFile = h5py.File(pathF + "Fake.hdf5", "r")
-#     return datasetFile, labels_dictionary
+        return dataset, labels_dictionary, "Plausible"
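Review note: with the rewrite above, `get_plausible_db_hdf5` now returns a `Dataset` object rather than a raw `h5py.File`. A sketch of the expected call, mirroring the new tests; the path is illustrative:

```python
import numpy as np

from multiview_platform.mono_multi_view_classifiers.utils.get_multiview_db import \
    get_plausible_db_hdf5

dataset, labels_dictionary, name = get_plausible_db_hdf5(
    "", "/tmp/", "", nb_class=3, random_state=np.random.RandomState(42),
    nb_view=3, nb_examples=100, nb_features=10)
print(name)                    # "Plausible"
print(labels_dictionary)       # {0: "No", 1: "Yes", 2: "Maybe"}
print(dataset.get_nb_class())  # 3
```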
 
 
 class DatasetError(Exception):
@@ -259,7 +135,8 @@ def get_classes(labels):
         raise DatasetError("Dataset must have at least two different labels")
 
 
-def all_asked_labels_are_available(asked_labels_names_set, available_labels_names):
+def all_asked_labels_are_available(asked_labels_names_set,
+                                   available_labels_names):
     for askedLabelName in asked_labels_names_set:
         if askedLabelName in available_labels_names:
             pass
@@ -272,9 +149,11 @@ def fill_label_names(nb_class, asked_labels_names, random_state,
                      available_labels_names):
     if len(asked_labels_names) < nb_class:
         nb_labels_to_add = nb_class - len(asked_labels_names)
-        labels_names_to_choose = [available_label_name for available_label_name in
-                                  available_labels_names
-                                  if available_label_name not in asked_labels_names]
+        labels_names_to_choose = [available_label_name
+                                  for available_label_name
+                                  in available_labels_names
+                                  if available_label_name
+                                  not in asked_labels_names]
         added_labels_names = random_state.choice(labels_names_to_choose,
                                                  nb_labels_to_add, replace=False)
         asked_labels_names = list(asked_labels_names) + list(added_labels_names)
@@ -366,7 +245,8 @@ def filter_views(dataset_file, temp_dataset, views, used_indices):
 
 def copyhdf5_dataset(source_data_file, destination_data_file,
                      source_dataset_name, destination_dataset_name, used_indices):
-    """Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args"""
+    """Used to copy a view in a new dataset file using only the examples of
+    usedIndices, and copying the args"""
     new_d_set = destination_data_file.create_dataset(destination_dataset_name,
                                                      data=source_data_file.get(
                                                          source_dataset_name).value[
@@ -385,10 +265,11 @@ def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names,
     """Used to load a hdf5 database"""
     if full:
        dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r")
+        dataset = Dataset(hdf5_file=dataset_file)
         dataset_name = name_DB
-        labels_dictionary = dict(
-            (label_index, label_name.decode("utf-8")) for label_index, label_name in
-            enumerate(dataset_file.get("Labels").attrs["names"]))
+        labels_dictionary = dict((label_index, label_name)
+                                 for label_index, label_name
+                                 in enumerate(dataset.get_label_names()))
     else:
         asked_labels_names = [asked_label_name.encode("utf8")
                               for asked_label_name in asked_labels_names]
@@ -429,7 +310,7 @@ def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names,
                                            noise_std)
     else:
         pass
-    return dataset_file, labels_dictionary, dataset_name
+    return dataset, labels_dictionary, dataset_name
 
 
 def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name,
@@ -1400,3 +1281,55 @@ def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames,
 # #     if i != value:
 # #         areAllSame = False
 # # return areAllSame
+
+
+# def getFakeDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, random_state):
+#     """Was used to generate a fake dataset to run tests"""
+#     NB_VIEW = 4
+#     DATASET_LENGTH = 30
+#     NB_CLASS = 2
+#     VIEW_DIMENSIONS = random_state.random_integers(5, 20, NB_VIEW)
+#
+#     DATA = dict((indx,
+#                  np.array([
+#                      random_state.normal(0.0, 2, viewDimension)
+#                      for i in np.arange(DATASET_LENGTH)]))
+#                 for indx, viewDimension in enumerate(VIEW_DIMENSIONS))
+#
+#     CLASS_LABELS = random_state.random_integers(0, NB_CLASS - 1, DATASET_LENGTH)
+#     datasetFile = h5py.File(pathF + "Fake.hdf5", "w")
+#     for index, viewData in enumerate(DATA.values()):
+#         if index == 0:
+#             viewData = random_state.randint(0, 1, (DATASET_LENGTH, 300)).astype(
+#                 np.uint8)
+#             # np.zeros(viewData.shape, dtype=bool)+np.ones((viewData.shape[0], viewData.shape[1]/2), dtype=bool)
+#             viewDset = datasetFile.create_dataset("View" + str(index), viewData.shape)
+#             viewDset[...] = viewData
+#             viewDset.attrs["name"] = "View" + str(index)
+#             viewDset.attrs["sparse"] = False
+#         elif index == 1:
+#             viewData = sparse.csr_matrix(viewData)
+#             viewGrp = datasetFile.create_group("View" + str(index))
+#             dataDset = viewGrp.create_dataset("data", viewData.data.shape, data=viewData.data)
+#             indicesDset = viewGrp.create_dataset("indices", viewData.indices.shape, data=viewData.indices)
+#             indptrDset = viewGrp.create_dataset("indptr", viewData.indptr.shape, data=viewData.indptr)
+#             viewGrp.attrs["name"] = "View" + str(index)
+#             viewGrp.attrs["sparse"] = True
+#             viewGrp.attrs["shape"] = viewData.shape
+#         else:
+#             viewDset = datasetFile.create_dataset("View" + str(index), viewData.shape)
+#             viewDset[...] = viewData
+#             viewDset.attrs["name"] = "View" + str(index)
+#             viewDset.attrs["sparse"] = False
+#     labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape)
+#     labelsDset[...] = CLASS_LABELS
+#     labelsDset.attrs["name"] = "Labels"
+#
+#     metaDataGrp = datasetFile.create_group("Metadata")
+#     metaDataGrp.attrs["nbView"] = NB_VIEW
+#     metaDataGrp.attrs["nbClass"] = NB_CLASS
+#     metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS)
+#     labels_dictionary = {0: "No", 1: "Yes"}
+#     datasetFile.close()
+#     datasetFile = h5py.File(pathF + "Fake.hdf5", "r")
+#     return datasetFile, labels_dictionary
\ No newline at end of file
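Review note: callers of `get_classic_db_hdf5` now receive the `Dataset` wrapper and should go through its accessors instead of raw HDF5 reads. A hedged sketch of a call site; the argument values (and the exact keyword spelling of the trailing parameters) are illustrative:

```python
import numpy as np

# Illustrative call; positional arguments mirror the signature above,
# but the values are placeholders.
dataset, labels_dictionary, dataset_name = get_classic_db_hdf5(
    ["all"], "/data/", "demo", 2, [],
    np.random.RandomState(42), full=True)
labels = dataset.get_labels()      # replaces dataset_file.get("Labels").value
nb_class = dataset.get_nb_class()  # replaces the Metadata attrs lookup
```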
diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_ExecClassif.py
index 3b6f3f570bb573b00dbbea44e1e0b91ad438e4ce..a0c11c76d7969f38aa73efafd2144acc9b586412 100644
--- a/multiview_platform/tests/test_ExecClassif.py
+++ b/multiview_platform/tests/test_ExecClassif.py
@@ -4,7 +4,7 @@ import unittest
 import h5py
 import numpy as np
 
-from .utils import rm_tmp
+from .utils import rm_tmp, tmp_path
 
 from ..mono_multi_view_classifiers import exec_classif
 
@@ -130,7 +130,6 @@ class Test_InitArgumentDictionaries(unittest.TestCase):
         ]
         self.assertEqual(arguments["multiview"][0], expected_output[0])
 
-
     def test_init_argument_dictionaries_multiview_complex(self):
         self.multiview_classifier_arg_value = {"fake_value_2":"plif", "plaf":"plouf"}
         self.init_kwargs = {
@@ -235,7 +234,7 @@ class Test_execBenchmark(unittest.TestCase):
         rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.Dataset = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_file.hdf5", "w")
+            tmp_path+"test_file.hdf5", "w")
         cls.labels = cls.Dataset.create_dataset("Labels",
                                                 data=np.array([0, 1, 2]))
         cls.argument_dictionaries = [{"a": 4, "args": {}}]
@@ -296,7 +295,7 @@ class Test_execBenchmark(unittest.TestCase):
     @classmethod
     def tearDownClass(cls):
         cls.Dataset.close()
-        path = "multiview_platform/tests/tmp_tests/"
+        path = tmp_path
         for file_name in os.listdir(path):
             os.remove(os.path.join(path, file_name))
         os.rmdir(path)
@@ -348,7 +347,7 @@ class Test_execOneBenchmark(unittest.TestCase):
             labels_dictionary={
                 0: "a",
                 1: "b"},
-            directory="multiview_platform/tests/tmp_tests/",
+            directory=tmp_path,
             classification_indices=(
                 [1, 2, 3, 4],
                 [0, 5, 6, 7, 8]),
@@ -387,7 +386,7 @@ class Test_execOneBenchmark(unittest.TestCase):
 
     @classmethod
     def tearDown(cls):
-        path = "multiview_platform/tests/tmp_tests/"
+        path = tmp_path
         for file_name in os.listdir(path):
             dir_path = os.path.join(path, file_name)
             if os.path.isdir(dir_path):
@@ -414,7 +413,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
         flag, results = exec_classif.exec_one_benchmark_multicore(
             nb_cores=2,
             labels_dictionary={0: "a", 1: "b"},
-            directory="multiview_platform/tests/tmp_tests/",
+            directory=tmp_path,
             classification_indices=([1, 2, 3, 4], [0, 10, 20, 30, 40]),
             args=cls.args,
             k_folds=FakeKfold(),
@@ -447,7 +446,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
 
     @classmethod
     def tearDown(cls):
-        path = "multiview_platform/tests/tmp_tests/"
+        path = tmp_path
         for file_name in os.listdir(path):
             dir_path = os.path.join(path, file_name)
             if os.path.isdir(dir_path):
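Review note: the long literal `"multiview_platform/tests/tmp_tests/"` is consistently replaced by the shared `tmp_path` constant from `tests/utils.py` across the test suite. The intended fixture pattern, sketched with an illustrative test class:

```python
import os
import unittest

from multiview_platform.tests.utils import rm_tmp, tmp_path


class Test_Something(unittest.TestCase):  # illustrative name

    @classmethod
    def setUpClass(cls):
        rm_tmp()            # clear leftovers from an aborted run
        os.mkdir(tmp_path)  # every temporary file below derives from tmp_path
        cls.config = tmp_path + "config_temp.yml"

    @classmethod
    def tearDownClass(cls):
        rm_tmp()
```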
diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
index 763c480cdd0b85e79aaca9df69cbda589523dde9..02fd1185f7cee09e58ee1a336d8afb1a3b4e9013 100644
--- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
+++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py
@@ -5,7 +5,7 @@ import h5py
 import numpy as np
 from sklearn.model_selection import StratifiedKFold
 
-from ..utils import rm_tmp
+from ..utils import rm_tmp, tmp_path
 
 from ...mono_multi_view_classifiers.monoview import exec_classif_mono_view
 from ...mono_multi_view_classifiers.monoview_classifiers import decision_tree
 
@@ -18,7 +18,7 @@ class Test_initConstants(unittest.TestCase):
         rm_tmp()
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.datasetFile = h5py.File(
-            "multiview_platform/tests/tmp_tests/test.hdf5", "w")
+            tmp_path+"test.hdf5", "w")
         cls.random_state = np.random.RandomState(42)
         cls.args = {"classifier_name": "test_clf"}
         cls.X_value = cls.random_state.randint(0, 500, (10, 20))
@@ -30,7 +30,7 @@ class Test_initConstants(unittest.TestCase):
                                np.array([1, 3, 5, 7, 9])]
         cls.labels_names = ["test_true", "test_false"]
         cls.name = "test"
-        cls.directory = "multiview_platform/tests/tmp_tests/test_dir/"
+        cls.directory = tmp_path+"test_dir/"
 
     def test_simple(cls):
         kwargs, \
@@ -56,11 +56,11 @@ class Test_initConstants(unittest.TestCase):
 
     @classmethod
     def tearDownClass(cls):
-        os.remove("multiview_platform/tests/tmp_tests/test.hdf5")
+        os.remove(tmp_path+"test.hdf5")
         os.rmdir(
-            "multiview_platform/tests/tmp_tests/test_dir/test_clf/test_dataset")
-        os.rmdir("multiview_platform/tests/tmp_tests/test_dir/test_clf")
-        os.rmdir("multiview_platform/tests/tmp_tests/test_dir")
+            tmp_path+"test_dir/test_clf/test_dataset")
+        os.rmdir(tmp_path+"test_dir/test_clf")
+        os.rmdir(tmp_path+"test_dir")
         os.rmdir("multiview_platform/tests/tmp_tests")
 
 
@@ -108,7 +108,7 @@ class Test_getHPs(unittest.TestCase):
         cls.random_state = np.random.RandomState(42)
         cls.X = cls.random_state.randint(0,10,size=(10,5))
         cls.y = cls.random_state.randint(0,2,size=10)
-        cls.output_file_name = "multiview_platform/tests/tmp_tests/"
+        cls.output_file_name = tmp_path
         cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state)
         cls.nb_cores = 1
         cls.metrics = [["accuracy_score", None]]
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py
index a1b1ad2f85d88f65f6b762cfa9d5c01e92e4b762..bfb134d7a039ab3400d6e2e772dfa827d6fc2ed6 100644
--- a/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py
+++ b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py
@@ -18,6 +18,7 @@ class FakeDataset():
     def get_nb_class(self, example_indices):
         return np.unique(self.labels[example_indices])
 
+
 class FakeDivCoupleClf(diversity_utils.CoupleDiversityFusionClassifier):
 
     def __init__(self, rs, classifier_names=None,
diff --git a/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
index 3fb36b314d67184ed43c801ab8e8e355d9ff24d3..3c274c2880108f61d295788b56eb2e473d392742 100644
--- a/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
+++ b/multiview_platform/tests/test_multiview_classifiers/test_weighted_linear_early_fusion.py
@@ -4,7 +4,7 @@ import numpy as np
 import h5py
 import os
 
-from ..utils import rm_tmp
+from ..utils import rm_tmp, tmp_path
 
 from multiview_platform.mono_multi_view_classifiers.multiview_classifiers import \
     weighted_linear_early_fusion
 
@@ -18,7 +18,7 @@ class Test_WeightedLinearEarlyFusion(unittest.TestCase):
         cls.view_weights = [0.5, 0.5]
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_file.hdf5", "w")
+            tmp_path+"test_file.hdf5", "w")
         cls.labels = cls.dataset_file.create_dataset("Labels",
                                                      data=np.array([0, 1, 0, 0, 1]))
         cls.view0_data = cls.random_state.randint(1,10,size=(5, 4))
diff --git a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
index d237daad9cfa2d9eb6b4789b69c9669e47576575..862556e2346fa3c77a0dfe8c49418c2423e8666a 100644
--- a/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
+++ b/multiview_platform/tests/test_utils/test_GetMultiviewDB.py
@@ -5,7 +5,7 @@ import h5py
 import numpy as np
 
 from ...mono_multi_view_classifiers.utils import get_multiview_db
-from ..utils import rm_tmp
+from ..utils import rm_tmp, tmp_path
 
 
 class Test_copyhdf5Dataset(unittest.TestCase):
@@ -17,7 +17,7 @@ class Test_copyhdf5Dataset(unittest.TestCase):
         if not os.path.exists("multiview_platform/tests/tmp_tests"):
             os.mkdir("multiview_platform/tests/tmp_tests")
         cls.dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_copy.hdf5", "w")
+            tmp_path+"test_copy.hdf5", "w")
         cls.dataset = cls.dataset_file.create_dataset("test",
                                                       data=cls.random_state.randint(
                                                           0, 100, (10, 20)))
@@ -43,7 +43,7 @@ class Test_copyhdf5Dataset(unittest.TestCase):
 
     @classmethod
     def tearDownClass(cls):
-        os.remove("multiview_platform/tests/tmp_tests/test_copy.hdf5")
+        os.remove(tmp_path+"test_copy.hdf5")
         os.rmdir("multiview_platform/tests/tmp_tests")
 
 
@@ -57,7 +57,7 @@ class Test_filterViews(unittest.TestCase):
         if not os.path.exists("multiview_platform/tests/tmp_tests"):
             os.mkdir("multiview_platform/tests/tmp_tests")
         cls.dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_copy.hdf5", "w")
+            tmp_path+"test_copy.hdf5", "w")
         cls.metadata_group = cls.dataset_file.create_group("Metadata")
         cls.metadata_group.attrs["nbView"] = 4
 
@@ -69,7 +69,7 @@ class Test_filterViews(unittest.TestCase):
 
     def test_simple_filter(cls):
         cls.temp_dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_copy_temp.hdf5", "w")
+            tmp_path+"test_copy_temp.hdf5", "w")
         cls.dataset_file.copy("Metadata", cls.temp_dataset_file)
         get_multiview_db.filter_views(cls.dataset_file, cls.temp_dataset_file,
                                       cls.views, np.arange(10))
@@ -82,7 +82,7 @@ class Test_filterViews(unittest.TestCase):
 
     def test_filter_view_and_examples(cls):
         cls.temp_dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_copy_temp.hdf5", "w")
+            tmp_path+"test_copy_temp.hdf5", "w")
         cls.dataset_file.copy("Metadata", cls.temp_dataset_file)
         usedIndices = cls.random_state.choice(10, 6, replace=False)
         get_multiview_db.filter_views(cls.dataset_file, cls.temp_dataset_file,
@@ -94,8 +94,8 @@ class Test_filterViews(unittest.TestCase):
 
     @classmethod
     def tearDownClass(cls):
-        os.remove("multiview_platform/tests/tmp_tests/test_copy.hdf5")
-        os.remove("multiview_platform/tests/tmp_tests/test_copy_temp.hdf5")
+        os.remove(tmp_path+"test_copy.hdf5")
+        os.remove(tmp_path+"test_copy_temp.hdf5")
         os.rmdir("multiview_platform/tests/tmp_tests")
 
 
@@ -343,8 +343,8 @@ class Test_getClassicDBhdf5(unittest.TestCase):
         if not os.path.exists("multiview_platform/tests/tmp_tests"):
             os.mkdir("multiview_platform/tests/tmp_tests")
         cls.dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_dataset.hdf5", "w")
-        cls.pathF = "multiview_platform/tests/tmp_tests/"
+            tmp_path+"test_dataset.hdf5", "w")
+        cls.pathF = tmp_path
         cls.nameDB = "test_dataset"
         cls.NB_CLASS = 2
         cls.askedLabelsNames = ["test_label_1", "test_label_3"]
@@ -450,8 +450,8 @@ class Test_getClassicDBhdf5(unittest.TestCase):
     @classmethod
     def tearDownClass(cls):
         os.remove(
"multiview_platform/tests/tmp_tests/test_dataset_temp_view_label_select.hdf5") - os.remove("multiview_platform/tests/tmp_tests/test_dataset.hdf5") + tmp_path+"test_dataset_temp_view_label_select.hdf5") + os.remove(tmp_path+"test_dataset.hdf5") dirs = os.listdir("multiview_platform/tests/tmp_tests") for dir in dirs: print(dir) @@ -465,7 +465,7 @@ class Test_getClassicDBcsv(unittest.TestCase): rm_tmp() if not os.path.exists("multiview_platform/tests/tmp_tests"): os.mkdir("multiview_platform/tests/tmp_tests") - cls.pathF = "multiview_platform/tests/tmp_tests/" + cls.pathF = tmp_path cls.NB_CLASS = 2 cls.nameDB = "test_dataset" cls.askedLabelsNames = ["test_label_1", "test_label_3"] @@ -571,15 +571,49 @@ class Test_getClassicDBcsv(unittest.TestCase): def tearDownClass(cls): for i in range(4): os.remove( - "multiview_platform/tests/tmp_tests/Views/test_view_" + str( + tmp_path+"Views/test_view_" + str( i) + ".csv") - os.rmdir("multiview_platform/tests/tmp_tests/Views") + os.rmdir(tmp_path+"Views") os.remove( - "multiview_platform/tests/tmp_tests/test_dataset-labels-names.csv") - os.remove("multiview_platform/tests/tmp_tests/test_dataset-labels.csv") - os.remove("multiview_platform/tests/tmp_tests/test_dataset.hdf5") + tmp_path+"test_dataset-labels-names.csv") + os.remove(tmp_path+"test_dataset-labels.csv") + os.remove(tmp_path+"test_dataset.hdf5") os.remove( - "multiview_platform/tests/tmp_tests/test_dataset_temp_view_label_select.hdf5") + tmp_path+"test_dataset_temp_view_label_select.hdf5") for file in os.listdir("multiview_platform/tests/tmp_tests"): print( file) os.rmdir("multiview_platform/tests/tmp_tests") + +class Test_get_plausible_db_hdf5(unittest.TestCase): + + @classmethod + def setUpClass(cls): + rm_tmp() + cls.path = tmp_path + cls.nb_class=3 + cls.rs = np.random.RandomState(42) + cls.nb_view=3 + cls.nb_examples = 5 + cls.nb_features = 4 + + @classmethod + def tearDownClass(cls): + rm_tmp() + + def test_simple(self): + dataset, labels_dict, name = get_multiview_db.get_plausible_db_hdf5( + "", self.path, "", nb_class=self.nb_class, random_state=self.rs, + nb_view=3, nb_examples=self.nb_examples, + nb_features=self.nb_features) + self.assertEqual(dataset.init_example_indces(), range(5)) + self.assertEqual(dataset.get_nb_class(), self.nb_class) + + def test_two_class(self): + dataset, labels_dict, name = get_multiview_db.get_plausible_db_hdf5( + "", self.path, "", nb_class=2, random_state=self.rs, + nb_view=3, nb_examples=self.nb_examples, + nb_features=self.nb_features) + self.assertEqual(dataset.init_example_indces(), range(5)) + self.assertEqual(dataset.get_nb_class(), 2) + + diff --git a/multiview_platform/tests/test_utils/test_configuration.py b/multiview_platform/tests/test_utils/test_configuration.py index 324deb20c34c7cdb8ed11f88ca5c4e30321fa8c9..53f1c6605c268bceb065f5ae3b1e2306e762eade 100644 --- a/multiview_platform/tests/test_utils/test_configuration.py +++ b/multiview_platform/tests/test_utils/test_configuration.py @@ -3,7 +3,7 @@ import unittest import yaml import numpy as np -from ..utils import rm_tmp +from ..utils import rm_tmp, tmp_path from multiview_platform.mono_multi_view_classifiers.utils import configuration @@ -12,7 +12,7 @@ class Test_get_the_args(unittest.TestCase): @classmethod def setUpClass(cls): rm_tmp() - cls.path_to_config_file = "multiview_platform/tests/tmp_tests/config_temp.yml" + cls.path_to_config_file = tmp_path+"config_temp.yml" os.mkdir("multiview_platform/tests/tmp_tests") data = {"Base":{"first_arg": 10, "second_arg":[12.5, 1e-06]}, 
"Classification":{"third_arg":True}} with open(cls.path_to_config_file, "w") as config_file: @@ -20,7 +20,7 @@ class Test_get_the_args(unittest.TestCase): @classmethod def tearDownClass(cls): - os.remove("multiview_platform/tests/tmp_tests/config_temp.yml") + os.remove(tmp_path+"config_temp.yml") os.rmdir("multiview_platform/tests/tmp_tests") def test_file_loading(self): diff --git a/multiview_platform/tests/test_utils/test_dataset.py b/multiview_platform/tests/test_utils/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9b8c002367d69bbb402fe65f00d825c329895f66 --- /dev/null +++ b/multiview_platform/tests/test_utils/test_dataset.py @@ -0,0 +1,97 @@ +import unittest +import h5py +import numpy as np +import os + +from ..utils import rm_tmp, tmp_path + +from ...mono_multi_view_classifiers.utils import dataset + + +class Test_Dataset(unittest.TestCase): + + @classmethod + def setUpClass(cls): + rm_tmp() + os.mkdir(tmp_path) + cls.rs = np.random.RandomState(42) + cls.nb_view = 3 + cls.file_name = "test.hdf5" + cls.nb_examples = 5 + cls.nb_class = 3 + cls.views = [cls.rs.randint(0,10,size=(cls.nb_examples,7)) + for _ in range(cls.nb_view)] + cls.labels = cls.rs.randint(0,cls.nb_class,cls.nb_examples) + cls.dataset_file = h5py.File(os.path.join(tmp_path, cls.file_name)) + cls.view_names = ["ViewN" + str(index) for index in range(len(cls.views))] + cls.are_sparse = [False for _ in cls.views] + for view_index, (view_name, view, is_sparse) in enumerate( + zip(cls.view_names, cls.views, cls.are_sparse)): + view_dataset = cls.dataset_file.create_dataset("View" + str(view_index), + view.shape, + data=view) + view_dataset.attrs["name"] = view_name + view_dataset.attrs["sparse"] = is_sparse + labels_dataset = cls.dataset_file.create_dataset("Labels", + shape=cls.labels.shape, + data=cls.labels) + cls.labels_names = [str(index) for index in np.unique(cls.labels)] + labels_dataset.attrs["names"] = [label_name.encode() + for label_name in cls.labels_names] + meta_data_grp = cls.dataset_file.create_group("Metadata") + meta_data_grp.attrs["nbView"] = len(cls.views) + meta_data_grp.attrs["nbClass"] = len(np.unique(cls.labels)) + meta_data_grp.attrs["datasetLength"] = len(cls.labels) + + @classmethod + def tearDownClass(cls): + cls.dataset_file.close() + rm_tmp() + + def test_simple(self): + dataset_object = dataset.Dataset(hdf5_file=self.dataset_file) + + def test_init_example_indices(self): + example_indices = dataset.Dataset(hdf5_file=self.dataset_file).init_example_indces() + self.assertEqual(example_indices, range(self.nb_examples)) + example_indices = dataset.Dataset(hdf5_file=self.dataset_file).init_example_indces([0,1,2]) + self.assertEqual(example_indices, [0,1,2]) + + def test_get_v(self): + view = dataset.Dataset(hdf5_file=self.dataset_file).get_v(0) + np.testing.assert_array_equal(view, self.views[0]) + view = dataset.Dataset(hdf5_file=self.dataset_file).get_v(1, [0,1,2]) + np.testing.assert_array_equal(view, self.views[1][[0,1,2,], :]) + + def test_get_nb_class(self): + nb_class = dataset.Dataset(hdf5_file=self.dataset_file).get_nb_class() + self.assertEqual(nb_class, self.nb_class) + nb_class = dataset.Dataset(hdf5_file=self.dataset_file).get_nb_class([0]) + self.assertEqual(nb_class, 1) + + def test_from_scratch(self): + dataset_object = dataset.Dataset(views=self.views, + labels=self.labels, + are_sparse=self.are_sparse, + file_name="from_scratch"+self.file_name, + view_names=self.view_names, + path=tmp_path, + labels_names=self.labels_names) + nb_class = 
+        self.assertEqual(nb_class, self.nb_class)
+        example_indices = dataset_object.init_example_indces()
+        self.assertEqual(example_indices, range(self.nb_examples))
+        view = dataset_object.get_v(0)
+        np.testing.assert_array_equal(view, self.views[0])
+
+    def test_get_view_dict(self):
+        dataset_object = dataset.Dataset(views=self.views,
+                                         labels=self.labels,
+                                         are_sparse=self.are_sparse,
+                                         file_name="from_scratch" + self.file_name,
+                                         view_names=self.view_names,
+                                         path=tmp_path,
+                                         labels_names=self.labels_names)
+        self.assertEqual(dataset_object.get_view_dict(), {"ViewN0": 0,
+                                                          "ViewN1": 1,
+                                                          "ViewN2": 2,})
\ No newline at end of file
diff --git a/multiview_platform/tests/test_utils/test_execution.py b/multiview_platform/tests/test_utils/test_execution.py
index 7079200f75988903d414e529f0f6bfb61fe6053d..f5b9384a42de4256239555ffc298e02be97dc9fc 100644
--- a/multiview_platform/tests/test_utils/test_execution.py
+++ b/multiview_platform/tests/test_utils/test_execution.py
@@ -3,7 +3,7 @@ import unittest
 
 import numpy as np
 
-from ..utils import rm_tmp
+from ..utils import rm_tmp, tmp_path
 
 from ...mono_multi_view_classifiers.utils import execution
 
@@ -79,26 +79,26 @@ class Test_initRandomState(unittest.TestCase):
 
     def setUp(self):
         rm_tmp()
-        os.mkdir("multiview_platform/tests/tmp_tests/")
+        os.mkdir(tmp_path)
 
     def tearDown(self):
-        os.rmdir("multiview_platform/tests/tmp_tests/")
+        os.rmdir(tmp_path)
 
     def test_random_state_42(self):
         randomState_42 = np.random.RandomState(42)
         randomState = execution.init_random_state("42",
-                                                  "multiview_platform/tests/tmp_tests/")
-        os.remove("multiview_platform/tests/tmp_tests/random_state.pickle")
+                                                  tmp_path)
+        os.remove(tmp_path+"random_state.pickle")
         np.testing.assert_array_equal(randomState.beta(1, 100, 100),
                                       randomState_42.beta(1, 100, 100))
 
     def test_random_state_pickle(self):
         randomState_to_pickle = execution.init_random_state(None,
-                                                            "multiview_platform/tests/tmp_tests/")
+                                                            tmp_path)
         pickled_randomState = execution.init_random_state(
-            "multiview_platform/tests/tmp_tests/random_state.pickle",
-            "multiview_platform/tests/tmp_tests/")
-        os.remove("multiview_platform/tests/tmp_tests/random_state.pickle")
+            tmp_path+"random_state.pickle",
+            tmp_path)
+        os.remove(tmp_path+"random_state.pickle")
         np.testing.assert_array_equal(randomState_to_pickle.beta(1, 100, 100),
                                       pickled_randomState.beta(1, 100, 100))
diff --git a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
index 6b207372e6bf3fee23b5c1cf005b427d77ab0044..17e5290108adbd1e64fb0db9ab180d0f87efd990 100644
--- a/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
+++ b/multiview_platform/tests/test_utils/test_hyper_parameter_search.py
@@ -5,7 +5,7 @@ import h5py
 import numpy as np
 from sklearn.model_selection import StratifiedKFold
 
-from ..utils import rm_tmp
+from ..utils import rm_tmp, tmp_path
 
 from ...mono_multi_view_classifiers.utils import hyper_parameter_search
 from ...mono_multi_view_classifiers.multiview_classifiers import weighted_linear_early_fusion
 
@@ -19,7 +19,7 @@ class Test_randomized_search(unittest.TestCase):
         cls.view_weights = [0.5, 0.5]
         os.mkdir("multiview_platform/tests/tmp_tests")
         cls.dataset_file = h5py.File(
-            "multiview_platform/tests/tmp_tests/test_file.hdf5", "w")
+            tmp_path+"test_file.hdf5", "w")
         cls.labels = cls.dataset_file.create_dataset("Labels",
                                                      data=np.array(
                                                          [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, ]))
@@ -53,6 +53,6 @@ class Test_randomized_search(unittest.TestCase):
     def test_simple(self):
         best_params, test_folds_preds = hyper_parameter_search.randomized_search(
-            self.dataset_file, self.labels.value, "multiview", self.random_state, "multiview_platform/tests/tmp_tests/",
+            self.dataset_file, self.labels.value, "multiview", self.random_state, tmp_path,
             weighted_linear_early_fusion, "WeightedLinearEarlyFusion", self.k_folds, 1,
             ["accuracy_score", None], 2, {}, learning_indices=self.learning_indices)
diff --git a/multiview_platform/tests/utils.py b/multiview_platform/tests/utils.py
index 5766b68885035b216a141caa8e273c0df74583ae..cc77a9be8b4b960ceb86506aca2ab2c1c4e2f76e 100644
--- a/multiview_platform/tests/utils.py
+++ b/multiview_platform/tests/utils.py
@@ -1,9 +1,12 @@
 import os
 
+
+tmp_path = "multiview_platform/tests/tmp_tests/"
+
+
 def rm_tmp():
     try:
-        for file_name in os.listdir("multiview_platform/tests/tmp_tests"):
-            os.remove(os.path.join("multiview_platform/tests/tmp_tests", file_name))
-        os.rmdir("multiview_platform/tests/tmp_tests")
+        for file_name in os.listdir(tmp_path):
+            os.remove(os.path.join(tmp_path, file_name))
+        os.rmdir(tmp_path)
     except:
         pass
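A possible follow-up, not part of this patch: `rm_tmp` still swallows every exception and cannot remove nested directories left behind by some tests; `shutil.rmtree` would cover both cases:

```python
import shutil

tmp_path = "multiview_platform/tests/tmp_tests/"


def rm_tmp():
    # Also removes nested directories, and never masks unrelated errors.
    shutil.rmtree(tmp_path, ignore_errors=True)
```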