diff --git a/.gitignore b/.gitignore
index 321544b79e5399b5cd231d076b48e588f2f64631..8f15990a6d87c5fa581ed09c30e77d9f316047f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,5 @@
 TODO
 *.pyc
-.idea/**
-ipynb/.ipynb_checkpoints/**
-docs/source/monomulti/.ipynb_checkpoints/**
 results/*
 data/*
 Data/*
diff --git a/examples/data/doc_summit.hdf5 b/examples/data/doc_summit.hdf5
deleted file mode 100644
index 68e25ef4604908db4631950088afa7e64acacc7f..0000000000000000000000000000000000000000
Binary files a/examples/data/doc_summit.hdf5 and /dev/null differ
diff --git a/multiview_platform/datasets/__init__.py b/multiview_platform/datasets/__init__.py
deleted file mode 100644
index 8cfb3abd3fb6871b4c6d41cbce518949ed192e19..0000000000000000000000000000000000000000
--- a/multiview_platform/datasets/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from multiview_platform.datasets.base import *
-from multiview_platform.datasets.data_sample import DataSample, Metriclearn_array
diff --git a/multiview_platform/datasets/base.py b/multiview_platform/datasets/base.py
deleted file mode 100644
index e559b86673672595d1df912c6db85a3e46bcb6eb..0000000000000000000000000000000000000000
--- a/multiview_platform/datasets/base.py
+++ /dev/null
@@ -1,204 +0,0 @@
-from __future__ import print_function
-import pickle
-import numpy as np
-import numpy.ma as ma
-from multiview_platform.datasets.data_sample import DataSample
-from six.moves import cPickle as pickle #for performance
-import numpy as np
-
-
-def save_dict(di_, filename_):
-    with open(filename_, 'wb') as f:
-        pickle.dump(di_, f)
-
-def load_dict(filename_):
-    with open(filename_, 'rb') as f:
-        ret_di = pickle.load(f)
-    return ret_di
-
-
-def load_data(address, output='array', pickle=False):
-    if output.startswith(('array')):
-        views = np.empty((len(address)), dtype=object)
-    else:
-        views = {}
-    i = 0
-    nb_samples, nb_features = _determine_dimensions(address)
-    for addr in address:
-        data = _load_view_sample(addr, nb_samples , nb_features[i], pickle=pickle)
-        views[i] = data
-        i += 1
-    return DataSample(data=views)
-
-def _determine_dimensions(address):
-    nb_features = []
-    nb_samples = 0
-    nb_sample_max = -1
-    for adr in address:
-        try:
-            f = open(adr, "r")
-            line = f.readline()
-            nb_samples += 1
-            while line :
-                line = f.readline()
-                l = line.split()
-                nb_samples += 1
-                nb_features.append(len(l))
-                line = f.readline()
-            if nb_sample_max < nb_samples:
-                nb_sample_max = nb_samples
-            f.close()
-        except IOError:
-            raise IOError("file adr can't be open")
-    return nb_sample_max, nb_features
-
-def _load_view_sample(adr, nb_samples, nb_features, pickle=False):
-    """Load a sample from file and returns a dictionary
-    (word,count)
-
-    - Input:
-
-    :param lrows: number or list of rows,
-           a list of strings if partial=True;
-           otherwise, based on pref if version="classic" or
-           "prefix", fact otherwise
-    :type lrows: int or list of int
-    :param lcolumns: number or list of columns
-           a list of strings if partial=True ;
-           otherwise, based on suff if version="classic" or "suffix",
-           fact otherwise
-    :type lcolumns: int or list of int
-    :param string version: (default = "classic") version name
-    :param boolean partial: (default value = False) build of partial
-           if True partial dictionaries are loaded based
-           on nrows and lcolumns
-
-    - Output:
-
-    :returns: nbL , nbEx , dsample , dpref , dsuff , dfact
-    :rtype: int , int , dict , dict , dict , dict
-
-    :Example:
-
-    Let's say you are interested in the samples 10, 25, and 50, and want to
-    know their class name.
-
-    >>> from metriclearning.datasets.base import load_data_sample
-    >>> from metriclearning.tests.datasets.get_dataset_path import get_dataset_path
-    >>> train_file = '3.pautomac_light.train' # '4.spice.train'
-    >>> data = load_data_sample(adr=get_dataset_path(train_file))
-    >>> data.nbL
-    4
-    >>> data.nbEx
-    5000
-    >>> data.data
-    Splearn_array([[ 3., 0., 3., ..., -1., -1., -1.],
-                   [ 3., 3., -1., ..., -1., -1., -1.],
-                   [ 3., 2., 0., ..., -1., -1., -1.],
-                   ...,
-                   [ 3., 1., 3., ..., -1., -1., -1.],
-                   [ 3., 0., 3., ..., -1., -1., -1.],
-                   [ 3., 3., 1., ..., -1., -1., -1.]])
-
-    """
-    #nb_sample, max_length = _read_dimension(adr=adr)
-    f = open(adr, "r")
-    line = f.readline()
-    l = line.split()
-    nbEx = int(l[0])
-    nbL = int(l[1])
-    line = f.readline()
-    data1 = np.zeros((nb_samples, nb_features), dtype=np.float)
-    data1 += np.NAN
-    datatrue = np.ones((nb_samples, nb_features), dtype=np.bool)
-    i = 0
-    while line:
-        l = line.split()
-        # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]])
-        # dsample[w] = dsample[w] + 1 if w in dsample else 1
-        # traitement du mot vide pour les préfixes, suffixes et facteurs
-        w = [float(x) for x in l[0:]]
-        data1[i, :len(w)] = w
-        line = f.readline()
-        i += 1
-        if i > nbEx:
-            raise IndexError("dimension is not well defined")
-    masint= np.isnan(data1)
-    # masint = np.logical_not(masint)
-    madata1 = ma.MaskedArray(data1, masint)
-    f.close()
-
-    if pickle:
-        _create_pickle_files(adr=adr, dsample=madata1)
-    return madata1
-
-# def _read_dimension(adr):
-#     f = open(adr, "r")
-#     line = f.readline()
-#     l = line.split()
-#     nbEx = int(l[0])
-#     nbL = int(l[1])
-#     line = f.readline()
-#     max_length = 0
-#     nb_sample = 0
-#     while line:
-#         l = line.split()
-#         nb_sample += 1
-#         length = int(l[0])
-#         if max_length < length:
-#             max_length = length
-#         line = f.readline()
-#     f.close()
-#     if nb_sample != nbEx:
-#         raise ValueError("check imput file, metadata " + str(nbEx) +
-#                          "do not match number of samples " + str(nb_sample))
-#     return nb_sample , max_length
-
-# def _load_file_1lecture(adr, pickle=False):
-#     dsample = {} # dictionary (word,count)
-#     f = open(adr, "r")
-#     line = f.readline()
-#     l = line.split()
-#     nbEx = int(l[0])
-#     nbL = int(l[1])
-#     line = f.readline()
-#     data1 = np.zeros((0,0))
-#     length = 0
-#     while line:
-#         l = line.split()
-#         # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]])
-#         # dsample[w] = dsample[w] + 1 if w in dsample else 1
-#         # traitement du mot vide pour les préfixes, suffixes et facteurs
-#         w = [] if int(l[0]) == 0 else [int(x) for x in l[1:]]
-#         word = np.array(w, ndmin=2, dtype=np.uint32)
-#         diff = abs(int(l[0]) - length)
-#         if len(w) > length and not np.array_equal(data1, np.zeros((0,0))):
-#             data1 = _add_empty(data1, diff)
-#         elif word.shape[0] < length and not np.array_equal(data1, np.zeros((0,0))):
-#             word = _add_empty(word, diff)
-#
-#         if np.array_equal(data1, np.zeros((0,0))):
-#             data1 = word
-#         else:
-#             data1 = np.concatenate((data1, word), axis=0)
-#         length = data1.shape[1]
-#         line = f.readline()
-#
-#     f.close()
-#     if pickle:
-#         _create_pickle_files(adr=adr, dsample=dsample)
-#     return nbL, nbEx, data1
-
-
-# def _add_empty(data, diff):
-#     empty = np.zeros((data.shape[0], diff))
-#     empty += -1
-#     data = np.concatenate((data, empty), axis=1)
-#     return data
-
-
-def _create_pickle_files(self, adr, dsample):
-    f = open(adr + ".sample.pkl", "wb")
-    pickle.dump(dsample, f)
-    f.close()
diff --git a/multiview_platform/datasets/data_sample.py b/multiview_platform/datasets/data_sample.py
deleted file mode 100644
index 833528dae22b84ebe45a551d4f4adb64fc4af3e2..0000000000000000000000000000000000000000
--- a/multiview_platform/datasets/data_sample.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""This module contains the DataSample class and Splearn_array class
-The DataSample class encapsulates a sample 's components
-nbL and nbEx numbers,
-Splearn_array class inherit from numpy ndarray and contains a 2d data ndarray
-with the shape
-
-==== ==== ==== ==== ====
-x    x    x    x    -1
-x    x    x    x    x
-x    x    -1   -1   -1
-x    -1   -1   -1   -1
--1   -1   -1   -1   -1
-==== ==== ==== ==== ====
-
-where -1 a indicates a empty cell,
-the number nbL and nbEx and , the fourth dictionaries for sample,
-prefix, suffix and factor where they are computed
-"""
-import numpy as np
-import numpy.ma as ma
-
-
-class MultiView_array(ma.MaskedArray):
-    """Splearn_array inherit from numpy ndarray
-
-    :Example:
-
-    >>> from multiview_platform.datasets.base import load_data
-    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
-    >>> train_file = '' # '4.spice.train'
-    >>> data = load_data(adr=get_dataset_path(train_file))
-    >>> print(data.__class__)
-    >>> data.data
-
-    """
-    def __new__(cls, data):
-        shapes_int = []
-        index = 0
-        new_data = data
-        shape_ext = len(data)
-        thekeys = None
-        if isinstance(data, dict):
-            shape_ext = len(data)
-            for key, dat_values in data.items():
-                new_data = cls._populate_new_data(index, dat_values, new_data)
-                shapes_int.append(dat_values.shape[0])
-                index += 1
-            thekeys = data.keys()
-
-        if isinstance(data, np.ndarray):
-            shape_ext = data.shape[0]
-            for dat_values in data:
-                shapes_int.append(dat_values.shape[0])
-                new_data = cls._populate_new_data(index, dat_values, new_data)
-                index += 1
-        # obj = ma.MaskedArray.__new(new_data) # new_data.view() a.MaskedArray(new_data, mask=new_data.mask).view(cls)
-        # bj = super(Metriclearn_array, cls).__new__(cls, new_data.data, new_data.mask)
-        obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
-        obj.shapes_int = shapes_int
-        obj.shape_ext = shape_ext
-        obj.keys = thekeys
-        return obj
-
-
-    @staticmethod
-    def _populate_new_data(index, dat_values, new_data):
-        if index == 0:
-            if isinstance(dat_values, ma.MaskedArray):
-                new_data = dat_values
-            else:
-                new_data = dat_values.view(ma.MaskedArray) # ma.masked_array(dat_values, mask=ma.nomask) dat_values.view(ma.MaskedArray) #(
-                new_data.mask = ma.nomask
-        else:
-            if isinstance(dat_values, ma.MaskedArray):
-                new_data = ma.hstack((new_data, dat_values))
-            else:
-                new_data = ma.hstack((new_data, dat_values.view(ma.MaskedArray) ) ) # ma.masked_array(dat_values, mask=ma.nomask
-        return new_data
-
-    def __array_finalize__(self, obj):
-        if obj is None: return
-        super(MultiView_array, self).__array_finalize__(obj)
-        self.shapes_int = getattr(obj, 'shapes_int', None)
-        self.shape_ext = getattr(obj, 'shape_ext', None)
-        self.keys = getattr(obj, 'keys', None)
-
-    def getCol(self, view, col):
-        start = np.sum(np.asarray(self.shapes_int[0: view]))
-        return self.data[start+col, :]
-
-    def getView(self, view):
-        start = np.sum(np.asarray(self.shapes_int[0: view]))
-        stop = start + self.shapes_int[view]
-        return self.data[start:stop, :]
-
-    def getRaw(self, view, raw):
-        start = np.sum(np.asarray(self.shapes_int[0: view]))
-        stop = np.sum(np.asarray(self.shapes_int[0: view+1]))
-        return self.data[start:stop, raw]
-
-class DataSample(dict):
-    """ A DataSample instance
-
-    :Example:
-
-    >>> from multiview_platform.datasets.base import load_data
-    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
-    >>> train_file = '' # '4.spice.train'
-    >>> data = load_data_sample(adr=get_dataset_path(train_file))
-    >>> print
-    (data.__class__)
-
-    >>> data.data
-
-    - Input:
-
-    :param string adr: adresse and name of the loaden file
-    :param string type: (default value = 'SPiCe') indicate
-           the structure of the file
-    :param lrows: number or list of rows,
-           a list of strings if partial=True;
-           otherwise, based on self.pref if version="classic" or
-           "prefix", self.fact otherwise
-    :type lrows: int or list of int
-    :param lcolumns: number or list of columns
-           a list of strings if partial=True ;
-           otherwise, based on self.suff if version="classic" or "suffix",
-           self.fact otherwise
-    :type lcolumns: int or list of int
-    :param string version: (default = "classic") version name
-    :param boolean partial: (default value = False) build of partial
-
-    """
-
-    def __init__(self, data=None, **kwargs):
-
-        # The dictionary that contains the sample
-        super(DataSample, self).__init__(kwargs)
-        self._data = None # Metriclearn_array(np.zeros((0,0)))
-        if data is not None:
-            self._data = MultiView_array(data)
-
-
-    @property
-    def data(self):
-        """Metriclearn_array"""
-
-        return self._data
-
-    @data.setter
-    def data(self, data):
-        if isinstance(data, (MultiView_array, np.ndarray, ma.MaskedArray, np.generic)):
-            self._data = data
-        else:
-            raise TypeError("sample should be a MultiView_array.")
-
-
-
-
diff --git a/multiview_platform/datasets/get_dataset_path.py b/multiview_platform/datasets/get_dataset_path.py
deleted file mode 100644
index f533aa84910b5ee415db222072d88b66620754ba..0000000000000000000000000000000000000000
--- a/multiview_platform/datasets/get_dataset_path.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-
-
-from __future__ import print_function, division
-
-import os
-
-
-def get_dataset_path(filename):
-    """Return the absolute path of a reference dataset for tests
-
-    - Input parameter:
-
-    :param str filename: File name of the file containing reference data
-           for tests (which must be in ``skgilearn/tests/datasets/``)
-
-    - Output parameters:
-
-    :returns: The absolute path where the file with name **filename** is stored
-    :rtype: str
-
-    """
-
-    datasets_path = os.path.dirname(os.path.abspath(__file__))
-    return os.path.join(datasets_path, filename)
diff --git a/multiview_platform/declare_classifier.py b/multiview_platform/declare_classifier.py
deleted file mode 100644
index 9d83f06af5bc53bc01c11e949ff96cdb6be7b7bf..0000000000000000000000000000000000000000
--- a/multiview_platform/declare_classifier.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-import importlib
-import inspect
-
-
-class ClassierMakerMultiViewPlatform():
-    _benchmark = {"monoview":
-                      {"path_classifier": 'multiview_platform/mono_multi_view_classifier/monoview_classifiers'},
-                  "multiview":
-                      {"path_classifier_multi": 'multiview_platform/mono_multi_view_classifier/multiview_classifier'}}
-
-
-    def __init__(self, classifier_names, classifier_modules=None, classifier_files=None, mod='monoview'):
-        if classifier_files is None and classifier_names.size != classifier_modules.size:
-            raise ValueError("attr classifier_names and classifier_modules should have same size")
-        if classifier_modules is None and classifier_names.size != classifier_files.size:
-            raise ValueError("attr classifier_names and classifier_files should have same size")
-
-        if classifier_files is None:
-            for classifier, module in zip(classifier_names, classifier_modules):
-                my_instance, my_module = self._check_classifier_install
-
-                self._create_class(my_instance, my_module)
-
-
-    def _check_classifier_install(self, classifier, module):
-        try:
-            my_module = importlib.import_module(module)
-        except Exception:
-            raise("the module %d can't be imported" % module)
-        try:
-            my_instance = getattr(my_module, classifier)
-        except AttributeError:
-            raise AttributeError("The class %d is not in %d" % classifier %module)
-        return my_instance, my_module
-
-    def _create_class(self, classifier, module):
-        if mod.startswith('monoview'):
-            directory = self._benchmark[mod]["path_classifier"]
-
-
-    def _get_module_name(self, mymodule):
-        for name in dir(mymodule):
-            att = getattr(mymodule, name)
-            try:
-                getattr(att, "__module__")
-                if att.__module__.startswith(mymodule.__name__):
-                    if inspect.isclass(att):
-                        if att == name:
-                            return name
-            except Exception:
-                return None
-        return None
\ No newline at end of file
diff --git a/multiview_platform/versions.py b/multiview_platform/versions.py
deleted file mode 100644
index b60d98541da38a8e2139cc4d4671e302f4c05ced..0000000000000000000000000000000000000000
--- a/multiview_platform/versions.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Author-Info
-__author__ = "Baptiste Bauvin"
-__status__ = "Prototype" # Production, Development, Prototype
-
-
-def test_versions():
-    """Used to test if all prerequisites are installed"""
-    is_up_to_date = True
-    to_install = []
-
-    # try:
-    #     import sys
-    # except ImportError:
-    #     raise
-    #
-    # try:
-    #     import cvxopt
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("cvxopt")
-    #
-    # try:
-    #     import pyscm
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("pyscm")
-    #
-    # try:
-    #     import numpy
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("numpy")
-    #
-    # try:
-    #     import scipy
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("scipy")
-    #
-    # try:
-    #     import matplotlib
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("matplotlib")
-    #
-    # try:
-    #     import sklearn
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("sklearn")
-    #
-    # try:
-    #     import logging
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("logging")
-    #
-    # try:
-    #     import joblib
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("joblib")
-    #
-    # try:
-    #     import argparse
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("argparse")
-    #
-    # try:
-    #     import h5py #
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("h5py")
-    #
-    # # try:
-    # #     import graphviz #
-    # # except ImportError:
-    # #     is_up_to_date = False
-    # #     to_install.append("graphviz")
-    #
-    # try:
-    #     import pickle #
-    # except ImportError:
-    #     is_up_to_date = False
-    #     to_install.append("pickle")
-    #
-    # if not is_up_to_date:
-    #     print(
-    #         "You can't run at the moment, please install the following modules : \n" + "\n".join(
-    #             to_install))
-    #     quit()
-
-
-if __name__ == "__main__":
-    test_versions()