Commit 140a9cd7 authored by Baptiste Bauvin

Removed

parent 0234bf1b
TODO
*.pyc
.idea/**
ipynb/.ipynb_checkpoints/**
docs/source/monomulti/.ipynb_checkpoints/**
results/*
data/*
Data/*
......
File deleted
from multiview_platform.datasets.base import *
from multiview_platform.datasets.data_sample import DataSample, Metriclearn_array
from __future__ import print_function

import numpy as np
import numpy.ma as ma
from six.moves import cPickle as pickle  # cPickle for performance

from multiview_platform.datasets.data_sample import DataSample
def save_dict(di_, filename_):
    """Pickle the dictionary ``di_`` to the file ``filename_``."""
    with open(filename_, 'wb') as f:
        pickle.dump(di_, f)


def load_dict(filename_):
    """Load a pickled dictionary from the file ``filename_``."""
    with open(filename_, 'rb') as f:
        ret_di = pickle.load(f)
    return ret_di
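
# Illustrative example (not part of the original module): round-trip a small
# dictionary through the two helpers above; the file name is hypothetical.
# >>> d = {"view0": [1, 2, 3], "view1": [4, 5]}
# >>> save_dict(d, "/tmp/views.pkl")
# >>> load_dict("/tmp/views.pkl") == d
# True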


def load_data(address, output='array', pickle=False):
    """Load every view file listed in ``address`` into a DataSample.

    ``address`` is a list of paths, one file per view. If ``output`` starts
    with 'array' the views are gathered in an object ndarray, otherwise in a
    dict keyed by view index. If ``pickle`` is True, each loaded view is
    also dumped next to its source file.
    """
    if output.startswith('array'):
        views = np.empty(len(address), dtype=object)
    else:
        views = {}
    nb_samples, nb_features = _determine_dimensions(address)
    for i, addr in enumerate(address):
        views[i] = _load_view_sample(addr, nb_samples, nb_features[i],
                                     pickle=pickle)
    return DataSample(data=views)


def _determine_dimensions(address):
    """Return the largest sample count over all view files, and the number
    of features (length of the longest line) of each view file."""
    nb_features = []
    nb_sample_max = -1
    for adr in address:
        try:
            with open(adr, "r") as f:
                f.readline()  # skip the "nbEx nbL" header line
                nb_samples = 0
                max_features = 0
                for line in f:
                    nb_samples += 1
                    max_features = max(max_features, len(line.split()))
            nb_features.append(max_features)
            if nb_sample_max < nb_samples:
                nb_sample_max = nb_samples
        except IOError:
            raise IOError("file %s can't be opened" % adr)
    return nb_sample_max, nb_features
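
# Illustrative sketch of the expected file format (an assumption inferred
# from the parsers in this module): a header line "nb_examples nb_letters",
# then one whitespace-separated sequence per line. Paths are hypothetical.
# >>> with open("/tmp/view0.train", "w") as f:
# ...     f.write("2 4\n3 0 3\n3 3\n")
# >>> data = load_data(["/tmp/view0.train"])
# >>> data.data  # masked where sequences are shorter than the longest one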


def _load_view_sample(adr, nb_samples, nb_features, pickle=False):
    """Load one view from a file and return it as a masked array.

    - Input:

    :param str adr: path of the view file to load
    :param int nb_samples: number of rows to allocate
    :param int nb_features: number of columns to allocate
    :param bool pickle: (default value = False) if True, the loaded view
        is also dumped next to the input file as ``adr + ".sample.pkl"``

    - Output:

    :returns: the loaded view, with unfilled cells masked
    :rtype: numpy.ma.MaskedArray

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = '3.pautomac_light.train'  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> data.data
    MultiView_array([[ 3.,  0.,  3., ..., -1., -1., -1.],
                     [ 3.,  3., -1., ..., -1., -1., -1.],
                     [ 3.,  2.,  0., ..., -1., -1., -1.],
                     ...,
                     [ 3.,  1.,  3., ..., -1., -1., -1.],
                     [ 3.,  0.,  3., ..., -1., -1., -1.],
                     [ 3.,  3.,  1., ..., -1., -1., -1.]])
    """
    f = open(adr, "r")
    line = f.readline()
    l = line.split()
    nbEx = int(l[0])
    nbL = int(l[1])
    line = f.readline()
    data1 = np.zeros((nb_samples, nb_features), dtype=np.float64)
    data1 += np.nan  # cells that are never filled stay NaN and get masked
    i = 0
    while line:
        l = line.split()
        w = [float(x) for x in l]
        if i >= nbEx or i >= nb_samples:
            raise IndexError("dimension is not well defined")
        data1[i, :len(w)] = w
        line = f.readline()
        i += 1
    masint = np.isnan(data1)  # mask everything that was not read from file
    madata1 = ma.MaskedArray(data1, masint)
    f.close()
    if pickle:
        _create_pickle_files(adr=adr, dsample=madata1)
    return madata1
# def _read_dimension(adr):
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# max_length = 0
# nb_sample = 0
# while line:
# l = line.split()
# nb_sample += 1
# length = int(l[0])
# if max_length < length:
# max_length = length
# line = f.readline()
# f.close()
# if nb_sample != nbEx:
# raise ValueError("check imput file, metadata " + str(nbEx) +
# "do not match number of samples " + str(nb_sample))
# return nb_sample , max_length
# def _load_file_1lecture(adr, pickle=False):
# dsample = {} # dictionary (word,count)
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# data1 = np.zeros((0,0))
# length = 0
# while line:
# l = line.split()
# # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]])
# # dsample[w] = dsample[w] + 1 if w in dsample else 1
# # handling of the empty word for prefixes, suffixes and factors
# w = [] if int(l[0]) == 0 else [int(x) for x in l[1:]]
# word = np.array(w, ndmin=2, dtype=np.uint32)
# diff = abs(int(l[0]) - length)
# if len(w) > length and not np.array_equal(data1, np.zeros((0,0))):
# data1 = _add_empty(data1, diff)
# elif word.shape[0] < length and not np.array_equal(data1, np.zeros((0,0))):
# word = _add_empty(word, diff)
#
# if np.array_equal(data1, np.zeros((0,0))):
# data1 = word
# else:
# data1 = np.concatenate((data1, word), axis=0)
# length = data1.shape[1]
# line = f.readline()
#
# f.close()
# if pickle:
# _create_pickle_files(adr=adr, dsample=dsample)
# return nbL, nbEx, data1
# def _add_empty(data, diff):
# empty = np.zeros((data.shape[0], diff))
# empty += -1
# data = np.concatenate((data, empty), axis=1)
# return data
def _create_pickle_files(adr, dsample):
    """Dump a loaded view next to its source file as ``adr + ".sample.pkl"``."""
    with open(adr + ".sample.pkl", "wb") as f:
        pickle.dump(dsample, f)
# -*- coding: utf-8 -*-
"""This module contains the DataSample class and the MultiView_array class.

The DataSample class encapsulates a sample's components, and the
MultiView_array class inherits from numpy's MaskedArray and contains a 2d
data ndarray with the shape

==== ==== ==== ==== ====
 x    x    x    x   -1
 x    x    x    x    x
 x    x   -1   -1   -1
 x   -1   -1   -1   -1
-1   -1   -1   -1   -1
==== ==== ==== ==== ====

where -1 indicates an empty cell.
"""
import numpy as np
import numpy.ma as ma


class MultiView_array(ma.MaskedArray):
    """MultiView_array inherits from numpy.ma.MaskedArray

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data
    """
    def __new__(cls, data):
        shapes_int = []
        index = 0
        new_data = data
        shape_ext = len(data)
        thekeys = None
        if isinstance(data, dict):
            shape_ext = len(data)
            for key, dat_values in data.items():
                new_data = cls._populate_new_data(index, dat_values, new_data)
                shapes_int.append(dat_values.shape[0])
                index += 1
            thekeys = data.keys()
        if isinstance(data, np.ndarray):
            shape_ext = data.shape[0]
            for dat_values in data:
                shapes_int.append(dat_values.shape[0])
                new_data = cls._populate_new_data(index, dat_values, new_data)
                index += 1
        obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
        obj.shapes_int = shapes_int
        obj.shape_ext = shape_ext
        obj.keys = thekeys
        return obj

    @staticmethod
    def _populate_new_data(index, dat_values, new_data):
        # The first view starts the stack; later views are appended with
        # ma.hstack so that the mask is preserved.
        if index == 0:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = dat_values
            else:
                new_data = dat_values.view(ma.MaskedArray)
                new_data.mask = ma.nomask
        else:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = ma.hstack((new_data, dat_values))
            else:
                new_data = ma.hstack((new_data,
                                      dat_values.view(ma.MaskedArray)))
        return new_data

    def __array_finalize__(self, obj):
        if obj is None:
            return
        super(MultiView_array, self).__array_finalize__(obj)
        self.shapes_int = getattr(obj, 'shapes_int', None)
        self.shape_ext = getattr(obj, 'shape_ext', None)
        self.keys = getattr(obj, 'keys', None)

    def getCol(self, view, col):
        """Return column ``col`` of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        return self.data[start + col, :]

    def getView(self, view):
        """Return the whole block of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = start + self.shapes_int[view]
        return self.data[start:stop, :]

    def getRaw(self, view, row):
        """Return row ``row`` of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = np.sum(np.asarray(self.shapes_int[0: view + 1]))
        return self.data[start:stop, row]
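
    # Illustrative sketch (assumed usage, not from the original source):
    # build a MultiView_array from two equally shaped views and slice one
    # block back out with the accessors above.
    # >>> views = np.empty(2, dtype=object)
    # >>> views[0] = np.arange(12.).reshape(3, 4)
    # >>> views[1] = np.arange(12.).reshape(3, 4) * 10
    # >>> mva = MultiView_array(views)
    # >>> mva.shapes_int, mva.shape_ext
    # >>> mva.getView(0)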


class DataSample(dict):
    """A DataSample instance

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data

    - Input:

    :param data: the views to wrap, one entry per view
    :type data: dict or numpy.ndarray of 2d arrays
    """

    def __init__(self, data=None, **kwargs):
        # The dictionary that contains the sample
        super(DataSample, self).__init__(kwargs)
        self._data = None
        if data is not None:
            self._data = MultiView_array(data)

    @property
    def data(self):
        """MultiView_array containing the views"""
        return self._data

    @data.setter
    def data(self, data):
        if isinstance(data, (MultiView_array, np.ndarray, ma.MaskedArray,
                             np.generic)):
            self._data = data
        else:
            raise TypeError("sample should be a MultiView_array.")
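
# Illustrative sketch (assumed usage): wrap per-view arrays in a DataSample
# directly; the shapes below are examples only.
# >>> views = {0: np.ones((3, 4)), 1: np.zeros((3, 2))}
# >>> sample = DataSample(data=views)
# >>> sample.data.keys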
# -*- coding: utf-8 -*-
from __future__ import print_function, division

import os


def get_dataset_path(filename):
    """Return the absolute path of a reference dataset for tests

    - Input parameter:

    :param str filename: name of the file containing reference data for
        tests (the file must live in the same directory as this module)

    - Output parameters:

    :returns: the absolute path where the file named **filename** is stored
    :rtype: str
    """
    datasets_path = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(datasets_path, filename)
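
# Illustrative example: resolve a test file that sits next to this module
# (the file name is hypothetical).
# >>> get_dataset_path("3.pautomac_light.train")
# '/.../multiview_platform/datasets/3.pautomac_light.train'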
import os
import importlib
import inspect


class ClassifierMakerMultiViewPlatform():
    _benchmark = {
        "monoview": {
            "path_classifier": 'multiview_platform/mono_multi_view_classifier/monoview_classifiers'},
        "multiview": {
            "path_classifier_multi": 'multiview_platform/mono_multi_view_classifier/multiview_classifier'}}

    def __init__(self, classifier_names, classifier_modules=None,
                 classifier_files=None, mod='monoview'):
        self.mod = mod
        if classifier_files is None and len(classifier_names) != len(classifier_modules):
            raise ValueError("attr classifier_names and classifier_modules "
                             "should have the same size")
        if classifier_modules is None and len(classifier_names) != len(classifier_files):
            raise ValueError("attr classifier_names and classifier_files "
                             "should have the same size")
        if classifier_files is None:
            for classifier, module in zip(classifier_names, classifier_modules):
                my_instance, my_module = self._check_classifier_install(classifier, module)
                self._create_class(my_instance, my_module)

    def _check_classifier_install(self, classifier, module):
        try:
            my_module = importlib.import_module(module)
        except Exception:
            raise ImportError("the module %s can't be imported" % module)
        try:
            my_instance = getattr(my_module, classifier)
        except AttributeError:
            raise AttributeError("The class %s is not in %s" % (classifier, module))
        return my_instance, my_module

    def _create_class(self, classifier, module):
        if self.mod.startswith('monoview'):
            directory = self._benchmark[self.mod]["path_classifier"]

    def _get_module_name(self, mymodule):
        # Return the name of the first class that is both defined in
        # ``mymodule`` itself and named like its attribute, or None.
        for name in dir(mymodule):
            att = getattr(mymodule, name)
            try:
                if (att.__module__.startswith(mymodule.__name__)
                        and inspect.isclass(att) and att.__name__ == name):
                    return name
            except AttributeError:
                continue
        return None
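
# Illustrative sketch (assumed usage): fetch an existing classifier class by
# name from an importable module; the names below are examples only.
# >>> maker = ClassifierMakerMultiViewPlatform(
# ...     ["DecisionTreeClassifier"], ["sklearn.tree"])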
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def test_versions():
"""Used to test if all prerequisites are installed"""
is_up_to_date = True
to_install = []
# try:
# import sys
# except ImportError:
# raise
#
# try:
# import cvxopt
# except ImportError:
# is_up_to_date = False
# to_install.append("cvxopt")
#
# try:
# import pyscm
# except ImportError:
# is_up_to_date = False
# to_install.append("pyscm")
#
# try:
# import numpy
# except ImportError:
# is_up_to_date = False
# to_install.append("numpy")
#
# try:
# import scipy
# except ImportError:
# is_up_to_date = False
# to_install.append("scipy")
#
# try:
# import matplotlib
# except ImportError:
# is_up_to_date = False
# to_install.append("matplotlib")
#
# try:
# import sklearn
# except ImportError:
# is_up_to_date = False
# to_install.append("sklearn")
#
# try:
# import logging
# except ImportError:
# is_up_to_date = False
# to_install.append("logging")
#
# try:
# import joblib
# except ImportError:
# is_up_to_date = False
# to_install.append("joblib")
#
# try:
# import argparse
# except ImportError:
# is_up_to_date = False
# to_install.append("argparse")
#
# try:
# import h5py #
# except ImportError:
# is_up_to_date = False
# to_install.append("h5py")
#
# # try:
# # import graphviz #
# # except ImportError:
# # is_up_to_date = False
# # to_install.append("graphviz")
#
# try:
# import pickle #
# except ImportError:
# is_up_to_date = False
# to_install.append("pickle")
#
# if not is_up_to_date:
# print(
# "You can't run at the moment, please install the following modules : \n" + "\n".join(
# to_install))
# quit()
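#
# A runnable condensation of the commented-out checks above (a sketch under
# the assumption that the same try/import pattern is wanted per dependency):
#
# import importlib
#
# def _missing_modules(names=("numpy", "scipy", "matplotlib", "sklearn",
#                             "joblib", "h5py")):
#     missing = []
#     for name in names:
#         try:
#             importlib.import_module(name)
#         except ImportError:
#             missing.append(name)
#     return missing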
if __name__ == "__main__":
test_versions()