Commit 140a9cd7 authored by Baptiste Bauvin

Removed

parent 0234bf1b
TODO
*.pyc
.idea/**
ipynb/.ipynb_checkpoints/**
docs/source/monomulti/.ipynb_checkpoints/**
results/*
data/*
Data/*
......
File deleted
from multiview_platform.datasets.base import *
from multiview_platform.datasets.data_sample import DataSample, Metriclearn_array
from __future__ import print_function

import numpy as np
import numpy.ma as ma
from six.moves import cPickle as pickle  # cPickle for performance

from multiview_platform.datasets.data_sample import DataSample
def save_dict(di_, filename_):
    """Pickle the dictionary ``di_`` to the file ``filename_``."""
    with open(filename_, 'wb') as f:
        pickle.dump(di_, f)


def load_dict(filename_):
    """Load a pickled dictionary from the file ``filename_``."""
    with open(filename_, 'rb') as f:
        ret_di = pickle.load(f)
    return ret_di
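
# Illustrative example (not part of the original module): round-trip a small
# dictionary through the two helpers above; the file name is hypothetical.
# >>> d = {"view0": [1, 2, 3], "view1": [4, 5]}
# >>> save_dict(d, "/tmp/views.pkl")
# >>> load_dict("/tmp/views.pkl") == d
# True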


def load_data(address, output='array', pickle=False):
    """Load every view file listed in ``address`` into a DataSample.

    ``address`` is a list of paths, one file per view. If ``output`` starts
    with 'array' the views are gathered in an object ndarray, otherwise in a
    dict keyed by view index. If ``pickle`` is True, each loaded view is
    also dumped next to its source file.
    """
    if output.startswith('array'):
        views = np.empty(len(address), dtype=object)
    else:
        views = {}
    nb_samples, nb_features = _determine_dimensions(address)
    for i, addr in enumerate(address):
        views[i] = _load_view_sample(addr, nb_samples, nb_features[i],
                                     pickle=pickle)
    return DataSample(data=views)


def _determine_dimensions(address):
    """Return the largest sample count over all view files, and the number
    of features (length of the longest line) of each view file."""
    nb_features = []
    nb_sample_max = -1
    for adr in address:
        try:
            with open(adr, "r") as f:
                f.readline()  # skip the "nbEx nbL" header line
                nb_samples = 0
                max_features = 0
                for line in f:
                    nb_samples += 1
                    max_features = max(max_features, len(line.split()))
            nb_features.append(max_features)
            if nb_sample_max < nb_samples:
                nb_sample_max = nb_samples
        except IOError:
            raise IOError("file %s can't be opened" % adr)
    return nb_sample_max, nb_features
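
# Illustrative sketch of the expected file format (an assumption inferred
# from the parsers in this module): a header line "nb_examples nb_letters",
# then one whitespace-separated sequence per line. Paths are hypothetical.
# >>> with open("/tmp/view0.train", "w") as f:
# ...     f.write("2 4\n3 0 3\n3 3\n")
# >>> data = load_data(["/tmp/view0.train"])
# >>> data.data  # masked where sequences are shorter than the longest one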


def _load_view_sample(adr, nb_samples, nb_features, pickle=False):
    """Load one view from a file and return it as a masked array.

    - Input:

    :param str adr: path of the view file to load
    :param int nb_samples: number of rows to allocate
    :param int nb_features: number of columns to allocate
    :param bool pickle: (default value = False) if True, the loaded view
        is also dumped next to the input file as ``adr + ".sample.pkl"``

    - Output:

    :returns: the loaded view, with unfilled cells masked
    :rtype: numpy.ma.MaskedArray

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = '3.pautomac_light.train'  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> data.data
    MultiView_array([[ 3.,  0.,  3., ..., -1., -1., -1.],
                     [ 3.,  3., -1., ..., -1., -1., -1.],
                     [ 3.,  2.,  0., ..., -1., -1., -1.],
                     ...,
                     [ 3.,  1.,  3., ..., -1., -1., -1.],
                     [ 3.,  0.,  3., ..., -1., -1., -1.],
                     [ 3.,  3.,  1., ..., -1., -1., -1.]])
    """
    f = open(adr, "r")
    line = f.readline()
    l = line.split()
    nbEx = int(l[0])
    nbL = int(l[1])
    line = f.readline()
    data1 = np.zeros((nb_samples, nb_features), dtype=np.float64)
    data1 += np.nan  # cells that are never filled stay NaN and get masked
    i = 0
    while line:
        l = line.split()
        w = [float(x) for x in l]
        if i >= nbEx or i >= nb_samples:
            raise IndexError("dimension is not well defined")
        data1[i, :len(w)] = w
        line = f.readline()
        i += 1
    masint = np.isnan(data1)  # mask everything that was not read from file
    madata1 = ma.MaskedArray(data1, masint)
    f.close()
    if pickle:
        _create_pickle_files(adr=adr, dsample=madata1)
    return madata1
# def _read_dimension(adr):
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# max_length = 0
# nb_sample = 0
# while line:
# l = line.split()
# nb_sample += 1
# length = int(l[0])
# if max_length < length:
# max_length = length
# line = f.readline()
# f.close()
# if nb_sample != nbEx:
# raise ValueError("check imput file, metadata " + str(nbEx) +
# "do not match number of samples " + str(nb_sample))
# return nb_sample , max_length
# def _load_file_1lecture(adr, pickle=False):
# dsample = {} # dictionary (word,count)
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# data1 = np.zeros((0,0))
# length = 0
# while line:
# l = line.split()
# # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]])
# # dsample[w] = dsample[w] + 1 if w in dsample else 1
# # handling of the empty word for prefixes, suffixes and factors
# w = [] if int(l[0]) == 0 else [int(x) for x in l[1:]]
# word = np.array(w, ndmin=2, dtype=np.uint32)
# diff = abs(int(l[0]) - length)
# if len(w) > length and not np.array_equal(data1, np.zeros((0,0))):
# data1 = _add_empty(data1, diff)
# elif word.shape[0] < length and not np.array_equal(data1, np.zeros((0,0))):
# word = _add_empty(word, diff)
#
# if np.array_equal(data1, np.zeros((0,0))):
# data1 = word
# else:
# data1 = np.concatenate((data1, word), axis=0)
# length = data1.shape[1]
# line = f.readline()
#
# f.close()
# if pickle:
# _create_pickle_files(adr=adr, dsample=dsample)
# return nbL, nbEx, data1
# def _add_empty(data, diff):
# empty = np.zeros((data.shape[0], diff))
# empty += -1
# data = np.concatenate((data, empty), axis=1)
# return data
def _create_pickle_files(adr, dsample):
    """Dump a loaded view next to its source file as ``adr + ".sample.pkl"``."""
    with open(adr + ".sample.pkl", "wb") as f:
        pickle.dump(dsample, f)
# -*- coding: utf-8 -*-
"""This module contains the DataSample class and the MultiView_array class.

The DataSample class encapsulates a sample's components, and the
MultiView_array class inherits from numpy's MaskedArray and contains a 2d
data ndarray with the shape

==== ==== ==== ==== ====
 x    x    x    x   -1
 x    x    x    x    x
 x    x   -1   -1   -1
 x   -1   -1   -1   -1
-1   -1   -1   -1   -1
==== ==== ==== ==== ====

where -1 indicates an empty cell.
"""
import numpy as np
import numpy.ma as ma


class MultiView_array(ma.MaskedArray):
    """MultiView_array inherits from numpy.ma.MaskedArray

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data
    """
    def __new__(cls, data):
        shapes_int = []
        index = 0
        new_data = data
        shape_ext = len(data)
        thekeys = None
        if isinstance(data, dict):
            shape_ext = len(data)
            for key, dat_values in data.items():
                new_data = cls._populate_new_data(index, dat_values, new_data)
                shapes_int.append(dat_values.shape[0])
                index += 1
            thekeys = data.keys()
        if isinstance(data, np.ndarray):
            shape_ext = data.shape[0]
            for dat_values in data:
                shapes_int.append(dat_values.shape[0])
                new_data = cls._populate_new_data(index, dat_values, new_data)
                index += 1
        obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
        obj.shapes_int = shapes_int
        obj.shape_ext = shape_ext
        obj.keys = thekeys
        return obj

    @staticmethod
    def _populate_new_data(index, dat_values, new_data):
        # The first view starts the stack; later views are appended with
        # ma.hstack so that the mask is preserved.
        if index == 0:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = dat_values
            else:
                new_data = dat_values.view(ma.MaskedArray)
                new_data.mask = ma.nomask
        else:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = ma.hstack((new_data, dat_values))
            else:
                new_data = ma.hstack((new_data,
                                      dat_values.view(ma.MaskedArray)))
        return new_data

    def __array_finalize__(self, obj):
        if obj is None:
            return
        super(MultiView_array, self).__array_finalize__(obj)
        self.shapes_int = getattr(obj, 'shapes_int', None)
        self.shape_ext = getattr(obj, 'shape_ext', None)
        self.keys = getattr(obj, 'keys', None)

    def getCol(self, view, col):
        """Return column ``col`` of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        return self.data[start + col, :]

    def getView(self, view):
        """Return the whole block of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = start + self.shapes_int[view]
        return self.data[start:stop, :]

    def getRaw(self, view, row):
        """Return row ``row`` of view number ``view``."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = np.sum(np.asarray(self.shapes_int[0: view + 1]))
        return self.data[start:stop, row]
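
    # Illustrative sketch (assumed usage, not from the original source):
    # build a MultiView_array from two equally shaped views and slice one
    # block back out with the accessors above.
    # >>> views = np.empty(2, dtype=object)
    # >>> views[0] = np.arange(12.).reshape(3, 4)
    # >>> views[1] = np.arange(12.).reshape(3, 4) * 10
    # >>> mva = MultiView_array(views)
    # >>> mva.shapes_int, mva.shape_ext
    # >>> mva.getView(0)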


class DataSample(dict):
    """A DataSample instance

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data

    - Input:

    :param data: the views to wrap, one entry per view
    :type data: dict or numpy.ndarray of 2d arrays
    """

    def __init__(self, data=None, **kwargs):
        # The dictionary that contains the sample
        super(DataSample, self).__init__(kwargs)
        self._data = None
        if data is not None:
            self._data = MultiView_array(data)

    @property
    def data(self):
        """MultiView_array containing the views"""
        return self._data

    @data.setter
    def data(self, data):
        if isinstance(data, (MultiView_array, np.ndarray, ma.MaskedArray,
                             np.generic)):
            self._data = data
        else:
            raise TypeError("sample should be a MultiView_array.")
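
# Illustrative sketch (assumed usage): wrap per-view arrays in a DataSample
# directly; the shapes below are examples only.
# >>> views = {0: np.ones((3, 4)), 1: np.zeros((3, 2))}
# >>> sample = DataSample(data=views)
# >>> sample.data.keys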
# -*- coding: utf-8 -*-
from __future__ import print_function, division

import os


def get_dataset_path(filename):
    """Return the absolute path of a reference dataset for tests

    - Input parameter:

    :param str filename: name of the file containing reference data for
        tests (the file must live in the same directory as this module)

    - Output parameters:

    :returns: the absolute path where the file named **filename** is stored
    :rtype: str
    """
    datasets_path = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(datasets_path, filename)
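
# Illustrative example: resolve a test file that sits next to this module
# (the file name is hypothetical).
# >>> get_dataset_path("3.pautomac_light.train")
# '/.../multiview_platform/datasets/3.pautomac_light.train'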
import os
import importlib
import inspect


class ClassifierMakerMultiViewPlatform():
    _benchmark = {
        "monoview": {
            "path_classifier": 'multiview_platform/mono_multi_view_classifier/monoview_classifiers'},
        "multiview": {
            "path_classifier_multi": 'multiview_platform/mono_multi_view_classifier/multiview_classifier'}}

    def __init__(self, classifier_names, classifier_modules=None,
                 classifier_files=None, mod='monoview'):
        self.mod = mod
        if classifier_files is None and len(classifier_names) != len(classifier_modules):
            raise ValueError("attr classifier_names and classifier_modules "
                             "should have the same size")
        if classifier_modules is None and len(classifier_names) != len(classifier_files):
            raise ValueError("attr classifier_names and classifier_files "
                             "should have the same size")
        if classifier_files is None:
            for classifier, module in zip(classifier_names, classifier_modules):
                my_instance, my_module = self._check_classifier_install(classifier, module)
                self._create_class(my_instance, my_module)

    def _check_classifier_install(self, classifier, module):
        try:
            my_module = importlib.import_module(module)
        except Exception:
            raise ImportError("the module %s can't be imported" % module)
        try:
            my_instance = getattr(my_module, classifier)
        except AttributeError:
            raise AttributeError("The class %s is not in %s" % (classifier, module))
        return my_instance, my_module

    def _create_class(self, classifier, module):
        if self.mod.startswith('monoview'):
            directory = self._benchmark[self.mod]["path_classifier"]

    def _get_module_name(self, mymodule):
        # Return the name of the first class that is both defined in
        # ``mymodule`` itself and named like its attribute, or None.
        for name in dir(mymodule):
            att = getattr(mymodule, name)
            try:
                if (att.__module__.startswith(mymodule.__name__)
                        and inspect.isclass(att) and att.__name__ == name):
                    return name
            except AttributeError:
                continue
        return None
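
# Illustrative sketch (assumed usage): fetch an existing classifier class by
# name from an importable module; the names below are examples only.
# >>> maker = ClassifierMakerMultiViewPlatform(
# ...     ["DecisionTreeClassifier"], ["sklearn.tree"])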
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def test_versions():
"""Used to test if all prerequisites are installed"""
is_up_to_date = True
to_install = []
# try:
# import sys
# except ImportError:
# raise
#
# try:
# import cvxopt
# except ImportError:
# is_up_to_date = False
# to_install.append("cvxopt")
#
# try:
# import pyscm
# except ImportError:
# is_up_to_date = False
# to_install.append("pyscm")
#
# try:
# import numpy
# except ImportError:
# is_up_to_date = False
# to_install.append("numpy")
#
# try:
# import scipy
# except ImportError:
# is_up_to_date = False
# to_install.append("scipy")
#
# try:
# import matplotlib
# except ImportError:
# is_up_to_date = False
# to_install.append("matplotlib")
#
# try:
# import sklearn
# except ImportError:
# is_up_to_date = False
# to_install.append("sklearn")
#
# try:
# import logging
# except ImportError:
# is_up_to_date = False
# to_install.append("logging")
#
# try:
# import joblib
# except ImportError:
# is_up_to_date = False
# to_install.append("joblib")
#
# try:
# import argparse
# except ImportError:
# is_up_to_date = False
# to_install.append("argparse")
#
# try:
# import h5py #
# except ImportError:
# is_up_to_date = False
# to_install.append("h5py")
#
# # try:
# # import graphviz #
# # except ImportError:
# # is_up_to_date = False
# # to_install.append("graphviz")
#
# try:
# import pickle #
# except ImportError:
# is_up_to_date = False
# to_install.append("pickle")
#
# if not is_up_to_date:
# print(
# "You can't run at the moment, please install the following modules : \n" + "\n".join(
# to_install))
# quit()
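#
# A runnable condensation of the commented-out checks above (a sketch under
# the assumption that the same try/import pattern is wanted per dependency):
#
# import importlib
#
# def _missing_modules(names=("numpy", "scipy", "matplotlib", "sklearn",
#                             "joblib", "h5py")):
#     missing = []
#     for name in names:
#         try:
#             importlib.import_module(name)
#         except ImportError:
#             missing.append(name)
#     return missing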
if __name__ == "__main__":
test_versions()