Commit ac1b35b0 authored by Dominique Benielli

add some thing

parent c571f3e7
@@ -24,6 +24,9 @@ import sys
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('../../multiview_platform'))
sys.path.insert(0, os.path.abspath('../..'))
file_loc = os.path.split(__file__)[0]
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(file_loc), '.')))
import multiview_platform
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
......
from multiview_platform.datasets.base import *
from multiview_platform.datasets.data_sample import DataSample, MultiView_array
from __future__ import print_function

import numpy as np
import numpy.ma as ma
from multiview_platform.datasets.data_sample import DataSample
from six.moves import cPickle as pickle  # cPickle, for performance
def save_dict(di_, filename_):
    """Pickle the dictionary di_ into the file filename_."""
    with open(filename_, 'wb') as f:
        pickle.dump(di_, f)


def load_dict(filename_):
    """Load a pickled dictionary back from the file filename_."""
    with open(filename_, 'rb') as f:
        ret_di = pickle.load(f)
    return ret_di
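# A minimal round-trip sketch for save_dict / load_dict; the file name is
# hypothetical and a temporary directory keeps the working tree clean
# (illustration only):
if __name__ == "__main__":
    import os
    import tempfile
    _pkl = os.path.join(tempfile.mkdtemp(), "views.pkl")
    save_dict({"view_0": [1, 2, 3]}, _pkl)
    assert load_dict(_pkl) == {"view_0": [1, 2, 3]}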
def load_data(address, output='array', pickle=False):
    """Load every view file listed in address and wrap them in a DataSample."""
    if output.startswith('array'):
        views = np.empty((len(address)), dtype=object)
    else:
        views = {}
    nb_samples, nb_features = _determine_dimensions(address)
    for i, addr in enumerate(address):
        views[i] = _load_view_sample(addr, nb_samples, nb_features[i],
                                     pickle=pickle)
    return DataSample(data=views)
def _determine_dimensions(address):
    """Scan each view file: the first line holds the metadata, every
    following line is one sample. Return the largest sample count over
    all views and the per-view feature count (longest line)."""
    nb_features = []
    nb_sample_max = -1
    for adr in address:
        try:
            with open(adr, "r") as f:
                f.readline()  # skip the metadata line (nbEx nbL)
                nb_samples = 0
                max_features = 0
                for line in f:
                    tokens = line.split()
                    if not tokens:
                        continue
                    nb_samples += 1
                    max_features = max(max_features, len(tokens))
            nb_features.append(max_features)
            nb_sample_max = max(nb_sample_max, nb_samples)
        except IOError:
            raise IOError("file %s can't be opened" % adr)
    return nb_sample_max, nb_features
def _load_view_sample(adr, nb_samples, nb_features, pickle=False):
"""Load a sample from file and returns a dictionary
(word,count)
- Input:
:param lrows: number or list of rows,
a list of strings if partial=True;
otherwise, based on pref if version="classic" or
"prefix", fact otherwise
:type lrows: int or list of int
:param lcolumns: number or list of columns
a list of strings if partial=True ;
otherwise, based on suff if version="classic" or "suffix",
fact otherwise
:type lcolumns: int or list of int
:param string version: (default = "classic") version name
:param boolean partial: (default value = False) build of partial
if True partial dictionaries are loaded based
on nrows and lcolumns
- Output:
:returns: nbL , nbEx , dsample , dpref , dsuff , dfact
:rtype: int , int , dict , dict , dict , dict
:Example:
Let's say you are interested in the samples 10, 25, and 50, and want to
know their class name.
>>> from metriclearning.datasets.base import load_data_sample
>>> from metriclearning.tests.datasets.get_dataset_path import get_dataset_path
>>> train_file = '3.pautomac_light.train' # '4.spice.train'
>>> data = load_data_sample(adr=get_dataset_path(train_file))
>>> data.nbL
4
>>> data.nbEx
5000
>>> data.data
Splearn_array([[ 3., 0., 3., ..., -1., -1., -1.],
[ 3., 3., -1., ..., -1., -1., -1.],
[ 3., 2., 0., ..., -1., -1., -1.],
...,
[ 3., 1., 3., ..., -1., -1., -1.],
[ 3., 0., 3., ..., -1., -1., -1.],
[ 3., 3., 1., ..., -1., -1., -1.]])
"""
    with open(adr, "r") as f:
        line = f.readline()
        l = line.split()
        nbEx = int(l[0])
        nbL = int(l[1])
        line = f.readline()
        # allocate the view, pre-filled with NaN for the empty cells
        data1 = np.zeros((nb_samples, nb_features), dtype=float)
        data1 += np.nan
        i = 0
        while line:
            l = line.split()
            # every token of the line is one feature value of sample i
            w = [float(x) for x in l[0:]]
            data1[i, :len(w)] = w
            line = f.readline()
            i += 1
            if i > nbEx:
                raise IndexError("more samples in %s than announced in "
                                 "its metadata (%d)" % (adr, nbEx))
    # mask the NaN cells so that computations ignore the padding
    masint = np.isnan(data1)
    madata1 = ma.MaskedArray(data1, masint)
    if pickle:
        _create_pickle_files(adr=adr, dsample=madata1)
    return madata1
# def _read_dimension(adr):
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# max_length = 0
# nb_sample = 0
# while line:
# l = line.split()
# nb_sample += 1
# length = int(l[0])
# if max_length < length:
# max_length = length
# line = f.readline()
# f.close()
# if nb_sample != nbEx:
# raise ValueError("check imput file, metadata " + str(nbEx) +
# "do not match number of samples " + str(nb_sample))
# return nb_sample , max_length
# def _load_file_1lecture(adr, pickle=False):
# dsample = {} # dictionary (word,count)
# f = open(adr, "r")
# line = f.readline()
# l = line.split()
# nbEx = int(l[0])
# nbL = int(l[1])
# line = f.readline()
# data1 = np.zeros((0,0))
# length = 0
# while line:
# l = line.split()
# # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]])
# # dsample[w] = dsample[w] + 1 if w in dsample else 1
# # handle the empty word for prefixes, suffixes and factors
# w = [] if int(l[0]) == 0 else [int(x) for x in l[1:]]
# word = np.array(w, ndmin=2, dtype=np.uint32)
# diff = abs(int(l[0]) - length)
# if len(w) > length and not np.array_equal(data1, np.zeros((0,0))):
# data1 = _add_empty(data1, diff)
# elif word.shape[0] < length and not np.array_equal(data1, np.zeros((0,0))):
# word = _add_empty(word, diff)
#
# if np.array_equal(data1, np.zeros((0,0))):
# data1 = word
# else:
# data1 = np.concatenate((data1, word), axis=0)
# length = data1.shape[1]
# line = f.readline()
#
# f.close()
# if pickle:
# _create_pickle_files(adr=adr, dsample=dsample)
# return nbL, nbEx, data1
# def _add_empty(data, diff):
# empty = np.zeros((data.shape[0], diff))
# empty += -1
# data = np.concatenate((data, empty), axis=1)
# return data
def _create_pickle_files(adr, dsample):
    """Dump the loaded view next to its source file as <adr>.sample.pkl."""
    with open(adr + ".sample.pkl", "wb") as f:
        pickle.dump(dsample, f)
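# A minimal usage sketch for load_data, under the format assumed by
# _load_view_sample: the first line of a view file carries the metadata
# "nbEx nbL", every following line is one sample. File name and values are
# hypothetical, and the multiview_platform package must be importable:
if __name__ == "__main__":
    import os
    import tempfile
    _view = os.path.join(tempfile.mkdtemp(), "view0.txt")
    with open(_view, "w") as vf:
        vf.write("2 3\n1.0 2.0 3.0\n4.0 5.0\n")
    _sample = load_data([_view])
    print(_sample.data.shape)  # (2, 3); the missing value on line 2 is masked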
# -*- coding: utf-8 -*-
"""This module contains the DataSample class and the MultiView_array class.

The DataSample class encapsulates the components of a sample:
the numbers nbL and nbEx.
The MultiView_array class inherits from numpy masked arrays and contains
a 2d data ndarray with the shape

==== ==== ==== ==== ====
 x    x    x    x   -1
 x    x    x    x    x
 x    x   -1   -1   -1
 x   -1   -1   -1   -1
-1   -1   -1   -1   -1
==== ==== ==== ==== ====

where -1 indicates an empty cell, together with the numbers nbL and nbEx
and the four dictionaries for sample, prefix, suffix and factor, where
they are computed.
"""
import numpy as np
import numpy.ma as ma
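# A small sketch of the padded layout described in the module docstring
# (illustrative values): -1 marks the empty cells, and a mask built from
# them hides the padding from computations.
if __name__ == "__main__":
    _raw = np.array([[3., 0., 3., -1.],
                     [3., 3., -1., -1.]])
    _padded = ma.MaskedArray(_raw, _raw == -1)
    print(_padded.mean())  # 2.4: the -1 padding is ignored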
class MultiView_array(ma.MaskedArray):
    """MultiView_array inherits from numpy masked arrays.

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data
    """
    def __new__(cls, data):
        # views are assumed to be 2d (samples x features) and are
        # concatenated column-wise; shapes_int records the number of
        # feature columns of each view
        shapes_int = []
        index = 0
        new_data = data
        shape_ext = len(data)
        thekeys = None
        if isinstance(data, dict):
            shape_ext = len(data)
            for key, dat_values in data.items():
                new_data = cls._populate_new_data(index, dat_values, new_data)
                shapes_int.append(dat_values.shape[1])
                index += 1
            thekeys = data.keys()
        if isinstance(data, np.ndarray):
            shape_ext = data.shape[0]
            for dat_values in data:
                shapes_int.append(dat_values.shape[1])
                new_data = cls._populate_new_data(index, dat_values, new_data)
                index += 1
        obj = ma.masked_array(new_data.data, new_data.mask).view(cls)
        obj.shapes_int = shapes_int
        obj.shape_ext = shape_ext
        obj.keys = thekeys
        return obj
    @staticmethod
    def _populate_new_data(index, dat_values, new_data):
        # first view: start the masked array; later views: stack columns
        if index == 0:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = dat_values
            else:
                new_data = dat_values.view(ma.MaskedArray)
                new_data.mask = ma.nomask
        else:
            if isinstance(dat_values, ma.MaskedArray):
                new_data = ma.hstack((new_data, dat_values))
            else:
                new_data = ma.hstack((new_data,
                                      dat_values.view(ma.MaskedArray)))
        return new_data
def __array_finalize__(self, obj):
if obj is None: return
super(MultiView_array, self).__array_finalize__(obj)
self.shapes_int = getattr(obj, 'shapes_int', None)
self.shape_ext = getattr(obj, 'shape_ext', None)
self.keys = getattr(obj, 'keys', None)
    def getCol(self, view, col):
        """Return column col of the given view, over all samples."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        return self.data[:, start + col]

    def getView(self, view):
        """Return the (samples x features) block of the given view."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = start + self.shapes_int[view]
        return self.data[:, start:stop]

    def getRow(self, view, row):
        """Return one sample (row) restricted to the given view."""
        start = np.sum(np.asarray(self.shapes_int[0: view]))
        stop = np.sum(np.asarray(self.shapes_int[0: view + 1]))
        return self.data[row, start:stop]
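# A minimal usage sketch for the accessors above (illustrative values):
# two views with the same number of samples are stacked column-wise, and
# getView recovers each per-view block.
if __name__ == "__main__":
    _v0 = np.arange(6.).reshape(2, 3)  # view 0: 2 samples x 3 features
    _v1 = np.arange(4.).reshape(2, 2)  # view 1: 2 samples x 2 features
    _mva = MultiView_array({0: _v0, 1: _v1})
    print(_mva.shapes_int)   # [3, 2]
    print(_mva.getView(1))   # the 2 x 2 block of view 1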
class DataSample(dict):
    """A DataSample instance wraps the views of a multiview sample.

    :Example:

    >>> from multiview_platform.datasets.base import load_data
    >>> from multiview_platform.datasets.get_dataset_path import get_dataset_path
    >>> train_file = ''  # '4.spice.train'
    >>> data = load_data([get_dataset_path(train_file)])
    >>> print(data.__class__)
    >>> data.data

    - Input:

    :param data: the sample views, one 2d array per view
    :type data: dict or numpy.ndarray
    """
    def __init__(self, data=None, **kwargs):
        # the dictionary that contains the sample
        super(DataSample, self).__init__(kwargs)
        self._data = None
        if data is not None:
            self._data = MultiView_array(data)

    @property
    def data(self):
        """MultiView_array containing the views."""
        return self._data

    @data.setter
    def data(self, data):
        if isinstance(data, (MultiView_array, np.ndarray, ma.MaskedArray,
                             np.generic)):
            self._data = data
        else:
            raise TypeError("sample should be a MultiView_array.")
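# A minimal usage sketch for DataSample (illustrative values): wrap a dict
# of per-view arrays; the .data property exposes the MultiView_array.
if __name__ == "__main__":
    _ds = DataSample(data={0: np.ones((2, 3)), 1: np.zeros((2, 2))})
    print(_ds.data.__class__)
    print(_ds.data.shape)  # (2, 5): the views are concatenated column-wise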
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import os
def get_dataset_path(filename):
"""Return the absolute path of a reference dataset for tests
- Input parameter:
:param str filename: File name of the file containing reference data
for tests (which must be in ``skgilearn/tests/datasets/``)
- Output parameters:
:returns: The absolute path where the file with name **filename** is stored
:rtype: str
"""
datasets_path = os.path.dirname(os.path.abspath(__file__))
return os.path.join(datasets_path, filename)
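# Usage sketch (hypothetical file name, for illustration only): the
# returned path always points next to this module, whether or not the
# file actually exists there.
if __name__ == "__main__":
    print(get_dataset_path("3.pautomac_light.train"))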
@@ -14,6 +14,10 @@ __status__ = "Prototype"  # Production, Development, Prototype
class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
"""
    This class implements a classifier based on the AdaBoost algorithm.
"""
def __init__(self, random_state=None, n_estimators=50,
base_estimator=None, **kwargs):
......
import os
import sys
import inspect
import importlib

from multiview_platform.mono_multi_view_classifiers.monoview_classifiers.adaboost import Adaboost
classifier_dict = {"0": ['mono', Adaboost,
classifier_dict = {"0": ['mono', 'Adaboost',
'multiview_platform.mono_multi_view_classifiers.monoview_classifiers.adaboost']}
val = classifier_dict["0"]
mymodule = importlib.import_module(val[2])
for name in dir(mymodule):
att = getattr(mymodule, name)
try:
getattr(att, "__module__")
if att.__module__.startswith(mymodule.__name__):
if inspect.isclass(att):
print(att)
print(name)
except Exception:
pass
parameter = {"0":[]}
parameter
instring = "multiview_platform/mono_multi_view_classifiers/monoview_classifiers/"
if instring in mymodule.__file__:
    monInstance = getattr(mymodule, 'Adaboost')
    sig = inspect.signature(monInstance.__init__)
for arg_idx, name in enumerate(sig.parameters):
        param = sig.parameters[name]
if not name.startswith('self'):
parameter{"0"}.append(name)
parameter["0"].append(name)
if param.default is not inspect.Parameter.empty:
value_default = param.default
@@ -26,21 +39,39 @@ if instring in mymodule.__file__:
print()
# val[1] holds the classifier name as a string; check that the class is
# actually defined in the imported module
if val[1] in dir(mymodule):
    print(val[1], "is defined in", mymodule.__name__)
class ConfigurationMaker():
    """
    Find the class name of each classifier listed in classifier_dict.
    """
    _path_classifier_mono = 'multiview_platform/mono_multi_view_classifier/monoview_classifiers'
    _path_classifier_multi = 'multiview_platform/mono_multi_view_classifier/multiview_classifier'

    def __init__(self, classifier_dict=None):
        if classifier_dict is None:
            classifier_dict = {"0": ['mono', 'Adaboost',
                                     'multiview_platform.mono_multi_view_classifiers.monoview_classifiers.adaboost']}
        names = []
        for key, val in classifier_dict.items():
            mymodule = importlib.import_module(val[2])
            names.append(self._get_module_name(mymodule, val[1]))
        self.names = names

    def _get_module_name(self, mymodule, expected_name):
        """Return expected_name if a class of that name is defined in
        mymodule, None otherwise."""
        for name in dir(mymodule):
            att = getattr(mymodule, name)
            try:
                if (att.__module__.startswith(mymodule.__name__)
                        and inspect.isclass(att)
                        and name == expected_name):
                    return name
            except AttributeError:
                continue
        return None
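# A minimal usage sketch: with the default classifier_dict, the maker
# imports the adaboost module and resolves the class name (assumes the
# multiview_platform package is importable).
if __name__ == "__main__":
    maker = ConfigurationMaker()
    print(maker.names)  # expected: ['Adaboost']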
# class ConfigurationMaker():
# """
#
# """
# _path_classifier_mono = 'multiview_platform/mono_multi_view_classifier/monoview_classifiers'
# _path_classifier_multi = 'multiview_platform/mono_multi_view_classifier/multiview_classifier'
#
# def __init__(self ):
# classifier_dict = {"0": ['mono', Adaboost,
# 'multiview_platform.mono_multi_view_classifiers.monoview_classifiers.']}
#
# for key, val in classifier_dict.items():
# mymodule = importlib.import_module(val[2])
# module_file = mymodule.__file__
# getattr(self._path_classifier_mono, module_file[:-3])
......
import numpy as np
class Parameter_pdata(object):
    class __Parameter_pdata:
        nbr_i = 0
        # data renormalisation options: the separation is only defined up
        # to a permutation and a renormalisation factor, so the data can
        # be normalised at the start of the algorithm, at each iteration,
        # and/or at the end; either A or S is normalised
        _data_norm = {'FlagInit': True, 'FlagIter': False, 'FlagEnd': False}
        # normalise along the columns ('dim': 1, column norms set to 1)
        # or along the rows ('dim': 2, row norms set to 1)
        _Norm = {'p': 1, 'dim': 1, 'x': 'A'}
        _list_mode = ['real', 'simul']
        _list_x = ['A', 'S']

        def __init__(self):
            self._Norm['p'] = 1
            self._Norm['dim'] = 1
            self._Norm['x'] = self._list_x[0]
            self.mode = self._list_mode[1]
            self.sigma = 20000
            self.dim = 1
            if self.nbr_i > 0:
                raise ValueError("Only one instance of class "
                                 "Parameter_pdata is allowed")
            self.nbr_i += 1
def __str__(self):
return repr(self)
    instance = None

    def __new__(cls):  # __new__ is always a class method
        # create the unique inner instance on first call, then reuse it
        if not Parameter_pdata.instance:
            Parameter_pdata.instance = Parameter_pdata.__Parameter_pdata()
        return Parameter_pdata.instance

    def __getattr__(self, attr):
        return getattr(self.instance, attr)

    def __setattr__(self, name, value):
        return setattr(self.instance, name, value)
class Parameter_palgo(object):
    class __Parameter_palgo:
        nbr_i = 0
        _list_algo = ['BCVMFB', 'PALS', 'STALS', 'LSfro', 'LSkl']
        _stop = {'DifA': False, 'DifS': False,
                 'ObjFct': True, 'threshold': np.finfo(float).eps}
        _pfwt = {'w': 'db6', 'family_pfwt': 'db',
                 'level': 10, 'K': 4,
                 'Ls': 3000, 'L1': 3000, 'L2': 3000}
        # preconditioning choices:
        #   'LS'   for Lee and Seung
        #   'Lips' for the Lipschitz constant
        #   'PALM' for no preconditioning
        _list_precond = ['LS', 'Lips', 'PALM']

        def __init__(self):
            self.flagWave = False
            self.val = None
            algo_value = self._list_algo[1]
            self._algo = algo_value
            self.gamma = 0.99
            self.inf = np.inf
            self.eps = np.finfo(float).eps
            self.niter = 1000
            self.eta_inf = 'eps'
            self.eta_sup = 'inf'
            self.alpha_A = 0.0
            self.p_A = 1
            self.p_S = 1
            self.alpha_S = 0.0
            self.alpha_S_eval = False
            self.stopThreshold = 10e-5
            self.precond = 'LS'  # 'LS' for Lee and Seung
            self.F = None
            self.Fstar = None
            self.verbose = False
            if self.nbr_i > 0:
                raise ValueError("Only one instance of class "
                                 "Parameter_palgo is allowed")
            self.nbr_i += 1
def __str__(self):
return repr(self) + repr(self.val)
@property
def algo(self):
return self._algo
@algo.setter
def algo(self, algo_value):
if algo_value not in self._list_algo:
raise NameError("parameter algo must be in %s" % self._list_algo)
else:
self._algo = algo_value
    instance = None

    def __new__(cls):  # __new__ is always a class method
        if not Parameter_palgo.instance:
            Parameter_palgo.instance = Parameter_palgo.__Parameter_palgo()
        return Parameter_palgo.instance

    def __getattr__(self, attr):
        return getattr(self.instance, attr)

    def __setattr__(self, name, value):
        return setattr(self.instance, name, value)
if __name__ == '__main__':
a = Parameter_pdata()
a = Parameter_pdata()
b = Parameter_pdata()
b.val = 6
b.x = 8
a.x = 10
param = Parameter_palgo()
algo = param._list_algo[3]
param.algo = algo
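    # both names point to the same singleton instance, so the attribute
    # written through `a` above is visible through `b` (illustrative check)
    assert a is b
    assert b.x == 10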