Skip to content
Snippets Groups Projects
Commit 4fdd0a78 authored by Denis Arrivault
Browse files

Correction of sparse hankel serialization

parent 53a9edee
No related branches found
No related tags found
No related merge requests found
Pipeline #
......@@ -9,7 +9,7 @@ unbutu:17.10:
- pip3 install -e .
- nosetests
- python3 setup.py build_sphinx
- cp -r build/sphinx/html/* public/
- cp -r build/sphinx/html public
artifacts:
when: always
untracked: true
......
{"automaton": {"nbL": 4, "nbS": 5, "initial": {"numpy.ndarray": {"values": [-0.000493441997049692, 0.003063469710791502, -0.04407393201558057, -0.10777702616547158, -0.0866391379316951], "dtype": "float64"}}, "final": {"numpy.ndarray": {"values": [0.07757136847945678, -0.024220294003121035, -0.4468125366321232, 0.6277320840897611, -0.5546744333562244], "dtype": "float64"}}, "transitions": [{"numpy.ndarray": {"values": [[0.045121209595118054, -0.24038969827844267, 0.3494499959213531, -0.2811680730534573, -0.21402523377497668], [0.06925800562437727, -0.3006229346282947, 0.2064137536852026, -0.14960814319756102, -0.5580573163749218], [0.02980115192176601, -0.13866480809160522, 0.18362212572805475, -0.20969545230657657, -0.14481622025561255], [0.005699344003198416, -0.023385825120200706, -0.06600665373981858, 0.10749935271466031, -0.1510365460416008], [-0.020086551931479287, 0.09026347555230453, -0.005525585655539293, -0.031355317090309115, 0.2432902242047725]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[0.07744772079170498, 0.09007073705762163, -0.3047220063293005, 0.27676245498591084, 0.20289396030627946], [-0.09902980483670908, -0.08061846818728234, 0.2585317069225073, -0.12086330214608876, -0.11085207725068613], [-0.06171079202853737, -0.06244151779954791, 0.12007654564862096, 0.0025063746277944722, -0.1567967473145574], [-0.0027369737499654224, -0.009005721984277773, -0.0004600329590916909, -0.00855042647200538, -0.053754646789682024], [0.03098732758871066, 0.03972680066723251, -0.049971133509102664, 0.003576941187496189, 0.14182576205856365]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[-0.06791915236220136, -0.11357937659088249, 0.379553926040543, -0.21784979894046527, -0.229776950899381], [0.11596642335411327, 0.14914956804629298, -0.13357508376686894, -0.008916063072034729, 0.3484153673774827], [0.01173081754742677, 0.019273800531955328, 0.04142658345867112, -0.03534658856098203, 0.02316491010895624], [0.007328911075541722, 
0.005536509132796604, -0.022456082950666895, 0.03611543477693201, -0.03851433900140671], [-0.010589894686551596, -0.010626616553723715, -0.0005431056456617233, -0.02556747670016042, 0.049848888189290286]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[0.07276211427780344, -0.015719557685580397, 0.07428592814590271, -0.10369861539249554, 0.02475347368832667], [-0.05607105449779084, -0.08896207276035853, 0.2763822539752058, -0.23711255828384722, 0.07372294122304912], [-0.007391294007754002, -0.048741797963871694, -0.6291239733858526, 0.46816276521577743, 0.09251699239092943], [-0.007110224931879211, -0.05623317735897968, -0.366066585676203, -0.013297798115225577, 0.649103317749257], [0.0023355150085563733, -0.02156115126448696, 0.09096243479437824, -0.3843882349306287, 0.6616477207948644]], "dtype": "float64"}}], "type": "classic"}}
\ No newline at end of file
{"automaton": {"nbL": 4, "nbS": 5, "initial": {"numpy.ndarray": {"values": [-0.0004934419970497512, 0.0030634697107912346, -0.044073932015580415, -0.1077770261654714, -0.0866391379316952], "dtype": "float64"}}, "final": {"numpy.ndarray": {"values": [0.07757136847945045, -0.024220294003132026, -0.4468125366321221, 0.627732084089759, -0.554674433356224], "dtype": "float64"}}, "transitions": [{"numpy.ndarray": {"values": [[0.04512120959511772, -0.24038969827844062, 0.34944999592135334, -0.2811680730534579, -0.21402523377497645], [0.0692580056243761, -0.30062293462829204, 0.20641375368520157, -0.14960814319756124, -0.5580573163749153], [0.02980115192176571, -0.13866480809160409, 0.18362212572805459, -0.20969545230657607, -0.14481622025561292], [0.005699344003198349, -0.023385825120201414, -0.06600665373981851, 0.10749935271466007, -0.15103654604159977], [-0.02008655193147911, 0.09026347555230492, -0.005525585655539262, -0.031355317090308935, 0.2432902242047721]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[0.0774477207917058, 0.09007073705762021, -0.3047220063293013, 0.2767624549859105, 0.20289396030628148], [-0.09902980483670844, -0.08061846818727973, 0.25853170692250554, -0.12086330214608881, -0.11085207725068251], [-0.061710792028537534, -0.06244151779954751, 0.12007654564862075, 0.0025063746277943564, -0.1567967473145572], [-0.002736973749965403, -0.009005721984277787, -0.00046003295909181354, -0.008550426472005344, -0.053754646789681754], [0.030987327588710728, 0.03972680066723246, -0.04997113350910248, 0.0035769411874962344, 0.1418257620585633]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[-0.06791915236220235, -0.11357937659088102, 0.37955392604054394, -0.21784979894046635, -0.22977695089938127], [0.11596642335411328, 0.14914956804629287, -0.13357508376686902, -0.008916063072034974, 0.3484153673774836], [0.011730817547426673, 0.019273800531955612, 0.0414265834586712, -0.035346588560982, 0.02316491010895583], [0.007328911075541707, 
0.005536509132796312, -0.022456082950666856, 0.03611543477693187, -0.038514339001406585], [-0.010589894686551544, -0.010626616553723532, -0.000543105645661794, -0.025567476700160314, 0.04984888818929034]], "dtype": "float64"}}, {"numpy.ndarray": {"values": [[0.07276211427780357, -0.0157195576855797, 0.07428592814590385, -0.10369861539249735, 0.024753473688328077], [-0.05607105449779142, -0.08896207276035666, 0.27638225397521243, -0.2371125582838589, 0.07372294122306285], [-0.007391294007753122, -0.048741797963875705, -0.6291239733858526, 0.46816276521577677, 0.09251699239093385], [-0.007110224931878467, -0.05623317735898056, -0.36606658567620365, -0.013297798115225407, 0.6491033177492604], [0.002335515008556511, -0.021561151264484414, 0.09096243479437888, -0.38438823493062646, 0.6616477207948602]], "dtype": "float64"}}], "type": "classic"}}
\ No newline at end of file
......@@ -2,66 +2,66 @@ automaton:
final:
numpy.ndarray:
dtype: float64
values: [0.07757136847945678, -0.024220294003121035, -0.4468125366321232, 0.6277320840897611,
-0.5546744333562244]
values: [0.07757136847945045, -0.024220294003132026, -0.4468125366321221, 0.627732084089759,
-0.554674433356224]
initial:
numpy.ndarray:
dtype: float64
values: [-0.000493441997049692, 0.003063469710791502, -0.04407393201558057,
-0.10777702616547158, -0.0866391379316951]
values: [-0.0004934419970497512, 0.0030634697107912346, -0.044073932015580415,
-0.1077770261654714, -0.0866391379316952]
nbL: 4
nbS: 5
transitions:
- numpy.ndarray:
dtype: float64
values:
- [0.045121209595118054, -0.24038969827844267, 0.3494499959213531, -0.2811680730534573,
-0.21402523377497668]
- [0.06925800562437727, -0.3006229346282947, 0.2064137536852026, -0.14960814319756102,
-0.5580573163749218]
- [0.02980115192176601, -0.13866480809160522, 0.18362212572805475, -0.20969545230657657,
-0.14481622025561255]
- [0.005699344003198416, -0.023385825120200706, -0.06600665373981858, 0.10749935271466031,
-0.1510365460416008]
- [-0.020086551931479287, 0.09026347555230453, -0.005525585655539293, -0.031355317090309115,
0.2432902242047725]
- [0.04512120959511772, -0.24038969827844062, 0.34944999592135334, -0.2811680730534579,
-0.21402523377497645]
- [0.0692580056243761, -0.30062293462829204, 0.20641375368520157, -0.14960814319756124,
-0.5580573163749153]
- [0.02980115192176571, -0.13866480809160409, 0.18362212572805459, -0.20969545230657607,
-0.14481622025561292]
- [0.005699344003198349, -0.023385825120201414, -0.06600665373981851, 0.10749935271466007,
-0.15103654604159977]
- [-0.02008655193147911, 0.09026347555230492, -0.005525585655539262, -0.031355317090308935,
0.2432902242047721]
- numpy.ndarray:
dtype: float64
values:
- [0.07744772079170498, 0.09007073705762163, -0.3047220063293005, 0.27676245498591084,
0.20289396030627946]
- [-0.09902980483670908, -0.08061846818728234, 0.2585317069225073, -0.12086330214608876,
-0.11085207725068613]
- [-0.06171079202853737, -0.06244151779954791, 0.12007654564862096, 0.0025063746277944722,
-0.1567967473145574]
- [-0.0027369737499654224, -0.009005721984277773, -0.0004600329590916909, -0.00855042647200538,
-0.053754646789682024]
- [0.03098732758871066, 0.03972680066723251, -0.049971133509102664, 0.003576941187496189,
0.14182576205856365]
- [0.0774477207917058, 0.09007073705762021, -0.3047220063293013, 0.2767624549859105,
0.20289396030628148]
- [-0.09902980483670844, -0.08061846818727973, 0.25853170692250554, -0.12086330214608881,
-0.11085207725068251]
- [-0.061710792028537534, -0.06244151779954751, 0.12007654564862075, 0.0025063746277943564,
-0.1567967473145572]
- [-0.002736973749965403, -0.009005721984277787, -0.00046003295909181354, -0.008550426472005344,
-0.053754646789681754]
- [0.030987327588710728, 0.03972680066723246, -0.04997113350910248, 0.0035769411874962344,
0.1418257620585633]
- numpy.ndarray:
dtype: float64
values:
- [-0.06791915236220136, -0.11357937659088249, 0.379553926040543, -0.21784979894046527,
-0.229776950899381]
- [0.11596642335411327, 0.14914956804629298, -0.13357508376686894, -0.008916063072034729,
0.3484153673774827]
- [0.01173081754742677, 0.019273800531955328, 0.04142658345867112, -0.03534658856098203,
0.02316491010895624]
- [0.007328911075541722, 0.005536509132796604, -0.022456082950666895, 0.03611543477693201,
-0.03851433900140671]
- [-0.010589894686551596, -0.010626616553723715, -0.0005431056456617233, -0.02556747670016042,
0.049848888189290286]
- [-0.06791915236220235, -0.11357937659088102, 0.37955392604054394, -0.21784979894046635,
-0.22977695089938127]
- [0.11596642335411328, 0.14914956804629287, -0.13357508376686902, -0.008916063072034974,
0.3484153673774836]
- [0.011730817547426673, 0.019273800531955612, 0.0414265834586712, -0.035346588560982,
0.02316491010895583]
- [0.007328911075541707, 0.005536509132796312, -0.022456082950666856, 0.03611543477693187,
-0.038514339001406585]
- [-0.010589894686551544, -0.010626616553723532, -0.000543105645661794, -0.025567476700160314,
0.04984888818929034]
- numpy.ndarray:
dtype: float64
values:
- [0.07276211427780344, -0.015719557685580397, 0.07428592814590271, -0.10369861539249554,
0.02475347368832667]
- [-0.05607105449779084, -0.08896207276035853, 0.2763822539752058, -0.23711255828384722,
0.07372294122304912]
- [-0.007391294007754002, -0.048741797963871694, -0.6291239733858526, 0.46816276521577743,
0.09251699239092943]
- [-0.007110224931879211, -0.05623317735897968, -0.366066585676203, -0.013297798115225577,
0.649103317749257]
- [0.0023355150085563733, -0.02156115126448696, 0.09096243479437824, -0.3843882349306287,
0.6616477207948644]
- [0.07276211427780357, -0.0157195576855797, 0.07428592814590385, -0.10369861539249735,
0.024753473688328077]
- [-0.05607105449779142, -0.08896207276035666, 0.27638225397521243, -0.2371125582838589,
0.07372294122306285]
- [-0.007391294007753122, -0.048741797963875705, -0.6291239733858526, 0.46816276521577677,
0.09251699239093385]
- [-0.007110224931878467, -0.05623317735898056, -0.36606658567620365, -0.013297798115225407,
0.6491033177492604]
- [0.002335515008556511, -0.021561151264484414, 0.09096243479437888, -0.38438823493062646,
0.6616477207948602]
type: classic
This diff is collapsed.
......@@ -111,6 +111,31 @@ class Hankel(object):
else:
raise ValueError("At least sample_instance or lhankel has to be not None.")
def __eq__(self, other):
    """Tell whether ``other`` holds the same Hankel data as this instance.

    Two instances are equal when their ``version``, ``partial``, ``sparse``,
    ``build_from_sample``, ``nbL`` and ``nbEx`` attributes match and their
    ``lhankel`` lists contain the same number of matrices, each pair having
    the same shape and the same entries.

    :param other: the object to compare with
    :returns: True when both objects are equal, False otherwise; an object
        that is not an instance of this class is never equal
    :rtype: bool
    """
    # Comparing with a foreign object must answer "not equal" instead of
    # failing with AttributeError on the first missing attribute.
    if not isinstance(other, type(self)):
        return False
    scalar_fields = ("version", "partial", "sparse", "build_from_sample",
                     "nbL", "nbEx")
    for field in scalar_fields:
        if getattr(self, field) != getattr(other, field):
            return False
    if len(self.lhankel) != len(other.lhankel):
        return False
    for lh1, lh2 in zip(self.lhankel, other.lhankel):
        # With different shapes the element-wise comparison of sparse
        # matrices collapses to a plain bool (no ``nnz``), so decide early.
        if getattr(lh1, "shape", None) != getattr(lh2, "shape", None):
            return False
        diff = lh1 != lh2
        if hasattr(diff, "nnz"):
            # Sparse result: stored entries mark the differing cells.
            if diff.nnz > 0:
                return False
        elif hasattr(diff, "any"):
            # Dense (array-like) result.
            if diff.any():
                return False
        elif diff:
            return False
    return True
def __ne__(self, other):
    """Return the logical negation of ``__eq__`` for the same operand."""
    is_equal = self.__eq__(other)
    return not is_equal
@property
def nbL(self):
"""Number of letters"""
......
......@@ -35,12 +35,11 @@
# ######### COPYRIGHT #########
"""This module contains the Serializer class
"""
import numpy as np
from splearn.automaton import Automaton
from splearn.hankel import Hankel
from numpy.f2py.common_rules import findcommonblocks
import scipy.sparse as sps
class Serializer(object):
""" Serializer is an helping object for data serialization
......@@ -49,9 +48,16 @@ class Serializer(object):
@staticmethod
def __serialize(data):
if data is None or isinstance(data, (bool, int, float, str)):
if type(data).__module__ == "numpy":
return np.asscalar(data)
return data
if isinstance(data, list):
return [Serializer.__serialize(val) for val in data]
if isinstance(data, sps.dok_matrix):
k_str = "({0:d},{1:d})"
return {"scipy.dok_matrix": {"shape" : Serializer.__serialize(data.shape), "dtype": str(data.dtype),
"values" : Serializer.__serialize(dict(zip([k_str.format(i,j) for (i,j) in data.keys()],
data.values())))}}
if isinstance(data, dict):
if all(isinstance(k, str) for k in data):
return {k: Serializer.__serialize(v) for k, v in data.items()}
......@@ -71,7 +77,7 @@ class Serializer(object):
if isinstance(data, Hankel):
data_dict = {"nbL":data.nbL, "lhankel" : data.lhankel, "version" : data.version,
"partial" : data.partial, "sparse" : data.sparse,
"build_from_sample" : data.build_from_sample, "ndEx" : data.nbEx}
"build_from_sample" : data.build_from_sample, "nbEx" : data.nbEx}
return {"hankel" : Serializer.__serialize(data_dict)}
raise TypeError("Type %s is not serializabled" % type(data))
......@@ -83,11 +89,26 @@ class Serializer(object):
return tuple(data_str["tuple"])
if "set" in data_str:
return set(data_str["set"])
if "scipy.dok_matrix" in data_str:
data = data_str["scipy.dok_matrix"]
keys = {"shape", "dtype", "values"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
values = Serializer.__restore_json(data["values"])
shape = Serializer.__restore_json(data["shape"])
dok = sps.dok_matrix(shape, dtype=data["dtype"])
for k, val in values.items():
k = k.replace("(","").replace(")","")
ind1, ind2 = k.split(",")
dok[(int(ind1), int(ind2))] = val
return dok
if "numpy.ndarray" in data_str:
data = data_str["numpy.ndarray"]
keys = {"values", "dtype"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
return np.array(data["values"], dtype=data["dtype"])
......@@ -95,16 +116,16 @@ class Serializer(object):
data = Serializer.__restore_json(data_str["automaton"])
keys = {"nbL", "nbS", "initial", "final", "transitions", "type"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
return Automaton(nbL=data["nbL"], nbS=data["nbS"], initial=data["initial"], final=data["final"],
transitions=data["transitions"], type=data["type"])
if "hankel" in data_str:
data = Serializer.__restore_json(data_str["hankel"])
keys = {"nbL", "lhankel", "version", "partial", "sparse", "build_from_sample", "ndEx"}
keys = {"nbL", "lhankel", "version", "partial", "sparse", "build_from_sample", "nbEx"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
H = Hankel(version=data["version"], partial=data["partial"], sparse=data["sparse"],
......@@ -129,11 +150,30 @@ class Serializer(object):
return tuple(data_str["tuple"])
if "set" in data_str:
return set(data_str["set"])
if "scipy.dok_matrix" in data_str:
data = data_str["scipy.dok_matrix"]
keys = {"shape", "dtype", "values"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
values = Serializer.__restore_json(data["values"])
shape = Serializer.__restore_json(data["shape"])
dok = sps.dok_matrix(shape, dtype=data["dtype"])
for k, val in values.items():
k = k.replace("(","").replace(")","")
ind1, ind2 = k.split(",")
dok[(int(ind1), int(ind2))] = val
return dok
if "numpy.ndarray" in data_str:
data = data_str["numpy.ndarray"]
keys = {"values", "dtype"}
if data is None:
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
return np.array(data["values"], dtype=data["dtype"])
......@@ -141,7 +181,7 @@ class Serializer(object):
data = Serializer.__restore_yaml(data_str["automaton"])
keys = {"nbL", "nbS", "initial", "final", "transitions", "type"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
return Automaton(nbL=data["nbL"], nbS=data["nbS"], initial=Serializer.__restore_yaml(data["initial"]),
......@@ -150,9 +190,9 @@ class Serializer(object):
type=data["type"])
if "hankel" in data_str:
data = Serializer.__restore_json(data_str["hankel"])
keys = {"nbL", "lhankel", "version", "partial", "sparse", "build_from_sample", "ndEx"}
keys = {"nbL", "lhankel", "version", "partial", "sparse", "build_from_sample", "nbEx"}
if not keys.issubset(set(data.keys())):
raise ValueError("The input data string (" + data_str +
raise ValueError("The input data string (" + str(data_str) +
") should contain the following keys : \"" +
'\", \"'.join(keys) + "\"")
H = Hankel(version=data["version"], partial=data["partial"], sparse=data["sparse"],
......
......@@ -36,6 +36,8 @@
import unittest
import numpy as np
import os
from collections import deque
import yaml
from splearn.automaton import Automaton
from splearn.hankel import Hankel
......@@ -81,21 +83,19 @@ class UnitaryTest(unittest.TestCase):
for f in self.formats:
os.remove(get_dataset_path(self.input_file + "_hankel" + "." + f))
# def testReadWriteRealHankel(self):
# adr = get_dataset_path("3.pautomac.train")
# data = load_data_sample(adr=adr)
# X = data.data
# sp = Spectral()
# sp = sp.fit(X)
# H = Hankel( sample_instance=X.sample,
# lrows=6, lcolumns=6, version="classic",
# partial=True, sparse=True, mode_quiet=True)
# for f in self.formats:
# Hankel.write(H, get_dataset_path("3.pautomac.train" + "_hankel" + "." + f), format=f)
# Hb = Hankel.read(get_dataset_path("3.pautomac.train" + "_hankel" + "." + f), format = f)
# self.assertEqual(H, Hb)
# for f in self.formats:
# os.remove(get_dataset_path("3.pautomac.train" + "_hankel" + "." + f))
def testReadWriteRealHankel(self):
    """Round-trip the Hankel obtained from the ``3.pautomac.train`` sample.

    A Spectral estimator is fitted on the loaded sample and its ``hankel``
    attribute is written, then read back, in every format listed in
    ``self.formats``. Each reloaded object must compare equal to the
    original. The generated files are removed once every format has passed.
    """
    sample = load_data_sample(adr=get_dataset_path("3.pautomac.train"))
    estimator = Spectral().fit(sample.data)
    H = estimator.hankel
    stem = "3.pautomac.train" + "_hankel"
    for fmt in self.formats:
        Hankel.write(H, get_dataset_path(stem + "." + fmt), format=fmt)
        reloaded = Hankel.read(get_dataset_path(stem + "." + fmt), format=fmt)
        self.assertTrue(H == reloaded)
    # Cleanup happens in a second pass, after all round-trips succeeded.
    for fmt in self.formats:
        os.remove(get_dataset_path(stem + "." + fmt))
def testOthersSerializationTypes(self):
data = [{'a' : 10, 40 : 'gu'}, {'toto', 5, 2.5, 'b'}, ('gh', 25, 'ko', 1.0)]
......@@ -114,6 +114,27 @@ class UnitaryTest(unittest.TestCase):
self.assertEqual(data, data_json)
self.assertEqual(data, data_yaml)
def testBadTypeSerialieationException(self):
    """Serializing an unsupported type (here a deque) must raise TypeError."""
    unsupported = deque('ghi')
    with self.assertRaises(TypeError):
        Serializer.data_to_json(unsupported)
def testBadDataException(self):
    """Malformed serialized matrices and arrays must raise ValueError.

    Each entry pairs a Serializer loader with an invalid payload; the
    entries are checked in order and the first unexpected outcome fails
    the test.
    """
    bad_inputs = (
        # dok_matrix description without its "dtype" entry
        (Serializer.yaml_to_data,
         "- scipy.dok_matrix:\n shape:\n tuple: [1, 1]\n values: {'(0,0)': 1.0}"),
        # dok_matrix description with no entry at all
        (Serializer.json_to_data, "{\"scipy.dok_matrix\":{}}"),
        # ndarray description with an empty (null) body
        (Serializer.yaml_to_data, "- numpy.ndarray:"),
        # ndarray description without its "values" entry
        (Serializer.yaml_to_data, "- numpy.ndarray:\n dtype: float64"),
        # ndarray description with no entry at all
        (Serializer.json_to_data, "{\"numpy.ndarray\":{}}"),
    )
    for loader, payload in bad_inputs:
        with self.assertRaises(ValueError):
            loader(payload)
# Run the whole test module through unittest when executed as a script.
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment