Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scikit-splearn
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
dev
scikit-splearn
Commits
f14ae87c
There was a problem fetching the pipeline summary.
Commit
f14ae87c
authored
7 years ago
by
Denis Arrivault
Browse files
Options
Downloads
Patches
Plain Diff
populate_dictionnary rewrite in progress
parent
cb7dcfc2
Branches
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
examples/performances_calculation.py
+37
-0
37 additions, 0 deletions
examples/performances_calculation.py
splearn/spectral.py
+136
-100
136 additions, 100 deletions
splearn/spectral.py
with
173 additions
and
100 deletions
examples/performances_calculation.py
0 → 100644
+
37
−
0
View file @
f14ae87c
# -*- coding: utf-8 -*-
'''
Created on 20 févr. 2018
@author: arrivault
'''
import
sys
from
timeit
import
default_timer
as
timer
from
splearn
import
Spectral
from
splearn.tests.datasets.get_dataset_path
import
get_dataset_path
from
splearn.datasets.base
import
load_data_sample
def test():
    """
    Compare the running time of ``Spectral.fit`` and ``Spectral.fit_opt``

    Both estimators are fitted on the data of the *3.pautomac.train*
    dataset.  The duration of each fit is printed on the standard output,
    then the ``hankel`` attributes of the two fitted estimators are compared
    with ``==``; a mismatch is reported on the standard error stream.
    """
    X = load_data_sample(adr=get_dataset_path("3.pautomac.train")).data

    def timed(label, fit):
        # Only the call to the fitting method is measured; the estimator is
        # built by the caller before the clock starts.
        begin = timer()
        fitted = fit(X)
        elapsed = timer() - begin
        print("{}{}".format(label, elapsed))
        return fitted

    classic = timed("Classic : ", Spectral().fit)
    optimised = timed("Opt : ", Spectral().fit_opt)

    if classic.hankel == optimised.hankel:
        print("Same result.")
        return
    print("The result is different", file=sys.stderr)
# Run the comparison only when the file is executed as a script.
if __name__ == "__main__":
    test()
This diff is collapsed.
Click to expand it.
splearn/spectral.py
+
136
−
100
View file @
f14ae87c
...
@@ -41,15 +41,13 @@
...
@@ -41,15 +41,13 @@
from
__future__
import
division
,
print_function
from
__future__
import
division
,
print_function
import
numpy
as
np
import
numpy
as
np
import
math
import
math
import
threading
import
warnings
lock
=
threading
.
Lock
()
from
splearn.datasets.data_sample
import
SplearnArray
from
splearn.datasets.data_sample
import
SplearnArray
from
splearn.hankel
import
Hankel
from
splearn.hankel
import
Hankel
from
sklearn.base
import
BaseEstimator
from
sklearn.base
import
BaseEstimator
from
sklearn.utils
import
check_array
from
sklearn.utils
import
check_array
from
sklearn.utils.validation
import
NotFittedError
from
sklearn.utils.validation
import
NotFittedError
import
warnings
class
Spectral
(
BaseEstimator
):
class
Spectral
(
BaseEstimator
):
"""
A Spectral estimator instance
"""
A Spectral estimator instance
...
@@ -224,106 +222,135 @@ class Spectral(BaseEstimator):
...
@@ -224,106 +222,135 @@ class Spectral(BaseEstimator):
return
self
return
self
def
_populate_sample_dict
(
self
,
X
):
def fit_opt(self, X, y=None):
    """
    Fit the model, populating the dictionaries of X with
    ``polulate_dictionnaries_opt``

    - Input:

    :param SplearnArray X: object of shape [n_samples,n_features]
           Training data
    :param ndarray y: (default value = None) not used by Spectral estimator
           numpy array of shape [n_samples] Target values

    - Output:

    :returns: Spectral itself with an automaton attribute instanced
              returns an instance of self.
    :rtype: Spectral
    """
    check_array(X)
    if not isinstance(X, SplearnArray):
        # Nothing can be learnt from another kind of input: the Hankel
        # matrix and the automaton are reset and the estimator is returned.
        self._hankel = self._automaton = None
        return self

    X = self.polulate_dictionnaries_opt(X)
    hankel_options = dict(
        sample_instance=X,
        lrows=self.lrows,
        lcolumns=self.lcolumns,
        version=self.version,
        partial=self.partial,
        sparse=self.sparse,
        mode_quiet=self.mode_quiet,
    )
    self._hankel = Hankel(**hankel_options)
    self._automaton = self._hankel.to_automaton(self.rank, self.mode_quiet)

    # The trigram dictionary is only computed for the smooth option.
    if self.smooth == 1:
        self.trigram = self._threegramdict(X.sample)
    return self
def polulate_dictionnaries_opt(self, X):
    """
    Populates the *sample*, *pref*, *suff*, *fact* dictionaries of X

    The four dictionaries are reset, then every row of X is handed to
    ``_populate_a_word``.  When ``self.partial`` is true the row and column
    bounds derived from ``self.lrows`` and ``self.lcolumns`` are passed
    along; otherwise ``_populate_a_word`` is called with its defaults.

    - Input:

    :param SplearnArray X: object of shape [n_samples,n_features]
           Training data

    - Output:

    :returns: X itself; it is returned untouched when it is not a
              SplearnArray
    """
    if not isinstance(X, SplearnArray):
        return X

    # (word, count), (prefix, count), (suffix, count), (factor, count)
    X.sample, X.pref, X.suff, X.fact = {}, {}, {}, {}

    bounds = ()
    if self.partial:
        # An int is used as given; for anything else the number of
        # elements is taken instead.
        version_rows_int = isinstance(self.lrows, int)
        lrowsmax = self.lrows if version_rows_int else len(self.lrows)
        version_columns_int = isinstance(self.lcolumns, int)
        lcolumnsmax = (self.lcolumns if version_columns_int
                       else len(self.lcolumns))
        bounds = (lrowsmax, version_rows_int,
                  lcolumnsmax, version_columns_int,
                  lrowsmax + lcolumnsmax)

    for line in range(X.shape[0]):
        self._populate_a_word(X, line, *bounds)
    return X
def
_populate_a_word
(
self
,
X
,
line
,
lrowsmax
=
None
,
version_rows_int
=
None
,
lcolumnsmax
=
None
,
version_columns_int
=
None
,
lmax
=
None
):
w
=
X
[
line
,
:]
w
=
X
[
line
,
:]
w
=
w
[
w
>=
0
]
w
=
w
[
w
>=
0
]
w
=
tuple
([
int
(
x
)
for
x
in
w
[
0
:]])
w
=
tuple
([
int
(
x
)
for
x
in
w
[
0
:]])
dsample
[
w
]
=
dsample
[
w
]
+
1
if
w
in
dsample
else
1
X
.
sample
[
w
]
=
X
.
sample
.
setdefault
(
w
,
0
)
+
1
return
dsample
if
self
.
version
==
"
prefix
"
or
self
.
version
==
"
classic
"
:
# empty word treatment for prefixe, suffix, and factor dictionnaries
# def _populate_new_word(self, X, i, lrowsmax=None, version_rows_int=None,
X
.
pref
[()]
=
X
.
pref
.
setdefault
((),
0
)
+
1
# lcolumnsmax=None, version_columns_int=None, lmax=None):
if
self
.
version
==
"
suffix
"
or
self
.
version
==
"
classic
"
:
# w = X[i, :]
X
.
suff
[()]
=
X
.
suff
.
setdefault
((),
0
)
+
1
# w = w[w >= 0]
if
(
self
.
version
==
"
factor
"
or
self
.
version
==
"
suffix
"
or
# w = tuple([int(x) for x in w[0:]])
self
.
version
==
"
prefix
"
):
# with lock:
X
.
fact
[()]
=
X
.
fact
.
setdefault
((),
0
)
+
len
(
w
)
+
1
# X.sample[w] = X.sample.setdefault(w, 0) + 1
if
self
.
partial
:
# if self.version == "prefix" or self.version == "classic":
for
i
in
range
(
len
(
w
)):
# # empty word treatment for prefixe, suffix, and factor dictionnaries
if
self
.
version
==
"
classic
"
:
# with lock:
if
((
version_rows_int
and
i
+
1
<=
lrowsmax
)
or
# X.pref[()] = X.pref[()] + 1 if () in X.pref else 1
(
not
version_rows_int
and
w
[:
i
+
1
]
in
self
.
lrows
)):
# if self.version == "suffix" or self.version == "classic":
X
.
pref
[
w
[:
i
+
1
]]
=
X
.
pref
.
setdefault
(
w
[:
i
+
1
],
0
)
+
1
# with lock:
if
((
version_columns_int
and
i
+
1
<=
lcolumnsmax
)
or
# X.suff[()] = X.suff[()] + 1 if () in X.suff else 1
(
not
version_columns_int
and
w
[
-
(
i
+
1
):]
in
self
.
lcolumns
)):
# if self.version == "factor" or self.version == "suffix" \
X
.
suff
[
w
[
-
(
i
+
1
):]]
=
X
.
suff
.
setdefault
(
w
[
-
(
i
+
1
):],
0
)
+
1
# or self.version == "prefix":
elif
self
.
version
==
"
prefix
"
:
# with lock:
# dictionaries dpref is populated until
# X.fact[()] = X.fact[()] + len(w) + 1 if () in X.fact else len(w) + 1
# lmax = lrows + lcolumns
#
# dictionaries dfact is populated until lcolumns
# if self.partial:
if
(((
version_rows_int
or
version_columns_int
)
and
i
+
1
<=
lmax
)
or
# for i in range(len(w)):
(
not
version_rows_int
and
w
[:
i
+
1
]
in
self
.
lrows
)
or
# if self.version == "classic":
(
not
version_columns_int
and
w
[:
i
+
1
]
in
self
.
lcolumns
)):
# if (version_rows_int is True and
X
.
pref
[
w
[:
i
+
1
]]
=
X
.
pref
.
setdefault
(
w
[:
i
+
1
],
0
)
+
1
# i + 1 <= lrowsmax) or \
for
j
in
range
(
i
+
1
,
len
(
w
)
+
1
):
# (version_rows_int is False and
if
((
version_columns_int
and
(
j
-
i
)
<=
lmax
)
or
# w[:i + 1] in self.lrows):
(
not
version_columns_int
and
w
[
i
:
j
]
in
self
.
lcolumns
)):
# with lock:
X
.
fact
[
w
[
i
:
j
]]
=
X
.
fact
.
setdefault
(
w
[
i
:
j
],
0
)
+
1
# X.pref[w[:i + 1]] = \
elif
self
.
version
==
"
suffix
"
:
# X.pref[w[:i + 1]] + 1 if w[:i + 1] in X.pref else 1
if
(((
version_rows_int
or
version_columns_int
)
and
i
<=
lmax
)
or
# if (version_columns_int is True and i + 1 <= lcolumnsmax) or \
(
not
version_rows_int
and
w
[
-
(
i
+
1
):]
in
self
.
lrows
)
or
# (version_columns_int is False and w[-( i + 1):] in self.lcolumns):
(
not
version_columns_int
and
w
[
-
(
i
+
1
):]
in
self
.
lcolumns
)):
# with lock:
X
.
suff
[
w
[
-
(
i
+
1
):]]
=
X
.
suff
.
setdefault
(
w
[
-
(
i
+
1
):],
0
)
+
1
# X.suff[w[-(i + 1):]] = X.suff[w[-(i + 1):]] + 1 if \
for
j
in
range
(
i
+
1
,
len
(
w
)
+
1
):
# w[-(i + 1):] in X.suff else 1
if
((
version_rows_int
and
(
j
-
i
)
<=
lmax
)
or
# if self.version == "prefix":
(
not
version_rows_int
and
w
[
i
:
j
]
in
self
.
lrows
)):
# # dictionaries dpref is populated until
X
.
fact
[
w
[
i
:
j
]]
=
X
.
fact
.
setdefault
(
w
[
i
:
j
],
0
)
+
1
# # lmax = lrows + lcolumns
elif
self
.
version
==
"
factor
"
:
# # dictionaries dfact is populated until lcolumns
for
j
in
range
(
i
+
1
,
len
(
w
)
+
1
):
# if ((version_rows_int is True or
if
(((
version_rows_int
or
version_columns_int
)
and
(
j
-
i
)
<=
lmax
)
or
# version_columns_int is True) and
(
not
version_rows_int
and
w
[
i
:
j
]
in
self
.
lrows
)
or
# i + 1 <= lmax) or \
(
not
version_columns_int
and
w
[
i
:
j
]
in
self
.
lcolumns
)):
# (version_rows_int is False and
X
.
fact
[
w
[
i
:
j
]]
=
X
.
fact
.
setdefault
(
w
[
i
:
j
],
0
)
+
1
# (w[:i + 1] in self.lrows)) or \
else
:
# not partial
# (version_columns_int is False and
for
i
in
range
(
len
(
w
)):
# (w[:i + 1] in self.lcolumns)):
X
.
pref
[
w
[:
i
+
1
]]
=
X
.
pref
.
setdefault
(
w
[:
i
+
1
],
0
)
+
1
# X.pref[w[:i + 1]] = X.pref[w[:i + 1]] + 1 \
X
.
suff
[
w
[
i
:]]
=
X
.
suff
.
setdefault
(
w
[
i
:],
0
)
+
1
# if w[:i + 1] in X.pref else 1
for
j
in
range
(
i
+
1
,
len
(
w
)
+
1
):
# for j in range(i + 1, len(w) + 1):
X
.
fact
[
w
[
i
:
j
]]
=
X
.
fact
.
setdefault
(
w
[
i
:
j
],
0
)
+
1
# if (version_columns_int is True and (
# j - i) <= lmax) or \
# (version_columns_int is False and
# (w[i:j] in self.lcolumns)):
# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \
# if w[i:j] in X.fact else 1
# if self.version == "suffix":
# if ((version_rows_int is True or
# version_columns_int is True) and
# i <= lmax) or \
# (version_rows_int is False and
# (w[-(i + 1):] in self.lrows)) or \
# (version_columns_int is False and
# (w[-(i + 1):] in self.lcolumns)):
# X.suff[w[-(i + 1):]] = X.suff[w[-(i + 1):]] + 1 \
# if w[-(i + 1):] in X.suff else 1
# for j in range(i + 1, len(w) + 1):
# if (version_rows_int is True and (
# j - i) <= lmax) or \
# (version_rows_int is False and
# (w[i:j] in self.lrows)):
# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \
# if w[i:j] in X.fact else 1
# if self.version == "factor":
# for j in range(i + 1, len(w) + 1):
# if ((version_rows_int is True or
# version_columns_int is True) and
# (j - i) <= lmax) or \
# (version_rows_int is False and
# (w[i:j] in self.lrows)) or \
# (version_columns_int is False and
# (w[i:j] in self.lcolumns)):
# X.fact[w[i:j]] = \
# X.fact[w[i:j]] + 1 if w[i:j] in X.fact else 1
#
# else: # not partial
# for i in range(len(w)):
# X.pref[w[:i + 1]] = X.pref[w[:i + 1]] + 1 \
# if w[:i + 1] in X.pref else 1
# X.suff[w[i:]] = X.suff[w[i:]] + 1 if w[i:] in X.suff else 1
# for j in range(i + 1, len(w) + 1):
# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \
# if w[i:j] in X.fact else 1
def
polulate_dictionnaries
(
self
,
X
):
def
polulate_dictionnaries
(
self
,
X
):
"""
Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X
"""
Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X
...
@@ -460,6 +487,15 @@ class Spectral(BaseEstimator):
...
@@ -460,6 +487,15 @@ class Spectral(BaseEstimator):
X
.
pref
=
{}
X
.
pref
=
{}
return
X
return
X
def
_populate_sample_dict
(
self
,
X
):
dsample
=
{}
# dictionary (word,count)
for
line
in
range
(
X
.
shape
[
0
]):
w
=
X
[
line
,
:]
w
=
w
[
w
>=
0
]
w
=
tuple
([
int
(
x
)
for
x
in
w
[
0
:]])
dsample
[
w
]
=
dsample
[
w
]
+
1
if
w
in
dsample
else
1
return
dsample
@property
@property
def
trigram
(
self
):
def
trigram
(
self
):
"""
The trigram dictionary
"""
"""
The trigram dictionary
"""
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment