Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
signatures-DF
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DOLPHINFREE experiments
signatures-DF
Commits
4109715c
Commit
4109715c
authored
3 months ago
by
Loïc Lehnhoff
Browse files
Options
Downloads
Patches
Plain Diff
ZIPF correction
parent
e8a58ea6
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
WCT_analysis.ipynb
+634
-298
634 additions, 298 deletions
WCT_analysis.ipynb
WCT_analysis_utils.py
+92
-0
92 additions, 0 deletions
WCT_analysis_utils.py
with
726 additions
and
298 deletions
WCT_analysis.ipynb
+
634
−
298
View file @
4109715c
Source diff could not be displayed: it is too large. Options to address this:
view the blob
.
This diff is collapsed.
Click to expand it.
WCT_analysis_utils.py
+
92
−
0
View file @
4109715c
...
@@ -7,11 +7,14 @@ import numpy as np
...
@@ -7,11 +7,14 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
datetime
import
datetime
from
datetime
import
datetime
import
scipy.optimize
as
optimize
from
scipy.interpolate
import
interp1d
from
scipy.interpolate
import
interp1d
import
seaborn
as
sns
import
seaborn
as
sns
import
matplotlib.pyplot
as
plt
import
matplotlib.pyplot
as
plt
from
matplotlib.lines
import
Line2D
from
matplotlib.lines
import
Line2D
from
matplotlib.patches
import
Ellipse
import
matplotlib.transforms
as
transforms
from
scipy
import
stats
from
scipy
import
stats
from
scikit_posthocs
import
posthoc_dunn
from
scikit_posthocs
import
posthoc_dunn
...
@@ -650,6 +653,95 @@ def get_umap(dtw_matrix, save_to, min_dist=0.1, n_neighbors=100, verbose=True):
...
@@ -650,6 +653,95 @@ def get_umap(dtw_matrix, save_to, min_dist=0.1, n_neighbors=100, verbose=True):
if
verbose
:
if
verbose
:
print
(
f
"
\t
Successfully saved the umap results to
{
save_to
}
.
"
)
print
(
f
"
\t
Successfully saved the umap results to
{
save_to
}
.
"
)
def
confidence_ellipse
(
x
,
y
,
ax
,
n_std
=
2.0
,
**
kwargs
):
"""
Create a plot of the covariance confidence ellipse of *x* and *y*.
Parameters
----------
x, y : array-like, shape (n, )
Input data.
ax : matplotlib.axes.Axes
The axes object to draw the ellipse into.
n_std : float
The number of standard deviations to determine the ellipse
'
s radiuses.
**kwargs
Forwarded to `~matplotlib.patches.Ellipse`
"""
if
x
.
size
!=
y
.
size
:
raise
ValueError
(
"
x and y must be the same size
"
)
cov
=
np
.
cov
(
x
,
y
)
pearson
=
cov
[
0
,
1
]
/
np
.
sqrt
(
cov
[
0
,
0
]
*
cov
[
1
,
1
])
# Using a special case to obtain the eigenvalues of this
# two-dimensional dataset.
ell_radius_x
=
np
.
sqrt
(
1
+
pearson
)
ell_radius_y
=
np
.
sqrt
(
1
-
pearson
)
ellipse
=
Ellipse
((
0
,
0
),
width
=
ell_radius_x
*
2
,
height
=
ell_radius_y
*
2
,
**
kwargs
)
# Calculating the standard deviation of x from
# the squareroot of the variance and multiplying
# with the given number of standard deviations.
scale_x
=
np
.
sqrt
(
cov
[
0
,
0
])
*
n_std
scale_y
=
np
.
sqrt
(
cov
[
1
,
1
])
*
n_std
transf
=
transforms
.
Affine2D
()
\
.
rotate_deg
(
45
)
\
.
scale
(
scale_x
,
scale_y
)
\
.
translate
(
np
.
mean
(
x
),
np
.
mean
(
y
))
ellipse
.
set_transform
(
transf
+
ax
.
transData
)
return
ax
.
add_patch
(
ellipse
)
def
mandelbrot_zipf
(
rank
,
c
,
s
,
b
):
"""
Mandelbrot-Zipf law: f = c / (rank + s)^b
c: normalization constant
s: parameter to adjust the tail of the distribution
b: exponent
"""
return
c
/
(
rank
+
s
)
**
b
def
mandelbrot_law_fit
(
df
):
"""
Analyze if the given sizes follow Mandelbrot
'
s law.
Parameters:
df (DataFrame): Rank and Frequency of each element
Returns:
dict: Contains fitting parameters and goodness of fit metrics
"""
ranks
=
df
[
"
Rank
"
]
frequencies
=
df
[
"
Frequency
"
]
# Perform non-linear least squares fitting
try
:
# Initial guesses: c = max frequency, s = 1, b = 1
popt
,
pcov
=
optimize
.
curve_fit
(
mandelbrot_zipf
,
ranks
,
frequencies
,
p0
=
[
max
(
frequencies
),
1
,
1
],
bounds
=
([
0
,
0
,
0
],
[
np
.
inf
,
np
.
inf
,
3
]))
# Calculate R-squared to assess goodness of fit
residuals
=
frequencies
-
mandelbrot_zipf
(
ranks
,
*
popt
)
ss_res
=
np
.
sum
(
residuals
**
2
)
ss_tot
=
np
.
sum
((
frequencies
-
np
.
mean
(
frequencies
))
**
2
)
r_squared
=
1
-
(
ss_res
/
ss_tot
)
return
{
'
c
'
:
popt
[
0
],
# Normalization constant
'
s
'
:
popt
[
1
],
# Adjustment parameter
'
b
'
:
popt
[
2
],
# Exponent
'
r_squared
'
:
r_squared
,
'
is_fit_good
'
:
r_squared
>
0.8
# Threshold for a good fit
}
except
Exception
as
e
:
print
(
f
"
Error fitting Mandelbrot
'
s law:
{
e
}
"
)
return
None
#%%## Main #####
#%%## Main #####
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment