Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
skais
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Raphael Sturgis
skais
Merge requests
!10
Resolve "Image creation bugs with 0 size windows"
Code
Review changes
Check out branch
Download
Patches
Plain diff
Closed
Resolve "Image creation bugs with 0 size windows"
21-image-creation-bugs-with-0-size-windows
into
main
Overview
0
Commits
27
Pipelines
0
Changes
27
Closed
Raphael Sturgis
requested to merge
21-image-creation-bugs-with-0-size-windows
into
main
2 years ago
Overview
0
Commits
27
Pipelines
0
Changes
27
Expand
Closes
#21 (closed)
0
0
Merge request reports
Compare
main
main (base)
and
latest version
latest version
76df9c7d
27 commits,
2 years ago
27 files
+
1140
−
123
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
27
Search (e.g. *.vue) (Ctrl+P)
skais/ais/ais_points.py
+
84
−
61
Options
@@ -3,38 +3,6 @@ import pandas as pd
from
scipy.stats
import
stats
# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
# n_sample = len(df.index)
# result = []
# work_df = df.copy()
#
# index = 0
# while index < n_sample:
# i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit)
# trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time)
# if len(trajectory.df.index) > min_size:
# result.append(trajectory)
# work_df = work_df[i:]
# index += i
#
# return result
#
#
# @jit(nopython=True)
# def compute_trajectory(times, time_gap, size_limit):
# n_samples = len(times)
#
# previous_date = times[0]
#
# i = 0
# for i in range(size_limit):
# if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap):
# return i
# previous_date = times[i]
#
# return i + 1
class
AISPoints
:
# Todo: Should be more elegant
@@ -73,36 +41,91 @@ class AISPoints:
self
.
df
=
self
.
df
[
self
.
df
[
"
heading
"
]
<=
360
]
self
.
df
=
self
.
df
[
self
.
df
[
"
heading
"
]
>=
0
]
def
normalize
(
self
,
features
,
normalization_type
=
"
min-max
"
):
normalization_dict
=
{}
if
normalization_type
==
"
min-max
"
:
for
f
in
features
:
minimum
=
self
.
df
[
f
].
min
()
maximum
=
self
.
df
[
f
].
max
()
diff
=
(
maximum
-
minimum
)
if
diff
==
0
:
print
(
"
Warning: diff = %d
"
,
diff
)
diff
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
minimum
)
/
diff
normalization_dict
[
f
"
{
f
}
_minimum
"
]
=
minimum
normalization_dict
[
f
"
{
f
}
_maximum
"
]
=
maximum
elif
normalization_type
==
"
standardization
"
:
normalisation_factors
=
(
"
standardization
"
,
{})
for
f
in
features
:
mean
=
self
.
df
[
f
].
mean
()
std
=
self
.
df
[
f
].
std
()
if
std
==
0
:
print
(
"
Warning: std = %d
"
,
std
)
std
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
mean
)
/
std
normalization_dict
[
f
"
{
f
}
_mean
"
]
=
mean
normalization_dict
[
f
"
{
f
}
_std
"
]
=
std
def
normalize
(
self
,
min_max_features
=
(),
standardization_features
=
(),
third_quartile_features
=
(),
divide_by_value
=
(),
divide_by_max
=
(),
normalization_dict
=
None
):
if
normalization_dict
is
None
:
normalization_dict
=
{}
for
f
in
min_max_features
:
if
f
in
self
.
df
.
columns
:
normalization_dict
[
f
]
=
{
'
type
'
:
'
min-max
'
}
minimum
=
self
.
df
[
f
].
min
()
maximum
=
self
.
df
[
f
].
max
()
diff
=
(
maximum
-
minimum
)
if
diff
==
0
:
print
(
"
Warning: diff = 0
"
)
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
minimum
)
else
:
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
minimum
)
/
diff
normalization_dict
[
f
][
"
minimum
"
]
=
minimum
normalization_dict
[
f
][
"
maximum
"
]
=
maximum
for
f
in
standardization_features
:
if
f
in
self
.
df
.
columns
:
normalization_dict
[
f
]
=
{
'
type
'
:
'
standardization
'
}
mean
=
self
.
df
[
f
].
mean
()
std
=
self
.
df
[
f
].
std
()
if
std
==
0
:
print
(
"
Warning: std = %d
"
,
std
)
std
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
mean
)
/
std
normalization_dict
[
f
][
"
mean
"
]
=
mean
normalization_dict
[
f
][
"
std
"
]
=
std
for
f
in
third_quartile_features
:
if
f
in
self
.
df
.
columns
:
normalization_dict
[
f
]
=
{
'
type
'
:
'
3rd quartile
'
}
third_quartile
=
self
.
df
[
f
].
quantile
(
0.75
)
if
third_quartile
==
0
:
print
(
"
Warning: third quartile = %d
"
,
third_quartile
)
third_quartile
=
1
self
.
df
[
f
]
=
self
.
df
[
f
]
/
third_quartile
normalization_dict
[
f
][
"
value
"
]
=
third_quartile
for
t
in
divide_by_value
:
f
=
t
[
0
]
value
=
t
[
1
]
if
f
in
self
.
df
.
columns
:
if
value
!=
0
:
normalization_dict
[
f
]
=
{
'
type
'
:
'
divide by value
'
,
'
value
'
:
value
}
self
.
df
[
f
]
=
self
.
df
[
f
]
/
value
else
:
print
(
"
Warning: dividing by 0
"
)
for
f
in
divide_by_max
:
if
f
in
self
.
df
.
columns
:
maximum
=
self
.
df
[
f
].
max
()
normalization_dict
[
f
]
=
{
'
type
'
:
'
divide by max
'
,
'
maximum
'
:
maximum
}
self
.
df
[
f
]
=
self
.
df
[
f
]
/
maximum
else
:
raise
ValueError
(
f
"
{
normalization_type
}
not a valid normalization method. Must be on of [min-max,
"
f
"
standardization]
"
)
return
normalization_type
,
normalization_dict
for
f
in
normalization_dict
:
if
f
in
self
.
df
.
columns
:
if
normalization_dict
[
f
][
'
type
'
]
==
'
min-max
'
:
minimum
=
normalization_dict
[
f
][
"
minimum
"
]
maximum
=
normalization_dict
[
f
][
"
maximum
"
]
diff
=
(
maximum
-
minimum
)
if
diff
==
0
:
print
(
"
Warning: diff = 0
"
)
diff
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
minimum
)
/
diff
elif
normalization_dict
[
f
][
'
type
'
]
==
"
standardization
"
:
mean
=
normalization_dict
[
f
][
"
mean
"
]
std
=
normalization_dict
[
f
][
"
std
"
]
if
std
==
0
:
print
(
"
Warning: std = 0
"
)
std
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
mean
)
/
std
elif
normalization_dict
[
f
][
'
type
'
]
==
"
3rd quartile
"
:
third_quartile
=
normalization_dict
[
f
][
"
value
"
]
self
.
df
[
f
]
=
self
.
df
[
f
]
/
third_quartile
elif
normalization_dict
[
f
][
'
type
'
]
==
"
divide by value
"
:
value
=
normalization_dict
[
f
][
"
value
"
]
self
.
df
[
f
]
=
self
.
df
[
f
]
/
value
elif
normalization_dict
[
f
][
'
type
'
]
==
"
divide by max
"
:
maximum
=
normalization_dict
[
f
][
"
maximum
"
]
self
.
df
[
f
]
=
self
.
df
[
f
]
/
maximum
else
:
raise
ValueError
(
f
"
{
normalization_dict
[
f
][
'
type
'
]
}
not a valid normalization method. Must be on of [min-max,
"
f
"
standardization, 3rd quartile, divide by value]
"
)
return
normalization_dict
# New features
def
compute_drift
(
self
):
Loading