Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
skais
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Raphael Sturgis
skais
Merge requests
!1
Develop
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Develop
develop
into
main
Overview
0
Commits
12
Pipelines
0
Changes
21
Merged
Raphael Sturgis
requested to merge
develop
into
main
3 years ago
Overview
0
Commits
12
Pipelines
0
Changes
21
Expand
Make skais installable
0
0
Merge request reports
Compare
main
main (base)
and
latest version
latest version
bf3d0348
12 commits,
3 years ago
21 files
+
1677
−
0
Side-by-side
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
21
Search (e.g. *.vue) (Ctrl+P)
skais/ais/ais_points.py
0 → 100644
+
217
−
0
Options
import
pickle
from
datetime
import
datetime
import
pandas
as
pd
import
numpy
as
np
from
numba
import
jit
from
scipy.stats
import
stats
from
skais.ais.ais_trajectory
import
AISTrajectory
def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
    """Cut a single-vessel dataframe into AISTrajectory segments.

    Segment boundaries come from compute_trajectory: a gap larger than
    *time_gap* minutes between consecutive 'ts_sec' values, or a segment
    reaching *size_limit* points. Only segments with more than *min_size*
    points are kept.
    """
    total = len(df.index)
    timestamps = df['ts_sec']
    remaining = df.copy()
    trajectories = []

    position = 0
    while position < total:
        # length of the next segment, counted from the current position
        cut = compute_trajectory(timestamps[position:].to_numpy(), time_gap, size_limit)
        candidate = AISTrajectory(remaining[:cut], interpolation_time=interpolation_time)
        if len(candidate.df.index) > min_size:
            trajectories.append(candidate)
        remaining = remaining[cut:]
        position += cut

    return trajectories
@jit(nopython=True)
def compute_trajectory(times, time_gap, size_limit):
    """Return the length of the leading segment of *times*.

    Scans at most *size_limit* samples and stops at the first timestamp more
    than *time_gap* minutes after its predecessor (times are in seconds).
    Kept as a plain indexed loop so numba's nopython mode can compile it.
    """
    count = len(times)
    last_seen = times[0]

    idx = 0
    for idx in range(size_limit):
        if idx >= count:
            return idx
        if (times[idx] - last_seen) / 60 > time_gap:
            return idx
        last_seen = times[idx]

    # the whole window fit without a gap: segment spans size_limit points
    return idx + 1
class AISPoints:
    def __init__(self, df=None):
        """Wrap a dataframe of AIS position reports.

        If the dataframe has a 'ts' column it is parsed to datetimes and the
        rows are sorted by ('mmsi', 'ts'). Fix: the original crashed with
        TypeError on the documented default df=None ('ts' in None); a None
        dataframe is now stored untouched.
        """
        if df is not None and 'ts' in df:
            df['ts'] = pd.to_datetime(df.ts)
            df = df.sort_values(['mmsi', 'ts'])
        self.df = df
@staticmethod
def
load_from_csv
(
file_name
):
df
=
pd
.
read_csv
(
file_name
)
ais_points
=
AISPoints
(
df
)
return
ais_points
def
remove_outliers
(
self
,
features
,
rank
=
4
):
if
rank
<=
0
:
raise
ValueError
(
f
"
Rank is equal to
{
rank
}
, must be positive and superior to 0
"
)
for
feature
in
features
:
self
.
df
=
self
.
df
.
drop
(
self
.
df
[(
np
.
abs
(
stats
.
zscore
(
self
.
df
[
feature
]))
>
rank
)].
index
)
def
normalize
(
self
,
features
,
normalization_type
=
"
min-max
"
):
normalization_dict
=
{}
if
normalization_type
==
"
min-max
"
:
for
f
in
features
:
minimum
=
self
.
df
[
f
].
min
()
maximum
=
self
.
df
[
f
].
max
()
diff
=
(
maximum
-
minimum
)
if
diff
==
0
:
print
(
"
Warning: diff = %d
"
,
diff
)
diff
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
minimum
)
/
diff
normalization_dict
[
f
"
{
f
}
_minimum
"
]
=
minimum
normalization_dict
[
f
"
{
f
}
_maximum
"
]
=
maximum
elif
normalization_type
==
"
standardization
"
:
normalisation_factors
=
(
"
standardization
"
,
{})
for
f
in
features
:
mean
=
self
.
df
[
f
].
mean
()
std
=
self
.
df
[
f
].
std
()
if
std
==
0
:
print
(
"
Warning: std = %d
"
,
std
)
std
=
1
self
.
df
[
f
]
=
(
self
.
df
[
f
]
-
mean
)
/
std
normalization_dict
[
f
"
{
f
}
_mean
"
]
=
mean
normalization_dict
[
f
"
{
f
}
_std
"
]
=
std
else
:
raise
ValueError
(
f
"
{
normalization_type
}
not a valid normalization method. Must be on of [min-max,
"
f
"
standardization]
"
)
return
normalization_type
,
normalization_dict
def
histogram
(
self
,
features
,
bins
=
10
,
ranges
=
None
,
label
=
None
,
y_field
=
'
label
'
):
if
label
is
not
None
:
tmp
=
self
.
df
[
self
.
df
[
y_field
]
==
label
]
else
:
tmp
=
self
.
df
dat
=
tmp
[
features
]
h
=
np
.
histogramdd
(
dat
.
to_numpy
(),
bins
,
ranges
)[
0
]
if
h
.
sum
()
==
0
:
return
np
.
full
(
h
.
shape
,
1
/
h
.
size
)
else
:
return
h
/
h
.
sum
()
def
disjointed_histogram
(
self
,
features
,
bins
,
ranges
,
label
=
None
,
y_field
=
'
label
'
):
if
label
is
not
None
:
tmp
=
self
.
df
[
self
.
df
[
y_field
]
==
label
]
else
:
tmp
=
self
.
df
if
type
(
bins
)
==
int
:
bins
=
[
bins
for
_
in
features
]
histograms
=
[]
for
feature
,
bin
,
f_range
in
zip
(
features
,
bins
,
ranges
):
histograms
.
append
(
np
.
histogram
(
tmp
[
feature
],
bin
,
f_range
))
return
histograms
def
compute_diff_heading_cog
(
self
):
self
.
df
[
"
diff
"
]
=
self
.
df
.
apply
(
lambda
x
:
180
-
abs
(
abs
(
x
[
'
heading
'
]
-
x
[
'
cog
'
])
-
180
),
axis
=
1
)
def
clean_angles
(
self
):
self
.
df
=
self
.
df
[
self
.
df
[
"
cog
"
]
<=
360
]
self
.
df
=
self
.
df
[
self
.
df
[
"
cog
"
]
>=
0
]
self
.
df
=
self
.
df
[
self
.
df
[
"
heading
"
]
<=
360
]
self
.
df
=
self
.
df
[
self
.
df
[
"
heading
"
]
>=
0
]
def
histogram_joint_x_y
(
self
,
x_fields
=
[
"
sog
"
,
"
diff
"
],
x_nb_bins
=
10
,
x_range
=
[[
0
,
30
],
[
0
,
180
]]
,
y_nb_bins
=
2
,
y_fields
=
'
label
'
,
y_range
=
[
0
,
1
]):
return
self
.
histogram
(
x_fields
+
[
y_fields
],
bins
=
[
x_nb_bins
for
i
in
x_fields
]
+
[
y_nb_bins
],
ranges
=
x_range
+
[
y_range
])
def
histogram_x_knowing_y
(
self
,
x_fields
=
[
"
sog
"
,
"
diff
"
],
x_nb_bins
=
10
,
x_range
=
[[
0
,
30
],
[
0
,
180
]]
,
y_nb_bins
=
2
,
y_field
=
'
label
'
):
result
=
[]
for
i
in
range
(
y_nb_bins
):
layer
=
self
.
histogram
(
x_fields
,
bins
=
x_nb_bins
,
ranges
=
x_range
,
label
=
i
,
y_field
=
y_field
)
result
.
append
(
layer
)
return
np
.
stack
(
result
,
axis
=
len
(
x_fields
))
def
disjointed_histogram_x_knowing_y
(
self
,
features
,
x_nb_bins
=
10
,
x_range
=
[[
0
,
1
]]
,
y_nb_bins
=
4
,
y_field
=
'
label
'
):
out
=
[]
for
feature
,
f_range
in
zip
(
features
,
x_range
):
result
=
[]
for
i
in
range
(
y_nb_bins
):
layer
,
_
=
np
.
histogram
(
self
.
df
[
self
.
df
[
y_field
]
==
i
][
feature
].
to_numpy
(),
bins
=
x_nb_bins
,
range
=
f_range
)
if
layer
.
sum
()
==
0
:
layer
=
np
.
full
(
layer
.
shape
,
1
)
result
.
append
(
layer
)
out
.
append
(
np
.
stack
(
result
))
return
out
def
histogram_y_knowing_x
(
self
,
x_fields
=
[
"
sog
"
,
"
diff
"
],
x_nb_bins
=
10
,
x_range
=
[[
0
,
30
],
[
0
,
180
]]
,
y_nb_bins
=
2
,
y_field
=
'
label
'
,
y_range
=
[
0
,
1
]):
h_joint
=
self
.
histogram_joint_x_y
(
x_fields
,
x_nb_bins
,
x_range
,
y_nb_bins
,
y_field
,
y_range
)
y_hist
=
self
.
histogram
(
features
=
y_field
,
bins
=
y_nb_bins
,
ranges
=
[
y_range
])
result
=
np
.
zeros
(
h_joint
.
shape
)
for
idx
,
x
in
np
.
ndenumerate
(
h_joint
):
if
h_joint
[
idx
[:
-
1
]].
sum
()
==
0
:
result
[
idx
]
=
y_hist
[
idx
[
-
1
]]
else
:
result
[
idx
]
=
x
/
h_joint
[
idx
[:
-
1
]].
sum
()
return
result
def
get_trajectories
(
self
,
time_gap
=
30
,
min_size
=
50
,
interpolation_time
=
None
):
if
'
ts
'
in
self
.
df
:
self
.
df
[
'
ts
'
]
=
pd
.
to_datetime
(
self
.
df
[
'
ts
'
],
infer_datetime_format
=
True
)
self
.
df
[
'
ts_sec
'
]
=
self
.
df
[
'
ts
'
].
apply
(
lambda
x
:
datetime
.
timestamp
(
x
))
dat
=
self
.
df
else
:
raise
ValueError
trajectories
=
[]
for
mmsi
in
dat
.
mmsi
.
unique
():
trajectories
+=
compute_trajectories
(
dat
[
dat
[
'
mmsi
'
]
==
mmsi
],
time_gap
,
min_size
=
min_size
,
interpolation_time
=
interpolation_time
)
return
trajectories
def
describe
(
self
):
stats
=
{
"
nb vessels
"
:
len
(
self
.
df
.
mmsi
.
unique
()),
"
nb points
"
:
len
(
self
.
df
.
index
),
"
average speed
"
:
self
.
df
[
'
sog
'
].
mean
(),
"
average diff
"
:
self
.
df
[
'
diff
'
].
mean
()
}
for
n
in
np
.
sort
(
self
.
df
[
'
label
'
].
unique
()):
stats
[
f
"
labeled
{
n
}
"
]
=
len
(
self
.
df
[
self
.
df
[
'
label
'
]
==
n
].
index
)
return
stats
@staticmethod
def
fuse
(
*
args
):
if
len
(
args
)
==
1
:
if
not
isinstance
(
args
[
0
],
list
):
return
args
[
0
]
else
:
table
=
args
[
0
]
else
:
table
=
args
dfs
=
[]
for
aisPosition
in
table
:
dfs
.
append
(
aisPosition
.
df
)
return
AISPoints
(
pd
.
concat
(
dfs
).
reindex
())
Loading