Skip to content
Snippets Groups Projects

Develop

Merged Raphael Sturgis requested to merge develop into main
21 files
+ 1677
0
Compare changes
  • Side-by-side
  • Inline
Files
21
+ 217
0
import pickle
from datetime import datetime
import pandas as pd
import numpy as np
from numba import jit
from scipy.stats import stats
from skais.ais.ais_trajectory import AISTrajectory
def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
    """Cut a single-vessel dataframe into AISTrajectory segments.

    Repeatedly asks `compute_trajectory` how many leading samples belong to
    the next trajectory, wraps that prefix in an AISTrajectory, and keeps it
    only when it holds strictly more than `min_size` points.

    :param df: dataframe of AIS messages for one vessel, with a 'ts_sec' column
    :param time_gap: maximum time gap (minutes) tolerated inside one trajectory
    :param min_size: trajectories with at most this many rows are discarded
    :param size_limit: maximum number of samples per trajectory
    :param interpolation_time: forwarded to the AISTrajectory constructor
    :return: list of AISTrajectory objects
    """
    timestamps = df['ts_sec'].to_numpy()
    total = len(df.index)
    remaining = df.copy()
    trajectories = []
    position = 0
    while position < total:
        cut = compute_trajectory(timestamps[position:], time_gap, size_limit)
        segment = AISTrajectory(remaining[:cut], interpolation_time=interpolation_time)
        if len(segment.df.index) > min_size:
            trajectories.append(segment)
        remaining = remaining[cut:]
        position += cut
    return trajectories
@jit(nopython=True)
def compute_trajectory(times, time_gap, size_limit):
    """Return the number of leading samples of `times` forming one trajectory.

    Scans `times` (timestamps in seconds, assumed ascending — produced from
    'ts_sec' by the callers) and stops at the first inter-sample gap larger
    than `time_gap` minutes, at the end of the data, or once `size_limit`
    samples have been accepted, whichever comes first.
    """
    n_samples = len(times)
    previous_date = times[0]
    i = 0
    for i in range(size_limit):
        # Stop at the end of the data, or when the gap since the previous
        # sample exceeds time_gap (converted from seconds to minutes).
        if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap):
            return i
        previous_date = times[i]
    # Loop exhausted: the whole size_limit-long window is one trajectory.
    # NOTE(review): if size_limit <= 0 the loop never runs and this returns
    # 1 via the `i = 0` initializer — confirm that degenerate case is intended.
    return i + 1
class AISPoints:
    """A collection of AIS position reports backed by a single pandas DataFrame.

    One row per AIS message. An 'mmsi' column identifies vessels; optional
    columns ('ts', 'sog', 'cog', 'heading', 'label', ...) enable the
    corresponding methods.
    """

    def __init__(self, df=None):
        """Wrap `df`, parsing and sorting on the 'ts' column when present.

        :param df: dataframe of AIS messages, or None for an empty holder.
        """
        # Fix: the original evaluated `'ts' in df` unconditionally, which
        # raises TypeError when df is None despite None being the default.
        if df is not None and 'ts' in df:
            df['ts'] = pd.to_datetime(df.ts)
            df = df.sort_values(['mmsi', 'ts'])
        self.df = df

    @staticmethod
    def load_from_csv(file_name):
        """Read a CSV file and wrap its content into an AISPoints instance."""
        df = pd.read_csv(file_name)
        return AISPoints(df)

    def remove_outliers(self, features, rank=4):
        """Drop rows whose absolute z-score on any of `features` exceeds `rank`.

        :param features: column names to screen, one after another.
        :param rank: maximum absolute z-score kept; must be strictly positive.
        :raises ValueError: if rank is not strictly positive.
        """
        if rank <= 0:
            raise ValueError(f"Rank is equal to {rank}, must be positive and superior to 0")
        for feature in features:
            # z-scores are recomputed after each drop, so later features are
            # screened against the already-filtered data.
            self.df = self.df.drop(self.df[(np.abs(stats.zscore(self.df[feature])) > rank)].index)

    def normalize(self, features, normalization_type="min-max"):
        """Rescale `features` in place and return the scaling parameters.

        :param features: column names to rescale.
        :param normalization_type: "min-max" (to [0, 1]) or "standardization"
            (zero mean, unit std).
        :return: tuple (normalization_type, dict of per-feature factors).
        :raises ValueError: on an unknown normalization_type.
        """
        normalization_dict = {}
        if normalization_type == "min-max":
            for f in features:
                minimum = self.df[f].min()
                maximum = self.df[f].max()
                diff = maximum - minimum
                if diff == 0:
                    # Fix: the original printf-style print("... %d", diff)
                    # printed the literal "%d" instead of the value.
                    print(f"Warning: diff = {diff}")
                    diff = 1  # avoid division by zero on constant columns
                self.df[f] = (self.df[f] - minimum) / diff
                normalization_dict[f"{f}_minimum"] = minimum
                normalization_dict[f"{f}_maximum"] = maximum
        elif normalization_type == "standardization":
            # (an unused local `normalisation_factors` was removed here)
            for f in features:
                mean = self.df[f].mean()
                std = self.df[f].std()
                if std == 0:
                    print(f"Warning: std = {std}")
                    std = 1  # avoid division by zero on constant columns
                self.df[f] = (self.df[f] - mean) / std
                normalization_dict[f"{f}_mean"] = mean
                normalization_dict[f"{f}_std"] = std
        else:
            raise ValueError(f"{normalization_type} not a valid normalization method. Must be one of [min-max, "
                             f"standardization]")
        return normalization_type, normalization_dict

    def histogram(self, features, bins=10, ranges=None, label=None, y_field='label'):
        """N-dimensional normalized histogram of `features`.

        :param label: when given, restrict to rows where y_field == label.
        :return: array of relative frequencies summing to 1; uniform when no
            sample falls inside the ranges.
        """
        if label is not None:
            tmp = self.df[self.df[y_field] == label]
        else:
            tmp = self.df
        h = np.histogramdd(tmp[features].to_numpy(), bins, ranges)[0]
        if h.sum() == 0:
            # No sample in range: fall back to a uniform distribution.
            return np.full(h.shape, 1 / h.size)
        return h / h.sum()

    def disjointed_histogram(self, features, bins, ranges, label=None, y_field='label'):
        """One independent 1-D histogram per feature.

        :param bins: one bin count per feature, or a single int used for all.
        :param ranges: one (min, max) range per feature.
        :return: list of (counts, bin_edges) tuples as returned by np.histogram.
        """
        if label is not None:
            tmp = self.df[self.df[y_field] == label]
        else:
            tmp = self.df
        if isinstance(bins, int):  # idiom fix: was `type(bins) == int`
            bins = [bins for _ in features]
        histograms = []
        # `n_bins` renamed from `bin`, which shadowed the builtin.
        for feature, n_bins, f_range in zip(features, bins, ranges):
            histograms.append(np.histogram(tmp[feature], n_bins, f_range))
        return histograms

    def compute_diff_heading_cog(self):
        """Add a 'diff' column: angular distance in [0, 180] between heading and cog."""
        # Vectorized form of the original row-wise apply; same formula:
        # 180 - abs(abs(heading - cog) - 180).
        self.df["diff"] = 180 - ((self.df['heading'] - self.df['cog']).abs() - 180).abs()

    def clean_angles(self):
        """Drop rows whose 'cog' or 'heading' lies outside [0, 360]."""
        valid = (self.df["cog"].between(0, 360)
                 & self.df["heading"].between(0, 360))
        self.df = self.df[valid]

    def histogram_joint_x_y(self, x_fields=None, x_nb_bins=10, x_range=None,
                            y_nb_bins=2, y_fields='label', y_range=None):
        """Joint normalized histogram of the x features together with the label.

        Fix: the list defaults were mutable default arguments; they are now
        materialized per call (same effective values).
        """
        x_fields = ["sog", "diff"] if x_fields is None else x_fields
        x_range = [[0, 30], [0, 180]] if x_range is None else x_range
        y_range = [0, 1] if y_range is None else y_range
        return self.histogram(x_fields + [y_fields],
                              bins=[x_nb_bins for _ in x_fields] + [y_nb_bins],
                              ranges=x_range + [y_range])

    def histogram_x_knowing_y(self, x_fields=None, x_nb_bins=10, x_range=None,
                              y_nb_bins=2, y_field='label'):
        """Conditional histograms of x for each label value, stacked on a new last axis."""
        x_fields = ["sog", "diff"] if x_fields is None else x_fields
        x_range = [[0, 30], [0, 180]] if x_range is None else x_range
        result = []
        for i in range(y_nb_bins):
            layer = self.histogram(x_fields, bins=x_nb_bins, ranges=x_range, label=i, y_field=y_field)
            result.append(layer)
        return np.stack(result, axis=len(x_fields))

    def disjointed_histogram_x_knowing_y(self, features, x_nb_bins=10, x_range=None,
                                         y_nb_bins=4, y_field='label'):
        """Per-feature count histograms conditioned on each label value.

        :return: list (one per feature) of (y_nb_bins, x_nb_bins) count arrays.
        """
        x_range = [[0, 1]] if x_range is None else x_range
        out = []
        for feature, f_range in zip(features, x_range):
            result = []
            for i in range(y_nb_bins):
                layer, _ = np.histogram(self.df[self.df[y_field] == i][feature].to_numpy(),
                                        bins=x_nb_bins, range=f_range)
                if layer.sum() == 0:
                    # Empty class: flat pseudo-counts keep later ratios finite.
                    layer = np.full(layer.shape, 1)
                result.append(layer)
            out.append(np.stack(result))
        return out

    def histogram_y_knowing_x(self, x_fields=None, x_nb_bins=10, x_range=None,
                              y_nb_bins=2, y_field='label', y_range=None):
        """Conditional distribution of y given x over the joint histogram bins."""
        x_fields = ["sog", "diff"] if x_fields is None else x_fields
        x_range = [[0, 30], [0, 180]] if x_range is None else x_range
        y_range = [0, 1] if y_range is None else y_range
        h_joint = self.histogram_joint_x_y(x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range)
        y_hist = self.histogram(features=y_field, bins=y_nb_bins, ranges=[y_range])
        result = np.zeros(h_joint.shape)
        for idx, x in np.ndenumerate(h_joint):
            marginal = h_joint[idx[:-1]].sum()
            if marginal == 0:
                # No x-evidence in this cell: fall back to the y prior.
                result[idx] = y_hist[idx[-1]]
            else:
                result[idx] = x / marginal
        return result

    def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None):
        """Cut the points into per-vessel trajectories split on time gaps.

        :param time_gap: maximum gap (minutes) tolerated inside one trajectory.
        :param min_size: trajectories with at most this many rows are dropped.
        :raises ValueError: if the dataframe has no 'ts' column.
        """
        if 'ts' not in self.df:
            # Fix: the original raised a bare ValueError with no message.
            raise ValueError("dataframe must contain a 'ts' column")
        self.df['ts'] = pd.to_datetime(self.df['ts'], infer_datetime_format=True)
        self.df['ts_sec'] = self.df['ts'].apply(lambda x: datetime.timestamp(x))
        dat = self.df
        trajectories = []
        for mmsi in dat.mmsi.unique():
            trajectories += compute_trajectories(dat[dat['mmsi'] == mmsi], time_gap, min_size=min_size,
                                                 interpolation_time=interpolation_time)
        return trajectories

    def describe(self):
        """Summary dict: vessel/point counts, mean sog and diff, per-label counts."""
        # Local renamed from `stats`, which shadowed the scipy.stats import.
        summary = {"nb vessels": len(self.df.mmsi.unique()),
                   "nb points": len(self.df.index),
                   "average speed": self.df['sog'].mean(),
                   "average diff": self.df['diff'].mean()
                   }
        for n in np.sort(self.df['label'].unique()):
            summary[f"labeled {n}"] = len(self.df[self.df['label'] == n].index)
        return summary

    @staticmethod
    def fuse(*args):
        """Merge several AISPoints (or one list of them) into a single AISPoints.

        A single non-list argument is returned unchanged (not copied).
        """
        if len(args) == 1:
            if not isinstance(args[0], list):
                return args[0]
            table = args[0]
        else:
            table = args
        dfs = [ais_points.df for ais_points in table]
        # NOTE(review): `.reindex()` with no arguments is a no-op copy;
        # `reset_index(drop=True)` was possibly intended — confirm with callers
        # before changing.
        return AISPoints(pd.concat(dfs).reindex())
Loading