diff --git a/.gitignore b/.gitignore index 1c2d52b6c9c31d02d96bdbd5f5c273d8f68efcb9..b139f3e8d20a35b89f3b7ebe50a95cf38649dc15 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ .idea/* +build/ +skais.egg-info/ +*.coverage +*__pycache__* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e51877722f5f8fd2703dd650d966474e2aa3baa6..830ebea1efd20df4d1061a77678e77cc8478a75f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,7 @@ tests: script: - pip install -r requirements.txt - pip install --no-deps . - - pytest --junitxml=report.xml + - pytest --junitxml=report.xml --cov=src -vvv artifacts: when: always reports: diff --git a/README.rst b/README.md similarity index 70% rename from README.rst rename to README.md index c0ffc58b69a1f355c60e78ad0735810e5dcba849..cb3d3abebb11155cdf65e66189f302c39fd04e41 100644 --- a/README.rst +++ b/README.md @@ -1,6 +1,9 @@ skais ===== +[](https://gitlab.lis-lab.fr/raphael.sturgis/skais/-/commits/develop) +[](https://gitlab.lis-lab.fr/raphael.sturgis/skais/-/commits/develop) + ``skais`` is a Python package for Raphael Sturgis' PhD at LIS and Searoutes. Install diff --git a/requirements.txt b/requirements.txt index 7d1581b18d12e886eca1b169a2639465adb91d63..0634b7c3db2603b5ca2ba0a5b6484f863be17d00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ setuptools~=57.0.0 numpy~=1.19.5 numba~=0.53.1 scipy~=1.5.4 -POT~=0.7.0 \ No newline at end of file +hmmlearn~=0.2.6 +scikit-learn~=1.0.1 \ No newline at end of file diff --git a/setup.py b/setup.py index f92fc917e87c8f11a6dc42ad6f563c4d0969df1c..3b11d2f9d4eff46f07cbf4c1ada186054cfcb264 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ def setup_package(): VERSION = get_version() here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: + with open(os.path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() mod_dir = NAME diff --git a/skais/__init__.py b/skais/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..834dc3a90c4fe62af22a6facecb79e16795e97fb 100644 --- a/skais/__init__.py +++ b/skais/__init__.py @@ -0,0 +1 @@ +__version__ = "0.1a" diff --git a/skais/ais/__init__.py b/skais/ais/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/ais/ais_points.py b/skais/ais/ais_points.py index e0981d215f8989f53a1fe5f088b5b9b542631f7c..3367cfd7e489cd913b5e0de2101eb35063ef7676 100644 --- a/skais/ais/ais_points.py +++ b/skais/ais/ais_points.py @@ -1,44 +1,38 @@ -import pickle -from datetime import datetime - -import pandas as pd import numpy as np -from numba import jit +import pandas as pd from scipy.stats import stats -from skais.ais.ais_trajectory import AISTrajectory - - -def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): - n_sample = len(df.index) - result = [] - work_df = df.copy() - - index = 0 - while index < n_sample: - i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) - trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time) - if len(trajectory.df.index) > min_size: - result.append(trajectory) - work_df = work_df[i:] - index += i - - return result - -@jit(nopython=True) -def compute_trajectory(times, time_gap, size_limit): - n_samples = len(times) - - previous_date = times[0] - - i = 0 - for i in range(size_limit): - if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap): - return i - previous_date = times[i] - - return i + 1 +# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): +# n_sample = len(df.index) +# result = [] +# work_df = df.copy() +# +# index = 0 +# while index < n_sample: +# i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) +# trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time) +# if len(trajectory.df.index) > min_size: +# result.append(trajectory) +# work_df = work_df[i:] +# index += i +# +# return result +# +# +# @jit(nopython=True) +# def compute_trajectory(times, time_gap, size_limit): +# n_samples = len(times) +# +# previous_date = times[0] +# +# i = 0 +# for i in range(size_limit): +# if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap): +# return i +# previous_date = times[i] +# +# return i + 1 class AISPoints: @@ -49,18 +43,33 @@ class AISPoints: self.df = df - @staticmethod - def load_from_csv(file_name): - df = pd.read_csv(file_name) - ais_points = AISPoints(df) - return ais_points + def describe(self): + description = { + "nb vessels": len(self.df.mmsi.unique()), + "nb points": len(self.df.index), + "average speed": self.df['sog'].mean(), + "average diff": self.df['diff'].mean() + } + + for n in np.sort(self.df['label'].unique()): + description[f"labeled {n}"] = len(self.df[self.df['label'] == n].index) + + return description + # cleaning functions def remove_outliers(self, features, rank=4): if rank <= 0: raise ValueError(f"Rank is equal to {rank}, must be positive and superior to 0") for feature in features: self.df = self.df.drop(self.df[(np.abs(stats.zscore(self.df[feature])) > rank)].index) + def clean_angles(self): + self.df = self.df[self.df["cog"] <= 360] + self.df = self.df[self.df["cog"] >= 0] + + self.df = self.df[self.df["heading"] <= 360] + self.df = self.df[self.df["heading"] >= 0] + def normalize(self, features, normalization_type="min-max"): normalization_dict = {} if normalization_type == "min-max": @@ -92,115 +101,12 @@ class AISPoints: f"standardization]") return normalization_type, normalization_dict - def histogram(self, features, bins=10, ranges=None, label=None, y_field='label'): - if label is not None: - tmp = self.df[self.df[y_field] == label] - else: - tmp = self.df - dat = tmp[features] - h = np.histogramdd(dat.to_numpy(), bins, ranges)[0] - if h.sum() == 0: - return np.full(h.shape, 1 / h.size) - else: - return h / h.sum() - - def disjointed_histogram(self, features, bins, ranges, label=None, y_field='label'): - if label is not None: - tmp = self.df[self.df[y_field] == label] - else: - tmp = self.df - - if type(bins) == int: - bins = [bins for _ in features] - - histograms = [] - for feature, bin, f_range in zip(features, bins, ranges): - histograms.append(np.histogram(tmp[feature], bin, f_range)) - return histograms - - def compute_diff_heading_cog(self): - self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180), - axis=1) - - def clean_angles(self): - self.df = self.df[self.df["cog"] <= 360] - self.df = self.df[self.df["cog"] >= 0] - - self.df = self.df[self.df["heading"] <= 360] - self.df = self.df[self.df["heading"] >= 0] - - def histogram_joint_x_y(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]] - , y_nb_bins=2, y_fields='label', y_range=[0, 1]): - return self.histogram(x_fields + [y_fields], - bins=[x_nb_bins for i in x_fields] + [y_nb_bins], - ranges=x_range + [y_range]) - - def histogram_x_knowing_y(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]] - , y_nb_bins=2, y_field='label'): - result = [] - for i in range(y_nb_bins): - layer = self.histogram(x_fields, bins=x_nb_bins, ranges=x_range, label=i, y_field=y_field) - result.append(layer) - return np.stack(result, axis=len(x_fields)) - - def disjointed_histogram_x_knowing_y(self, features, x_nb_bins=10, x_range=[[0, 1]] - , y_nb_bins=4, y_field='label'): - out = [] - for feature, f_range in zip(features, x_range): - result = [] - for i in range(y_nb_bins): - layer, _ = np.histogram(self.df[self.df[y_field] == i][feature].to_numpy(), bins=x_nb_bins, - range=f_range) - if layer.sum() == 0: - layer = np.full(layer.shape, 1) - result.append(layer) - out.append(np.stack(result)) - return out - - def histogram_y_knowing_x(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]] - , y_nb_bins=2, y_field='label', y_range=[0, 1]): - h_joint = self.histogram_joint_x_y(x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range) - y_hist = self.histogram(features=y_field, bins=y_nb_bins, ranges=[y_range]) - - result = np.zeros(h_joint.shape) - - for idx, x in np.ndenumerate(h_joint): - if h_joint[idx[:-1]].sum() == 0: - result[idx] = y_hist[idx[-1]] - else: - result[idx] = x / h_joint[idx[:-1]].sum() - return result - - def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None): - - if 'ts' in self.df: - - self.df['ts'] = pd.to_datetime(self.df['ts'], infer_datetime_format=True) - self.df['ts_sec'] = self.df['ts'].apply(lambda x: datetime.timestamp(x)) - - dat = self.df - else: - raise ValueError - - trajectories = [] - for mmsi in dat.mmsi.unique(): - trajectories += compute_trajectories(dat[dat['mmsi'] == mmsi], time_gap, min_size=min_size, - interpolation_time=interpolation_time) - - return trajectories - - def describe(self): - stats = {"nb vessels": len(self.df.mmsi.unique()), - "nb points": len(self.df.index), - "average speed": self.df['sog'].mean(), - "average diff": self.df['diff'].mean() - } - - for n in np.sort(self.df['label'].unique()): - stats[f"labeled {n}"] = len(self.df[self.df['label'] == n].index) - - return stats + # New features + def compute_drift(self): + self.df["drift"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180), + axis=1) + # Static methods @staticmethod def fuse(*args): if len(args) == 1: @@ -215,3 +121,9 @@ class AISPoints: dfs.append(aisPosition.df) return AISPoints(pd.concat(dfs).reindex()) + + @staticmethod + def load_from_csv(file_name): + df = pd.read_csv(file_name) + ais_points = AISPoints(df) + return ais_points diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py index c3a339c895e80366c42c35560f4a09819cce0af8..b719d3de5350b6ba15da13c857dd69462ac711c0 100644 --- a/skais/ais/ais_trajectory.py +++ b/skais/ais/ais_trajectory.py @@ -1,177 +1,158 @@ -import math - import pandas as pd import numpy as np from numba import jit from scipy.interpolate import interp1d -from skais.utils.geography import great_circle -from skais.utils.stats import calc_std_dev +from skais.ais.ais_points import AISPoints + -PI = math.pi +class NoTimeInformation(Exception): + pass @jit(nopython=True) -def compute_trajectory(times, time_gap, size_limit): +def compute_trajectory(times, time_gap): n_samples = len(times) previous_date = times[0] i = 0 - for i in range(size_limit): - if i >= n_samples: - break - if (times[i] - previous_date) / 60 > time_gap: + while i < n_samples: + if (times[i] - previous_date) > time_gap: break previous_date = times[i] + i += 1 return i -@jit(nopython=True) -def compute_std(dat, radius): - stds = np.empty(dat.shape[0]) - - dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) - for i in range(radius, dat.shape[0] - radius): - stds[i - radius] = np.std(dat[i - radius:i + radius + 1]) - - return stds - - -@jit(nopython=True) -def to_rad(degree): - return (degree / 360.0) * (2 * PI) - - -@jit(nopython=True) -def to_deg(rad): - return (rad * 360.0) / (2 * PI) - - -@jit(nopython=True) -def bearing(p1, p2): - long1 = to_rad(p1[0]) - long2 = to_rad(p2[0]) - - lat1 = to_rad(p1[1]) - lat2 = to_rad(p2[1]) - - y = np.sin(long2 - long1) * np.cos(lat2) - x = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(long2 - long1) - return to_deg(np.arctan2(y, x)) - - # @jit(nopython=True) -def compute_position_angle_std(dat, radius): - angles_stds = np.empty(dat.shape[0]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius] - n_samples = len(data) - center = (data[:, 0].mean(), data[:, 1].mean()) - - angles_sum = [] - for j in range(n_samples): - p1 = (data[j][0], data[j][1]) - - alpha = bearing(p1, center) - angles_sum.append(alpha) - - angles_stds[i] = calc_std_dev(angles_sum) - return angles_stds - - -@jit(nopython=True) -def compute_position_angle_mean(dat, radius): - angles_means = np.empty(dat.shape[0]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius] - n_samples = len(data) - center = (data[:, 0].mean(), data[:, 1].mean()) - - cos = sin = 0 - for j in range(n_samples): - p1 = (data[j][0], data[j][1]) - - alpha = bearing(p1, center) - - cos += np.cos(np.radians(alpha)) - sin += np.sin(np.radians(alpha)) - - angles_means[i] = np.arctan2(sin, cos) - return angles_means - - -def compute_position_dist_mean(dat, radius): - dist_means = np.empty(dat.shape[0]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius] - n_samples = len(data) - center = (data[:, 0].mean(), data[:, 1].mean()) - - dist_sum = 0 - for j in range(n_samples - 1): - p1 = (data[j][0], data[j][1]) - - dist_sum += great_circle(p1[0], center[0], p1[1], center[1]) - - dist_means[i] = dist_sum / (n_samples - 1) - return dist_means - - -def compute_position_dist_std(dat, radius): - dist_means = np.empty(dat.shape[0]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius] - n_samples = len(data) - center = (data[:, 0].mean(), data[:, 1].mean()) - - dist_sum = [] - for j in range(n_samples - 1): - p1 = (data[j][0], data[j][1]) - - dist_sum.append(great_circle(p1[0], center[0], p1[1], center[1])) - - dist_means[i] = np.std(dist_sum) - return dist_means - - -def compute_point_angles(dat): - angles = np.zeros(dat.shape[0]) - for i in range(1, dat.shape[0] - 1): - p1 = (dat[i - 1][0], dat[i - 1][1]) - p2 = (dat[i][0], dat[i][1]) - p3 = (dat[i + 1][0], dat[i + 1][1]) - - alpha = bearing(p2, p1) - beta = bearing(p2, p3) - - angles[i] = 180 - abs(abs(alpha - beta) - 180) - return angles - - -def l1_angle(dat, radius): - l1 = np.zeros(dat.shape) - - dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius+1] - l1[i - radius] = np.linalg.norm(data, ord=1) - - return l1 - +# def compute_position_angle_std(dat, radius): +# angles_stds = np.empty(dat.shape[0]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius] +# n_samples = len(data) +# center = (data[:, 0].mean(), data[:, 1].mean()) +# +# angles_sum = [] +# for j in range(n_samples): +# p1 = (data[j][0], data[j][1]) +# +# alpha = bearing(p1, center) +# angles_sum.append(alpha) +# +# angles_stds[i] = calc_std_dev(angles_sum) +# return angles_stds +# +# +# @jit(nopython=True) +# def compute_position_angle_mean(dat, radius): +# angles_means = np.empty(dat.shape[0]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius] +# n_samples = len(data) +# center = (data[:, 0].mean(), data[:, 1].mean()) +# +# cos = sin = 0 +# for j in range(n_samples): +# p1 = (data[j][0], data[j][1]) +# +# alpha = bearing(p1, center) +# +# cos += np.cos(np.radians(alpha)) +# sin += np.sin(np.radians(alpha)) +# +# angles_means[i] = np.arctan2(sin, cos) +# return angles_means +# +# +# def compute_position_dist_mean(dat, radius): +# dist_means = np.empty(dat.shape[0]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius] +# n_samples = len(data) +# center = (data[:, 0].mean(), data[:, 1].mean()) +# +# dist_sum = 0 +# for j in range(n_samples - 1): +# p1 = (data[j][0], data[j][1]) +# +# dist_sum += great_circle(p1[0], center[0], p1[1], center[1]) +# +# dist_means[i] = dist_sum / (n_samples - 1) +# return dist_means +# +# +# def compute_position_dist_std(dat, radius): +# dist_means = np.empty(dat.shape[0]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius] +# n_samples = len(data) +# center = (data[:, 0].mean(), data[:, 1].mean()) +# +# dist_sum = [] +# for j in range(n_samples - 1): +# p1 = (data[j][0], data[j][1]) +# +# dist_sum.append(great_circle(p1[0], center[0], p1[1], center[1])) +# +# dist_means[i] = np.std(dist_sum) +# return dist_means +# +# + + +# def l1_angle(dat, radius): +# l1 = np.zeros(dat.shape) +# +# dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius + 1] +# l1[i - radius] = np.linalg.norm(data, ord=1) +# +# return l1 +# +# +# def l2_angle(dat, radius): +# l2 = np.zeros(dat.shape) +# +# dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius + 1] +# l2[i - radius] = np.linalg.norm(data, ord=2) +# return l2 +# +# +# def angle_dispersion(dat, radius): +# disp = np.zeros(dat.shape) +# +# dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) +# for i in range(radius, dat.shape[0] - radius): +# data = dat[i - radius:i + radius + 1] +# disp[i - radius] = angular_dispersion(np.radians(data)) +# +# return disp + + +def apply_func_on_window(dat, func, radius, on_edge='copy'): + result = np.zeros(dat.shape) + if on_edge == 'copy': + dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) + for i in range(radius, dat.shape[0] - radius): + data = dat[i - radius:i + radius + 1] + result[i - radius] = func(data) + return result -def l2_angle(dat, radius): - l2 = np.zeros(dat.shape) - dat = np.concatenate([np.full(radius, dat[0]), dat, np.full(radius, dat[-1])]) - for i in range(radius, dat.shape[0] - radius): - data = dat[i - radius:i + radius+1] - l2[i - radius] = np.linalg.norm(data, ord=2) - return l2 +def apply_time_sequence(dat, time, func): + result = np.empty(dat.shape[0]) + result[0] = func(dat[0], dat[1], time[0], time[1]) + for i in range(1, dat.shape[0]): + result[i] = func(dat[i - 1], dat[i], time[i - 1], time[i]) + return result -class AISTrajectory: +class AISTrajectory(AISPoints): def __init__(self, df, interpolation_time=None): df = df.drop_duplicates(subset=['ts_sec']) @@ -181,7 +162,7 @@ class AISTrajectory: discrete_columns = ['navstatus', 'label'] new_df = pd.DataFrame() t_raw = df['ts_sec'].to_numpy() - t_interp1d = np.arange(start=t_raw[0], stop=t_raw[-1]+1, + t_interp1d = np.arange(start=t_raw[0], stop=t_raw[-1] + 1, step=interpolation_time * 60) new_df['ts_sec'] = t_interp1d @@ -201,103 +182,52 @@ class AISTrajectory: df = new_df # self.df = df.dropna() - self.df = df - - def compute_angle_l1(self, radius): - dat = self.df['angles_diff'].to_numpy() - l1 = l1_angle(dat, radius) - self.df[f"angle_l1"] = l1 - - def compute_angle_l2(self, radius): - dat = self.df['angles_diff'].to_numpy() - l2 = l2_angle(dat, radius) - self.df[f"angle_l2"] = l2 - - def normalize(self, features, normalization_type="min-max"): - normalization_dict = {} - if normalization_type == "min-max": - for f in features: - minimum = self.df[f].min() - maximum = self.df[f].max() - self.df[f] = (self.df[f] - minimum) / (maximum - minimum) - normalization_dict[f"{f}_minimum"] = minimum - normalization_dict[f"{f}_maximum"] = maximum - - elif normalization_type == "standardization": - normalisation_factors = ("standardization", {}) - for f in features: - mean = self.df[f].mean() - std = self.df[f].std() - if std == 0: - print("Warning: std = %d", std) - std = 1 - self.df[f] = (self.df[f] - mean) / std - normalization_dict[f"{f}_mean"] = mean - normalization_dict[f"{f}_std"] = std + AISPoints.__init__(self, df) - else: - raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, " - f"standardization]") - return normalization_type, normalization_dict - - def compute_derivative(self, field): - dt = self.df['ts_sec'].diff() / 60 - - dv = self.df[field].diff().div(dt, axis=0, ) + def __eq__(self, other): + return self.df.equals(other.df) - self.df['d_' + field] = dv - - def compute_diff(self, field1, field2): - self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x[field1] - x[field2]) - 180), - axis=1) + def sliding_window(self, size=10, offset=1, fields=None): + result = [] - def compute_all_derivatives(self): - fields = ['cog', 'sog', 'rot', 'heading'] + if len(self.df.index) >= size: + arr = self.to_numpy(fields) + prev_index = 0 + while prev_index + size < len(self.df.index) + 1: + result.append(arr[prev_index:prev_index + size]) + prev_index += offset - for field in fields: - self.compute_derivative(field) + return result - def compute_all_stds(self, radius): - fields = ['cog', 'sog', 'rot', 'heading', 'latitude', 'longitude'] + def apply_func_on_time_window(self, func, radius, column, new_column=None): + dat = self.df[column].to_numpy() + result = apply_func_on_window(dat, func, radius, on_edge='copy') - for field in fields: - dat = self.df[field].to_numpy() - stds = compute_std(dat, radius) - stds[-radius:] = np.nan - stds[:radius] = np.nan - self.df[f"{field}_std"] = stds + if new_column is None: + self.df[column] = result + else: + self.df[new_column] = result - def compute_position_features(self, radius): - dat = np.stack([self.df.longitude.to_numpy(), self.df.latitude.to_numpy()], axis=1) - std = compute_position_angle_std(dat, radius) - std[-radius:] = np.nan - std[:radius] = np.nan - self.df[f"angle_std"] = std + def apply_time_sequence_func(self, func, column, new_column=None): + dat = self.df[column].to_numpy() + time = self.df['ts_sec'].to_numpy() - mean = compute_position_angle_mean(dat, radius) - mean[-radius:] = np.nan - mean[:radius] = np.nan - self.df[f"angle_mean"] = mean + result = apply_time_sequence(dat, time, func) - mean_dist = compute_position_dist_mean(dat, radius) - mean_dist[-radius:] = np.nan - mean_dist[:radius] = np.nan - self.df[f"dist_mean"] = mean_dist + if new_column is None: + self.df[column] = result + else: + self.df[new_column] = result - std_dist = compute_position_dist_std(dat, radius) - std_dist[-radius:] = np.nan - std_dist[:radius] = np.nan - self.df[f"dist_std"] = std_dist + def apply_func_on_points(self, func, column, new_column=None): + dat = self.df[column].to_numpy() - angles = compute_point_angles(dat) - angles[0] = np.nan - angles[-1] = np.nan - self.df[f"angles_diff"] = angles + result = np.array(list(map(func, dat))) - def compute_sqrt_sog(self): - sog = self.df['sog'].to_numpy() - sog[sog < 0] = 0 - self.df["sog_sqrt"] = np.sqrt(sog) + if new_column is None: + self.df[column] = result + else: + self.df[new_column] = result def to_numpy(self, fields=None): @@ -308,18 +238,6 @@ class AISTrajectory: return np.squeeze(df.to_numpy()) - def sliding_window(self, size=10, offset=1, fields=None): - result = [] - - if len(self.df.index) >= size: - arr = self.to_numpy(fields) - prev_index = 0 - while prev_index + size < len(self.df.index) + 1: - result.append(arr[prev_index:prev_index + size]) - prev_index += offset - - return result - def to_geojson(self): coordinates = [] for index, row in self.df.iterrows(): @@ -327,19 +245,97 @@ class AISTrajectory: return {"type": "LineString", "coordinates": coordinates} - def get_stopped_snippets(self, column, exclude_label=0, time_gap=5, size_limit=10000): - df = self.df.drop(self.df[self.df[column] == exclude_label].index) + def split_trajectory(self, time_gap=600): + if 'ts_sec' not in self.df: + raise NoTimeInformation() - work_df = df.copy() - n_sample = len(df.index) + n_sample = len(self.df.index) result = [] + work_df = self.df.copy() index = 0 while index < n_sample: - i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) + i = compute_trajectory(self.df['ts_sec'][index:].to_numpy(), time_gap) trajectory = AISTrajectory(work_df[:i]) result.append(trajectory) work_df = work_df[i:] index += i return result + + # def compute_angle_l1(self, radius): + # dat = self.df['angles_diff'].to_numpy() + # l1 = l1_angle(dat, radius) + # self.df[f"angle_l1"] = l1 + # + # def compute_angle_l2(self, radius): + # dat = self.df['angles_diff'].to_numpy() + # l2 = l2_angle(dat, radius) + # self.df[f"angle_l2"] = l2 + + # def compute_derivative(self, field): + # dt = self.df['ts_sec'].diff() / 60 + # + # dv = self.df[field].diff().div(dt, axis=0, ) + # + # self.df['d_' + field] = dv + # + # def compute_diff(self, field1, field2): + # self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x[field1] - x[field2]) - 180), + # axis=1) + # + # def compute_all_derivatives(self): + # fields = ['cog', 'sog', 'rot', 'heading'] + # + # for field in fields: + # self.compute_derivative(field) + # + # def compute_all_stds(self, radius): + # fields = ['cog', 'sog', 'rot', 'heading', 'latitude', 'longitude'] + # + # for field in fields: + # dat = self.df[field].to_numpy() + # stds = compute_std(dat, radius) + # stds[-radius:] = np.nan + # stds[:radius] = np.nan + # self.df[f"{field}_std"] = stds + # + # def compute_all_dispersions(self, radius): + # fields = ['cog', 'heading', 'angles_diff'] + # for field in fields: + # if field in self.df.columns: + # dat = self.df[field].to_numpy() + # disp = angle_dispersion(dat, radius) + # self.df[f"{field}_disp"] = disp + # + # def compute_position_features(self, radius): + # dat = np.stack([self.df.longitude.to_numpy(), self.df.latitude.to_numpy()], axis=1) + # std = compute_position_angle_std(dat, radius) + # std[-radius:] = np.nan + # std[:radius] = np.nan + # self.df[f"angle_std"] = std + # + # mean = compute_position_angle_mean(dat, radius) + # mean[-radius:] = np.nan + # mean[:radius] = np.nan + # self.df[f"angle_mean"] = mean + # + # mean_dist = compute_position_dist_mean(dat, radius) + # mean_dist[-radius:] = np.nan + # mean_dist[:radius] = np.nan + # self.df[f"dist_mean"] = mean_dist + # + # std_dist = compute_position_dist_std(dat, radius) + # std_dist[-radius:] = np.nan + # std_dist[:radius] = np.nan + # self.df[f"dist_std"] = std_dist + # + # angles = compute_point_angles(dat) + # angles[0] = np.nan + # angles[-1] = np.nan + # self.df[f"angles_diff"] = angles + # + # def compute_sqrt_sog(self): + # sog = self.df['sog'].to_numpy() + # sog[sog < 0] = 0 + # self.df["sog_sqrt"] = np.sqrt(sog) diff --git a/skais/learn/__init__.py b/skais/learn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/learn/hmm_gmm_classifier.py b/skais/learn/hmm_gmm_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..953041d1d5c890b2244ae95233e27680b8c49f55 --- /dev/null +++ b/skais/learn/hmm_gmm_classifier.py @@ -0,0 +1,169 @@ +import random + +from hmmlearn.hmm import GMMHMM, GaussianHMM +from numba import jit +from scipy import linalg +from sklearn.datasets import make_spd_matrix +import numpy as np + +def split_trajectories(feature_seq, label_seq, n_classes): + if len(feature_seq) != len(label_seq): + raise ValueError + if len(feature_seq) == 0: + return {} + + sequence_length = len(feature_seq) + current_label = label_seq[0] + result = {i: [] for i in range(n_classes)} + current_seq = [] + sequence_list = [] + for i in range(sequence_length): + if current_label != label_seq[i]: + result[current_label].append(current_seq) + sequence_list.append((current_label, current_seq)) + current_label = label_seq[i] + current_seq = [] + current_seq.append(feature_seq[i]) + + result[current_label].append(current_seq) + sequence_list.append((current_label, current_seq)) + + return result, sequence_list + +class GMMHMMClassifier: + def __init__(self, nb_states): + self.n_features = 0 + + if type(nb_states) is not list: + self.nb_states = np.array([nb_states]) + else: + self.nb_states = np.array(nb_states) + + self.hmms = [] + self.n_classes = len(self.nb_states) + for i, nb_state in enumerate(self.nb_states): + self.hmms.append(GaussianHMM(n_components=nb_state, covariance_type='full')) + + self.hmm = GaussianHMM(n_components=sum(self.nb_states), covariance_type='full', init_params='', n_iter=100) + + self.predict_dictionary = {} + self.predictor = [] + count = 0 + for i, ii in enumerate(self.nb_states): + self.predictor.append({}) + for j in range(ii): + self.predictor[i][j] = count + self.predict_dictionary[count] = i + count += 1 + + def fit(self, x, y): + sequences = {i: [] for i in range(self.n_classes)} + self.n_features = x[0].shape[1] + for feature_seq, label_seq in zip(x, y): + split_seq, _ = split_trajectories(feature_seq, label_seq, self.n_classes) + + for key in sequences.keys(): + sequences[key] += split_seq[key] + + for i, seqs in sequences.items(): + self.hmms[i].n_features = self.n_features + if sum([np.array(s).size for s in seqs]) > sum(self.hmms[i]._get_n_fit_scalars_per_param().values()): + self.hmms[i].fit(np.concatenate(seqs), list(map(len, seqs))) + for j, value in enumerate(self.hmms[i].transmat_.sum(axis=1)): + if value == 0: + self.hmms[i].transmat_[j][j] = 1.0 + self.hmms[i].covars_[j] = make_spd_matrix(self.hmms[i].n_features) + else: + self.hmms[i] = None + + predict = [] + for feature_seq, label_seq in zip(x, y): + _, sequences_list = split_trajectories(feature_seq, label_seq, self.n_classes) + pred = np.array([]) + for label, seq in sequences_list: + if self.hmms[label] is not None: + _, state_sequence = self.hmms[label].decode(np.array(seq), [len(seq)]) + pred = np.append(pred, [self.predictor[label][i] for i in state_sequence]) + if len(pred) != 0: + predict.append(pred) + + start = np.zeros(sum(self.nb_states)) + T_mat = np.zeros((sum(self.nb_states), sum(self.nb_states))) + prior = -1 + count = np.zeros(sum(self.nb_states)) + for pred in predict: + start[int(pred[0])] += 1 + + for p in pred: + if prior != -1: + T_mat[prior][int(p)] += 1 + count[prior] += 1 + prior = int(p) + + for i in range(sum(self.nb_states)): + for j in range(sum(self.nb_states)): + if T_mat[i][j] > 0: + T_mat[i][j] = T_mat[i][j] / count[i] + + self.hmm.startprob_ = start / sum(start) + self.hmm.transmat_ = T_mat + + for i, value in enumerate(self.hmm.transmat_.sum(axis=1)): + if value == 0: + self.hmm.transmat_[i][i] = 1.0 + + means = [] + covars = [] + for i, model in enumerate(self.hmms): + if self.hmms[i] is not None: + means.append(model.means_) + covars.append(model.covars_) + else: + means.append(np.zeros((self.nb_states[i], x[0].shape[1]))) + covars.append(np.stack([make_spd_matrix(x[0].shape[1]) + for _ in range(self.nb_states[i])], axis=0)) + + means = np.concatenate(means) + covars = np.concatenate(covars) + for n, cv in enumerate(covars): + if count[n] <= 3: + covars[n] = np.identity(cv.shape[0]) + if not np.allclose(cv, cv.T) or np.any(linalg.eigvalsh(cv) <= 0): + covars[n] += np.identity(cv.shape[0]) * 10 ** -15 + + self.hmm.means_ = means + self.hmm.covars_ = covars + + def predict(self, X): + X_all = np.concatenate(X) + lenghts = [len(x) for x in X] + + return np.array([self.predict_dictionary[i] for i in self.hmm.predict(X_all, lenghts)]) + + def predict_raw(self, X): + X_all = [x for i in X for x in i] + lenghts = [len(x) for x in X] + + return self.hmm.predict(X_all, lenghts) + +@jit(nopython=True) +def hmm_probabilities(predict, nb_states): + n_states = nb_states.sum() + start = np.zeros(n_states) + T_mat = np.zeros((n_states, n_states)) + for pred in predict: + start[int(pred[0])] += 1 + prior = int(pred[0]) + + for p in pred[1:]: + T_mat[prior][int(p)] += 1 + prior = int(p) + + mat_sum = T_mat.sum(axis=1) + for i in range(n_states): + if mat_sum[i] == 0: + T_mat[i][i] = 1.0 + else: + T_mat[i] = T_mat[i] / mat_sum[i] + + return start / start.sum(), T_mat diff --git a/skais/learn/point_representation.py b/skais/learn/point_representation.py new file mode 100644 index 0000000000000000000000000000000000000000..beaad6b5dc54a6fa107b4632ff98374c275a15f2 --- /dev/null +++ b/skais/learn/point_representation.py @@ -0,0 +1,71 @@ +import numpy as np + + +def histogram(ais_points, features, bins, ranges=None, label=None, y_field='label'): + if label is not None: + tmp = ais_points.df[ais_points.df[y_field] == label] + else: + tmp = ais_points.df + dat = tmp[features] + h = np.histogramdd(dat.to_numpy(), bins, ranges)[0] + if h.sum() == 0: + return np.full(h.shape, 1 / h.size) + else: + return h / h.sum() + + +def disjointed_histogram(ais_points, features, bins, ranges, label=None, y_field='label'): + if label is not None: + tmp = ais_points.df[ais_points.df[y_field] == label] + else: + tmp = ais_points.df + + if type(bins) == int: + bins = [bins for _ in features] + + histograms = [] + for feature, h_bin, f_range in zip(features, bins, ranges): + histograms.append(np.histogram(tmp[feature], h_bin, f_range)) + return histograms + + +def histogram_joint_x_y(ais_points, x_fields, x_nb_bins, x_range, y_fields, y_nb_bins, y_range): + return histogram(ais_points, x_fields + [y_fields], + bins=[x_nb_bins for _ in x_fields] + [y_nb_bins], + ranges=x_range + [y_range]) + + +def histogram_x_knowing_y(ais_points, x_fields, x_nb_bins, x_range, y_nb_bins, y_field): + result = [] + for i in range(y_nb_bins): + layer = histogram(ais_points, x_fields, bins=x_nb_bins, ranges=x_range, label=i, y_field=y_field) + result.append(layer) + return np.stack(result, axis=len(x_fields)) + + +def disjointed_histogram_x_knowing_y(ais_points, features, x_nb_bins, x_range, y_nb_bins, y_field): + out = [] + for feature, f_range in zip(features, x_range): + result = [] + for i in range(y_nb_bins): + layer, _ = np.histogram(ais_points.df[ais_points.df[y_field] == i][feature].to_numpy(), bins=x_nb_bins, + range=f_range) + if layer.sum() == 0: + layer = np.full(layer.shape, 1) + result.append(layer) + out.append(np.stack(result)) + return out + + +def histogram_y_knowing_x(ais_points, x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range): + h_joint = histogram_joint_x_y(ais_points, x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range) + y_hist = histogram(ais_points, features=y_field, bins=y_nb_bins, ranges=[y_range]) + + result = np.zeros(h_joint.shape) + + for idx, x in np.ndenumerate(h_joint): + if h_joint[idx[:-1]].sum() == 0: + result[idx] = y_hist[idx[-1]] + else: + result[idx] = x / h_joint[idx[:-1]].sum() + return result diff --git a/skais/process/__init__.py b/skais/process/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/process/ais_operations.py b/skais/process/ais_operations.py new file mode 100644 index 0000000000000000000000000000000000000000..1075fd079ec6cfdc1bf7190c01428caaf8e049fc --- /dev/null +++ b/skais/process/ais_operations.py @@ -0,0 +1,14 @@ +from skais.ais.ais_trajectory import AISTrajectory + +# Trajectories +""" + Separates AISPoints into individual trajectories +""" + + +def get_trajectories(ais_points): + trajectories = [] + for mmsi in ais_points.df.mmsi.unique(): + trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True))) + + return trajectories diff --git a/skais/process/basic_features_operations.py b/skais/process/basic_features_operations.py new file mode 100644 index 0000000000000000000000000000000000000000..c4c7d3bbbefe115b87696c4f02f4be8cb0ba532e --- /dev/null +++ b/skais/process/basic_features_operations.py @@ -0,0 +1,46 @@ +import cmath + +import numpy as np + + +def angular_average_vector(angles): + n = len(angles) + x = np.sum(np.cos(angles)) / n + y = np.sum(np.sin(angles)) / n + + return np.array([x, y]) + + +def angular_dispersion(angles): + x, y = angular_average_vector(angles) + return np.sqrt(x ** 2 + y ** 2) + + +def angular_mean(angles): + x, y = angular_average_vector(angles) + theta = abs(np.arctan2(x, y)) + + if y > 0 and x > 0: # first Q + return theta + if y > 0 >= x: # Second Q + return np.pi / 2 + theta + if y <= 0 < x: # Fourth Q + return np.pi / 2 - theta + else: # Third Q + return - theta + + +def angular_std(angles): + return 1 - angular_dispersion(angles) + + +def angular_time_derivative(angle1, angle2, ts1, ts2): + z1 = np.cos(angle1) + np.sin(angle1) * 1j + z2 = np.cos(angle2) + np.sin(angle2) * 1j + + z = z2 / z1 + return cmath.polar(z)[1] / (ts2 - ts1) + + +def time_derivative(f1, f2, ts1, ts2): + return (f2 - f1) / (ts2 - ts1) diff --git a/skais/process/geography.py b/skais/process/geography.py new file mode 100644 index 0000000000000000000000000000000000000000..3f19bb3a9eb4cc20937ba6b9e8f9e34e60ee96b7 --- /dev/null +++ b/skais/process/geography.py @@ -0,0 +1,55 @@ +import math + +import numpy as np +from numba import jit + +PI = math.pi + + +@jit(nopython=True) +def to_rad(degree): + return (degree / 360.0) * (2 * PI) + + +@jit(nopython=True) +def to_deg(rad): + return (rad * 360.0) / (2 * PI) + + +@jit(nopython=True) +def bearing(p1, p2): + long1 = to_rad(p1[0]) + long2 = to_rad(p2[0]) + + lat1 = to_rad(p1[1]) + lat2 = to_rad(p2[1]) + + y = np.sin(long2 - long1) * np.cos(lat2) + x = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(long2 - long1) + return to_deg(np.arctan2(y, x)) + + +@jit(nopython=True) +def angle_between_three_points(p1, p2, p3): + alpha = bearing(p2, p1) + beta = bearing(p2, p3) + result = alpha - beta + if result > 180: + return 180 - result + else: + return result + + +def compute_point_angles(dat): + angles = np.zeros(dat.shape[0]) + + p1 = (dat[0][0], dat[0][1]) + p2 = (dat[1][0], dat[1][1]) + angles[0] = bearing(p1, p2) + for i in range(1, dat.shape[0] - 1): + p1 = (dat[i - 1][0], dat[i - 1][1]) + p2 = (dat[i][0], dat[i][1]) + p3 = (dat[i + 1][0], dat[i + 1][1]) + + angles[i] = angle_between_three_points(p1, p2, p3) + return angles diff --git a/skais/tests/ais/__init__.py b/skais/tests/ais/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/tests/ais/test_ais_points.py b/skais/tests/ais/test_ais_points.py index 1c4b13fb1d950a76ad5532cbd51ea8a14755605c..eea6ea7fa877f46d30d6fafd9426313285a4ade8 100644 --- a/skais/tests/ais/test_ais_points.py +++ b/skais/tests/ais/test_ais_points.py @@ -3,12 +3,14 @@ import unittest import pandas as pd import numpy as np -from skais.ais.ais_points import AISPoints, compute_trajectory, compute_trajectories +from skais.ais.ais_points import AISPoints +from skais.ais.ais_trajectory import AISTrajectory class TestAISPositions(unittest.TestCase): - def setUp(self) -> None: - self.ais_points = AISPoints(pd.DataFrame( + + def test_describe(self): + ais_points = AISPoints(pd.DataFrame( { "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], @@ -17,198 +19,99 @@ class TestAISPositions(unittest.TestCase): } )) - self.ais_trajectories = AISPoints( - pd.DataFrame( - { - 'lat': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4] * 2, - 'long': [0, 0, 0, 0, 0, 1, 2, 3, 4, 5] * 2, - 'sog': [0, 0, 10, 10, 10, 20, 30, 40, 20, 10] * 2, - 'diff': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] * 2, - 'label': [1, 1, 0, 0, 0, 0, 0, 0, 0, 0] * 2, - 'ts': ['2020-03-10 22:10:00', '2020-03-10 22:14:00', '2020-03-10 22:18:00', '2020-03-10 22:22:00', - '2020-03-10 22:30:00', '2020-03-10 22:32:00', '2020-03-10 22:35:00', '2020-03-10 22:40:00', - '2020-03-10 22:45:00', '2020-03-10 22:50:00'] + - ['2020-03-10 22:10:00', '2020-03-10 22:14:00', '2020-03-10 22:18:00', '2020-03-10 22:20:00', - '2020-03-10 23:30:00', '2020-03-10 23:32:00', '2020-03-10 23:35:00', '2020-03-10 23:40:00', - '2020-03-10 23:45:00', '2020-03-10 23:50:00'], - 'mmsi': [100 for i in range(10)] + [101 for i in range(10)] - } - ) + self.assertDictEqual(ais_points.describe(), + { + 'nb vessels': 1, + 'nb points': 21, + 'labeled 0': 13, + 'labeled 1': 8, + 'average speed': 234 / 21, + 'average diff': 1271 / 21 + }) + + + def test_remove_outliers_simple(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [1000] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} ) + ) + expected = pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + } + ) + ais_points.remove_outliers(["cog", "heading"]) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - def test_histogram_no_label_simple(self): - result = np.histogramdd(self.ais_points.df[["sog", "diff"]].to_numpy(), 3, [[0, 30], [0, 180]])[0] - result = result / result.sum() + def test_remove_outliers_rank(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [1000] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} + ) + ) + expected = pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [0.0 for i in range(0, 359, 10)] + } + ) + ais_points.remove_outliers(["cog", "heading"], rank=2) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - self.assertTrue(np.array_equal(self.ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]]), - result)) + def test_remove_outliers_not_all_features(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000]} + ) + ) + expected = pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [10000] + } + ) + ais_points.remove_outliers(["cog"]) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - def test_histogram_no_label_no_data(self): + def test_remove_outliers_exception(self): ais_points = AISPoints( pd.DataFrame( { - "sog": [], - "diff": [], - "label": [] + "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000] } ) ) + with self.assertRaises(ValueError): + ais_points.remove_outliers(["cog"], rank=0) - self.assertTrue(np.array_equal(ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]]), - np.full((3, 3), 1 / 9))) - - def test_histogram_label(self): - self.assertTrue(np.array_equal(self.ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]], label=0), - np.array([[3, 0, 0], [4, 4, 0], [2, 0, 0]]) / 13)) - - def test_histogram_joint_x_y(self): - ground_truth = np.array([[[3, 2], [0, 1], [0, 3]], - [[4, 2], [4, 0], [0, 0]], - [[2, 0], [0, 0], [0, 0]]]) / 21 - - np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_joint_x_y(x_nb_bins=3)) - - def test_histogram_x_knowing_y(self): - ground_truth = np.array([[[3 / 13, 2 / 8], [0, 1 / 8], [0, 3 / 8]], - [[4 / 13, 2 / 8], [4 / 13, 0], [0, 0]], - [[2 / 13, 0], [0, 0], [0, 0]]]) - - np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_x_knowing_y(x_nb_bins=3)) - - def test_histogram_y_knowing_x(self): - ground_truth = np.array([[[3 / 5, 2 / 5], [0, 1], [0, 1]], - [[4 / 6, 2 / 6], [1, 0], [13 / 21, 8 / 21]], - [[1, 0], [13 / 21, 8 / 21], [13 / 21, 8 / 21]]]) - - np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_y_knowing_x(x_nb_bins=3)) - # def test_load_from_csv(self): - # ais_points = AISPoints.load_from_csv("test_load_from_csv.csv") - # - # pd.testing.assert_frame_equal(ais_points.df, self.ais_points.df) - - def test_compute_diff_heading_cog(self): + def test_clean_angles(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "cog": [i for i in range(0, 359, 10)] + [489, 456, -12] + [180, 180, 180], + "heading": [180 for i in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], } ) ) - ais_points.compute_diff_heading_cog() - - diff = ais_points.df['diff'].to_numpy() - - np.testing.assert_array_equal(diff, np.array([180, 170, 160, 150, 140, 130, 120, 110, 100, 90, 80, 70, 60, 50, - 40, 30, 20, 10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, - 120, 130, 140, 150, 160, 170])) - - def test_histogram_x(self): - ground_truth = np.array([[5, 1, 3], - [6, 4, 0], - [2, 0, 0]]) / 21 - - np.testing.assert_array_equal(ground_truth, - self.ais_points.histogram(features=["sog", "diff"], bins=3, - ranges=[[0, 30], [0, 180]])) - - def test_describe(self): - self.assertDictEqual(self.ais_points.describe(), - { - 'nb vessels': 1, - 'nb points': 21, - 'labeled 0': 13, - 'labeled 1': 8, - 'average speed': 234 / 21, - 'average diff': 1271 / 21 - }) - - def test_fuse_single(self): - pd.testing.assert_frame_equal(AISPoints.fuse(self.ais_points).df, self.ais_points.df) - - def test_fuse_simple_list(self): - pd.testing.assert_frame_equal(AISPoints.fuse([self.ais_points]).df, self.ais_points.df) - - def test_fuse_multiple(self): - value = pd.DataFrame( + expected = pd.DataFrame( { - "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1, 2, 3, 7, 15, 14, 12, - 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], - "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132, 35, 45, - 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], - "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(42)] + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for i in range(0, 359, 10)] } ) - pd.testing.assert_frame_equal(AISPoints.fuse(self.ais_points, self.ais_points).df.reset_index(drop=True), - value.reset_index(drop=True)) - def test_compute_trajectory_simple(self): - times = np.arange(0, 100, 4) * 60 - result = compute_trajectory.py_func(times, 5, 1000) - expected = 25 - - self.assertEqual(result, expected) - - def test_compute_trajectory_cut(self): - times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) - result = compute_trajectory.py_func(times, 5, 1000) - expected = 25 - - self.assertEqual(result, expected) - - def test_compute_trajectory_limit(self): - times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) - result = compute_trajectory.py_func(times, 5, 10) - expected = 10 - - self.assertEqual(result, expected) - - def test_compute_trajectories_simple_split(self): - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=0) - expected = [ - pd.DataFrame({'ts_sec': np.arange(0, 100, 4) * 60}), - pd.DataFrame({'ts_sec': np.arange(120, 200, 4) * 60}) - ] - - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) - - def test_compute_trajectories_split_limit(self): - a = np.arange(0, 100, 4) - b = np.arange(120, 200, 4) - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=0, size_limit=10) - expected = [ - pd.DataFrame({'ts_sec': a[:10] * 60}), - pd.DataFrame({'ts_sec': a[10:20] * 60}), - pd.DataFrame({'ts_sec': a[20:] * 60}), - pd.DataFrame({'ts_sec': b[:10] * 60}), - pd.DataFrame({'ts_sec': b[10:] * 60}) - ] - - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) - - def test_compute_trajectories_split_min_size(self): - a = np.arange(0, 100, 4) - b = np.arange(120, 200, 4) - print(len(b)) - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=23) - expected = [ - pd.DataFrame({'ts_sec': a * 60}) - ] - - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + ais_points.clean_angles() + result = ais_points.df + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) def test_normalize_min_max(self): ais_points = AISPoints(pd.DataFrame( @@ -258,7 +161,8 @@ class TestAISPositions(unittest.TestCase): pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), check_exact=False, rtol=0.05) - def test_disjointed_histogram_label_none(self): + + def test_compute_drift(self): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], @@ -266,146 +170,260 @@ class TestAISPositions(unittest.TestCase): } ) ) - features = ["cog", "heading"] - bins = [10, 3] - ranges = [[0, 360], [0, 360]] - result = ais_points.disjointed_histogram(features, bins, ranges) - expected = [ - np.array([4, 4, 3, 4, 3, 4, 4, 3, 4, 3]), - np.array([0, 36, 0]) - ] + ais_points.compute_drift() - self.assertEqual(len(result), len(expected)) + diff = ais_points.df['drift'].to_numpy() - for r, e in zip(result, expected): - np.testing.assert_array_equal(e, r[0]) + np.testing.assert_array_equal(diff, np.array([180, 170, 160, 150, 140, 130, 120, 110, 100, 90, 80, 70, 60, 50, + 40, 30, 20, 10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, + 120, 130, 140, 150, 160, 170])) - def test_disjointed_histogram_label_0(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)], - "label": [0 for _ in range(10)] + [1 for _ in range(26)] - } - ) - ) - features = ["cog", "heading"] - bins = [10, 3] - ranges = [[0, 360], [0, 360]] + # def test_histogram_no_label_simple(self): + # result = np.histogramdd(self.ais_points.df[["sog", "diff"]].to_numpy(), 3, [[0, 30], [0, 180]])[0] + # + # result = result / result.sum() + # + # self.assertTrue(np.array_equal(self.ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]]), + # result)) + # + # def test_histogram_no_label_no_data(self): + # ais_points = AISPoints( + # pd.DataFrame( + # { + # "sog": [], + # "diff": [], + # "label": [] + # } + # ) + # ) + # + # self.assertTrue(np.array_equal(ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]]), + # np.full((3, 3), 1 / 9))) + # + # def test_histogram_label(self): + # self.assertTrue(np.array_equal(self.ais_points.histogram(["sog", "diff"], 3, [[0, 30], [0, 180]], label=0), + # np.array([[3, 0, 0], [4, 4, 0], [2, 0, 0]]) / 13)) + # + # def test_histogram_joint_x_y(self): + # ground_truth = np.array([[[3, 2], [0, 1], [0, 3]], + # [[4, 2], [4, 0], [0, 0]], + # [[2, 0], [0, 0], [0, 0]]]) / 21 + # + # np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_joint_x_y(x_nb_bins=3)) + # + # def test_histogram_x_knowing_y(self): + # ground_truth = np.array([[[3 / 13, 2 / 8], [0, 1 / 8], [0, 3 / 8]], + # [[4 / 13, 2 / 8], [4 / 13, 0], [0, 0]], + # [[2 / 13, 0], [0, 0], [0, 0]]]) + # + # np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_x_knowing_y(x_nb_bins=3)) + # + # def test_histogram_y_knowing_x(self): + # ground_truth = np.array([[[3 / 5, 2 / 5], [0, 1], [0, 1]], + # [[4 / 6, 2 / 6], [1, 0], [13 / 21, 8 / 21]], + # [[1, 0], [13 / 21, 8 / 21], [13 / 21, 8 / 21]]]) + # + # np.testing.assert_array_equal(ground_truth, self.ais_points.histogram_y_knowing_x(x_nb_bins=3)) + + # def test_load_from_csv(self): + # ais_points = AISPoints.load_from_csv("test_load_from_csv.csv") + # + # pd.testing.assert_frame_equal(ais_points.df, self.ais_points.df) - result = ais_points.disjointed_histogram(features, bins, ranges, label=0) - expected = [ - np.array([4, 4, 2, 0, 0, 0, 0, 0, 0, 0]), - np.array([0, 10, 0]) - ] - self.assertEqual(len(result), len(expected)) + # def test_histogram_x(self): + # ground_truth = np.array([[5, 1, 3], + # [6, 4, 0], + # [2, 0, 0]]) / 21 + # + # np.testing.assert_array_equal(ground_truth, + # self.ais_points.histogram(features=["sog", "diff"], bins=3, + # ranges=[[0, 30], [0, 180]])) - for r, e in zip(result, expected): - np.testing.assert_array_equal(e, r[0]) - def test_disjointed_histogram_bins_int(self): + def test_fuse_single(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)], - "label": [0 for _ in range(10)] + [1 for _ in range(26)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) - ) - features = ["cog", "heading"] - bins = 10 - ranges = [[0, 360], [0, 360]] - - result = ais_points.disjointed_histogram(features, bins, ranges) - expected = [ - np.array([4, 4, 3, 4, 3, 4, 4, 3, 4, 3]), - np.array([0, 0, 0, 0, 0, 36, 0, 0, 0, 0]) - ] - - self.assertEqual(len(result), len(expected)) + )) - for r, e in zip(result, expected): - np.testing.assert_array_equal(e, r[0]) + pd.testing.assert_frame_equal(AISPoints.fuse(ais_points).df, ais_points.df) - def test_clean_angles(self): + def test_fuse_simple_list(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)] + [489, 456, -12] + [180, 180, 180], - "heading": [180 for i in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], - } - ) - ) - - expected = pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) - - ais_points.clean_angles() - result = ais_points.df + )) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) + pd.testing.assert_frame_equal(AISPoints.fuse([ais_points]).df, ais_points.df) - def test_remove_outliers_simple(self): + def test_fuse_multiple(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} - ) - ) - expected = pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) - ais_points.remove_outliers(["cog", "heading"]) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) + )) - def test_remove_outliers_rank(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} - ) - ) - expected = pd.DataFrame( + value = pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)], - "heading": [0.0 for i in range(0, 359, 10)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1, 2, 3, 7, 15, 14, 12, + 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132, 35, 45, + 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(42)] } ) - ais_points.remove_outliers(["cog", "heading"], rank=2) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) + pd.testing.assert_frame_equal(AISPoints.fuse(ais_points, ais_points).df.reset_index(drop=True), + value.reset_index(drop=True)) + # + # def test_compute_trajectory_simple(self): + # times = np.arange(0, 100, 4) * 60 + # result = compute_trajectory.py_func(times, 5, 1000) + # expected = 25 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectory_cut(self): + # times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) + # result = compute_trajectory.py_func(times, 5, 1000) + # expected = 25 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectory_limit(self): + # times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) + # result = compute_trajectory.py_func(times, 5, 10) + # expected = 10 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectories_simple_split(self): + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=0) + # expected = [ + # pd.DataFrame({'ts_sec': np.arange(0, 100, 4) * 60}), + # pd.DataFrame({'ts_sec': np.arange(120, 200, 4) * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + # + # def test_compute_trajectories_split_limit(self): + # a = np.arange(0, 100, 4) + # b = np.arange(120, 200, 4) + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=0, size_limit=10) + # expected = [ + # pd.DataFrame({'ts_sec': a[:10] * 60}), + # pd.DataFrame({'ts_sec': a[10:20] * 60}), + # pd.DataFrame({'ts_sec': a[20:] * 60}), + # pd.DataFrame({'ts_sec': b[:10] * 60}), + # pd.DataFrame({'ts_sec': b[10:] * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + # + # def test_compute_trajectories_split_min_size(self): + # a = np.arange(0, 100, 4) + # b = np.arange(120, 200, 4) + # print(len(b)) + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=23) + # expected = [ + # pd.DataFrame({'ts_sec': a * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + + + # def test_disjointed_histogram_label_none(self): + # ais_points = AISPoints(pd.DataFrame( + # { + # "cog": [i for i in range(0, 359, 10)], + # "heading": [180 for i in range(0, 359, 10)] + # } + # ) + # ) + # features = ["cog", "heading"] + # bins = [10, 3] + # ranges = [[0, 360], [0, 360]] + # + # result = ais_points.disjointed_histogram(features, bins, ranges) + # expected = [ + # np.array([4, 4, 3, 4, 3, 4, 4, 3, 4, 3]), + # np.array([0, 36, 0]) + # ] + # + # self.assertEqual(len(result), len(expected)) + # + # for r, e in zip(result, expected): + # np.testing.assert_array_equal(e, r[0]) + # + # def test_disjointed_histogram_label_0(self): + # ais_points = AISPoints(pd.DataFrame( + # { + # "cog": [i for i in range(0, 359, 10)], + # "heading": [180 for i in range(0, 359, 10)], + # "label": [0 for _ in range(10)] + [1 for _ in range(26)] + # } + # ) + # ) + # features = ["cog", "heading"] + # bins = [10, 3] + # ranges = [[0, 360], [0, 360]] + # + # result = ais_points.disjointed_histogram(features, bins, ranges, label=0) + # expected = [ + # np.array([4, 4, 2, 0, 0, 0, 0, 0, 0, 0]), + # np.array([0, 10, 0]) + # ] + # + # self.assertEqual(len(result), len(expected)) + # + # for r, e in zip(result, expected): + # np.testing.assert_array_equal(e, r[0]) + # + # def test_disjointed_histogram_bins_int(self): + # ais_points = AISPoints(pd.DataFrame( + # { + # "cog": [i for i in range(0, 359, 10)], + # "heading": [180 for i in range(0, 359, 10)], + # "label": [0 for _ in range(10)] + [1 for _ in range(26)] + # } + # ) + # ) + # features = ["cog", "heading"] + # bins = 10 + # ranges = [[0, 360], [0, 360]] + # + # result = ais_points.disjointed_histogram(features, bins, ranges) + # expected = [ + # np.array([4, 4, 3, 4, 3, 4, 4, 3, 4, 3]), + # np.array([0, 0, 0, 0, 0, 36, 0, 0, 0, 0]) + # ] + # + # self.assertEqual(len(result), len(expected)) + # + # for r, e in zip(result, expected): + # np.testing.assert_array_equal(e, r[0]) - def test_remove_outliers_not_all_features(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000]} - ) - ) - expected = pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [10000] - } - ) - ais_points.remove_outliers(["cog"]) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - def test_remove_outliers_exception(self): - ais_points = AISPoints( - pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000] - } - ) - ) - with self.assertRaises(ValueError): - ais_points.remove_outliers(["cog"], rank=0) diff --git a/skais/tests/ais/test_ais_trajectory.py b/skais/tests/ais/test_ais_trajectory.py index d4e51ac1cc49b50ecadb223c507106fe92bddf0d..78fff0bb0b83fc724d6671a4d2ad9582d00b00c8 100644 --- a/skais/tests/ais/test_ais_trajectory.py +++ b/skais/tests/ais/test_ais_trajectory.py @@ -1,62 +1,58 @@ -import math import unittest -import pandas as pd -import numpy as np - -from skais.ais.ais_trajectory import AISTrajectory, compute_std, to_rad, bearing, to_deg, compute_point_angles +from skais.ais.ais_trajectory import * class TestAISTrajectory(unittest.TestCase): - def test_get_stopped_snippets_simple(self): - ais_trajectory = AISTrajectory(pd.DataFrame( - { - "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "ts_sec": [i for i in range(21)] - } - )) - - expected = pd.DataFrame( - { - "label": [1, 1, 1, 1, 1, 1, 1, 1], - "ts_sec": [i for i in range(13, 21)] - } - ) - - snippets = ais_trajectory.get_stopped_snippets('label') - - self.assertEqual(len(snippets), 1) - pd.testing.assert_frame_equal(expected, snippets[0].df.reset_index(drop=True)) - - def test_get_stopped_snippets_multi_snippets(self): - ais_trajectory = AISTrajectory(pd.DataFrame( - { - "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], - "ts_sec": [i * 60 for i in range(31)] - } - )) - - expected = [ - pd.DataFrame( - { - "label": [1, 1, 1, 1, 1, 1, 1, 1], - "ts_sec": [i * 60 for i in range(13, 21)] - } - ), - pd.DataFrame( - { - "label": [1, 1, 1, ], - "ts_sec": [i * 60 for i in range(25, 28)] - } - ), - ] - - snippets = ais_trajectory.get_stopped_snippets('label', time_gap=1) - - self.assertEqual(len(expected), len(snippets)) - for e, s in zip(expected, snippets): - pd.testing.assert_frame_equal(e, s.df.reset_index(drop=True)) + # def test_get_stopped_snippets_simple(self): + # ais_trajectory = AISTrajectory(pd.DataFrame( + # { + # "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + # "ts_sec": [i for i in range(21)] + # } + # )) + # + # expected = pd.DataFrame( + # { + # "label": [1, 1, 1, 1, 1, 1, 1, 1], + # "ts_sec": [i for i in range(13, 21)] + # } + # ) + # + # snippets = ais_trajectory.get_stopped_snippets('label') + # + # self.assertEqual(len(snippets), 1) + # pd.testing.assert_frame_equal(expected, snippets[0].df.reset_index(drop=True)) + + # def test_get_stopped_snippets_multi_snippets(self): + # ais_trajectory = AISTrajectory(pd.DataFrame( + # { + # "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], + # "ts_sec": [i * 60 for i in range(31)] + # } + # )) + # + # expected = [ + # pd.DataFrame( + # { + # "label": [1, 1, 1, 1, 1, 1, 1, 1], + # "ts_sec": [i * 60 for i in range(13, 21)] + # } + # ), + # pd.DataFrame( + # { + # "label": [1, 1, 1, ], + # "ts_sec": [i * 60 for i in range(25, 28)] + # } + # ), + # ] + # + # snippets = ais_trajectory.get_stopped_snippets('label', time_gap=1) + # + # self.assertEqual(len(expected), len(snippets)) + # for e, s in zip(expected, snippets): + # pd.testing.assert_frame_equal(e, s.df.reset_index(drop=True)) def test_to_geojson(self): trajectory = AISTrajectory( @@ -125,22 +121,22 @@ class TestAISTrajectory(unittest.TestCase): np.testing.assert_array_equal(result, expected) - def test_compute_sqrt_sog(self): - trajectory = AISTrajectory( - pd.DataFrame( - { - "sog": [i for i in range(4)], - "ts_sec": [i for i in range(4)] - } - ) - ) - - trajectory.compute_sqrt_sog() - - result = trajectory.df["sog_sqrt"].to_numpy() - expected = np.array([math.sqrt(i) for i in range(4)]) - - np.testing.assert_array_equal(result, expected) + # def test_compute_sqrt_sog(self): + # trajectory = AISTrajectory( + # pd.DataFrame( + # { + # "sog": [i for i in range(4)], + # "ts_sec": [i for i in range(4)] + # } + # ) + # ) + # + # trajectory.compute_sqrt_sog() + # + # result = trajectory.df["sog_sqrt"].to_numpy() + # expected = np.array([math.sqrt(i) for i in range(4)]) + # + # np.testing.assert_array_equal(result, expected) def test_interpolation(self): trajectory = AISTrajectory( @@ -186,173 +182,81 @@ class TestAISTrajectory(unittest.TestCase): pd.testing.assert_frame_equal(trajectory.df, expected) - def test_compute_angle_l1(self): + def test_split_trajectory_simple(self): trajectory = AISTrajectory( pd.DataFrame( { - "ts_sec": [i for i in range(15)], - "angles_diff": [0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)] + "ts_sec": [i for i in range(0, 3001, 600)] + [i for i in range(4001, 7001, 600)], + "label": [0 for _ in range(0, 3001, 600)] + [1 for _ in range(4001, 7001, 600)] } ) ) - trajectory.compute_angle_l1(2) - - result = trajectory.df["angle_l1"].to_numpy() - expected = np.array([0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5]) - - np.testing.assert_array_equal(result, expected) - - def test_compute_angle_l2(self): - trajectory = AISTrajectory( - pd.DataFrame( - { - "ts_sec": [i for i in range(15)], - "angles_diff": [0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)] - } + expected = [ + AISTrajectory( + pd.DataFrame( + { + "ts_sec": [i for i in range(0, 3001, 600)], + "label": [0 for _ in range(0, 3001, 600)] + } + ) + ), + AISTrajectory( + pd.DataFrame( + { + "ts_sec": [i for i in range(4001, 7001, 600)], + "label": [1 for i in range(4001, 7001, 600)] + } + ) ) - ) - - trajectory.compute_angle_l2(2) - - result = trajectory.df["angle_l2"].to_numpy() - expected = np.array(np.sqrt([0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5])) - - np.testing.assert_array_equal(result, expected) - - def test_compute_std(self): - dat = np.array([0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)]) - radius = 2 - - result = compute_std.py_func(dat, radius) - expected = np.array( - [0, 0, 0, 0.4, np.sqrt(30 / 125), np.sqrt(30 / 125), 0.4, 0, 0.8, np.sqrt(120 / 125), np.sqrt(120 / 125), - 0.8, 0, 0, 0]) - - np.testing.assert_almost_equal(result, expected) - - def test_to_rad_0(self): - result = to_rad.py_func(0) - expected = 0 - - self.assertAlmostEqual(result, expected) - - def test_to_rad_90(self): - result = to_rad.py_func(90) - expected = np.pi / 2 - - self.assertAlmostEqual(result, expected) - - def test_to_rad_180(self): - result = to_rad.py_func(180) - expected = np.pi - - self.assertAlmostEqual(result, expected) - - def test_to_rad_360(self): - result = to_rad.py_func(360) - expected = 2 * np.pi - - self.assertAlmostEqual(result, expected) - - def test_to_rad_m180(self): - result = to_rad.py_func(-180) - expected = -np.pi - - self.assertAlmostEqual(result, expected) - - def test_to_rad_m90(self): - result = to_rad.py_func(-90) - expected = -np.pi / 2 - - self.assertAlmostEqual(result, expected) - - def test_to_rad_270(self): - result = to_rad.py_func(270) - expected = 3 * np.pi / 2 - - self.assertAlmostEqual(result, expected) - - def test_to_rad_810(self): - result = to_rad.py_func(810) - expected = 9 * np.pi / 2 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_0(self): - result = to_deg.py_func(0) - expected = 0 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_90(self): - result = to_deg.py_func(np.pi / 2) - expected = 90 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_180(self): - result = to_deg.py_func(np.pi) - expected = 180 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_360(self): - result = to_deg.py_func(2 * np.pi) - expected = 360 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_m180(self): - result = to_deg.py_func(-np.pi) - expected = -180 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_m90(self): - result = to_deg.py_func(-np.pi / 2) - expected = -90 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_270(self): - result = to_deg.py_func(3 * np.pi / 2) - expected = 270 - - self.assertAlmostEqual(result, expected) - - def test_to_deg_810(self): - result = to_deg.py_func(9 * np.pi / 2) - expected = 810 - - self.assertAlmostEqual(result, expected) - - def test_bearing_rand(self): - paris = (2.3522, 48.8566) - marseille = (5.3698, 43.2965) - - result = bearing.py_func(paris, marseille) - expected = 158.2694 - - self.assertAlmostEqual(result, expected, places=4) - - def test_bearing_along_equator(self): - p1 = (0, 0) - p2 = (90, 0) - - result = bearing.py_func(p1, p2) - expected = 90 - - self.assertAlmostEqual(result, expected) - - def test_bearing_equator_to_north_pole(self): - p1 = (0, 0) - p2 = (0, 90) - - result = bearing.py_func(p1, p2) - expected = 0 + ] - self.assertAlmostEqual(result, expected) + for e, r in zip(expected, trajectory.split_trajectory(800)): + pd.testing.assert_frame_equal(e.df.reset_index(drop=True), r.df.reset_index(drop=True)) + # def test_compute_angle_l1(self): + # trajectory = AISTrajectory( + # pd.DataFrame( + # { + # "ts_sec": [i for i in range(15)], + # "angles_diff": [0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)] + # } + # ) + # ) + # + # trajectory.compute_angle_l1(2) + # + # result = trajectory.df["angle_l1"].to_numpy() + # expected = np.array([0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5]) + # + # np.testing.assert_array_equal(result, expected) + # + # def test_compute_angle_l2(self): + # trajectory = AISTrajectory( + # pd.DataFrame( + # { + # "ts_sec": [i for i in range(15)], + # "angles_diff": [0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)] + # } + # ) + # ) + # + # trajectory.compute_angle_l2(2) + # + # result = trajectory.df["angle_l2"].to_numpy() + # expected = np.array(np.sqrt([0, 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5])) + # + # np.testing.assert_array_equal(result, expected) + # + # def test_compute_std(self): + # dat = np.array([0 for _ in range(5)] + [1 for _ in range(5)] + [-1 for i in range(5)]) + # radius = 2 + # + # result = compute_std.py_func(dat, radius) + # expected = np.array( + # [0, 0, 0, 0.4, np.sqrt(30 / 125), np.sqrt(30 / 125), 0.4, 0, 0.8, np.sqrt(120 / 125), np.sqrt(120 / 125), + # 0.8, 0, 0, 0]) + # + # np.testing.assert_almost_equal(result, expected) # def test_compute_position_angle_std(self): # dat = [(0, i * 10) for i in range(5)] + [(0, 50 - i * 10) for i in range(5)] @@ -376,12 +280,4 @@ class TestAISTrajectory(unittest.TestCase): # dat = [(0, i * 10) for i in range(5)] + [(0, 50 - i * 10) for i in range(5)] # result = compute_position_dist_std(dat, 2) # - # # hard to test - - def test_compute_point_angles(self): - dat = np.array([(0, 0), (0, 10), (10, 10), (0, 10)]) - result = compute_point_angles(dat) - - expected = np.array([0, 90, 0, 0]) - - np.testing.assert_almost_equal(expected, result, decimal=0) + # # hard to test \ No newline at end of file diff --git a/skais/tests/process/__init__.py b/skais/tests/process/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/tests/process/test_basic_features_operations.py b/skais/tests/process/test_basic_features_operations.py new file mode 100644 index 0000000000000000000000000000000000000000..da2f62eb6388adf5c8f8036c2b67610aeb0a3626 --- /dev/null +++ b/skais/tests/process/test_basic_features_operations.py @@ -0,0 +1,62 @@ +import unittest + +import numpy as np + +from skais.process.basic_features_operations import angular_mean, angular_std, angular_time_derivative + + +class TestAngular(unittest.TestCase): + def test_angular_mean_simple(self): + x = np.radians(np.array([1, 359])) + + self.assertEqual(0.0, angular_mean(x)) + + def test_angular_mean_simple_2(self): + x = np.radians(np.array([180, 180, 180, 180, 179, 181])) + + self.assertEqual(np.pi, angular_mean(x)) + + def test_angular_mean_simple_3(self): + x = np.radians(np.array([0, 0, 0, 0, 359, 1])) + + self.assertEqual(0.0, angular_mean(x)) + + def test_angular_mean_first_quadrant(self): + x = np.radians(np.array([43, 44, 45, 46, 47])) + + self.assertEqual(np.pi / 4, angular_mean(x)) + + def test_angular_mean_second_quadrant(self): + x = np.radians(np.array([133, 134, 135, 136, 137])) + + self.assertEqual(3 * np.pi / 4, angular_mean(x)) + + def test_angular_mean_third_quadrant(self): + x = np.radians(np.array([223, 224, 225, 226, 227])) + + self.assertEqual(-3 * np.pi / 4, angular_mean(x)) + + def test_angular_mean_fourth_quadrant(self): + x = np.radians(np.array([313, 314, 315, 316, 317])) + + self.assertEqual(-np.pi / 4, angular_mean(x)) + + def test_angular_std(self): + x = np.radians(np.array([0, 0, 0, 0])) + + self.assertEqual(0, angular_std(x)) + + def test_angular_std_2(self): + x = np.radians(np.array([0, 90, 180, 270])) + + self.assertAlmostEqual(1, angular_std(x)) + + def test_angular_time_derivative(self): + result = angular_time_derivative(0, np.pi, 0, 1) + + self.assertAlmostEqual(np.pi, result) + + def test_angular_time_derivative_2(self): + result = angular_time_derivative(7*np.pi/4, np.pi/4, 0, 1) + + self.assertAlmostEqual(np.pi/2, result) \ No newline at end of file diff --git a/skais/tests/process/test_geography.py b/skais/tests/process/test_geography.py new file mode 100644 index 0000000000000000000000000000000000000000..be1f844dfd0cfdddf9bb284ab75b6f5d1c9ab30e --- /dev/null +++ b/skais/tests/process/test_geography.py @@ -0,0 +1,168 @@ +import unittest +from skais.process.geography import * + + +class MyTestCase(unittest.TestCase): + def test_to_rad_0(self): + result = to_rad.py_func(0) + expected = 0 + + self.assertAlmostEqual(result, expected) + + def test_to_rad_90(self): + result = to_rad.py_func(90) + expected = np.pi / 2 + + self.assertAlmostEqual(result, expected) + + def test_to_rad_180(self): + result = to_rad.py_func(180) + expected = np.pi + + self.assertAlmostEqual(result, expected) + + def test_to_rad_360(self): + result = to_rad.py_func(360) + expected = 2 * np.pi + + self.assertAlmostEqual(result, expected) + + def test_to_rad_m180(self): + result = to_rad.py_func(-180) + expected = -np.pi + + self.assertAlmostEqual(result, expected) + + def test_to_rad_m90(self): + result = to_rad.py_func(-90) + expected = -np.pi / 2 + + self.assertAlmostEqual(result, expected) + + def test_to_rad_270(self): + result = to_rad.py_func(270) + expected = 3 * np.pi / 2 + + self.assertAlmostEqual(result, expected) + + def test_to_rad_810(self): + result = to_rad.py_func(810) + expected = 9 * np.pi / 2 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_0(self): + result = to_deg.py_func(0) + expected = 0 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_90(self): + result = to_deg.py_func(np.pi / 2) + expected = 90 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_180(self): + result = to_deg.py_func(np.pi) + expected = 180 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_360(self): + result = to_deg.py_func(2 * np.pi) + expected = 360 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_m180(self): + result = to_deg.py_func(-np.pi) + expected = -180 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_m90(self): + result = to_deg.py_func(-np.pi / 2) + expected = -90 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_270(self): + result = to_deg.py_func(3 * np.pi / 2) + expected = 270 + + self.assertAlmostEqual(result, expected) + + def test_to_deg_810(self): + result = to_deg.py_func(9 * np.pi / 2) + expected = 810 + + self.assertAlmostEqual(result, expected) + + def test_bearing_rand(self): + paris = (2.3522, 48.8566) + marseille = (5.3698, 43.2965) + + result = bearing.py_func(paris, marseille) + expected = 158.2694 + + self.assertAlmostEqual(result, expected, places=4) + + def test_bearing_along_equator(self): + p1 = (0, 0) + p2 = (90, 0) + + result = bearing.py_func(p1, p2) + expected = 90 + + self.assertAlmostEqual(result, expected) + + def test_bearing_equator_to_north_pole(self): + p1 = (0, 0) + p2 = (0, 90) + + result = bearing.py_func(p1, p2) + expected = 0 + + self.assertAlmostEqual(result, expected) + + def test_angle_between_three_points_1(self): + p1 = (0, -10) + p2 = (0, 0) + p3 = (10, 0) + + self.assertAlmostEqual(90, angle_between_three_points.py_func(p1, p2, p3), places=3) + + def test_angle_between_three_points_2(self): + p1 = (0, -10) + p2 = (0, 0) + p3 = (-10, 0) + + self.assertAlmostEqual(-90, angle_between_three_points.py_func(p1, p2, p3), places=3) + + def test_angle_between_three_points_3(self): + p1 = (0, -10) + p2 = (0, 0) + p3 = (10, 10) + + self.assertAlmostEqual(180 - 44.56139, angle_between_three_points.py_func(p1, p2, p3), places=3) + + def test_angle_between_three_points_4(self): + p1 = (0, 0) + p2 = (0, 10) + p3 = (0, 0) + + self.assertAlmostEqual(0, abs(angle_between_three_points.py_func(p1, p2, p3)), places=3) + + def test_compute_point_angles(self): + dat = np.array([(0, 0), (0, 10), (10, 10), (0, 10)]) + result = compute_point_angles(dat) + + expected = np.array([0, 90, 0, 0]) + + np.testing.assert_almost_equal(expected, result, decimal=0) + self.assertTrue(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/skais/utils/experiment_tools.py b/skais/utils/experiment_tools.py index 1a68fabcc98d8700ae1892821df2a6d2369c36ef..85fa9b31ad945381d72c5b1bd3151a3745bc7b97 100644 --- a/skais/utils/experiment_tools.py +++ b/skais/utils/experiment_tools.py @@ -9,6 +9,7 @@ def make_feature_vectors(trajectories, features=None, zero = [0 for _ in range(nb_classes)] for trajectory in trajectories: + trajectory.df.dropna(inplace=True) if len(trajectory.df.index) > length_list: trajectory.df['ts'] = trajectory.df.index trajectory.compute_all_derivatives()