diff --git a/skais/ais/ais_points.py b/skais/ais/ais_points.py index e76b2a446da10a63b47a93bccfaf46766d448195..470562df91848b6f1398600a32ac850d0c85b80b 100644 --- a/skais/ais/ais_points.py +++ b/skais/ais/ais_points.py @@ -1,45 +1,40 @@ -import pickle -from datetime import datetime - -import pandas as pd import numpy as np -from numba import jit +import pandas as pd from scipy.stats import stats from skais.ais.ais_trajectory import AISTrajectory -# TODO: remove -def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): - n_sample = len(df.index) - result = [] - work_df = df.copy() - - index = 0 - while index < n_sample: - i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) - trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time) - if len(trajectory.df.index) > min_size: - result.append(trajectory) - work_df = work_df[i:] - index += i - - return result - -# TODO: remove -@jit(nopython=True) -def compute_trajectory(times, time_gap, size_limit): - n_samples = len(times) - - previous_date = times[0] - - i = 0 - for i in range(size_limit): - if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap): - return i - previous_date = times[i] - - return i + 1 +# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): +# n_sample = len(df.index) +# result = [] +# work_df = df.copy() +# +# index = 0 +# while index < n_sample: +# i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) +# trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time) +# if len(trajectory.df.index) > min_size: +# result.append(trajectory) +# work_df = work_df[i:] +# index += i +# +# return result +# +# +# @jit(nopython=True) +# def compute_trajectory(times, time_gap, size_limit): +# n_samples = len(times) +# +# previous_date = times[0] +# +# i = 0 +# for i in range(size_limit): +# if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap): +# return i +# previous_date = times[i] +# +# return i + 1 class AISPoints: @@ -50,6 +45,19 @@ class AISPoints: self.df = df + def describe(self): + description = { + "nb vessels": len(self.df.mmsi.unique()), + "nb points": len(self.df.index), + "average speed": self.df['sog'].mean(), + "average diff": self.df['diff'].mean() + } + + for n in np.sort(self.df['label'].unique()): + description[f"labeled {n}"] = len(self.df[self.df['label'] == n].index) + + return description + # cleaning functions def remove_outliers(self, features, rank=4): if rank <= 0: @@ -96,43 +104,22 @@ class AISPoints: return normalization_type, normalization_dict # New features - # TODO: rename - def compute_diff_heading_cog(self): - self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180), - axis=1) - - - # TODO: redo - def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None): - - if 'ts' in self.df: - - self.df['ts'] = pd.to_datetime(self.df['ts'], infer_datetime_format=True) - self.df['ts_sec'] = self.df['ts'].apply(lambda x: datetime.timestamp(x)) + def compute_drift(self): + self.df["drift"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180), + axis=1) - dat = self.df - else: - raise ValueError + # Trajectories + """ + Separates AISPoints into individual trajectories + """ + def get_trajectories(self): trajectories = [] - for mmsi in dat.mmsi.unique(): - trajectories += compute_trajectories(dat[dat['mmsi'] == mmsi], time_gap, min_size=min_size, - interpolation_time=interpolation_time) + for mmsi in self.df.mmsi.unique(): + trajectories.append(AISTrajectory(self.df[self.df['mmsi'] == mmsi].reset_index(drop=True))) return trajectories - def describe(self): - stats = {"nb vessels": len(self.df.mmsi.unique()), - "nb points": len(self.df.index), - "average speed": self.df['sog'].mean(), - "average diff": self.df['diff'].mean() - } - - for n in np.sort(self.df['label'].unique()): - stats[f"labeled {n}"] = len(self.df[self.df['label'] == n].index) - - return stats - # Static methods @staticmethod def fuse(*args): @@ -153,4 +140,4 @@ class AISPoints: def load_from_csv(file_name): df = pd.read_csv(file_name) ais_points = AISPoints(df) - return ais_points \ No newline at end of file + return ais_points diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py index c3ea2a6cfd4a610caf89948bf3ac597087360077..895b596cb33fea9fc065ce309c90d8751ef0802e 100644 --- a/skais/ais/ais_trajectory.py +++ b/skais/ais/ais_trajectory.py @@ -245,6 +245,9 @@ class AISTrajectory: # self.df = df.dropna() self.df = df + def __eq__(self, other): + return self.df.equals(other.df) + def compute_angle_l1(self, radius): dat = self.df['angles_diff'].to_numpy() l1 = l1_angle(dat, radius) diff --git a/skais/tests/ais/test_ais_points.py b/skais/tests/ais/test_ais_points.py index fda3d9e367ef06921bd959a98cdf24d3f610ccbb..48d8c01894a7579208910f029c7fcb5cf1764f0d 100644 --- a/skais/tests/ais/test_ais_points.py +++ b/skais/tests/ais/test_ais_points.py @@ -3,12 +3,14 @@ import unittest import pandas as pd import numpy as np -from skais.ais.ais_points import AISPoints, compute_trajectory, compute_trajectories +from skais.ais.ais_points import AISPoints +from skais.ais.ais_trajectory import AISTrajectory class TestAISPositions(unittest.TestCase): - def setUp(self) -> None: - self.ais_points = AISPoints(pd.DataFrame( + + def test_describe(self): + ais_points = AISPoints(pd.DataFrame( { "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], @@ -17,25 +19,198 @@ class TestAISPositions(unittest.TestCase): } )) - self.ais_trajectories = AISPoints( + self.assertDictEqual(ais_points.describe(), + { + 'nb vessels': 1, + 'nb points': 21, + 'labeled 0': 13, + 'labeled 1': 8, + 'average speed': 234 / 21, + 'average diff': 1271 / 21 + }) + + + def test_remove_outliers_simple(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [1000] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} + ) + ) + expected = pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + } + ) + ais_points.remove_outliers(["cog", "heading"]) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) + + + def test_remove_outliers_rank(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [1000] + [666], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} + ) + ) + expected = pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [0.0 for i in range(0, 359, 10)] + } + ) + ais_points.remove_outliers(["cog", "heading"], rank=2) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) + + def test_remove_outliers_not_all_features(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000]} + ) + ) + expected = pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [10000] + } + ) + ais_points.remove_outliers(["cog"]) + pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) + + def test_remove_outliers_exception(self): + ais_points = AISPoints( + pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], + "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000] + } + ) + ) + with self.assertRaises(ValueError): + ais_points.remove_outliers(["cog"], rank=0) + + + def test_clean_angles(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)] + [489, 456, -12] + [180, 180, 180], + "heading": [180 for i in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], + } + ) + ) + + expected = pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for i in range(0, 359, 10)] + } + ) + + ais_points.clean_angles() + result = ais_points.df + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) + + def test_normalize_min_max(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for i in range(0, 359, 10)] + } + ) + ) + + ais_points.normalize(['cog', 'heading']) + result = ais_points.df + expected = pd.DataFrame( + { + "cog": [i / 350.0 for i in range(0, 359, 10)], + "heading": [0.0 for i in range(0, 359, 10)] + } + ) + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) + + def test_normalize_standardization(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for i in range(0, 359, 10)] + } + ) + ) + + ais_points.normalize(['cog', 'heading'], normalization_type="standardization") + result = ais_points.df + expected = pd.DataFrame( + { + "cog": [-1.68458833, -1.58832614, -1.49206395, -1.39580176, -1.29953957, + -1.20327738, -1.10701519, -1.010753, -0.91449081, -0.81822862, + -0.72196643, -0.62570424, -0.52944205, -0.43317986, -0.33691767, + -0.24065548, -0.14439329, -0.0481311, 0.0481311, 0.14439329, + 0.24065548, 0.33691767, 0.43317986, 0.52944205, 0.62570424, + 0.72196643, 0.81822862, 0.91449081, 1.010753, 1.10701519, + 1.20327738, 1.29953957, 1.39580176, 1.49206395, 1.58832614, + 1.68458833], + "heading": [0.0 for i in range(0, 359, 10)] + } + ) + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), + check_exact=False, rtol=0.05) + + + def test_compute_drift(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for i in range(0, 359, 10)] + } + ) + ) + + ais_points.compute_drift() + + diff = ais_points.df['drift'].to_numpy() + + np.testing.assert_array_equal(diff, np.array([180, 170, 160, 150, 140, 130, 120, 110, 100, 90, 80, 70, 60, 50, + 40, 30, 20, 10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, + 120, 130, 140, 150, 160, 170])) + + def test_get_trajectories(self): + ais_points = AISPoints( pd.DataFrame( { - 'lat': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4] * 2, - 'long': [0, 0, 0, 0, 0, 1, 2, 3, 4, 5] * 2, - 'sog': [0, 0, 10, 10, 10, 20, 30, 40, 20, 10] * 2, - 'diff': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] * 2, - 'label': [1, 1, 0, 0, 0, 0, 0, 0, 0, 0] * 2, - 'ts': ['2020-03-10 22:10:00', '2020-03-10 22:14:00', '2020-03-10 22:18:00', '2020-03-10 22:22:00', - '2020-03-10 22:30:00', '2020-03-10 22:32:00', '2020-03-10 22:35:00', '2020-03-10 22:40:00', - '2020-03-10 22:45:00', '2020-03-10 22:50:00'] + - ['2020-03-10 22:10:00', '2020-03-10 22:14:00', '2020-03-10 22:18:00', '2020-03-10 22:20:00', - '2020-03-10 23:30:00', '2020-03-10 23:32:00', '2020-03-10 23:35:00', '2020-03-10 23:40:00', - '2020-03-10 23:45:00', '2020-03-10 23:50:00'], - 'mmsi': [100 for i in range(10)] + [101 for i in range(10)] + "mmsi": [123456789 for _ in range(10)] + [987654321 for _ in range(10)], + "ts_sec": [i for i in range(20)] } ) ) + expected = [ + AISTrajectory( + pd.DataFrame( + { + "mmsi": [123456789 for _ in range(10)], + "ts_sec": [i for i in range(10)] + } + ) + ), + AISTrajectory( + pd.DataFrame( + { + "mmsi": [987654321 for _ in range(10)], + "ts_sec": [10+ i for i in range(10)] + } + ) + ) + ] + + for expected_trajectory, result_trajectory in zip(expected, ais_points.get_trajectories()): + pd.testing.assert_frame_equal(expected_trajectory.df, result_trajectory.df) + # def test_histogram_no_label_simple(self): # result = np.histogramdd(self.ais_points.df[["sog", "diff"]].to_numpy(), 3, [[0, 30], [0, 180]])[0] # @@ -88,22 +263,6 @@ class TestAISPositions(unittest.TestCase): # # pd.testing.assert_frame_equal(ais_points.df, self.ais_points.df) - def test_compute_diff_heading_cog(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] - } - ) - ) - - ais_points.compute_diff_heading_cog() - - diff = ais_points.df['diff'].to_numpy() - - np.testing.assert_array_equal(diff, np.array([180, 170, 160, 150, 140, 130, 120, 110, 100, 90, 80, 70, 60, 50, - 40, 30, 20, 10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, - 120, 130, 140, 150, 160, 170])) # def test_histogram_x(self): # ground_truth = np.array([[5, 1, 3], @@ -114,149 +273,119 @@ class TestAISPositions(unittest.TestCase): # self.ais_points.histogram(features=["sog", "diff"], bins=3, # ranges=[[0, 30], [0, 180]])) - def test_describe(self): - self.assertDictEqual(self.ais_points.describe(), - { - 'nb vessels': 1, - 'nb points': 21, - 'labeled 0': 13, - 'labeled 1': 8, - 'average speed': 234 / 21, - 'average diff': 1271 / 21 - }) def test_fuse_single(self): - pd.testing.assert_frame_equal(AISPoints.fuse(self.ais_points).df, self.ais_points.df) - - def test_fuse_simple_list(self): - pd.testing.assert_frame_equal(AISPoints.fuse([self.ais_points]).df, self.ais_points.df) - - def test_fuse_multiple(self): - value = pd.DataFrame( + ais_points = AISPoints(pd.DataFrame( { - "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1, 2, 3, 7, 15, 14, 12, - 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], - "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132, 35, 45, - 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], - "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(42)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) - pd.testing.assert_frame_equal(AISPoints.fuse(self.ais_points, self.ais_points).df.reset_index(drop=True), - value.reset_index(drop=True)) - - def test_compute_trajectory_simple(self): - times = np.arange(0, 100, 4) * 60 - result = compute_trajectory.py_func(times, 5, 1000) - expected = 25 - - self.assertEqual(result, expected) - - def test_compute_trajectory_cut(self): - times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) - result = compute_trajectory.py_func(times, 5, 1000) - expected = 25 - - self.assertEqual(result, expected) - - def test_compute_trajectory_limit(self): - times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) - result = compute_trajectory.py_func(times, 5, 10) - expected = 10 - - self.assertEqual(result, expected) - - def test_compute_trajectories_simple_split(self): - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=0) - expected = [ - pd.DataFrame({'ts_sec': np.arange(0, 100, 4) * 60}), - pd.DataFrame({'ts_sec': np.arange(120, 200, 4) * 60}) - ] - - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) - - def test_compute_trajectories_split_limit(self): - a = np.arange(0, 100, 4) - b = np.arange(120, 200, 4) - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=0, size_limit=10) - expected = [ - pd.DataFrame({'ts_sec': a[:10] * 60}), - pd.DataFrame({'ts_sec': a[10:20] * 60}), - pd.DataFrame({'ts_sec': a[20:] * 60}), - pd.DataFrame({'ts_sec': b[:10] * 60}), - pd.DataFrame({'ts_sec': b[10:] * 60}) - ] - - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) - - def test_compute_trajectories_split_min_size(self): - a = np.arange(0, 100, 4) - b = np.arange(120, 200, 4) - print(len(b)) - df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) - result = compute_trajectories(df, 5, min_size=23) - expected = [ - pd.DataFrame({'ts_sec': a * 60}) - ] + )) - self.assertEqual(len(expected), len(result)) - for r, e in zip(result, expected): - pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + pd.testing.assert_frame_equal(AISPoints.fuse(ais_points).df, ais_points.df) - def test_normalize_min_max(self): + def test_fuse_simple_list(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] - } - ) - ) - - ais_points.normalize(['cog', 'heading']) - result = ais_points.df - expected = pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)], - "heading": [0.0 for i in range(0, 359, 10)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) + )) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) + pd.testing.assert_frame_equal(AISPoints.fuse([ais_points]).df, ais_points.df) - def test_normalize_standardization(self): + def test_fuse_multiple(self): ais_points = AISPoints(pd.DataFrame( { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(21)] } - ) - ) + )) - ais_points.normalize(['cog', 'heading'], normalization_type="standardization") - result = ais_points.df - expected = pd.DataFrame( + value = pd.DataFrame( { - "cog": [-1.68458833, -1.58832614, -1.49206395, -1.39580176, -1.29953957, - -1.20327738, -1.10701519, -1.010753, -0.91449081, -0.81822862, - -0.72196643, -0.62570424, -0.52944205, -0.43317986, -0.33691767, - -0.24065548, -0.14439329, -0.0481311, 0.0481311, 0.14439329, - 0.24065548, 0.33691767, 0.43317986, 0.52944205, 0.62570424, - 0.72196643, 0.81822862, 0.91449081, 1.010753, 1.10701519, - 1.20327738, 1.29953957, 1.39580176, 1.49206395, 1.58832614, - 1.68458833], - "heading": [0.0 for i in range(0, 359, 10)] + "sog": [2, 3, 7, 15, 14, 12, 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1, 2, 3, 7, 15, 14, 12, + 18, 25, 21, 12, 11, 16, 19, 2, 5, 15, 12, 7, 8, 9, 1], + "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132, 35, 45, + 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], + "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + "mmsi": [0 for i in range(42)] } ) + pd.testing.assert_frame_equal(AISPoints.fuse(ais_points, ais_points).df.reset_index(drop=True), + value.reset_index(drop=True)) + # + # def test_compute_trajectory_simple(self): + # times = np.arange(0, 100, 4) * 60 + # result = compute_trajectory.py_func(times, 5, 1000) + # expected = 25 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectory_cut(self): + # times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) + # result = compute_trajectory.py_func(times, 5, 1000) + # expected = 25 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectory_limit(self): + # times = np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60]) + # result = compute_trajectory.py_func(times, 5, 10) + # expected = 10 + # + # self.assertEqual(result, expected) + # + # def test_compute_trajectories_simple_split(self): + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=0) + # expected = [ + # pd.DataFrame({'ts_sec': np.arange(0, 100, 4) * 60}), + # pd.DataFrame({'ts_sec': np.arange(120, 200, 4) * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + # + # def test_compute_trajectories_split_limit(self): + # a = np.arange(0, 100, 4) + # b = np.arange(120, 200, 4) + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=0, size_limit=10) + # expected = [ + # pd.DataFrame({'ts_sec': a[:10] * 60}), + # pd.DataFrame({'ts_sec': a[10:20] * 60}), + # pd.DataFrame({'ts_sec': a[20:] * 60}), + # pd.DataFrame({'ts_sec': b[:10] * 60}), + # pd.DataFrame({'ts_sec': b[10:] * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) + # + # def test_compute_trajectories_split_min_size(self): + # a = np.arange(0, 100, 4) + # b = np.arange(120, 200, 4) + # print(len(b)) + # df = pd.DataFrame({'ts_sec': np.concatenate([np.arange(0, 100, 4) * 60, np.arange(120, 200, 4) * 60])}) + # result = compute_trajectories(df, 5, min_size=23) + # expected = [ + # pd.DataFrame({'ts_sec': a * 60}) + # ] + # + # self.assertEqual(len(expected), len(result)) + # for r, e in zip(result, expected): + # pd.testing.assert_frame_equal(e.reset_index(drop=True), r.df.reset_index(drop=True)) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), - check_exact=False, rtol=0.05) # def test_disjointed_histogram_label_none(self): # ais_points = AISPoints(pd.DataFrame( @@ -329,83 +458,4 @@ class TestAISPositions(unittest.TestCase): # for r, e in zip(result, expected): # np.testing.assert_array_equal(e, r[0]) - def test_clean_angles(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [489, 456, -12] + [180, 180, 180], - "heading": [180 for i in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], - } - ) - ) - - expected = pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] - } - ) - - ais_points.clean_angles() - result = ais_points.df - - pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True)) - - def test_remove_outliers_simple(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} - ) - ) - expected = pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] - } - ) - ais_points.remove_outliers(["cog", "heading"]) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - - def test_remove_outliers_rank(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} - ) - ) - expected = pd.DataFrame( - { - "cog": [i for i in range(0, 359, 10)], - "heading": [0.0 for i in range(0, 359, 10)] - } - ) - ais_points.remove_outliers(["cog", "heading"], rank=2) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - def test_remove_outliers_not_all_features(self): - ais_points = AISPoints(pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000]} - ) - ) - expected = pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [10000] - } - ) - ais_points.remove_outliers(["cog"]) - pd.testing.assert_frame_equal(expected.reset_index(drop=True), ais_points.df.reset_index(drop=True)) - - def test_remove_outliers_exception(self): - ais_points = AISPoints( - pd.DataFrame( - { - "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000] - } - ) - ) - with self.assertRaises(ValueError): - ais_points.remove_outliers(["cog"], rank=0)