restructure AISPoints

2137c807 · Raphael Sturgis · 9a94e525 · 2137c807 · 2137c807 · 2137c807
Commit 2137c807 authored Nov 13, 2021 by Raphael Sturgis
--- a/skais/ais/ais_points.py
+++ b/skais/ais/ais_points.py
-import pickle
-from datetime import datetime
-import pandas as pd
 import numpy as np
-from numba import jit
+import pandas as pd
 from scipy.stats import stats
 from skais.ais.ais_trajectory import AISTrajectory
-# TODO: remove
-def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
-    n_sample = len(df.index)
-    result = []
-    work_df = df.copy()
-    index = 0
-    while index < n_sample:
-        i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit)
-        trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time)
-        if len(trajectory.df.index) > min_size:
-            result.append(trajectory)
-        work_df = work_df[i:]
-        index += i
-    return result
-# TODO: remove
-@jit(nopython=True)
-def compute_trajectory(times, time_gap, size_limit):
-    n_samples = len(times)
-    previous_date = times[0]
-    i = 0
+# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
-    for i in range(size_limit):
+#     n_sample = len(df.index)
-        if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap):
+#     result = []
-            return i
+#     work_df = df.copy()
-        previous_date = times[i]
+#
+#     index = 0
-    return i + 1
+#     while index < n_sample:
+#         i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit)
+#         trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time)
+#         if len(trajectory.df.index) > min_size:
+#             result.append(trajectory)
+#         work_df = work_df[i:]
+#         index += i
+#
+#     return result
+#
+#
+# @jit(nopython=True)
+# def compute_trajectory(times, time_gap, size_limit):
+#     n_samples = len(times)
+#
+#     previous_date = times[0]
+#
+#     i = 0
+#     for i in range(size_limit):
+#         if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap):
+#             return i
+#         previous_date = times[i]
+#
+#     return i + 1
 class AISPoints:
@@ -50,6 +45,19 @@ class AISPoints:
        self.df = df
+    def describe(self):
+        """Summarise the points held in ``self.df``.
+
+        Returns a dict with the number of distinct ``mmsi`` values, the number
+        of rows, the mean of the ``sog`` column, the mean heading/cog
+        difference, and one ``"labeled <n>"`` row count for each distinct
+        value of the ``label`` column (visited in sorted order).
+        """
+        # compute_drift() writes its result to "drift"; before the rename the
+        # same value was written to "diff". Read whichever column is present
+        # so describe() works on both. The "average diff" key is kept as is
+        # because callers may rely on it.
+        drift_column = "drift" if "drift" in self.df.columns else "diff"
+        description = {
+            "nb vessels": len(self.df.mmsi.unique()),
+            "nb points": len(self.df.index),
+            "average speed": self.df['sog'].mean(),
+            "average diff": self.df[drift_column].mean()
+        }
+        for n in np.sort(self.df['label'].unique()):
+            description[f"labeled {n}"] = len(self.df[self.df['label'] == n].index)
+        return description
    # cleaning functions
    def remove_outliers(self, features, rank=4):
        if rank <= 0:
@@ -96,43 +104,22 @@ class AISPoints:
        return normalization_type, normalization_dict
    # New features
-    # TODO: rename
-    def compute_diff_heading_cog(self):
-        self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180),
-                                        axis=1)
+    def compute_drift(self):
+        """Add a ``drift`` column derived from ``heading`` and ``cog``.
+
+        Each value is ``180 - abs(abs(heading - cog) - 180)``. This folds the
+        difference between the two columns into 0..180 when both lie in
+        0..360 (presumably degrees -- confirm against the data source).
+        ``self.df`` is modified in place; nothing is returned.
+        """
+        # Column arithmetic instead of a row-wise apply(): same values, no
+        # Python call per row, and an empty frame yields an empty column.
+        offset = (self.df['heading'] - self.df['cog']).abs()
+        self.df["drift"] = 180 - (offset - 180).abs()
+    # Trajectories
-    # TODO: redo
-    def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None):
-        if 'ts' in self.df:
-            self.df['ts'] = pd.to_datetime(self.df['ts'], infer_datetime_format=True)
-            self.df['ts_sec'] = self.df['ts'].apply(lambda x: datetime.timestamp(x))
-            dat = self.df
-        else:
-            raise ValueError
+    def get_trajectories(self):
+        """Separate the points into individual trajectories.
+
+        Returns a list with one ``AISTrajectory`` per distinct ``mmsi`` value,
+        in the order the values first appear in ``self.df``. Each trajectory
+        is built from that value's rows with the index reset to 0..n-1.
+        """
        trajectories = []
-        for mmsi in dat.mmsi.unique():
-            trajectories += compute_trajectories(dat[dat['mmsi'] == mmsi], time_gap, min_size=min_size,
-                                                 interpolation_time=interpolation_time)
+        for mmsi in self.df.mmsi.unique():
+            trajectories.append(AISTrajectory(self.df[self.df['mmsi'] == mmsi].reset_index(drop=True)))
        return trajectories
-    def describe(self):
-        stats = {"nb vessels": len(self.df.mmsi.unique()),
-                 "nb points": len(self.df.index),
-                 "average speed": self.df['sog'].mean(),
-                 "average diff": self.df['diff'].mean()
-                 }
-        for n in np.sort(self.df['label'].unique()):
-            stats[f"labeled {n}"] = len(self.df[self.df['label'] == n].index)
-        return stats
    # Static methods
    @staticmethod
    def fuse(*args):

--- a/skais/ais/ais_trajectory.py
+++ b/skais/ais/ais_trajectory.py
@@ -245,6 +245,9 @@ class AISTrajectory:
        # self.df = df.dropna()
        self.df = df
+    def __eq__(self, other):
+        """Return True when both trajectories hold equal DataFrames.
+
+        Equality is decided by ``DataFrame.equals`` on the two ``df``
+        attributes. For an ``other`` that is not an ``AISTrajectory`` the
+        method returns ``NotImplemented``, so ``==`` evaluates to False
+        instead of raising ``AttributeError`` on the missing ``df``.
+        """
+        if not isinstance(other, AISTrajectory):
+            return NotImplemented
+        return self.df.equals(other.df)
    def compute_angle_l1(self, radius):
        dat = self.df['angles_diff'].to_numpy()
        l1 = l1_angle(dat, radius)

--- a/skais/tests/ais/test_ais_points.py
+++ b/skais/tests/ais/test_ais_points.py