From af593660348819b6b08303197d928f6e32e1dc40 Mon Sep 17 00:00:00 2001
From: Raphael <raphael.sturgis@gmail.com>
Date: Mon, 29 Nov 2021 14:58:07 +0100
Subject: [PATCH] data augmentation

---
 requirements.txt                              |  3 +-
 skais/ais/ais_trajectory.py                   |  3 +-
 skais/process/ais_operations.py               |  4 +-
 .../data_augmentation/augmentation_engine.py  | 33 +++++++
 ...{data_augmentor.py => data_transformer.py} |  2 +-
 skais/process/data_augmentation/flip.py       | 17 ++--
 skais/process/data_augmentation/pipeline.py   | 15 ++++
 skais/process/data_augmentation/translator.py | 12 +--
 .../process/data_augmentation/test_flip.py    | 31 ++++++-
 .../data_augmentation/test_translator.py      | 85 +++++++++++++++++++
 10 files changed, 185 insertions(+), 20 deletions(-)
 create mode 100644 skais/process/data_augmentation/augmentation_engine.py
 rename skais/process/data_augmentation/{data_augmentor.py => data_transformer.py} (64%)
 create mode 100644 skais/process/data_augmentation/pipeline.py
 create mode 100644 skais/tests/process/data_augmentation/test_translator.py

diff --git a/requirements.txt b/requirements.txt
index 0634b7c..e6eac08 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ numpy~=1.19.5
 numba~=0.53.1
 scipy~=1.5.4
 hmmlearn~=0.2.6
-scikit-learn~=1.0.1
\ No newline at end of file
+scikit-learn~=1.0.1
+tqdm~=4.62.3
\ No newline at end of file
diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py
index 39a5ad5..db3e64e 100644
--- a/skais/ais/ais_trajectory.py
+++ b/skais/ais/ais_trajectory.py
@@ -49,9 +49,10 @@ def apply_time_sequence(dat, time, func):
 
 
 class AISTrajectory(AISPoints):
-    def __init__(self, df, interpolation_time=None):
+    def __init__(self, df, mmsi=0, interpolation_time=None):
         df = df.drop_duplicates(subset=['ts_sec'])
         df = df.sort_values(by=['ts_sec'])
+        self.mmsi = mmsi
         if interpolation_time and len(df.index) > 4:
 
             float_columns = ['longitude', 'latitude', 'cog', 'heading', 'rot', 'sog', 'diff']
diff --git a/skais/process/ais_operations.py b/skais/process/ais_operations.py
index 1075fd0..1115446 100644
--- a/skais/process/ais_operations.py
+++ b/skais/process/ais_operations.py
@@ -9,6 +9,14 @@ from skais.ais.ais_trajectory import AISTrajectory
 def get_trajectories(ais_points):
-    trajectories = []
-    for mmsi in ais_points.df.mmsi.unique():
-        trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True)))
-
+    """Split a collection of AIS points into one trajectory per vessel.
+
+    Every distinct value of the ``mmsi`` column of ``ais_points.df`` yields one
+    ``AISTrajectory`` built from the matching rows (index reset) and tagged
+    with that MMSI. Trajectories follow the order in which each MMSI first
+    appears in the data.
+    """
+    points = ais_points.df
+    trajectories = [
+        AISTrajectory(points[points['mmsi'] == vessel_id].reset_index(drop=True), mmsi=vessel_id)
+        for vessel_id in points.mmsi.unique()
+    ]
     return trajectories
diff --git a/skais/process/data_augmentation/augmentation_engine.py b/skais/process/data_augmentation/augmentation_engine.py
new file mode 100644
index 0000000..d61ff1b
--- /dev/null
+++ b/skais/process/data_augmentation/augmentation_engine.py
@@ -0,0 +1,59 @@
+from tqdm import tqdm
+
+from skais.process.data_augmentation.flip import Flip
+from skais.process.data_augmentation.pipeline import Pipeline
+from skais.process.data_augmentation.translator import Translator
+
+
+class AugmentationEngine:
+    """Build and run translation, flip and translation-then-flip pipelines.
+
+    One pipeline is created per translation, one per flip and one per
+    (translation, flip) pair; in the paired pipelines the translation runs
+    first.
+    """
+
+    def __init__(self, translation_values, flip_values):
+        """
+        :param translation_values: iterable of ``(longitude, latitude)`` pairs,
+            each forwarded to ``Translator``.
+        :param flip_values: iterable of ``(meridian, parallel)`` pairs, each
+            forwarded to ``Flip``.
+        """
+        # Both collections are walked twice below; materialising them keeps
+        # one-shot iterables (generators, zip objects) from being exhausted
+        # before the combined pipelines are built.
+        translation_values = list(translation_values)
+        flip_values = list(flip_values)
+
+        self.pipelines = []
+
+        for tv_long, tv_lat in translation_values:
+            self.pipelines.append(Pipeline([Translator(tv_long, tv_lat)]))
+
+        for fv_meridian, fv_parallel in flip_values:
+            self.pipelines.append(Pipeline([Flip(fv_meridian, fv_parallel)]))
+
+        for tv_long, tv_lat in translation_values:
+            translator = Translator(tv_long, tv_lat)
+            for fv_meridian, fv_parallel in flip_values:
+                flip = Flip(fv_meridian, fv_parallel)
+                self.pipelines.append(Pipeline([translator, flip]))
+
+    def transform(self, x, verbose=0):
+        """Return the input trajectories followed by every pipeline's output.
+
+        :param x: iterable of trajectories to augment.
+        :param verbose: any value above 0 shows a tqdm progress bar over the
+            pipelines.
+        :return: a new list that starts with the elements of ``x``.
+        """
+        # Snapshot once so every pipeline sees the same, complete input.
+        trajectories = list(x)
+        results = list(trajectories)
+
+        iterator = tqdm(self.pipelines) if verbose > 0 else self.pipelines
+        for p in iterator:
+            results += p.transform(trajectories)
+
+        return results
diff --git a/skais/process/data_augmentation/data_augmentor.py b/skais/process/data_augmentation/data_transformer.py
similarity index 64%
rename from skais/process/data_augmentation/data_augmentor.py
rename to skais/process/data_augmentation/data_transformer.py
index 7e5fe20..0e4e1c4 100644
--- a/skais/process/data_augmentation/data_augmentor.py
+++ b/skais/process/data_augmentation/data_transformer.py
@@ -1,3 +1,16 @@
-class DataAugmentor:
+class DataTransformer:
+    """Common interface of the trajectory transformations.
+
+    Subclasses override ``transform``.
+    """
+
     def transform(self, X):
-        pass
+        """Transform a collection of trajectories.
+
+        :param X: the trajectories to transform.
+        :return: the transformed trajectories.
+        :raises NotImplementedError: always, in this base implementation.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} must override transform()"
+        )
diff --git a/skais/process/data_augmentation/flip.py b/skais/process/data_augmentation/flip.py
index 8c73d5c..578ca56 100644
--- a/skais/process/data_augmentation/flip.py
+++ b/skais/process/data_augmentation/flip.py
@@ -1,16 +1,35 @@
 from skais.ais.ais_trajectory import AISTrajectory
-from skais.process.data_augmentation.data_augmentor import DataAugmentor
+from skais.process.data_augmentation.data_transformer import DataTransformer
 
 
-class Flip(DataAugmentor):
+class Flip(DataTransformer):
+    """Mirror trajectories about a parallel (a line of constant latitude).
+
+    NOTE(review): ``meridian`` is stored but no longitude mirroring is
+    performed; a ``Flip`` given only a meridian returns its input
+    trajectories unchanged.
+    """
+
     def __init__(self, meridian=None, parallel=None):
         self.meridian = meridian
         self.parallel = parallel
 
-    def transform(self, X):
-        result = []
-        for trajectory in X:
-            df = trajectory.df
-            df['latitude'] = -trajectory.df['latitude']
-            result.append(AISTrajectory(df))
+    def transform(self, x):
+        """Return the trajectories of ``x`` mirrored about ``self.parallel``.
+
+        A latitude ``lat`` becomes ``2 * parallel - lat``, so ``parallel=0``
+        mirrors about the equator. The input trajectories are left untouched;
+        when ``parallel`` is ``None`` a new list holding the same trajectory
+        objects is returned.
+        """
+        if self.parallel is None:
+            return list(x)
+
+        result = []
+        for trajectory in x:
+            # Mirror a copy so the caller's dataframe keeps its coordinates.
+            df = trajectory.df.copy()
+            df['latitude'] = 2 * self.parallel - df['latitude']
+            # Rebuild with the source MMSI, otherwise it would reset to the default.
+            result.append(AISTrajectory(df, mmsi=getattr(trajectory, 'mmsi', 0)))
         return result
diff --git a/skais/process/data_augmentation/pipeline.py b/skais/process/data_augmentation/pipeline.py
new file mode 100644
index 0000000..4e66dc5
--- /dev/null
+++ b/skais/process/data_augmentation/pipeline.py
@@ -0,0 +1,33 @@
+from skais.process.data_augmentation.data_transformer import DataTransformer
+
+
+class Pipeline(DataTransformer):
+    """Chain of transformers applied one after the other."""
+
+    def __init__(self, sequence):
+        """
+        :param sequence: iterable of ``DataTransformer`` instances, in the
+            order they are to be applied.
+        :raises TypeError: if an element is not a ``DataTransformer``.
+        """
+        # Snapshot first: a one-shot iterable would otherwise be consumed by
+        # the validation loop and leave the pipeline with nothing to run.
+        steps = list(sequence)
+        for step in steps:
+            # Explicit raise rather than ``assert`` so the check survives ``python -O``.
+            if not isinstance(step, DataTransformer):
+                raise TypeError(
+                    f"Pipeline steps must be DataTransformer instances, got {type(step).__name__}"
+                )
+
+        self.sequence = steps
+
+    def transform(self, x):
+        """Feed ``x`` through every step in order and return the final output.
+
+        With no steps, a shallow copy of ``x`` is returned as a list.
+        """
+        result = list(x)
+        for aug in self.sequence:
+            result = aug.transform(result)
+        return result
diff --git a/skais/process/data_augmentation/translator.py b/skais/process/data_augmentation/translator.py
index b0664fd..f29a252 100644
--- a/skais/process/data_augmentation/translator.py
+++ b/skais/process/data_augmentation/translator.py
@@ -1,16 +1,29 @@
 from skais.ais.ais_trajectory import AISTrajectory
-from skais.process.data_augmentation.data_augmentor import DataAugmentor
+from skais.process.data_augmentation.data_transformer import DataTransformer
 
 
-class Translator(DataAugmentor):
+class Translator(DataTransformer):
+    """Shift trajectories by a fixed longitude and latitude offset."""
+
     def __init__(self, longitude, latitude):
         self.longitude = longitude
         self.latitude = latitude
 
-    def transform(self, X):
+    def transform(self, x):
+        """Return shifted copies of the trajectories in ``x``.
+
+        ``self.longitude`` is added to the ``longitude`` column and
+        ``self.latitude`` to the ``latitude`` column. The input trajectories
+        are left untouched.
+        """
         result = []
-        for trajectory in X:
-            df = trajectory.df
-            df['longitude'] = trajectory.df['longitude'] + self.longitude
-            result.append(AISTrajectory(df))
-        return result
\ No newline at end of file
+        for trajectory in x:
+            # Shift a copy so the caller's dataframe keeps its coordinates.
+            df = trajectory.df.copy()
+            df['longitude'] = df['longitude'] + self.longitude
+            # A latitude offset of None means "no latitude shift".
+            if self.latitude is not None:
+                df['latitude'] = df['latitude'] + self.latitude
+            # Rebuild with the source MMSI, otherwise it would reset to the default.
+            result.append(AISTrajectory(df, mmsi=getattr(trajectory, 'mmsi', 0)))
+        return result
diff --git a/skais/tests/process/data_augmentation/test_flip.py b/skais/tests/process/data_augmentation/test_flip.py
index 77f5154..6405ab8 100644
--- a/skais/tests/process/data_augmentation/test_flip.py
+++ b/skais/tests/process/data_augmentation/test_flip.py
@@ -29,7 +29,7 @@ class TestFlip(unittest.TestCase):
         self.trajectories = [t1, t2]
 
     def test_flip_equator(self):
-        aug = Flip(0, None)
+        aug = Flip(None, 0)
 
         result = aug.transform(self.trajectories)
 
@@ -51,11 +51,38 @@ class TestFlip(unittest.TestCase):
                 }
             )
         )
-        expected = [t1,t2]
+        expected = [t1, t2]
 
+        self.assertEqual(len(expected), len(result))
         for t1, t2 in zip(result, expected):
             pd.testing.assert_frame_equal(t1.df, t2.df)
 
+    def test_invariance(self):
+        """The trajectories handed to ``transform`` must come out unmodified."""
+        timestamps = list(range(10))
+        longitudes = [12 + i for i in range(10)]
+        latitude_tracks = (
+            [45 + i for i in range(10)],
+            [-12 + i for i in range(10)],
+        )
+        # Pristine copies of what the fixture is expected to contain.
+        references = [
+            AISTrajectory(
+                pd.DataFrame(
+                    {
+                        'ts_sec': timestamps,
+                        'latitude': latitudes,
+                        'longitude': longitudes
+                    }
+                )
+            )
+            for latitudes in latitude_tracks
+        ]
+
+        Flip(0, None).transform(self.trajectories)
+        for actual, reference in zip(self.trajectories, references):
+            pd.testing.assert_frame_equal(actual.df, reference.df)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/skais/tests/process/data_augmentation/test_translator.py b/skais/tests/process/data_augmentation/test_translator.py
new file mode 100644
index 0000000..41e5542
--- /dev/null
+++ b/skais/tests/process/data_augmentation/test_translator.py
@@ -0,0 +1,58 @@
+import unittest
+
+from skais.ais.ais_trajectory import AISTrajectory
+import pandas as pd
+
+from skais.process.data_augmentation.translator import Translator
+
+
+def _trajectory(latitude, longitude):
+    """Build a trajectory whose ``ts_sec`` runs from 0 to 9."""
+    return AISTrajectory(
+        pd.DataFrame(
+            {
+                'ts_sec': [i for i in range(10)],
+                'latitude': latitude,
+                'longitude': longitude
+            }
+        )
+    )
+
+
+class TestTranslator(unittest.TestCase):
+    def setUp(self):
+        self.trajectories = [
+            _trajectory([0 for _ in range(10)], [12 + i for i in range(10)]),
+            _trajectory([-12 + i for i in range(10)], [12 + i for i in range(10)]),
+        ]
+
+    def test_transform_longitude(self):
+        """A (1, 0) translation adds one to every longitude and keeps latitudes."""
+        result = Translator(1, 0).transform(self.trajectories)
+
+        expected = [
+            _trajectory([0 for _ in range(10)], [13 + i for i in range(10)]),
+            _trajectory([-12 + i for i in range(10)], [13 + i for i in range(10)]),
+        ]
+
+        # zip() stops at the shorter input, so compare the sizes explicitly.
+        self.assertEqual(len(expected), len(result))
+        for actual, wanted in zip(result, expected):
+            pd.testing.assert_frame_equal(actual.df, wanted.df)
+
+    def test_invariance(self):
+        """Transforming must not modify the trajectories passed in."""
+        untouched = [
+            _trajectory([0 for _ in range(10)], [12 + i for i in range(10)]),
+            _trajectory([-12 + i for i in range(10)], [12 + i for i in range(10)]),
+        ]
+
+        # The output is discarded on purpose: only the inputs are inspected.
+        Translator(1, 0).transform(self.trajectories)
+
+        for actual, wanted in zip(self.trajectories, untouched):
+            pd.testing.assert_frame_equal(actual.df, wanted.df)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab