diff --git a/requirements.txt b/requirements.txt index 0634b7c3db2603b5ca2ba0a5b6484f863be17d00..e6eac089238c5908bb07e3e45a306d9ec024d03d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ numpy~=1.19.5 numba~=0.53.1 scipy~=1.5.4 hmmlearn~=0.2.6 -scikit-learn~=1.0.1 \ No newline at end of file +scikit-learn~=1.0.1 +tqdm~=4.62.3 \ No newline at end of file diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py index 39a5ad5b08f179d144e94508fc0f4c1e8f88e8e1..db3e64eccdc17b248544f47c3d3174c29bd7cdb0 100644 --- a/skais/ais/ais_trajectory.py +++ b/skais/ais/ais_trajectory.py @@ -49,9 +49,10 @@ def apply_time_sequence(dat, time, func): class AISTrajectory(AISPoints): - def __init__(self, df, interpolation_time=None): + def __init__(self, df, mmsi=0, interpolation_time=None): df = df.drop_duplicates(subset=['ts_sec']) df = df.sort_values(by=['ts_sec']) + self.mmsi = mmsi if interpolation_time and len(df.index) > 4: float_columns = ['longitude', 'latitude', 'cog', 'heading', 'rot', 'sog', 'diff'] diff --git a/skais/process/ais_operations.py b/skais/process/ais_operations.py index 1075fd079ec6cfdc1bf7190c01428caaf8e049fc..11154469a679f5b5aa80d029d3847b1190c86f71 100644 --- a/skais/process/ais_operations.py +++ b/skais/process/ais_operations.py @@ -9,6 +9,6 @@ from skais.ais.ais_trajectory import AISTrajectory def get_trajectories(ais_points): trajectories = [] for mmsi in ais_points.df.mmsi.unique(): - trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True))) - + trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True), + mmsi=mmsi)) return trajectories diff --git a/skais/process/data_augmentation/augmentation_engine.py b/skais/process/data_augmentation/augmentation_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d61ff1baba4ad46e9301d4b4cec461d62f629358 --- /dev/null +++ b/skais/process/data_augmentation/augmentation_engine.py @@ -0,0 +1,33 @@ +import tqdm as tqdm + +from skais.process.data_augmentation.flip import Flip +from skais.process.data_augmentation.pipeline import Pipeline +from skais.process.data_augmentation.translator import Translator + + +class AugmentationEngine: + def __init__(self, translation_values, flip_values): + self.pipelines = [] + + for tv_long, tv_lat in translation_values: + self.pipelines.append(Pipeline([Translator(tv_long, tv_lat)])) + + for fv_meridian, fv_parallel in flip_values: + self.pipelines.append(Pipeline([Flip(fv_meridian, fv_parallel)])) + + for tv_long, tv_lat in translation_values: + translator = Translator(tv_long, tv_lat) + for fv_meridian, fv_parallel in flip_values: + flip = Flip(fv_meridian, fv_parallel) + self.pipelines.append(Pipeline([translator, flip])) + + def transform(self, x, verbose=0): + results = x.copy() + + iterator = self.pipelines + if verbose > 0: + iterator = tqdm.tqdm(self.pipelines) + for p in iterator: + results += p.transform(x) + + return results diff --git a/skais/process/data_augmentation/data_augmentor.py b/skais/process/data_augmentation/data_transformer.py similarity index 64% rename from skais/process/data_augmentation/data_augmentor.py rename to skais/process/data_augmentation/data_transformer.py index 7e5fe20851ad15b519f522f172a12a02fe04b1e0..0e4e1c4292fd29ab4f9ee95e723ee478976c9c7e 100644 --- a/skais/process/data_augmentation/data_augmentor.py +++ b/skais/process/data_augmentation/data_transformer.py @@ -1,3 +1,3 @@ -class DataAugmentor: +class DataTransformer: def transform(self, X): pass diff --git a/skais/process/data_augmentation/flip.py b/skais/process/data_augmentation/flip.py index 8c73d5c2cac875f8ba602fa15ed0d1cca951a627..578ca56b93ed09dda3f219524411b5c20ae86b71 100644 --- a/skais/process/data_augmentation/flip.py +++ b/skais/process/data_augmentation/flip.py @@ -1,16 +1,19 @@ from skais.ais.ais_trajectory import AISTrajectory -from skais.process.data_augmentation.data_augmentor import DataAugmentor +from skais.process.data_augmentation.data_transformer import DataTransformer -class Flip(DataAugmentor): +class Flip(DataTransformer): def __init__(self, meridian=None, parallel=None): self.meridian = meridian self.parallel = parallel - def transform(self, X): + def transform(self, x): result = [] - for trajectory in X: - df = trajectory.df - df['latitude'] = -trajectory.df['latitude'] - result.append(AISTrajectory(df)) + if self.parallel is not None: + for trajectory in x: + df = trajectory.df.copy() + df['latitude'] = -trajectory.df['latitude'] + result.append(AISTrajectory(df)) + else: + result += x.copy() return result diff --git a/skais/process/data_augmentation/pipeline.py b/skais/process/data_augmentation/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..4e66dc57ba4d1eecf1ddd662f9a48f692169300f --- /dev/null +++ b/skais/process/data_augmentation/pipeline.py @@ -0,0 +1,15 @@ +from skais.process.data_augmentation.data_transformer import DataTransformer + + +class Pipeline(DataTransformer): + def __init__(self, sequence): + for s in sequence: + assert (isinstance(s, DataTransformer)) + + self.sequence = sequence + + def transform(self, x): + result = x.copy() + for aug in self.sequence: + result = aug.transform(result) + return result diff --git a/skais/process/data_augmentation/translator.py b/skais/process/data_augmentation/translator.py index b0664fd3ecb164a79d8815afca97fdb4c575b4db..f29a252f564894a379beaa83d378842b786bf979 100644 --- a/skais/process/data_augmentation/translator.py +++ b/skais/process/data_augmentation/translator.py @@ -1,16 +1,16 @@ from skais.ais.ais_trajectory import AISTrajectory -from skais.process.data_augmentation.data_augmentor import DataAugmentor +from skais.process.data_augmentation.data_transformer import DataTransformer -class Translator(DataAugmentor): +class Translator(DataTransformer): def __init__(self, longitude, latitude): self.longitude = longitude self.latitude = latitude - def transform(self, X): + def transform(self, x): result = [] - for trajectory in X: - df = trajectory.df + for trajectory in x: + df = trajectory.df.copy() df['longitude'] = trajectory.df['longitude'] + self.longitude result.append(AISTrajectory(df)) - return result \ No newline at end of file + return result diff --git a/skais/tests/process/data_augmentation/test_flip.py b/skais/tests/process/data_augmentation/test_flip.py index 77f51540057c60261f149df8672d49d952bc2589..6405ab89dbea84c22753b6b2419afb0a4e606123 100644 --- a/skais/tests/process/data_augmentation/test_flip.py +++ b/skais/tests/process/data_augmentation/test_flip.py @@ -29,7 +29,7 @@ class TestFlip(unittest.TestCase): self.trajectories = [t1, t2] def test_flip_equator(self): - aug = Flip(0, None) + aug = Flip(None, 0) result = aug.transform(self.trajectories) @@ -51,11 +51,38 @@ class TestFlip(unittest.TestCase): } ) ) - expected = [t1,t2] + expected = [t1, t2] + self.assertEqual(len(expected), len(result)) for t1, t2 in zip(result, expected): pd.testing.assert_frame_equal(t1.df, t2.df) + def test_invariance(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [45 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + aug = Flip(0, None) + + _ = aug.transform(self.trajectories) + for t1, t2 in zip(self.trajectories, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + if __name__ == '__main__': unittest.main() diff --git a/skais/tests/process/data_augmentation/test_translator.py b/skais/tests/process/data_augmentation/test_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..41e5542b5eb176a839e441258f3c432f60fb6087 --- /dev/null +++ b/skais/tests/process/data_augmentation/test_translator.py @@ -0,0 +1,85 @@ +import unittest + +from skais.ais.ais_trajectory import AISTrajectory +import pandas as pd + +from skais.process.data_augmentation.translator import Translator + + +class TestTranslator(unittest.TestCase): + def setUp(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + self.trajectories = [t1, t2] + + def test_transform_longitude(self): + aug = Translator(1, 0) + + result = aug.transform(self.trajectories) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [13 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [13 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_invariance(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + expected = [t1, t2] + + for t1, t2 in zip(self.trajectories, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) +if __name__ == '__main__': + unittest.main()