diff --git a/.gitignore b/.gitignore index b139f3e8d20a35b89f3b7ebe50a95cf38649dc15..400aa52562e904ae6f2700d433af091f2d14154d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ build/ skais.egg-info/ *.coverage *__pycache__* +venv/ + +local.* \ No newline at end of file diff --git a/VERSION b/VERSION index a6bdd0de29428363391e8c891311e3784c76c6de..89e999f0b8fe6f9c4129846d6d24385f863b90a8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -skais:0.1a +skais:0.2a diff --git a/requirements.txt b/requirements.txt index 0634b7c3db2603b5ca2ba0a5b6484f863be17d00..bac8f3919f75435dd16b8769f761ebf7b5d0c6ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ pandas~=1.1.5 setuptools~=57.0.0 numpy~=1.19.5 -numba~=0.53.1 +numba>0.53.1 scipy~=1.5.4 hmmlearn~=0.2.6 -scikit-learn~=1.0.1 \ No newline at end of file +scikit-learn~=1.0.1 +tqdm~=4.62.3 \ No newline at end of file diff --git a/skais/ais/ais_points.py b/skais/ais/ais_points.py index 2a9df82a1241843d9df6647dba68eb9c43087d9e..3d5758ebd99e17e446a472deef06950705e10720 100644 --- a/skais/ais/ais_points.py +++ b/skais/ais/ais_points.py @@ -3,38 +3,6 @@ import pandas as pd from scipy.stats import stats -# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): -# n_sample = len(df.index) -# result = [] -# work_df = df.copy() -# -# index = 0 -# while index < n_sample: -# i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit) -# trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time) -# if len(trajectory.df.index) > min_size: -# result.append(trajectory) -# work_df = work_df[i:] -# index += i -# -# return result -# -# -# @jit(nopython=True) -# def compute_trajectory(times, time_gap, size_limit): -# n_samples = len(times) -# -# previous_date = times[0] -# -# i = 0 -# for i in range(size_limit): -# if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap): -# return i -# previous_date = times[i] -# -# return i + 1 - - class AISPoints: # Todo: Should be more elegant @@ -73,36 +41,91 @@ class AISPoints: self.df = self.df[self.df["heading"] <= 360] self.df = self.df[self.df["heading"] >= 0] - def normalize(self, features, normalization_type="min-max"): - normalization_dict = {} - if normalization_type == "min-max": - for f in features: - minimum = self.df[f].min() - maximum = self.df[f].max() - diff = (maximum - minimum) - if diff == 0: - print("Warning: diff = %d", diff) - diff = 1 - self.df[f] = (self.df[f] - minimum) / diff - normalization_dict[f"{f}_minimum"] = minimum - normalization_dict[f"{f}_maximum"] = maximum - - elif normalization_type == "standardization": - normalisation_factors = ("standardization", {}) - for f in features: - mean = self.df[f].mean() - std = self.df[f].std() - if std == 0: - print("Warning: std = %d", std) - std = 1 - self.df[f] = (self.df[f] - mean) / std - normalization_dict[f"{f}_mean"] = mean - normalization_dict[f"{f}_std"] = std - + def normalize(self, min_max_features=(), standardization_features=(), third_quartile_features=(), + divide_by_value=(), divide_by_max=(), normalization_dict=None): + if normalization_dict is None: + normalization_dict = {} + for f in min_max_features: + if f in self.df.columns: + normalization_dict[f] = {'type': 'min-max'} + minimum = self.df[f].min() + maximum = self.df[f].max() + diff = (maximum - minimum) + if diff == 0: + print("Warning: diff = 0") + self.df[f] = (self.df[f] - minimum) + else: + self.df[f] = (self.df[f] - minimum) / diff + normalization_dict[f]["minimum"] = 
minimum + normalization_dict[f]["maximum"] = maximum + for f in standardization_features: + if f in self.df.columns: + normalization_dict[f] = {'type': 'standardization'} + mean = self.df[f].mean() + std = self.df[f].std() + if std == 0: + print("Warning: std = %d", std) + std = 1 + self.df[f] = (self.df[f] - mean) / std + normalization_dict[f]["mean"] = mean + normalization_dict[f]["std"] = std + for f in third_quartile_features: + if f in self.df.columns: + normalization_dict[f] = {'type': '3rd quartile'} + third_quartile = self.df[f].quantile(0.75) + if third_quartile == 0: + print("Warning: third quartile = %d", third_quartile) + third_quartile = 1 + self.df[f] = self.df[f] / third_quartile + normalization_dict[f]["value"] = third_quartile + for t in divide_by_value: + f = t[0] + value = t[1] + if f in self.df.columns: + if value != 0: + normalization_dict[f] = {'type': 'divide by value', + 'value': value} + self.df[f] = self.df[f] / value + else: + print("Warning: dividing by 0") + for f in divide_by_max: + if f in self.df.columns: + maximum = self.df[f].max() + normalization_dict[f] = {'type': 'divide by max', + 'maximum': maximum} + self.df[f] = self.df[f] / maximum else: - raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, " - f"standardization]") - return normalization_type, normalization_dict + for f in normalization_dict: + if f in self.df.columns: + if normalization_dict[f]['type'] == 'min-max': + minimum = normalization_dict[f]["minimum"] + maximum = normalization_dict[f]["maximum"] + diff = (maximum - minimum) + if diff == 0: + print("Warning: diff = 0") + diff = 1 + self.df[f] = (self.df[f] - minimum) / diff + elif normalization_dict[f]['type'] == "standardization": + mean = normalization_dict[f]["mean"] + std = normalization_dict[f]["std"] + if std == 0: + print("Warning: std = 0") + std = 1 + self.df[f] = (self.df[f] - mean) / std + elif normalization_dict[f]['type'] == "3rd quartile": + third_quartile = normalization_dict[f]["value"] + self.df[f] = self.df[f] / third_quartile + elif normalization_dict[f]['type'] == "divide by value": + value = normalization_dict[f]["value"] + self.df[f] = self.df[f] / value + elif normalization_dict[f]['type'] == "divide by max": + maximum = normalization_dict[f]["maximum"] + self.df[f] = self.df[f] / maximum + else: + raise ValueError( + f"{normalization_dict[f]['type']} not a valid normalization method. 
Must be on of [min-max," + f" standardization, 3rd quartile, divide by value]") + return normalization_dict # New features def compute_drift(self): diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py index 39a5ad5b08f179d144e94508fc0f4c1e8f88e8e1..db1cd8f7311fbdd1e17cb2e6afc863d1bdc997c6 100644 --- a/skais/ais/ais_trajectory.py +++ b/skais/ais/ais_trajectory.py @@ -1,9 +1,13 @@ +import random + import pandas as pd import numpy as np from numba import jit from scipy.interpolate import interp1d +from skais.utils.geography import great_circle, position_from_distance, get_coord from skais.ais.ais_points import AISPoints +from skais.utils.geometry import bresenham @jit(nopython=True) @@ -36,6 +40,13 @@ def apply_func_on_window(dat, func, radius, on_edge='copy'): data = dat[i - radius:i + radius + 1] result[i - radius] = func(data) return result + elif on_edge == 'ignore': + for i in range(0, dat.shape[0]): + lower_bound = max(0, i - radius) + upper_bound = min(dat.shape[0], i + radius + 1) + data = dat[lower_bound:upper_bound] + result[i] = func(data) + return result else: raise ValueError @@ -49,9 +60,10 @@ def apply_time_sequence(dat, time, func): class AISTrajectory(AISPoints): - def __init__(self, df, interpolation_time=None): + def __init__(self, df, mmsi=0, interpolation_time=None): df = df.drop_duplicates(subset=['ts_sec']) df = df.sort_values(by=['ts_sec']) + self.mmsi = mmsi if interpolation_time and len(df.index) > 4: float_columns = ['longitude', 'latitude', 'cog', 'heading', 'rot', 'sog', 'diff'] @@ -76,8 +88,8 @@ class AISTrajectory(AISPoints): kind='nearest', axis=0)(t_interp1d).astype(int) df = new_df - - # self.df = df.dropna() + if 'sog' in df.columns: + df.loc[df['sog'] < 0, 'sog'] = 0 AISPoints.__init__(self, df) def sliding_window(self, size=10, offset=1, fields=None): @@ -92,9 +104,9 @@ class AISTrajectory(AISPoints): return result - def apply_func_on_time_window(self, func, radius, column, new_column=None): + def apply_func_on_time_window(self, func, radius, column, new_column=None, on_edge='copy'): dat = self.df[column].to_numpy() - result = apply_func_on_window(dat, func, radius, on_edge='copy') + result = apply_func_on_window(dat, func, radius, on_edge) if new_column is None: self.df[column] = result @@ -153,3 +165,104 @@ class AISTrajectory(AISPoints): index += i return result + + def shift_trajectory_to_coordinates(self, target_coordinate=(0, 0), point_index=None, in_place=False): + if point_index is None: + point_index = random.randint(0, len(self.df.index) - 1) + + df = self.df.copy() + new_df = df.copy() + + new_df['latitude'].iat[point_index] = target_coordinate[0] + new_df['longitude'].iat[point_index] = target_coordinate[1] + + new_point = target_coordinate + for i in range(point_index, 0, -1): + current_point = (df.iloc[i]['latitude'], df.iloc[i]['longitude']) + lat_dist = great_circle(current_point[0], df.iloc[i - 1]['latitude'], current_point[1], current_point[1]) + long_dist = great_circle(current_point[0], current_point[0], current_point[1], df.iloc[i - 1]['longitude']) + + if current_point[0] > df.iloc[i - 1]['latitude']: + lat_dist *= -1 + + if current_point[1] > df.iloc[i - 1]['longitude']: + long_dist *= -1 + + new_point = position_from_distance(new_point, (lat_dist, long_dist)) + + new_df['latitude'].iat[i - 1] = new_point[0] + new_df['longitude'].iat[i - 1] = new_point[1] + + new_point = target_coordinate + for i in range(point_index, len(df.index) - 1): + current_point = (df.iloc[i]['latitude'], df.iloc[i]['longitude']) + lat_dist 
= great_circle(current_point[0], df.iloc[i + 1]['latitude'], current_point[1], current_point[1]) + long_dist = great_circle(current_point[0], current_point[0], current_point[1], df.iloc[i + 1]['longitude']) + + if current_point[0] > df.iloc[i + 1]['latitude']: + lat_dist *= -1 + + if current_point[1] > df.iloc[i + 1]['longitude']: + long_dist *= -1 + + new_point = position_from_distance(new_point, (lat_dist, long_dist)) + + new_df['latitude'].iat[i + 1] = new_point[0] + new_df['longitude'].iat[i + 1] = new_point[1] + + if in_place: + self.df = new_df + return self + else: + return AISTrajectory(new_df, mmsi=self.mmsi) + + def get_time_per_label_shift(self, label_column='label'): + current_label = -1 + result = [] + for index, row in self.df.iterrows(): + if current_label != row[label_column]: + current_label = row[label_column] + result.append((row['ts_sec'], current_label)) + return result + + def generate_array_from_positions(self, height=256, width=256, link=True, bounding_box='fit', features=None, + node_size=0): + nb_channels = 1 + if features is not None: + nb_channels = len(features) + data = np.zeros((height, width, nb_channels), dtype=np.uint8) + if bounding_box != 'fit': + raise ValueError("feature not implemented") + positions = self.df[['longitude', 'latitude']].to_numpy() + min_lon, max_lon = (min(positions[:, 0]), max(positions[:, 0])) + min_lat, max_lat = (min(positions[:, 1]), max(positions[:, 1])) + if min_lat == max_lat: + min_lat -= 1 + max_lat += 1 + if min_lon == max_lon: + min_lon -= 1 + max_lon += 1 + + for longitude, latitude in positions: + x_coord, y_coord = get_coord(latitude, longitude, height, width, min_lat, max_lat, min_lon, max_lon) + + x_lower_bound = max(0, x_coord - node_size) + x_upper_bound = min(height - 1, x_coord + node_size) + + y_lower_bound = max(0, y_coord - node_size) + y_upper_bound = min(width - 1, y_coord + node_size) + + for x in range(x_lower_bound, x_upper_bound + 1): + for y in range(y_lower_bound, y_upper_bound + 1): + data[x, y] = [1] + + if link: + lon, lat = positions[0, 0], positions[0, 1] + for longitude, latitude in positions[1:]: + x_prv, y_prev = get_coord(lat, lon, height, width, min_lat, max_lat, min_lon, max_lon) + x_nxt, y_nxt = get_coord(latitude, longitude, height, width, min_lat, max_lat, min_lon, max_lon) + + lon, lat = longitude, latitude + for x, y in bresenham(x_prv, y_prev, x_nxt, y_nxt): + data[x, y] = [1] + return data diff --git a/skais/process/ais_operations.py b/skais/process/ais_operations.py index 1075fd079ec6cfdc1bf7190c01428caaf8e049fc..11154469a679f5b5aa80d029d3847b1190c86f71 100644 --- a/skais/process/ais_operations.py +++ b/skais/process/ais_operations.py @@ -9,6 +9,6 @@ from skais.ais.ais_trajectory import AISTrajectory def get_trajectories(ais_points): trajectories = [] for mmsi in ais_points.df.mmsi.unique(): - trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True))) - + trajectories.append(AISTrajectory(ais_points.df[ais_points.df['mmsi'] == mmsi].reset_index(drop=True), + mmsi=mmsi)) return trajectories diff --git a/skais/process/basic_features_operations.py b/skais/process/basic_features_operations.py index c4c7d3bbbefe115b87696c4f02f4be8cb0ba532e..350ae1751f8dde15978f6d440c1c96a22d39c262 100644 --- a/skais/process/basic_features_operations.py +++ b/skais/process/basic_features_operations.py @@ -18,16 +18,14 @@ def angular_dispersion(angles): def angular_mean(angles): x, y = angular_average_vector(angles) - theta = abs(np.arctan2(x, y)) + theta = 
np.arctan(y/x) - if y > 0 and x > 0: # first Q + if y > 0 and x > 0: return theta - if y > 0 >= x: # Second Q - return np.pi / 2 + theta - if y <= 0 < x: # Fourth Q - return np.pi / 2 - theta - else: # Third Q - return - theta + elif x <= 0: + return np.pi + theta + else: + return 2*np.pi + theta def angular_std(angles): diff --git a/skais/process/data_augmentation/__init__.py b/skais/process/data_augmentation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/process/data_augmentation/augmentation_engine.py b/skais/process/data_augmentation/augmentation_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..64ecb499ab660c94d27e98ce21058f70e3b75ad1 --- /dev/null +++ b/skais/process/data_augmentation/augmentation_engine.py @@ -0,0 +1,39 @@ +import tqdm as tqdm + +from skais.process.data_augmentation.data_transformer import DataTransformer +from skais.process.data_augmentation.flip import Flip +from skais.process.data_augmentation.pipeline import Pipeline +from skais.process.data_augmentation.translator import Translator + + +class AugmentationEngine: + def __init__(self, translation_values=None, flip_values=None, keep_original=True): + self.pipelines = [] + if keep_original: + self.pipelines.append(DataTransformer()) + + if translation_values is not None: + for tv_long, tv_lat in translation_values: + self.pipelines.append(Pipeline([Translator(tv_long, tv_lat)])) + + if flip_values is not None: + for fv_meridian, fv_parallel in flip_values: + self.pipelines.append(Pipeline([Flip(fv_meridian, fv_parallel)])) + + if flip_values is not None and translation_values is not None: + for tv_long, tv_lat in translation_values: + translator = Translator(tv_long, tv_lat) + for fv_meridian, fv_parallel in flip_values: + flip = Flip(fv_meridian, fv_parallel) + self.pipelines.append(Pipeline([translator, flip])) + + def transform(self, x, verbose=0): + results = [] + + iterator = self.pipelines + if verbose > 0: + iterator = tqdm.tqdm(self.pipelines) + for p in iterator: + results += p.transform(x) + + return results diff --git a/skais/process/data_augmentation/data_transformer.py b/skais/process/data_augmentation/data_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c4332e65d65efdd310a0e17069498a2b6eecbd --- /dev/null +++ b/skais/process/data_augmentation/data_transformer.py @@ -0,0 +1,3 @@ +class DataTransformer: + def transform(self, x): + return x diff --git a/skais/process/data_augmentation/flip.py b/skais/process/data_augmentation/flip.py new file mode 100644 index 0000000000000000000000000000000000000000..0917becd8cef0e1ab55a405cba84ec7b2bcd0418 --- /dev/null +++ b/skais/process/data_augmentation/flip.py @@ -0,0 +1,17 @@ +from skais.ais.ais_trajectory import AISTrajectory +from skais.process.data_augmentation.data_transformer import DataTransformer + + +class Flip(DataTransformer): + def __init__(self, meridian=None, parallel=None): + self.meridian = meridian + self.parallel = parallel + + def transform(self, x): + result = [] + if self.parallel is not None: + for trajectory in x: + df = trajectory.df.copy() + df['latitude'] = -trajectory.df['latitude'] + result.append(AISTrajectory(df)) + return result diff --git a/skais/process/data_augmentation/pipeline.py b/skais/process/data_augmentation/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..4e66dc57ba4d1eecf1ddd662f9a48f692169300f --- /dev/null +++ 
b/skais/process/data_augmentation/pipeline.py @@ -0,0 +1,15 @@ +from skais.process.data_augmentation.data_transformer import DataTransformer + + +class Pipeline(DataTransformer): + def __init__(self, sequence): + for s in sequence: + assert (isinstance(s, DataTransformer)) + + self.sequence = sequence + + def transform(self, x): + result = x.copy() + for aug in self.sequence: + result = aug.transform(result) + return result diff --git a/skais/process/data_augmentation/translator.py b/skais/process/data_augmentation/translator.py new file mode 100644 index 0000000000000000000000000000000000000000..f29a252f564894a379beaa83d378842b786bf979 --- /dev/null +++ b/skais/process/data_augmentation/translator.py @@ -0,0 +1,16 @@ +from skais.ais.ais_trajectory import AISTrajectory +from skais.process.data_augmentation.data_transformer import DataTransformer + + +class Translator(DataTransformer): + def __init__(self, longitude, latitude): + self.longitude = longitude + self.latitude = latitude + + def transform(self, x): + result = [] + for trajectory in x: + df = trajectory.df.copy() + df['longitude'] = trajectory.df['longitude'] + self.longitude + result.append(AISTrajectory(df)) + return result diff --git a/skais/tests/ais/test_ais_points.py b/skais/tests/ais/test_ais_points.py index e4a0310ecd079aac64105174fcf495cca162e651..330f039bd69e5c734480f6ed8bd6a687737364e4 100644 --- a/skais/tests/ais/test_ais_points.py +++ b/skais/tests/ais/test_ais_points.py @@ -17,7 +17,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } ) ) @@ -31,7 +31,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } )) @@ -49,13 +49,13 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} + "heading": [0.0 for _ in range(0, 359, 10)] + [0] + [0]} ) ) expected = pd.DataFrame( { "cog": [i for i in range(0, 359, 10)] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + "heading": [0.0 for _ in range(0, 359, 10)] + [0] } ) ais_points.remove_outliers(["cog", "heading"]) @@ -65,13 +65,13 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)] + [1000] + [666], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [0]} + "heading": [0.0 for _ in range(0, 359, 10)] + [0] + [0]} ) ) expected = pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [0.0 for i in range(0, 359, 10)] + "heading": [0.0 for _ in range(0, 359, 10)] } ) ais_points.remove_outliers(["cog", "heading"], rank=2) @@ -81,13 +81,13 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000]} + "heading": [0.0 for _ in range(0, 359, 10)] + [0] + [10000]} ) ) expected = pd.DataFrame( { "cog": [i / 
350.0 for i in range(0, 359, 10)] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [10000] + "heading": [0.0 for _ in range(0, 359, 10)] + [10000] } ) ais_points.remove_outliers(["cog"]) @@ -98,7 +98,7 @@ class TestAISPositions(unittest.TestCase): pd.DataFrame( { "cog": [i / 350.0 for i in range(0, 359, 10)] + [500] + [0], - "heading": [0.0 for i in range(0, 359, 10)] + [0] + [10000] + "heading": [0.0 for _ in range(0, 359, 10)] + [0] + [10000] } ) ) @@ -109,7 +109,7 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)] + [489, 456, -12] + [180, 180, 180], - "heading": [180 for i in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], + "heading": [180 for _ in range(0, 359, 10)] + [489, 180, 180] + [999, 666, -333], } ) ) @@ -117,7 +117,7 @@ class TestAISPositions(unittest.TestCase): expected = pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "heading": [180 for _ in range(0, 359, 10)] } ) @@ -130,17 +130,17 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "heading": [180.0 for _ in range(0, 359, 10)] } ) ) - ais_points.normalize(['cog', 'heading']) + ais_points.normalize(min_max_features=["cog", "heading"]) result = ais_points.df expected = pd.DataFrame( { "cog": [i / 350.0 for i in range(0, 359, 10)], - "heading": [0.0 for i in range(0, 359, 10)] + "heading": [0.0 for _ in range(0, 359, 10)] } ) @@ -150,12 +150,12 @@ class TestAISPositions(unittest.TestCase): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "heading": [180 for _ in range(0, 359, 10)] } ) ) - ais_points.normalize(['cog', 'heading'], normalization_type="standardization") + ais_points.normalize(standardization_features=['cog', 'heading']) result = ais_points.df expected = pd.DataFrame( { @@ -167,35 +167,60 @@ class TestAISPositions(unittest.TestCase): 0.72196643, 0.81822862, 0.91449081, 1.010753, 1.10701519, 1.20327738, 1.29953957, 1.39580176, 1.49206395, 1.58832614, 1.68458833], - "heading": [0.0 for i in range(0, 359, 10)] + "heading": [0.0 for _ in range(0, 359, 10)] } ) pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), check_exact=False, rtol=0.05) - def test_normalize_raise(self): + def test_normalize_3r_quartile(self): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "heading": [180 for _ in range(0, 359, 10)] } ) ) - self.assertRaises( - ValueError, - ais_points.normalize, - ['cog', 'heading'], - normalization_type="non-existing-normalization" + ais_points.normalize(third_quartile_features=["cog", "heading"]) + result = ais_points.df + expected = pd.DataFrame( + { + "cog": [i / 270.0 for i in range(0, 359, 10)], + "heading": [1.0 for _ in range(0, 359, 10)] + } + ) + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), + check_exact=False, rtol=0.05) + + def test_normalize_divide_by_value(self): + ais_points = AISPoints(pd.DataFrame( + { + "cog": [i for i in range(0, 359, 10)], + "heading": [180 for _ in range(0, 359, 10)] + } + ) ) + ais_points.normalize(divide_by_value=[("cog", 10), ("heading", 18)]) + result = ais_points.df + expected = pd.DataFrame( + { + "cog": [i / 10 for i in range(0, 359, 10)], + "heading": [10.0 for _ 
in range(0, 359, 10)] + } + ) + + pd.testing.assert_frame_equal(expected.reset_index(drop=True), result.reset_index(drop=True), + check_exact=False, rtol=0.05) def test_compute_drift(self): ais_points = AISPoints(pd.DataFrame( { "cog": [i for i in range(0, 359, 10)], - "heading": [180 for i in range(0, 359, 10)] + "heading": [180 for _ in range(0, 359, 10)] } ) ) @@ -217,7 +242,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } )) @@ -230,7 +255,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } )) @@ -243,7 +268,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } )) @@ -256,7 +281,7 @@ class TestAISPositions(unittest.TestCase): "diff": [35, 45, 59, 12, 1, 2, 54, 5, 47, 86, 119, 68, 75, 54, 55, 12, 32, 62, 159, 157, 132], "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "mmsi": [0 for i in range(21)] + "mmsi": [0 for _ in range(21)] } )) @@ -269,11 +294,11 @@ class TestAISPositions(unittest.TestCase): "label": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], "ts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - "mmsi": [0 for i in range(42)] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "mmsi": [0 for _ in range(42)] } ) value['ts'] = pd.to_datetime(value.ts) pd.testing.assert_frame_equal(AISPoints.fuse(ais_points, ais_points).df.reset_index(drop=True), - value.reset_index(drop=True)) \ No newline at end of file + value.reset_index(drop=True)) diff --git a/skais/tests/ais/test_ais_trajectory.py b/skais/tests/ais/test_ais_trajectory.py index 799825c93553514f88370f638d6bfca57e2592dd..6f2b6fa628eb4f52844fb2e07a869f223222e6ea 100644 --- a/skais/tests/ais/test_ais_trajectory.py +++ b/skais/tests/ais/test_ais_trajectory.py @@ -40,7 +40,7 @@ class TestAISTrajectory(unittest.TestCase): for r, e in zip(result, expected): np.testing.assert_array_equal(r, e) - + def test_sliding_window_too_short(self): trajectory = AISTrajectory( pd.DataFrame( @@ -319,4 +319,188 @@ class TestAISTrajectory(unittest.TestCase): self.assertEqual(0, compute_trajectory.py_func(times, 800)) def test_apply_func_on_window(self): - self.assertRaises(ValueError, apply_func_on_window,np.arange(10), 0, 0, 'not valid string') \ No newline at end of file + self.assertRaises(ValueError, apply_func_on_window, np.arange(10), 0, 0, 'not valid string') + + def test_apply_func_on_window_ignore(self): + result = apply_func_on_window(np.arange(10), np.mean, 2, 'ignore') + + expected = 
np.array([1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8]) + + np.testing.assert_equal(result, expected) + + def test_apply_func_on_window_ignore_short(self): + result = apply_func_on_window(np.arange(5), np.mean, 10, 'ignore') + + expected = np.array([2, 2, 2, 2, 2]) + + np.testing.assert_equal(result, expected) + + def test_get_time_per_label_shift_single_label(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "label": [1 for _ in range(0, 101, 10)], + "ts_sec": [i for i in range(0, 6001, 600)] + } + ) + ) + + result = trajectory.get_time_per_label_shift() + expected = [(0, 1)] + + self.assertListEqual(result, expected) + + def test_get_time_per_label_shift_label_switch_1(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "label": [1 for _ in range(11)] + [2 for _ in range(10)], + "ts_sec": [i for i in range(0, 12001, 600)] + } + ) + ) + + result = trajectory.get_time_per_label_shift() + expected = [(0, 1), (6600, 2)] + + self.assertListEqual(result, expected) + + def test_get_time_per_label_shift_label_switch_2(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "label": [1 for _ in range(11)] + [2 for _ in range(10)] + [1 for _ in range(10)], + "ts_sec": [i for i in range(0, 18001, 600)] + } + ) + ) + + result = trajectory.get_time_per_label_shift() + expected = [(0, 1), (6600, 2), (12600, 1)] + + self.assertListEqual(result, expected) + + def test_generate_array_from_positions(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "latitude": [0, 10, 0, -10], + "longitude": [0, 10, 10, -10], + "ts_sec": [i for i in range(4)] + } + ) + ) + + result = trajectory.generate_array_from_positions(height=9, width=9, link=False, bounding_box='fit', + features=None, node_size=0).reshape((9, 9)) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0, 0]]) + + np.testing.assert_array_equal(result, expected) + + def test_generate_array_from_positions_node_size(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "latitude": [0, 10, 0, -10], + "longitude": [0, 10, 10, -10], + "ts_sec": [i for i in range(4)] + } + ) + ) + + result = trajectory.generate_array_from_positions(height=9, width=9, link=False, bounding_box='fit', + features=None, node_size=1).reshape((9, 9)) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 0, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0, 0, 0, 0]]) + + np.testing.assert_array_equal(result, expected) + + def test_generate_array_from_positions_with_line(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "latitude": [0, 10, 0, 20], + "longitude": [0, 10, 20, 20], + "ts_sec": [i for i in range(4)] + } + ) + ) + + result = trajectory.generate_array_from_positions(height=9, width=18, link=True, bounding_box='fit', + features=None, node_size=0).reshape((9, 18)) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1], + [0, 
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], + [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]]) + + np.testing.assert_array_equal(result, expected) + + def test_generate_array_from_positions_single_point(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "latitude": [5], + "longitude": [20], + "ts_sec": [0] + } + ) + ) + + result = trajectory.generate_array_from_positions(height=9, width=9, link=False, bounding_box='fit', + features=None).reshape((9, 9)) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0]]) + + np.testing.assert_array_equal(result, expected) + + def test_generate_array_from_positions_overlapping_points(self): + trajectory = AISTrajectory( + pd.DataFrame( + { + "latitude": [5, 5, 5, 5], + "longitude": [20, 20, 20, 20], + "ts_sec": [0, 1, 2, 3] + } + ) + ) + + result = trajectory.generate_array_from_positions(height=9, width=9, link=False, bounding_box='fit', + features=None).reshape((9, 9)) + expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0]]) + + np.testing.assert_array_equal(result, expected) \ No newline at end of file diff --git a/skais/tests/process/data_augmentation/__init__.py b/skais/tests/process/data_augmentation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/skais/tests/process/data_augmentation/test_engine.py b/skais/tests/process/data_augmentation/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..9ea8d4cd92fe338e037e12d21083adb1672c1cfe --- /dev/null +++ b/skais/tests/process/data_augmentation/test_engine.py @@ -0,0 +1,237 @@ +import unittest + +from skais.ais.ais_trajectory import AISTrajectory +from skais.process.data_augmentation.augmentation_engine import AugmentationEngine + +import pandas as pd + + +class TestEngine(unittest.TestCase): + def setUp(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + self.trajectories = [t1, t2] + + def test_transform_simple_translation(self): + engine = AugmentationEngine(translation_values=[(10, 0), (20, 0)], keep_original=False) + + result = engine.transform(self.trajectories) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + + t3 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [32 + i for i in range(10)] + } + ) + ) + + t4 = AISTrajectory( + pd.DataFrame( + { + 
'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [32 + i for i in range(10)] + } + ) + ) + expected = [t1, t2, t3, t4] + + self.assertEqual(len(result), len(expected)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_transform_simple_flip(self): + engine = AugmentationEngine(flip_values=[(None, 0)], keep_original=False) + + result = engine.transform(self.trajectories) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + + self.assertEqual(len(expected), len(result)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_transform_flip_and_translate(self): + engine = AugmentationEngine(translation_values=[(10, 0)], flip_values=[(None, 0)], keep_original=False) + + result = engine.transform(self.trajectories) + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + + t3 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t4 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + t5 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + t6 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': [22 + i for i in range(10)] + } + ) + ) + + expected = [t1, t2, t3, t4, t5, t6] + + self.assertEqual(len(expected), len(result)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_engine_verbose(self): + engine = AugmentationEngine(flip_values=[(None, 0)], keep_original=False) + + result = engine.transform(self.trajectories, verbose=1) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + + self.assertEqual(len(expected), len(result)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_engine_keep_original(self): + engine = AugmentationEngine(flip_values=[(None, 0)], keep_original=True) + + result = engine.transform(self.trajectories, verbose=1) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': 
[12 + i for i in range(10)] + } + ) + ) + expected = self.trajectories + [t1, t2] + + self.assertEqual(len(expected), len(result)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + +if __name__ == '__main__': + unittest.main() diff --git a/skais/tests/process/data_augmentation/test_flip.py b/skais/tests/process/data_augmentation/test_flip.py new file mode 100644 index 0000000000000000000000000000000000000000..6405ab89dbea84c22753b6b2419afb0a4e606123 --- /dev/null +++ b/skais/tests/process/data_augmentation/test_flip.py @@ -0,0 +1,88 @@ +import unittest +import pandas as pd + +from skais.ais.ais_trajectory import AISTrajectory +from skais.process.data_augmentation.flip import Flip + + +class TestFlip(unittest.TestCase): + def setUp(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [45 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + self.trajectories = [t1, t2] + + def test_flip_equator(self): + aug = Flip(None, 0) + + result = aug.transform(self.trajectories) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-45 - i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [12 - i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + + self.assertEqual(len(expected), len(result)) + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_invariance(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [45 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + aug = Flip(0, None) + + _ = aug.transform(self.trajectories) + for t1, t2 in zip(self.trajectories, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + +if __name__ == '__main__': + unittest.main() diff --git a/skais/tests/process/data_augmentation/test_translator.py b/skais/tests/process/data_augmentation/test_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..41e5542b5eb176a839e441258f3c432f60fb6087 --- /dev/null +++ b/skais/tests/process/data_augmentation/test_translator.py @@ -0,0 +1,85 @@ +import unittest + +from skais.ais.ais_trajectory import AISTrajectory +import pandas as pd + +from skais.process.data_augmentation.translator import Translator + + +class TestTranslator(unittest.TestCase): + def setUp(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + self.trajectories = [t1, t2] + + def test_transform_longitude(self): + aug = Translator(1, 0) + + result = aug.transform(self.trajectories) + + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ 
in range(10)], + 'longitude': [13 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [13 + i for i in range(10)] + } + ) + ) + expected = [t1, t2] + + for t1, t2 in zip(result, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) + + def test_invariance(self): + t1 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [0 for _ in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + t2 = AISTrajectory( + pd.DataFrame( + { + 'ts_sec': [i for i in range(10)], + 'latitude': [-12 + i for i in range(10)], + 'longitude': [12 + i for i in range(10)] + } + ) + ) + + expected = [t1, t2] + + for t1, t2 in zip(self.trajectories, expected): + pd.testing.assert_frame_equal(t1.df, t2.df) +if __name__ == '__main__': + unittest.main() diff --git a/skais/tests/process/test_basic_features_operations.py b/skais/tests/process/test_basic_features_operations.py index da2f62eb6388adf5c8f8036c2b67610aeb0a3626..029583b0ef639888f49dbb58ce611786668746f0 100644 --- a/skais/tests/process/test_basic_features_operations.py +++ b/skais/tests/process/test_basic_features_operations.py @@ -9,7 +9,7 @@ class TestAngular(unittest.TestCase): def test_angular_mean_simple(self): x = np.radians(np.array([1, 359])) - self.assertEqual(0.0, angular_mean(x)) + self.assertEqual(2*np.pi, angular_mean(x)) def test_angular_mean_simple_2(self): x = np.radians(np.array([180, 180, 180, 180, 179, 181])) @@ -19,7 +19,7 @@ class TestAngular(unittest.TestCase): def test_angular_mean_simple_3(self): x = np.radians(np.array([0, 0, 0, 0, 359, 1])) - self.assertEqual(0.0, angular_mean(x)) + self.assertEqual(2*np.pi, angular_mean(x)) def test_angular_mean_first_quadrant(self): x = np.radians(np.array([43, 44, 45, 46, 47])) @@ -34,12 +34,12 @@ class TestAngular(unittest.TestCase): def test_angular_mean_third_quadrant(self): x = np.radians(np.array([223, 224, 225, 226, 227])) - self.assertEqual(-3 * np.pi / 4, angular_mean(x)) + self.assertEqual(5 * np.pi / 4, angular_mean(x)) def test_angular_mean_fourth_quadrant(self): x = np.radians(np.array([313, 314, 315, 316, 317])) - self.assertEqual(-np.pi / 4, angular_mean(x)) + self.assertEqual(7*np.pi / 4, angular_mean(x)) def test_angular_std(self): x = np.radians(np.array([0, 0, 0, 0])) diff --git a/skais/tests/utils/test_geography.py b/skais/tests/utils/test_geography.py new file mode 100644 index 0000000000000000000000000000000000000000..8316524cfbc9447da38b9676c5beebc165cd7252 --- /dev/null +++ b/skais/tests/utils/test_geography.py @@ -0,0 +1,39 @@ +import unittest +import numpy as np + +from skais.utils.geography import position_from_distance + +tol = 0.005 + + +class TestGeography(unittest.TestCase): + def test_position_from_distance_1(self): + position = (0, 0) + distance = (10000, 10000) + + expected = (0.09, 0.09) + result = position_from_distance(position, distance) + + np.testing.assert_allclose(expected, result, tol) + + def test_position_from_distance_2(self): + position = (10, 10) + distance = (10000, 10000) + + expected = (10.0636111, 10.064722222222223) + result = position_from_distance(position, distance) + + np.testing.assert_allclose(expected, result, tol) + + def test_position_from_distance_3(self): + position = (75, 75) + distance = (10000, -10000) + + expected = (75.0633333, 74.75333333333333) + result = position_from_distance(position, distance) + + np.testing.assert_allclose(expected, result, tol) + + +if 
__name__ == '__main__': + unittest.main() diff --git a/skais/tests/utils/test_geometry.py b/skais/tests/utils/test_geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..7a0b76ddb703f43a136c315e5abcf1833460a03d --- /dev/null +++ b/skais/tests/utils/test_geometry.py @@ -0,0 +1,42 @@ +import unittest + +from skais.utils.geometry import bresenham + + +class TestGeometry(unittest.TestCase): + def test_bresenham(self): + result = bresenham(3, 4, 16, 9) + expected = [(3, 4), (4, 4), (5, 5), (6, 5), (7, 6), (8, 6), (9, 6), (10, 7), (11, 7), (12, 7), (13, 8), (14, 8), + (15, 9), (16, 9)] + + self.assertListEqual(result, expected) + + def test_bresenham_inverted(self): + result = bresenham(16, 9, 3, 4) + expected = [(3, 4), (4, 4), (5, 5), (6, 5), (7, 6), (8, 6), (9, 6), (10, 7), (11, 7), (12, 7), (13, 8), + (14, 8), (15, 9), (16, 9)] + + self.assertListEqual(result, expected) + + + def test_bresenham_inverted_2(self): + result = bresenham(16, 4, 3, 9) + expected = [(3, 9), (4, 9), (5, 8), (6, 8), (7, 7), (8, 7), (9, 7), (10, 6), (11, 6), (12, 6), (13, 5), (14, 5), + (15, 4), (16, 4)] + self.assertListEqual(result, expected) + + def test_bresenham_same_line(self): + result = bresenham(3, 4, 10, 4) + expected = [(3, 4), (4, 4), (5, 4), (6, 4), (7, 4), (8, 4), (9, 4), (10, 4)] + + self.assertListEqual(result, expected) + + def test_bresenham_same_column(self): + result = bresenham(3, 4, 3, 10) + expected = [(3, 4), (3, 5), (3, 6), (3, 7), (3, 8), (3, 9), (3, 10)] + + self.assertListEqual(result, expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/skais/utils/__pycache__/__init__.cpython-38.pyc b/skais/utils/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index a9577174079f5508328c6cc2288ec7bc1d22e257..0000000000000000000000000000000000000000 Binary files a/skais/utils/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/skais/utils/__pycache__/geography.cpython-38.pyc b/skais/utils/__pycache__/geography.cpython-38.pyc deleted file mode 100644 index 4c93677db8d3da9f08059537b93a7a25c1f32d7d..0000000000000000000000000000000000000000 Binary files a/skais/utils/__pycache__/geography.cpython-38.pyc and /dev/null differ diff --git a/skais/utils/config.py b/skais/utils/config.py index 5cfa2f0adc7b66f09f8108e5a36afa64494d3255..c386a1f7210aca284a1e9b999af82fa096be9cc9 100644 --- a/skais/utils/config.py +++ b/skais/utils/config.py @@ -8,6 +8,8 @@ import os from pathlib import Path from configparser import ConfigParser +import psycopg2 + def get_config_file(): """ @@ -58,6 +60,7 @@ def get_data_path(): .format(data_path, config_file)) return data_path + def get_db_config(): """ Read db config from user configuration file. @@ -82,4 +85,14 @@ def get_db_config(): if not dict: raise Exception('Invalid data path: {}. 
Update configuration file {}' .format(dict, config_file)) - return dict \ No newline at end of file + return dict + + +def get_db_connection(): + db_config = get_db_config() + connection = psycopg2.connect(user=db_config['user'], + password=db_config['password'], + host=db_config['host'], + port=db_config['port'], + database=db_config['database']) + return connection diff --git a/skais/utils/experiment_tools.py b/skais/utils/experiment_tools.py index 85fa9b31ad945381d72c5b1bd3151a3745bc7b97..8c4900f7bd2fb51a306496821112fc3e16e00055 100644 --- a/skais/utils/experiment_tools.py +++ b/skais/utils/experiment_tools.py @@ -12,8 +12,6 @@ def make_feature_vectors(trajectories, features=None, trajectory.df.dropna(inplace=True) if len(trajectory.df.index) > length_list: trajectory.df['ts'] = trajectory.df.index - trajectory.compute_all_derivatives() - trajectory.compute_diff('heading', 'cog') windows = trajectory.sliding_window(length_list, offset=sliding_window_gap, fields=features + [label_field]) diff --git a/skais/utils/geography.py b/skais/utils/geography.py index 82c5fa4010bfd90e33d0a1733fbc53f91dd34a9b..de21a9aed417ba8e959aa0bb5a5780f9b879634f 100644 --- a/skais/utils/geography.py +++ b/skais/utils/geography.py @@ -1,7 +1,9 @@ -from sklearn.metrics.pairwise import haversine_distances import numpy as np from numba import jit +R = 6371000 + + @jit(nopython=True) def great_circle(lat1, lat2, long1, long2): x1 = np.radians(lat1) @@ -9,10 +11,8 @@ def great_circle(lat1, lat2, long1, long2): x2 = np.radians(lat2) y2 = np.radians(long2) - R = 6371000 - delta_x = x2 - x1 - delta_y= y2 -y1 + delta_y = y2 - y1 a = np.sin(delta_x / 2) * np.sin(delta_x / 2) + np.cos(x1) * np.cos(x2) * \ np.sin(delta_y / 2) * np.sin(delta_y / 2) @@ -21,3 +21,23 @@ def great_circle(lat1, lat2, long1, long2): d = R * c return d + + +def get_coord(lat, lon, height, width, min_lat, max_lat, min_lon, max_lon): + x_coord = max(min(height - int(height * (lat - min_lat) / (max_lat - min_lat)) - 1, height - 1), 0) + y_coord = max(min(int((width - 1) * (lon - min_lon) / (max_lon - min_lon)), width - 1), 0) + + return x_coord, y_coord + + +def position_from_distance(position, distances): + lat = np.arcsin( + np.sin(np.radians(position[0])) * np.cos(distances[0] / R) + np.cos(np.radians(position[0])) * np.sin( + distances[0] / R)) + + long = np.radians(position[1]) + np.arctan2( + np.sin(distances[1] / R) * np.cos(np.radians(position[0])), + np.cos(distances[1] / R) - (np.sin(np.radians(position[1])) * np.sin(lat)) + ) + + return np.degrees(lat), np.degrees(long) diff --git a/skais/utils/geometry.py b/skais/utils/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..0919af4a5b6dd1501a9b5fcb0f3bf746a336cec3 --- /dev/null +++ b/skais/utils/geometry.py @@ -0,0 +1,58 @@ +def bresenham(x1, y1, x2, y2): + dx = int(x2 - x1) + dy = int(y2 - y1) + + sx = sy = 1 + if dx < 0: + sx = -1 + if dy < 0: + sy = -1 + + + pixels = [] + if abs(dx) > abs(dy): # slope < 1 + if x1 > x2: + tmp = x2 + x2 = x1 + x1 = tmp + + tmp = y2 + y2 = y1 + y1 = tmp + sy *= -1 + + y = y1 + p = (2 * abs(dy)) - abs(dx) + pixels.append((x1, y1)) + + for x in range(x1 + 1, x2 + 1): + if p < 0: + p += 2 * abs(dy) + else: + y += sy + p += (2 * abs(dy)) - (2 * abs(dx)) + pixels.append((x, y)) + else: # slope >= 1 + if y1 > y2: + tmp = x2 + x2 = x1 + x1 = tmp + + tmp = y2 + y2 = y1 + y1 = tmp + sx *= -1 + x = x1 + + pixels.append((x1, y1)) + p = (2 * abs(dx)) - abs(dy) + for y in range(y1 + 1, y2 + 1): + if p < 0: + p += 2 * abs(dx) + else: + x += sx + 
p += (2 * abs(dx)) - (2 * abs(dy)) + pixels.append((x, y)) + + return pixels +
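
Usage sketch for the reworked `AISPoints.normalize`: the method is now driven by per-feature keyword arguments and returns a dictionary that can be passed back through `normalization_dict=` to replay the same scaling on other data. The column names and sample values below are illustrative only.

```python
import pandas as pd

from skais.ais.ais_points import AISPoints

# Fit-style pass: scale the training points and keep the returned dictionary.
train = AISPoints(pd.DataFrame({
    "sog": [0.0, 5.0, 10.0, 15.0],
    "cog": [0.0, 90.0, 180.0, 270.0],
    "heading": [0.0, 90.0, 180.0, 270.0],
}))
norm_dict = train.normalize(min_max_features=["sog"],
                            standardization_features=["cog"],
                            divide_by_value=[("heading", 360)])

# Replay pass: apply exactly the same scaling to held-out points.
test = AISPoints(pd.DataFrame({
    "sog": [2.0, 20.0],
    "cog": [45.0, 315.0],
    "heading": [10.0, 350.0],
}))
test.normalize(normalization_dict=norm_dict)
```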
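
The new `on_edge='ignore'` mode of `apply_func_on_window` shrinks the window at the array boundaries instead of copying the edge values, and is exposed through `AISTrajectory.apply_func_on_time_window`. A minimal sketch, with made-up `sog` values:

```python
import numpy as np
import pandas as pd

from skais.ais.ais_trajectory import AISTrajectory

trajectory = AISTrajectory(pd.DataFrame({
    "ts_sec": list(range(10)),
    "sog": [float(i) for i in range(10)],
}))

# Smooth speed over ground with a window of radius 2; 'ignore' truncates the
# window at both ends instead of padding with the first/last value.
trajectory.apply_func_on_time_window(np.mean, 2, "sog", on_edge="ignore")
print(trajectory.df["sog"].to_numpy())  # [1. 1.5 2. 3. 4. 5. 6. 7. 7.5 8.]
```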
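
The new data-augmentation package combines `Translator` and `Flip` transformers through `Pipeline` objects built by `AugmentationEngine`; when both translation and flip values are given, the engine also chains them. A small sketch assuming a single illustrative trajectory:

```python
import pandas as pd

from skais.ais.ais_trajectory import AISTrajectory
from skais.process.data_augmentation.augmentation_engine import AugmentationEngine

trajectory = AISTrajectory(pd.DataFrame({
    "ts_sec": list(range(10)),
    "latitude": [45.0 + 0.01 * i for i in range(10)],
    "longitude": [12.0 + 0.01 * i for i in range(10)],
}))

engine = AugmentationEngine(
    translation_values=[(10, 0)],  # Translator shifts longitudes by +10 degrees
    flip_values=[(None, 0)],       # Flip mirrors latitudes across the equator
    keep_original=True,
)

# original + translated + flipped + (translated then flipped) = 4 trajectories
augmented = engine.transform([trajectory], verbose=1)
print(len(augmented))  # 4
```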
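
`AISTrajectory.generate_array_from_positions` rasterises a trajectory into a `(height, width, channels)` uint8 grid fitted to its bounding box, optionally joining consecutive fixes with Bresenham line segments. A sketch reusing the toy coordinates from the tests:

```python
import pandas as pd

from skais.ais.ais_trajectory import AISTrajectory

trajectory = AISTrajectory(pd.DataFrame({
    "ts_sec": [0, 1, 2, 3],
    "latitude": [0, 10, 0, -10],
    "longitude": [0, 10, 10, -10],
}))

# 9x9 single-channel image fitted to the track's bounding box; link=True draws
# Bresenham segments between consecutive positions.
grid = trajectory.generate_array_from_positions(height=9, width=9, link=True,
                                                bounding_box='fit', node_size=0)
print(grid.reshape((9, 9)))
```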