diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py index 157e2f0a424ff0584b541da7749a89787afc7016..0e832e8fe87efdf63145079f1653a324ff0fbb07 100644 --- a/skais/ais/ais_trajectory.py +++ b/skais/ais/ais_trajectory.py @@ -224,32 +224,46 @@ class AISTrajectory: l2 = l2_angle(dat, radius) self.df[f"angle_l2"] = l2 - def normalize(self, features, normalization_type="min-max"): - normalization_dict = {} - if normalization_type == "min-max": - for f in features: - minimum = self.df[f].min() - maximum = self.df[f].max() - self.df[f] = (self.df[f] - minimum) / (maximum - minimum) - normalization_dict[f"{f}_minimum"] = minimum - normalization_dict[f"{f}_maximum"] = maximum - - elif normalization_type == "standardization": - normalisation_factors = ("standardization", {}) - for f in features: - mean = self.df[f].mean() - std = self.df[f].std() - if std == 0: - print("Warning: std = %d", std) - std = 1 - self.df[f] = (self.df[f] - mean) / std - normalization_dict[f"{f}_mean"] = mean - normalization_dict[f"{f}_std"] = std - + def normalize(self, features, normalization_type="min-max", dictionary=None): + normalization_dict = None + if dictionary is not None: + if dictionary["type"] == "min-max": + for f in features: + minimum = dictionary[f"{f}_minimum"] + maximum = dictionary[f"{f}_maximum"] + self.df[f] = (self.df[f] - minimum) / (maximum - minimum) + + elif dictionary["type"] == "standardization": + for f in features: + mean = dictionary[f"{f}_mean"] + std = dictionary[f"{f}_std"] + self.df[f] = (self.df[f] - mean) / std else: - raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, " - f"standardization]") - return normalization_type, normalization_dict + normalization_dict = {"type": normalization_type} + if normalization_type == "min-max": + for f in features: + minimum = self.df[f].min() + maximum = self.df[f].max() + self.df[f] = (self.df[f] - minimum) / (maximum - minimum) + normalization_dict[f"{f}_minimum"] = minimum + normalization_dict[f"{f}_maximum"] = maximum + + elif normalization_type == "standardization": + for f in features: + mean = self.df[f].mean() + std = self.df[f].std() + if std == 0: + print("Warning: std = %d", std) + std = 1 + self.df[f] = (self.df[f] - mean) / std + normalization_dict[f"{f}_mean"] = mean + normalization_dict[f"{f}_std"] = std + + else: + raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, " + f"standardization]") + + return normalization_dict def compute_derivative(self, field): dt = self.df['ts_sec'].diff() / 60 diff --git a/skais/utils/experiment_tools.py b/skais/utils/experiment_tools.py index 1a68fabcc98d8700ae1892821df2a6d2369c36ef..85fa9b31ad945381d72c5b1bd3151a3745bc7b97 100644 --- a/skais/utils/experiment_tools.py +++ b/skais/utils/experiment_tools.py @@ -9,6 +9,7 @@ def make_feature_vectors(trajectories, features=None, zero = [0 for _ in range(nb_classes)] for trajectory in trajectories: + trajectory.df.dropna(inplace=True) if len(trajectory.df.index) > length_list: trajectory.df['ts'] = trajectory.df.index trajectory.compute_all_derivatives()