def normalize(self, features, normalization_type="min-max", normalization_dict=None):
    """Normalize the given columns of ``self.df`` in place.

    The method works in two modes:

    * **Fit and apply** (``normalization_dict is None``): the statistics of
      each feature are computed from ``self.df``, stored in a new dictionary
      and then used to rescale the column.
    * **Apply only** (``normalization_dict`` given): the statistics stored in
      the dictionary are reused, so another dataset can be rescaled with the
      exact same parameters.

    :param features: iterable of column names of ``self.df`` to normalize.
    :param normalization_type: ``"min-max"`` (``(x - min) / (max - min)``) or
        ``"standardization"`` (``(x - mean) / std``). When
        ``normalization_dict`` is given, its ``'normalization_type'`` entry
        takes precedence; this argument is only used as a fallback for
        dictionaries that do not carry that entry.
    :param normalization_dict: dictionary previously returned by this method,
        or ``None`` to compute the statistics from ``self.df``.
    :return: the tuple ``(normalization_type, normalization_dict)``. The
        dictionary holds ``'normalization_type'`` plus, per feature ``f``,
        either ``f"{f}_minimum"`` / ``f"{f}_maximum"`` or ``f"{f}_mean"`` /
        ``f"{f}_std"``. A dictionary passed in is returned as the same object.
    :raises ValueError: if the effective normalization type is not supported.
    :raises KeyError: if ``normalization_dict`` is given but lacks the
        statistics of one of ``features``.
    """
    fit = normalization_dict is None
    if fit:
        normalization_dict = {'normalization_type': normalization_type}
    else:
        # Dictionaries without a 'normalization_type' entry fall back to the
        # explicit argument instead of failing with a KeyError.
        normalization_type = normalization_dict.get('normalization_type', normalization_type)

    # Validate before touching the dataframe so a bad type never leaves it
    # partially normalized.
    if normalization_type not in ("min-max", "standardization"):
        raise ValueError(f"{normalization_type} not a valid normalization method. Must be one of [min-max, "
                         f"standardization]")

    for f in features:
        column = self.df[f]

        if normalization_type == "min-max":
            if fit:
                normalization_dict[f"{f}_minimum"] = column.min()
                normalization_dict[f"{f}_maximum"] = column.max()
            offset = normalization_dict[f"{f}_minimum"]
            scale = normalization_dict[f"{f}_maximum"] - offset
            scale_name = "diff"
        else:
            if fit:
                normalization_dict[f"{f}_mean"] = column.mean()
                normalization_dict[f"{f}_std"] = column.std()
            offset = normalization_dict[f"{f}_mean"]
            scale = normalization_dict[f"{f}_std"]
            scale_name = "std"

        if scale == 0:
            # A zero scale (e.g. a constant column) would divide by zero;
            # dividing by 1 keeps the centred values instead. The stored
            # statistics are left untouched.
            print(f"Warning: {scale_name} = {scale}")
            scale = 1

        self.df[f] = (column - offset) / scale

    return normalization_type, normalization_dict