From 3b89a43e071511f1f90c8412b1e7f5842fa5ed3a Mon Sep 17 00:00:00 2001
From: Raphael <raphael.sturgis@gmail.com>
Date: Tue, 9 Nov 2021 09:24:32 +0100
Subject: [PATCH] added normalisation with dictionary

---
 skais/ais/ais_trajectory.py     | 64 ++++++++++++++++++++-------------
 skais/utils/experiment_tools.py |  1 +
 2 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/skais/ais/ais_trajectory.py b/skais/ais/ais_trajectory.py
index 157e2f0..0e832e8 100644
--- a/skais/ais/ais_trajectory.py
+++ b/skais/ais/ais_trajectory.py
@@ -224,32 +224,46 @@ class AISTrajectory:
         l2 = l2_angle(dat, radius)
         self.df[f"angle_l2"] = l2
 
-    def normalize(self, features, normalization_type="min-max"):
-        normalization_dict = {}
-        if normalization_type == "min-max":
-            for f in features:
-                minimum = self.df[f].min()
-                maximum = self.df[f].max()
-                self.df[f] = (self.df[f] - minimum) / (maximum - minimum)
-                normalization_dict[f"{f}_minimum"] = minimum
-                normalization_dict[f"{f}_maximum"] = maximum
-
-        elif normalization_type == "standardization":
-            normalisation_factors = ("standardization", {})
-            for f in features:
-                mean = self.df[f].mean()
-                std = self.df[f].std()
-                if std == 0:
-                    print("Warning: std = %d", std)
-                    std = 1
-                self.df[f] = (self.df[f] - mean) / std
-                normalization_dict[f"{f}_mean"] = mean
-                normalization_dict[f"{f}_std"] = std
-
+    def normalize(self, features, normalization_type="min-max", dictionary=None):
+        normalization_dict = None
+        if dictionary is not None:
+            if dictionary["type"] == "min-max":
+                for f in features:
+                    minimum = dictionary[f"{f}_minimum"]
+                    maximum = dictionary[f"{f}_maximum"]
+                    self.df[f] = (self.df[f] - minimum) / (maximum - minimum)
+
+            elif dictionary["type"] == "standardization":
+                for f in features:
+                    mean = dictionary[f"{f}_mean"]
+                    std = dictionary[f"{f}_std"]
+                    self.df[f] = (self.df[f] - mean) / std
         else:
-            raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, "
-                             f"standardization]")
-        return normalization_type, normalization_dict
+            normalization_dict = {"type": normalization_type}
+            if normalization_type == "min-max":
+                for f in features:
+                    minimum = self.df[f].min()
+                    maximum = self.df[f].max()
+                    self.df[f] = (self.df[f] - minimum) / (maximum - minimum)
+                    normalization_dict[f"{f}_minimum"] = minimum
+                    normalization_dict[f"{f}_maximum"] = maximum
+
+            elif normalization_type == "standardization":
+                for f in features:
+                    mean = self.df[f].mean()
+                    std = self.df[f].std()
+                    if std == 0:
+                        print("Warning: std = %d", std)
+                        std = 1
+                    self.df[f] = (self.df[f] - mean) / std
+                    normalization_dict[f"{f}_mean"] = mean
+                    normalization_dict[f"{f}_std"] = std
+
+            else:
+                raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, "
+                                 f"standardization]")
+
+        return normalization_dict
 
     def compute_derivative(self, field):
         dt = self.df['ts_sec'].diff() / 60
diff --git a/skais/utils/experiment_tools.py b/skais/utils/experiment_tools.py
index 1a68fab..85fa9b3 100644
--- a/skais/utils/experiment_tools.py
+++ b/skais/utils/experiment_tools.py
@@ -9,6 +9,7 @@ def make_feature_vectors(trajectories, features=None,
     zero = [0 for _ in range(nb_classes)]
 
     for trajectory in trajectories:
+        trajectory.df.dropna(inplace=True)
         if len(trajectory.df.index) > length_list:
             trajectory.df['ts'] = trajectory.df.index
             trajectory.compute_all_derivatives()
-- 
GitLab