Skip to content
Snippets Groups Projects

Resolve "Image creation bugs with 0 size windows"

Status: Closed. Raphael Sturgis requested to merge 21-image-creation-bugs-with-0-size-windows into main
27 files
+1140
-123
Compare changes
  • Side-by-side
  • Inline
Files
27
+84
-61
@@ -3,38 +3,6 @@ import pandas as pd
@@ -3,38 +3,6 @@ import pandas as pd
from scipy.stats import stats
from scipy.stats import stats
# def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
# n_sample = len(df.index)
# result = []
# work_df = df.copy()
#
# index = 0
# while index < n_sample:
# i = compute_trajectory(df['ts_sec'][index:].to_numpy(), time_gap, size_limit)
# trajectory = AISTrajectory(work_df[:i], interpolation_time=interpolation_time)
# if len(trajectory.df.index) > min_size:
# result.append(trajectory)
# work_df = work_df[i:]
# index += i
#
# return result
#
#
# @jit(nopython=True)
# def compute_trajectory(times, time_gap, size_limit):
# n_samples = len(times)
#
# previous_date = times[0]
#
# i = 0
# for i in range(size_limit):
# if i >= n_samples or ((times[i] - previous_date) / 60 > time_gap):
# return i
# previous_date = times[i]
#
# return i + 1
class AISPoints:
class AISPoints:
# Todo: Should be more elegant
# Todo: Should be more elegant
@@ -73,36 +41,91 @@ class AISPoints:
@@ -73,36 +41,91 @@ class AISPoints:
self.df = self.df[self.df["heading"] <= 360]
self.df = self.df[self.df["heading"] <= 360]
self.df = self.df[self.df["heading"] >= 0]
self.df = self.df[self.df["heading"] >= 0]
def normalize(self, features, normalization_type="min-max"):
def normalize(self, min_max_features=(), standardization_features=(), third_quartile_features=(),
normalization_dict = {}
divide_by_value=(), divide_by_max=(), normalization_dict=None):
if normalization_type == "min-max":
if normalization_dict is None:
for f in features:
normalization_dict = {}
minimum = self.df[f].min()
for f in min_max_features:
maximum = self.df[f].max()
if f in self.df.columns:
diff = (maximum - minimum)
normalization_dict[f] = {'type': 'min-max'}
if diff == 0:
minimum = self.df[f].min()
print("Warning: diff = %d", diff)
maximum = self.df[f].max()
diff = 1
diff = (maximum - minimum)
self.df[f] = (self.df[f] - minimum) / diff
if diff == 0:
normalization_dict[f"{f}_minimum"] = minimum
print("Warning: diff = 0")
normalization_dict[f"{f}_maximum"] = maximum
self.df[f] = (self.df[f] - minimum)
else:
elif normalization_type == "standardization":
self.df[f] = (self.df[f] - minimum) / diff
normalisation_factors = ("standardization", {})
normalization_dict[f]["minimum"] = minimum
for f in features:
normalization_dict[f]["maximum"] = maximum
mean = self.df[f].mean()
for f in standardization_features:
std = self.df[f].std()
if f in self.df.columns:
if std == 0:
normalization_dict[f] = {'type': 'standardization'}
print("Warning: std = %d", std)
mean = self.df[f].mean()
std = 1
std = self.df[f].std()
self.df[f] = (self.df[f] - mean) / std
if std == 0:
normalization_dict[f"{f}_mean"] = mean
print("Warning: std = %d", std)
normalization_dict[f"{f}_std"] = std
std = 1
self.df[f] = (self.df[f] - mean) / std
 
normalization_dict[f]["mean"] = mean
 
normalization_dict[f]["std"] = std
 
for f in third_quartile_features:
 
if f in self.df.columns:
 
normalization_dict[f] = {'type': '3rd quartile'}
 
third_quartile = self.df[f].quantile(0.75)
 
if third_quartile == 0:
 
print("Warning: third quartile = %d", third_quartile)
 
third_quartile = 1
 
self.df[f] = self.df[f] / third_quartile
 
normalization_dict[f]["value"] = third_quartile
 
for t in divide_by_value:
 
f = t[0]
 
value = t[1]
 
if f in self.df.columns:
 
if value != 0:
 
normalization_dict[f] = {'type': 'divide by value',
 
'value': value}
 
self.df[f] = self.df[f] / value
 
else:
 
print("Warning: dividing by 0")
 
for f in divide_by_max:
 
if f in self.df.columns:
 
maximum = self.df[f].max()
 
normalization_dict[f] = {'type': 'divide by max',
 
'maximum': maximum}
 
self.df[f] = self.df[f] / maximum
else:
else:
raise ValueError(f"{normalization_type} not a valid normalization method. Must be on of [min-max, "
for f in normalization_dict:
f"standardization]")
if f in self.df.columns:
return normalization_type, normalization_dict
if normalization_dict[f]['type'] == 'min-max':
 
minimum = normalization_dict[f]["minimum"]
 
maximum = normalization_dict[f]["maximum"]
 
diff = (maximum - minimum)
 
if diff == 0:
 
print("Warning: diff = 0")
 
diff = 1
 
self.df[f] = (self.df[f] - minimum) / diff
 
elif normalization_dict[f]['type'] == "standardization":
 
mean = normalization_dict[f]["mean"]
 
std = normalization_dict[f]["std"]
 
if std == 0:
 
print("Warning: std = 0")
 
std = 1
 
self.df[f] = (self.df[f] - mean) / std
 
elif normalization_dict[f]['type'] == "3rd quartile":
 
third_quartile = normalization_dict[f]["value"]
 
self.df[f] = self.df[f] / third_quartile
 
elif normalization_dict[f]['type'] == "divide by value":
 
value = normalization_dict[f]["value"]
 
self.df[f] = self.df[f] / value
 
elif normalization_dict[f]['type'] == "divide by max":
 
maximum = normalization_dict[f]["maximum"]
 
self.df[f] = self.df[f] / maximum
 
else:
 
raise ValueError(
 
f"{normalization_dict[f]['type']} not a valid normalization method. Must be on of [min-max,"
 
f" standardization, 3rd quartile, divide by value]")
 
return normalization_dict
# New features
# New features
def compute_drift(self):
def compute_drift(self):
Loading