Skip to content
Snippets Groups Projects
Commit 57d2a200 authored by Raphael Sturgis's avatar Raphael Sturgis
Browse files

moved histogram functions

parent 4c46b543
No related branches found
No related tags found
1 merge request: !6 Develop
...@@ -8,7 +8,7 @@ from scipy.stats import stats ...@@ -8,7 +8,7 @@ from scipy.stats import stats
from skais.ais.ais_trajectory import AISTrajectory from skais.ais.ais_trajectory import AISTrajectory
# TODO: remove
def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None): def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolation_time=None):
n_sample = len(df.index) n_sample = len(df.index)
result = [] result = []
...@@ -26,6 +26,7 @@ def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolatio ...@@ -26,6 +26,7 @@ def compute_trajectories(df, time_gap, min_size=50, size_limit=500, interpolatio
return result return result
# TODO: remove
@jit(nopython=True) @jit(nopython=True)
def compute_trajectory(times, time_gap, size_limit): def compute_trajectory(times, time_gap, size_limit):
n_samples = len(times) n_samples = len(times)
...@@ -92,32 +93,7 @@ class AISPoints: ...@@ -92,32 +93,7 @@ class AISPoints:
f"standardization]") f"standardization]")
return normalization_type, normalization_dict return normalization_type, normalization_dict
def histogram(self, features, bins=10, ranges=None, label=None, y_field='label'): # TODO: rename
if label is not None:
tmp = self.df[self.df[y_field] == label]
else:
tmp = self.df
dat = tmp[features]
h = np.histogramdd(dat.to_numpy(), bins, ranges)[0]
if h.sum() == 0:
return np.full(h.shape, 1 / h.size)
else:
return h / h.sum()
def disjointed_histogram(self, features, bins, ranges, label=None, y_field='label'):
if label is not None:
tmp = self.df[self.df[y_field] == label]
else:
tmp = self.df
if type(bins) == int:
bins = [bins for _ in features]
histograms = []
for feature, bin, f_range in zip(features, bins, ranges):
histograms.append(np.histogram(tmp[feature], bin, f_range))
return histograms
def compute_diff_heading_cog(self): def compute_diff_heading_cog(self):
self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180), self.df["diff"] = self.df.apply(lambda x: 180 - abs(abs(x['heading'] - x['cog']) - 180),
axis=1) axis=1)
...@@ -129,48 +105,10 @@ class AISPoints: ...@@ -129,48 +105,10 @@ class AISPoints:
self.df = self.df[self.df["heading"] <= 360] self.df = self.df[self.df["heading"] <= 360]
self.df = self.df[self.df["heading"] >= 0] self.df = self.df[self.df["heading"] >= 0]
def histogram_joint_x_y(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]]
, y_nb_bins=2, y_fields='label', y_range=[0, 1]):
return self.histogram(x_fields + [y_fields],
bins=[x_nb_bins for i in x_fields] + [y_nb_bins],
ranges=x_range + [y_range])
def histogram_x_knowing_y(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]]
, y_nb_bins=2, y_field='label'):
result = []
for i in range(y_nb_bins):
layer = self.histogram(x_fields, bins=x_nb_bins, ranges=x_range, label=i, y_field=y_field)
result.append(layer)
return np.stack(result, axis=len(x_fields))
def disjointed_histogram_x_knowing_y(self, features, x_nb_bins=10, x_range=[[0, 1]]
, y_nb_bins=4, y_field='label'):
out = []
for feature, f_range in zip(features, x_range):
result = []
for i in range(y_nb_bins):
layer, _ = np.histogram(self.df[self.df[y_field] == i][feature].to_numpy(), bins=x_nb_bins,
range=f_range)
if layer.sum() == 0:
layer = np.full(layer.shape, 1)
result.append(layer)
out.append(np.stack(result))
return out
def histogram_y_knowing_x(self, x_fields=["sog", "diff"], x_nb_bins=10, x_range=[[0, 30], [0, 180]]
, y_nb_bins=2, y_field='label', y_range=[0, 1]):
h_joint = self.histogram_joint_x_y(x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range)
y_hist = self.histogram(features=y_field, bins=y_nb_bins, ranges=[y_range])
result = np.zeros(h_joint.shape)
for idx, x in np.ndenumerate(h_joint):
if h_joint[idx[:-1]].sum() == 0:
result[idx] = y_hist[idx[-1]]
else:
result[idx] = x / h_joint[idx[:-1]].sum()
return result
# TODO: redo
def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None): def get_trajectories(self, time_gap=30, min_size=50, interpolation_time=None):
if 'ts' in self.df: if 'ts' in self.df:
......
import numpy as np
def histogram(ais_points, features, bins, ranges=None, label=None, y_field='label'):
    """Return a normalised multi-dimensional histogram of the selected columns.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        features: column selector applied to the DataFrame (``df[features]``).
        bins: bin specification forwarded to ``np.histogramdd``.
        ranges: per-dimension ranges forwarded to ``np.histogramdd``.
        label: when not ``None``, only rows whose ``y_field`` equals this
            value are counted.
        y_field: name of the column compared against ``label``.

    Returns:
        The bin counts divided by their total. When no sample lands in any
        bin, a uniform array (every cell equal to ``1 / number_of_cells``)
        is returned instead, so the result always sums to one.
    """
    frame = ais_points.df
    if label is not None:
        frame = frame[frame[y_field] == label]

    counts, _ = np.histogramdd(frame[features].to_numpy(), bins, ranges)
    total = counts.sum()
    if total == 0:
        # Nothing to normalise by: fall back to a uniform distribution.
        return np.full(counts.shape, 1 / counts.size)
    return counts / total
def disjointed_histogram(ais_points, features, bins, ranges, label=None, y_field='label'):
    """Compute one independent 1-D histogram per feature.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        features: iterable of column names to histogram separately.
        bins: either a single integer bin count (Python ``int`` or NumPy
            integer) applied to every feature, or a sequence with one bin
            specification per feature.
        ranges: sequence with one ``(low, high)`` range per feature.
        label: when not ``None``, only rows whose ``y_field`` equals this
            value are counted.
        y_field: name of the column compared against ``label``.

    Returns:
        A list with one ``np.histogram`` result, i.e. a ``(counts, edges)``
        tuple, per feature. Counts are raw, not normalised. Features, bins
        and ranges are paired with ``zip``, so the shortest of the three
        determines how many histograms are produced.
    """
    frame = ais_points.df
    if label is not None:
        frame = frame[frame[y_field] == label]

    # Accept NumPy integer scalars too: a strict ``type(bins) == int`` check
    # lets e.g. ``np.int64(10)`` through to ``zip`` where it is not iterable.
    if isinstance(bins, (int, np.integer)):
        bins = [bins for _ in features]

    return [
        np.histogram(frame[feature], h_bin, f_range)
        for feature, h_bin, f_range in zip(features, bins, ranges)
    ]
def histogram_joint_x_y(ais_points, x_fields, x_nb_bins, x_range, y_fields, y_nb_bins, y_range):
    """Return the normalised joint histogram of the x columns and one y column.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        x_fields: list of x column names (must be a list: it is concatenated
            with ``[y_fields]``).
        x_nb_bins: bin count used for every x column.
        x_range: list with one range per x column.
        y_fields: name of the y column, appended as the last dimension.
        y_nb_bins: bin count for the y dimension.
        y_range: range for the y dimension.

    Returns:
        Whatever ``histogram`` returns for the combined columns; the y
        dimension is the last axis.
    """
    joint_fields = x_fields + [y_fields]
    joint_bins = [x_nb_bins] * len(x_fields) + [y_nb_bins]
    joint_ranges = x_range + [y_range]
    return histogram(ais_points, joint_fields, bins=joint_bins, ranges=joint_ranges)
def histogram_x_knowing_y(ais_points, x_fields, x_nb_bins, x_range, y_nb_bins, y_field):
    """Stack one normalised x-histogram per label value.

    For each integer label ``0 .. y_nb_bins - 1`` the rows whose ``y_field``
    equals that label are histogrammed over ``x_fields`` with ``histogram``.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        x_fields: x column names; its length fixes the position of the
            label axis in the output.
        x_nb_bins: bin specification forwarded to ``histogram``.
        x_range: ranges forwarded to ``histogram``.
        y_nb_bins: number of label values to iterate over.
        y_field: name of the label column.

    Returns:
        The per-label histograms stacked along a new axis at index
        ``len(x_fields)``.
    """
    layers = [
        histogram(ais_points, x_fields, bins=x_nb_bins, ranges=x_range,
                  label=y_value, y_field=y_field)
        for y_value in range(y_nb_bins)
    ]
    return np.stack(layers, axis=len(x_fields))
def disjointed_histogram_x_knowing_y(ais_points, features, x_nb_bins, x_range, y_nb_bins, y_field):
    """Build, for every feature, a table of per-label 1-D bin counts.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        features: iterable of column names, each histogrammed on its own.
        x_nb_bins: ``bins`` argument passed to ``np.histogram``.
        x_range: iterable with one range per feature (paired via ``zip``).
        y_nb_bins: number of integer label values ``0 .. y_nb_bins - 1``.
        y_field: name of the label column.

    Returns:
        A list with one array per feature. Row ``i`` of each array holds the
        raw counts of that feature for rows labelled ``i``; a row whose
        counts are all zero is replaced by a row of ones.
    """

    def _counts_for(feature, f_range, y_value):
        # Raw counts of one feature restricted to a single label value.
        frame = ais_points.df
        values = frame[frame[y_field] == y_value][feature].to_numpy()
        counts, _ = np.histogram(values, bins=x_nb_bins, range=f_range)
        if counts.sum() == 0:
            # Empty layer: substitute a flat, non-zero row.
            return np.full(counts.shape, 1)
        return counts

    return [
        np.stack([_counts_for(feature, f_range, y_value) for y_value in range(y_nb_bins)])
        for feature, f_range in zip(features, x_range)
    ]
def histogram_y_knowing_x(ais_points, x_fields, x_nb_bins, x_range, y_nb_bins, y_field, y_range):
    """Return the distribution of the label conditioned on each x cell.

    The joint histogram over ``x_fields`` and ``y_field`` is divided, cell by
    cell, by its sum over the label axis. For x cells that contain no mass
    at all, the marginal label histogram is used instead.

    Args:
        ais_points: object exposing a pandas DataFrame as ``.df``.
        x_fields: list of x column names.
        x_nb_bins: bin count used for every x column.
        x_range: list with one range per x column.
        y_nb_bins: bin count for the label dimension.
        y_field: name of the label column.
        y_range: range for the label dimension.

    Returns:
        An array with the same shape as the joint histogram (label on the
        last axis) whose slices along the last axis each sum to one.
    """
    # Pass the y arguments by keyword: histogram_joint_x_y declares
    # ``y_fields`` before ``y_nb_bins``, so the positional order used here
    # previously swapped the column name and the bin count.
    h_joint = histogram_joint_x_y(ais_points, x_fields, x_nb_bins, x_range,
                                  y_fields=y_field, y_nb_bins=y_nb_bins, y_range=y_range)
    y_hist = histogram(ais_points, features=y_field, bins=y_nb_bins, ranges=[y_range])

    # Mass of every x cell, kept broadcastable against the label axis.
    totals = h_joint.sum(axis=-1, keepdims=True)
    empty = totals == 0
    # Divide by 1 where the cell is empty to avoid 0/0; those entries are
    # overwritten by the marginal label histogram below.
    conditional = h_joint / np.where(empty, 1, totals)
    return np.where(empty, y_hist, conditional)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment