diff --git a/.env.template b/.env.template
new file mode 100644
index 0000000000000000000000000000000000000000..1ed407e49cb11f7fda5797de1f4f9174f8547b71
--- /dev/null
+++ b/.env.template
@@ -0,0 +1,38 @@
+DATA_DIR="~/data/ocean_engineering_2022_code/data/"
+MODEL_DIR="~/data/ocean_engineering_2022_code/models/"
+
+# Paths to the NE and LAX AIS dataset CSV files (set before running)
+NE_DATA=
+LAX_DATA=
+
+NE_TRAIN_INDICES="91 85 52 1 50 60 19 77 78 51 47 80 32 39 81 12 10 72 67 31 41 40 97 61 0 82 70 29 59 17 79 13 83 33 62 27 36 66 2 88 53 8 15 75 28 64 76 35 42 48 49 93 20 89 6 96 69 56 30 38 44 25 37 22 84 54 57 7 21 45 24 26 46 65 92 5 3 14 73 87"
+NE_TEST_INDICES="94 43 63 74 58 23 9 16 4 71 86 90 11 18 34 55 95 68"
+
+MASK_VALUE=99999.99
+TIME_SERIES_LENGTH=100
+
+# LSTM
+LSTM_BATCH_SIZE=4
+
+# SOG
+LSTM_SOG_NUMBER_OF_LAYERS=5
+LSTM_SOG_NUMBER_OF_UNITS=10
+
+# COG
+LSTM_COG_NUMBER_OF_LAYERS=3
+LSTM_COG_NUMBER_OF_UNITS=50
+
+# HEADING
+LSTM_HEADING_NUMBER_OF_LAYERS=3
+LSTM_HEADING_NUMBER_OF_UNITS=500
+
+# POSITION
+LSTM_POSITION_NUMBER_OF_LAYERS=2
+LSTM_POSITION_NUMBER_OF_UNITS=500
+
+# RAW_NO_SOG
+LSTM_RAW_NO_SOG_NUMBER_OF_LAYERS=1
+LSTM_RAW_NO_SOG_NUMBER_OF_UNITS=500
+
+# RAW
+LSTM_RAW_NUMBER_OF_LAYERS=6
+LSTM_RAW_NUMBER_OF_UNITS=200
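+
+# The variables below are read by the training and prediction scripts but were
+# missing from this template. Apart from OUTPUT_SIZE (the number of behaviour
+# classes: Underway, Moored, At Anchor, Drifting), the values are not specified
+# in the code and must be filled in before running.
+OUTPUT_SIZE=4
+
+LSTM_BATCH_SIZE_INFERENCE=
+LSTM_CUSTOM_TIME_WINDOW=
+
+TN_BATCH_SIZE=
+TN_BATCH_SIZE_INFERENCE=
+TN_CUSTOM_TIME_WINDOW=
+
+HMM_CUSTOM_TIME_WINDOW=
+HMM_CUSTOM_NUMBER_OF_STATES=
+HMM_SOG_NUMBER_OF_STATES=
+HMM_COG_NUMBER_OF_STATES=
+HMM_HEADING_NUMBER_OF_STATES=
+HMM_POSITION_NUMBER_OF_STATES=
+HMM_RAW_NO_SOG_NUMBER_OF_STATES=
+HMM_RAW_NUMBER_OF_STATES=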
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..0997a5b82179c82f3969c7396acb1d6ae9339ab1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.env
+venv/
+__pycache__
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1612f0b76bd6e369572824862b6ed3f171a9431
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,55 @@
+absl-py==1.4.0
+astunparse==1.6.3
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+flatbuffers==23.3.3
+gast==0.4.0
+google-auth==2.17.2
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+grpcio==1.53.0
+h5py==3.8.0
+hmmlearn==0.2.8
+idna==3.4
+jax==0.4.8
+joblib==1.2.0
+keras==2.12.0
+libclang==16.0.0
+llvmlite==0.39.1
+Markdown==3.4.3
+MarkupSafe==2.1.2
+ml-dtypes==0.0.4
+numba==0.56.4
+numpy==1.23.5
+oauthlib==3.2.2
+opt-einsum==3.3.0
+packaging==23.0
+pandas==2.0.0
+protobuf==4.22.1
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+pytz==2023.3
+requests==2.28.2
+requests-oauthlib==1.3.1
+rsa==4.9
+scikit-learn==1.2.2
+scipy==1.10.1
+six==1.16.0
+tensorboard==2.12.1
+tensorboard-data-server==0.7.0
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.12.0
+tensorflow-estimator==2.12.0
+tensorflow-io-gcs-filesystem==0.32.0
+termcolor==2.2.0
+threadpoolctl==3.1.0
+tqdm==4.65.0
+typing_extensions==4.5.0
+tzdata==2023.3
+urllib3==1.26.15
+Werkzeug==2.2.3
+wrapt==1.14.1
+matplotlib==3.7.1
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/models/hmm.py b/src/models/hmm.py
new file mode 100644
index 0000000000000000000000000000000000000000..309b378abc82ed9e5873a456f6c60a31ca820604
--- /dev/null
+++ b/src/models/hmm.py
@@ -0,0 +1,13 @@
+import os
+import pickle
+
+from skais.learn.hmm_gmm_classifier import GMMHMMClassifier
+
+
+def train_HMM(x, y, n_states, name):
+    # Fit the GMM-HMM classifier on the training sequences and pickle it under
+    # MODEL_DIR/hmm/<name>.pkl.
+    model = GMMHMMClassifier(n_states, max_iter=100, verbose=False)
+
+    model.fit(x, y)
+
+    with open(f"{os.environ['MODEL_DIR']}/hmm/{name}.pkl", 'wb') as f:
+        pickle.dump(model, f)
+
diff --git a/src/models/lstm.py b/src/models/lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab3202a2b59f0b43a68f1d3de34ee8a02163f93d
--- /dev/null
+++ b/src/models/lstm.py
@@ -0,0 +1,43 @@
+import os
+
+import numpy as np
+from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
+from keras.models import load_model
+
+from src.utils.models import create_LSTM_model
+from src.utils.utils import unison_shuffled_copies
+
+
+def train_lstm(model, x_train, y_train, x_test, y_test, batch_size, name):
+    early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')
+    model_path = f"{os.environ['MODEL_DIR']}/lstm/{name}.hdf5"
+    mcp_save = ModelCheckpoint(model_path)
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=0, min_delta=1e-4,
+                                       mode='min')
+    model.fit(x_train, y_train, batch_size=batch_size,
+              validation_data=(x_test, y_test),
+              callbacks=[early_stopping, mcp_save, reduce_lr_loss],
+              epochs=100,
+              verbose=1)
+
+    saved_model = load_model(model_path)
+    return saved_model
+
+
+def create_lstm(x, y, features, nb_layers, nb_units, name):
+    nb_samples = len(x)
+    x, y = unison_shuffled_copies(x, y)
+
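+    # 80/20 split of the shuffled samples into training and validation sets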
+    x_train = np.array(x[:int(0.8 * nb_samples)])
+    y_train = np.array(y[:int(0.8 * nb_samples)])
+
+    x_test = np.array(x[int(0.8 * nb_samples):])
+    y_test = np.array(y[int(0.8 * nb_samples):])
+
+    model = create_LSTM_model(nb_layers, nb_units, len(features), mask_value=float(os.environ["MASK_VALUE"]),
+                              timesteps=int(os.environ["TIME_SERIES_LENGTH"]))
+
+    train_lstm(model, x_train, y_train, x_test, y_test, int(os.environ['LSTM_BATCH_SIZE']), name)
\ No newline at end of file
diff --git a/src/models/transformer.py b/src/models/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d12876012655ddcfef2a1a85eadc79fe852cf60
--- /dev/null
+++ b/src/models/transformer.py
@@ -0,0 +1,218 @@
+import os
+
+import numpy as np
+import tensorflow as tf
+from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
+from keras.models import load_model
+
+from src.utils.utils import unison_shuffled_copies
+
+
+def get_angles(pos, i, d_model):
+    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
+    return pos * angle_rates
+
+
+def positional_encoding(position, d_model):
+    angle_rads = get_angles(np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
+
+    # apply sin to even indices in the array; 2i
+    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
+
+    # apply cos to odd indices in the array; 2i+1
+    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
+    pos_encoding = angle_rads[np.newaxis, ...]
+    return tf.cast(pos_encoding, dtype=tf.float32)
+
+
+class Encoder(tf.keras.layers.Layer):
+    def __init__(self, num_layers=4, d_model=512, num_heads=8, dff=2048,
+                 maximum_position_encoding=10000, dropout=0.0, **kwargs):
+        super(Encoder, self).__init__(**kwargs)
+        self.num_layers = num_layers
+        self.d_model = d_model
+        self.num_heads = num_heads
+        self.dff = dff
+        self.maximum_position_encoding = maximum_position_encoding
+        self.dropout = dropout
+
+        self.pos = positional_encoding(maximum_position_encoding, d_model)
+
+        self.encoder_layers = [EncoderLayer(d_model=d_model, num_heads=num_heads, dff=dff, dropout=dropout) for _ in
+                               range(num_layers)]
+
+        self.dropout_layer = tf.keras.layers.Dropout(dropout)
+        self.conv1d = tf.keras.layers.Conv1D(self.d_model, 1, activation='relu')
+
+    def call(self, inputs, training=None):
+        x = inputs
+        x = self.conv1d(x)
+        # positional encoding
+        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+        x += self.pos[:, :tf.shape(x)[1], :]
+
+        x = self.dropout_layer(x, training=training)
+        # Encoder layer
+        for encoder_layer in self.encoder_layers:
+            x = encoder_layer(x)
+
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "num_layers": self.num_layers,
+            "d_model": self.d_model,
+            "num_heads": self.num_heads,
+            "dff": self.dff,
+            "maximum_position_encoding": self.maximum_position_encoding,
+            "dropout": self.dropout,
+        })
+        return config
+
+
+class EncoderLayer(tf.keras.layers.Layer):
+    def __init__(self, d_model=512, num_heads=8, dff=2048, dropout=0.0, **kwargs):
+        super(EncoderLayer, self).__init__(**kwargs)
+
+        self.multi_head_attention = MultiHeadAttention(d_model, num_heads)
+        self.dropout_attention = tf.keras.layers.Dropout(dropout)
+        self.add_attention = tf.keras.layers.Add()
+        self.layer_norm_attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+        self.dense1 = tf.keras.layers.Dense(dff, activation='relu')
+        self.dense2 = tf.keras.layers.Dense(d_model)
+        self.dropout_dense = tf.keras.layers.Dropout(dropout)
+        self.add_dense = tf.keras.layers.Add()
+        self.layer_norm_dense = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+    def call(self, inputs, mask=None, training=None):
+        attention = self.multi_head_attention([inputs, inputs, inputs], mask=[mask, mask])
+        attention = self.dropout_attention(attention, training=training)
+        x = self.add_attention([inputs, attention])
+        x = self.layer_norm_attention(x)
+
+        ## Feed Forward
+        dense = self.dense1(x)
+        dense = self.dense2(dense)
+        dense = self.dropout_dense(dense, training=training)
+        x = self.add_dense([x, dense])
+        x = self.layer_norm_dense(x)
+
+        return x
+
+
+class MultiHeadAttention(tf.keras.layers.Layer):
+    def __init__(self, d_model=512, num_heads=8, causal=False, dropout=0.0, **kwargs):
+        super(MultiHeadAttention, self).__init__(**kwargs)
+        self.causal = causal
+
+        assert d_model % num_heads == 0
+        depth = d_model // num_heads
+
+        self.w_query = tf.keras.layers.Dense(d_model)
+        self.split_reshape_query = tf.keras.layers.Reshape((-1, num_heads, depth))
+        self.split_permute_query = tf.keras.layers.Permute((2, 1, 3))
+
+        self.w_value = tf.keras.layers.Dense(d_model)
+        self.split_reshape_value = tf.keras.layers.Reshape((-1, num_heads, depth))
+        self.split_permute_value = tf.keras.layers.Permute((2, 1, 3))
+
+        self.w_key = tf.keras.layers.Dense(d_model)
+        self.split_reshape_key = tf.keras.layers.Reshape((-1, num_heads, depth))
+        self.split_permute_key = tf.keras.layers.Permute((2, 1, 3))
+
+        self.attention = tf.keras.layers.Attention(dropout=dropout)
+        self.join_permute_attention = tf.keras.layers.Permute((2, 1, 3))
+        self.join_reshape_attention = tf.keras.layers.Reshape((-1, d_model))
+
+        self.dense = tf.keras.layers.Dense(d_model)
+
+    def call(self, inputs, mask=None, training=None):
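+        # Project q/v/k, split them into heads (reshape + permute), run dot-product
+        # attention per head, then merge the heads and project back to d_model.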
+        q = inputs[0]
+        v = inputs[1]
+        k = inputs[2] if len(inputs) > 2 else v
+
+        query = self.w_query(q)
+        query = self.split_reshape_query(query)
+        query = self.split_permute_query(query)
+
+        value = self.w_value(v)
+        value = self.split_reshape_value(value)
+        value = self.split_permute_value(value)
+
+        key = self.w_key(k)
+        key = self.split_reshape_key(key)
+        key = self.split_permute_key(key)
+
+        if mask is not None:
+            if mask[0] is not None:
+                mask[0] = tf.keras.layers.Reshape((-1, 1))(mask[0])
+                mask[0] = tf.keras.layers.Permute((2, 1))(mask[0])
+            if mask[1] is not None:
+                mask[1] = tf.keras.layers.Reshape((-1, 1))(mask[1])
+                mask[1] = tf.keras.layers.Permute((2, 1))(mask[1])
+
+        attention = self.attention([query, value, key], mask=mask, use_causal_mask=self.causal)
+        attention = self.join_permute_attention(attention)
+        attention = self.join_reshape_attention(attention)
+
+        x = self.dense(attention)
+
+        return x
+
+
+def get_tn(num_layers=4, d_model=128, dff=512, num_heads=8, mask_value=0, input_size=1):
+    dropout_rate = 0.1
+    nb_inputs = input_size
+    nb_outputs = int(os.environ['OUTPUT_SIZE'])
+    timesteps = int(os.environ['TIME_SERIES_LENGTH'])
+
+    encoder = Encoder(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff,
+                      dropout=dropout_rate)
+
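+    # Mask padded timesteps, encode the sequence with the Transformer encoder stack,
+    # then emit a per-timestep class distribution through a 1x1 convolution with softmax.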
+    input = tf.keras.layers.Input(shape=(timesteps, nb_inputs))
+
+    mask = tf.keras.layers.Masking(mask_value=mask_value, input_shape=(timesteps, nb_inputs))(input)
+    e = encoder(mask)
+    prediction_layer = tf.keras.layers.Conv1D(nb_outputs, 1, activation='softmax')(e)
+
+    model = tf.keras.models.Model(inputs=[input], outputs=prediction_layer)
+
+    optimizer = "Adam"
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    return model
+
+def train_tn(model, x_train, y_train, x_test, y_test, batch_size, name):
+    early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')
+    model_path = f"{os.environ['MODEL_DIR']}/tn/{name}.hdf5"
+    mcp_save = ModelCheckpoint(model_path)
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=0, min_delta=1e-4,
+                                       mode='min')
+    model.fit(x_train, y_train, batch_size=batch_size,
+              validation_data=(x_test, y_test),
+              callbacks=[early_stopping, mcp_save, reduce_lr_loss],
+              epochs=100,
+              verbose=1)
+
+    saved_model = load_model(model_path)
+    return saved_model
+
+
+def create_tn(x, y, features, number_of_layers, d_model, dff, number_of_heads, name):
+    nb_samples = len(x)
+    x, y = unison_shuffled_copies(x, y)
+
+    x_train = np.array(x[:int(0.8 * nb_samples)])
+    y_train = np.array(y[:int(0.8 * nb_samples)])
+
+    x_test = np.array(x[int(0.8 * nb_samples):])
+    y_test = np.array(y[int(0.8 * nb_samples):])
+
+    model = get_tn(num_layers=number_of_layers, d_model=d_model, dff=dff, num_heads=number_of_heads, mask_value=float(os.environ['MASK_VALUE']),
+                   input_size=len(features))
+
+    train_tn(model, x_train, y_train, x_test, y_test, int(os.environ['TN_BATCH_SIZE']), name)
\ No newline at end of file
diff --git a/src/scripts/__init__.py b/src/scripts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/scripts/accuracy_f1_score_per_model.py b/src/scripts/accuracy_f1_score_per_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..18e71714656ba9ae9138e4cac8f8a7695edcd331
--- /dev/null
+++ b/src/scripts/accuracy_f1_score_per_model.py
@@ -0,0 +1,75 @@
+import os
+import pickle
+
+import numpy as np
+from dotenv import load_dotenv
+from sklearn.metrics import accuracy_score, f1_score
+from tqdm import tqdm
+
+from src.scripts.extract_predictions import prediction_pipelines
+
+
+def get_acc_and_f1(trajectories, column):
+    y_true = []
+    y_pred = []
+    for t in trajectories:
+        y_true.append(t.df.label.to_numpy())
+        y_pred.append(t.df[column].to_numpy())
+    y_true = np.concatenate(y_true)
+    y_pred = np.concatenate(y_pred)
+
+    return accuracy_score(y_true, y_pred), f1_score(y_true, y_pred, average='macro')
+
+if __name__ == '__main__':
+    load_dotenv()
+    data_dir = os.environ["DATA_DIR"]
+
+    trajectories_LAX = pickle.load(open(f"{data_dir}/predictions/lax_predictions.pkl", 'rb'))
+    trajectories_NE = pickle.load(open(f"{data_dir}/predictions/ne_predictions.pkl", 'rb'))
+
+    iterator = tqdm(prediction_pipelines, colour="magenta")
+
+    results = []
+    for name, name_pretty, _, _, runs, _ in iterator:
+        iterator.set_description(name)
+
+        accs_lax = []
+        f1s_lax = []
+        accs_ne = []
+        f1s_ne = []
+        for i in tqdm(range(runs), colour="blue", leave=False):
+            column_name = f'prediction_{name}_{i}'
+            acc_lax, f1_lax = get_acc_and_f1(trajectories_LAX, column_name)
+            accs_lax.append(acc_lax)
+            f1s_lax.append(f1_lax)
+
+            acc_ne, f1_ne = get_acc_and_f1(trajectories_NE, column_name)
+            accs_ne.append(acc_ne)
+            f1s_ne.append(f1_ne)
+
+        acc_lax = np.mean(accs_lax)
+        f1_lax = np.mean(f1s_lax)
+        acc_ne = np.mean(accs_ne)
+        f1_ne = np.mean(f1s_ne)
+
+        acc_lax_std = np.std(accs_lax)
+        f1_lax_std = np.std(f1s_lax)
+        acc_ne_std = np.std(accs_ne)
+        f1_ne_std = np.std(f1s_ne)
+
+        results.append((acc_lax, acc_lax_std, f1_lax, f1_lax_std, acc_ne, acc_ne_std, f1_ne, f1_ne_std))
+
+    print("| Model | Accuracy LAX | Macro F1 LAX | Accuracy NE | Macro F1 NE\n"
+          "|---------------------:|---------------------:|-------------------:|----------------------:|---------------------:|")
+
+    for result, pipeline in zip(results, prediction_pipelines):
+        acc_lax, acc_lax_std, f1_lax, f1_lax_std, acc_ne, acc_ne_std, f1_ne, f1_ne_std = result
+        name, name_pretty, _, _, _, _ = pipeline
+        print(f"| {name_pretty} | {acc_lax:.3f} ({acc_lax_std:.3f}) | {f1_lax:.3f} ({f1_lax_std:.3f}) | "
+              f"{acc_ne:.3f} ({acc_ne_std:.3f}) | {f1_ne:.3f} ({f1_ne_std:.3f}) |")
+
+
+
+
diff --git a/src/scripts/event_statistics.py b/src/scripts/event_statistics.py
new file mode 100644
index 0000000000000000000000000000000000000000..0777fcddc2c3ad394ef8b9af36a57a59a8d7701d
--- /dev/null
+++ b/src/scripts/event_statistics.py
@@ -0,0 +1,88 @@
+import os
+import pickle
+
+import numpy as np
+import pandas as pd
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+from skais.learn.hmm_gmm_classifier import split_trajectories
+from tqdm import tqdm
+
+from src.scripts.extract_predictions import prediction_pipelines
+
+behaviours = ['Underway', 'Moored', 'At Anchor', 'Drifting']
+factor = 0.5
+disable = True
+model_ids = [0, 2, 6, 4, 9, -1]
+
+if __name__ == '__main__':
+    load_dotenv()
+    data_dir = os.environ["DATA_DIR"]
+    trajectories = pickle.load(open(f"{data_dir}/predictions/lax_predictions.pkl", 'rb'))
+
+    data = {
+        'name': []
+    }
+
+    for b in behaviours:
+        data[f'{b} events recognized'] = []
+        data[f'{b} events missed'] = []
+        data[f'{b} events recognized std'] = []
+        data[f'{b} events missed std'] = []
+
+    points = AISPoints.fuse(trajectories).df
+    y_true = points['label'].to_numpy()
+
+    print("| Model | Underway | Moored | At Anchor | Drifting |\n"
+          "|---------------------:|---------------------:|-------------------:|----------------------:|---------------------:|")
+
+    iterator = tqdm([prediction_pipelines[i] for i in model_ids], colour="blue", position=0, leave=True, disable=disable)
+    for name, name_pretty, _, _, runs, _ in iterator:
+        iterator.set_description(name)
+
+        data['name'].append(name_pretty)
+
+        missed_stack = []
+        recognized_stack = []
+
+        for i in tqdm(range(runs), colour='yellow', position=1, leave=False, disable=disable):
+            y_pred = points[f'prediction_{name}_{i}'].to_numpy()
+
+            missed = [0 for _ in behaviours]
+            recognized = [0 for _ in behaviours]
+
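+            # Each event returned by split_trajectories is a segment with a single
+            # ground-truth behaviour; count it as recognized when more than `factor`
+            # of its points are predicted as that behaviour, otherwise as missed.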
+            split, _ = split_trajectories(y_pred, y_true, 4)
+            for b_idx, _ in enumerate(tqdm(behaviours, colour='green', position=2, leave=False, disable=disable)):
+                for event in tqdm(split[b_idx], colour='magenta', position=3, leave=False, disable=disable):
+                    unique, counts = np.unique(event, return_counts=True)
+                    d = dict(zip(unique, counts))
+                    if b_idx not in d:
+                        missed[b_idx] += 1
+                    elif (d[b_idx] / len(event)) > factor:
+                        recognized[b_idx] += 1
+                    else:
+                        missed[b_idx] += 1
+            missed_stack.append(missed)
+            recognized_stack.append(recognized)
+
+        missed_mean = np.mean(missed_stack, axis=0)
+        missed_std = np.std(missed_stack, axis=0)
+        recognized_mean = np.mean(recognized_stack, axis=0)
+        recognized_std = np.std(recognized_stack, axis=0)
+
+        for i, b in enumerate(behaviours):
+            data[f'{b} events recognized'].append(recognized_mean[i])
+            data[f'{b} events missed'].append(missed_mean[i])
+            data[f'{b} events recognized std'].append(recognized_std[i])
+            data[f'{b} events missed std'].append(missed_std[i])
+
+        print(
+            f"{name_pretty} | {recognized_mean[0]/(recognized_mean[0]+missed_mean[0]):.3f} ({recognized_std[0]/(recognized_mean[0]+missed_mean[0]):.3f}) | "
+            f"{recognized_mean[1]/(recognized_mean[1]+missed_mean[1]):.3f} ({recognized_std[1]/(recognized_mean[1]+missed_mean[1]):.3f}) | "
+            f"{recognized_mean[2]/(recognized_mean[2]+missed_mean[2]):.3f} ({recognized_std[2]/(recognized_mean[2]+missed_mean[2]):.3f}) | "
+            f"{recognized_mean[3]/(recognized_mean[3]+missed_mean[3]):.3f} ({recognized_std[3]/(recognized_mean[3]+missed_mean[3]):.3f}) | "
+        )
+    df = pd.DataFrame(data)
+    df.fillna(0, inplace=True)
+    print(df.to_markdown(index=False))
diff --git a/src/scripts/extract_predictions.py b/src/scripts/extract_predictions.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb826caae605cb67a3135c840d91a9c5e4660be9
--- /dev/null
+++ b/src/scripts/extract_predictions.py
@@ -0,0 +1,78 @@
+from copy import deepcopy
+from pathlib import Path
+import os
+import pickle
+
+from src.utils.data_import import load_dataset_from_csv
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+import warnings
+from tqdm import tqdm
+from dotenv import load_dotenv
+
+from src.utils.experiment_tools import match_prediction_to_trajectories
+from src.scripts.predictors import *
+
+warnings.filterwarnings("ignore")
+time_window = 30
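+# Each pipeline entry is (name, pretty name, preprocessing function, predictor
+# function, number of runs, auxiliary data passed to preprocessing - a time window or None).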
+prediction_pipelines = [
+    ("navstatus", "Navigation Status", navstatus_preprocessing, navstatus_trajectory_predictor, 1, None),
+
+    ("dt_raw", "Decision Tree with Raw Features", dt_raw_preprocessing, dt_predictor, 10, None),
+    ("rf_raw", "Random Forest with Raw Features", rf_raw_preprocessing, rf_predictor, 10, None),
+    ("lstm_raw", "LSTM with Raw Features", sequence_raw_preprocessing, lstm_raw_trajectory_predictor, 10, None),
+    ("tn_raw", "Transformer Network with Raw Features", sequence_raw_preprocessing, tn_raw_trajectory_predictor, 10,
+     None),
+
+    ("dt_custom", "Decision Tree with Custom Features", dt_custom_preprocessing, dt_predictor, 10, time_window),
+    ("rf_custom", "Random Forest with Custom Features", rf_custom_preprocessing, rf_predictor, 10, time_window),
+    ("hmm_custom", "HMM with Custom Features", hmm_custom_preprocessing, hmm_custom_predictor, 10, time_window),
+    ("lstm_custom", "LSTM with Custom Features", sequence_custom_preprocessing, lstm_custom_trajectory_predictor, 10,
+     int(os.environ['LSTM_CUSTOM_TIME_WINDOW'])),
+    ("tn_custom", "Transformer Network with Custom Features", sequence_custom_preprocessing,
+     tn_custom_trajectory_predictor, 10, int(os.environ['TN_CUSTOM_TIME_WINDOW'])),
+
+    ("lstm_augmented", "LSTM with Augmented Data", sequence_raw_preprocessing, lstm_raw_trajectory_predictor_augmented,
+     10, None),
+    ("tn_augmented", "Transformer Network with Augmented Data", sequence_raw_preprocessing,
+     tn_raw_trajectory_predictor_augmented, 10, None),
+]
+
+keep_previous_prediction = True
+
+if __name__ == '__main__':
+    load_dotenv()
+    data_dir = os.environ["DATA_DIR"]
+    Path(data_dir).mkdir(parents=True, exist_ok=True)
+    Path(f"{data_dir}/predictions").mkdir(parents=True, exist_ok=True)
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"],
+                                               [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+    trajectories_NE = load_dataset_from_csv(os.environ["NE_DATA"],
+                                            [int(i) for i in os.environ["NE_TEST_INDICES"].split(" ")])
+    trajectories_LAX = load_dataset_from_csv(os.environ["LAX_DATA"])
+
+    for name, name_pretty, preprocessing, predictor, n_runs, aux_data in tqdm(prediction_pipelines, colour="green", position=0, leave=True):
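+        # Skip this pipeline when predictions for every run are already present in
+        # both prediction sets (e.g. from a previous execution).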
+        calculated = True
+        for i in range(n_runs):
+            if f'prediction_{name}_{i}' not in trajectories_LAX[-1].df.columns:
+                calculated = False
+                break
+            if f'prediction_{name}_{i}' not in trajectories_NE[-1].df.columns:
+                calculated = False
+                break
+
+        if calculated:
+            continue
+
+        prediction_data_LAX = preprocessing(deepcopy(trajectories_train), deepcopy(trajectories_LAX), 'LAX', aux_data)
+        prediction_data_NE = preprocessing(deepcopy(trajectories_train), deepcopy(trajectories_NE), 'NE', aux_data)
+
+        for i in tqdm(range(n_runs), colour="blue", position=1, leave=False):
+            y_predict_LAX = predictor(prediction_data_LAX, run=i)
+            match_prediction_to_trajectories(trajectories_LAX, y_predict_LAX, name=f'prediction_{name}_{i}',
+                                             tqdm_disable=False, tqdm_position=2)
+
+            y_predict_NE = predictor(prediction_data_NE, run=i)
+            match_prediction_to_trajectories(trajectories_NE, y_predict_NE, name=f'prediction_{name}_{i}',
+                                             tqdm_disable=False, tqdm_position=2)
+
+        pickle.dump(trajectories_LAX, open(f"{data_dir}/predictions/lax_predictions.pkl", 'wb'))
+        pickle.dump(trajectories_NE, open(f"{data_dir}/predictions/ne_predictions.pkl", 'wb'))
diff --git a/src/scripts/figures/draw_accuracy_for_different_features.py b/src/scripts/figures/draw_accuracy_for_different_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..f32f477c607ea961a74af903ce3ed2d1084c2c1d
--- /dev/null
+++ b/src/scripts/figures/draw_accuracy_for_different_features.py
@@ -0,0 +1,89 @@
+import os
+
+import numpy as np
+import matplotlib.pyplot as plt
+from dotenv import load_dotenv
+from sklearn.model_selection import GridSearchCV
+from sklearn.tree import DecisionTreeClassifier
+from tqdm import tqdm
+
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import convert_to_vectors_not_one_hot
+from src.utils.features_list import FEATURES_COMBINATIONS
+from src.utils.utils import unison_shuffled_copies, evaluate_model
+
+
+def generate_data(features, trajectories_train, trajectories_test_LAX, trajectories_test_NE):
+    x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features)
+    x_train = np.concatenate(x_train)
+    y_train = np.concatenate(y_train)
+
+    x_test_lax, y_test_lax = convert_to_vectors_not_one_hot(trajectories_test_LAX, features)
+    x_test_lax = np.concatenate(x_test_lax)
+    y_test_lax = np.concatenate(y_test_lax)
+
+    x_test_ne, y_test_ne = convert_to_vectors_not_one_hot(trajectories_test_NE, features)
+    x_test_ne = np.concatenate(x_test_ne)
+    y_test_ne = np.concatenate(y_test_ne)
+
+    return x_train, y_train, x_test_lax, y_test_lax, x_test_ne, y_test_ne
+
+def train_model(params, x_train, y_train):
+    dt = DecisionTreeClassifier(max_depth=params['max_depth'], criterion=params['criterion'], class_weight='balanced',
+                                splitter="best")
+    dt.fit(x_train, y_train)
+    return dt
+
+
+def draw_accuracy_graph(features, average_accs_NE, std_accs_NE, average_accs_LAX, std_accs_LAX):
+    barWidth = 0.3
+    X = np.arange(len(features))
+
+    plt.bar(X - barWidth / 2, average_accs_NE, width=barWidth, yerr=std_accs_NE, label='NE')
+    plt.bar(X + barWidth / 2, average_accs_LAX, width=barWidth, yerr=std_accs_LAX, label='LAX')
+
+    plt.xticks(X, [i for i in range(1, len(features) + 1)])
+    plt.xlabel('Configuration')
+    plt.ylabel('accuracy')
+    plt.legend()
+
+    plt.show()
+
+
+def draw_dt_accuracy_per_features(trajectories_train, trajectories_test_LAX, trajectories_test_NE):
+    tree_para = {'criterion': ['gini', 'entropy'],
+                 'max_depth': [4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 20, 30, 40, 50, 70, 90, 120, 150]}
+    performances = []
+    for features in FEATURES_COMBINATIONS:
+        x_train, y_train, x_test_lax, y_test_lax, x_test_ne, y_test_ne = generate_data(
+            features, trajectories_train, trajectories_test_LAX, trajectories_test_NE
+        )
+        accs_LAX = []
+        accs_NE = []
+        clf = GridSearchCV(DecisionTreeClassifier(class_weight='balanced', splitter="best"), tree_para, cv=5, verbose=2,
+                           n_jobs=-1)
+        clf.fit(x_train, y_train)
+        for _ in tqdm(range(10)):
+            try:
+                x_train, y_train = unison_shuffled_copies(x_train, y_train)
+                model = train_model(clf.best_params_, x_train[:int(0.8 * len(x_train))],
+                                    y_train[:int(0.8 * len(x_train))])
+                acc, f1, c_mat = evaluate_model(model, x_test_lax, y_test_lax)
+                accs_LAX.append(acc)
+                acc, _, _ = evaluate_model(model, x_test_ne, y_test_ne)
+                accs_NE.append(acc)
+            except Exception as e:
+                print(f"Failed : {e}")
+        performances.append(
+            {
+                'features': features,
+                'average_acc_lax': np.mean(accs_LAX),
+                'std_acc_lax': np.std(accs_LAX),
+                'average_acc_ne': np.mean(accs_NE),
+                'std_acc_ne': np.std(accs_NE),
+            }
+        )
+    features = [perf["features"] for perf in performances]
+    average_accs_LAX = [perf["average_acc_lax"] for perf in performances]
+    std_accs_LAX = [perf["std_acc_lax"] for perf in performances]
+    average_accs_NE = [perf["average_acc_ne"] for perf in performances]
+    std_accs_NE = [perf["std_acc_ne"] for perf in performances]
+    draw_accuracy_graph(features, average_accs_NE, std_accs_NE, average_accs_LAX, std_accs_LAX)
+
+
+if __name__ == '__main__':
+    # Load the datasets the same way extract_predictions.py does before drawing the figure.
+    load_dotenv()
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"],
+                                               [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+    trajectories_test_NE = load_dataset_from_csv(os.environ["NE_DATA"],
+                                                 [int(i) for i in os.environ["NE_TEST_INDICES"].split(" ")])
+    trajectories_test_LAX = load_dataset_from_csv(os.environ["LAX_DATA"])
+    draw_dt_accuracy_per_features(trajectories_train, trajectories_test_LAX, trajectories_test_NE)
\ No newline at end of file
diff --git a/src/scripts/near_transition_performance.py b/src/scripts/near_transition_performance.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bef106c91aeef0f15b31731131877f74e541b57
--- /dev/null
+++ b/src/scripts/near_transition_performance.py
@@ -0,0 +1,107 @@
+import os
+import pickle
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+from dotenv import load_dotenv
+from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
+from tqdm import tqdm
+
+from src.scripts.extract_predictions import prediction_pipelines
+from src.utils.utils import get_transition_indexes, get_point_near_transition
+
+np.seterr(divide='ignore', invalid='ignore')
+
+
+def get_near_point_performances_multi_run(name, windows, runs):
+    accs_in = []
+    f1s_in = []
+    recall_per_class = []
+    precision_per_class = []
+    trajectories = pickle.load(open(f"{data_dir}/predictions/lax_predictions.pkl", 'rb'))
+    transitions = get_transition_indexes(trajectories)
+
+    for window in tqdm(windows, colour="blue", position=1, desc="window", leave=False):
+        points, _ = get_point_near_transition(trajectories, window, transitions)
+        y_true = points['label'].to_numpy()
+        acc_tmp = []
+        f1_tmp = []
+
+        recall_per_class_per_run = [[] for _ in range(4)]
+        precision_per_class_per_run = [[] for _ in range(4)]
+        for run in tqdm(range(runs), colour="green", position=2, desc="run", leave=False):
+            y_pred = points[f'prediction_{name}_{run}'].to_numpy()
+
+            acc_tmp.append(accuracy_score(y_true, y_pred))
+            f1_tmp.append(f1_score(y_true, y_pred, average='macro'))
+            matrix = confusion_matrix(y_true, y_pred)
+
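+            # Per-class precision (diagonal over column sums) and recall (diagonal over
+            # row sums), computed from the confusion matrix.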
+            precision = matrix.diagonal() / matrix.sum(axis=0)
+            recall = matrix.diagonal() / matrix.sum(axis=1)
+            for i in range(4):
+                recall_per_class_per_run[i].append(recall[i])
+                precision_per_class_per_run[i].append(precision[i])
+
+        accs_in.append(np.mean(acc_tmp))
+        f1s_in.append(np.mean(f1_tmp))
+        tmp_recall_per_class = []
+        tmp_precision_per_class = []
+        for i in range(4):
+            tmp_recall_per_class.append(np.mean(recall_per_class_per_run[i]))
+            tmp_precision_per_class.append(np.mean(precision_per_class_per_run[i]))
+        recall_per_class.append(tmp_recall_per_class)
+        precision_per_class.append(tmp_precision_per_class)
+    return accs_in, f1s_in, np.array(recall_per_class), np.array(precision_per_class)
+
+
+classes = ['Underway', 'Moored', 'At Anchor', 'Drifting']
+model_ids = [0, 2, 6, 4, 9, -1]
+
+windows = [1, 5, 10, 20, 30, 40, 50, 60]
+
+if __name__ == '__main__':
+    load_dotenv()
+    data_dir = os.environ["DATA_DIR"]
+
+    trajectories_LAX = pickle.load(open(f"{data_dir}/predictions/lax_predictions.pkl", 'rb'))
+    trajectories_NE = pickle.load(open(f"{data_dir}/predictions/ne_predictions.pkl", 'rb'))
+    Path(f"images").mkdir(parents=True, exist_ok=True)
+
+    iterator = tqdm([prediction_pipelines[i] for i in model_ids], colour="magenta", position=0, leave=True)
+    data = []
+    for name, name_pretty, _, _, runs, _ in iterator:
+        iterator.set_description(name)
+        accs_in, f1s_in, recall_per_class, precision_per_class = get_near_point_performances_multi_run(name, windows,
+                                                                                                       runs=runs)
+        data.append((name, name_pretty, runs, accs_in, f1s_in, recall_per_class, precision_per_class))
+
+    fig1, model_acc_comp = plt.subplots()
+    fig2, model_f1_comp = plt.subplots()
+    for name, name_pretty, runs, accs_in, f1s_in, recall_per_class, precision_per_class in data:
+        fig3, model_per_class_precision = plt.subplots()
+        fig4, model_per_class_recall = plt.subplots()
+
+        model_acc_comp.plot(windows, accs_in, label=name_pretty)
+        model_f1_comp.plot(windows, f1s_in, label=name_pretty)
+
+        for i in range(4):
+            model_per_class_precision.plot(windows, precision_per_class[:, i], label=classes[i])
+            model_per_class_recall.plot(windows, recall_per_class[:, i], label=classes[i])
+
+        model_per_class_precision.legend()
+        model_per_class_precision.set_title(name_pretty)
+        fig3.savefig(f"images/{name}_precision_per_class.png")
+
+        model_per_class_recall.legend()
+        model_per_class_recall.set_title(name_pretty)
+        fig4.savefig(f"images/{name}_recall_per_class.png")
+    model_acc_comp.legend()
+    model_acc_comp.set_xlabel('Window radius (seconds)')
+    model_acc_comp.set_title('Accuracy')
+    fig1.savefig(f"images/acc.png")
+
+    model_f1_comp.legend()
+    model_f1_comp.set_xlabel('Window radius (seconds)')
+    model_f1_comp.set_title('F1 score')
+    fig2.savefig(f"mages/f1.png")
diff --git a/src/scripts/predictors.py b/src/scripts/predictors.py
new file mode 100644
index 0000000000000000000000000000000000000000..f067d9458f5c4eeed7945746ef2c3d1a50f13dd3
--- /dev/null
+++ b/src/scripts/predictors.py
@@ -0,0 +1,396 @@
+import pickle
+from copy import copy
+import os
+
+# Set before TensorFlow/Keras is imported so that the log level actually takes effect.
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+from skais.learn.hmm_gmm_classifier import GMMHMMClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import GridSearchCV
+from sklearn.tree import DecisionTreeClassifier
+
+import numpy as np
+from skais.ais.ais_points import AISPoints
+from keras.models import load_model
+
+from src.models.transformer import Encoder, EncoderLayer, MultiHeadAttention
+from src.utils.features import compute_selected_features_on_trajectories
+
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list, \
+    convert_to_vectors_not_one_hot
+from src.utils.utils import unison_shuffled_copies
+
+
+def navstatus_preprocessing(_, trajectories, __, ___):
+    return trajectories
+
+
+def dt_raw_preprocessing(trajectories_train, trajectories_test, dataset, _):
+    features = ['sog', 'cog', 'heading', 'longitude', 'latitude']
+    fname = f"raw_convert_to_vectors_not_one_hot_{dataset}.pkl"
+    if os.path.isfile(fname):
+        x_train, y_train, x_test, y_test = pickle.load(open(fname, 'rb'))
+    else:
+        x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features, tqdm_disable=True)
+        x_train = np.concatenate(x_train)
+        y_train = np.concatenate(y_train)
+
+        x_test, y_test = convert_to_vectors_not_one_hot(trajectories_test, features, tqdm_disable=True)
+        x_test = np.concatenate(x_test)
+        y_test = np.concatenate(y_test)
+        pickle.dump((x_train, y_train, x_test, y_test), open(fname, 'wb'))
+
+    tree_para = {'criterion': ['gini', 'entropy'],
+                 'max_depth': [1, 2, 5, 10, 20, 50]}
+    clf = GridSearchCV(DecisionTreeClassifier(class_weight='balanced', splitter="best"), tree_para, cv=5, verbose=0,
+                       n_jobs=-1)
+    clf.fit(x_train, y_train)
+
+    return x_train, y_train, x_test, y_test, clf.best_params_
+
+
+def dt_custom_preprocessing(trajectories_train, trajectories_test, dataset, time_window):
+    features = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+
+    fname = f"custom_convert_to_vectors_not_one_hot_{dataset}_{time_window}.pkl"
+    if os.path.isfile(fname):
+        x_train, y_train, x_test, y_test = pickle.load(open(fname, 'rb'))
+    else:
+        trajectories_train = compute_selected_features_on_trajectories(trajectories_train, time_window, min_size=-1,
+                                                                       tqdm_disable=True)
+        trajectories_test = compute_selected_features_on_trajectories(trajectories_test, time_window, min_size=-1,
+                                                                      tqdm_disable=True)
+    
+        points = AISPoints.fuse(trajectories_train)
+        normalisation_factors = points.normalize(
+            standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                      'std_delta_heading',
+                                      'std_pos', 'std_sog'],
+            third_quartile_features=['sog'],
+            divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                             ('latitude', 90), ('longitude', 180)]
+        )
+        normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+        normalize_set_of_trajectories(trajectories_test, normalisation_factors)
+    
+        x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features, tqdm_disable=True)
+        x_train = np.concatenate(x_train)
+        y_train = np.concatenate(y_train)
+    
+        x_test, y_test = convert_to_vectors_not_one_hot(trajectories_test, features, tqdm_disable=True)
+        x_test = np.concatenate(x_test)
+        y_test = np.concatenate(y_test)
+
+        pickle.dump((x_train, y_train, x_test, y_test), open(fname, 'wb'))
+
+    tree_para = {'criterion': ['gini', 'entropy'],
+                 'max_depth': [1, 2, 5, 10, 20, 50]}
+    clf = GridSearchCV(DecisionTreeClassifier(class_weight='balanced', splitter="best"), tree_para, cv=5, verbose=0,
+                       n_jobs=-1)
+    clf.fit(x_train, y_train)
+
+    return x_train, y_train, x_test, y_test, clf.best_params_
+
+
+def rf_raw_preprocessing(trajectories_train, trajectories_test, dataset, _):
+    features = ['sog', 'cog', 'heading', 'longitude', 'latitude']
+    fname = f"raw_convert_to_vectors_not_one_hot_{dataset}.pkl"
+    if os.path.isfile(fname):
+        x_train, y_train, x_test, y_test = pickle.load(open(fname, 'rb'))
+    else:
+        x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features, tqdm_disable=True)
+        x_train = np.concatenate(x_train)
+        y_train = np.concatenate(y_train)
+    
+        x_test, y_test = convert_to_vectors_not_one_hot(trajectories_test, features, tqdm_disable=True)
+        x_test = np.concatenate(x_test)
+        y_test = np.concatenate(y_test)
+
+        pickle.dump((x_train, y_train, x_test, y_test), open(fname, 'wb'))
+
+    rf_para = {'n_estimators': [5, 10, 20, 50, 100],
+               'max_depth': [1, 2, 5, 10, 20, 50]}
+    clf = GridSearchCV(RandomForestClassifier(class_weight='balanced'), rf_para, cv=5, verbose=0,
+                       n_jobs=-1)
+    clf.fit(x_train, y_train)
+
+    return x_train, y_train, x_test, y_test, clf.best_params_
+
+
+def rf_custom_preprocessing(trajectories_train, trajectories_test, dataset, time_window):
+    features = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+    fname = f"custom_convert_to_vectors_not_one_hot_{dataset}_{time_window}.pkl"
+    if os.path.isfile(fname):
+        x_train, y_train, x_test, y_test = pickle.load(open(fname, 'rb'))
+    else:
+        trajectories_train = compute_selected_features_on_trajectories(trajectories_train, time_window, min_size=-1,
+                                                                       tqdm_disable=True)
+        trajectories_test = compute_selected_features_on_trajectories(trajectories_test, time_window, min_size=-1,
+                                                                      tqdm_disable=True)
+    
+        points = AISPoints.fuse(trajectories_train)
+        normalisation_factors = points.normalize(
+            standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                      'std_delta_heading',
+                                      'std_pos', 'std_sog'],
+            third_quartile_features=['sog'],
+            divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                             ('latitude', 90), ('longitude', 180)]
+        )
+        normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+        normalize_set_of_trajectories(trajectories_test, normalisation_factors)
+    
+        x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features, tqdm_disable=True)
+        x_train = np.concatenate(x_train)
+        y_train = np.concatenate(y_train)
+    
+        x_test, y_test = convert_to_vectors_not_one_hot(trajectories_test, features, tqdm_disable=True)
+        x_test = np.concatenate(x_test)
+        y_test = np.concatenate(y_test)
+
+        pickle.dump((x_train, y_train, x_test, y_test), open(fname, 'wb'))
+
+    rf_para = {'n_estimators': [5, 10, 20, 50, 100],
+               'max_depth': [1, 2, 5, 10, 20, 50]}
+    clf = GridSearchCV(RandomForestClassifier(class_weight='balanced'), rf_para, cv=5, verbose=0,
+                       n_jobs=-1)
+    clf.fit(x_train, y_train)
+
+    return x_train, y_train, x_test, y_test, clf.best_params_
+
+
+def sequence_raw_preprocessing(trajectories_train, trajectories_test, dataset, _):
+    fname = f"sequence_vector_{dataset}.pkl"
+    if os.path.isfile(fname):
+        x_raw, y_raw = pickle.load(open(fname, 'rb'))
+    else:
+        points = AISPoints.fuse(trajectories_train)
+        normalisation_factors = points.normalize(
+            third_quartile_features=['sog'],
+            divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+        )
+
+        normalize_set_of_trajectories(trajectories_test, normalisation_factors)
+
+        features_raw = ['sog', 'cog', 'heading', 'longitude', 'latitude']
+        x_raw, y_raw = convert_to_vectors(trajectories_test, features_raw)
+        x_raw, y_raw = split_to_size_list(x_raw, y_raw, size=int(os.environ['TIME_SERIES_LENGTH']), value=float(os.environ['MASK_VALUE']))
+        pickle.dump((x_raw, y_raw), open(fname, 'wb'))
+    return x_raw, y_raw
+
+
+def hmm_custom_preprocessing(trajectories_train, trajectories_test, dataset, time_window):
+    features = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+    fname = f"custom_convert_to_vectors_not_one_hot_not_concatenated_{dataset}_{time_window}.pkl"
+    if os.path.isfile(fname):
+        x_train, y_train, x_test, y_test = pickle.load(open(fname, 'rb'))
+    else:
+        trajectories_train = compute_selected_features_on_trajectories(trajectories_train, time_window, min_size=-1,
+                                                                       tqdm_disable=True)
+        trajectories_test = compute_selected_features_on_trajectories(trajectories_test, time_window,
+                                                                      min_size=-1, tqdm_disable=True)
+
+        points = AISPoints.fuse(trajectories_train)
+        normalisation_factors = points.normalize(
+            standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                      'std_delta_heading',
+                                      'std_pos', 'std_sog'],
+            third_quartile_features=['sog'],
+            divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                             ('latitude', 90), ('longitude', 180)]
+        )
+
+        normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+        normalize_set_of_trajectories(trajectories_test, normalisation_factors)
+
+        x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, features)
+        x_test, y_test = convert_to_vectors_not_one_hot(trajectories_test, features)
+        pickle.dump((x_train, y_train, x_test, y_test), open(fname, 'wb'))
+
+    return x_train, y_train, x_test, y_test
+
+
+def sequence_custom_preprocessing(trajectories_train, trajectories_test, dataset, time_window):
+    features = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+    fname = f"custom_sequence_{dataset}_{time_window}.pkl"
+    if os.path.isfile(fname):
+        result = pickle.load(open(fname, 'rb'))
+    else:
+        trajectories_train = compute_selected_features_on_trajectories(trajectories_train, time_window, min_size=-1,
+                                                                       tqdm_disable=True)
+        trajectories_test = compute_selected_features_on_trajectories(trajectories_test, time_window,
+                                                                      min_size=-1, tqdm_disable=True)
+
+        points = AISPoints.fuse(trajectories_train)
+        normalisation_factors = points.normalize(
+            standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                      'std_delta_heading',
+                                      'std_pos', 'std_sog'],
+            third_quartile_features=['sog'],
+            divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                             ('latitude', 90), ('longitude', 180)]
+        )
+        normalize_set_of_trajectories(trajectories_test, normalisation_factors)
+
+        x, y = convert_to_vectors(trajectories_test, features)
+        result = split_to_size_list(x, y, size=int(os.environ['TIME_SERIES_LENGTH']), value=float(os.environ['MASK_VALUE']))
+        pickle.dump(result, open(fname, 'wb'))
+
+    return result
+
+
+def navstatus_trajectory_predictor(trajectories, run=0):
+    _ = run
+    X = [t.df[['navstatus']].to_numpy().astype(int) for t in trajectories]
+    x_true = []
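+    # Map AIS navigational status codes to the behaviour label indices used in the
+    # other scripts (0: Underway, 1: Moored, 2: At Anchor, 3: Drifting); codes with
+    # no corresponding behaviour (3, 4 and 6 or above) are mapped to 4.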
+    for x in X:
+        new_x = copy(x)
+        new_x[x == 0] = 0
+        new_x[x == 1] = 2
+        new_x[x == 2] = 3
+        new_x[x == 3] = 4
+        new_x[x == 4] = 4
+        new_x[x == 5] = 1
+        new_x[x >= 6] = 4
+        x_true.append(new_x)
+
+    return np.concatenate(x_true)
+
+
+def lstm_custom_trajectory_predictor(data, run=0):
+    batch_size = int(os.environ['LSTM_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+
+    name = f"CUSTOM_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/lstm/{name}.hdf5")
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
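+    # Drop predictions made on padded timesteps (those whose first feature equals
+    # MASK_VALUE) so the output lines up with the original, unpadded points.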
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def tn_custom_trajectory_predictor(data, run=0):
+    batch_size = int(os.environ['TN_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+    
+    name = f"CUSTOM_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/tn/{name}.hdf5",
+        custom_objects={'Encoder': Encoder,
+                        'EncoderLayer': EncoderLayer,
+                        'MultiHeadAttention': MultiHeadAttention})
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def lstm_raw_trajectory_predictor(data, run=0):
+    batch_size = int(os.environ['LSTM_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+
+    name = f"5_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/lstm/{name}.hdf5")
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def tn_raw_trajectory_predictor(data, run=0):
+    batch_size = int(os.environ['TN_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+
+    name = f"5_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/tn/{name}.hdf5",
+        custom_objects={'Encoder': Encoder,
+                        'EncoderLayer': EncoderLayer,
+                        'MultiHeadAttention': MultiHeadAttention})
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def lstm_raw_trajectory_predictor_augmented(data, run=0):
+    batch_size = int(os.environ['LSTM_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+
+    name = f"AUGMENTED_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/lstm/{name}.hdf5", compile=False)
+    optimizer = "Adam"
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def tn_raw_trajectory_predictor_augmented(data, run=0):
+    batch_size = int(os.environ['TN_BATCH_SIZE_INFERENCE'])
+
+    x, y = data
+
+    name = f"AUGMENTED_{run}"
+    model = load_model(f"{os.environ['MODEL_DIR']}/tn/{name}.hdf5",
+        custom_objects={'Encoder': Encoder,
+                        'EncoderLayer': EncoderLayer,
+                        'MultiHeadAttention': MultiHeadAttention}, compile=False)
+    optimizer = "Adam"
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+    predictions = model.predict(np.array(x), batch_size=batch_size, verbose=0)
+
+    M = np.ma.masked_equal(np.concatenate(x)[:, 0], float(os.environ['MASK_VALUE']))
+    return np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+
+
+def dt_predictor(data, run=0):
+    _ = run
+    x_train, y_train, x_test, y_test, best_params = data
+    x_train, y_train = unison_shuffled_copies(x_train, y_train)
+    dt = DecisionTreeClassifier(max_depth=best_params['max_depth'], criterion=best_params['criterion'],
+                                class_weight='balanced',
+                                splitter="best")
+    dt.fit(x_train, y_train)
+
+    return dt.predict(x_test)
+
+
+def rf_predictor(data, run=0):
+    _ = run
+    x_train, y_train, x_test, y_test, best_params = data
+    x_train, y_train = unison_shuffled_copies(x_train, y_train)
+    rf = RandomForestClassifier(n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth'], n_jobs=-1)
+    rf.fit(x_train, y_train)
+
+    return rf.predict(x_test)
+
+
+def hmm_raw_predictor(data, run=0):
+    x_train, y_train, x_test, y_test = data
+
+    name = f"5_{run}"
+    model = pickle.load(open(f"{os.environ['MODEL_DIR']}/hmm/{name}.pkl", 'rb'))
+    predictions = model.predict(x_test)
+
+    return np.array(predictions)
+
+def hmm_custom_predictor(data, run=0):
+    x_train, y_train, x_test, y_test = data
+
+    name = f"CUSTOM_{run}"
+    model = pickle.load(open(f"{os.environ['MODEL_DIR']}/hmm/{name}.pkl", 'rb'))
+    predictions = model.predict(x_test)
+
+    return np.array(predictions)
\ No newline at end of file
diff --git a/src/scripts/train_HMM/train_custom_features.py b/src/scripts/train_HMM/train_custom_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..15212209704460c7a547ab161ab06beb0ee9abde
--- /dev/null
+++ b/src/scripts/train_HMM/train_custom_features.py
@@ -0,0 +1,40 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.hmm import train_HMM
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, compute_features_on_trajectories_interpolated, \
+    convert_to_vectors_not_one_hot
+from src.utils.features_list import FEATURES_LIST
+
+if __name__ == '__main__':
+    run = int(sys.argv[1])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/hmm/").mkdir(parents=True, exist_ok=True)
+
+    n_states = [int(i) for i in os.environ["HMM_CUSTOM_NUMBER_OF_STATES"].split(" ")]
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    compute_features_on_trajectories_interpolated(trajectories_train, int(os.environ["HMM_CUSTOM_TIME_WINDOW"]), tqdm_position=1)
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                  'std_delta_heading',
+                                  'std_pos', 'std_sog'],
+        third_quartile_features=['sog'],
+        divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                         ('latitude', 90), ('longitude', 180)]
+    )
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, FEATURES_LIST)
+
+    name = f"CUSTOM_{run}"
+
+    train_HMM(x_train, y_train, n_states, name)
\ No newline at end of file
diff --git a/src/scripts/train_HMM/train_raw.py b/src/scripts/train_HMM/train_raw.py
new file mode 100644
index 0000000000000000000000000000000000000000..c84770f615a86fad649e40c52734df5d2655e7ac
--- /dev/null
+++ b/src/scripts/train_HMM/train_raw.py
@@ -0,0 +1,49 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.hmm import train_HMM
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors_not_one_hot
+from src.utils.features_list import FEATURES_COMBINATIONS
+
+if __name__ == '__main__':
+    config = int(sys.argv[1])
+    run = int(sys.argv[2])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/hmm/").mkdir(parents=True, exist_ok=True)
+
+    if config == 0:
+        n_states = [int(i) for i in os.environ["HMM_SOG_NUMBER_OF_STATES"].split(" ")]
+    elif config == 1:
+        n_states = [int(i) for i in os.environ["HMM_COG_NUMBER_OF_STATES"].split(" ")]
+    elif config == 2:
+        n_states = [int(i) for i in os.environ["HMM_HEADING_NUMBER_OF_STATES"].split(" ")]
+    elif config == 3:
+        n_states = [int(i) for i in os.environ["HMM_POSITION_NUMBER_OF_STATES"].split(" ")]
+    elif config == 4:
+        n_states = [int(i) for i in os.environ["HMM_RAW_NO_SOG_NUMBER_OF_STATES"].split(" ")]
+    elif config == 5:
+        n_states = [int(i) for i in os.environ["HMM_RAW_NUMBER_OF_STATES"].split(" ")]
+    else:
+        print("Config not recognized")
+        sys.exit(1)
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors_not_one_hot(trajectories_train, FEATURES_COMBINATIONS[config])
+
+    name = f"{config}_{run}"
+
+    train_HMM(x_train, y_train, n_states, name)
\ No newline at end of file
diff --git a/src/scripts/train_all.sh b/src/scripts/train_all.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b32e6526b706de2feb4487204627ccab39047498
--- /dev/null
+++ b/src/scripts/train_all.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+export PYTHONPATH="${PYTHONPATH}:../../"
+
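+# 10 independent runs; each run trains the 6 raw feature configurations for every model family,
+# then the augmented and custom-feature variants.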
+for i in {0..9}
+do
+  for j in {0..5}
+  do
+    python3 train_HMM/train_raw.py "$j" "$i"
+    python3 train_lstm/train_raw.py "$j" "$i"
+    python3 train_transformer/train_raw.py "$j" "$i"
+  done
+
+  python3 train_HMM/train_custom_features.py "$i"
+  
+  python3 train_lstm/train_augmented.py "$i"
+  python3 train_lstm/train_custom_features.py "$i"
+  
+  python3 train_transformer/train_augmented.py "$i"
+  python3 train_transformer/train_custom_features.py "$i"
+done
+
diff --git a/src/scripts/train_lstm/train_augmented.py b/src/scripts/train_lstm/train_augmented.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f1bc77510acb4a2d9475116ca2b0babff2cd8aa
--- /dev/null
+++ b/src/scripts/train_lstm/train_augmented.py
@@ -0,0 +1,36 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.lstm import create_lstm
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list
+from src.utils.utils import shift
+
+if __name__ == '__main__':
+    run = int(sys.argv[1])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/lstm/").mkdir(parents=True, exist_ok=True)
+    number_of_layers = int(os.environ["LSTM_RAW_NUMBER_OF_LAYERS"])
+    number_of_units = int(os.environ["LSTM_RAW_NUMBER_OF_UNITS"])
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+    features = ['sog', 'cog', 'heading', 'longitude', 'latitude']
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
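+    # Data augmentation: build LSTM_AUGMENTED_AUGMENTATION_FACTOR randomly shifted copies of each training trajectory.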
+    shifted = shift(trajectories_train, int(os.environ["LSTM_AUGMENTED_AUGMENTATION_FACTOR"]))
+    x_train, y_train = convert_to_vectors(shifted, features)
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+    name = f"AUGMENTED_{run}"
+
+    create_lstm(x_train, y_train, features, number_of_layers, number_of_units, name)
\ No newline at end of file
diff --git a/src/scripts/train_lstm/train_custom_features.py b/src/scripts/train_lstm/train_custom_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..84f2a4270619c22c14f156e518f39aa9003cc510
--- /dev/null
+++ b/src/scripts/train_lstm/train_custom_features.py
@@ -0,0 +1,42 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.lstm import create_lstm
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list, \
+    compute_features_on_trajectories_interpolated
+from src.utils.features_list import FEATURES_LIST
+
+if __name__ == '__main__':
+    run = int(sys.argv[1])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/lstm/").mkdir(parents=True, exist_ok=True)
+
+    number_of_layers = int(os.environ["LSTM_CUSTOM_NUMBER_OF_LAYERS"])
+    number_of_units = int(os.environ["LSTM_CUSTOM_NUMBER_OF_UNITS"])
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    compute_features_on_trajectories_interpolated(trajectories_train, int(os.environ["LSTM_CUSTOM_TIME_WINDOW"]), tqdm_position=1)
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                  'std_delta_heading',
+                                  'std_pos', 'std_sog'],
+        third_quartile_features=['sog'],
+        divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                         ('latitude', 90), ('longitude', 180)]
+    )
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors(trajectories_train, FEATURES_LIST)
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+    name = f"CUSTOM_{run}"
+
+    create_lstm(x_train, y_train, FEATURES_LIST, number_of_layers, number_of_units, name)
\ No newline at end of file
diff --git a/src/scripts/train_lstm/train_raw.py b/src/scripts/train_lstm/train_raw.py
new file mode 100644
index 0000000000000000000000000000000000000000..37a47ecaf868fad33cbd6f6f530d29b23ea38664
--- /dev/null
+++ b/src/scripts/train_lstm/train_raw.py
@@ -0,0 +1,57 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.lstm import create_lstm
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list
+from src.utils.features_list import FEATURES_COMBINATIONS
+
+if __name__ == '__main__':
+    config = int(sys.argv[1])
+    run = int(sys.argv[2])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/lstm/").mkdir(parents=True, exist_ok=True)
+
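+    # Pick the LSTM depth and width tuned for this feature combination (same index order as FEATURES_COMBINATIONS).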
+    if config == 0:
+        number_of_layers = int(os.environ["LSTM_SOG_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_SOG_NUMBER_OF_UNITS"])
+    elif config == 1:
+        number_of_layers = int(os.environ["LSTM_COG_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_COG_NUMBER_OF_UNITS"])
+    elif config == 2:
+        number_of_layers = int(os.environ["LSTM_HEADING_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_HEADING_NUMBER_OF_UNITS"])
+    elif config == 3:
+        number_of_layers = int(os.environ["LSTM_POSITION_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_POSITION_NUMBER_OF_UNITS"])
+    elif config == 4:
+        number_of_layers = int(os.environ["LSTM_RAW_NO_SOG_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_RAW_NO_SOG_NUMBER_OF_UNITS"])
+    elif config == 5:
+        number_of_layers = int(os.environ["LSTM_RAW_NUMBER_OF_LAYERS"])
+        number_of_units = int(os.environ["LSTM_RAW_NUMBER_OF_UNITS"])
+    else:
+        print("Config not recognized")
+        sys.exit(1)
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors(trajectories_train, FEATURES_COMBINATIONS[config])
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+    features = FEATURES_COMBINATIONS[config]
+    name = f"{config}_{run}"
+
+    create_lstm(x_train, y_train, features, number_of_layers, number_of_units, name)
\ No newline at end of file
diff --git a/src/scripts/train_transformer/train_augmented.py b/src/scripts/train_transformer/train_augmented.py
new file mode 100644
index 0000000000000000000000000000000000000000..eeb8083da460b537def9c055a809283382a86197
--- /dev/null
+++ b/src/scripts/train_transformer/train_augmented.py
@@ -0,0 +1,40 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.transformer import create_tn
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list
+from src.utils.utils import shift
+
+if __name__ == '__main__':
+    run = int(sys.argv[1])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/tn/").mkdir(parents=True, exist_ok=True)
+
+    number_of_layers = int(os.environ["TN_RAW_NUMBER_OF_LAYERS"])
+    d_model = int(os.environ["TN_RAW_D_MODEL"])
+    dff = int(os.environ["TN_RAW_DFF"])
+    number_of_heads = int(os.environ["TN_RAW_NUMBER_OF_HEADS"])
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+    features = ['sog', 'cog', 'heading', 'longitude', 'latitude']
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    shifted = shift(trajectories_train, int(os.environ["TN_AUGMENTED_AUGMENTATION_FACTOR"]))
+    x_train, y_train = convert_to_vectors(shifted, features)
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+
+    name = f"AUGMENTED_{run}"
+
+    create_tn(x_train, y_train, features, number_of_layers, d_model, dff, number_of_heads, name)
\ No newline at end of file
diff --git a/src/scripts/train_transformer/train_custom_features.py b/src/scripts/train_transformer/train_custom_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..75233521c2abd38c0a7874fd080f4b2ae6751167
--- /dev/null
+++ b/src/scripts/train_transformer/train_custom_features.py
@@ -0,0 +1,44 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.transformer import create_tn
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list, \
+    compute_features_on_trajectories_interpolated
+from src.utils.features_list import FEATURES_LIST
+
+if __name__ == '__main__':
+    run = int(sys.argv[1])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/tn/").mkdir(parents=True, exist_ok=True)
+
+    number_of_layers = int(os.environ["TN_CUSTOM_NUMBER_OF_LAYERS"])
+    d_model = int(os.environ["TN_CUSTOM_D_MODEL"])
+    dff = int(os.environ["TN_CUSTOM_DFF"])
+    number_of_heads = int(os.environ["TN_CUSTOM_NUMBER_OF_HEADS"])
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    compute_features_on_trajectories_interpolated(trajectories_train, int(os.environ["TN_CUSTOM_TIME_WINDOW"]), tqdm_position=1)
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        standardization_features=['delta_sog', 'std_cog', 'std_hdg', 'std_delta_cog',
+                                  'std_delta_heading',
+                                  'std_pos', 'std_sog'],
+        third_quartile_features=['sog'],
+        divide_by_value=[('drift', 180), ('delta_cog', 180), ('delta_heading', 180),
+                         ('latitude', 90), ('longitude', 180)]
+    )
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors(trajectories_train, FEATURES_LIST)
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+    name = f"CUSTOM_{run}"
+
+    create_tn(x_train, y_train, FEATURES_LIST, number_of_layers, d_model, dff, number_of_heads, name)
\ No newline at end of file
diff --git a/src/scripts/train_transformer/train_raw.py b/src/scripts/train_transformer/train_raw.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbd26bc7150ab538314dccf0298450430ec363df
--- /dev/null
+++ b/src/scripts/train_transformer/train_raw.py
@@ -0,0 +1,69 @@
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+from skais.ais.ais_points import AISPoints
+
+from src.models.transformer import create_tn
+from src.utils.data_import import load_dataset_from_csv
+from src.utils.experiment_tools import normalize_set_of_trajectories, convert_to_vectors, split_to_size_list
+from src.utils.features_list import FEATURES_COMBINATIONS
+
+if __name__ == '__main__':
+    config = int(sys.argv[1])
+    run = int(sys.argv[2])
+    load_dotenv()
+    Path(f"{os.environ['MODEL_DIR']}/tn/").mkdir(parents=True, exist_ok=True)
+
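+    # Pick the transformer hyper-parameters tuned for this feature combination (same index order as FEATURES_COMBINATIONS).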
+    if config == 0:
+        number_of_layers = int(os.environ["TN_SOG_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_SOG_D_MODEL"])
+        dff = int(os.environ["TN_SOG_DFF"])
+        number_of_heads = int(os.environ["TN_SOG_NUMBER_OF_HEADS"])
+    elif config == 1:
+        number_of_layers = int(os.environ["TN_COG_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_COG_D_MODEL"])
+        dff = int(os.environ["TN_COG_DFF"])
+        number_of_heads = int(os.environ["TN_COG_NUMBER_OF_HEADS"])
+    elif config == 2:
+        number_of_layers = int(os.environ["TN_HEADING_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_HEADING_D_MODEL"])
+        dff = int(os.environ["TN_HEADING_DFF"])
+        number_of_heads = int(os.environ["TN_HEADING_NUMBER_OF_HEADS"])
+    elif config == 3:
+        number_of_layers = int(os.environ["TN_POSITION_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_POSITION_D_MODEL"])
+        dff = int(os.environ["TN_POSITION_DFF"])
+        number_of_heads = int(os.environ["TN_POSITION_NUMBER_OF_HEADS"])
+    elif config == 4:
+        number_of_layers = int(os.environ["TN_RAW_NO_SOG_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_RAW_NO_SOG_D_MODEL"])
+        dff = int(os.environ["TN_RAW_NO_SOG_DFF"])
+        number_of_heads = int(os.environ["TN_RAW_NO_SOG_NUMBER_OF_HEADS"])
+    elif config == 5:
+        number_of_layers = int(os.environ["TN_RAW_NUMBER_OF_LAYERS"])
+        d_model = int(os.environ["TN_RAW_D_MODEL"])
+        dff = int(os.environ["TN_RAW_DFF"])
+        number_of_heads = int(os.environ["TN_RAW_NUMBER_OF_HEADS"])
+    else:
+        print("Config not recognized")
+        sys.exit(1)
+
+    trajectories_train = load_dataset_from_csv(os.environ["NE_DATA"], [int(i) for i in os.environ["NE_TRAIN_INDICES"].split(" ")])
+
+    points = AISPoints.fuse(trajectories_train)
+    normalisation_factors = points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+
+    normalize_set_of_trajectories(trajectories_train, normalisation_factors)
+
+    x_train, y_train = convert_to_vectors(trajectories_train, FEATURES_COMBINATIONS[config])
+    x_train, y_train = split_to_size_list(x_train, y_train, size=int(os.environ["TIME_SERIES_LENGTH"]), value=float(os.environ["MASK_VALUE"]))
+
+    features = FEATURES_COMBINATIONS[config]
+    name = f"{config}_{run}"
+
+    create_tn(x_train, y_train, features, number_of_layers, d_model, dff, number_of_heads, name)
\ No newline at end of file
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/utils/data_import.py b/src/utils/data_import.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bc2ac59a12cb914a407922e545b34d3e495fd87
--- /dev/null
+++ b/src/utils/data_import.py
@@ -0,0 +1,22 @@
+import pandas as pd
+from skais.ais.ais_points import AISPoints
+from skais.process.ais_operations import get_trajectories
+
+from src.utils.experiment_tools import clean_and_interpolate_trajectories
+
+
+def load_dataset_from_csv(csv_path, indices=None):
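+    # Load AIS points from CSV, drop unlabeled rows, optionally keep only the MMSIs selected by `indices`,
+    # then split and interpolate the result into clean trajectories.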
+    df = pd.read_csv(csv_path)
+    df = df[df.label != -1]
+
+    if indices is not None:
+        mmsis = df.mmsi.unique()
+        # keep only the trajectories whose MMSI position appears in `indices`
+        df = df[df.mmsi.isin([mmsis[i] for i in indices])]
+
+    points = AISPoints(df)
+    points.remove_outliers(['sog'])
+    points.df['ts_sec'] = points.df['ts'].astype('int64') // 1e9
+    trajectories = get_trajectories(points)
+
+    return clean_and_interpolate_trajectories(trajectories, 60, 300, cut_off=50)
\ No newline at end of file
diff --git a/src/utils/experiment_tools.py b/src/utils/experiment_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..5434508d5485022546bd341c5b7cbbcdf3eab572
--- /dev/null
+++ b/src/utils/experiment_tools.py
@@ -0,0 +1,287 @@
+import numpy as np
+import tqdm as tqdm
+from skais.ais.ais_points import AISPoints
+
+from src.utils.features import compute_selected_features
+
+
+def clean_and_interpolate_trajectories(trajectories, interpolation_time=60, time_gap=3600, cut_off=-1):
+    trajectories_split = []
+    for trajectory in tqdm.tqdm(trajectories, desc='splitting trajectories', position=0):
+        trajectories_split += trajectory.split_trajectory(time_gap=time_gap, interpolation=interpolation_time)
+
+    result = []
+    for trajectory in tqdm.tqdm(trajectories_split, desc='cleaning trajectories', position=0):
+        if len(trajectory.df.index) > cut_off:
+            trajectory.remove_outliers(['sog'])
+            result.append(trajectory)
+
+    return result
+
+
+def match_prediction_to_trajectories(trajectories, prediction, name='prediction', tqdm_disable=True, tqdm_position=0, tqdm_colour='magenta'):
+    index = 0
+    for trajectory in tqdm.tqdm(trajectories, disable=tqdm_disable, colour=tqdm_colour, position=tqdm_position, leave=False):
+        trajectory.df[name] = prediction[index:index + len(trajectory.df.index)]
+        index += len(trajectory.df.index)
+
+    return trajectories
+
+def get_normalisation_factors(trajectories):
+    points = AISPoints.fuse(trajectories)
+    return points.normalize(
+        third_quartile_features=['sog'],
+        divide_by_value=[('latitude', 90), ('longitude', 180), ('heading', 180), ('cog', 180)]
+    )
+
+
+def compute_features_on_trajectories_interpolated(trajectories, window_size, tqdm_position=0):
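+    # Compute the hand-crafted features in place, keeping only trajectories long enough for the time-window statistics.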
+    trajectories_with_computed_features = []
+    for trajectory in tqdm.tqdm(trajectories, leave=False, position=tqdm_position):
+        if len(trajectory.df.index) > 12:
+            compute_selected_features(trajectory, window_size)
+            trajectories_with_computed_features.append(trajectory)
+    return trajectories_with_computed_features
+#
+#
+# def make_feature_vectors(trajectories, features=None,
+#                          label_field='label', nb_classes=2, sliding_window_gap=10, length_list=15, verbose=0):
+#     if features is None:
+#         features = ['rot', 'sog', 'd_sog', 'd_cog', 'd_heading', 'd_rot']
+#     x = []
+#     y = []
+#     features_trajectories = []
+#     label_trajectories = []
+#     zero = [0 for _ in range(nb_classes)]
+#
+#     iterator = trajectories
+#     if verbose > 0:
+#         iterator = tqdm.tqdm(trajectories)
+#     for trajectory in iterator:
+#         if len(trajectory.df.index) > length_list:
+#             trajectory.df['ts'] = trajectory.df.index
+#             windows = trajectory.sliding_window(length_list, offset=sliding_window_gap,
+#                                                 fields=features + [label_field])
+#
+#             features_trajectories.append(trajectory.to_numpy(fields=features))
+#             label_trajectories.append(trajectory.to_numpy(fields=[label_field]).astype(int))
+#
+#             if length_list > 0:
+#                 for window in windows:
+#                     label = zero.copy()
+#                     label[int(window[length_list // 2, -1])] = 1
+#                     x.append(window[:, :-1])
+#                     y.append(label)
+#
+#     return features_trajectories, label_trajectories, x, y
+#
+#
+def normalize_set_of_trajectories(trajectories, dictionary):
+    for t in trajectories:
+        t.normalize(normalization_dict=dictionary)
+#
+#
+# def split_trajectory_in_window(trajectory, size=10, offset=1):
+#     trajectories = []
+#     for i in range(0, len(trajectory.df.index) - size, offset):
+#         df = trajectory.df.iloc[i:i + size]
+#         trajectories.append(AISTrajectory(df))
+#     return trajectories
+#
+# def quick_split_trajectory_in_window(trajectory, size=10, offset=1, features=None):
+#     trajectories = []
+#
+#     if features is None:
+#         x = trajectory.df[['latitude', 'longitude']].to_numpy()
+#     else:
+#         x = trajectory.df[['latitude', 'longitude'] + features].to_numpy()
+#
+#     for i in range(0, len(trajectory.df.index) - size, offset):
+#         v = x[i:i + size]
+#         trajectories.append(v)
+#     return trajectories
+#
+# def split_trajectory_in_time_window(trajectory, time_window=10, offset=1):
+#     trajectories = []
+#     label = []
+#     min_ts = trajectory.df.ts_sec.min()
+#     for i in range(0, len(trajectory.df.index), offset):
+#         time = trajectory.df.iloc[i].ts_sec
+#         if time - time_window >= min_ts:
+#             df = trajectory.df[
+#                 (trajectory.df['ts_sec'] > time - time_window) & (trajectory.df['ts_sec'] < time + time_window)]
+#             trajectories.append(AISTrajectory(df))
+#             label.append(trajectory.df.iloc[i].label)
+#     return trajectories, label
+#
+#
+# def split_trajectory_in_window_single_label(trajectory, size=10, offset=1):
+#     trajectories = []
+#     labels = []
+#     for i in range(0, len(trajectory.df.index) - size, offset):
+#         df = trajectory.df.iloc[i:i + size]
+#         if len(df['label'].unique()) == 1:
+#             trajectories.append(AISTrajectory(df))
+#             labels.append(df['label'].iloc[0])
+#     return trajectories, labels
+#
+#
+# def split_in_time_windows(x, y, size=10, offset=1, label_position=5):
+#     assert (len(x) == len(y))
+#     assert offset > 0
+#
+#     x_windows = []
+#     y_windows = []
+#     for tx, ty in zip(x, y):
+#         assert len(tx) == len(ty)
+#         index = 0
+#         while index + size < len(tx):
+#             x_windows.append(tx[index:index + size])
+#             y_windows.append(ty[index + label_position])
+#             index += offset
+#
+#     return x_windows, y_windows
+#
+# def all_equal(iterator):
+#     iterator = iter(iterator)
+#     try:
+#         first = next(iterator)
+#     except StopIteration:
+#         return True
+#     return all(first == x for x in iterator)
+#
+# def split_in_time_windows_single_label(x, y, size=10, offset=1):
+#     assert (len(x) == len(y))
+#     assert offset > 0
+#
+#     x_windows = []
+#     y_windows = []
+#     for tx, ty in zip(x, y):
+#         assert len(tx) == len(ty)
+#         index = 0
+#         while index + size < len(tx):
+#             if all_equal(ty[index:index + size]):
+#                 x_windows.append(tx[index:index + size])
+#                 y_windows.append(ty[index])
+#                 index += offset
+#             else:
+#                 index += 1
+#
+#     return x_windows, y_windows
+#
+def convert_to_vectors(trajectories, features):
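+    # Build one feature matrix and one one-hot label matrix (4 activity classes) per trajectory.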
+    X = []
+    Y = []
+    for trajectory in trajectories:
+        X.append(trajectory.df[features].to_numpy())
+        y = trajectory.df['label'].to_numpy()
+        y_one_hot = np.zeros((y.size, 4))
+        y_one_hot[np.arange(y.size), y] = 1
+        Y.append(y_one_hot)
+    return X, Y
+
+
+def convert_to_vectors_not_one_hot(trajectories, features, tqdm_disable=False, tqdm_position=1, tqdm_leave=False):
+    X = []
+    Y = []
+    for trajectory in tqdm.tqdm(trajectories, position=tqdm_position, leave=tqdm_leave, disable=tqdm_disable):
+        X.append(trajectory.df[features].to_numpy())
+        y = trajectory.df['label'].to_numpy()
+        Y.append(y)
+    return X, Y
+#
+#
+# def get_scores(model, x_test, y_test):
+#     predictions = []
+#     for x in tqdm.tqdm(x_test):
+#         predictions.append(model.predict(np.array([x])))
+#
+#     y_predict = np.concatenate(predictions, axis=1)[0]
+#     y_true = np.concatenate(y_test)
+#
+#     return get_scores_label(np.argmax(y_true, axis=1), np.argmax(y_predict, axis=1))
+#
+#
+# def get_scores_label(y_true, y_predict):
+#     acc = metrics.accuracy_score(y_true, y_predict)
+#     precision = metrics.precision_score(y_true, y_predict,
+#                                         average='weighted')
+#     recall = metrics.recall_score(y_true, y_predict, average='weighted')
+#     f1 = metrics.f1_score(y_true, y_predict, average='weighted')
+#
+#     confusion_matrix = metrics.confusion_matrix(y_true, y_predict)
+#
+#     return acc, precision, recall, f1, confusion_matrix
+#
+#
+# def get_scores_per_label(y_true, y_predict):
+#     confusion_matrix = metrics.confusion_matrix(y_true, y_predict)
+#
+#     return confusion_matrix.diagonal() / confusion_matrix.sum(axis=1)
+#
+#
+def split_to_size_and_complete(x, y, size=100, value=np.nan):
+    result = []
+    labels = []
+    for i in range(0, len(x), size):
+        result.append(x[i:i + size])
+        labels.append(y[i:i + size])
+    # pad the last chunk with the mask value (features) and all-zero one-hot rows (labels) so every chunk has length `size`
+    if len(result[-1]) < size:
+        result[-1] = np.concatenate((result[-1], np.full((size - len(result[-1]), result[-1].shape[1]), value)))
+        labels[-1] = np.concatenate((labels[-1], np.full((size - len(labels[-1]), labels[-1].shape[1]), 0)))
+    return result, labels
+
+
+def split_to_size_list(X, Y, size=100, value=np.nan):
+    x_n = []
+    y_n = []
+    for x, y in zip(X, Y):
+        x, y = split_to_size_and_complete(x, y, size, value)
+        x_n += x
+        y_n += y
+    return x_n, y_n
+#
+#
+# def evaluate_model_one_hot(model, x_test, y_test):
+#     predictions = []
+#     for x in x_test:
+#         predictions.append(model.predict(np.array([x])))
+#
+#     y_predict = np.argmax(np.concatenate(predictions, axis=1)[0], axis=1)
+#     y_true = np.argmax(np.concatenate(y_test), axis=1)
+#
+#     acc = metrics.accuracy_score(y_true, y_predict)
+#     precision = metrics.precision_score(y_true, y_predict, average='weighted')
+#     recall = metrics.recall_score(y_true, y_predict, average='weighted')
+#     f1 = metrics.f1_score(y_true, y_predict, average='weighted')
+#
+#     return acc, precision, recall, f1
+#
+#
+# def evaluate_model_one_hot_masked(model, x_test, y_test, mask_value=99999.99, batch_size=16, verbose=0):
+#     predictions = model.predict(np.array(x_test), batch_size=batch_size, verbose=verbose)
+#     M = np.ma.masked_equal(np.concatenate(x_test)[:,0], mask_value)
+#     y_predict = np.argmax(np.concatenate(predictions, axis=0), axis=1)[~M.mask.reshape(-1)]
+#
+#     y_true = np.argmax(np.concatenate(y_test, axis=0), axis=1)[~M.mask.reshape(-1)]
+#
+#     acc = metrics.accuracy_score(y_true, y_predict)
+#     precision = metrics.precision_score(y_true, y_predict, average='weighted')
+#     recall = metrics.recall_score(y_true, y_predict, average='weighted')
+#     f1 = metrics.f1_score(y_true, y_predict, average='weighted')
+#
+#     return acc, precision, recall, f1
+#
+# def split_on_time(trajectories, time_gap, cut_off=-1):
+#     trajectories_split = []
+#     for trajectory in tqdm.tqdm(trajectories, desc='splitting trajectories', position=0):
+#         trajectories_split += trajectory.split_trajectory(time_gap=time_gap, interpolation=None)
+#
+#     result = []
+#     for trajectory in tqdm.tqdm(trajectories_split, desc='cleaning trajectories', position=0):
+#         if len(trajectory.df.index) > cut_off:
+#             trajectory.remove_outliers(['sog'])
+#             result.append(trajectory)
+#
+#     return result
\ No newline at end of file
diff --git a/src/utils/features.py b/src/utils/features.py
new file mode 100644
index 0000000000000000000000000000000000000000..034a8876cdb1114e39426e0593c8b1eef00f4968
--- /dev/null
+++ b/src/utils/features.py
@@ -0,0 +1,106 @@
+from copy import deepcopy
+
+import numpy as np
+import skais.utils.geography
+from skais.process.basic_features_operations import time_derivative, angular_std, angular_time_derivative, angular_mean
+from tqdm import tqdm
+
+FEATURES_LIST = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                 'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+
+
+def position_std(data):
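+    # Root-mean-square great-circle distance of the points to their angular mean position (a positional spread measure).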
+    long_mean = np.degrees(angular_mean(np.radians(data[:, 0])))
+    lat_mean = np.degrees(angular_mean(np.radians(data[:, 1])))
+
+    distance_sum = 0
+    for long, lat in data:
+        distance_sum += skais.utils.geography.great_circle.py_func(lat_mean, lat, long_mean, long) ** 2
+    return np.sqrt(distance_sum / len(data))
+
+
+def compute_selected_features_on_trajectories(trajectories, radius, min_size=12, tqdm_disable=False):
+    out = []
+    for trajectory in tqdm(deepcopy(trajectories), disable=tqdm_disable):
+        if len(trajectory.df.index) > min_size:
+            compute_selected_features(trajectory, radius)
+            out.append(trajectory)
+    return out
+
+
+def compute_selected_features_on_trajectories_multi_time_window(trajectories, radii, min_size=12):
+    out = []
+    features = None
+    for trajectory in tqdm(deepcopy(trajectories), desc="computing features"):
+        trajectory.remove_outliers(['sog'])
+        trajectory.clean_angles()
+        if len(trajectory.df.index) > min_size:
+            features = compute_selected_features_multi_time_window(trajectory, radii)
+            out.append(trajectory)
+    return out, features
+
+
+def compute_selected_features(trajectory, time_window_radius):
+    trajectory.clean_angles()
+    trajectory.compute_drift()
+
+    trajectory.apply_func_on_time_sequence(time_derivative, 'sog', new_column='delta_sog')
+
+    trajectory.apply_func_on_points(np.radians, 'cog', new_column='cog_r')
+    trajectory.apply_func_on_points(np.radians, 'heading', new_column='heading_r')
+    trajectory.apply_func_on_time_sequence(angular_time_derivative, 'cog_r',
+                                           new_column='delta_cog')
+    trajectory.apply_func_on_points(np.abs, 'delta_cog')  # curvature cog:
+    trajectory.apply_func_on_time_sequence(angular_time_derivative, 'heading_r',
+                                           new_column='delta_heading')
+    trajectory.apply_func_on_points(np.abs, 'delta_heading')  # curvature heading
+
+    trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'cog_r', 'std_cog', on_edge='ignore')
+    trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'heading_r', 'std_hdg', on_edge='ignore')
+    trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'delta_cog', 'std_delta_cog',
+                                         on_edge='ignore')
+    trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'delta_heading', 'std_delta_heading',
+                                         on_edge='ignore')
+
+    trajectory.apply_func_on_time_window(position_std, time_window_radius, ['longitude', 'latitude'], 'std_pos',
+                                         on_edge='ignore')
+
+    trajectory.apply_func_on_time_window(np.std, time_window_radius, 'sog', 'std_sog', on_edge='ignore')
+
+
+def compute_selected_features_multi_time_window(trajectory, time_window_radii):
+    trajectory.compute_drift()
+    trajectory.apply_func_on_time_sequence(time_derivative, 'sog', new_column='delta_sog')
+    trajectory.apply_func_on_points(np.radians, 'cog', new_column='cog_r')
+    trajectory.apply_func_on_points(np.radians, 'heading', new_column='heading_r')
+    trajectory.apply_func_on_time_sequence(angular_time_derivative, 'cog_r',
+                                           new_column='delta_cog')
+    trajectory.apply_func_on_points(np.abs, 'delta_cog')  # curvature cog:
+    trajectory.apply_func_on_time_sequence(angular_time_derivative, 'heading_r',
+                                           new_column='delta_heading')
+    trajectory.apply_func_on_points(np.abs, 'delta_heading')  # curvature heading
+
+    features_list = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading']
+    for time_window_radius in time_window_radii:
+        trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'cog_r', f'std_cog_{time_window_radius}',
+                                             on_edge='ignore')
+        trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'heading_r',
+                                             f'std_hdg_{time_window_radius}', on_edge='ignore')
+        trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'delta_cog',
+                                             f'std_delta_cog_{time_window_radius}',
+                                             on_edge='ignore')
+        trajectory.apply_func_on_time_window(angular_std, time_window_radius, 'delta_heading',
+                                             f'std_delta_heading_{time_window_radius}',
+                                             on_edge='ignore')
+
+        trajectory.apply_func_on_time_window(position_std, time_window_radius, ['longitude', 'latitude'],
+                                             f'std_pos_{time_window_radius}',
+                                             on_edge='ignore')
+
+        trajectory.apply_func_on_time_window(np.std, time_window_radius, 'sog', f'std_sog_{time_window_radius}',
+                                             on_edge='ignore')
+
+        features_list += [f'std_cog_{time_window_radius}', f'std_hdg_{time_window_radius}',
+                          f'std_delta_cog_{time_window_radius}', f'std_delta_heading_{time_window_radius}',
+                          f'std_pos_{time_window_radius}', f'std_sog_{time_window_radius}']
+    return features_list
diff --git a/src/utils/features_list.py b/src/utils/features_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b0c98602ee2bfaa03adac2bf408e6de7c928b81
--- /dev/null
+++ b/src/utils/features_list.py
@@ -0,0 +1,6 @@
+FEATURES_LIST = ['sog', 'drift', 'delta_sog', 'delta_cog', 'delta_heading', 'std_cog',
+                 'std_hdg', 'std_delta_cog', 'std_delta_heading', 'std_pos', 'std_sog']
+
+FEATURES_COMBINATIONS = [['sog'], ['cog'], ['heading'], ['latitude', 'longitude'],
+                         ['cog', 'heading', 'longitude', 'latitude'],
+                         ['sog', 'cog', 'heading', 'longitude', 'latitude']]
\ No newline at end of file
diff --git a/src/utils/models.py b/src/utils/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a76cc51bbe515c87794d57ccead25b3346e59c3
--- /dev/null
+++ b/src/utils/models.py
@@ -0,0 +1,62 @@
+import numpy as np
+from keras.layers import Dense, Bidirectional, LSTM, TimeDistributed, Masking
+from keras.models import Sequential
+from tensorflow import keras
+from tensorflow.keras import layers
+
+
+def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
+    # Normalization and Attention
+    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
+    x = layers.MultiHeadAttention(
+        key_dim=head_size, num_heads=num_heads, dropout=dropout
+    )(x, x)
+    x = layers.Dropout(dropout)(x)
+    res = x + inputs
+
+    # Feed Forward Part
+    x = layers.LayerNormalization(epsilon=1e-6)(res)
+    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
+    x = layers.Dropout(dropout)(x)
+    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
+    return x + res
+
+
+def build_transformer(
+        input_shape,
+        head_size,
+        num_heads,
+        ff_dim,
+        num_transformer_blocks,
+        mlp_units,
+        dropout=0,
+        mlp_dropout=0,
+        n_classes=2
+):
+    inputs = keras.Input(shape=input_shape)
+    x = inputs
+    for _ in range(num_transformer_blocks):
+        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
+
+    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
+    for dim in mlp_units:
+        x = layers.Dense(dim, activation="relu")(x)
+        x = layers.Dropout(mlp_dropout)(x)
+    outputs = layers.Dense(n_classes, activation="softmax")(x)
+    return keras.Model(inputs, outputs)
+
+def create_LSTM_model(nb_layers, nb_units, nb_inputs, mask_value=np.nan, timesteps=100):
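+    # Bidirectional LSTM sequence tagger behind a Masking layer; pass the numeric MASK_VALUE used for padding
+    # (the np.nan default cannot match padded steps, since NaN != NaN).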
+    model = Sequential()
+
+    model.add(Masking(mask_value=mask_value, input_shape=(timesteps, nb_inputs)))
+    model.add(Bidirectional(LSTM(nb_units, return_sequences=True)))
+    for _ in range(nb_layers - 1):
+        model.add(Bidirectional(LSTM(nb_units, return_sequences=True)))
+    model.add(TimeDistributed(Dense(4, activation='softmax')))
+
+    optimizer = "Adam"
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    return model
diff --git a/src/utils/utils.py b/src/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..12d39c219af830f5fe28e5e6026b63e1e10d2563
--- /dev/null
+++ b/src/utils/utils.py
@@ -0,0 +1,78 @@
+from random import random, shuffle
+
+import numpy as np
+import pandas as pd
+from skais.learn.data_augmentation import shift_positions
+from sklearn import metrics
+from tqdm import tqdm
+
+
+def generate_between(a, b):
+    return a + (b - a) * random()
+
+
+def shift(trajectories, s=1):
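+    # Data augmentation: return s randomly shifted copies of every trajectory (the originals are returned only when s == 0).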
+    result = []
+    if s == 0:
+        return trajectories
+    for _ in range(s):
+        for trajectory in trajectories:
+            result.append(
+                shift_positions(
+                    trajectory,
+                    generate_between(-np.pi, np.pi),
+                    generate_between(-np.pi, np.pi),
+                    generate_between(-np.pi, np.pi)
+                )
+            )
+    return result
+
+def unison_shuffled_copies(a, b):
+    c = list(zip(a, b))
+
+    shuffle(c)
+
+    return zip(*c)
+
+def evaluate_model(model, x_test, y_test):
+    y_predict = model.predict(x_test)
+    y_true = y_test
+
+    acc = metrics.accuracy_score(y_true, y_predict)
+    f1 = metrics.f1_score(y_true, y_predict, average='weighted')
+    c_mat = metrics.confusion_matrix(y_true, y_predict)
+    return acc, f1, c_mat
+
+
+def get_transition_indexes(trajectories):
+    indexes = []
+    for trajectory in tqdm(trajectories, leave=False, colour='yellow'):
+        df = trajectory.df[['label']].rolling(2).apply(lambda x: x[0] != x[-1], raw=True).dropna().astype(int)
+        index = df[df['label'] != 0].index
+        indexes.append(index)
+    return indexes
+
+def get_point_near_transition(trajectories, radius, transitions=None, keep_duplicates=False):
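+    # Split each trajectory into points lying within `radius` samples of a label transition (result) and all remaining points (negative).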
+    if transitions is None:
+        transitions = get_transition_indexes(trajectories)
+    result = []
+    negative = []
+    for indexes, trajectory in tqdm(zip(transitions, trajectories), total=len(trajectories), leave=False, colour='green'):
+        nb_data_points = len(trajectory.df.index)
+        segments = []
+        for index in indexes:
+            position = trajectory.df.index.get_loc(index)
+            segments.append(trajectory.df.iloc[max(0, position - radius):min(position + radius, nb_data_points)])
+
+        if len(segments) > 0:
+            trajectory_segments = pd.concat(segments)
+            if not keep_duplicates:
+                trajectory_segments = trajectory_segments[~trajectory_segments.index.duplicated(keep='first')]
+
+            negative.append(trajectory.df.drop(trajectory_segments.index))
+            result.append(trajectory_segments)
+        else:
+            negative.append(trajectory.df)
+
+    return pd.concat(result), pd.concat(negative)
+