Select Git revision
-
valentin.emiya authoredvalentin.emiya authored
preds_to_viz.py 3.35 KiB
from collections import defaultdict
import plotly.graph_objects as go
import numpy as np
from pathlib import Path
import os
from dotenv import find_dotenv, load_dotenv
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, MDS, Isomap, LocallyLinearEmbedding
from sklearn.preprocessing import normalize
if __name__ == "__main__":
load_dotenv(find_dotenv('.env'))
dir_name = "results/models/predictions"
dir_path = Path(os.environ["project_dir"]) / dir_name
dct_dataset_true_labels = dict()
dct_dataset_algo_preds = defaultdict(dict)
for dataset_path in dir_path.glob('*'):
dataset_name = dataset_path.name
max_forest_size = np.max(list(map(lambda x : int(x.name), dataset_path.glob("*"))))
for forest_size_path in dataset_path.glob("*"):
pruned_forest_size = int(forest_size_path.name)
if pruned_forest_size != int(10 / 100 * max_forest_size) and pruned_forest_size != max_forest_size:
continue
for algoname_path in forest_size_path.glob("*"):
algoname = algoname_path.name
if algoname == "true_labels.npz":
if dct_dataset_true_labels.get(dataset_name, None) is None:
# store the true labels for the task
true_labels_path = algoname_path
loaded_true_labels = np.load(true_labels_path)["Y_true"]
dct_dataset_true_labels[dataset_name] = loaded_true_labels
else:
continue
else:
path_predictions = algoname_path / "predictions_train.npz"
loaded_predictions = np.load(path_predictions)["Y_preds"]
dct_dataset_algo_preds[dataset_name][algoname] = loaded_predictions
print(dct_dataset_true_labels)
print(dct_dataset_algo_preds)
for dataset_name in dct_dataset_algo_preds:
predictions_algo = dct_dataset_algo_preds[dataset_name]["NN-OMP"].T
try:
predictions_total = dct_dataset_algo_preds[dataset_name]["None"].T
except:
continue
real_preds = dct_dataset_true_labels[dataset_name].reshape(1, -1)
predictions_total = np.vstack([predictions_total, real_preds])
normalized_predictions_algo = normalize(predictions_algo)
normalized_predictions_total = normalize(predictions_total)
sim = normalized_predictions_algo @ normalized_predictions_total.T
sim_equals_1 = np.isclose(sim, 1)
bool_indices_tree_algo = np.sum(sim_equals_1, axis=0).astype(bool)
# concat = np.vstack([predictions_algo, predictions_total])
for perp in range(1, 20, 3):
# tsne_obj = TSNE(n_components=2, perplexity=perp)
tsne_obj = Isomap(n_components=2, n_neighbors=perp)
X_embedded = tsne_obj.fit_transform(predictions_total)
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], mode='markers', name="Base"))
fig.add_trace(go.Scatter(x=X_embedded[bool_indices_tree_algo, 0], y=X_embedded[bool_indices_tree_algo, 1], mode='markers', name="NN-OMP"))
fig.add_trace(go.Scatter(x=X_embedded[-1:, 0], y=X_embedded[-1:, 1], mode='markers', name="NN-OMP"))
fig.update_layout(title=f"Isomap {perp}")
fig.show()