Skip to content
Snippets Groups Projects
Select Git revision
  • e4bcb02f96698c3cf3675c4c8bb74c56751b5de4
  • master default protected
  • v0.1.4
  • v0.1.3
  • v0.1.2
  • v0.1.1
  • v0.1
7 results

setup.py

Blame
  • preds_to_viz.py 3.35 KiB
    from collections import defaultdict
    import plotly.graph_objects as go
    import numpy as np
    from pathlib import Path
    import os
    
    from dotenv import find_dotenv, load_dotenv
    from sklearn.decomposition import PCA
    from sklearn.manifold import TSNE, MDS, Isomap, LocallyLinearEmbedding
    from sklearn.preprocessing import normalize
    
    if __name__ == "__main__":
        load_dotenv(find_dotenv('.env'))
        dir_name = "results/models/predictions"
        dir_path = Path(os.environ["project_dir"]) / dir_name
    
        dct_dataset_true_labels = dict()
        dct_dataset_algo_preds = defaultdict(dict)
        for dataset_path in dir_path.glob('*'):
            dataset_name = dataset_path.name
            max_forest_size = np.max(list(map(lambda x : int(x.name), dataset_path.glob("*"))))
            for forest_size_path in dataset_path.glob("*"):
                pruned_forest_size = int(forest_size_path.name)
                if pruned_forest_size != int(10 / 100 * max_forest_size) and pruned_forest_size != max_forest_size:
                    continue
                for algoname_path in forest_size_path.glob("*"):
                    algoname = algoname_path.name
                    if algoname == "true_labels.npz":
                        if dct_dataset_true_labels.get(dataset_name, None) is None:
                            # store the true labels for the task
                            true_labels_path = algoname_path
                            loaded_true_labels = np.load(true_labels_path)["Y_true"]
                            dct_dataset_true_labels[dataset_name] = loaded_true_labels
                        else:
                            continue
                    else:
                        path_predictions = algoname_path / "predictions_train.npz"
                        loaded_predictions = np.load(path_predictions)["Y_preds"]
                        dct_dataset_algo_preds[dataset_name][algoname] = loaded_predictions
    
        print(dct_dataset_true_labels)
        print(dct_dataset_algo_preds)
    
        for dataset_name in dct_dataset_algo_preds:
            predictions_algo = dct_dataset_algo_preds[dataset_name]["NN-OMP"].T
            try:
                predictions_total = dct_dataset_algo_preds[dataset_name]["None"].T
            except:
                continue
            real_preds = dct_dataset_true_labels[dataset_name].reshape(1, -1)
    
            predictions_total = np.vstack([predictions_total, real_preds])
    
            normalized_predictions_algo = normalize(predictions_algo)
            normalized_predictions_total = normalize(predictions_total)
            sim = normalized_predictions_algo @ normalized_predictions_total.T
    
            sim_equals_1 = np.isclose(sim, 1)
            bool_indices_tree_algo = np.sum(sim_equals_1, axis=0).astype(bool)
    
            # concat = np.vstack([predictions_algo, predictions_total])
            for perp in range(1, 20, 3):
                # tsne_obj = TSNE(n_components=2, perplexity=perp)
                tsne_obj = Isomap(n_components=2, n_neighbors=perp)
                X_embedded = tsne_obj.fit_transform(predictions_total)
                fig = go.Figure()
                fig.add_trace(go.Scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], mode='markers', name="Base"))
                fig.add_trace(go.Scatter(x=X_embedded[bool_indices_tree_algo, 0], y=X_embedded[bool_indices_tree_algo, 1], mode='markers', name="NN-OMP"))
                fig.add_trace(go.Scatter(x=X_embedded[-1:, 0], y=X_embedded[-1:, 1], mode='markers', name="NN-OMP"))
    
                fig.update_layout(title=f"Isomap {perp}")
                fig.show()