import os
import pickle
import random
import sys

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse
from scipy.sparse import csr_matrix
from math import sqrt

import stellargraph as sg
from stellargraph import datasets, globalvar
from stellargraph.data import EdgeSplitter, UniformRandomWalk, UnsupervisedSampler
from stellargraph.layer import GraphSAGE, link_classification
from stellargraph.mapper import GraphSAGELinkGenerator, GraphSAGENodeGenerator

from sklearn import feature_extraction, model_selection, preprocessing
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import tensorflow.keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Input, concatenate
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import SGD, Adadelta, Adam

from IPython.display import display, HTML

# Use a non-interactive backend so figures can be written without a display.
plt.switch_backend('agg')
# Loading multimodal fMRI data
def load_data(sub, view):
    """Return the fMRI feature matrix for one subject and one view."""
    # Import task fMRI data
    if view == 1:
        view_tfmri = np.load(os.path.join(path, "tfmri/{}/gii_matrix_fsaverage5.npy".format(sub)))
        return view_tfmri
    # Import resting-state fMRI data
    if view == 2:
        view_rsfmri = np.load(os.path.join(path, "rsfmri/{}/correlation_matrix_fsaverage5.npy".format(sub)))
        return view_rsfmri
    # Import concatenated fMRI data (task + rest, joined column-wise)
    if view == 3:
        view_rsfmri = np.load(os.path.join(path, "rsfmri/{}/correlation_matrix_fsaverage5.npy".format(sub)))
        view_tfmri = np.load(os.path.join(path, "tfmri/{}/gii_matrix_fsaverage5.npy".format(sub)))
        fmri_data = np.concatenate([view_tfmri, view_rsfmri], axis=1)
        return fmri_data
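# Usage sketch (hypothetical subject id): load_data(3, 1) returns the task-fMRI
# feature matrix for subject 3 (one row per fsaverage5 vertex), while
# load_data(3, 3) returns the task and resting-state features concatenated
# column-wise for that subject.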
def load_graph():
    """Load the precomputed sparse adjacency matrix from disk."""
    with open("adj_matrix.pck", "rb") as f:
        graph = pickle.load(f)
    return graph
# Path to the fsaverage5 data
path = "/home/asellami/data_fsaverage5"
print('View 1: task-fMRI')
print('View 2: resting-state fMRI')
print('View 3: concatenated views (task-fMRI + rest-fMRI)')
# view = 1: tfmri, view = 2: rsfmri, view = 3: concatenated views (task-fMRI + rest-fMRI)
view = 1
# Activation functions
hidden_layer = 'relu'
output_layer = 'linear'
# Exclude the subject with missing data (subject 36) from the range 3..42.
missing_data = [36]
index_subjects = np.arange(3, 43)
index_subjects = np.delete(index_subjects, np.argwhere(np.isin(index_subjects, missing_data)))
# Embedding dimensions to sweep; one model is trained per dimension.
dimensions = [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
for dim in dimensions:
    # Create an output directory for this embedding dimension.
    directory = '{}'.format(dim)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Cross-validation scaffolding (fold scores are collected below).
    kf = KFold(n_splits=2)
    cvscores_mse_test = []
    cvscores_rmse_test = []
    cvscores_mse_train = []
    cvscores_rmse_train = []
    # Load training data (node features).
    print('Load training data... (view {})'.format(view))
    train_data = np.concatenate([load_data(sub, view) for sub in index_subjects])
    print("Shape of the training data:", train_data.shape)
    # print('Load test data... (view {})'.format(view))
    # test_data = np.concatenate([load_data(sub, view) for sub in index_subjects[test_index]])
    # print("Shape of the test data:", test_data.shape)
    # Data normalization to the range [0, 1] (the MinMaxScaler default).
    print("Data normalization to range [0, 1]")
    scaler = MinMaxScaler()
    normalized_train_data = scaler.fit_transform(train_data)
    # normalized_test_data = scaler.transform(test_data)
    nodes_features_train = normalized_train_data
    # nodes_features_test = normalized_test_data
    # Load the adjacency matrix for one subject.
    adjacency_matrix = load_graph()
    adj = adjacency_matrix.todense().astype(int)
    # Construct the single-subject graph from the adjacency matrix.
    D = nx.DiGraph(adj)
    print(nx.info(D))
    # Construct the whole training adjacency structure: one disjoint copy of
    # the single-subject graph per training subject.
    number_training_subjects = len(index_subjects)  # 39 subjects
    print("number of training subjects:", number_training_subjects)
    cord_adj = np.argwhere(adj == 1)
    print("shape of cord_adj:", cord_adj.shape)
    for i in range(1, number_training_subjects):
        D.add_edges_from(cord_adj + (adj.shape[0] * i), weight=1)
    print(nx.info(D))
    nx.write_edgelist(D, "list_edges.edgelist")
    g_nx = nx.read_edgelist("list_edges.edgelist", create_using=nx.DiGraph(), nodetype=int)
    print(nx.info(g_nx))
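    # Node id layout implied by the offsetting above: the combined graph is
    # block-diagonal, so vertex v of subject i gets node id v + i * adj.shape[0];
    # with N nodes per subject, subject 0 owns ids 0..N-1, subject 1 owns
    # ids N..2N-1, and so on.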
    # Node features: one row per node, aligned with the node ids above.
    features = pd.DataFrame(data=nodes_features_train, index=np.arange(nodes_features_train.shape[0]))
    # Create the StellarGraph object.
    G = sg.StellarGraph(g_nx, node_features=features)
    print(G.info())
    # Walk parameters: root nodes, the number of walks per node, and the
    # length of each walk.
    nodes = list(G.nodes())
    number_of_walks = 1
    length = 5
    # Create the UnsupervisedSampler instance with the relevant parameters.
    unsupervised_samples = UnsupervisedSampler(
        G, nodes=nodes, length=length, number_of_walks=number_of_walks
    )
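    # The sampler generates (target, context) node pairs: positive pairs are
    # nodes that co-occur in uniform random walks of the given length, and
    # (per the StellarGraph unsupervised GraphSAGE demo) an equal number of
    # negative pairs is drawn by sampling random nodes from the graph.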
    # A held-out split of the features is not used in this unsupervised setting.
    # X_train, X_test = train_test_split(features, test_size=0.2)
    # Specify the GraphSAGE model.
    batch_size = 50
    epochs = 4
    num_samples = [5, 5]  # neighbours sampled at each of the two hops
    generator = GraphSAGELinkGenerator(G, batch_size, num_samples)
    train_gen = generator.flow(unsupervised_samples)
    layer_sizes = [110, dim]  # hidden size 110, final embedding size dim
    graphsage = GraphSAGE(
        layer_sizes=layer_sizes, generator=generator, bias=True, dropout=0.0, normalize="l2"
    )
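    # With num_samples=[5, 5], each embedding aggregates a sampled two-hop
    # neighbourhood of at most 1 + 5 + 5*5 = 31 distinct nodes, and
    # normalize="l2" projects the dim-dimensional output onto the unit sphere.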
    # Build the model and expose the input and output sockets of GraphSAGE
    # for node pair inputs.
    x_inp, x_out = graphsage.in_out_tensors()
    # Score each node pair by the inner product ("ip") of its two embeddings,
    # squashed through a sigmoid.
    prediction = link_classification(
        output_dim=1, output_act="sigmoid", edge_embedding_method="ip"
    )(x_out)
    model = tensorflow.keras.Model(inputs=x_inp, outputs=prediction)
    model.compile(
        optimizer=tensorflow.keras.optimizers.Adam(learning_rate=1e-3),
        loss=tensorflow.keras.losses.binary_crossentropy,
        metrics=[tensorflow.keras.metrics.binary_accuracy],
    )
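    # The objective is DeepWalk-style: binary cross-entropy pushes the scores
    # of co-occurring pairs towards 1 and those of random pairs towards 0, so
    # nodes that appear near each other in walks get similar embeddings.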
    history = model.fit(
        train_gen,
        epochs=epochs,
        verbose=1,
        workers=4,
        shuffle=True,
    )
    # Construct the embedding model: the link generator interleaves the input
    # tensors for the two nodes of each pair, so x_inp[0::2] selects the
    # source-node inputs and x_out[0] the corresponding embedding output.
    x_inp_src = x_inp[0::2]
    x_out_src = x_out[0]
    embedding_model = tensorflow.keras.Model(inputs=x_inp_src, outputs=x_out_src)
    node_ids = np.arange(nodes_features_train.shape[0])
    node_gen = GraphSAGENodeGenerator(G, batch_size, num_samples).flow(node_ids)
    node_embeddings = embedding_model.predict(node_gen, workers=4, verbose=1)
    print("node embeddings shape:", node_embeddings.shape)
    np.save("{}/node_embeddings.npy".format(dim), node_embeddings)
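# Downstream sketch (hypothetical): the saved embeddings for a given dimension
# can be reloaded with, e.g.,
#   emb = np.load("10/node_embeddings.npy")  # shape (n_nodes, 10)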