diff --git a/multi_view_graph_representation_learning/monomodal_graphsage.py b/multi_view_graph_representation_learning/monomodal_graphsage.py new file mode 100644 index 0000000000000000000000000000000000000000..10c4eead1ef9392923043f7fd71e959710ed6aa1 --- /dev/null +++ b/multi_view_graph_representation_learning/monomodal_graphsage.py @@ -0,0 +1,228 @@ +import os +import pickle +import random +from math import sqrt + +import matplotlib.pyplot as plt +import networkx as nx +import numpy as np +import pandas as pd +import scipy.sparse +import stellargraph as sg +import tensorflow.keras +from IPython.display import display, HTML +from scipy.sparse import csr_matrix +from sklearn import preprocessing, feature_extraction, model_selection +from sklearn.decomposition import PCA +from sklearn.linear_model import LogisticRegression, LogisticRegressionCV +from sklearn.manifold import TSNE +from sklearn.metrics import accuracy_score, mean_squared_error +from sklearn.model_selection import KFold, train_test_split +from sklearn.preprocessing import MinMaxScaler, StandardScaler +from stellargraph import datasets, globalvar +from stellargraph.data import EdgeSplitter, UniformRandomWalk, UnsupervisedSampler +from stellargraph.layer import GraphSAGE, link_classification +from stellargraph.mapper import GraphSAGELinkGenerator, GraphSAGENodeGenerator +from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint +from tensorflow.keras.layers import Dense, Dropout, Input, concatenate +from tensorflow.keras.models import Model, load_model +from tensorflow.keras.optimizers import Adadelta, Adam, SGD + +plt.switch_backend('agg') + +# Root directory of the preprocessed fsaverage5 data +path = "/home/asellami/data_fsaverage5" + + +# Load the monomodal fMRI data (node features) for one subject and one view +def load_data(sub, view): + # View 1: task-fMRI data + if view == 1: + return np.load(os.path.join(path, "tfmri/{}/gii_matrix_fsaverage5.npy".format(sub))) + + # View 2: resting-state fMRI data + if view == 2: + return np.load(os.path.join(path, "rsfmri/{}/correlation_matrix_fsaverage5.npy".format(sub))) + + # View 3: concatenated views (task-fMRI + rest-fMRI) + if view == 3: + view_tfmri = np.load(os.path.join(path, "tfmri/{}/gii_matrix_fsaverage5.npy".format(sub))) + view_rsfmri = np.load(os.path.join(path, "rsfmri/{}/correlation_matrix_fsaverage5.npy".format(sub))) + return np.concatenate([view_tfmri, view_rsfmri], axis=1) + + raise ValueError("view must be 1 (task-fMRI), 2 (rest-fMRI) or 3 (concatenated)") + + +# Load the precomputed adjacency matrix from disk +def load_graph(): + with open("adj_matrix.pck", "rb") as f: + return pickle.load(f) + + +print('View 1: task-fMRI') +print('View 2: resting-state fMRI') +print('View 3: concatenated views (task-fMRI + rest-fMRI)') + +# view=1: task-fMRI, view=2: rest-fMRI, view=3: concatenated views +view = 1 + +# activation functions +hidden_layer = 'relu' +output_layer = 'linear' + +# Subjects 3..42, excluding subject 36 whose data is missing +missing_data = [36] +index_subjects = np.setdiff1d(np.arange(3, 43), missing_data)
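+ +# e.g. (illustrative sketch; the actual shapes depend on the preprocessing): +# tfmri = load_data(index_subjects[0], 1) # (n_vertices, n_task_features) +# rsfmri = load_data(index_subjects[0], 2) # (n_vertices, n_rest_features) +# both = load_data(index_subjects[0], 3) # (n_vertices, n_task + n_rest)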
+ +dimensions = [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] +for dim in dimensions: + # Create one output directory per embedding dimension + directory = '{}'.format(dim) + if not os.path.exists(directory): + os.makedirs(directory) + + # Cross-validation scaffolding (kept for a future k-fold run; this script + # currently trains on all subjects at once) + kf = KFold(n_splits=2) + cvscores_mse_test = [] + cvscores_rmse_test = [] + cvscores_mse_train = [] + cvscores_rmse_train = [] + + # Load the training data (node features), stacking all subjects row-wise + print('Load training data... (view {})'.format(view)) + train_data = np.concatenate([load_data(sub, view) for sub in index_subjects]) + print("Shape of the training data:", train_data.shape) + + # Data normalization to range [0, 1] + print("Data normalization to range [0, 1]") + scaler = MinMaxScaler() + nodes_features_train = scaler.fit_transform(train_data) + + # Load the adjacency matrix for one subject + adjacency_matrix = load_graph() + adj = adjacency_matrix.todense().astype(int) + + # Construct the single-subject graph from its adjacency matrix + D = nx.DiGraph(adj) + print(nx.info(D)) + + # Replicate the single-subject graph once per training subject, so the full + # training graph is block-diagonal (one disconnected block per subject) + number_training_subjects = len(index_subjects) + print("number of training subjects", number_training_subjects) + + cord_adj = np.argwhere(adj == 1) + print("shape of cord_adj", cord_adj.shape) + for i in range(1, number_training_subjects): + D.add_edges_from(cord_adj + (adj.shape[0] * i), weight=1) + print(nx.info(D)) + nx.write_edgelist(D, "list_edges.edgelist") + g_nx = nx.read_edgelist("list_edges.edgelist", create_using=nx.DiGraph(), nodetype=int) + print(nx.info(g_nx)) + + # Create the StellarGraph object with one feature row per node + features = pd.DataFrame(data=nodes_features_train, index=np.arange(0, nodes_features_train.shape[0])) + G = sg.StellarGraph.from_networkx(g_nx, node_features=features) + print(G.info()) + + # Random-walk parameters: root nodes, walks per root node, and walk length + nodes = list(G.nodes()) + number_of_walks = 1 + length = 5
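+ + # UnsupervisedSampler draws uniform random walks rooted at `nodes`; node + # pairs that co-occur on a walk become positive examples and randomly + # drawn pairs become negatives, which defines the self-supervised + # link-prediction task used to train GraphSAGE below.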
+ # Create the UnsupervisedSampler instance with the walk parameters above + unsupervised_samples = UnsupervisedSampler( + G, nodes=nodes, length=length, number_of_walks=number_of_walks + ) + + # Specify the GraphSAGE model + batch_size = 50 + epochs = 4 + num_samples = [5, 5] # neighbours sampled at the first and second hop + + generator = GraphSAGELinkGenerator(G, batch_size, num_samples) + train_gen = generator.flow(unsupervised_samples) + layer_sizes = [110, dim] # the final layer size is the embedding dimension + graphsage = GraphSAGE( + layer_sizes=layer_sizes, generator=generator, bias=True, dropout=0.0, normalize="l2" + ) + + # Build the model and expose the input and output tensors of GraphSAGE for node pair inputs + x_inp, x_out = graphsage.in_out_tensors() + + prediction = link_classification( + output_dim=1, output_act="sigmoid", edge_embedding_method="ip" + )(x_out) + + model = tensorflow.keras.Model(inputs=x_inp, outputs=prediction) + + model.compile( + optimizer=tensorflow.keras.optimizers.Adam(learning_rate=1e-3), + loss=tensorflow.keras.losses.binary_crossentropy, + metrics=[tensorflow.keras.metrics.binary_accuracy], + ) + history = model.fit( + train_gen, + epochs=epochs, + verbose=1, + workers=4, + shuffle=True, + ) + + # Construct the embedding model: the link model interleaves (source, target) + # input tensors per hop, so every second tensor is the source-node pipeline + x_inp_src = x_inp[0::2] + x_out_src = x_out[0] + embedding_model = tensorflow.keras.Model(inputs=x_inp_src, outputs=x_out_src) + + # Compute and save an embedding for every node of the training graph + node_ids = np.arange(0, nodes_features_train.shape[0]) + node_gen = GraphSAGENodeGenerator(G, batch_size, num_samples).flow(node_ids) + node_embeddings = embedding_model.predict(node_gen, workers=4, verbose=1) + + print("node embedding shape", node_embeddings.shape) + np.save("{}/node_embeddings.npy".format(dim), node_embeddings)
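+ + # Optional sanity check (sketch, not run here): reload the saved file and + # confirm its shape; TSNE (imported above) could then project the + # embeddings to 2-D for visual inspection, e.g.: + # emb = np.load("{}/node_embeddings.npy".format(dim)) + # assert emb.shape == (nodes_features_train.shape[0], dim) + # emb_2d = TSNE(n_components=2).fit_transform(emb)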