Commit b729d57 by Syed Juned Ali, committed Jul 7, 2024 (1 parent: ee385b7). Showing 38 changed files with 3,888 additions and 0 deletions.
Binary file not shown.
@@ -0,0 +1,71 @@
import networkx as nx
from scipy.sparse.linalg import eigsh
import settings
import sys
import torch
from torch_geometric.nn import Node2Vec
from tqdm.auto import tqdm
from trainers.utils import device
import trainers.utils as utils


NODE2VEC_EPOCHS = 50
NODE2VEC_WALK_LENGTH = 30
NODE2VEC_CONTEXT_SIZE = 20
NODE2VEC_DIM = 64
NODE2VEC_NEG_SAMPLES = 4
NODE2VEC_BATCH_SIZE = 128
NODE2VEC_LR = 0.01
NODE2VEC_WALKS_PER_NODE = 30
NODE2VEC_NUM_WORKERS = 4 if sys.platform == 'linux' else 0
NODE2VEC_P = 1
NODE2VEC_Q = 1


def get_node2vec_embedding(g):
    """Train a Node2Vec model on graph ``g`` and return one embedding per node as a numpy array."""
    edge_index = utils.get_edge_index(g)
    node2vec = Node2Vec(
        edge_index,
        embedding_dim=NODE2VEC_DIM,
        walk_length=NODE2VEC_WALK_LENGTH,
        context_size=NODE2VEC_CONTEXT_SIZE,
        walks_per_node=NODE2VEC_WALKS_PER_NODE,
        num_negative_samples=NODE2VEC_NEG_SAMPLES,
        p=NODE2VEC_P,
        q=NODE2VEC_Q,
        sparse=True,
    ).to(device)

    num_workers = NODE2VEC_NUM_WORKERS
    loader = node2vec.loader(batch_size=NODE2VEC_BATCH_SIZE, shuffle=True, num_workers=num_workers)
    optimizer = torch.optim.SparseAdam(list(node2vec.parameters()), lr=NODE2VEC_LR)
    node2vec.train()
    total_loss = 0
    for epoch in range(1, NODE2VEC_EPOCHS + 1):
        # for epoch in tqdm(range(1, NODE2VEC_EPOCHS + 1), desc='Training Node2Vec For Node Embeddings'):
        total_loss = 0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = node2vec.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        loss = total_loss / len(loader)
        if epoch % 20 == 0 and settings.verbose:
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

    return node2vec.embedding.weight.detach().cpu().numpy()


def laplacian_eigenmaps_embeddings(G, dimensions=NODE2VEC_DIM):
    """Embed nodes with Laplacian eigenmaps: eigenvectors of the graph Laplacian
    associated with its smallest non-trivial eigenvalues."""
    dimensions = min(dimensions, G.number_of_nodes() - 2)
    L = nx.laplacian_matrix(G).astype(float)
    _, eigvecs = eigsh(L, k=dimensions + 1, which='SM')
    embeddings = eigvecs[:, 1:]  # Skip the first (trivial) eigenvector
    return embeddings


def get_adjacency_matrix_embedding(G):
    """Use the rows of the dense adjacency matrix directly as node embeddings."""
    A = nx.adjacency_matrix(G).todense()
    return A
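

# --- Hedged usage sketch (not part of the original commit) -------------------
# Illustrates the two self-contained embedding helpers on a toy NetworkX graph.
# `get_node2vec_embedding` is omitted because it relies on the project-specific
# `trainers.utils.get_edge_index` helper; note that the module-level imports of
# `settings` and `trainers.utils` are themselves project-specific, so this block
# only runs inside the original project.
if __name__ == '__main__':
    G_demo = nx.karate_club_graph()
    lap_emb = laplacian_eigenmaps_embeddings(G_demo, dimensions=8)
    adj_emb = get_adjacency_matrix_embedding(G_demo)
    print(lap_emb.shape)  # (34, 8): one 8-dimensional spectral embedding per node
    print(adj_emb.shape)  # (34, 34): each row is the node's adjacency profile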
@@ -0,0 +1,211 @@
import math
import torch
import torch.nn.functional as F
from torch.nn import Parameter
from sklearn.linear_model import LogisticRegression
from torch_geometric.nn.inits import reset, uniform
import sklearn
import torch.nn as nn
from torch_geometric.nn import GCNConv, GATConv


EPS = 1e-15
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class DeepGraphInfomax(torch.nn.Module):
    r"""The Deep Graph Infomax model from the
    `"Deep Graph Infomax" <https://arxiv.org/abs/1809.10341>`_ paper, built on a
    user-defined encoder :math:`\mathcal{E}`, summary model :math:`\mathcal{R}`,
    and corruption function :math:`\mathcal{C}`, extended here with a
    differentiable clustering head.

    Args:
        hidden_channels (int): The latent space dimensionality.
        encoder (Module): The encoder module :math:`\mathcal{E}`.
        out_channels (int): The number of clusters :math:`K`.
        summary (callable): The readout function :math:`\mathcal{R}`.
        corruption (callable): The corruption function :math:`\mathcal{C}`.
        cluster (callable): A differentiable clustering routine such as
            :func:`cluster_net` below.
    """

    def __init__(self, hidden_channels, encoder, out_channels, summary, corruption, cluster):
        super(DeepGraphInfomax, self).__init__()
        self.hidden_channels = hidden_channels
        self.encoder = encoder
        self.summary = summary
        self.corruption = corruption
        self.weight = Parameter(torch.Tensor(hidden_channels, hidden_channels))
        self.reset_parameters()
        self.K = out_channels
        self.cluster_temp = 30
        self.init = torch.rand(self.K, hidden_channels)
        self.cluster = cluster

    def reset_parameters(self):
        reset(self.encoder)
        reset(self.summary)
        uniform(self.hidden_channels, self.weight)

    def forward(self, x, edge_index):
        """Returns the latent space for the input arguments, their corruptions,
        their summary representation, and the soft cluster assignments of the
        positive embeddings."""
        pos_z = self.encoder(x, edge_index)
        cor = self.corruption(x, edge_index)
        cor = cor if isinstance(cor, tuple) else (cor, )
        neg_z = self.encoder(*cor)
        summary = self.summary(pos_z)
        num_iter = 1
        mu_init, _, _ = self.cluster(pos_z, self.K, num_iter, self.cluster_temp, self.init)
        mu, r, dist = self.cluster(pos_z, self.K, 1, self.cluster_temp, mu_init.detach().clone())
        return pos_z, neg_z, summary, mu, r, dist

    def discriminate(self, z, summary, sigmoid=True):
        r"""Given the patch-summary pair :obj:`z` and :obj:`summary`, computes
        the probability scores assigned to this patch-summary pair.

        Args:
            z (Tensor): The latent space.
            summary (Tensor): The summary vector.
            sigmoid (bool, optional): If set to :obj:`False`, does not apply
                the logistic sigmoid function to the output.
                (default: :obj:`True`)
        """
        # print("shape", z.shape, summary.shape)
        value = torch.matmul(z, torch.matmul(self.weight, summary))
        return torch.sigmoid(value) if sigmoid else value

    def loss(self, pos_z, neg_z, summary):
        r"""Computes the mutual information maximization objective."""
        pos_loss = -torch.log(
            self.discriminate(pos_z, summary, sigmoid=True) + EPS).mean()
        neg_loss = -torch.log(
            1 - self.discriminate(neg_z, summary, sigmoid=True) + EPS).mean()

        # print('pos_loss = {}, neg_loss = {}'.format(pos_loss, neg_loss))
        # bin_adj_nodiag = bin_adj * (torch.ones(bin_adj.shape[0], bin_adj.shape[0]) - torch.eye(bin_adj.shape[0]))
        # modularity = (1./bin_adj_nodiag.sum()) * (r.t() @ mod @ r).trace()
        return pos_loss + neg_loss  # + modularity

    def comm_loss(self, pos_z, mu):
        return -torch.log(self.discriminate(pos_z, self.summary(mu), sigmoid=True) + EPS).mean()

    def modularity(self, r, bin_adj, mod):
        r, bin_adj, mod = r.to(device), bin_adj.to(device), mod.to(device)
        bin_adj_nodiag = bin_adj * (torch.ones(bin_adj.shape[0], bin_adj.shape[0]).to(device) - torch.eye(bin_adj.shape[0]).to(device))
        return (1. / bin_adj_nodiag.sum()) * (r.t() @ mod @ r).trace()

    def spectral_clustering_loss(self, X, y, alpha):
        n_clusters = y.shape[1]
        norm = torch.norm(X, p=2, dim=1, keepdim=True)
        X = X / norm

        # Compute affinity matrix A using Gaussian kernel
        A = torch.exp(-torch.norm(X[:, None, :] - X[None, :, :], dim=-1) ** 2)

        # Compute degree matrix D
        D = torch.diag(torch.sum(A, dim=1))

        # Compute Laplacian matrix L
        L = D - A

        # Compute eigenvectors of L corresponding to the smallest eigenvalues
        _, eigvecs = torch.linalg.eigh(L, UPLO='U')

        # Normalize the eigenvectors
        eigvecs = F.normalize(eigvecs[:, :n_clusters], dim=1)

        # Compute cluster assignments
        y_pred = F.softmax(torch.mm(eigvecs, eigvecs.t()), dim=1)

        # Compute spectral clustering loss
        loss = -torch.trace(torch.mm(y.t(), torch.log(y_pred))) + alpha * torch.trace(torch.mm(y.t(), L))

        return loss

    def test(self, train_z, train_y, test_z, test_y, solver='lbfgs',
             multi_class='auto', *args, **kwargs):
        r"""Evaluates latent space quality via a logistic regression downstream
        task."""
        clf = LogisticRegression(solver=solver, multi_class=multi_class, *args,
                                 **kwargs).fit(train_z.detach().cpu().numpy(),
                                               train_y.detach().cpu().numpy())
        return clf.score(test_z.detach().cpu().numpy(),
                         test_y.detach().cpu().numpy())

    def __repr__(self):
        return (f'{self.__class__.__name__}(hidden_dim={self.hidden_channels}, encoder={self.encoder}, '
                f'summary={self.summary}, weight={self.weight}, K={self.K}, cluster={self.cluster})')


def GELU(x):
    # Exact (erf-based) GELU activation
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


class Encoder(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super(Encoder, self).__init__()
        self.conv = GCNConv(in_channels, hidden_channels)  # , cached=True)
        # self.gat = GATConv(in_channels, 64, heads=8, dropout=0.0)
        self.prelu = nn.PReLU(hidden_channels)
        # self.ac = nn.ELU()
        # self.prop = APPNP(10, 0.1)

    def forward(self, x, edge_index):
        x = self.conv(x, edge_index)
        x = self.prelu(x)
        # x = self.prop(x, edge_index)
        return x


class Summarizer(nn.Module):
    """Readout: sigmoid of the mean node embedding."""
    def __init__(self):
        super(Summarizer, self).__init__()

    def forward(self, z):
        return torch.sigmoid(z.mean(dim=0))


def corruption(x, edge_index):
    # Negative samples: shuffle node features across nodes while keeping the topology
    return x[torch.randperm(x.size(0))], edge_index


def cluster_net(data, k, num_iter, cluster_temp, init):
    '''
    pytorch (differentiable) implementation of soft k-means clustering.
    '''
    # normalize x so it lies on the unit sphere
    data = torch.diag(1. / torch.norm(data, p=2, dim=1)) @ data
    # use kmeans++ initialization if nothing is provided
    # NOTE: this fallback relies on a private helper (`sklearn.cluster.k_means_._k_init`)
    # that only exists in older scikit-learn releases; callers in this module always
    # pass `init`, so the branch is normally not taken.
    if init is None:
        data_np = data.detach().numpy()
        norm = (data_np**2).sum(axis=1)
        init = sklearn.cluster.k_means_._k_init(data_np, k, norm, sklearn.utils.check_random_state(None))
        init = torch.tensor(init, requires_grad=True)
        if num_iter == 0: return init
    mu = init
    mu = mu.to(device)

    data = data.to(device)
    n = data.shape[0]
    d = data.shape[1]
    # data = torch.diag(1./torch.norm(data, dim=1, p=2))@data
    for _ in range(num_iter):
        # get distances between all data points and cluster centers
        # dist = torch.cosine_similarity(data[:, None].expand(n, k, d).reshape((-1, d)), mu[None].expand(n, k, d).reshape((-1, d))).reshape((n, k))
        dist = data @ mu.t()
        # cluster responsibilities via softmax
        r = torch.softmax(cluster_temp * dist, 1).to(device)
        # total responsibility of each cluster
        cluster_r = r.sum(dim=0)
        # mean of points in each cluster weighted by responsibility
        cluster_mean = (r.t().unsqueeze(1) @ data.expand(k, *data.shape)).squeeze(1)
        # update cluster means
        new_mu = torch.diag(1 / cluster_r) @ cluster_mean
        mu = new_mu
    dist = data @ mu.t()
    r = torch.softmax(cluster_temp * dist, 1)
    return mu, r, dist
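

# Shape reference for cluster_net (added for clarity; not part of the original commit):
#   data: (n, d) node embeddings, row-normalized to the unit sphere inside the function
#   returns mu:   (k, d) cluster centers
#           r:    (n, k) soft cluster responsibilities (softmax over centers)
#           dist: (n, k) similarity of each point to each center (data @ mu.T)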


'''
def summary(z, x, edge_index):
    capsule_model = CapsuleLayer(z.size(1), z.size(1))
    comm_emb = capsule_model(z.unsqueeze(0)).squeeze(0)
    return torch.sigmoid(comm_emb.mean(dim=0))
'''
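

# --- Hedged usage sketch (not part of the original commit) -------------------
# Wires the pieces above together on a random graph: the GCN Encoder produces
# node embeddings, Summarizer gives the graph summary, corruption creates the
# negative samples, and cluster_net yields soft community assignments. The
# graph, sizes, and optimizer choice below are illustrative assumptions.
if __name__ == '__main__':
    from torch_geometric.utils import erdos_renyi_graph

    num_nodes, in_channels, hidden_channels, num_clusters = 100, 16, 64, 4
    x = torch.randn(num_nodes, in_channels).to(device)
    edge_index = erdos_renyi_graph(num_nodes, edge_prob=0.05).to(device)

    model = DeepGraphInfomax(
        hidden_channels=hidden_channels,
        encoder=Encoder(in_channels, hidden_channels),
        out_channels=num_clusters,
        summary=Summarizer(),
        corruption=corruption,
        cluster=cluster_net,
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(5):
        optimizer.zero_grad()
        pos_z, neg_z, summary, mu, r, dist = model(x, edge_index)
        loss = model.loss(pos_z, neg_z, summary) + model.comm_loss(pos_z, mu)
        loss.backward()
        optimizer.step()
        print(f'epoch {epoch}: loss = {loss.item():.4f}')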
Empty file.
@@ -0,0 +1,103 @@
import torch_geometric
import torch
import torch.nn as nn
import torch.nn.functional as F


class GNNModel(torch.nn.Module):
    """GNN model with a configurable number of message-passing layers."""
    def __init__(self, model_name, input_dim, hidden_dim, out_dim, num_layers, num_heads=None, residual=False, l_norm=False, dropout=0.1):
        super(GNNModel, self).__init__()
        # Resolve the convolution class by name, e.g. 'GCNConv', 'GATConv', 'GINConv'
        gnn_model = getattr(torch_geometric.nn, model_name)
        self.conv_layers = nn.ModuleList()
        if model_name == 'GINConv':
            input_layer = gnn_model(nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU()), train_eps=True)
        elif num_heads is None:
            input_layer = gnn_model(input_dim, hidden_dim, aggr='SumAggregation')
        else:
            input_layer = gnn_model(input_dim, hidden_dim, heads=num_heads, aggr='SumAggregation')
        self.conv_layers.append(input_layer)

        for _ in range(num_layers - 2):
            if model_name == 'GINConv':
                self.conv_layers.append(gnn_model(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()), train_eps=True))
            elif num_heads is None:
                self.conv_layers.append(gnn_model(hidden_dim, hidden_dim, aggr='SumAggregation'))
            else:
                self.conv_layers.append(gnn_model(num_heads*hidden_dim, hidden_dim, heads=num_heads, aggr='SumAggregation'))

        if model_name == 'GINConv':
            self.conv_layers.append(gnn_model(nn.Sequential(nn.Linear(hidden_dim, out_dim), nn.ReLU()), train_eps=True))
        else:
            self.conv_layers.append(gnn_model(hidden_dim if num_heads is None else num_heads*hidden_dim, out_dim, aggr='SumAggregation'))

        self.activation = nn.ReLU()
        self.layer_norm = nn.LayerNorm(hidden_dim if num_heads is None else num_heads*hidden_dim) if l_norm else None
        self.residual = residual
        self.dropout = nn.Dropout(dropout)

    def forward(self, in_feat, edge_index):
        h = in_feat
        h = self.conv_layers[0](h, edge_index)
        h = self.activation(h)
        if self.layer_norm is not None:
            h = self.layer_norm(h)
        h = self.dropout(h)

        for conv in self.conv_layers[1:-1]:
            h = conv(h, edge_index) if not self.residual else conv(h, edge_index) + h
            h = self.activation(h)
            if self.layer_norm is not None:
                h = self.layer_norm(h)
            h = self.dropout(h)

        h = self.conv_layers[-1](h, edge_index)
        return h


class GraphClusteringLoss(nn.Module):
    """Differentiable clustering objective: rewards edge density inside clusters
    (cohesion) and penalizes edges that cross cluster boundaries (coupling)."""
    def __init__(self):
        super(GraphClusteringLoss, self).__init__()
        # Initialize lambda as a learnable parameter
        # (declared here but not used in the loss computed below)
        self.lambda_param = nn.Parameter(torch.tensor(0.5))  # Initial value of lambda

    def forward(self, A: torch.Tensor, Y: torch.Tensor):
        # A: (n, n) adjacency matrix, Y: (n, C) raw cluster logits
        n, C = Y.shape

        Y = F.softmax(Y, dim=1)  # Shape: n x C

        # Expected edge weight falling inside each cluster under the soft assignment
        Z = Y.T.unsqueeze(2) * Y.T.unsqueeze(1)
        total_ewc = (Z * A.unsqueeze(0)).sum(dim=(1, 2))
        nwc = Y.sum(dim=0)  # Shape: C
        max_ewc = (nwc * (nwc - 1) + 1e-8) / 2  # Avoid division by zero

        cohesion = (total_ewc / max_ewc).sum()

        # Expected edge weight between every pair of distinct clusters
        inter_cluster_edges = 0
        for i in range(C):
            for j in range(i+1, C):
                Y_i = Y[:, i].unsqueeze(1)
                Y_j = Y[:, j].unsqueeze(1)
                A_ij = Y_i @ Y_j.T
                ice = (A_ij * A).sum()
                inter_cluster_edges += ice

        total_edges = A.sum()
        coupling = inter_cluster_edges / (total_edges + 1e-9)  # Avoid division by zero

        loss = -cohesion + coupling

        # node_clusters = torch.argmax(Y, dim=1).cpu().numpy()
        # clusters = {i: c.item() for i, c in enumerate(node_clusters)}
        # g = nx.from_numpy_array(A.detach().cpu().numpy())
        # metrics = get_modularization_scores(g, clusters)

        # if settings.verbose:
        #     print(f'Loss: {loss.item():.4f}, Cohesion: {cohesion.item():.4f}, Coupling: {coupling.item():.4f}, Lambda: {self.lambda_param.item():.4f}')
        #     print(f'Actual Cohesion: {metrics["cohesion"]:.4f}, Actual Coupling: {metrics["coupling"]:.4f}, Clusters: {len(set(clusters.values()))}')

        return loss
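

# --- Hedged usage sketch (not part of the original commit) -------------------
# Trains a small GNNModel against the GraphClusteringLoss on a random symmetric
# adjacency matrix. The 'GCNConv' backbone, graph size, and optimizer settings
# are illustrative assumptions, and the 'SumAggregation' string used above is
# assumed to resolve in a recent PyTorch Geometric release.
if __name__ == '__main__':
    torch.manual_seed(0)
    n, num_feats, num_clusters = 30, 8, 3

    A = (torch.rand(n, n) > 0.8).float()
    A = torch.triu(A, diagonal=1)
    A = A + A.T                                   # symmetric adjacency, zero diagonal
    edge_index = A.nonzero(as_tuple=False).t()    # (2, num_edges) for message passing
    x = torch.randn(n, num_feats)

    model = GNNModel('GCNConv', input_dim=num_feats, hidden_dim=16, out_dim=num_clusters, num_layers=3)
    criterion = GraphClusteringLoss()
    optimizer = torch.optim.Adam(list(model.parameters()) + list(criterion.parameters()), lr=0.01)

    for epoch in range(10):
        optimizer.zero_grad()
        Y = model(x, edge_index)                  # (n, num_clusters) raw logits
        loss = criterion(A, Y)
        loss.backward()
        optimizer.step()
    print('final clustering loss:', loss.item())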