
[Model & Dataset] facebook & sp2gcl #201

Merged: 19 commits, Jul 5, 2024
79 changes: 79 additions & 0 deletions examples/sp2_gcl/evaluation_test.py
@@ -0,0 +1,79 @@

import os
# Set device/logging env vars before the framework (and TF) is imported.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorlayerx as tlx
import tensorlayerx.nn as nn
from tensorlayerx.model import TrainOneStep, WithLoss



class SemiSpvzLoss(WithLoss):
def __init__(self, net, loss_fn):
super(SemiSpvzLoss, self).__init__(backbone=net, loss_fn=loss_fn)

def forward(self, data, label):
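        # Cross-entropy on the training split only: gather the logits and
        # labels at train_idx before computing the loss.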
logits = self.backbone_network(data['x'])
train_logits = tlx.gather(logits, data['train_idx'])
train_y = tlx.gather(data['y'], data['train_idx'])
loss = self._loss_fn(train_logits, train_y)
return loss

class LogReg(nn.Module):
def __init__(self, hid_dim, out_dim):
super(LogReg, self).__init__()
        self.linear = nn.Linear(in_features=hid_dim, out_features=out_dim,
                                W_init=tlx.initializers.xavier_uniform(),
                                b_init=tlx.initializers.zeros())

def forward(self, x):
        # Forward pass
return self.linear(x)

def node_evaluation(emb, y, train_idx, valid_idx, test_idx, lr=1e-2, weight_decay=1e-4):
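    # Linear-probe evaluation: train a logistic-regression classifier on the
    # frozen embeddings and report the test accuracy at the best validation epoch.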

    nclass = y.max().item() + 1
logreg = LogReg(hid_dim=emb.shape[1], out_dim=nclass)
opt = tlx.optimizers.Adam(lr=lr, weight_decay=weight_decay)
train_weights = logreg.trainable_weights
loss = tlx.losses.softmax_cross_entropy_with_logits
loss_func = SemiSpvzLoss(logreg, loss)
    train_one_step = TrainOneStep(loss_func, opt, train_weights)

    data = {
        'x': emb,
        'y': y,
        'train_idx': train_idx,
        'valid_idx': valid_idx,
        'test_idx': test_idx
    }
best_val_acc = 0
eval_acc = 0
pred = None

for epoch in range(2000):
logreg.set_train()
loss = train_one_step(data=data, label=y)
logreg.set_eval()
if valid_idx.size(0) != 0:
val_logits = logreg(emb[valid_idx])
val_preds = tlx.argmax(val_logits, axis=1)
val_acc = tlx.reduce_sum(val_preds == y[valid_idx]).float() / valid_idx.size(0)
else:
train_logits = logreg(emb[train_idx])
train_preds = tlx.argmax(train_logits, axis=1)
train_acc = tlx.reduce_sum(train_preds == y[train_idx]).float() / train_idx.size(0)
val_acc = train_acc

test_logits = logreg(emb[test_idx])
test_preds = tlx.argmax(test_logits, axis=1)
test_acc = tlx.reduce_sum(test_preds == y[test_idx]).float() / test_idx.size(0)

if val_acc >= best_val_acc:
best_val_acc = val_acc
if test_acc > eval_acc:
eval_acc = test_acc
pred = test_preds

return eval_acc, pred


140 changes: 140 additions & 0 deletions examples/sp2_gcl/node_main.py
@@ -0,0 +1,140 @@
import os
# Set device/logging env vars before the framework (and TF) is imported.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from tensorlayerx.model import WithLoss, TrainOneStep
import time
import argparse
from gammagl.utils import split
from evaluation_test import node_evaluation
from gammagl.models import EigenMLP, SpaSpeNode, Encoder
from scipy.sparse import csr_matrix
import numpy as np
import scipy.sparse.linalg
import tensorlayerx as tlx
from gammagl.utils import to_scipy_sparse_matrix
import networkx as nx
from gammagl.datasets import FacebookPagePage

def connected_components(sparse_adj):
G = nx.from_scipy_sparse_array(sparse_adj)
cc = nx.connected_components(G)

components = []
lens = []

for c in cc:
c = list(c)
components.append(c)
lens.append(len(c))

    return lens, components


def compute_laplacian(data):
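    # Build the symmetrically normalized Laplacian L = I - D^{-1/2} A D^{-1/2}
    # and take its smallest eigenpairs as the spectral inputs (e, u).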

edge_index = data.edge_index
num_nodes = data.num_nodes
row, col = edge_index
data_adj = csr_matrix((np.ones(len(row)), (row, col)), shape=(num_nodes, num_nodes))
degree = np.array(data_adj.sum(axis=1)).flatten()
deg_inv_sqrt = 1.0 / np.sqrt(degree)
deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0
I = csr_matrix(np.eye(num_nodes))
D_inv_sqrt = csr_matrix((deg_inv_sqrt, (np.arange(num_nodes), np.arange(num_nodes))))
L = I - D_inv_sqrt.dot(data_adj).dot(D_inv_sqrt)
e, u = scipy.sparse.linalg.eigsh(L, k=100, which='SM', tol=1e-3)
adj = to_scipy_sparse_matrix(data.edge_index)
lens, components = connected_components(adj)
data.e = tlx.convert_to_tensor(e, dtype=tlx.float32)
data.u = tlx.convert_to_tensor(u, dtype=tlx.float32)

return data, lens, components


class ContrastiveLoss(WithLoss):
def __init__(self, model, temp=1.0):
super(ContrastiveLoss, self).__init__(backbone=model, loss_fn=None)
self.temp = temp

def forward(self, data, label):
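        # Symmetric InfoNCE: the spatial and spectral embeddings of the same
        # node form a positive pair; every other node in the batch is a negative.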
h_node_spa, h_node_spe = self.backbone_network(data['x'], data['edge_index'], data['e'], data['u'])
h1 = tlx.l2_normalize(h_node_spa, axis=-1, eps=1e-12)
h2 = tlx.l2_normalize(h_node_spe, axis=-1, eps=1e-12)
logits = tlx.matmul(h1, h2.transpose(-2, -1)) / self.temp
labels = tlx.arange(start=0, limit=h1.shape[0], delta=1, dtype=tlx.int64)
loss = 0.5 * tlx.losses.softmax_cross_entropy_with_logits(logits, labels) + 0.5 * tlx.losses.softmax_cross_entropy_with_logits(logits.transpose(-2, -1), labels)
        return loss


def main(args):
global edge, e, u, test_idx
Contributor comment: What is this line doing?

print(args.dataset)
if args.dataset in ['pubmed-3', 'flickr', 'arxiv', 'wiki', 'facebook']:
dataset = FacebookPagePage(root='data/facebook')
data = dataset[0]
data, lens, components = compute_laplacian(data)
x = tlx.convert_to_tensor(data.x, dtype=tlx.float32)
edge = tlx.convert_to_tensor(data.edge_index, dtype=tlx.int64)
e = tlx.convert_to_tensor(data.e[:args.spe_dim], dtype=tlx.float32)
u = tlx.convert_to_tensor(data.u[:, :args.spe_dim], dtype=tlx.float32)
y = tlx.convert_to_tensor(data.y)
print(y.min().item(), y.max().item())
if 'train_mask' in data.keys:
if len(data.train_mask.size()) > 1:
train_idx = tlx.where(data.train_mask[:, args.seed])[0]
val_idx = tlx.where(data.val_mask[:, args.seed])[0]
test_idx = tlx.where(data.test_mask)[0]
else:
train_idx = tlx.where(data.train_mask)[0]
val_idx = tlx.where(data.val_mask)[0]
test_idx = tlx.where(data.test_mask)[0]
else:
train_idx, val_idx, test_idx = split(y)
Contributor comment: I do not think this is useful. Usually, the train/valid/test split should be done in the dataset. You may directly use data.train_mask etc. to get the idx instead of adding a new function in the util.


else:
pass

    print('test_idx:', len(test_idx))
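    # Two-branch model: a spatial GNN encoder over (x, edge_index) and a
    # spectral MLP encoder over the Laplacian eigenpairs (e, u).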
spa_encoder = Encoder(x.size(1), args.hidden_dim, args.hidden_dim)
spe_encoder = EigenMLP(args.spe_dim, args.hidden_dim, args.hidden_dim, args.period)
model = SpaSpeNode(spa_encoder, spe_encoder, hidden_dim=args.hidden_dim, t=args.t)
optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.weight_decay)
train_weights = model.trainable_weights
loss_func = ContrastiveLoss(model, temp=args.t)
train_one_step = TrainOneStep(loss_func, optimizer, train_weights)

data_all = {
'x': data.x,
'edge_index': data.edge_index,
'e': data.e,
'u': data.u,
}

t1 = time.time()
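    # Rough benchmark: time 1000 inference passes of the spectral encoder.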
for i in range(1000):
model.set_eval()
spe_emb = model.spe_encoder(e, u).detach()
t2 = time.time()
print("t2-t1:",t2 - t1)

for idx in range(100):
model.set_train()
loss = train_one_step(data=data_all, label=data.y)
if (idx+1) % 10 == 0:
model.set_eval()
spa_emb = model.spa_encoder(x, edge).detach()
spe_emb = model.spe_encoder(e, u).detach()
acc, pred = node_evaluation((spa_emb + spe_emb)/2, y, train_idx, val_idx, test_idx)
print(acc)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--cuda', type=int, default=3)
parser.add_argument('--dataset', default='wiki')
parser.add_argument('--spe_dim', type=int, default=100)
parser.add_argument('--period', type=int, default=20)
parser.add_argument('--hidden_dim', type=int, default=512)
parser.add_argument('--t', type=float, default=1.0)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--weight_decay', type=float, default=0)
args = parser.parse_args()
print(args)
main(args)
40 changes: 40 additions & 0 deletions examples/sp2_gcl/readme.md
@@ -0,0 +1,40 @@
# Graph Contrastive Learning with Stable and Scalable Spectral Encoding (Sp2GCL)

- Paper link: [https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf](https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf)
Contributor comment: The link is incorrect.

- Author's code repo: [https://github.com/bdy9527/Sp2GCL](https://github.com/bdy9527/Sp2GCL).

# Dataset Statistics

| Dataset | # Nodes | # Edges | # Classes |
|----------|---------|----------|-----------|
| PubMed | 19,717 | 88,648 | 3 |
| Wiki-CS | 11,701 | 216,123 | 10 |
| Facebook | 22,470 | 342,004 | 4 |
| Flickr | 89,250 | 899,756 | 7 |
| PPI | 56,928 | 1,226,368| 121 |



Results
-------

```bash

TL_BACKEND="torch" python node_main.py --dataset Facebook
TL_BACKEND="torch" python node_main.py --dataset PubMed
TL_BACKEND="torch" python node_main.py --dataset Wiki-CS
TL_BACKEND="torch" python node_batch.py --dataset Flickr
TL_BACKEND="torch" python node_batch.py --dataset PPI
```



| Dataset  | Paper      | Ours (torch) |
|----------|------------|--------------|
| PubMed | 82.3±0.3 | OOM |
| Wiki-CS | 79.42±0.19 | 78.60±0.14 |
| Facebook | 90.43±0.13 | 85.35±0.09 |
| PPI | 74.28±0.22 | 79.30±0.12 |
| Flickr | 52.05±0.33 | 52.09±0.28 |
4 changes: 3 additions & 1 deletion gammagl/datasets/__init__.py
@@ -19,6 +19,7 @@
from .wikics import WikiCS
from .blogcatalog import BlogCatalog
from .molecule_net import MoleculeNet
from .facebook import FacebookPagePage

__all__ = [
'Amazon',
@@ -40,7 +41,8 @@
'AMiner',
'PolBlogs',
'WikiCS',
'MoleculeNet'
'MoleculeNet',
'FacebookPagePage'
]

classes = __all__
67 changes: 67 additions & 0 deletions gammagl/datasets/facebook.py
@@ -0,0 +1,67 @@
from typing import Callable, Optional
import os
import numpy as np
import tensorlayerx as tlx

from gammagl.data import Graph, InMemoryDataset, download_url

class FacebookPagePage(InMemoryDataset):
r"""The Facebook Page-Page network dataset introduced in the
`"Multi-scale Attributed Node Embedding"
<https://arxiv.org/abs/1909.13021>`_ paper.
Nodes represent verified pages on Facebook and edges are mutual likes.
It contains 22,470 nodes, 342,004 edges, 128 node features and 4 classes.

Args:
root (str): Root directory where the dataset should be saved.
transform (callable, optional): A function/transform that takes in an
:obj:`gammagl.data.Graph` object and returns a transformed
version. The data object will be transformed before every access.
(default: :obj:`None`)
pre_transform (callable, optional): A function/transform that takes in
an :obj:`gammagl.data.Graph` object and returns a
transformed version. The data object will be transformed before
being saved to disk. (default: :obj:`None`)
force_reload (bool, optional): Whether to re-process the dataset.
(default: :obj:`False`)
"""

url = 'https://graphmining.ai/datasets/ptg/facebook.npz'

def __init__(
self,
root: str,
Contributor comment: Currently, this argument can be optional, as we have a caching mechanism compared to PyG.

transform: Optional[Callable] = None,
pre_transform: Optional[Callable] = None,
force_reload: bool = False,
) -> None:
super().__init__(root, transform, pre_transform, force_reload=force_reload)
        self.data, self.slices = self.load_data(self.processed_paths[0])

@property
def raw_file_names(self) -> str:
return 'facebook.npz'

@property
def processed_file_names(self) -> str:
return tlx.BACKEND + '_data.pt'

def download(self) -> None:
download_url(self.url, self.raw_dir)

def process(self) -> None:
data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
x = tlx.convert_to_tensor(data['features'], dtype=tlx.float32)
y = tlx.convert_to_tensor(data['target'], dtype=tlx.int64)
edge_index = tlx.convert_to_tensor(data['edges'], dtype=tlx.int64)
edge_index = edge_index.T
Contributor comment: Have you tried whether this works with other backends like 'mindspore'?


data = Graph(x=x, edge_index=edge_index, y=y)

if self.pre_transform is not None:
data = self.pre_transform(data)

self.save_data(self.collate([data]), self.processed_paths[0])
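
A minimal usage sketch for the new dataset class (the `root` path here is illustrative, mirroring `node_main.py`):

```python
from gammagl.datasets import FacebookPagePage

# Downloads facebook.npz on first use, then reuses the cached processed file.
dataset = FacebookPagePage(root='data/facebook')
graph = dataset[0]
print(graph.num_nodes)  # 22,470 nodes, 342,004 edges, 128 features, 4 classes
```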



6 changes: 5 additions & 1 deletion gammagl/models/__init__.py
@@ -52,6 +52,7 @@
from .sfgcn import SFGCNModel
from .grace_spco import Grace_Spco_Encoder, Grace_Spco_Model
from .graphormer import Graphormer
from .sp2gcl import SpaSpeNode, Encoder, EigenMLP

__all__ = [
'GCNModel',
@@ -107,7 +108,10 @@
'GGDModel',
'Specformer',
'SFGCNModel',
'Graphormer'
'Graphormer',
'Encoder',
'EigenMLP',
'SpaSpeNode'
]

classes = __all__