[Model & Dataset] facebook & sp2gcl #201

Merged: 19 commits, Jul 5, 2024
79 changes: 79 additions & 0 deletions examples/sp2_gcl/evaluation_test.py
@@ -0,0 +1,79 @@

import os
# Set environment variables before importing tensorlayerx so they take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorlayerx as tlx
import tensorlayerx.nn as nn
from tensorlayerx.model import TrainOneStep, WithLoss



class SemiSpvzLoss(WithLoss):
def __init__(self, net, loss_fn):
super(SemiSpvzLoss, self).__init__(backbone=net, loss_fn=loss_fn)

def forward(self, data, label):
logits = self.backbone_network(data['x'])
train_logits = tlx.gather(logits, data['train_idx'])
train_y = tlx.gather(data['y'], data['train_idx'])
loss = self._loss_fn(train_logits, train_y)
return loss

class LogReg(nn.Module):
def __init__(self, hid_dim, out_dim):
super(LogReg, self).__init__()
        self.linear = nn.Linear(in_features=hid_dim, out_features=out_dim,
                                W_init=tlx.initializers.xavier_uniform(),
                                b_init=tlx.initializers.zeros())

def forward(self, x):
return self.linear(x)


def node_evaluation(emb, y, train_idx, valid_idx, test_idx, lr=1e-2, weight_decay=1e-4):

nclass = y.max().item() + 1
logreg = LogReg(hid_dim=emb.shape[1], out_dim=nclass)
opt = tlx.optimizers.Adam(lr=lr, weight_decay=weight_decay)
train_weights = logreg.trainable_weights
loss = tlx.losses.softmax_cross_entropy_with_logits
loss_func = SemiSpvzLoss(logreg, loss)
    train_one_step = TrainOneStep(loss_func, opt, train_weights)

data = {
'x': emb,
'y': y,
'train_idx':train_idx,
'valid_idx':valid_idx,
'test_idx':test_idx
}
best_val_acc = 0
eval_acc = 0
pred = None

for epoch in range(2000):
logreg.set_train()
loss = train_one_step(data=data, label=y)
logreg.set_eval()
if valid_idx.size(0) != 0:
val_logits = logreg(emb[valid_idx])
val_preds = tlx.argmax(val_logits, axis=1)
val_acc = tlx.reduce_sum(val_preds == y[valid_idx]).float() / valid_idx.size(0)
else:
train_logits = logreg(emb[train_idx])
train_preds = tlx.argmax(train_logits, axis=1)
train_acc = tlx.reduce_sum(train_preds == y[train_idx]).float() / train_idx.size(0)
val_acc = train_acc

test_logits = logreg(emb[test_idx])
test_preds = tlx.argmax(test_logits, axis=1)
test_acc = tlx.reduce_sum(test_preds == y[test_idx]).float() / test_idx.size(0)

if val_acc >= best_val_acc:
best_val_acc = val_acc
if test_acc > eval_acc:
eval_acc = test_acc
pred = test_preds

return eval_acc, pred
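For context on how this helper is meant to be used: node_evaluation fits a logistic-regression probe on frozen node embeddings and returns the test accuracy at the best validation epoch. A minimal, hedged usage sketch with random data (run from examples/sp2_gcl with the torch backend; the shapes, split sizes, and values below are illustrative only):

```python
# Illustrative only: random embeddings/labels stand in for real model output.
import numpy as np
import tensorlayerx as tlx

from evaluation_test import node_evaluation  # this file

num_nodes, emb_dim, num_classes = 1000, 64, 4
emb = tlx.convert_to_tensor(np.random.randn(num_nodes, emb_dim), dtype=tlx.float32)
y = tlx.convert_to_tensor(np.random.randint(0, num_classes, size=num_nodes), dtype=tlx.int64)

# 10%/10%/80% random split, mirroring split() in sp2gcl_trainer.py.
idx = np.random.permutation(num_nodes)
train_idx = tlx.convert_to_tensor(idx[:100])
val_idx = tlx.convert_to_tensor(idx[100:200])
test_idx = tlx.convert_to_tensor(idx[200:])

acc, pred = node_evaluation(emb, y, train_idx, val_idx, test_idx)
print(float(acc))  # roughly chance-level (~0.25) on random data
```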


34 changes: 34 additions & 0 deletions examples/sp2_gcl/readme.md
@@ -0,0 +1,34 @@
# Graph Contrastive Learning with Stable and Scalable Spectral Encoding

- Paper link: [https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf](https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf)
Contributor comment: The paper link is incorrect.

- Author's code repo: [https://github.com/bdy9527/Sp2GCL](https://github.com/bdy9527/Sp2GCL).

# Dataset Statistics

| Dataset | # Nodes | # Edges | # Classes |
|----------|---------|----------|-----------|
| PubMed | 19,717 | 88,648 | 3 |
| Wiki-CS | 11,701 | 216,123 | 10 |
| Facebook | 22,470 | 342,004 | 4 |



Results
-------

```bash

TL_BACKEND="torch" python sp2gcl_trainer.py --dataset facebook
TL_BACKEND="torch" python sp2gcl_trainer.py --dataset wikics
TL_BACKEND="torch" python sp2gcl_trainer.py --dataset pubmed
```


| Dataset  | Paper      | Our(th)      |
|----------|------------|--------------|
| PubMed | 82.3±0.3 | OOM |
| Wiki-CS | 79.42±0.19 | 76.79 ± 0.61 |
| Facebook | 90.43±0.13 | 85.35±0.26 |
Contributor comment: Tune the hyperparameters.

144 changes: 144 additions & 0 deletions examples/sp2_gcl/sp2gcl_trainer.py
@@ -0,0 +1,144 @@
import os
# os.environ['TL_BACKEND'] = 'torch'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from gammagl.utils import mask_to_index
from tensorlayerx.model import WithLoss, TrainOneStep
import argparse
from evaluation_test import node_evaluation
from gammagl.models import EigenMLP, SpaSpeNode, Encoder
from scipy.sparse import csr_matrix
import numpy as np
import scipy.sparse.linalg
import tensorlayerx as tlx
from gammagl.datasets import FacebookPagePage, WikiCS, Planetoid


def split(node_labels):

y = node_labels
train_ratio = 0.1
val_ratio = 0.1
test_ratio = 0.8

N = len(y)
train_num = int(N * train_ratio)
val_num = int(N * (train_ratio + val_ratio))

idx = np.arange(N)
np.random.shuffle(idx)

train_idx = idx[:train_num]
val_idx = idx[train_num:val_num]
test_idx = idx[val_num:]

train_idx = tlx.convert_to_tensor(train_idx)
val_idx = tlx.convert_to_tensor(val_idx)
test_idx = tlx.convert_to_tensor(test_idx)

return train_idx, val_idx, test_idx


def compute_laplacian(data):

edge_index = data.edge_index
num_nodes = data.num_nodes
row, col = edge_index
data_adj = csr_matrix((np.ones(len(row)), (row, col)), shape=(num_nodes, num_nodes))
degree = np.array(data_adj.sum(axis=1)).flatten()
deg_inv_sqrt = 1.0 / np.sqrt(degree)
deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0
I = csr_matrix(np.eye(num_nodes))
D_inv_sqrt = csr_matrix((deg_inv_sqrt, (np.arange(num_nodes), np.arange(num_nodes))))
L = I - D_inv_sqrt.dot(data_adj).dot(D_inv_sqrt)
e, u = scipy.sparse.linalg.eigsh(L, k=100, which='SM', tol=1e-3)
data.e = tlx.convert_to_tensor(e, dtype=tlx.float32)
data.u = tlx.convert_to_tensor(u, dtype=tlx.float32)

return data
Contributor comment: Try replacing this with the get_laplacian interface.

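Following up on the comment above, a rough sketch of what a get_laplacian-based version might look like. It assumes gammagl.utils exposes get_laplacian with a PyG-style signature, get_laplacian(edge_index, normalization='sym', num_nodes=N) returning (edge_index, edge_weight); verify the actual interface before adopting this.

```python
# Sketch only: assumes a PyG-style gammagl.utils.get_laplacian interface exists.
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
import tensorlayerx as tlx
from gammagl.utils import get_laplacian  # assumed interface


def compute_laplacian_via_util(data, k=100):
    # Symmetric normalized Laplacian L = I - D^{-1/2} A D^{-1/2} in sparse COO form.
    lap_edge_index, lap_weight = get_laplacian(
        data.edge_index, normalization='sym', num_nodes=data.num_nodes)
    row, col = tlx.convert_to_numpy(lap_edge_index)
    L = scipy.sparse.csr_matrix(
        (tlx.convert_to_numpy(lap_weight), (row, col)),
        shape=(data.num_nodes, data.num_nodes))
    # Smallest-k eigenpairs, same as compute_laplacian above.
    e, u = scipy.sparse.linalg.eigsh(L, k=k, which='SM', tol=1e-3)
    data.e = tlx.convert_to_tensor(e, dtype=tlx.float32)
    data.u = tlx.convert_to_tensor(u, dtype=tlx.float32)
    return data
```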


class ContrastiveLoss(WithLoss):
def __init__(self, model, temp=1.0):
super(ContrastiveLoss, self).__init__(backbone=model, loss_fn=None)
self.temp = temp

def forward(self, data, label):
h_node_spa, h_node_spe = self.backbone_network(data['x'], data['edge_index'], data['e'], data['u'])
h1 = tlx.l2_normalize(h_node_spa, axis=-1, eps=1e-12)
h2 = tlx.l2_normalize(h_node_spe, axis=-1, eps=1e-12)
logits = tlx.matmul(h1, tlx.transpose(h2, perm=(1, 0))) / self.temp
labels = tlx.arange(start=0, limit=h1.shape[0], delta=1, dtype=tlx.int64)
        loss = 0.5 * tlx.losses.softmax_cross_entropy_with_logits(logits, labels) \
               + 0.5 * tlx.losses.softmax_cross_entropy_with_logits(tlx.transpose(logits, perm=(1, 0)), labels)
return loss

def main(args):
if args.dataset in ['pubmed', 'wikics', 'facebook']:
if args.dataset == 'facebook':
dataset = FacebookPagePage(root='data/facebook')
elif args.dataset == 'wikics':
dataset = WikiCS(root='data/wikics')
elif args.dataset == 'pubmed':
            dataset = Planetoid(root='', name='pubmed')
data = dataset[0]
data = compute_laplacian(data)
x = tlx.convert_to_tensor(data.x, dtype=tlx.float32)
edge = tlx.convert_to_tensor(data.edge_index, dtype=tlx.int64)
e = tlx.convert_to_tensor(data.e[:args.spe_dim], dtype=tlx.float32)
u = tlx.convert_to_tensor(data.u[:, :args.spe_dim], dtype=tlx.float32)
y = tlx.convert_to_tensor(data.y)
if 'train_mask' in data.keys:
if len(data.train_mask.size()) > 1:
train_idx = mask_to_index(data.train_mask)
test_idx = mask_to_index(data.test_mask)
val_idx = mask_to_index(data.val_mask)
else:
train_idx = tlx.where(data.train_mask)[0]
val_idx = tlx.where(data.val_mask)[0]
test_idx = tlx.where(data.test_mask)[0]
else:
train_idx, val_idx, test_idx = split(y)

else:
pass

spa_encoder = Encoder(x.shape[1], args.hidden_dim, args.hidden_dim)
spe_encoder = EigenMLP(args.spe_dim, args.hidden_dim, args.hidden_dim, args.period)
model = SpaSpeNode(spa_encoder, spe_encoder, hidden_dim=args.hidden_dim, t=args.t)
optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.weight_decay)
train_weights = model.trainable_weights
loss_func = ContrastiveLoss(model, temp=args.t)
train_one_step = TrainOneStep(loss_func, optimizer, train_weights)

data_all = {
'x': data.x,
'edge_index': data.edge_index,
'e': data.e,
'u': data.u,
}

for epoch in range(args.num_epochs):
model.set_train()
loss = train_one_step(data=data_all, label=data.y)
if (epoch + 1) % 10 == 0:
model.set_eval()
spa_emb = tlx.detach(model.spa_encoder(x, edge))
spe_emb = tlx.detach(model.spe_encoder(e, u))
acc, pred = node_evaluation((spa_emb + spe_emb)/2, y, train_idx, val_idx, test_idx)
print(f'Epoch {epoch+1}/{args.num_epochs}, Accuracy: {acc}')


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--cuda', type=int, default=3)
Contributor comment: Remove the seed argument, and set the device following how the other trainers do it.

A hedged sketch of that device-selection pattern appears after the argument parser below.

parser.add_argument('--dataset', default='facebook')
parser.add_argument('--spe_dim', type=int, default=100)
parser.add_argument('--period', type=int, default=20)
parser.add_argument('--hidden_dim', type=int, default=512)
parser.add_argument('--t', type=float, default=1.0)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--weight_decay', type=float, default=0)
parser.add_argument('--num_epochs', type=int, default=100)
args = parser.parse_args()
main(args)
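Regarding the review comment above: a minimal sketch of the device-selection pattern used elsewhere in GammaGL trainers. The --gpu flag name and the tlx.set_device call are assumptions here; copy the exact form from an existing trainer.

```python
# Sketch only: flag name and set_device usage are assumed, not copied from this PR.
import argparse
import tensorlayerx as tlx

parser = argparse.ArgumentParser()
parser.add_argument("--gpu", type=int, default=0, help="gpu id; a negative value selects the CPU")
args = parser.parse_args()

if args.gpu >= 0:
    tlx.set_device("GPU", args.gpu)  # run on the chosen GPU
else:
    tlx.set_device("CPU")            # fall back to CPU
```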
4 changes: 3 additions & 1 deletion gammagl/datasets/__init__.py
@@ -19,6 +19,7 @@
from .wikics import WikiCS
from .blogcatalog import BlogCatalog
from .molecule_net import MoleculeNet
from .facebook import FacebookPagePage

__all__ = [
'Amazon',
@@ -40,7 +41,8 @@
'AMiner',
'PolBlogs',
'WikiCS',
'MoleculeNet'
'MoleculeNet',
'FacebookPagePage'
]

classes = __all__
67 changes: 67 additions & 0 deletions gammagl/datasets/facebook.py
@@ -0,0 +1,67 @@
from typing import Callable, Optional
import os
import numpy as np
import tensorlayerx as tlx

from gammagl.data import Graph, InMemoryDataset, download_url

class FacebookPagePage(InMemoryDataset):
r"""The Facebook Page-Page network dataset introduced in the
`"Multi-scale Attributed Node Embedding"
<https://arxiv.org/abs/1909.13021>`_ paper.
Nodes represent verified pages on Facebook and edges are mutual likes.
It contains 22,470 nodes, 342,004 edges, 128 node features and 4 classes.

Args:
root (str): Root directory where the dataset should be saved.
transform (callable, optional): A function/transform that takes in an
:obj:`gammagl.data.Graph` object and returns a transformed
version. The data object will be transformed before every access.
(default: :obj:`None`)
pre_transform (callable, optional): A function/transform that takes in
an :obj:`gammagl.data.Graph` object and returns a
transformed version. The data object will be transformed before
being saved to disk. (default: :obj:`None`)
force_reload (bool, optional): Whether to re-process the dataset.
(default: :obj:`False`)
"""

url = 'https://graphmining.ai/datasets/ptg/facebook.npz'

def __init__(
self,
root: str,
Contributor comment: Currently, this argument could be made optional, since we have a caching mechanism, unlike PyG.

A sketch of the optional signature follows the constructor below.

transform: Optional[Callable] = None,
pre_transform: Optional[Callable] = None,
force_reload: bool = False,
) -> None:
super().__init__(root, transform, pre_transform, force_reload=force_reload)
        self.data, self.slices = self.load_data(self.processed_paths[0])

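A hedged sketch of what the optional argument could look like, per the comment above; whether the caching mechanism actually tolerates root=None is an assumption.

```python
# Sketch only: `root` made optional as suggested in the review comment above.
def __init__(
    self,
    root: Optional[str] = None,   # assumption: the cache handles root=None
    transform: Optional[Callable] = None,
    pre_transform: Optional[Callable] = None,
    force_reload: bool = False,
) -> None:
    super().__init__(root, transform, pre_transform, force_reload=force_reload)
    self.data, self.slices = self.load_data(self.processed_paths[0])
```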
@property
def raw_file_names(self) -> str:
return 'facebook.npz'

@property
def processed_file_names(self) -> str:
return tlx.BACKEND + '_data.pt'

def download(self) -> None:
download_url(self.url, self.raw_dir)

def process(self) -> None:
data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
x = tlx.convert_to_tensor(data['features'], dtype=tlx.float32)
y = tlx.convert_to_tensor(data['target'], dtype=tlx.int64)
edge_index = tlx.convert_to_tensor(data['edges'], dtype=tlx.int64)
edge_index = edge_index.T
Contributor comment: Have you checked whether this also works under other backends, such as 'mindspore'?

A backend-neutral alternative is sketched after process() below.


data = Graph(x=x, edge_index=edge_index, y=y)

if self.pre_transform is not None:
data = self.pre_transform(data)

self.save_data(self.collate([data]), self.processed_paths[0])
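On the backend question above: one backend-neutral option is to transpose the numpy array before converting it to a tensor, so no backend-specific `.T` tensor attribute is needed. A sketch (not verified under mindspore):

```python
# Sketch only: transpose in numpy, then convert once; avoids the tensor `.T` attribute.
edge_index = tlx.convert_to_tensor(data['edges'].T, dtype=tlx.int64)
```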



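For reference, a minimal usage sketch of the new dataset class; the root path is illustrative and the attributes are those of the Graph built in process() above.

```python
from gammagl.datasets import FacebookPagePage

dataset = FacebookPagePage(root='data/facebook')  # downloads facebook.npz on first use
graph = dataset[0]
print(graph.num_nodes)  # 22470 verified pages
print(graph.x.shape)    # (22470, 128) node features
print(graph.y.shape)    # (22470,) labels over 4 classes
```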
6 changes: 5 additions & 1 deletion gammagl/models/__init__.py
Expand Up @@ -52,6 +52,7 @@
from .sfgcn import SFGCNModel
from .grace_spco import Grace_Spco_Encoder, Grace_Spco_Model
from .graphormer import Graphormer
from .sp2gcl import SpaSpeNode, Encoder, EigenMLP

__all__ = [
'GCNModel',
@@ -107,7 +108,10 @@
'GGDModel',
'Specformer',
'SFGCNModel',
'Graphormer'
'Graphormer',
'Encoder',
'EigenMLP',
'SpaSpeNode'
]

classes = __all__