[Model & Dataset] facebook & sp2gcl #201
@@ -0,0 +1,79 @@

```python
import os

# Set environment variables before importing tensorlayerx so they take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorlayerx as tlx
import tensorlayerx.nn as nn
from tensorlayerx.model import TrainOneStep, WithLoss


class SemiSpvzLoss(WithLoss):
    """Semi-supervised loss: cross-entropy over the training nodes only."""

    def __init__(self, net, loss_fn):
        super(SemiSpvzLoss, self).__init__(backbone=net, loss_fn=loss_fn)

    def forward(self, data, label):
        logits = self.backbone_network(data['x'])
        train_logits = tlx.gather(logits, data['train_idx'])
        train_y = tlx.gather(data['y'], data['train_idx'])
        loss = self._loss_fn(train_logits, train_y)
        return loss


class LogReg(nn.Module):
    """Linear (logistic-regression) probe over frozen embeddings."""

    def __init__(self, hid_dim, out_dim):
        super(LogReg, self).__init__()
        self.linear = nn.Linear(in_features=hid_dim, out_features=out_dim,
                                W_init=tlx.initializers.xavier_uniform(),
                                b_init=tlx.initializers.zeros())

    def forward(self, x):
        # Forward pass
        return self.linear(x)


def node_evaluation(emb, y, train_idx, valid_idx, test_idx, lr=1e-2, weight_decay=1e-4):
    # Note: tensor indexing and .size()/.float() below assume the torch backend.
    nclass = y.max().item() + 1
    logreg = LogReg(hid_dim=emb.shape[1], out_dim=nclass)
    opt = tlx.optimizers.Adam(lr=lr, weight_decay=weight_decay)
    train_weights = logreg.trainable_weights
    loss = tlx.losses.softmax_cross_entropy_with_logits
    loss_func = SemiSpvzLoss(logreg, loss)
    train_one_step = TrainOneStep(loss_func, opt, train_weights)

    data = {
        'x': emb,
        'y': y,
        'train_idx': train_idx,
        'valid_idx': valid_idx,
        'test_idx': test_idx
    }
    best_val_acc = 0
    eval_acc = 0
    pred = None

    for epoch in range(2000):
        logreg.set_train()
        loss = train_one_step(data=data, label=y)
        logreg.set_eval()
        if valid_idx.size(0) != 0:
            val_logits = logreg(emb[valid_idx])
            val_preds = tlx.argmax(val_logits, axis=1)
            val_acc = tlx.reduce_sum(val_preds == y[valid_idx]).float() / valid_idx.size(0)
        else:
            # No validation split: fall back to training accuracy for model selection.
            train_logits = logreg(emb[train_idx])
            train_preds = tlx.argmax(train_logits, axis=1)
            train_acc = tlx.reduce_sum(train_preds == y[train_idx]).float() / train_idx.size(0)
            val_acc = train_acc

        test_logits = logreg(emb[test_idx])
        test_preds = tlx.argmax(test_logits, axis=1)
        test_acc = tlx.reduce_sum(test_preds == y[test_idx]).float() / test_idx.size(0)

        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            if test_acc > eval_acc:
                eval_acc = test_acc
                pred = test_preds

    return eval_acc, pred
```
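For context, a minimal smoke test of `node_evaluation` — a sketch with hypothetical shapes and random data, assuming `TL_BACKEND="torch"` and that the file above is importable as `evaluation_test` (as `node_main.py` below imports it):

```python
# Hypothetical smoke test: random embeddings, 4 classes, fixed index splits.
# Run with TL_BACKEND="torch"; the probe above relies on torch-style tensors.
import torch
from evaluation_test import node_evaluation

emb = torch.randn(100, 32)        # 100 nodes, 32-dim embeddings
y = torch.randint(0, 4, (100,))   # 4 classes
train_idx = torch.arange(0, 60)
valid_idx = torch.arange(60, 80)
test_idx = torch.arange(80, 100)

acc, pred = node_evaluation(emb, y, train_idx, valid_idx, test_idx)
print(float(acc))
```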
@@ -0,0 +1,140 @@

```python
import os

# Set environment variables before importing tensorlayerx so they take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import argparse
import numpy as np
import scipy.sparse.linalg
from scipy.sparse import csr_matrix
import networkx as nx
import tensorlayerx as tlx
from tensorlayerx.model import WithLoss, TrainOneStep
from gammagl.utils import split, to_scipy_sparse_matrix
from gammagl.models import EigenMLP, SpaSpeNode, Encoder
from gammagl.datasets import FacebookPagePage
from evaluation_test import node_evaluation


def connected_components(sparse_adj):
    """Return the sizes and node lists of the graph's connected components."""
    G = nx.from_scipy_sparse_array(sparse_adj)
    cc = nx.connected_components(G)

    components = []
    lens = []

    for c in cc:
        c = list(c)
        components.append(c)
        lens.append(len(c))

    return lens, components


def compute_laplacian(data):
    """Attach the smallest eigenpairs of the normalized Laplacian to `data`."""
    edge_index = data.edge_index
    num_nodes = data.num_nodes
    row, col = edge_index
    data_adj = csr_matrix((np.ones(len(row)), (row, col)), shape=(num_nodes, num_nodes))
    degree = np.array(data_adj.sum(axis=1)).flatten()
    deg_inv_sqrt = 1.0 / np.sqrt(degree)
    deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0
    # Sparse identity; np.eye would materialize a dense num_nodes x num_nodes matrix.
    I = scipy.sparse.identity(num_nodes, format='csr')
    D_inv_sqrt = csr_matrix((deg_inv_sqrt, (np.arange(num_nodes), np.arange(num_nodes))))
    L = I - D_inv_sqrt.dot(data_adj).dot(D_inv_sqrt)
    e, u = scipy.sparse.linalg.eigsh(L, k=100, which='SM', tol=1e-3)
    adj = to_scipy_sparse_matrix(data.edge_index)
    lens, components = connected_components(adj)
    data.e = tlx.convert_to_tensor(e, dtype=tlx.float32)
    data.u = tlx.convert_to_tensor(u, dtype=tlx.float32)

    return data, lens, components
```
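For reference, the matrix assembled in `compute_laplacian` is the symmetric normalized Laplacian, and `eigsh(..., which='SM')` extracts its $k = 100$ smallest eigenpairs:

$$
L = I - D^{-1/2} A D^{-1/2}, \qquad L u_i = \lambda_i u_i, \qquad 0 = \lambda_1 \le \lambda_2 \le \dots \le \lambda_k,
$$

where $A$ is the adjacency matrix and $D$ the diagonal degree matrix; the eigenvalues are stored as `data.e` and the eigenvectors (one row per node) as `data.u`.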
```python
class ContrastiveLoss(WithLoss):
    """Symmetric InfoNCE-style loss between spatial and spectral embeddings."""

    def __init__(self, model, temp=1.0):
        super(ContrastiveLoss, self).__init__(backbone=model, loss_fn=None)
        self.temp = temp

    def forward(self, data, label):
        h_node_spa, h_node_spe = self.backbone_network(data['x'], data['edge_index'], data['e'], data['u'])
        h1 = tlx.l2_normalize(h_node_spa, axis=-1, eps=1e-12)
        h2 = tlx.l2_normalize(h_node_spe, axis=-1, eps=1e-12)
        logits = tlx.matmul(h1, h2.transpose(-2, -1)) / self.temp
        # Positive pairs sit on the diagonal: node i's spatial vs. spectral view.
        labels = tlx.arange(start=0, limit=h1.shape[0], delta=1, dtype=tlx.int64)
        loss = 0.5 * tlx.losses.softmax_cross_entropy_with_logits(logits, labels) \
             + 0.5 * tlx.losses.softmax_cross_entropy_with_logits(logits.transpose(-2, -1), labels)
        return loss
```
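In symbols, my reading of the loss above: with row-normalized embeddings $Z^{\mathrm{spa}}, Z^{\mathrm{spe}} \in \mathbb{R}^{N \times d}$ and temperature $\tau$, each node's positive pair is its own two views, giving the symmetric InfoNCE objective

$$
\mathcal{L} = -\frac{1}{2N} \sum_{i=1}^{N} \left[ \log \frac{\exp(z_i^{\mathrm{spa}} \cdot z_i^{\mathrm{spe}} / \tau)}{\sum_{j=1}^{N} \exp(z_i^{\mathrm{spa}} \cdot z_j^{\mathrm{spe}} / \tau)} + \log \frac{\exp(z_i^{\mathrm{spe}} \cdot z_i^{\mathrm{spa}} / \tau)}{\sum_{j=1}^{N} \exp(z_i^{\mathrm{spe}} \cdot z_j^{\mathrm{spa}} / \tau)} \right],
$$

i.e. cross-entropy over the $N \times N$ similarity matrix with diagonal labels, applied in both directions.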
```python
def main(args):
    global edge, e, u, test_idx
    print(args.dataset)
    if args.dataset in ['pubmed-3', 'flickr', 'arxiv', 'wiki', 'facebook']:
        dataset = FacebookPagePage(root='data/facebook')
        data = dataset[0]
        data, lens, components = compute_laplacian(data)
        x = tlx.convert_to_tensor(data.x, dtype=tlx.float32)
        edge = tlx.convert_to_tensor(data.edge_index, dtype=tlx.int64)
        e = tlx.convert_to_tensor(data.e[:args.spe_dim], dtype=tlx.float32)
        u = tlx.convert_to_tensor(data.u[:, :args.spe_dim], dtype=tlx.float32)
        y = tlx.convert_to_tensor(data.y)
        print(y.min().item(), y.max().item())
        if 'train_mask' in data.keys:
            if len(data.train_mask.size()) > 1:
                train_idx = tlx.where(data.train_mask[:, args.seed])[0]
                val_idx = tlx.where(data.val_mask[:, args.seed])[0]
                test_idx = tlx.where(data.test_mask)[0]
            else:
                train_idx = tlx.where(data.train_mask)[0]
                val_idx = tlx.where(data.val_mask)[0]
                test_idx = tlx.where(data.test_mask)[0]
        else:
            train_idx, val_idx, test_idx = split(y)
```

> **Review comment** (on the `split(y)` line): I do not think this is useful. Usually, the train/valid/test split should be done in the dataset. You may directly use …

```python
    else:
        pass

    print('test_idx:', len(test_idx))
    spa_encoder = Encoder(x.size(1), args.hidden_dim, args.hidden_dim)
    spe_encoder = EigenMLP(args.spe_dim, args.hidden_dim, args.hidden_dim, args.period)
    model = SpaSpeNode(spa_encoder, spe_encoder, hidden_dim=args.hidden_dim, t=args.t)
    optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.weight_decay)
    train_weights = model.trainable_weights
    loss_func = ContrastiveLoss(model, temp=args.t)
    train_one_step = TrainOneStep(loss_func, optimizer, train_weights)

    data_all = {
        'x': data.x,
        'edge_index': data.edge_index,
        'e': data.e,
        'u': data.u,
    }

    # Rough timing benchmark: 1000 forward passes of the spectral encoder.
    t1 = time.time()
    for i in range(1000):
        model.set_eval()
        spe_emb = model.spe_encoder(e, u).detach()
    t2 = time.time()
    print("t2-t1:", t2 - t1)

    for idx in range(100):
        model.set_train()
        loss = train_one_step(data=data_all, label=data.y)
        if (idx + 1) % 10 == 0:
            # Evaluate a linear probe on the averaged spatial/spectral embeddings.
            model.set_eval()
            spa_emb = model.spa_encoder(x, edge).detach()
            spe_emb = model.spe_encoder(e, u).detach()
            acc, pred = node_evaluation((spa_emb + spe_emb) / 2, y, train_idx, val_idx, test_idx)
            print(acc)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--cuda', type=int, default=3)
    parser.add_argument('--dataset', default='wiki')
    parser.add_argument('--spe_dim', type=int, default=100)
    parser.add_argument('--period', type=int, default=20)
    parser.add_argument('--hidden_dim', type=int, default=512)
    parser.add_argument('--t', type=float, default=1.0)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight_decay', type=float, default=0)
    args = parser.parse_args()
    print(args)
    main(args)
```
@@ -0,0 +1,40 @@

# Graph Contrastive Learning with Stable and Scalable Spectral Encoding (Sp2GCL)

- Paper link: [https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf](https://proceedings.neurips.cc/paper_files/paper/2023/file/8e9a6582caa59fda0302349702965171-Paper-Conference.pdf)
- Author's code repo: [https://github.com/bdy9527/Sp2GCL](https://github.com/bdy9527/Sp2GCL).

> **Review comment** (on the paper link): The link is wrong.

# Dataset Statistics

| Dataset  | # Nodes | # Edges   | # Classes |
|----------|---------|-----------|-----------|
| PubMed   | 19,717  | 88,648    | 3         |
| Wiki-CS  | 11,701  | 216,123   | 10        |
| Facebook | 22,470  | 342,004   | 4         |
| Flickr   | 89,250  | 899,756   | 7         |
| PPI      | 56,928  | 1,226,368 | 121       |

Results
-------

```bash
TL_BACKEND="torch" python node_main.py --dataset Facebook
TL_BACKEND="torch" python node_main.py --dataset PubMed
TL_BACKEND="torch" python node_main.py --dataset Wiki-CS
TL_BACKEND="torch" python node_batch.py --dataset Flickr
TL_BACKEND="torch" python node_batch.py --dataset PPI
```

| Dataset  | Paper Code | Our(th)    |
|----------|------------|------------|
| PubMed   | 82.3±0.3   | OOM        |
| Wiki-CS  | 79.42±0.19 | 78.60±0.14 |
| Facebook | 90.43±0.13 | 85.35±0.09 |
| PPI      | 74.28±0.22 | 79.30±0.12 |
| Flickr   | 52.05±0.33 | 52.09±0.28 |
@@ -0,0 +1,67 @@

```python
from typing import Callable, Optional
import os
import numpy as np
import tensorlayerx as tlx

from gammagl.data import Graph, InMemoryDataset, download_url


class FacebookPagePage(InMemoryDataset):
    r"""The Facebook Page-Page network dataset introduced in the
    `"Multi-Scale Attributed Node Embedding"
    <https://arxiv.org/abs/1909.13021>`_ paper.
    Nodes represent verified pages on Facebook and edges are mutual likes.
    It contains 22,470 nodes, 342,004 edges, 128 node features and 4 classes.

    Args:
        root (str): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`gammagl.data.Graph` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`gammagl.data.Graph` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
        force_reload (bool, optional): Whether to re-process the dataset.
            (default: :obj:`False`)
    """

    url = 'https://graphmining.ai/datasets/ptg/facebook.npz'

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        pre_transform: Optional[Callable] = None,
        force_reload: bool = False,
    ) -> None:
        super().__init__(root, transform, pre_transform, force_reload=force_reload)
        self.data, self.slices = self.load_data(self.processed_paths[0])
```

> **Review comment** (on the `root` argument): Currently, this argument can be optional, as we have a cached mechanism compared to …

```python
    @property
    def raw_file_names(self) -> str:
        return 'facebook.npz'

    @property
    def processed_file_names(self) -> str:
        return tlx.BACKEND + '_data.pt'

    def download(self) -> None:
        download_url(self.url, self.raw_dir)

    def process(self) -> None:
        data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
        x = tlx.convert_to_tensor(data['features'], dtype=tlx.float32)
        y = tlx.convert_to_tensor(data['target'], dtype=tlx.int64)
        edge_index = tlx.convert_to_tensor(data['edges'], dtype=tlx.int64)
        # Transpose from (num_edges, 2) to (2, num_edges).
        edge_index = edge_index.T
```

> **Review comment** (on `edge_index.T`): Have you tried if this can work in the other backend like 'mindspore'?

```python
        data = Graph(x=x, edge_index=edge_index, y=y)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        self.save_data(self.collate([data]), self.processed_paths[0])
```
> **Review comment** (on the trailing blank lines): What is this line doing?
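To close, a minimal usage sketch of the new dataset class — a sketch only, assuming the torch backend and network access on the first run; the `data/facebook` root directory is an arbitrary choice:

```python
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# node_main.py above imports the class from gammagl.datasets.
from gammagl.datasets import FacebookPagePage

# The first call downloads facebook.npz and caches a processed,
# backend-specific file (e.g. torch_data.pt); later calls reuse the cache.
dataset = FacebookPagePage(root='data/facebook')
data = dataset[0]

print(data.num_nodes)         # expected: 22470
print(data.edge_index.shape)  # expected: (2, 342004)
print(int(data.y.max()) + 1)  # expected: 4 classes
```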