import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from . import BaseModel, register_model


@register_model('HGSL')
class HGSL(BaseModel):
r"""
HGSL, Heterogeneous Graph Structure Learning from `paper <http://www.shichuan.org/doc/100.pdf>`_.
Parameters
----------
feat_dims : dict
The feature dimensions of different node types.
    undirected_relations : str
        The HGSL model can only handle undirected heterographs, while in the
        dgl.heterograph format each undirected edge is stored as two separate,
        symmetric edge types. Hence you have to specify which edge types
        represent the distinct undirected relations, separated by commas. For
        example, in a heterograph with two undirected relations, paper-author
        and paper-subject, four types of edges are stored: paper-author,
        author-paper, paper-subject and subject-paper. This parameter can then
        be "paper-author,paper-subject", "author-paper,paper-subject",
        "paper-author,subject-paper" or "author-paper,subject-paper". See the
        Examples section below.
    device : str
The GPU device to select, like 'cuda:0'.
metapaths : list
The metapath name list.
mp_emb_dim : int
The dimension of metapath embeddings from metapath2vec.
hidden_dim : int
The dimension of mapped features in the graph generating procedure.
    num_heads : int
Number of heads in the K-head weighted cosine similarity function.
fs_eps : float
Threshold of feature similarity graph :math:`\epsilon^{FS}`.
fp_eps : float
Threshold of feature propagation graph :math:`\epsilon^{FP}`.
mp_eps : float
Threshold of semantic graph :math:`\epsilon^{MP}`.
gnn_emd_dim : int
The dimension of hidden layers of the downstream GNN.
gnn_dropout : float
The dropout ratio of features in the downstream GNN.
category : str
The target node type which the model will predict on.
out_dim : int
        Number of classes of the target node type.
    Attributes
    ----------
    fgg_direct : nn.ModuleDict
        Feature similarity graph generators (:math:`S_r^{FS}`) in equation 2
        of the paper; keys are undirected-relation names.
    fgg_left : nn.ModuleDict
        Feature propagation graph generators (:math:`S_r^{FH}`) which generate
        the graphs in equation 5 of the paper.
    fgg_right : nn.ModuleDict
        Feature propagation graph generators (:math:`S_r^{FT}`) which generate
        the graphs in equation 6 of the paper.
    fg_agg : nn.ModuleDict
        Channel attention layers, each of which fuses one feature similarity
        graph and the two generated feature propagation graphs, as in equation
        7 of the paper.
    sgg_gen : nn.ModuleDict
        Semantic subgraph generators (:math:`S_{r,m}^{MP}`) in equation 8 of
        the paper.
    sg_agg : nn.ModuleDict
        Channel attention layers which fuse the semantic subgraphs, as in
        equation 9 of the paper.
    overall_g_agg : nn.ModuleDict
        Channel attention layers which fuse the learned feature graph, the
        semantic graph and the original graph.
    encoder : nn.ModuleDict
        The type-specific mapping layers in equation 1 of the paper.
    Note
    ----
    Under the best config, this model differs slightly from the code released
    by the paper's authors; the differences seem to have little impact on
    performance:

    1. The regularization term in the loss is applied to all parameters of
       the model, while in the authors' code it is applied only to the
       generated adjacency matrix. Implementing the latter would require a
       new OpenHGNN task.
    2. The input adjacency matrix is normalized separately for each relation,
       while in the authors' code the entire adjacency matrix, composed of
       the adjacency matrices of all relations, is normalized as a whole.
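
    Examples
    --------
    For a heterograph that stores the edge types ``paper-author``,
    ``author-paper``, ``paper-subject`` and ``subject-paper``, one valid
    setting (an illustrative assumption, not a shipped config) is::

        undirected_relations = "paper-author,paper-subject"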
"""

    @classmethod
def build_model_from_args(cls, args, hg):
feat_dims = dict()
for ntype in hg.ntypes:
feat_dims[ntype] = hg.nodes[ntype].data['h'].shape[1]
# Extract undirected_relations
und_rels = args.undirected_relations.split(',')
undirected_relations = list()
for etype in hg.canonical_etypes:
if etype[1] in und_rels:
undirected_relations.append(etype)
device = hg.device
metapaths = list()
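        # Collect metapath2vec embeddings by naming convention: any feature of
        # the "paper" node type whose name contains "m2v" is treated as a
        # metapath embedding. Note that "paper" and "pap_m2v_emb" are
        # hardcoded below, so this lookup assumes an ACM-style dataset.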
for feature_name in hg.nodes["paper"].data.keys():
if "m2v" in feature_name:
metapaths.append(feature_name)
mp_emb_dim = hg.nodes["paper"].data["pap_m2v_emb"].shape[1]
return cls(feat_dims=feat_dims, undirected_relations=undirected_relations, device=device, metapaths=metapaths,
mp_emb_dim=mp_emb_dim, hidden_dim=args.hidden_dim, num_heads=args.num_heads,
fs_eps=args.fs_eps, fp_eps=args.fp_eps, mp_eps=args.mp_eps,
gnn_emd_dim=args.gnn_emd_dim, gnn_dropout=args.gnn_dropout,
category=args.category, num_class=args.out_dim)

    def __init__(self, feat_dims, undirected_relations, device, metapaths, mp_emb_dim, hidden_dim, num_heads, fs_eps,
fp_eps, mp_eps, gnn_emd_dim, gnn_dropout, category, num_class):
super().__init__()
self.device = device
self.ud_rels = undirected_relations
self.node_types = list(feat_dims.keys())
self.feat_dims = feat_dims
self.non_linear = nn.ReLU()
self.category = category
self.metapaths = metapaths
nnmd = nn.ModuleDict
self.fgg_direct, self.fgg_left, self.fgg_right, self.fg_agg, self.sgg_gen, self.sg_agg, self.overall_g_agg = \
nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({})
# Feature encoder
self.encoder = nnmd(
dict(zip(self.node_types, [nn.Linear(feat_dims[node_type], hidden_dim) for node_type in self.node_types])))
for canonical_etype in undirected_relations:
undirected_relation = canonical_etype[1]
# Feature Graph Generator
self.fgg_direct[undirected_relation] = GraphGenerator(hidden_dim, num_heads, fs_eps, self.device)
self.fgg_left[undirected_relation] = GraphGenerator(feat_dims[canonical_etype[0]], num_heads, fp_eps,
self.device)
self.fgg_right[undirected_relation] = GraphGenerator(feat_dims[canonical_etype[2]], num_heads, fp_eps,
self.device)
self.fg_agg[undirected_relation] = GraphChannelAttLayer(3)
# Semantic Graph Generator
self.sgg_gen[undirected_relation] = nnmd(dict(
zip(metapaths, [GraphGenerator(mp_emb_dim, num_heads, mp_eps, self.device) for _ in metapaths])))
self.sg_agg[undirected_relation] = GraphChannelAttLayer(len(metapaths))
# Overall Graph Generator
self.overall_g_agg[undirected_relation] = GraphChannelAttLayer(3)
# Graph Convolution
if len(set(feat_dims.values())) == 1:
self.GCN = GCN(list(self.feat_dims.values())[0], gnn_emd_dim, num_class, gnn_dropout)
else:
            raise Exception("The downstream GCN requires all node types to "
                            "have features of the same dimension")

    def forward(self, hg, h_features):
r"""
Parameters
----------
        hg : dgl.DGLHeteroGraph
All input data is stored in this graph.
The graph should be an undirected heterogeneous graph.
            Every node type in the graph should have a feature named 'h', and
            all node types should share the same feature dimension.
            Every node type in the graph should also have a metapath2vec
            embedding feature named 'xxx_m2v_emb', all with the same embedding
            dimension.
h_features : dict
Not used.
Returns
--------
result : dict
The target node type and the corresponding node embeddings.
"""

        def generate_node_indexes(hg):
indexes = dict()
index = 0
for node_type in hg.ntypes:
indexes[node_type] = (index, index + hg.num_nodes(node_type))
index += hg.num_nodes(node_type)
return indexes

        def construct_homo_adj(new_adjs, hg, node_indexes, device):
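            # Place each learned relation subgraph into its block of a big
            # homogeneous adjacency matrix, then symmetrize and L1-normalize
            # along dim 0 (see the Note in the class docstring about how this
            # normalization differs from the authors' code).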
new_homo_adj = torch.zeros(size=(hg.num_nodes(), hg.num_nodes())).to(device)
for canonical_etype, new_adj in new_adjs.items():
row_range = node_indexes[canonical_etype[0]]
column_range = node_indexes[canonical_etype[2]]
new_homo_adj[row_range[0]:row_range[1], column_range[0]:column_range[1]] = new_adj
            new_homo_adj = new_homo_adj + new_homo_adj.t()  # out-of-place: an in-place add with a transposed view of itself can give undefined results
new_homo_adj = F.normalize(new_homo_adj, dim=0, p=1)
return new_homo_adj

        def construct_homo_feature(hg, device):
homo_feature = list()
for ntype in hg.ntypes:
homo_feature.append(hg.nodes[ntype].data['h'])
homo_feature = torch.cat(homo_feature, dim=0).to(device)
return homo_feature
# Heterogeneous Feature Mapping
mapped_feats = dict()
for ntype in self.node_types:
mapped_feats[ntype] = self.non_linear(self.encoder[ntype](hg.nodes[ntype].data['h']))
# Heterogeneous Graph Generation
new_adjs = dict()
for canonical_etype in self.ud_rels:
undirected_relation = canonical_etype[1]
ori_g = F.normalize(hg.adj(etype=canonical_etype).to_dense().to(self.device), dim=1, p=2)
# Feature Graph Generation
fg_direct = self.fgg_direct[undirected_relation](mapped_feats[canonical_etype[0]],
mapped_feats[canonical_etype[2]])
fmat_l, fmat_r = hg.nodes[canonical_etype[0]].data['h'], hg.nodes[canonical_etype[2]].data['h']
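            # Feature propagation graphs (equations 5 and 6 of the paper):
            # head-side and tail-side feature similarities are propagated
            # through the original topology ori_g.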
sim_l, sim_r = self.fgg_left[undirected_relation](fmat_l, fmat_l), self.fgg_right[undirected_relation](
fmat_r, fmat_r)
fg_left, fg_right = sim_l.mm(ori_g), sim_r.mm(ori_g.t()).t()
feat_g = self.fg_agg[undirected_relation]([fg_direct, fg_left, fg_right])
# Semantic Graph Generation
sem_g_list = [self.sgg_gen[undirected_relation][mp](hg.nodes[canonical_etype[0]].data[mp],
hg.nodes[canonical_etype[2]].data[mp]) for mp in
self.metapaths]
sem_g = self.sg_agg[undirected_relation](sem_g_list)
# Overall Graph
new_adjs[canonical_etype] = self.overall_g_agg[undirected_relation]([feat_g, sem_g, ori_g])
node_indexes = generate_node_indexes(hg)
new_homo_adj = construct_homo_adj(new_adjs, hg, node_indexes, self.device)
homo_feature = construct_homo_feature(hg, self.device)
x = self.GCN(homo_feature, new_homo_adj)
result = {self.category: x[node_indexes[self.category][0]:node_indexes[self.category][1], :]}
return result


class MetricCalcLayer(nn.Module):
    r"""
    Calculate the metric in equation 3 of the paper.
Parameters
----------
nhid : int
The dimension of mapped features in the graph generating procedure.
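
    Examples
    --------
    A toy sketch; the sizes here are arbitrary assumptions:

    >>> layer = MetricCalcLayer(nhid=8)
    >>> layer(torch.rand(4, 8)).shape
    torch.Size([4, 8])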
"""

    def __init__(self, nhid):
super().__init__()
self.weight = nn.Parameter(torch.FloatTensor(1, nhid))
nn.init.xavier_uniform_(self.weight)

    def forward(self, h):
r"""
Parameters
----------
        h : tensor
            The input embedding matrix; the layer returns the Hadamard product
            of ``h`` and the learned weight, as in equation 3 of the paper.
"""
return h * self.weight


class GraphGenerator(nn.Module):
    r"""
    Generate a graph from two sets of node embeddings using a multi-head
    weighted cosine similarity, keeping only the entries above a threshold.

    Parameters
    ----------
    dim : int
        The dimension of the input embeddings.
    num_head : int
        Number of heads in the weighted cosine similarity function.
    threshold : float
        Similarity values below this threshold are zeroed out.
    dev : torch.device
        The device on which the generated graph is created.
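
    Examples
    --------
    A minimal illustrative sketch; the shapes and hyperparameters are
    arbitrary assumptions, not values from the paper:

    >>> gen = GraphGenerator(dim=8, num_head=2, threshold=0.1,
    ...                      dev=torch.device('cpu'))
    >>> adj = gen(torch.rand(4, 8), torch.rand(5, 8))
    >>> adj.shape
    torch.Size([4, 5])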
"""

    def __init__(self, dim, num_head=2, threshold=0.1, dev=None):
super(GraphGenerator, self).__init__()
self.threshold = threshold
self.metric_layer = nn.ModuleList()
for i in range(num_head):
self.metric_layer.append(MetricCalcLayer(dim))
self.num_head = num_head
self.dev = dev

    def forward(self, left_h, right_h):
r"""
Parameters
----------
left_h : tensor
The first input embedding matrix.
right_h : tensor
The second input embedding matrix.
"""
def cos_sim(a, b, eps=1e-8):
a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
return sim_mt
if torch.sum(left_h) == 0 or torch.sum(right_h) == 0:
return torch.zeros((left_h.shape[0], right_h.shape[0])).to(self.dev)
s = torch.zeros((left_h.shape[0], right_h.shape[0])).to(self.dev)
        zero_lines = torch.nonzero(torch.sum(left_h, 1) == 0)
        # The ReLU activation can produce all-zero rows, which would cause a
        # NaN (division-by-zero) problem in the cosine similarity, so add a
        # small epsilon to those rows.
        if len(zero_lines) > 0:
            left_h[zero_lines, :] += 1e-8
for i in range(self.num_head):
weighted_left_h = self.metric_layer[i](left_h)
weighted_right_h = self.metric_layer[i](right_h)
s += cos_sim(weighted_left_h, weighted_right_h)
s /= self.num_head
s = torch.where(s < self.threshold, torch.zeros_like(s), s)
return s


class GraphChannelAttLayer(nn.Module):
    r"""
    The graph channel attention layer in equations 7, 9 and 10 of the paper:
    candidate graphs are stacked, normalized, and fused using
    softmax-normalized channel weights.

    Parameters
    ----------
    num_channel : int
        The number of candidate graphs (channels) to fuse.
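
    Examples
    --------
    A toy sketch fusing three random candidate graphs (the sizes are
    arbitrary assumptions):

    >>> att = GraphChannelAttLayer(num_channel=3)
    >>> adjs = [torch.rand(5, 5) for _ in range(3)]
    >>> att(adjs).shape
    torch.Size([5, 5])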
"""

    def __init__(self, num_channel):
super(GraphChannelAttLayer, self).__init__()
self.weight = nn.Parameter(torch.Tensor(num_channel, 1, 1))
nn.init.constant_(self.weight, 0.1) # equal weight

    def forward(self, adj_list):
r"""
Parameters
----------
        adj_list : list
            The list of adjacency matrices (one per channel), each of shape
            (N, N).
"""
adj_list = torch.stack(adj_list)
        # L1-normalize each stacked graph along dim 1
        adj_list = F.normalize(adj_list, dim=1, p=1)
        # Fuse the channels: softmax-normalized channel weights, Hadamard
        # product, then summation over channels
return torch.sum(adj_list * F.softmax(self.weight, dim=0), dim=0)


class GCN(nn.Module):
    r"""
    The downstream two-layer GCN model.

    Parameters
    ----------
    nfeat : int
        Input feature dimension.
    nhid : int
        Hidden layer dimension.
    nclass : int
        Number of classes.
    dropout : float
        Dropout ratio applied after the first layer.
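
    Examples
    --------
    An illustrative sketch with made-up sizes (6 nodes, 8-dim features,
    3 classes); ``adj`` is assumed to be already normalized:

    >>> model = GCN(nfeat=8, nhid=16, nclass=3, dropout=0.5)
    >>> model(torch.rand(6, 8), torch.eye(6)).shape
    torch.Size([6, 3])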
"""

    def __init__(self, nfeat, nhid, nclass, dropout):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.dropout = dropout

    def forward(self, x, adj):
r"""
Parameters
----------
x : tensor
The feature matrix.
adj : tensor
            The adjacency matrix.
"""
x = F.relu(self.gc1(x, adj))
x = F.dropout(x, self.dropout, training=self.training)
x = self.gc2(x, adj)
return x


class GraphConvolution(nn.Module):
    r"""
    The downstream GCN layer, computing :math:`AHW` (plus an optional bias).

    Parameters
    ----------
    in_features : int
        Input feature dimension.
    out_features : int
        Output feature dimension.
    bias : bool
        Whether to add a learnable bias.
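
    Examples
    --------
    A toy sketch with assumed sizes:

    >>> layer = GraphConvolution(in_features=8, out_features=4)
    >>> layer(torch.rand(6, 8), torch.eye(6)).shape
    torch.Size([6, 4])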
"""
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Initialize weight and bias uniformly in [-stdv, stdv], following the
        # original GCN reference implementation.
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

def forward(self, inputs, adj):
r"""
Parameters
----------
inputs : tensor
The feature matrix.
adj : tensor
            The adjacency matrix.
"""
support = torch.mm(inputs, self.weight) # HW in GCN
output = torch.mm(adj, support) # AHW
if self.bias is not None:
return output + self.bias
else:
return output