import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from . import BaseModel, register_model


@register_model('HGSL')
class HGSL(BaseModel):
r"""
HGSL, Heterogeneous Graph Structure Learning from `paper <http://www.shichuan.org/doc/100.pdf>`_.
Parameters
----------
feat_dims : dict
The feature dimensions of different node types.
    undirected_relations : str
        The HGSL model can only handle undirected heterographs, while in the
        dgl.heterograph format each undirected edge is stored as two separate,
        symmetric edge types. Hence you have to specify which edge types
        represent the distinct undirected relations, separated by commas. For
        example, in a heterograph with two undirected relations, paper-author
        and paper-subject, four types of edges are stored: paper-author,
        author-paper, paper-subject and subject-paper. This parameter can then
        be "paper-author,paper-subject", "author-paper,paper-subject",
        "paper-author,subject-paper" or "author-paper,subject-paper". See the
        Examples section below.
    device : str
The GPU device to select, like 'cuda:0'.
metapaths : list
The metapath name list.
mp_emb_dim : int
The dimension of metapath embeddings from metapath2vec.
hidden_dim : int
The dimension of mapped features in the graph generating procedure.
    num_heads : int
Number of heads in the K-head weighted cosine similarity function.
fs_eps : float
Threshold of feature similarity graph :math:`\epsilon^{FS}`.
fp_eps : float
Threshold of feature propagation graph :math:`\epsilon^{FP}`.
mp_eps : float
Threshold of semantic graph :math:`\epsilon^{MP}`.
gnn_emd_dim : int
The dimension of hidden layers of the downstream GNN.
gnn_dropout : float
The dropout ratio of features in the downstream GNN.
category : str
The target node type which the model will predict on.
out_dim : int
        Number of classes of the target node type.
    Attributes
    ----------
    fgg_direct : nn.ModuleDict
        Feature similarity graph generators (:math:`S_r^{FS}`) in equation 2
        of the paper; keys are undirected-relation names.
    fgg_left : nn.ModuleDict
        Feature propagation graph generators (:math:`S_r^{FH}`) which generate
        the graphs in equation 5 of the paper.
    fgg_right : nn.ModuleDict
        Feature propagation graph generators (:math:`S_r^{FT}`) which generate
        the graphs in equation 6 of the paper.
    fg_agg : nn.ModuleDict
        Channel attention layers, each of which fuses one feature similarity
        graph and the two generated feature propagation graphs, as in equation
        7 of the paper.
    sgg_gen : nn.ModuleDict
        Semantic subgraph generators (:math:`S_{r,m}^{MP}`) in equation 8 of
        the paper.
    sg_agg : nn.ModuleDict
        Channel attention layers which fuse the semantic subgraphs, as in
        equation 9 of the paper.
    overall_g_agg : nn.ModuleDict
        Channel attention layers which fuse the learned feature graph, the
        semantic graph and the original graph.
    encoder : nn.ModuleDict
        The type-specific mapping layers in equation 1 of the paper.
    Note
    ----
    Under the best config, this model differs slightly from the code released
    by the paper's authors; the differences seem to have little impact on
    performance:

    1. The regularization term in the loss is applied to all parameters of
       the model, while in the authors' code it is applied only to the
       generated adjacency matrix. Implementing the latter would require a
       new OpenHGNN task.
    2. The input adjacency matrix is normalized separately for each relation,
       while in the authors' code the entire adjacency matrix, composed of
       the adjacency matrices of all relations, is normalized as a whole.
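
    Examples
    --------
    For a heterograph that stores the edge types ``paper-author``,
    ``author-paper``, ``paper-subject`` and ``subject-paper``, one valid
    setting (an illustrative assumption, not a shipped config) is::

        undirected_relations = "paper-author,paper-subject"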
"""

    @classmethod
def build_model_from_args(cls, args, hg):
feat_dims = dict()
for ntype in hg.ntypes:
feat_dims[ntype] = hg.nodes[ntype].data['h'].shape[1]
# Extract undirected_relations
und_rels = args.undirected_relations.split(',')
undirected_relations = list()
for etype in hg.canonical_etypes:
if etype[1] in und_rels:
undirected_relations.append(etype)
device = hg.device
metapaths = list()
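        # Collect metapath2vec embeddings by naming convention: any feature of
        # the "paper" node type whose name contains "m2v" is treated as a
        # metapath embedding. Note that "paper" and "pap_m2v_emb" are
        # hardcoded below, so this lookup assumes an ACM-style dataset.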
for feature_name in hg.nodes["paper"].data.keys():
if "m2v" in feature_name:
metapaths.append(feature_name)
mp_emb_dim = hg.nodes["paper"].data["pap_m2v_emb"].shape[1]
return cls(feat_dims=feat_dims, undirected_relations=undirected_relations, device=device, metapaths=metapaths,
mp_emb_dim=mp_emb_dim, hidden_dim=args.hidden_dim, num_heads=args.num_heads,
fs_eps=args.fs_eps, fp_eps=args.fp_eps, mp_eps=args.mp_eps,
gnn_emd_dim=args.gnn_emd_dim, gnn_dropout=args.gnn_dropout,
category=args.category, num_class=args.out_dim)

    def __init__(self, feat_dims, undirected_relations, device, metapaths, mp_emb_dim, hidden_dim, num_heads, fs_eps,
fp_eps, mp_eps, gnn_emd_dim, gnn_dropout, category, num_class):
super().__init__()
self.device = device
self.ud_rels = undirected_relations
self.node_types = list(feat_dims.keys())
self.feat_dims = feat_dims
self.non_linear = nn.ReLU()
self.category = category
self.metapaths = metapaths
nnmd = nn.ModuleDict
self.fgg_direct, self.fgg_left, self.fgg_right, self.fg_agg, self.sgg_gen, self.sg_agg, self.overall_g_agg = \
nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({}), nnmd({})
# Feature encoder
self.encoder = nnmd(
dict(zip(self.node_types, [nn.Linear(feat_dims[node_type], hidden_dim) for node_type in self.node_types])))
for canonical_etype in undirected_relations:
undirected_relation = canonical_etype[1]
# Feature Graph Generator
self.fgg_direct[undirected_relation] = GraphGenerator(hidden_dim, num_heads, fs_eps, self.device)
self.fgg_left[undirected_relation] = GraphGenerator(feat_dims[canonical_etype[0]], num_heads, fp_eps,
self.device)
self.fgg_right[undirected_relation] = GraphGenerator(feat_dims[canonical_etype[2]], num_heads, fp_eps,
self.device)
self.fg_agg[undirected_relation] = GraphChannelAttLayer(3)
# Semantic Graph Generator
self.sgg_gen[undirected_relation] = nnmd(dict(
zip(metapaths, [GraphGenerator(mp_emb_dim, num_heads, mp_eps, self.device) for _ in metapaths])))
self.sg_agg[undirected_relation] = GraphChannelAttLayer(len(metapaths))
# Overall Graph Generator
self.overall_g_agg[undirected_relation] = GraphChannelAttLayer(3)
# Graph Convolution
if len(set(feat_dims.values())) == 1:
self.GCN = GCN(list(self.feat_dims.values())[0], gnn_emd_dim, num_class, gnn_dropout)
else:
            raise Exception("The downstream GCN requires all node types to "
                            "have features of the same dimension")

    def forward(self, hg, h_features):
r"""
Parameters
----------
        hg : dgl.DGLHeteroGraph
All input data is stored in this graph.
The graph should be an undirected heterogeneous graph.
            Every node type in the graph should have a feature named 'h', and
            all node types should share the same feature dimension.
            Every node type in the graph should also have a metapath2vec
            embedding feature named 'xxx_m2v_emb', all with the same embedding
            dimension.
h_features : dict
Not used.
Returns
--------
result : dict
The target node type and the corresponding node embeddings.
"""

        def generate_node_indexes(hg):
indexes = dict()
index = 0
for node_type in hg.ntypes:
indexes[node_type] = (index, index + hg.num_nodes(node_type))
index += hg.num_nodes(node_type)
return indexes

        def construct_homo_adj(new_adjs, hg, node_indexes, device):
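            # Place each learned relation subgraph into its block of a big
            # homogeneous adjacency matrix, then symmetrize and L1-normalize
            # along dim 0 (see the Note in the class docstring about how this
            # normalization differs from the authors' code).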
new_homo_adj = torch.zeros(size=(hg.num_nodes(), hg.num_nodes())).to(device)
for canonical_etype, new_adj in new_adjs.items():
row_range = node_indexes[canonical_etype[0]]
column_range = node_indexes[canonical_etype[2]]
new_homo_adj[row_range[0]:row_range[1], column_range[0]:column_range[1]] = new_adj
            new_homo_adj = new_homo_adj + new_homo_adj.t()  # out-of-place: an in-place add with a transposed view of itself can give undefined results
new_homo_adj = F.normalize(new_homo_adj, dim=0, p=1)
return new_homo_adj

        def construct_homo_feature(hg, device):
homo_feature = list()
for ntype in hg.ntypes:
homo_feature.append(hg.nodes[ntype].data['h'])
homo_feature = torch.cat(homo_feature, dim=0).to(device)
return homo_feature
# Heterogeneous Feature Mapping
mapped_feats = dict()
for ntype in self.node_types:
mapped_feats[ntype] = self.non_linear(self.encoder[ntype](hg.nodes[ntype].data['h']))
# Heterogeneous Graph Generation
new_adjs = dict()
for canonical_etype in self.ud_rels:
undirected_relation = canonical_etype[1]
ori_g = F.normalize(hg.adj(etype=canonical_etype).to_dense().to(self.device), dim=1, p=2)
# Feature Graph Generation
fg_direct = self.fgg_direct[undirected_relation](mapped_feats[canonical_etype[0]],
mapped_feats[canonical_etype[2]])
fmat_l, fmat_r = hg.nodes[canonical_etype[0]].data['h'], hg.nodes[canonical_etype[2]].data['h']
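            # Feature propagation graphs (equations 5 and 6 of the paper):
            # head-side and tail-side feature similarities are propagated
            # through the original topology ori_g.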
sim_l, sim_r = self.fgg_left[undirected_relation](fmat_l, fmat_l), self.fgg_right[undirected_relation](
fmat_r, fmat_r)
fg_left, fg_right = sim_l.mm(ori_g), sim_r.mm(ori_g.t()).t()
feat_g = self.fg_agg[undirected_relation]([fg_direct, fg_left, fg_right])
# Semantic Graph Generation
sem_g_list = [self.sgg_gen[undirected_relation][mp](hg.nodes[canonical_etype[0]].data[mp],
hg.nodes[canonical_etype[2]].data[mp]) for mp in
self.metapaths]
sem_g = self.sg_agg[undirected_relation](sem_g_list)
# Overall Graph
new_adjs[canonical_etype] = self.overall_g_agg[undirected_relation]([feat_g, sem_g, ori_g])
node_indexes = generate_node_indexes(hg)
new_homo_adj = construct_homo_adj(new_adjs, hg, node_indexes, self.device)
homo_feature = construct_homo_feature(hg, self.device)
x = self.GCN(homo_feature, new_homo_adj)
result = {self.category: x[node_indexes[self.category][0]:node_indexes[self.category][1], :]}
return result


class MetricCalcLayer(nn.Module):
    r"""
    Calculate the metric in equation 3 of the paper.
Parameters
----------
nhid : int
The dimension of mapped features in the graph generating procedure.
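
    Examples
    --------
    A toy sketch; the sizes here are arbitrary assumptions:

    >>> layer = MetricCalcLayer(nhid=8)
    >>> layer(torch.rand(4, 8)).shape
    torch.Size([4, 8])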
"""

    def __init__(self, nhid):
super().__init__()
self.weight = nn.Parameter(torch.FloatTensor(1, nhid))
nn.init.xavier_uniform_(self.weight)

    def forward(self, h):
r"""
Parameters
----------
        h : tensor
            The input embedding matrix; the layer returns the Hadamard product
            of ``h`` and the learned weight, as in equation 3 of the paper.
"""
return h * self.weight


class GraphGenerator(nn.Module):
    r"""
    Generate a graph from two sets of node embeddings using a multi-head
    weighted cosine similarity, keeping only the entries above a threshold.

    Parameters
    ----------
    dim : int
        The dimension of the input embeddings.
    num_head : int
        Number of heads in the weighted cosine similarity function.
    threshold : float
        Similarity values below this threshold are zeroed out.
    dev : torch.device
        The device on which the generated graph is created.
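
    Examples
    --------
    A minimal illustrative sketch; the shapes and hyperparameters are
    arbitrary assumptions, not values from the paper:

    >>> gen = GraphGenerator(dim=8, num_head=2, threshold=0.1,
    ...                      dev=torch.device('cpu'))
    >>> adj = gen(torch.rand(4, 8), torch.rand(5, 8))
    >>> adj.shape
    torch.Size([4, 5])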
"""

    def __init__(self, dim, num_head=2, threshold=0.1, dev=None):
super(GraphGenerator, self).__init__()
self.threshold = threshold
self.metric_layer = nn.ModuleList()
for i in range(num_head):
self.metric_layer.append(MetricCalcLayer(dim))
self.num_head = num_head
self.dev = dev

    def forward(self, left_h, right_h):
r"""
Parameters
----------
left_h : tensor
The first input embedding matrix.
right_h : tensor
The second input embedding matrix.
"""
def cos_sim(a, b, eps=1e-8):
a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
return sim_mt
if torch.sum(left_h) == 0 or torch.sum(right_h) == 0:
return torch.zeros((left_h.shape[0], right_h.shape[0])).to(self.dev)
s = torch.zeros((left_h.shape[0], right_h.shape[0])).to(self.dev)
        zero_lines = torch.nonzero(torch.sum(left_h, 1) == 0)
        # The ReLU activation can produce all-zero rows, which would cause a
        # NaN (division-by-zero) problem in the cosine similarity, so add a
        # small epsilon to those rows.
        if len(zero_lines) > 0:
            left_h[zero_lines, :] += 1e-8
for i in range(self.num_head):
weighted_left_h = self.metric_layer[i](left_h)
weighted_right_h = self.metric_layer[i](right_h)
s += cos_sim(weighted_left_h, weighted_right_h)
s /= self.num_head
s = torch.where(s < self.threshold, torch.zeros_like(s), s)
return s


class GraphChannelAttLayer(nn.Module):
    r"""
    The graph channel attention layer in equations 7, 9 and 10 of the paper:
    candidate graphs are stacked, normalized, and fused using
    softmax-normalized channel weights.

    Parameters
    ----------
    num_channel : int
        The number of candidate graphs (channels) to fuse.
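
    Examples
    --------
    A toy sketch fusing three random candidate graphs (the sizes are
    arbitrary assumptions):

    >>> att = GraphChannelAttLayer(num_channel=3)
    >>> adjs = [torch.rand(5, 5) for _ in range(3)]
    >>> att(adjs).shape
    torch.Size([5, 5])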
"""

    def __init__(self, num_channel):
super(GraphChannelAttLayer, self).__init__()
self.weight = nn.Parameter(torch.Tensor(num_channel, 1, 1))
nn.init.constant_(self.weight, 0.1) # equal weight

    def forward(self, adj_list):
r"""
Parameters
----------
        adj_list : list
            The list of adjacency matrices (one per channel), each of shape
            (N, N).
"""
adj_list = torch.stack(adj_list)
        # L1-normalize each stacked graph along dim 1
        adj_list = F.normalize(adj_list, dim=1, p=1)
        # Fuse the channels: softmax-normalized channel weights, Hadamard
        # product, then summation over channels
return torch.sum(adj_list * F.softmax(self.weight, dim=0), dim=0)


class GCN(nn.Module):
    r"""
    The downstream two-layer GCN model.

    Parameters
    ----------
    nfeat : int
        Input feature dimension.
    nhid : int
        Hidden layer dimension.
    nclass : int
        Number of classes.
    dropout : float
        Dropout ratio applied after the first layer.
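
    Examples
    --------
    An illustrative sketch with made-up sizes (6 nodes, 8-dim features,
    3 classes); ``adj`` is assumed to be already normalized:

    >>> model = GCN(nfeat=8, nhid=16, nclass=3, dropout=0.5)
    >>> model(torch.rand(6, 8), torch.eye(6)).shape
    torch.Size([6, 3])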
"""

    def __init__(self, nfeat, nhid, nclass, dropout):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.dropout = dropout

    def forward(self, x, adj):
r"""
Parameters
----------
x : tensor
The feature matrix.
adj : tensor
            The adjacency matrix.
"""
x = F.relu(self.gc1(x, adj))
x = F.dropout(x, self.dropout, training=self.training)
x = self.gc2(x, adj)
return x


class GraphConvolution(nn.Module):
    r"""
    The downstream GCN layer, computing :math:`AHW` (plus an optional bias).

    Parameters
    ----------
    in_features : int
        Input feature dimension.
    out_features : int
        Output feature dimension.
    bias : bool
        Whether to add a learnable bias.
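
    Examples
    --------
    A toy sketch with assumed sizes:

    >>> layer = GraphConvolution(in_features=8, out_features=4)
    >>> layer(torch.rand(6, 8), torch.eye(6)).shape
    torch.Size([6, 4])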
"""
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Initialize weight and bias uniformly in [-stdv, stdv], following the
        # original GCN reference implementation.
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

def forward(self, inputs, adj):
r"""
Parameters
----------
inputs : tensor
The feature matrix.
adj : tensor
            The adjacency matrix.
"""
support = torch.mm(inputs, self.weight) # HW in GCN
output = torch.mm(adj, support) # AHW
if self.bias is not None:
return output + self.bias
else:
return output