# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This package implements common layers to help building
graph neural networks.
"""
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import pgl
from pgl.nn import functional as GF
__all__ = [
    "GCNConv",
    "GATConv",
    "APPNP",
    "GCNII",
    "TransformerConv",
    "GINConv",
    "GraphSageConv",
    "PinSageConv",
]
class GraphSageConv(nn.Layer):
""" GraphSAGE is a general inductive framework that leverages node feature
information (e.g., text attributes) to efficiently generate node embeddings
for previously unseen data.
Paper reference:
Hamilton, Will, Zhitao Ying, and Jure Leskovec.
"Inductive representation learning on large graphs."
Advances in neural information processing systems. 2017.
Args:
        input_size: The size of the inputs.
        hidden_size: The size of the outputs.
        aggr_func: (default "sum") Aggregation function for GraphSAGE, one of ["sum", "mean", "max", "min"].
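    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()  # move the graph data onto paddle tensors

            feat = paddle.randn([4, 16])
            conv = pgl.nn.GraphSageConv(input_size=16, hidden_size=8, aggr_func="mean")
            out = conv(graph, feat, act="relu")  # shape: [4, 8]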
"""
def __init__(self, input_size, hidden_size, aggr_func="sum"):
super(GraphSageConv, self).__init__()
        assert aggr_func in ["sum", "mean", "max", "min"], \
            "Only the built-in aggregation functions 'sum', 'mean', 'max' and 'min' are supported."
self.aggr_func = "reduce_%s" % aggr_func
self.self_linear = nn.Linear(input_size, hidden_size)
self.neigh_linear = nn.Linear(input_size, hidden_size)
    def forward(self, graph, feature, act=None):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
            act: (default None) Activation applied to the output before normalization.
        Return:
            A tensor with shape (num_nodes, hidden_size)
"""
def _send_func(src_feat, dst_feat, edge_feat):
return {"msg": src_feat["h"]}
def _recv_func(message):
return getattr(message, self.aggr_func)(message["msg"])
msg = graph.send(_send_func, src_feat={"h": feature})
neigh_feature = graph.recv(reduce_func=_recv_func, msg=msg)
self_feature = self.self_linear(feature)
neigh_feature = self.neigh_linear(neigh_feature)
output = self_feature + neigh_feature
if act is not None:
output = getattr(F, act)(output)
output = F.normalize(output, axis=1)
return output
class PinSageConv(nn.Layer):
""" PinSage combines efficient random walks and graph convolutions to
generate embeddings of nodes (i.e., items) that incorporate both graph
structure as well as node feature information.
Paper reference:
Ying, Rex, et al.
"Graph convolutional neural networks for web-scale recommender systems."
Proceedings of the 24th ACM SIGKDD International Conference on Knowledge
Discovery & Data Mining. 2018.
Args:
        input_size: The size of the inputs.
        hidden_size: The size of the outputs.
        aggr_func: (default "sum") Aggregation function, one of ["sum", "mean", "max", "min"].
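    Examples:

        A minimal usage sketch on a toy weighted graph (illustrative only; the
        toy graph construction and the :code:`graph.tensor()` call assume the
        standard :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            edges = [(0, 1), (1, 2), (2, 3), (3, 0)]
            graph = pgl.Graph(num_nodes=4, edges=edges)
            graph.tensor()

            nfeat = paddle.randn([4, 16])
            efeat = paddle.ones([len(edges), 1])  # per-edge weights, e.g. visit counts from random walks
            conv = pgl.nn.PinSageConv(input_size=16, hidden_size=8, aggr_func="sum")
            out = conv(graph, nfeat, efeat, act="relu")  # shape: [4, 8]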
"""
def __init__(self, input_size, hidden_size, aggr_func="sum"):
super(PinSageConv, self).__init__()
        assert aggr_func in ["sum", "mean", "max", "min"], \
            "Only the built-in aggregation functions 'sum', 'mean', 'max' and 'min' are supported."
self.aggr_func = "reduce_%s" % aggr_func
self.self_linear = nn.Linear(input_size, hidden_size)
self.neigh_linear = nn.Linear(input_size, hidden_size)
    def forward(self, graph, nfeat, efeat, act=None):
"""
Args:
graph: `pgl.Graph` instance.
nfeat: A tensor with shape (num_nodes, input_size)
            efeat: A tensor with shape (num_edges, 1) denoting the edge weights.
            act: (default None) Activation applied to the output before normalization.
        Return:
            A tensor with shape (num_nodes, hidden_size)
"""
def _send_func(src_feat, dst_feat, edge_feat):
return {'msg': src_feat["h"] * edge_feat["w"]}
def _recv_func(message):
return getattr(message, self.aggr_func)(message["msg"])
msg = graph.send(
_send_func, src_feat={"h": nfeat}, edge_feat={"w": efeat})
neigh_feature = graph.recv(reduce_func=_recv_func, msg=msg)
        self_feature = self.self_linear(nfeat)
neigh_feature = self.neigh_linear(neigh_feature)
output = self_feature + neigh_feature
if act is not None:
output = getattr(F, act)(output)
output = F.normalize(output, axis=1)
return output
class GCNConv(nn.Layer):
"""Implementation of graph convolutional neural networks (GCN)
This is an implementation of the paper SEMI-SUPERVISED CLASSIFICATION
WITH GRAPH CONVOLUTIONAL NETWORKS (https://arxiv.org/pdf/1609.02907.pdf).
Args:
input_size: The size of the inputs.
        output_size: The size of the outputs.
activation: The activation for the output.
norm: If :code:`norm` is True, then the feature will be normalized.
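    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()

            feat = paddle.randn([4, 16])
            conv = pgl.nn.GCNConv(input_size=16, output_size=8, activation="relu", norm=True)
            out = conv(graph, feat)  # shape: [4, 8]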
"""
def __init__(self, input_size, output_size, activation=None, norm=True):
super(GCNConv, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.linear = nn.Linear(input_size, output_size, bias_attr=False)
self.bias = self.create_parameter(shape=[output_size], is_bias=True)
self.norm = norm
if isinstance(activation, str):
activation = getattr(F, activation)
self.activation = activation
    def forward(self, graph, feature, norm=None):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
            norm: (default None). If :code:`norm` is not None, the feature will be normalized by the given norm. If :code:`norm` is None and :code:`self.norm` is True, the Laplacian degree norm is used.
Return:
A tensor with shape (num_nodes, output_size)
"""
if self.norm and norm is None:
norm = GF.degree_norm(graph)
        # When the projection shrinks the feature, apply it before message
        # passing so that aggregation runs on the smaller tensor; otherwise
        # it is applied after aggregation (the branch below the send_recv).
        if self.input_size > self.output_size:
            feature = self.linear(feature)
if norm is not None:
feature = feature * norm
output = graph.send_recv(feature, "sum")
if self.input_size <= self.output_size:
output = self.linear(output)
if norm is not None:
output = output * norm
output = output + self.bias
if self.activation is not None:
output = self.activation(output)
return output
class GATConv(nn.Layer):
"""Implementation of graph attention networks (GAT)
This is an implementation of the paper GRAPH ATTENTION NETWORKS
(https://arxiv.org/abs/1710.10903).
Args:
input_size: The size of the inputs.
        hidden_size: The hidden size for GAT.
        activation: (default None) The activation for the output.
        num_heads: (default 1) The number of attention heads.
        feat_drop: (default 0.6) Dropout rate for features.
        attn_drop: (default 0.6) Dropout rate for attention weights.
        concat: (default True) Whether to concatenate the output heads or average them.
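    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()

            feat = paddle.randn([4, 16])
            conv = pgl.nn.GATConv(input_size=16, hidden_size=8, num_heads=4, concat=True)
            out = conv(graph, feat)  # shape: [4, 32] because the 4 heads are concatenated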
"""
def __init__(self,
input_size,
hidden_size,
feat_drop=0.6,
attn_drop=0.6,
num_heads=1,
concat=True,
activation=None):
super(GATConv, self).__init__()
self.hidden_size = hidden_size
self.num_heads = num_heads
self.feat_drop = feat_drop
self.attn_drop = attn_drop
self.concat = concat
self.linear = nn.Linear(input_size, num_heads * hidden_size)
self.weight_src = self.create_parameter(shape=[num_heads, hidden_size])
self.weight_dst = self.create_parameter(shape=[num_heads, hidden_size])
self.feat_dropout = nn.Dropout(p=feat_drop)
self.attn_dropout = nn.Dropout(p=attn_drop)
self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)
if isinstance(activation, str):
activation = getattr(F, activation)
self.activation = activation
def _send_attention(self, src_feat, dst_feat, edge_feat):
alpha = src_feat["src"] + dst_feat["dst"]
alpha = self.leaky_relu(alpha)
return {"alpha": alpha, "h": src_feat["h"]}
def _reduce_attention(self, msg):
alpha = msg.reduce_softmax(msg["alpha"])
alpha = paddle.reshape(alpha, [-1, self.num_heads, 1])
if self.attn_drop > 1e-15:
alpha = self.attn_dropout(alpha)
feature = msg["h"]
feature = paddle.reshape(feature,
[-1, self.num_heads, self.hidden_size])
feature = feature * alpha
if self.concat:
feature = paddle.reshape(feature,
[-1, self.num_heads * self.hidden_size])
else:
feature = paddle.mean(feature, axis=1)
feature = msg.reduce(feature, pool_type="sum")
return feature
    def forward(self, graph, feature):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
        Return:
            If `concat=True`, a tensor with shape (num_nodes, num_heads * hidden_size);
            otherwise, a tensor with shape (num_nodes, hidden_size).
"""
if self.feat_drop > 1e-15:
feature = self.feat_dropout(feature)
feature = self.linear(feature)
feature = paddle.reshape(feature,
[-1, self.num_heads, self.hidden_size])
attn_src = paddle.sum(feature * self.weight_src, axis=-1)
attn_dst = paddle.sum(feature * self.weight_dst, axis=-1)
msg = graph.send(
self._send_attention,
src_feat={"src": attn_src,
"h": feature},
dst_feat={"dst": attn_dst})
output = graph.recv(reduce_func=self._reduce_attention, msg=msg)
if self.activation is not None:
output = self.activation(output)
return output
class APPNP(nn.Layer):
"""Implementation of APPNP of "Predict then Propagate: Graph Neural Networks
meet Personalized PageRank" (ICLR 2019).
Args:
        k_hop: (default 10) Number of propagation steps K.
        alpha: (default 0.2) The teleport (restart) probability alpha in the paper.
    Return:
        A tensor with the same shape as the input feature.
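    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()

            feat = paddle.randn([4, 16])
            appnp = pgl.nn.APPNP(alpha=0.1, k_hop=10)  # APPNP has no trainable weights
            out = appnp(graph, feat)  # shape: [4, 16], same as the input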
"""
def __init__(self, alpha=0.2, k_hop=10):
super(APPNP, self).__init__()
self.alpha = alpha
self.k_hop = k_hop
    def forward(self, graph, feature, norm=None):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
            norm: (default None). If :code:`norm` is not None, the feature will be normalized by the given norm. If :code:`norm` is None, the Laplacian degree norm is used.
        Return:
            A tensor with shape (num_nodes, input_size)
"""
if norm is None:
norm = GF.degree_norm(graph)
h0 = feature
        for _ in range(self.k_hop):
            # One personalized-PageRank propagation step:
            # H <- alpha * H0 + (1 - alpha) * A_hat * H,
            # where A_hat is the symmetrically normalized adjacency matrix.
            feature = feature * norm
            feature = graph.send_recv(feature)
            feature = feature * norm
            feature = self.alpha * h0 + (1 - self.alpha) * feature
return feature
class GCNII(nn.Layer):
"""Implementation of GCNII of "Simple and Deep Graph Convolutional Networks"
paper: https://arxiv.org/pdf/2007.02133.pdf
Args:
hidden_size: The size of inputs and outputs.
activation: The activation for the output.
k_hop: Number of layers for gcnii.
        lambda_l: The hyperparameter lambda in the paper.
        alpha: The hyperparameter alpha in the paper.
dropout: Feature dropout rate.
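    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API). Note that the input feature size must equal
        :code:`hidden_size`:

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()

            feat = paddle.randn([4, 16])
            conv = pgl.nn.GCNII(hidden_size=16, activation="relu", lambda_l=0.5, alpha=0.2, k_hop=4)
            out = conv(graph, feat)  # shape: [4, 16]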
"""
def __init__(self,
hidden_size,
activation=None,
lambda_l=0.5,
alpha=0.2,
k_hop=10,
dropout=0.6):
super(GCNII, self).__init__()
self.hidden_size = hidden_size
self.activation = activation
self.lambda_l = lambda_l
self.alpha = alpha
self.k_hop = k_hop
self.dropout = dropout
self.drop_fn = nn.Dropout(dropout)
self.mlps = nn.LayerList()
for _ in range(k_hop):
self.mlps.append(nn.Linear(hidden_size, hidden_size))
if isinstance(activation, str):
activation = getattr(F, activation)
self.activation = activation
    def forward(self, graph, feature, norm=None):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
            norm: (default None). If :code:`norm` is not None, the feature will be normalized by the given norm. If :code:`norm` is None, the Laplacian degree norm is used.
        Return:
            A tensor with shape (num_nodes, hidden_size)
"""
if norm is None:
norm = GF.degree_norm(graph)
h0 = feature
        for i in range(self.k_hop):
            # Identity-mapping strength from the paper: beta_i = log(lambda_l / (i + 1) + 1).
            beta_i = np.log(1.0 * self.lambda_l / (i + 1) + 1)
feature = self.drop_fn(feature)
feature = feature * norm
feature = graph.send_recv(feature)
feature = feature * norm
feature = self.alpha * h0 + (1 - self.alpha) * feature
feature_transed = self.mlps[i](feature)
feature = beta_i * feature_transed + (1 - beta_i) * feature
if self.activation is not None:
feature = self.activation(feature)
return feature
class GINConv(nn.Layer):
"""Implementation of Graph Isomorphism Network (GIN) layer.
This is an implementation of the paper How Powerful are Graph Neural Networks?
(https://arxiv.org/pdf/1810.00826.pdf).
    In the paper's implementation, all MLPs have 2 layers and batch
    normalization is applied to every hidden layer; this layer uses a 2-layer
    MLP with layer normalization after the first linear transform.
Args:
input_size: The size of input.
output_size: The size of output.
activation: The activation for the output.
init_eps: float, optional
Initial :math:`\epsilon` value, default is 0.
train_eps: bool, optional
            If True, :math:`\epsilon` will be a learnable parameter.
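    Examples:

        A minimal usage sketch on a toy graph (illustrative only; the toy graph
        construction and the :code:`graph.tensor()` call assume the standard
        :code:`pgl.Graph` API):

        .. code-block:: python

            import paddle
            import pgl

            graph = pgl.Graph(num_nodes=4, edges=[(0, 1), (1, 2), (2, 3), (3, 0)])
            graph.tensor()

            feat = paddle.randn([4, 16])
            conv = pgl.nn.GINConv(input_size=16, output_size=8, activation="relu")
            out = conv(graph, feat)  # shape: [4, 8]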
"""
def __init__(self,
input_size,
output_size,
activation=None,
init_eps=0.0,
train_eps=False):
super(GINConv, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.linear1 = nn.Linear(input_size, output_size, bias_attr=True)
self.linear2 = nn.Linear(output_size, output_size, bias_attr=True)
self.layer_norm = nn.LayerNorm(output_size)
if train_eps:
self.epsilon = self.create_parameter(
shape=[1, 1],
dtype='float32',
default_initializer=nn.initializer.Constant(value=init_eps))
else:
self.epsilon = init_eps
if isinstance(activation, str):
activation = getattr(F, activation)
self.activation = activation
    def forward(self, graph, feature):
"""
Args:
graph: `pgl.Graph` instance.
feature: A tensor with shape (num_nodes, input_size)
Return:
A tensor with shape (num_nodes, output_size)
"""
        neigh_feature = graph.send_recv(feature, reduce_func="sum")
        # GIN update: h_v <- MLP((1 + eps) * h_v + sum of neighbor features).
        output = neigh_feature + feature * (self.epsilon + 1.0)
output = self.linear1(output)
output = self.layer_norm(output)
if self.activation is not None:
output = self.activation(output)
output = self.linear2(output)
return output