


本文采用的是ASTGCN这篇使用图网络进行交通预测的论文所提供的数据集PEMS,包括PEMS04和PEMS08。其中PEMS04是从2018年1月1日开始连续59天、由307个探测器采集的流量数据,每5分钟采集一次(每天288个时间片,59 × 288 = 16992),所以原始流量数据data.npz读取后shape为(307, 16992, 3),其中3维特征分别为flow、occupy、speed。原始邻接矩阵数据是一个distance.csv文件,每行的格式为from, to, distance;方便起见,本文中只要两个节点相连,距离(对应图上的边权)就都取1。类似地,PEMS08是从2016年7月1日开始连续62天、由170个节点采集的流量数据,其数据shape为(170, 17856, 3)。



首先是ChebNet模型的构建。它将SCNN中的谱域图卷积核替换为切比雪夫多项式(如下式),最终要学习的只有 $K+1$ 个参数,大大减少了SCNN巨大的参数量(SCNN还需要对拉普拉斯矩阵做特征分解来求解特征向量)。

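$$x \star g_\theta = \sum_{k=0}^{K} \theta_k \, T_k(\tilde{L}) \, x, \qquad T_0(\tilde{L}) = I,\quad T_1(\tilde{L}) = \tilde{L},\quad T_k(\tilde{L}) = 2\tilde{L}\,T_{k-1}(\tilde{L}) - T_{k-2}(\tilde{L})$$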

ChebNet的代码实现如下。不过,虽然减少了参数量,但是卷积核有了严格的空间局限性:$K$ 就是卷积核的“感受野半径”,即将距离中心节点 $K$ 跳以内的节点作为邻域节点($K=1$ 时便相当于普通的3x3卷积,邻域为1)。

class ChebConv(nn.Module):
    """
    Chebyshev-polynomial graph convolution layer (ChebNet conv).

    The layer builds the Chebyshev polynomials T_0..T_K of the graph
    Laplacian and learns one ``[in_c, out_c]`` projection per order.
    """

    def __init__(self, in_c, out_c, K, bias=True, normalize=True):
        """
        ChebNet conv

        :param in_c: input channels
        :param out_c: output channels
        :param K: the order of Chebyshev Polynomial
        :param bias: if use bias
        :param normalize: if use the normalized laplacian
        """
        super(ChebConv, self).__init__()
        self.normalize = normalize

        self.weight = nn.Parameter(torch.Tensor(K + 1, 1, in_c, out_c))  # [K+1, 1, in_c, out_c]
        # torch.Tensor(...) only allocates memory; without an explicit init the
        # weights may start as arbitrary values (including NaN/inf).
        nn.init.xavier_normal_(self.weight)

        if bias:
            self.bias = nn.Parameter(torch.Tensor(1, 1, out_c))
            nn.init.zeros_(self.bias)
        else:
            # Keep the attribute defined so forward() can test it against None.
            self.register_parameter("bias", None)

        # Number of polynomial terms (orders 0..K), not the order itself.
        self.K = K + 1

    def forward(self, inputs, graph):
        """
        Apply the graph convolution.

        :param inputs: the input data, [B, N, C]
        :param graph: the graph structure, [N, N]
        :return: convolution result, [B, N, D]
        """
        L = ChebConv.get_laplacian(graph, self.normalize)  # [N, N]
        mul_L = self.cheb_polynomial(L).unsqueeze(1)  # [K, 1, N, N]

        result = torch.matmul(mul_L, inputs)  # [K, B, N, C]
        result = torch.matmul(result, self.weight)  # [K, B, N, D]
        result = torch.sum(result, dim=0)  # [B, N, D]

        if self.bias is not None:
            result = result + self.bias

        return result

    def cheb_polynomial(self, laplacian):
        """
        Compute the Chebyshev Polynomial, according to the graph laplacian.

        Uses the recursion T_0 = I, T_1 = L, T_k = 2 * L * T_{k-1} - T_{k-2}.

        :param laplacian: the graph laplacian, [N, N]
        :return: the multi order Chebyshev laplacian, [K, N, N]
        """
        N = laplacian.size(0)
        multi_order_laplacian = torch.zeros([self.K, N, N], device=laplacian.device, dtype=torch.float)  # [K, N, N]
        multi_order_laplacian[0] = torch.eye(N, device=laplacian.device, dtype=torch.float)

        if self.K > 1:
            multi_order_laplacian[1] = laplacian

        # Empty range when self.K <= 2, so only T_0 (and T_1) are returned.
        for k in range(2, self.K):
            multi_order_laplacian[k] = 2 * torch.mm(laplacian, multi_order_laplacian[k - 1]) - \
                                       multi_order_laplacian[k - 2]

        return multi_order_laplacian

    @staticmethod
    def get_laplacian(graph, normalize):
        """
        Compute the laplacian of the graph.

        :param graph: the graph structure without self loop, [N, N]
        :param normalize: whether to used the normalized laplacian
        :return: graph laplacian, [N, N]; ``I - D^(-1/2) A D^(-1/2)`` when
            ``normalize`` is true, otherwise ``D - A``
        """
        if normalize:
            deg_inv_sqrt = torch.sum(graph, dim=-1) ** (-1 / 2)
            # A node without edges has degree 0 -> inf; zero it so that
            # inf * 0 does not put NaN into the laplacian.
            deg_inv_sqrt = torch.where(torch.isinf(deg_inv_sqrt),
                                       torch.zeros_like(deg_inv_sqrt),
                                       deg_inv_sqrt)
            D = torch.diag(deg_inv_sqrt)
            L = torch.eye(graph.size(0), device=graph.device, dtype=graph.dtype) - torch.mm(torch.mm(D, graph), D)
        else:
            D = torch.diag(torch.sum(graph, dim=-1))
            L = D - graph
        return L

class ChebNet(nn.Module):
    """Two stacked ChebConv layers, each followed by ReLU."""

    def __init__(self, in_c, hid_c, out_c, K):
        """
        :param in_c: int, number of input channels.
        :param hid_c: int, number of hidden channels.
        :param out_c: int, number of output channels.
        :param K: int, order of the Chebyshev polynomial passed to both ChebConv layers.
        """
        super(ChebNet, self).__init__()
        self.conv1 = ChebConv(in_c=in_c, out_c=hid_c, K=K)
        self.conv2 = ChebConv(in_c=hid_c, out_c=out_c, K=K)
        self.act = nn.ReLU()

    def forward(self, data, device):
        """
        :param data: dict with keys "graph" (batched adjacency; only the first
            entry is used) and "flow_x" (input features, [B, N, H, D]).
        :param device: device the tensors are moved to before computing.
        :return: prediction, [B, N, 1, out_c]
        """
        graph_data = data["graph"].to(device)[0]  # [N, N]
        flow_x = data["flow_x"].to(device)  # [B, N, H, D]

        B, N = flow_x.size(0), flow_x.size(1)

        # Flatten the history and feature axes into one channel axis.
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]

        output_1 = self.act(self.conv1(flow_x, graph_data))  # [B, N, hid_c]
        output_2 = self.act(self.conv2(output_1, graph_data))  # [B, N, out_c]

        return output_2.unsqueeze(2)  # [B, N, 1, out_c]

接着是GCN模型的构建。按照下面的公式,先计算出标准化的拉普拉斯矩阵,再和 $X$、$W$ 先后做矩阵乘法,就得到了最后的输出。GCN是一个著名的谱域图卷积方法,它对ChebNet进行进一步简化,只采用一阶切比雪夫多项式,一个卷积核只有一个 $\theta$ 需要学习。虽然卷积核减小了,但是通过多层堆叠可以获得与卷积神经网络类似的能力。因此,GCN也被认为是谱域到空域的一个过渡方法。

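$$Z = \tilde{D}^{-\frac{1}{2}} \tilde{A} \tilde{D}^{-\frac{1}{2}} X W, \qquad \tilde{A} = A + I,\quad \tilde{D}_{ii} = \sum_j \tilde{A}_{ij}$$

(下面的代码实现中采用的是行归一化形式 $\tilde{D}^{-1}\tilde{A}$。)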


class GCN(nn.Module):
    """Two-layer graph convolutional network using a row-normalized adjacency."""

    def __init__(self, in_c, hid_c, out_c):
        """
        :param in_c: input channels
        :param hid_c: hidden nodes
        :param out_c: output channels
        """
        super(GCN, self).__init__()
        self.linear_1 = nn.Linear(in_c, hid_c)
        self.linear_2 = nn.Linear(hid_c, out_c)
        self.act = nn.ReLU()

    def forward(self, data, device):
        """
        :param data: dict with keys "graph" (batched adjacency; only the first
            entry is used) and "flow_x" (input features, [B, N, H, D]).
        :param device: device the tensors are moved to before computing.
        :return: prediction, [B, N, 1, out_c]
        """
        graph_data = data["graph"].to(device)[0]  # [N, N]
        graph_data = self.process_graph(graph_data)

        flow_x = data["flow_x"].to(device)  # [B, N, H, D]

        B, N = flow_x.size(0), flow_x.size(1)

        # Flatten the history and feature axes into one channel axis.
        flow_x = flow_x.view(B, N, -1)  # [B, N, H*D]

        output_1 = self.linear_1(flow_x)  # [B, N, hid_c]
        output_1 = self.act(torch.matmul(graph_data, output_1))  # [N, N] x [B, N, hid_c]

        output_2 = self.linear_2(output_1)  # [B, N, out_c]
        output_2 = self.act(torch.matmul(graph_data, output_2))  # [B, N, out_c]

        return output_2.unsqueeze(2)  # [B, N, 1, out_c]

    @staticmethod
    def process_graph(graph_data):
        """
        Add self loops and row-normalize the adjacency matrix.

        :param graph_data: adjacency matrix, [N, N]; left unmodified.
        :return: D^(-1) * (A + I), [N, N]
        """
        N = graph_data.size(0)
        matrix_i = torch.eye(N, dtype=graph_data.dtype, device=graph_data.device)
        # Out-of-place add: graph_data may be a view into the caller's tensor,
        # and an in-place "+=" would stack another identity on it at every call.
        graph_data = graph_data + matrix_i  # A~ [N, N]

        degree_matrix = torch.sum(graph_data, dim=-1, keepdim=False)  # [N]
        degree_matrix = degree_matrix.pow(-1)
        degree_matrix[degree_matrix == float("inf")] = 0.  # [N]

        degree_matrix = torch.diag(degree_matrix)  # [N, N]

        return torch.mm(degree_matrix, graph_data)  # D^(-1) * A = \hat(A)


  1. 计算节点之间的关联度
    $$e_{ij} = \mathrm{LeakyReLU}\left(a^{T}\left[W h_i \,\Vert\, W h_j\right]\right)$$
  2. 使用softmax函数对每个节点的注意力系数进行归一化
    $$\alpha_{ij} = \mathrm{softmax}_j(e_{ij}) = \frac{\exp(e_{ij})}{\sum_{k \in \mathcal{N}_i} \exp(e_{ik})}$$
  3. 利用上述得到的注意力系数对邻域节点进行有区别的信息聚合,其中,$\alpha_{ij}$ 便是softmax归一化之后的注意力系数,$W$ 为共享的卷积核参数,$h_j$ 为节点特征。
    $$h_i' = \sigma\left(\sum_{j \in \mathcal{N}_i} \alpha_{ij} W h_j\right)$$


class GraphAttentionLayer(nn.Module):
    """Single-head graph attention layer operating on batched node features."""

    def __init__(self, in_c, out_c, alpha=0.2):
        """
        graph attention layer

        :param in_c: number of input channels per node
        :param out_c: number of output channels per node
        :param alpha: negative slope of the LeakyReLU applied to the attention scores
        """
        super(GraphAttentionLayer, self).__init__()
        self.in_c = in_c
        self.out_c = out_c
        self.alpha = alpha

        self.W = nn.Parameter(torch.empty(size=(in_c, out_c)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.empty(size=(2 * out_c, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, features, adj):
        """
        :param features: node features, [B, N, in_c]
        :param adj: adjacency matrix, [N, N]; entries > 0 mark an edge
        :return: attention-aggregated node features, [B, N, out_c]
        """
        N = features.size(1)
        # Build the identity on adj's own device instead of forcing CUDA.
        adj = adj + torch.eye(N, dtype=adj.dtype, device=adj.device)  # A+I
        h = torch.matmul(features, self.W)  # [B, N, out_c]

        # a^T [h_i || h_j] == a_src^T h_i + a_dst^T h_j, so the pairwise scores
        # come from a broadcast sum without materialising a [B, N, N, 2*out_c] tensor.
        score_src = torch.matmul(h, self.a[:self.out_c])  # [B, N, 1]
        score_dst = torch.matmul(h, self.a[self.out_c:])  # [B, N, 1]
        e = self.leakyrelu(score_src + score_dst.transpose(1, 2))  # [B, N, N]

        # Non-neighbours get a very negative score so softmax gives them ~0 weight.
        zero_vec = -1e12 * torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)  # [B, N, N]
        attention = F.softmax(attention, dim=2)  # normalise over neighbours j
        h_prime = torch.matmul(attention, h)  # [B, N, N] x [B, N, out_c] => [B, N, out_c]
        return h_prime

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_c) + ' -> ' + str(self.out_c) + ')'

class GAT(nn.Module):
    """Multi-head graph attention network: n_heads parallel heads, then one output head."""

    def __init__(self, in_c, hid_c, out_c, n_heads=8):
        """
        :param in_c: int, number of input channels.
        :param hid_c: int, number of hidden channels per attention head.
        :param out_c: int, number of output channels.
        :param n_heads: int, number of attention heads in the first layer.
        """
        super(GAT, self).__init__()
        self.attentions = nn.ModuleList([GraphAttentionLayer(in_c, hid_c) for _ in range(n_heads)])
        # The head outputs are concatenated, hence hid_c * n_heads input channels.
        self.conv2 = GraphAttentionLayer(hid_c * n_heads, out_c)
        self.act = nn.ReLU()

    def forward(self, data):
        """
        :param data: dict with keys "graph" (batched adjacency; only the first
            entry is used) and "flow_x" (input features, [B, N, H, D]).
        :return: prediction, [B, N, 1, out_c]
        """
        # data prepare
        adj = data["graph"][0]  # [N, N]
        x = data["flow_x"]  # [B, N, H, D]
        B, N = x.size(0), x.size(1)
        x = x.view(B, N, -1)  # [B, N, H*D]

        # forward
        outputs = torch.cat([attention(x, adj) for attention in self.attentions], dim=-1)  # [B, N, hid_c * n_heads]
        outputs = self.act(outputs)
        output_2 = self.act(self.conv2(outputs, adj))  # [B, N, out_c]

        return output_2.unsqueeze(2)  # [B, N, 1, out_c]



最后做个总结,本文只是对几个图卷积模型进行了简单的实验,事实上三个模型都有类似flow_x = flow_x.view(B, N, -1)的代码段,这代表我们将时序数据拼接到了一起,这就无疑等同于放弃了时间信息,实际上,对于这种时序任务,时间信息是至关重要的,像STGCN、ASTGCN、DCRNN等方法都是考虑时序特征才获得了很不错的效果。

