How it is called:

with tf.variable_scope('t_attend_r'):
    t_a_r = layers.block(t, r, r, Q_lengths=t_len, K_lengths=r_len)

with tf.variable_scope('r_attend_t'):
    r_a_t = layers.block(r, t, t, Q_lengths=r_len, K_lengths=t_len)
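Here t and r are presumably the embedded context turn and response candidate, and t_len, r_len their true (unpadded) lengths. A minimal sketch of how these tensors might be set up, assuming TensorFlow 1.x placeholders and illustrative sizes (the concrete dimensions are assumptions, not taken from the original code):

import tensorflow as tf

# illustrative sizes (assumptions)
batch_size, max_turn_len, max_response_len, emb_dim = 32, 50, 50, 200

# embedded context turn and response candidate
t = tf.placeholder(tf.float32, [batch_size, max_turn_len, emb_dim], name='turn')
r = tf.placeholder(tf.float32, [batch_size, max_response_len, emb_dim], name='response')

# actual (unpadded) length of each sequence in the batch
t_len = tf.placeholder(tf.int32, [batch_size], name='turn_len')
r_len = tf.placeholder(tf.int32, [batch_size], name='response_len')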

layers.block is defined as follows:

def block(
        Q, K, V,
        Q_lengths, K_lengths,
        attention_type='dot',
        is_layer_norm=True,
        is_mask=True, mask_value=-2**32+1,
        drop_prob=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.
    Args:
        Q: [batch, Q_time, Q_dimension]
        K: [batch, K_time, K_dimension]
        V: [batch, K_time, V_dimension]

        Q_lengths: [batch]
        K_lengths: [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]
    '''
    att = attention(Q, K, V,
                    Q_lengths, K_lengths,
                    attention_type=attention_type,
                    is_mask=is_mask, mask_value=mask_value,
                    drop_prob=drop_prob)
    # residual connection + layer norm (the "Add & Norm" step)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm'):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att

    # position-wise feed-forward network, again followed by Add & Norm
    z = FFN(y)
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm'):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    return w
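op.layer_norm_debug is not shown in this section. Below is a plausible sketch of what it does, a standard layer normalization over the last dimension with learned scale and bias; the body is an assumption, only the name comes from the calls above:

import tensorflow as tf

def layer_norm_debug(x, epsilon=1e-6):
    '''Layer normalization over the last axis (sketch, not the original helper).'''
    dim = x.shape[-1]
    scale = tf.get_variable('scale', shape=[dim],
                            initializer=tf.ones_initializer())
    bias = tf.get_variable('bias', shape=[dim],
                           initializer=tf.zeros_initializer())
    # per-position mean/variance over the feature dimension
    mean, variance = tf.nn.moments(x, axes=[-1], keep_dims=True)
    normalized = (x - mean) * tf.rsqrt(variance + epsilon)
    return normalized * scale + bias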


def attention(
        Q, K, V,
        Q_lengths, K_lengths,
        attention_type='dot',
        is_mask=True, mask_value=-2**32+1,
        drop_prob=None):
    '''Add attention layer.
    Args:
        Q: [batch, Q_time, Q_dimension]
        K: [batch, K_time, K_dimension]
        V: [batch, K_time, V_dimension]

        Q_lengths: [batch]
        K_lengths: [batch]

    Returns:
        a tensor with shape [batch, Q_time, V_dimension]
    '''
    assert attention_type in ('dot', 'bilinear')
    if attention_type == 'dot':
        assert Q.shape[-1] == K.shape[-1]

    Q_time = Q.shape[1]
    K_time = K.shape[1]

    if attention_type == 'dot':
        logits = op.dot_sim(Q, K)  # [batch, Q_time, K_time]
    elif attention_type == 'bilinear':
        logits = op.bilinear_sim(Q, K)

    if is_mask:
        # push logits at padded positions to a large negative value
        mask = op.mask(Q_lengths, K_lengths, Q_time, K_time)  # [batch, Q_time, K_time]
        logits = mask * logits + (1 - mask) * mask_value

    attention = tf.nn.softmax(logits)

    if drop_prob is not None:
        print('use attention drop')
        attention = tf.nn.dropout(attention, drop_prob)

    return op.weighted_sum(attention, V)
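The op.* helpers used by attention are defined elsewhere in the repo. The sketches below show plausible implementations that are consistent with the shape comments above; they are assumptions for illustration, not the original code:

import tensorflow as tf

def dot_sim(Q, K):
    '''Scaled dot-product similarity: [batch, Q_time, K_time] (sketch).'''
    d = tf.cast(tf.shape(Q)[-1], tf.float32)
    return tf.matmul(Q, K, transpose_b=True) / tf.sqrt(d)

def bilinear_sim(Q, K):
    '''Bilinear similarity Q * W * K^T: [batch, Q_time, K_time] (sketch).'''
    W = tf.get_variable('bilinear_W',
                        shape=[Q.shape[-1], K.shape[-1]],
                        initializer=tf.orthogonal_initializer())
    QW = tf.tensordot(Q, W, axes=[[2], [0]])  # [batch, Q_time, K_dimension]
    return tf.matmul(QW, K, transpose_b=True)  # [batch, Q_time, K_time]

def mask(row_lengths, col_lengths, max_row_length, max_col_length):
    '''Outer product of two padding masks: [batch, max_row_length, max_col_length] (sketch).'''
    row_mask = tf.sequence_mask(row_lengths, max_row_length, dtype=tf.float32)  # [batch, max_row]
    col_mask = tf.sequence_mask(col_lengths, max_col_length, dtype=tf.float32)  # [batch, max_col]
    return tf.einsum('bi,bj->bij', row_mask, col_mask)

def weighted_sum(weights, V):
    '''weights: [batch, Q_time, K_time], V: [batch, K_time, V_dim] -> [batch, Q_time, V_dim].'''
    return tf.matmul(weights, V)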

def FFN(x, out_dimension_0=None, out_dimension_1=None):
    '''Add a two-layer feed-forward network: max(0, x*W0 + b0)*W1 + b1.

    Args:
        x: [batch, time, dimension]
        out_dimension_0: output dimension of the first dense layer
        out_dimension_1: output dimension of the second dense layer

    Returns:
        [batch, time, out_dimension_1]
    '''
    with tf.variable_scope('FFN_1'):
        y = op.dense(x, out_dimension_0)
        y = tf.nn.relu(y)
    with tf.variable_scope('FFN_2'):
        z = op.dense(y, out_dimension_1)
    return z
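op.dense is likewise defined elsewhere; here is a minimal sketch matching how FFN uses it (the defaults and the add_bias switch are assumptions):

import tensorflow as tf

def dense(x, out_dimension=None, add_bias=True):
    '''Apply x*W (+ b) along the last axis of a [batch, time, dimension] tensor (sketch).'''
    if out_dimension is None:
        out_dimension = x.shape[-1]
    W = tf.get_variable('weights',
                        shape=[x.shape[-1], out_dimension],
                        initializer=tf.orthogonal_initializer())
    y = tf.tensordot(x, W, axes=[[2], [0]])  # [batch, time, out_dimension]
    if add_bias:
        b = tf.get_variable('bias', shape=[out_dimension],
                            initializer=tf.zeros_initializer())
        y = y + b
    return y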

The block therefore takes two sequences as input (a query sequence attending over a key/value sequence) and outputs a tensor of shape [batch, sequence_len, hidden_dim], where sequence_len is the length of the query sequence.
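As a quick sanity check of those shapes (reusing the assumed placeholders from the first sketch), each output keeps its own query's time axis:

print(t_a_r.shape)  # (32, 50, 200) = [batch, max_turn_len, emb_dim]
print(r_a_t.shape)  # (32, 50, 200) = [batch, max_response_len, emb_dim]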