以下添加blackhole属性的nexthop及相关路由。

# ip -6 nexthop add id 1 blackhole
# 
# ip nexthop
id 1 blackhole 
#
# ip -6 nexthop add id 2 blackhole
#
# ip nexthop add id 3 group 1
# 
# ip nexthop   
id 1 blackhole 
id 2 blackhole 
id 3 group 1 
# 
# ip -6 route add 3ffe::/64 nhid 3     
#
$ ip -6 route
::1 dev lo proto kernel metric 256 pref medium
blackhole 3ffe::/64 nhid 3 metric 1024 pref medium
        nexthop dev lo weight 1

blackhole属性不能与网关,出接口,封装和FDB一同使用,之后的内核代码中可见到此判断。

# ip nexthop add id 5 via 3ffe::1 blackhole
Error: Blackhole attribute can not be used with gateway, oif, encap or fdb.

由于nexthop并不是常用的路由配置,内核中涉及nexthop的地方,通常使用unlikely。

blackhole下一跳添加

内核函数rtm_new_nexthop处理nexthop的添加操作,由netlink消息解析函数rtm_to_nh_config和添加函数nexthop_add组成。

static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
               struct netlink_ext_ack *extack)
{
    struct net *net = sock_net(skb->sk);
    struct nh_config cfg;
    struct nexthop *nh;
    int err;

    err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
    if (!err) {
        nh = nexthop_add(net, &cfg, extack);
        if (IS_ERR(nh))
            err = PTR_ERR(nh);
    }

如果配置了blackhole属性,设置nh_config结构体的成员nh_blackhole为1。此处可见blackhole属性不能与网关,出接口等一同配置。

static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                struct nlmsghdr *nlh, struct nh_config *cfg,
                struct netlink_ext_ack *extack)
{

    if (tb[NHA_BLACKHOLE]) {
        if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
            tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
            NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
            goto out;
        }

        cfg->nh_blackhole = 1;
        err = 0;
        goto out;
    }

nexthop创建时,对于blackhole下一跳,设置reject_nh为1,并将命名空间中的回环接口设置为下一跳出接口。

static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
                      struct netlink_ext_ack *extack)
{
    struct nh_info *nhi;
    struct nexthop *nh;

    nh = nexthop_alloc();
    if (!nh)
        return ERR_PTR(-ENOMEM);

    nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
    ...

    if (cfg->nh_fdb)
        nhi->fdb_nh = 1;

    if (cfg->nh_blackhole) {
        nhi->reject_nh = 1;
        cfg->nh_ifindex = net->loopback_dev->ifindex;
    }
    switch (cfg->nh_family) {
    case AF_INET:
        err = nh_create_ipv4(net, nh, nhi, cfg, extack);
        break;
    case AF_INET6:
        err = nh_create_ipv6(net, nh, nhi, cfg, extack);
        break;
    }

blackhole下一跳组

如下,ID 1和2均为blackhole属性的下一跳。含有blackhole下一跳的组只能包含一条路径,即blackhole下一跳不能与其它下一跳(包括另一个blackhole下一跳)同处一个组内。

# ip nexthop add id 3 group 1/2
Error: Blackhole nexthop can not be used in a group with more than 1 path.

内核函数valid_group_nh对此进行检查。

static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
               bool *is_fdb, struct netlink_ext_ack *extack)
{
    if (nh->is_group) {
        ...
    } else {
        struct nh_info *nhi = rtnl_dereference(nh->nh_info);

        if (nhi->reject_nh && npaths > 1) {
            NL_SET_ERR_MSG(extack,
                       "Blackhole nexthop can not be used in a group with more than 1 path");
            return false;
        }
        *is_fdb = nhi->fdb_nh;
    }

    return true;

下一跳blackhole判断

函数nexthop_is_blackhole用于判断nexthop是否设置了reject_nh。对于组(group),以上也看到,含有blackhole下一跳的组内下一跳个数不会大于1,因此如果个数大于1,表明当前组不是blackhole;否则取组内的第一个下一跳进行判断。

/*
 * Report whether @nh is a blackhole nexthop.
 *
 * For a plain nexthop the answer is the reject_nh flag of its nh_info.
 * For a group, a group with more than one entry is never reported as
 * blackhole; otherwise the reject_nh flag of the group's first entry
 * decides.
 */
static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{        
    const struct nh_info *nhi;

    if (nh->is_group) {
        struct nh_group *nh_grp;

        nh_grp = rcu_dereference_rtnl(nh->nh_grp);
        /* More than one entry: the group is not reported as blackhole. */
        if (nh_grp->num_nh > 1) 
            return false;

        /* Continue the check with the group's first entry. */
        nh = nh_grp->nh_entries[0].nh;
    }

    nhi = rcu_dereference_rtnl(nh->nh_info);
    return nhi->reject_nh;
}

IPv6通用路由查询

在查询到fib6_node之后,由函数rt6_device_match进行出接口判断,如果匹配的为blackhole下一跳,跳到do_create创建blackhole路由缓存。

INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
                         struct fib6_table *table, struct flowi6 *fl6,
                         const struct sk_buff *skb, int flags)
{
    struct fib6_result res = {};
    struct fib6_node *fn;
    struct rt6_info *rt;

    if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
        flags &= ~RT6_LOOKUP_F_IFACE;

    rcu_read_lock();
    fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
    res.f6i = rcu_dereference(fn->leaf);
    if (!res.f6i)
        res.f6i = net->ipv6.fib6_null_entry;
    else
        rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif, flags);

    if (res.f6i == net->ipv6.fib6_null_entry) {
        fn = fib6_backtrack(fn, &fl6->saddr);
        if (fn) goto restart;

        rt = net->ipv6.ip6_null_entry;
        dst_hold(&rt->dst);
        goto out;
    } else if (res.fib6_flags & RTF_REJECT) {
        goto do_create;
    }

如下函数,blackhole路由缓存的处理函数为dst_discard_out和dst_discard,匹配的流量都将被丢弃。

static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
    rt->dst.error = ip6_rt_type_to_error(fib6_type);

    switch (fib6_type) {
    case RTN_BLACKHOLE:
        rt->dst.output = dst_discard_out;
        rt->dst.input = dst_discard;
        break;

如果路由查询未指定出接口(oif为零),并且也没有指定源地址,当查询到的路由项使用了nexthop对象,而且其具有blackhole属性时,设置RTF_REJECT标志和RTN_BLACKHOLE类型。

static void rt6_device_match(struct net *net, struct fib6_result *res,
                 const struct in6_addr *saddr, int oif, int flags)
{
    struct fib6_info *f6i = res->f6i;
    struct fib6_info *spf6i;
    struct fib6_nh *nh;

    if (!oif && ipv6_addr_any(saddr)) {
        if (unlikely(f6i->nh)) {
            nh = nexthop_fib6_nh(f6i->nh);
            if (nexthop_is_blackhole(f6i->nh))
                goto out_blackhole;
        } else {
            nh = f6i->fib6_nh;
        }
        if (!(nh->fib_nh_flags & RTNH_F_DEAD))
            goto out;
    }

遍历fib6_info链表,对于配置了nexthop的路由项,由函数rt6_nh_dev_match处理,查找符合条件的下一跳,如果找到结束处理。

for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
        bool matched = false;

        if (unlikely(spf6i->nh)) {
            nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr, oif, flags);
            if (nh)
                matched = true;
        } else {
            nh = spf6i->fib6_nh;
            if (__rt6_device_match(net, nh, saddr, oif, flags))
                matched = true;
        }
        if (matched) {
            res->f6i = spf6i;
            goto out;
        }
    }

如果以上查找未匹配:当指定了出接口并且要求严格匹配接口(设置了RT6_LOOKUP_F_IFACE标志)时,结果置为fib6_null_entry;否则,如果当前路由项具有blackhole属性,跳到out_blackhole处理。

if (oif && flags & RT6_LOOKUP_F_IFACE) {
        res->f6i = net->ipv6.fib6_null_entry;
        nh = res->f6i->fib6_nh;
        goto out;
    }

    if (unlikely(f6i->nh)) {
        nh = nexthop_fib6_nh(f6i->nh);
        if (nexthop_is_blackhole(f6i->nh))
            goto out_blackhole;
    } else {
        nh = f6i->fib6_nh;
    }

    if (nh->fib_nh_flags & RTNH_F_DEAD) {
        res->f6i = net->ipv6.fib6_null_entry;
        nh = res->f6i->fib6_nh;
    }
out:
    res->nh = nh;
    res->fib6_type = res->f6i->fib6_type;
    res->fib6_flags = res->f6i->fib6_flags;
    return;

out_blackhole:
    res->fib6_flags |= RTF_REJECT;
    res->fib6_type = RTN_BLACKHOLE;
    res->nh = nh;

如下函数rt6_nh_dev_match,对于具有blackhole属性的nexthop,返回NULL。

static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
                    struct fib6_result *res, const struct in6_addr *saddr,
                    int oif, int flags)
{
    struct fib6_nh_dm_arg arg = {
        .net   = net,
        .saddr = saddr,
        .oif   = oif,
        .flags = flags,
    };

    if (nexthop_is_blackhole(nh))
        return NULL;

    if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
        return arg.nh;

    return NULL;

IPv6输入输出路由查询

input和output路由查询函数最终调用的都是ip6_pol_route,其中核心查找函数为fib6_table_lookup。

struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
                   int oif, struct flowi6 *fl6, const struct sk_buff *skb, int flags)
{
    struct fib6_result res = {};
    struct rt6_info *rt = NULL;

    fib6_table_lookup(net, table, oif, fl6, &res, strict);
    if (res.f6i == net->ipv6.fib6_null_entry)
        goto out;

函数fib6_table_lookup查询操作调用的为通用的fib6_node_lookup函数,之后rt6_select负责选择合适的路由项。

int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
              struct flowi6 *fl6, struct fib6_result *res, int strict)
{
    struct fib6_node *fn, *saved_fn;

    fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
    saved_fn = fn;

    if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
        oif = 0;

redo_rt6_select:
    rt6_select(net, fn, oif, res, strict);

rt6_select功能主要由函数__find_rr_leaf完成,如果路由项指定的下一跳为nexthop所定义,检查nexthop是否具有blackhole属性,为真将结果fib6_result的标志设置为RTF_REJECT,将类型设置为RTN_BLACKHOLE。

static void __find_rr_leaf(struct fib6_info *f6i_start, struct fib6_info *nomatch, u32 metric,
               struct fib6_result *res, struct fib6_info **cont, int oif, int strict, bool *do_rr, int *mpri)
{
    struct fib6_info *f6i;

    for (f6i = f6i_start; f6i && f6i != nomatch;
         f6i = rcu_dereference(f6i->fib6_next)) {
        struct fib6_nh *nh;

        if (cont && f6i->fib6_metric != metric) {
            *cont = f6i;
            return;
        }
        if (fib6_check_expired(f6i)) continue;

        if (unlikely(f6i->nh)) {
            struct fib6_nh_frl_arg arg = {
                .flags  = f6i->fib6_flags,
                .oif    = oif,
                .strict = strict,
                .mpri   = mpri,
                .do_rr  = do_rr
            };
            if (nexthop_is_blackhole(f6i->nh)) {
                res->fib6_flags = RTF_REJECT;
                res->fib6_type = RTN_BLACKHOLE;
                res->f6i = f6i;
                res->nh = nexthop_fib6_nh(f6i->nh);
                return;
            }

IPv4路由查询

如下fib_table_lookup函数,对于查询到的blackhole属性的nexthop,跳到out_reject,返回错误-EINVAL。

const struct fib_prop fib_props[RTN_MAX + 1] = {
     [RTN_BLACKHOLE] = {
        .error  = -EINVAL,
        .scope  = RT_SCOPE_UNIVERSE,
    },

int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
             struct fib_result *res, int fib_flags)
{
    struct fib_alias *fa;

out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
            this_cpu_inc(stats->semantic_match_passed);
#endif
            trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
            return err;
        }
        if (fi->fib_flags & RTNH_F_DEAD)
            continue;

        if (unlikely(fi->nh)) {
            if (nexthop_is_blackhole(fi->nh)) {
                err = fib_props[RTN_BLACKHOLE].error;
                goto out_reject;
            }

            nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, &nhsel);
            if (nhc)
                goto set_result;
            goto miss;
        }

对于fib_lookup函数,仅当fib_table_lookup的返回值为-EAGAIN时,才将其转换为网络不可达错误-ENETUNREACH;blackhole下一跳对应的-EINVAL不做转换,原样返回给调用者。

static inline int fib_lookup(struct net *net, struct flowi4 *flp,
                 struct fib_result *res, unsigned int flags)
{
    struct fib_table *tb;
    int err = -ENETUNREACH;

    flags |= FIB_LOOKUP_NOREF;
    if (net->ipv4.fib_has_custom_rules)
        return __fib_lookup(net, flp, res, flags);


    tb = rcu_dereference_rtnl(net->ipv4.fib_main);
    if (tb)
        err = fib_table_lookup(tb, flp, res, flags);

    if (!err) goto out;

    tb = rcu_dereference_rtnl(net->ipv4.fib_default);
    if (tb)
        err = fib_table_lookup(tb, flp, res, flags);

out:
    if (err == -EAGAIN) err = -ENETUNREACH;

    return err;

多路径路由选择

不管是通用路由查找,还是input/output路由查找,都会使用fib6_select_path做路径选择。如果不存在多个路径,此函数直接返回。如果路由项指定了nexthop,由函数nexthop_path_fib6_result进行选择。

void fib6_select_path(const struct net *net, struct fib6_result *res,
              struct flowi6 *fl6, int oif, bool have_oif_match,
              const struct sk_buff *skb, int strict)
{
    struct fib6_info *sibling, *next_sibling;
    struct fib6_info *match = res->f6i;

    if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
        goto out;

    if (match->nh && have_oif_match && res->nh)
        return;

    /* We might have already computed the hash for ICMPv6 errors. In such
     * case it will always be non-zero. Otherwise now is the time to do it.
     */
    if (!fl6->mp_hash &&
        (!match->nh || nexthop_is_multipath(match->nh)))
        fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

    if (unlikely(match->nh)) {
        nexthop_path_fib6_result(res, fl6->mp_hash);
        return;
    }

如果路由项的nexthop设置了reject_nh标志,fib6_result结构的成员fib6_flags增加标志RTF_REJECT,将类型设置为RTN_BLACKHOLE。

static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
{
    struct nexthop *nh = res->f6i->nh;
    struct nh_info *nhi;

    nh = nexthop_select_path(nh, hash);

    nhi = rcu_dereference_rtnl(nh->nh_info);
    if (nhi->reject_nh) {
        res->fib6_type = RTN_BLACKHOLE;
        res->fib6_flags |= RTF_REJECT;
        res->nh = nexthop_fib6_nh(nh);
    } else {
        res->nh = &nhi->fib6_nh;
    }

内核版本 5.10