以下添加blackhole属性的nexthop及相关路由。
# ip -6 nexthop add id 1 blackhole
#
# ip nexthop
id 1 blackhole
#
# ip -6 nexthop add id 2 blackhole
#
# ip nexthop add id 3 group 1
#
# ip nexthop
id 1 blackhole
id 2 blackhole
id 3 group 1
#
# ip -6 route add 3ffe::/64 nhid 3
#
$ ip -6 route
::1 dev lo proto kernel metric 256 pref medium
blackhole 3ffe::/64 nhid 3 metric 1024 pref medium
nexthop dev lo weight 1
blackhole属性不能与网关,出接口,封装和FDB一同使用,之后的内核代码中可见到此判断。
# ip nexthop add id 5 via 3ffe::1 blackhole
Error: Blackhole attribute can not be used with gateway, oif, encap or fdb.
由于nexthop并不是常用的路由配置,内核中涉及nexthop的地方,通常使用unlikely。
blackhole下一跳添加
内核函数rtm_new_nexthop处理nexthop的添加操作,由netlink消息解析函数rtm_to_nh_config和添加函数nexthop_add组成。
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nh_config cfg;
struct nexthop *nh;
int err;
err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
if (!err) {
nh = nexthop_add(net, &cfg, extack);
if (IS_ERR(nh))
err = PTR_ERR(nh);
}
如果配置了blackhole属性,设置nh_config结构体的成员nh_blackhole为1。此处可见blackhole属性不能与网关,出接口等一同配置。
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
struct nlmsghdr *nlh, struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
if (tb[NHA_BLACKHOLE]) {
if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
goto out;
}
cfg->nh_blackhole = 1;
err = 0;
goto out;
}
nexthop创建时,对于blackhole下一跳,设置reject_nh为1,并将命名空间中的回环接口设置为下一跳出接口。
static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;
struct nexthop *nh;
nh = nexthop_alloc();
if (!nh)
return ERR_PTR(-ENOMEM);
nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
...
if (cfg->nh_fdb)
nhi->fdb_nh = 1;
if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
cfg->nh_ifindex = net->loopback_dev->ifindex;
}
switch (cfg->nh_family) {
case AF_INET:
err = nh_create_ipv4(net, nh, nhi, cfg, extack);
break;
case AF_INET6:
err = nh_create_ipv6(net, nh, nhi, cfg, extack);
break;
}
blackhole下一跳组
如下,ID 1和2均为blackhole属性的下一跳。blackhole下一跳不能用于路径数大于1的组,即包含blackhole下一跳的组内只能有这一个成员。
# ip nexthop add id 3 group 1/2
Error: Blackhole nexthop can not be used in a group with more than 1 path.
内核函数valid_group_nh对此进行检查。
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
bool *is_fdb, struct netlink_ext_ack *extack)
{
if (nh->is_group) {
...
} else {
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
if (nhi->reject_nh && npaths > 1) {
NL_SET_ERR_MSG(extack,
"Blackhole nexthop can not be used in a group with more than 1 path");
return false;
}
*is_fdb = nhi->fdb_nh;
}
return true;
下一跳blackhole判断
函数nexthop_is_blackhole用于判断nexthop是否设置了reject_nh。对于组group,以上也看到,包含blackhole下一跳的组内下一跳个数不会大于1,因此如果个数大于1,表明当前组不是blackhole;否则检查组内第一个下一跳。
/*
 * Report whether @nh is a blackhole nexthop, i.e. whether the reject_nh
 * flag is set in its nh_info.
 *
 * For a group, a group with more than one entry is reported as not being
 * a blackhole; otherwise the group's first entry is inspected instead.
 */
static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{
const struct nh_info *nhi;
if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
/* A multi-entry group is never treated as a blackhole. */
if (nh_grp->num_nh > 1)
return false;
/* Continue the check on the first entry of the group. */
nh = nh_grp->nh_entries[0].nh;
}
nhi = rcu_dereference_rtnl(nh->nh_info);
return nhi->reject_nh;
}
IPv6通用路由查询
在查询到fib6_node之后,由函数rt6_device_match进行出接口判断,如果匹配的为blackhole下一跳,跳到do_create创建blackhole路由缓存。
INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
{
struct fib6_result res = {};
struct fib6_node *fn;
struct rt6_info *rt;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
flags &= ~RT6_LOOKUP_F_IFACE;
rcu_read_lock();
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
res.f6i = rcu_dereference(fn->leaf);
if (!res.f6i)
res.f6i = net->ipv6.fib6_null_entry;
else
rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif, flags);
if (res.f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn) goto restart;
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
goto out;
} else if (res.fib6_flags & RTF_REJECT) {
goto do_create;
}
如下函数,blackhole路由缓存的处理函数为dst_discard_out和dst_discard,匹配的流量都将被丢弃。
static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
rt->dst.error = ip6_rt_type_to_error(fib6_type);
switch (fib6_type) {
case RTN_BLACKHOLE:
rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
break;
如果路由查询未指定出接口(oif为零),并且也没有指定源地址,如果查询到的路由项为nexthop,而且其具有blackhole属性,设置RTF_REJECT标志,和RTN_BLACKHOLE类型。
static void rt6_device_match(struct net *net, struct fib6_result *res,
const struct in6_addr *saddr, int oif, int flags)
{
struct fib6_info *f6i = res->f6i;
struct fib6_info *spf6i;
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
if (unlikely(f6i->nh)) {
nh = nexthop_fib6_nh(f6i->nh);
if (nexthop_is_blackhole(f6i->nh))
goto out_blackhole;
} else {
nh = f6i->fib6_nh;
}
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
遍历fib6_info链表,对于配置了nexthop的路由项,由函数rt6_nh_dev_match处理,查找符合条件的下一跳,如果找到则结束处理。
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
bool matched = false;
if (unlikely(spf6i->nh)) {
nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr, oif, flags);
if (nh)
matched = true;
} else {
nh = spf6i->fib6_nh;
if (__rt6_device_match(net, nh, saddr, oif, flags))
matched = true;
}
if (matched) {
res->f6i = spf6i;
goto out;
}
}
如果以上查找未匹配,并且当前路由项具有blackhole属性,跳到out_blackhole处理。
if (oif && flags & RT6_LOOKUP_F_IFACE) {
res->f6i = net->ipv6.fib6_null_entry;
nh = res->f6i->fib6_nh;
goto out;
}
if (unlikely(f6i->nh)) {
nh = nexthop_fib6_nh(f6i->nh);
if (nexthop_is_blackhole(f6i->nh))
goto out_blackhole;
} else {
nh = f6i->fib6_nh;
}
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
nh = res->f6i->fib6_nh;
}
out:
res->nh = nh;
res->fib6_type = res->f6i->fib6_type;
res->fib6_flags = res->f6i->fib6_flags;
return;
out_blackhole:
res->fib6_flags |= RTF_REJECT;
res->fib6_type = RTN_BLACKHOLE;
res->nh = nh;
如下函数rt6_nh_dev_match,对于具有blackhole属性的nexthop,返回NULL。
static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
struct fib6_result *res, const struct in6_addr *saddr,
int oif, int flags)
{
struct fib6_nh_dm_arg arg = {
.net = net,
.saddr = saddr,
.oif = oif,
.flags = flags,
};
if (nexthop_is_blackhole(nh))
return NULL;
if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
return arg.nh;
return NULL;
IPv6输入输出路由查询
input和output路由查询函数最终调用的都是ip6_pol_route,其中核心查找函数为fib6_table_lookup。
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, const struct sk_buff *skb, int flags)
{
struct fib6_result res = {};
struct rt6_info *rt = NULL;
fib6_table_lookup(net, table, oif, fl6, &res, strict);
if (res.f6i == net->ipv6.fib6_null_entry)
goto out;
函数fib6_table_lookup查询操作调用的为通用的fib6_node_lookup函数,之后rt6_select负责选择合适的路由项。
int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, struct fib6_result *res, int strict)
{
struct fib6_node *fn, *saved_fn;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
oif = 0;
redo_rt6_select:
rt6_select(net, fn, oif, res, strict);
rt6_select功能主要由函数__find_rr_leaf完成,如果路由项指定的下一跳为nexthop所定义,检查nexthop是否具有blackhole属性,为真将结果fib6_result的标志设置为RTF_REJECT,将类型设置为RTN_BLACKHOLE。
static void __find_rr_leaf(struct fib6_info *f6i_start, struct fib6_info *nomatch, u32 metric,
struct fib6_result *res, struct fib6_info **cont, int oif, int strict, bool *do_rr, int *mpri)
{
struct fib6_info *f6i;
for (f6i = f6i_start; f6i && f6i != nomatch;
f6i = rcu_dereference(f6i->fib6_next)) {
struct fib6_nh *nh;
if (cont && f6i->fib6_metric != metric) {
*cont = f6i;
return;
}
if (fib6_check_expired(f6i)) continue;
if (unlikely(f6i->nh)) {
struct fib6_nh_frl_arg arg = {
.flags = f6i->fib6_flags,
.oif = oif,
.strict = strict,
.mpri = mpri,
.do_rr = do_rr
};
if (nexthop_is_blackhole(f6i->nh)) {
res->fib6_flags = RTF_REJECT;
res->fib6_type = RTN_BLACKHOLE;
res->f6i = f6i;
res->nh = nexthop_fib6_nh(f6i->nh);
return;
}
IPv4路由查询
如下fib_table_lookup函数,对于查询到的blackhole属性的nexthop,跳到out_reject,返回错误-EINVAL。
const struct fib_prop fib_props[RTN_MAX + 1] = {
[RTN_BLACKHOLE] = {
.error = -EINVAL,
.scope = RT_SCOPE_UNIVERSE,
},
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct fib_result *res, int fib_flags)
{
struct fib_alias *fa;
out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
if (unlikely(fi->nh)) {
if (nexthop_is_blackhole(fi->nh)) {
err = fib_props[RTN_BLACKHOLE].error;
goto out_reject;
}
nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, &nhsel);
if (nhc)
goto set_result;
goto miss;
}
对于fib_lookup函数,首先查询main表,如果fib_table_lookup返回非零错误(例如blackhole下一跳对应的-EINVAL),将继续查询default表。最后仅当错误码为-EAGAIN时,fib_lookup才将其转换为网络不可达错误-ENETUNREACH;-EINVAL不做转换,原样返回给调用者。
static inline int fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
struct fib_table *tb;
int err = -ENETUNREACH;
flags |= FIB_LOOKUP_NOREF;
if (net->ipv4.fib_has_custom_rules)
return __fib_lookup(net, flp, res, flags);
tb = rcu_dereference_rtnl(net->ipv4.fib_main);
if (tb)
err = fib_table_lookup(tb, flp, res, flags);
if (!err) goto out;
tb = rcu_dereference_rtnl(net->ipv4.fib_default);
if (tb)
err = fib_table_lookup(tb, flp, res, flags);
out:
if (err == -EAGAIN) err = -ENETUNREACH;
return err;
多路径路由选择
不管是通用路由查找,还是input/output路由查找,都会使用fib6_select_path做路径选择。如果不存在多个路径,此函数直接返回。如果路由项指定了nexthop,由函数nexthop_path_fib6_result进行选择。
void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
if (match->nh && have_oif_match && res->nh)
return;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash &&
(!match->nh || nexthop_is_multipath(match->nh)))
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (unlikely(match->nh)) {
nexthop_path_fib6_result(res, fl6->mp_hash);
return;
}
如果路由项的nexthop设置了reject_nh标志,fib6_result结构的成员fib6_flags增加标志RTF_REJECT,将类型设置为RTN_BLACKHOLE。
static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
{
struct nexthop *nh = res->f6i->nh;
struct nh_info *nhi;
nh = nexthop_select_path(nh, hash);
nhi = rcu_dereference_rtnl(nh->nh_info);
if (nhi->reject_nh) {
res->fib6_type = RTN_BLACKHOLE;
res->fib6_flags |= RTF_REJECT;
res->nh = nexthop_fib6_nh(nh);
} else {
res->nh = &nhi->fib6_nh;
}
内核版本 5.10