Linux内核中网络设备连接状态监测
在Linux中,网络设备会定时检测自身是否处于可传递(有载波)状态。当状态发生变化时,网络设备驱动程序会调用netif_carrier_on()或者netif_carrier_off()函数来通知内核。插拔网线,或者对端设备关闭或被禁用,都会导致连接状态改变。
netif_carrier_on() - 设备驱动监测到设备传递信号时调用
netif_carrier_off() - 设备驱动监测到设备丢失信号时调用
Linux内核中实现上述两个函数的代码位于net/sched/sch_generic.c文件中。它们都会调用linkwatch_fire_event(),将事件加入事件队列并进行调度。
以netif_carrier_on()
为例,函数调用过程如下:
netif_carrier_on() - net/sched/sch_generic.c
  linkwatch_fire_event() - net/core/link_watch.c
    linkwatch_add_event() - net/core/link_watch.c
    linkwatch_schedule_work() - net/core/link_watch.c
      mod_delayed_work() - include/linux/workqueue.h
        mod_delayed_work_on() - kernel/workqueue.c
          __queue_delayed_work() - kernel/workqueue.c
      schedule_delayed_work() - include/linux/workqueue.h
        queue_delayed_work() - include/linux/workqueue.h
          queue_delayed_work_on() - kernel/workqueue.c
            __queue_delayed_work() - kernel/workqueue.c
接收到信号
当驱动监测到设备获得载波信号时,会调用netif_carrier_on();该函数清除nocarrier标志后调用linkwatch_fire_event(),将设备加入事件处理队列进行处理。
/**
 * netif_carrier_on - set carrier
 * @dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	/* Act only on a real transition: NOCARRIER was set and is now cleared. */
	if (!test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
		return;

	/* Nothing more to do while reg_state is still NETREG_UNINITIALIZED. */
	if (dev->reg_state == NETREG_UNINITIALIZED)
		return;

	/* Count this carrier-up transition. */
	atomic_inc(&dev->carrier_up_count);

	/* Queue the device on the linkwatch event list for processing. */
	linkwatch_fire_event(dev);

	if (netif_running(dev))
		__netdev_watchdog_up(dev);
}
EXPORT_SYMBOL(netif_carrier_on);
丢失信号
当驱动监测到设备载波信号丢失时,会调用netif_carrier_off();该函数设置nocarrier标志后调用linkwatch_fire_event(),将设备加入事件处理队列进行处理。
/**
 * netif_carrier_off - clear carrier
 * @dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	/* Act only on a real transition: NOCARRIER was clear and is now set. */
	if (test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		return;

	/* Nothing more to do while reg_state is still NETREG_UNINITIALIZED. */
	if (dev->reg_state == NETREG_UNINITIALIZED)
		return;

	/* Count this carrier-down transition. */
	atomic_inc(&dev->carrier_down_count);

	/* Queue the device on the linkwatch event list for processing. */
	linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);
加入队列
linkwatch_fire_event()函数将设备加入事件队列并进行事件调度;调度时会根据事件是否紧急做不同处理。
/*
 * linkwatch_fire_event - queue a link-state event for @dev and schedule work.
 * @dev: network device whose link state changed
 *
 * The device is added to the event list only when its LINKWATCH_PENDING bit
 * was not already set. When the bit was already set, the work is rescheduled
 * only for an urgent event.
 */
void linkwatch_fire_event(struct net_device *dev)
{
	/* Urgency is evaluated before the pending bit is touched. */
	bool urgent = linkwatch_urgent_event(dev);

	if (test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
		/* Already pending: only an urgent event needs rescheduling. */
		if (!urgent)
			return;
	} else {
		/* First event for this device: put it on the event list. */
		linkwatch_add_event(dev);
	}

	linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);
/*
 * linkwatch_urgent_event - decide whether a link event for @dev is urgent.
 * @dev: network device to inspect
 *
 * Returns false when the device is not running. Otherwise returns true when
 * the device's ifindex differs from its iflink, or when it is a lag port or
 * lag master. In the remaining case the result is
 * netif_carrier_ok(dev) && qdisc_tx_changing(dev).
 */
static bool linkwatch_urgent_event(struct net_device *dev)
{
	/* A device that is not running never produces urgent events. */
	if (!netif_running(dev))
		return false;

	/*
	 * Urgent if ifindex and iflink differ, or if the device takes part in
	 * link aggregation (port or master).
	 */
	if (dev->ifindex != dev_get_iflink(dev) ||
	    netif_is_lag_port(dev) ||
	    netif_is_lag_master(dev))
		return true;

	/* Otherwise urgent only with carrier present and tx qdisc changing. */
	return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
/*
 * linkwatch_add_event - append @dev to lweventlist if it is not queued yet.
 * @dev: network device to queue
 *
 * Runs under lweventlist_lock with interrupts saved and restored. When the
 * device is newly queued, a tracked reference is taken on it.
 */
static void linkwatch_add_event(struct net_device *dev)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&lweventlist_lock, irq_flags);

	/* A non-empty link_watch_list node means the device is already queued. */
	if (!list_empty(&dev->link_watch_list))
		goto unlock;

	list_add_tail(&dev->link_watch_list, &lweventlist);
	dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);

unlock:
	spin_unlock_irqrestore(&lweventlist_lock, irq_flags);
}
/*
 * linkwatch_schedule_work - arrange for linkwatch_work to run.
 * @urgent: non-zero to request immediate execution
 *
 * Returns early when LW_URGENT is already set. An urgent request sets
 * LW_URGENT and the work is re-armed with zero delay; otherwise the work is
 * scheduled with the time remaining until linkwatch_nextevent.
 */
static void linkwatch_schedule_work(int urgent)
{
// Jiffies remaining from now until linkwatch_nextevent
unsigned long delay = linkwatch_nextevent - jiffies;
// LW_URGENT is already set, so return without scheduling
if (test_bit(LW_URGENT, &linkwatch_flags))
return;
/* Minimise down-time: drop delay for up event. */
// Urgent request: claim the LW_URGENT flag and drop the delay
if (urgent) {
// LW_URGENT was already set by the time we tried to set it; return
if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
return;
// Urgent: run with no delay
delay = 0;
}
/* If we wrap around we'll delay it by at most HZ. */
// A delay larger than HZ is dropped to zero
if (delay > HZ)
delay = 0;
/*
* If urgent, schedule immediate execution; otherwise, don't
* override the existing timer.
*/
// LW_URGENT set: re-arm the work on system_wq with zero delay
if (test_bit(LW_URGENT, &linkwatch_flags))
mod_delayed_work(system_wq, &linkwatch_work, 0);
// LW_URGENT not set: schedule the work with the computed delay
else
schedule_delayed_work(&linkwatch_work, delay);
}
事件处理
在net/core/link_watch.c中声明了delayed_work结构(即linkwatch_work)及其事件处理函数linkwatch_event()。
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);

/*
 * linkwatch_event - work handler bound to linkwatch_work.
 * @dummy: work item pointer, not used by this function
 *
 * Runs the linkwatch queue under the RTNL lock. The queue is run in
 * urgent-only mode while linkwatch_nextevent is still after jiffies.
 */
static void linkwatch_event(struct work_struct *dummy)
{
	int urgent_only;

	rtnl_lock();
	/* Non-zero while the next permitted event time is still in the future. */
	urgent_only = time_after(linkwatch_nextevent, jiffies);
	__linkwatch_run_queue(urgent_only);
	rtnl_unlock();
}
/*
 * __linkwatch_run_queue - process devices queued on lweventlist.
 * @urgent_only: non-zero to handle only devices for which
 *               linkwatch_urgent_event() returns true
 *
 * Handles a bounded number of devices per call, puts skipped and unprocessed
 * devices back on lweventlist, and reschedules the work when any remain.
 */
static void __linkwatch_run_queue(int urgent_only)
{
// urgent_only: 1 - next scheduled time not yet reached, 0 - next scheduled time reached
#define MAX_DO_DEV_PER_LOOP 100
// Number of devices handled per call, 100 by default
int do_dev = MAX_DO_DEV_PER_LOOP;
struct net_device *dev;
LIST_HEAD(wrk);
/* Give urgent case more budget */
// Urgent-only run: the budget is raised to 200
if (urgent_only)
do_dev += MAX_DO_DEV_PER_LOOP;
/*
* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
* cause a storm of messages on the netlink
* socket. This limit does not apply to up events
* while the device qdisc is down.
*/
// Scheduled time reached: the next event is allowed HZ jiffies from now
if (!urgent_only)
linkwatch_nextevent = jiffies + HZ;
/* Limit wrap-around effect on delay. */
// Scheduled time not reached but more than HZ jiffies ahead: reset it to now
else if (time_after(linkwatch_nextevent, jiffies + HZ))
linkwatch_nextevent = jiffies;
// Clear the urgent flag
clear_bit(LW_URGENT, &linkwatch_flags);
spin_lock_irq(&lweventlist_lock);
// Move every entry of lweventlist onto the local list wrk, leaving lweventlist empty
list_splice_init(&lweventlist, &wrk);
// Walk wrk until it is empty or the budget is used up
while (!list_empty(&wrk) && do_dev > 0) {
// Take the first queued device
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
// Unlink it from the list
list_del_init(&dev->link_watch_list);
// Device not present, or this is an urgent-only run and the device is not urgent
if (!netif_device_present(dev) ||
(urgent_only && !linkwatch_urgent_event(dev))) {
// Put it back at the tail of lweventlist and move on to the next device
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
/* We must free netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
spin_unlock_irq(&lweventlist_lock);
// Handle the device's link state with the list lock released
linkwatch_do_dev(dev);
do_dev--;
spin_lock_irq(&lweventlist_lock);
}
/* Add the remaining work back to lweventlist */
// Move whatever is left on wrk back onto lweventlist, leaving wrk empty
list_splice_init(&wrk, &lweventlist);
// Events are still queued: schedule the work again as non-urgent
if (!list_empty(&lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(&lweventlist_lock);
}
/*
 * linkwatch_do_dev - handle one queued link-state event for @dev.
 * @dev: network device taken off the linkwatch list
 *
 * Clears the LINKWATCH_PENDING bit, applies rfc2863_policy(), and, when the
 * device has IFF_UP set, activates or deactivates it according to carrier
 * state and announces the state change. Ends by dropping a device reference
 * with __dev_put().
 */
static void linkwatch_do_dev(struct net_device *dev)
{
	/*
	 * Make sure the above read is complete since it can be
	 * rewritten as soon as we clear the bit below.
	 */
	smp_mb__before_atomic();

	/* From here on new events for this device can be accepted. */
	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);

	rfc2863_policy(dev);

	/* A device without IFF_UP only needs its reference dropped. */
	if (!(dev->flags & IFF_UP))
		goto out_put;

	/* Carrier present: activate the device; otherwise deactivate it. */
	if (netif_carrier_ok(dev))
		dev_activate(dev);
	else
		dev_deactivate(dev);

	/* Announce the device state change. */
	netdev_state_change(dev);

out_put:
	/*
	 * Callers are responsible for calling netdev_tracker_free(), which is
	 * why __dev_put() is used here instead of dev_put().
	 */
	__dev_put(dev);
}