内核中提供了等待队列,作用是实现阻塞操作。比如,当一个应用程序去读取设备上的数据时,可能设备驱动中暂时没有数据,那么此时可以把当前进程suspend,等待有数据输入了,即条件满足时,再将此进程唤醒继续执行。
1. 创建一个等待队列
在Linux内核中,wait_queue_head_t代表一个等待队列,只需要定义一个wait_queue_head_t类型的变量,就表示创建一个等待队列,还需要调用如下接口来初始化此队列:
staitc wait_queue_head_t prod_wq;
init_waitqueue_head(&prod_wq);
staitc wait_queue_head_t prod_wq;
init_waitqueue_head(&prod_wq);
具体看一下wait_queue_head_t数据类型:
/* Head of a wait queue: a spinlock plus the list of waiters queued on it. */
struct __wait_queue_head {
spinlock_t lock; /* keeps operations on task_list atomic */
struct list_head task_list; /* list of wait_queue_t waiter entries */
};
typedef struct __wait_queue_head wait_queue_head_t;
/* Head of a wait queue: a spinlock plus the list of waiters queued on it. */
struct __wait_queue_head {
spinlock_t lock; /* keeps operations on task_list atomic */
struct list_head task_list; /* list of wait_queue_t waiter entries */
};
typedef struct __wait_queue_head wait_queue_head_t;
就是一个链表和一把自旋锁,链表是用于保存等待该队列的wait_queue_t类型waiter对象(此类型对象内部的private成员保存了当前的任务对象task_struct *),自旋锁是为了保证对链表操作的原子性。这里简单的看一下wait_queue_t数据类型:
typedef struct __wait_queue wait_queue_t;
/* One waiter entry; it is linked into a wait_queue_head_t while its task waits. */
struct __wait_queue {
unsigned int flags; /* tested against WQ_FLAG_EXCLUSIVE by __wake_up_common() */
#define WQ_FLAG_EXCLUSIVE 0x01
void *private; // address of the waiting task's task_struct
wait_queue_func_t func; // callback used to wake the suspended task
struct list_head task_list; // links this entry into the task_list of a wait_queue_head_t
};
typedef struct __wait_queue wait_queue_t;
/* One waiter entry; it is linked into a wait_queue_head_t while its task waits. */
struct __wait_queue {
unsigned int flags; /* tested against WQ_FLAG_EXCLUSIVE by __wake_up_common() */
#define WQ_FLAG_EXCLUSIVE 0x01
void *private; // address of the waiting task's task_struct
wait_queue_func_t func; // callback used to wake the suspended task
struct list_head task_list; // links this entry into the task_list of a wait_queue_head_t
};
2. 让当前进程开始等待
内核提供了如下的接口来让当前进程在条件不满足的情况下,阻塞等待:
wait_event(wq, condition)
wait_event_timeout(wq, condition, timeout)
wait_event_interruptible(wq, condition)
wait_event_interruptible_timeout(wq, condition, timeout)
wait_event(wq, condition)
wait_event_timeout(wq, condition, timeout)
wait_event_interruptible(wq, condition)
wait_event_interruptible_timeout(wq, condition, timeout)
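返回值如下(wait_event()没有返回值;不带timeout的wait_event_interruptible()在condition满足时返回0、被信号打断时返回-ERESTARTSYS;下面的第2)、3)项只适用于带timeout的接口,第1)项只适用于interruptible的接口):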
1) -ERESTARTSYS: 表示被信号激活唤醒
2) > 0: 表示condition满足,返回值表示距离设定超时还有多久
3) = 0: 表示超时发生
其内部实现源码都很类似,只是有些细节不太一样,这里以wait_event_interruptible()为例子,看看其源码:
/*
 * __wait_event_interruptible - sleep until @condition becomes true or a
 * signal is pending for the current task.
 *
 * @wq:        wait queue head to sleep on (passed by name; the macro takes
 *             its address)
 * @condition: C expression re-evaluated on every pass of the loop
 * @ret:       lvalue supplied by the caller; set to -ERESTARTSYS only when
 *             the wait is ended by a pending signal, otherwise left untouched
 *
 * Comments inside the body must use the block-comment form and every line
 * must end in a backslash: a "//" comment would either terminate the macro
 * (no continuation) or swallow the remaining spliced lines (with one).
 */
#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	/* Define a waiter object for the current task. */		\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		/* Queue the waiter and mark the task TASK_INTERRUPTIBLE. */ \
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			/* Hand the CPU to the scheduler, then re-check. */ \
			schedule();					\
			continue;					\
		}							\
		ret = -ERESTARTSYS;					\
		break;							\
	}								\
	/* Remove the waiter object from the wait list. */		\
	finish_wait(&wq, &__wait);					\
} while (0)
/*
 * __wait_event_interruptible - sleep until @condition becomes true or a
 * signal is pending for the current task.
 *
 * @wq:        wait queue head to sleep on (passed by name; the macro takes
 *             its address)
 * @condition: C expression re-evaluated on every pass of the loop
 * @ret:       lvalue supplied by the caller; set to -ERESTARTSYS only when
 *             the wait is ended by a pending signal, otherwise left untouched
 *
 * Comments inside the body must use the block-comment form and every line
 * must end in a backslash: a "//" comment would either terminate the macro
 * (no continuation) or swallow the remaining spliced lines (with one).
 */
#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	/* Define a waiter object for the current task. */		\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		/* Queue the waiter and mark the task TASK_INTERRUPTIBLE. */ \
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			/* Hand the CPU to the scheduler, then re-check. */ \
			schedule();					\
			continue;					\
		}							\
		ret = -ERESTARTSYS;					\
		break;							\
	}								\
	/* Remove the waiter object from the wait list. */		\
	finish_wait(&wq, &__wait);					\
} while (0)
当我们调用wait_event_interruptible()接口时,会先判断condition是否满足,如果不满足,则会suspend当前task。
这里再看一下DEFINE_WAIT宏的源码,可以发现其private成员总是保存着当前task对象的地址current,还有一个成员func也是非常重要的,保存着task被唤醒前的操作方法,这里暂不说明,待下面分析wake_up唤醒等待队列时再进行分析:
/*
 * DEFINE_WAIT - declare and initialise a wait_queue_t called @name.
 * The entry records the current task in .private, uses
 * autoremove_wake_function() as its wake-up callback, and starts with
 * .task_list initialised as an empty list head pointing at itself.
 */
#define DEFINE_WAIT(name) \
wait_queue_t name = { \
.private = current, \
.func = autoremove_wake_function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}
/*
 * DEFINE_WAIT - declare and initialise a wait_queue_t called @name.
 * The entry records the current task in .private, uses
 * autoremove_wake_function() as its wake-up callback, and starts with
 * .task_list initialised as an empty list head pointing at itself.
 */
#define DEFINE_WAIT(name) \
wait_queue_t name = { \
.private = current, \
.func = autoremove_wake_function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}
3. 唤醒此等待队列上的进程:
内核提供了如下的接口:
void wake_up(wait_queue_head_t *q);
void wake_up_interruptible(wait_queue_head_t *q);
void wake_up_interruptible_all(wait_queue_head_t *q);
void wake_up(wait_queue_head_t *q);
void wake_up_interruptible(wait_queue_head_t *q);
void wake_up_interruptible_all(wait_queue_head_t *q);
这里以分析wake_up_interruptible()函数的源码进行说明唤醒task的原理,因为其他的唤醒过程都是类似的。最后都会调用到__wake_up_common()这个函数:
/*
 * __wake_up_common - walk the waiters queued on @q and invoke each one's
 * wake-up callback.
 *
 * @q:            wait queue head whose task_list is traversed
 * @mode, @sync, @key: passed through unchanged to every waiter's func()
 * @nr_exclusive: budget of exclusive waiters; the walk stops once this many
 *                exclusive waiters have reported a successful wake-up
 *
 * No locking is visible in this listing.
 * NOTE(review): presumably the caller holds q->lock - confirm.
 */
void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
/* The _safe iterator is used because func() may unlink curr from the list. */
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* Sample the flags before the callback runs on the entry. */
unsigned flags = curr->flags;
/*
 * Evaluation order matters: the budget is only decremented when the
 * callback returned non-zero AND the waiter is exclusive.
 */
if (curr->func(curr, mode, sync, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
/*
 * __wake_up_common - walk the waiters queued on @q and invoke each one's
 * wake-up callback.
 *
 * @q:            wait queue head whose task_list is traversed
 * @mode, @sync, @key: passed through unchanged to every waiter's func()
 * @nr_exclusive: budget of exclusive waiters; the walk stops once this many
 *                exclusive waiters have reported a successful wake-up
 *
 * No locking is visible in this listing.
 * NOTE(review): presumably the caller holds q->lock - confirm.
 */
void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
/* The _safe iterator is used because func() may unlink curr from the list. */
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* Sample the flags before the callback runs on the entry. */
unsigned flags = curr->flags;
/*
 * Evaluation order matters: the budget is only decremented when the
 * callback returned non-zero AND the waiter is exclusive.
 */
if (curr->func(curr, mode, sync, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
从上面的源码可以看出最终就是调用了等待队列q上的task_list链表上的waiter对象的func方法,在前面又提到过这个方法就是autoremove_wake_function():
/*
 * autoremove_wake_function - wake-up callback that also dequeues the waiter.
 *
 * Forwards to default_wake_function(); when that reports success (non-zero)
 * the waiter entry is unlinked from its wait queue. The result of
 * default_wake_function() is returned unchanged.
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/* Try to put the task stored in wait->private onto the run queue. */
	int woken = default_wake_function(wait, mode, sync, key);

	if (!woken)
		return woken;

	/* The wake-up succeeded: remove this waiter from the list. */
	list_del_init(&wait->task_list);
	return woken;
}
/*
 * autoremove_wake_function - wake-up callback that also dequeues the waiter.
 *
 * Forwards to default_wake_function(); when that reports success (non-zero)
 * the waiter entry is unlinked from its wait queue. The result of
 * default_wake_function() is returned unchanged.
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/* Try to put the task stored in wait->private onto the run queue. */
	int woken = default_wake_function(wait, mode, sync, key);

	if (!woken)
		return woken;

	/* The wake-up succeeded: remove this waiter from the list. */
	list_del_init(&wait->task_list);
	return woken;
}
default_wake_function()的源码如下,又看到我们熟悉的private成员:
/*
 * default_wake_function - wake the task recorded in a waiter entry.
 *
 * Passes the task stored in curr->private, together with @mode and @sync,
 * to try_to_wake_up() and returns its result. @key is not used here.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
			  void *key)
{
	struct task_struct *task = curr->private;

	return try_to_wake_up(task, mode, sync);
}
/*
 * default_wake_function - wake the task recorded in a waiter entry.
 *
 * Passes the task stored in curr->private, together with @mode and @sync,
 * to try_to_wake_up() and returns its result. @key is not used here.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
			  void *key)
{
	struct task_struct *task = curr->private;

	return try_to_wake_up(task, mode, sync);
}
try_to_wake_up()函数的源码比较长,这里就截取能体现其大致逻辑的代码进行说明:
/*
 * try_to_wake_up - abridged excerpt showing the outline of waking task @p.
 *
 * This listing is incomplete: the declarations of rq, flags, cpu, orig_cpu,
 * this_cpu, old_state and success, the run-queue locking, and everything
 * replaced by the "......" line are not shown.
 *
 * @p:     task to wake
 * @state: mask of task states that are allowed to be woken
 * @sync:  forwarded to check_preempt_curr() and used for wake-up statistics
 *
 * Returns success, which is set to 1 once the task has been activated; its
 * initial value lies in the elided part.
 */
static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
{
old_state = p->state;
if (!(old_state & state)) // bail out unless the task's state matches the mask
goto out;
// if task_running() reports the task as running, jump straight to out_activate
if (unlikely(task_running(rq, p)))
goto out_activate;
......
out_activate:
schedstat_inc(p, se.nr_wakeups);
if (sync)
schedstat_inc(p, se.nr_wakeups_sync);
if (orig_cpu != cpu)
schedstat_inc(p, se.nr_wakeups_migrate);
if (cpu == this_cpu)
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
activate_task(rq, p, 1); // add the task to the run queue
success = 1;
out_running:
trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING; // mark the task as TASK_RUNNING
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
out:
current->se.last_wakeup = current->se.sum_exec_runtime;
task_rq_unlock(rq, &flags);
return success;
}
/*
 * activate_task - put task @p on run queue @rq.
 *
 * Decrements rq->nr_uninterruptible when task_contributes_to_load(p) is
 * true, enqueues the task (forwarding @wakeup) and then bumps the run
 * queue's running count.
 */
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
	if (task_contributes_to_load(p)) {
		rq->nr_uninterruptible -= 1;
	}

	enqueue_task(rq, p, wakeup);
	inc_nr_running(rq);
}
/*
 * try_to_wake_up - abridged excerpt showing the outline of waking task @p.
 *
 * This listing is incomplete: the declarations of rq, flags, cpu, orig_cpu,
 * this_cpu, old_state and success, the run-queue locking, and everything
 * replaced by the "......" line are not shown.
 *
 * @p:     task to wake
 * @state: mask of task states that are allowed to be woken
 * @sync:  forwarded to check_preempt_curr() and used for wake-up statistics
 *
 * Returns success, which is set to 1 once the task has been activated; its
 * initial value lies in the elided part.
 */
static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
{
old_state = p->state;
if (!(old_state & state)) // bail out unless the task's state matches the mask
goto out;
// if task_running() reports the task as running, jump straight to out_activate
if (unlikely(task_running(rq, p)))
goto out_activate;
......
out_activate:
schedstat_inc(p, se.nr_wakeups);
if (sync)
schedstat_inc(p, se.nr_wakeups_sync);
if (orig_cpu != cpu)
schedstat_inc(p, se.nr_wakeups_migrate);
if (cpu == this_cpu)
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
activate_task(rq, p, 1); // add the task to the run queue
success = 1;
out_running:
trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING; // mark the task as TASK_RUNNING
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
out:
current->se.last_wakeup = current->se.sum_exec_runtime;
task_rq_unlock(rq, &flags);
return success;
}
/*
 * activate_task - put task @p on run queue @rq.
 *
 * Decrements rq->nr_uninterruptible when task_contributes_to_load(p) is
 * true, enqueues the task (forwarding @wakeup) and then bumps the run
 * queue's running count.
 */
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
	if (task_contributes_to_load(p)) {
		rq->nr_uninterruptible -= 1;
	}

	enqueue_task(rq, p, wakeup);
	inc_nr_running(rq);
}
内核调度任务,总是从就绪列表run queue中选择优先级最高的任务来运行。等待队列的唤醒操作,实际上就是把阻塞在此等待队列上的进程,加入到run queue中,等待调度器在下次调度时对其继续运行。
4. 例子:
一个简单的例子,我们常见的生产者-消费者模型:生产者每生产一个任务,就等待消费者将此任务处理掉,然后再生产下一个任务;消费者每接收到一个任务,就将其消耗掉,并通知生产者继续生产;
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/wait.h>
/* Trace entry to / exit from the enclosing function at KERN_DEBUG level. */
#define ENTER()	printk(KERN_DEBUG "%s() Enter", __func__)
#define EXIT()	printk(KERN_DEBUG "%s() Exit", __func__)

/*
 * printf-style loggers; every message is prefixed with the calling function
 * name and line number and terminated with a newline.
 */
#define ERR(fmt, ...) \
	printk(KERN_ERR "%s()-%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
#define DBG(fmt, ...) \
	printk(KERN_DEBUG "%s()-%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)

MODULE_LICENSE("GPL");
/* One unit of work handed from the producer thread to the consumer thread. */
struct work {
char name[64]; /* label printed by the consumer before running the item */
void (*work_func)(void *data); /* handler the consumer invokes */
void *data; /* argument passed to work_func */
};
/*
 * do_work - handler installed in every work item by the producer.
 *
 * @data: the item's sequence number, carried as an integer packed into the
 *        pointer value (it is never dereferenced).
 *
 * Logs the number and then sleeps for up to one second to simulate
 * processing time.
 */
static void do_work(void *data)
{
	/*
	 * Convert through long: a direct pointer-to-int cast changes size on
	 * 64-bit builds and triggers a compiler warning.
	 */
	int num = (int)(long)data;

	DBG("work num is %d", num);
	msleep_interruptible(1000);
}
/* Thread handles; file-scope statics start out as NULL. */
static struct task_struct *producer;
static struct task_struct *consumer;

/* prod_wq: the producer sleeps here; cons_wq: the consumer sleeps here. */
static wait_queue_head_t prod_wq;
static wait_queue_head_t cons_wq;

/* The item currently handed over; NULL while no item is pending. */
static struct work *work;
static int producer_thr(void *arg)
{
int num = 0;
ENTER();
while (!kthread_should_stop()) {
int ret = wait_event_interruptible(prod_wq, (work == NULL));
if (ret == -ERESTARTSYS) {
DBG("wake up by signal");
continue;
}
// DBG("ret = %d", ret);
work = kzalloc(sizeof(struct work), GFP_KERNEL);
if (!work) {
ERR("kzalloc fail");
break;
}
num++;
snprintf(work->name, sizeof(work->name), "debug-work");
work->work_func = do_work;
work->data = (void *)num;
wake_up_interruptible(&cons_wq);
}
EXIT();
return 0;
}
static int consumer_thr(void *arg)
{
ENTER();
wake_up_interruptible(&prod_wq);
while (!kthread_should_stop()) {
int ret = wait_event_interruptible(cons_wq, (work != NULL));
if (ret == -ERESTARTSYS) {
DBG("wait_up by signal");
continue;
}
// DBG("ret = %d", ret);
DBG("excute work: %s", work->name);
work->work_func(work->data);
kfree(work);
work = NULL;
wake_up_interruptible(&prod_wq);
}
EXIT();
return 0;
}
static __init int wq_demo_init(void)
{
ENTER();
init_waitqueue_head(&prod_wq);
init_waitqueue_head(&cons_wq);
producer = kthread_run(producer_thr, NULL, "producer-thr");
if (!producer) {
ERR("kthread_run fail");
goto _fail;
}
consumer = kthread_run(consumer_thr, NULL, "consumer-thr");
if (!consumer) {
ERR("kthread_run fail");
goto _fail;
}
EXIT();
return 0;
_fail:
if (producer)
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
return -ECHILD;
}
static __exit void wq_demo_exit(void)
{
ENTER();
if (producer)
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
EXIT();
}
module_init(wq_demo_init);
module_exit(wq_demo_exit);
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/wait.h>
/* Trace entry to / exit from the enclosing function at KERN_DEBUG level. */
#define ENTER()	printk(KERN_DEBUG "%s() Enter", __func__)
#define EXIT()	printk(KERN_DEBUG "%s() Exit", __func__)

/*
 * printf-style loggers; every message is prefixed with the calling function
 * name and line number and terminated with a newline.
 */
#define ERR(fmt, ...) \
	printk(KERN_ERR "%s()-%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
#define DBG(fmt, ...) \
	printk(KERN_DEBUG "%s()-%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)

MODULE_LICENSE("GPL");
/* One unit of work handed from the producer thread to the consumer thread. */
struct work {
char name[64]; /* label printed by the consumer before running the item */
void (*work_func)(void *data); /* handler the consumer invokes */
void *data; /* argument passed to work_func */
};
/*
 * do_work - handler installed in every work item by the producer.
 *
 * @data: the item's sequence number, carried as an integer packed into the
 *        pointer value (it is never dereferenced).
 *
 * Logs the number and then sleeps for up to one second to simulate
 * processing time.
 */
static void do_work(void *data)
{
	/*
	 * Convert through long: a direct pointer-to-int cast changes size on
	 * 64-bit builds and triggers a compiler warning.
	 */
	int num = (int)(long)data;

	DBG("work num is %d", num);
	msleep_interruptible(1000);
}
/* Thread handles; file-scope statics start out as NULL. */
static struct task_struct *producer;
static struct task_struct *consumer;

/* prod_wq: the producer sleeps here; cons_wq: the consumer sleeps here. */
static wait_queue_head_t prod_wq;
static wait_queue_head_t cons_wq;

/* The item currently handed over; NULL while no item is pending. */
static struct work *work;
static int producer_thr(void *arg)
{
int num = 0;
ENTER();
while (!kthread_should_stop()) {
int ret = wait_event_interruptible(prod_wq, (work == NULL));
if (ret == -ERESTARTSYS) {
DBG("wake up by signal");
continue;
}
// DBG("ret = %d", ret);
work = kzalloc(sizeof(struct work), GFP_KERNEL);
if (!work) {
ERR("kzalloc fail");
break;
}
num++;
snprintf(work->name, sizeof(work->name), "debug-work");
work->work_func = do_work;
work->data = (void *)num;
wake_up_interruptible(&cons_wq);
}
EXIT();
return 0;
}
static int consumer_thr(void *arg)
{
ENTER();
wake_up_interruptible(&prod_wq);
while (!kthread_should_stop()) {
int ret = wait_event_interruptible(cons_wq, (work != NULL));
if (ret == -ERESTARTSYS) {
DBG("wait_up by signal");
continue;
}
// DBG("ret = %d", ret);
DBG("excute work: %s", work->name);
work->work_func(work->data);
kfree(work);
work = NULL;
wake_up_interruptible(&prod_wq);
}
EXIT();
return 0;
}
static __init int wq_demo_init(void)
{
ENTER();
init_waitqueue_head(&prod_wq);
init_waitqueue_head(&cons_wq);
producer = kthread_run(producer_thr, NULL, "producer-thr");
if (!producer) {
ERR("kthread_run fail");
goto _fail;
}
consumer = kthread_run(consumer_thr, NULL, "consumer-thr");
if (!consumer) {
ERR("kthread_run fail");
goto _fail;
}
EXIT();
return 0;
_fail:
if (producer)
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
return -ECHILD;
}
static __exit void wq_demo_exit(void)
{
ENTER();
if (producer)
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
EXIT();
}
module_init(wq_demo_init);
module_exit(wq_demo_exit);