Table of Contents
- 1. Introduction
- 2. Framework: from application-level reads/writes to block device data
- 3. Analysis of the ll_rw_block interface
- 3.1 The buffer_head structure
- 3.2 Implementation of ll_rw_block
- 3.3 Building the bio and getting the device's I/O request queue
- 3.4 Where the queue's "make request function" comes from
- 3.4.1 The default __make_request interface
- 4. Summary
1. Introduction
With a block device driver built in kernel space and its device node created, how does a user-space application read and write the corresponding block device through that node? The rest of this article analyzes that process step by step, tracing how a user program's reads and writes reach the data on the block device.
2. Framework: from application-level reads/writes to block device data
The generic block layer provides the interface ll_rw_block() for reading and writing logical blocks; application-level reads and writes of a block device all end up calling ll_rw_block(). Its parameters are listed below, followed by a short usage sketch.
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]);
rw: direction of the transfer; typical values are READ, WRITE, or READA;
nr: number of elements in the bhs array;
bhs: array of data blocks (buffer heads) to read or write;
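As a quick illustration, here is a minimal usage sketch. It assumes an already-opened struct block_device *bdev (a placeholder for this example) and a 512-byte block size:

struct buffer_head *bh;

/* get (or create) the buffer_head caching block 0 of the device */
bh = __getblk(bdev, 0, 512);

/* submit the read, then wait for the I/O to complete */
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);

if (buffer_uptodate(bh)) {
	/* bh->b_data now holds the contents of the block */
}
brelse(bh); /* drop the reference taken by __getblk */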
3. Analysis of the ll_rw_block interface
Function prototype:
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]);
Before analyzing ll_rw_block, let's first look at the buffer_head structure.
3.1 The buffer_head structure
struct buffer_head {
unsigned long b_state; /* buffer state bitmap (see above) */
struct buffer_head *b_this_page;/* circular list of page's buffers */
struct page *b_page; /* the page this bh is mapped to */
sector_t b_blocknr; /* start block number */
size_t b_size; /* size of mapping, i.e. the block size */
char *b_data; /* pointer to data within the page */
struct block_device *b_bdev; /* device this block belongs to */
bh_end_io_t *b_end_io; /* I/O completion */
void *b_private; /* reserved for b_end_io */
struct list_head b_assoc_buffers; /* associated with another mapping */
struct address_space *b_assoc_map; /* mapping this buffer is
associated with */
atomic_t b_count; /* users using this buffer_head */
};
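To relate these fields: b_blocknr and b_size locate the block on the device, while b_page, b_data and bh_offset() locate the cached copy in memory. A small sketch of the arithmetic (assuming 512-byte sectors and a lowmem page), which is exactly what submit_bh does below:

/* starting sector of the block on the device (512-byte sectors) */
sector_t sector = bh->b_blocknr * (bh->b_size >> 9);

/* address of the cached data inside the page; for lowmem pages
   this is the same as bh->b_data */
char *data = page_address(bh->b_page) + bh_offset(bh);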
3.2 Implementation of ll_rw_block
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
int i;
// walk the array of blocks, handling each buffer_head in turn
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
if (rw == SWRITE)
lock_buffer(bh);
else if (test_set_buffer_locked(bh))
continue;
if (rw == WRITE || rw == SWRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(WRITE, bh); // submit the block for writing
continue;
}
} else {
if (!buffer_uptodate(bh)) {
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
submit_bh(rw, bh); // submit the block for reading
continue;
}
}
unlock_buffer(bh);
}
}
3.3 Building the bio and getting the device's I/O request queue
submit_bh(WRITE, bh)/submit_bh(rw, bh) ->
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_io_vec[0].bv_page = bh->b_page;
bio->bi_io_vec[0].bv_len = bh->b_size;
bio->bi_io_vec[0].bv_offset = bh_offset(bh);
bio->bi_vcnt = 1;
bio->bi_idx = 0;
bio->bi_size = bh->b_size;
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
bio_get(bio);
// submit the bio
submit_bio(rw, bio) ->
generic_make_request(bio) ->
__generic_make_request(bio) ->
q = bdev_get_queue(bio->bi_bdev) -> // get the block device's I/O request queue
ret = q->make_request_fn(q, bio) -> // call the queue's "make request function" to handle the bio
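When the device finishes the transfer, the bio's completion callback hands the result back to the buffer_head. In kernels of this vintage (where bio_endio still takes a bytes_done argument), end_bio_bh_io_sync looks roughly like the following, with error handling elided:

static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
{
	struct buffer_head *bh = bio->bi_private;

	/* partial completion: more bytes are still outstanding */
	if (bio->bi_size)
		return 1;

	/* invoke end_buffer_write_sync/end_buffer_read_sync set up by ll_rw_block */
	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
	bio_put(bio);
	return 0;
}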
3.4 Where the queue's "make request function" comes from
The queue's "make request function" (make_request_fn) has two possible sources: either the default __make_request is used, or the block device driver installs its own, as sketched below.
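For the second source, a driver that wants to bypass the request queue and the elevator entirely (RAM disks are the classic case) can install its own function. A hedged sketch, where my_make_request and my_transfer are hypothetical names:

static int my_make_request(request_queue_t *q, struct bio *bio)
{
	/* handle the bio directly: nothing is queued, no elevator runs */
	my_transfer(bio);                /* hypothetical data-copy helper */

	bio_endio(bio, bio->bi_size, 0); /* complete the whole bio */
	return 0;
}

/* in the driver's init code, instead of blk_init_queue(): */
q = blk_alloc_queue(GFP_KERNEL);
blk_queue_make_request(q, my_make_request);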
3.4.1 The default __make_request interface
How __make_request gets installed as the queue's "make request function":
blk_init_queue ->
blk_init_queue_node ->
blk_queue_make_request(q, __make_request);
q->make_request_fn = mfn; // inside blk_queue_make_request, mfn == __make_request
Implementation of __make_request:
static int __make_request(request_queue_t *q, struct bio *bio)
{
struct request *req;
int el_ret, nr_sectors, barrier, err;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
int rw_flags;
nr_sectors = bio_sectors(bio); // number of sectors to read/write
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
* ISA dma in theory)
*/
blk_queue_bounce(q, &bio);
barrier = bio_barrier(bio);
if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
goto end_io;
}
spin_lock_irq(q->queue_lock);
if (unlikely(barrier) || elv_queue_empty(q))
goto get_rq;
// First call elv_merge() to try to merge this bio with an I/O request that is
// already queued; if the bio is adjacent to a queued request, the two can be
// merged into one, reducing the number of requests issued to the device.
el_ret = elv_merge(q, &req, bio);
switch (el_ret) {
case ELEVATOR_BACK_MERGE: // bio merged at the tail of an existing request
BUG_ON(!rq_mergeable(req));
if (!ll_back_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
req->biotail->bi_next = bio;
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
case ELEVATOR_FRONT_MERGE: // bio merged at the head of an existing request
BUG_ON(!rq_mergeable(req));
if (!ll_front_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
bio->bi_next = req->bio;
req->bio = bio;
/*
* may not be valid. if the low level driver said
* it didn't need a bounce buffer then it better
* not touch req->buffer either...
*/
req->buffer = bio_data(bio);
req->current_nr_sectors = bio_cur_sectors(bio);
req->hard_cur_sectors = req->current_nr_sectors;
req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
/* ELV_NO_MERGE: elevator says don't/can't merge. */
default:
;
}
// could not merge with an existing request, so build a new I/O request
get_rq:
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
rw_flags = bio_data_dir(bio);
if (sync)
rw_flags |= REQ_RW_SYNC;
/*
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
req = get_request_wait(q, rw_flags, bio);
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
* We don't worry about that case for efficiency. It won't happen
* often, and the elevators are able to handle it.
*/
// build an I/O request from the bio
init_request_from_bio(req, bio);
spin_lock_irq(q->queue_lock);
if (elv_queue_empty(q))
blk_plug_device(q);
// submit the I/O request to the queue
add_request(q, req);
out:
// for synchronous I/O, unplug and process the queued requests now
if (sync)
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
return 0;
end_io:
bio_endio(bio, nr_sectors << 9, err);
return 0;
}
__generic_unplug_device(q) ->
q->request_fn(q); // call the queue's request handler, supplied by the block device driver,
                  // e.g. ramblock_queue = blk_init_queue(do_ramblock_request, &ramblock_lock);
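Taking the ramblock driver mentioned above as an example, a minimal request handler (assuming a RAM-backed array ramblock_buf standing in for the "disk") could look like this:

static void do_ramblock_request(request_queue_t *q)
{
	struct request *req;

	/* pull requests off the queue one by one */
	while ((req = elv_next_request(q)) != NULL) {
		unsigned long offset = req->sector << 9;          /* byte offset on the "disk" */
		unsigned long len = req->current_nr_sectors << 9; /* bytes in this transfer */

		if (rq_data_dir(req) == READ)
			memcpy(req->buffer, ramblock_buf + offset, len);
		else
			memcpy(ramblock_buf + offset, req->buffer, len);

		end_request(req, 1); /* 1 == success */
	}
}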
4. Summary
The flow by which a user program reads and writes block device data through the device node:
- At initialization, the block device driver calls blk_init_queue to set up an I/O request queue and supplies the queue's "request handler";
- The user program's read()/write() calls are translated into calls to the generic block layer's ll_rw_block interface;
- ll_rw_block submits each data block to be read or written via submit_bh;
- submit_bh builds and fills in a bio, then submits it through submit_bio;
- The block device queue's "make request function", by default __make_request, is called to handle the bio;
- __make_request first tries to merge the bio into an existing I/O request, reducing the number of requests issued to the device;
- If no merge is possible, it builds a new I/O request;
- Once all bios have been handled (merged into existing requests or turned into new ones), or the I/O is synchronous, the queued requests are processed: __generic_unplug_device is called, which invokes the queue's "request handler" to perform the actual data transfer.