BIO bi_sector submit_bio make_request_fn

BIO结构中有一个很重要的字段叫做bi_sector,在高版本中这个字段已经叫bi_iter.bi_sector了，这个不是重点，重点是下面要说的。

当读写一个block device的时候，会提交一个bio数据结构给make_request_fn，那么这个bio结构中的bi_sector到底表示什么意思呢？

在bio.h中有这么一行注释

sector_t bi_sector; /* device address in 512 byte sectors */

大意是说bi_sector是设备的地址，什么地址？以sector（512字节）为单位，也就是说以这个sector为起始地址，去block设备请求数据。

一般硬盘都是以扇区（sector）为单位的，而且一般也只有硬盘有扇区，Linux中的分区比如/dev/sda1是建立在硬盘/dev/sda的基础上的，对于每一个分区来讲，我们通过fdisk来查看分区的细节信息，如下：

Device Boot Start End Sectors Size Id Type
/dev/sda1 * 2048 999423 997376 487M 83 Linux
/dev/sda2 1001470 41940991 40939522 19.5G 5 Extended

硬盘sda有两个分区，分别是/dev/sda1和/dev/sda2，值得注意的是，分区/dev/sda1的起始扇区是2048，/dev/sda2的起始扇区是1001470，这个起始扇区在本文中非常重要。

对于硬盘来讲，在make_request_fn中，bio的bi_sector代表什么呢？

代表的就是硬盘的扇区，比如bi_sector为0，则表示从0扇区开始读取或者写入数据。

对于分区来讲，在make_request_fn中，bio的bi_sector又代表什么呢？

同样，比如bio的bi_sector为0，还是表示从0扇区开始读取或者写入数据。只是，make_request_fn中很难收到这样的请求了，除非这个分区的起始扇区为0。为什么呢？这就是起始扇区的原因，对于分区来讲，收到的bio请求中，这个bi_sector总是大于等于起始扇区的，比如对于/dev/sda2来讲，收到的请求中的bi_sector总是大于等于1001470的。

总结一下，无论是硬盘还是分区，在make_request_fn中，收到的bio请求中的bi_sector已经是真实对应硬盘的物理扇区位置了。

再说一下submit_bio，

我们可以自己构建一个bio，然后调用submit_bio去直接对block设备读取或者写入数据，特别要注意的是，通过submit_bio出去的bio中的bi_sector是有可能会被改变的，如果操作的是分区，在真正提交到make_request_fn之前，会被加上该分区对应的起始扇区的，特别需要注意。

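看看源代码，以2.6内核为例，重点关注__generic_make_request中对blk_partition_remap的调用，以及blk_partition_remap中bio->bi_sector += p->start_sect这一行（原文以黑体标出）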

/*
 * submit_bio - merge the request flags into a bio and pass it down.
 * @rw:  flag bits OR-ed into bio->bi_rw; the WRITE bit selects which
 *       accounting counters are updated
 * @bio: the bio to submit
 *
 * For a bio that carries data, the sector count is added to the
 * PGPGOUT (write) or PGPGIN (read) VM event counter, and reads are also
 * charged to the current task. When block_dump is set, one debug line
 * describing the request is logged. The bio is then handed to
 * generic_make_request() in every case.
 */
void submit_bio(int rw, struct bio *bio)
{
    /* Sector count is sampled before the flags are merged in. */
    int nr_sects = bio_sectors(bio);

    bio->bi_rw |= rw;

    /*
     * Only bios with a payload go through the accounting and the
     * optional block_dump logging.
     */
    if (bio_has_data(bio)) {
        if (!(rw & WRITE)) {
            task_io_account_read(bio->bi_size);
            count_vm_events(PGPGIN, nr_sects);
        } else {
            count_vm_events(PGPGOUT, nr_sects);
        }

        if (unlikely(block_dump)) {
            char name[BDEVNAME_SIZE];
            const char *direction = (rw & WRITE) ? "WRITE" : "READ";

            printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
                   current->comm, task_pid_nr(current),
                   direction,
                   (unsigned long long)bio->bi_sector,
                   bdevname(bio->bi_bdev, name));
        }
    }

    generic_make_request(bio);
}

/*
 * generic_make_request - submit a bio, turning nested submissions into a loop.
 * @bio: bio to submit; bi_next must be NULL on entry (enforced by the
 *       BUG_ON below)
 *
 * When current->bio_tail is already set, a submission is in progress for
 * this task, so the bio is only appended to the task's pending list and
 * the function returns at once. Otherwise the bio is passed to
 * __generic_make_request(), followed by every bio that was queued on
 * current->bio_list in the meantime, until that list is empty.
 */
void generic_make_request(struct bio *bio)
{
   if (current->bio_tail) {
       /* make_request is active */
       /* Append at the tail and let the outer invocation process it. */
       *(current->bio_tail) = bio;
       bio->bi_next = NULL;
       current->bio_tail = &bio->bi_next;
       return;
   }
   /* following loop may be a bit non-obvious, and so deserves some
   * explanation.
   * Before entering the loop, bio->bi_next is NULL (as all callers
   * ensure that) so we have a list with a single bio.
   * We pretend that we have just taken it off a longer list, so
   * we assign bio_list to the next (which is NULL) and bio_tail
   * to &bio_list, thus initialising the bio_list of new bios to be
   * added. __generic_make_request may indeed add some more bios
   * through a recursive call to generic_make_request. If it
   * did, we find a non-NULL value in bio_list and re-enter the loop
   * from the top. In this case we really did just take the bio
   * of the top of the list (no pretending) and so fixup bio_list and
   * bio_tail or bi_next, and call into __generic_make_request again.
   *
   * The loop was structured like this to make only one call to
   * __generic_make_request (which is important as it is large and
   * inlined) and to keep the structure simple.
   */
   BUG_ON(bio->bi_next);
   do {
       /* Detach the current bio: the pending list now starts at its successor. */
       current->bio_list = bio->bi_next;
       if (bio->bi_next == NULL)
           /* List became empty, so new bios are appended at its head. */
           current->bio_tail = &current->bio_list;
       else
           bio->bi_next = NULL;
       __generic_make_request(bio);
       /* Pick up whatever was queued while the bio above was processed. */
       bio = current->bio_list;
   } while (bio);
   current->bio_tail = NULL; /* deactivate */
}

/*
 * __generic_make_request - validate a bio and pass it to its queue's
 * make_request_fn.
 * @bio: bio to process
 *
 * Each pass of the loop looks up the queue of bio->bi_bdev, runs a series
 * of checks, remaps a partition-relative bio onto the whole disk via
 * blk_partition_remap(), and calls q->make_request_fn(). The loop repeats
 * for as long as make_request_fn returns non-zero. If any check fails,
 * the bio is completed through bio_endio() with err, which is -EIO unless
 * the discard check replaced it with -EOPNOTSUPP.
 */
static inline void __generic_make_request(struct bio *bio)
{
   struct request_queue *q;
   sector_t old_sector;
   int ret, nr_sectors = bio_sectors(bio);
   dev_t old_dev;
   int err = -EIO;

might_sleep();

/* First end-of-device check, done before any remapping takes place. */
if (bio_check_eod(bio, nr_sectors))
goto end_io;

   /*
   * Resolve the mapping until finished. (drivers are
   * still free to implement/resolve their own stacking
   * by explicitly returning 0)
   *
   * NOTE: we don't repeat the blk_size check for each new device.
   * Stacking drivers are expected to know what they are doing.
   */
   /* -1 marks "no earlier pass", so the first pass emits no remap trace. */
   old_sector = -1;
   old_dev = 0;
   do {
       char b[BDEVNAME_SIZE];

       q = bdev_get_queue(bio->bi_bdev);
       if (unlikely(!q)) {
           printk(KERN_ERR
           "generic_make_request: Trying to access "
               "nonexistent block-device %s (%Lu)\n",
               bdevname(bio->bi_bdev, b),
               (long long) bio->bi_sector);
           goto end_io;
       }

       /* Non-discard bios must not exceed the queue's max_hw_sectors limit. */
       if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
           nr_sectors > queue_max_hw_sectors(q))) {
           printk(KERN_ERR "bio too big device %s (%u > %u)\n",
           bdevname(bio->bi_bdev, b),
           bio_sectors(bio),
           queue_max_hw_sectors(q));
           goto end_io;
       }

/* A queue flagged QUEUE_FLAG_DEAD gets no further requests. */
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;

if (should_fail_request(bio))
goto end_io;

       /*
       * If this device has partitions, remap block n
       * of partition p to block n+start(p) of the disk.
       */
       blk_partition_remap(bio);

if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
goto end_io;

/* From the second pass on, trace the remap away from the previous device/sector. */
if (old_sector != -1)
trace_block_remap(q, bio, old_dev, old_sector);

old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;

/* Second end-of-device check, now that bi_sector/bi_bdev may have been remapped. */
if (bio_check_eod(bio, nr_sectors))
goto end_io;

       /* Discard bio on a queue without discard support: fail with -EOPNOTSUPP. */
       if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
       !blk_queue_discard(q)) {
           err = -EOPNOTSUPP;
           goto end_io;
       }

trace_block_bio_queue(q, bio);

/* A non-zero return asks for another pass with the bio as it now stands. */
ret = q->make_request_fn(q, bio);
} while (ret);

return;

end_io:
/* Complete the bio with the recorded error code. */
bio_endio(bio, err);
}

*
* If bio->bi_dev is a partition, remap the location
*/
static inline void blk_partition_remap(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;

if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;

bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;

       trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
               bdev->bd_dev,
               bio->bi_sector - p->start_sect);
   }
}

BIO bi_sector submit_bio make_request_fn

猜你喜欢