4, basic method exported to FS
===========================
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
bio->bi_rw |= rw;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (!bio_empty_barrier(bio)) {
BIO_BUG_ON(!bio->bi_size);
BIO_BUG_ON(!bio->bi_io_vec);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
task_io_account_read(bio->bi_size);
count_vm_events(PGPGIN, count);
}
if (unlikely(block_dump)) {
char b[BDEVNAME_SIZE];
printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
current->comm, task_pid_nr(current),
(rw & WRITE) ? "WRITE" : "READ",
(unsigned long long)bio->bi_sector,
bdevname(bio->bi_bdev, b));
}
}
generic_make_request(bio);
}
void generic_make_request(struct bio *bio)
{
if (current->bio_tail) { /* make_request is active */
*(current->bio_tail) = bio;
bio->bi_next = NULL;
current->bio_tail = &bio->bi_next;
return;
}
/* following loop may be a bit non-obvious, and so deserves some
* explanation.
* Before entering the loop, bio->bi_next is NULL (as all callers
* ensure that) so we have a list with a single bio.
*
* We pretend that we have just taken it off a longer list, so
* we assign bio_list to the next (which is NULL) and bio_tail
* to &bio_list, thus initialising the bio_list of new bios to be
* added. __generic_make_request may indeed add some more bios
* through a recursive call to generic_make_request. If it
* did, we find a non-NULL value in bio_list and re-enter the loop
* from the top. In this case we really did just take the bio
* of the top of the list (no pretending) and so fixup bio_list and
* bio_tail or bi_next, and call into __generic_make_request again.
*
* The loop was structured like this to make only one call to
* __generic_make_request (which is important as it is large and
* inlined) and to keep the structure simple.
*/
BUG_ON(bio->bi_next);
do {
current->bio_list = bio->bi_next;
if (bio->bi_next == NULL)
current->bio_tail = ¤t->bio_list;
else
bio->bi_next = NULL;
__generic_make_request(bio);
bio = current->bio_list;
} while (bio);
current->bio_tail = NULL; /* deactivate */
}
static inline void __generic_make_request(struct bio *bio)
{
struct request_queue *q;
sector_t old_sector;
int ret, nr_sectors = bio_sectors(bio);
dev_t old_dev;
int err = -EIO;
might_sleep();
/* 1,
* check physical overflow
*/
if (bio_check_eod(bio, nr_sectors))
goto end_io;
/*
* Resolve the mapping until finished. (drivers are
* still free to implement/resolve their own stacking
* by explicitly returning 0)
*
* NOTE: we don't repeat the blk_size check for each new device.
* Stacking drivers are expected to know what they are doing.
*/
old_sector = -1;
old_dev = 0;
do {
char b[BDEVNAME_SIZE];
/* 2,
* determine physical disk
*/
q = bdev_get_queue(bio->bi_bdev);
if (!q) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
bdevname(bio->bi_bdev, b),
(long long) bio->bi_sector);
end_io:
bio_endio(bio, err);
break;
}
/* 3,
* overflow controler's capability
*/
if (unlikely(nr_sectors > q->max_hw_sectors)) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
q->max_hw_sectors);
goto end_io;
}
/* 4,
* elevator still healthy
*/
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
/* 5,
* if just a poke
*/
if (should_fail_request(bio))
goto end_io;
/* 6,
* If this device has partitions, remap block n
* of partition p to block n+start(p) of the disk.
*/
blk_partition_remap(bio);
if (old_sector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
if (bio_check_eod(bio, nr_sectors))
goto end_io;
/* 7,
* If hard to pose
*/
if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
/* 8,
* do it, but where assigned?
* void blk_queue_make_request(struct request_queue *q,
* make_request_fn *mfn)
* and how about __make_request?
* it is dft method when Q init
*/
ret = q->make_request_fn(q, bio);
} while (ret);
}
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
int el_ret, nr_sectors, barrier, err;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
int rw_flags;
nr_sectors = bio_sectors(bio);
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
* ISA dma in theory)
*/
blk_queue_bounce(q, &bio);
barrier = bio_barrier(bio);
if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
goto end_io;
}
spin_lock_irq(q->queue_lock);
/* 1,
* bar do no merge in principle
*/
if (unlikely(barrier) || elv_queue_empty(q))
goto get_rq;
/* 2,
* try merge
*/
el_ret = elv_merge(q, &req, bio);
switch (el_ret) {
case ELEVATOR_BACK_MERGE:
BUG_ON(!rq_mergeable(req));
if (!ll_back_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
req->biotail->bi_next = bio;
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, 0);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
case ELEVATOR_FRONT_MERGE:
BUG_ON(!rq_mergeable(req));
if (!ll_front_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
bio->bi_next = req->bio;
req->bio = bio;
/*
* may not be valid. if the low level driver said
* it didn't need a bounce buffer then it better
* not touch req->buffer either...
*/
req->buffer = bio_data(bio);
req->current_nr_sectors = bio_cur_sectors(bio);
req->hard_cur_sectors = req->current_nr_sectors;
req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, 0);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out;
/* ELV_NO_MERGE: elevator says don't/can't merge. */
default:
break;
}
get_rq:
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
rw_flags = bio_data_dir(bio);
if (sync)
rw_flags |= REQ_RW_SYNC;
/* 3,
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
req = get_request_wait(q, rw_flags, bio);
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
*
* We don't worry about that case for efficiency. It won't happen
* often, and the elevators are able to handle it.
*/
init_request_from_bio(req, bio);
spin_lock_irq(q->queue_lock);
if (elv_queue_empty(q))
blk_plug_device(q);
/* 4,
* in case of new request, and then wait on queue
*/
add_request(q, req);
out:
if (sync) {
/* 5,
* if neccessary, drive disk rotating,
* even current gas is above $100
*/
__generic_unplug_device(q);
}
spin_unlock_irq(q->queue_lock);
return 0;
end_io:
bio_endio(bio, err);
return 0;
}