diff options
author | Ming Lei <ming.lei@redhat.com> | 2017-10-14 17:22:29 +0800 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2017-11-01 08:20:02 -0600 |
commit | de1482974080ec9ef414bf048b2646b246b63f6e (patch) | |
tree | 3ecdc2b581a83848c43205c2fd9b6e97a6808f6a /block/blk-mq.c | |
parent | 63ba8e31c3ac6393b07c6e18538814a730478766 (diff) | |
download | lwn-de1482974080ec9ef414bf048b2646b246b63f6e.tar.gz lwn-de1482974080ec9ef414bf048b2646b246b63f6e.zip |
blk-mq: introduce .get_budget and .put_budget in blk_mq_ops
For SCSI devices, there is often a per-request-queue depth, which needs
to be respected before queuing one request.
Currently blk-mq always dequeues the request first, then calls
.queue_rq() to dispatch the request to the low-level driver (LLD). One obvious issue with this
approach is that I/O merging may not be successful, because when the
per-request-queue depth can't be respected, .queue_rq() has to return
BLK_STS_RESOURCE, and then this request has to stay in hctx->dispatch
list. This means it never gets a chance to be merged with other I/O.
This patch introduces the .get_budget and .put_budget callbacks in blk_mq_ops,
so that we can try to get the reserved budget before dequeuing a request.
If the budget for queueing I/O can't be satisfied, we don't need to
dequeue the request at all. Hence the request can be left in the I/O
scheduler queue, for more merging opportunities.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 43 |
1 files changed, 38 insertions, 5 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index 40cba1b1978f..dcb467369999 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1048,7 +1048,8 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) return true; } -bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, + bool got_budget) { struct blk_mq_hw_ctx *hctx; struct request *rq; @@ -1057,6 +1058,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) if (list_empty(list)) return false; + WARN_ON(!list_is_singular(list) && got_budget); + /* * Now process all the entries, sending them to the driver. */ @@ -1074,16 +1077,30 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * The initial allocation attempt failed, so we need to * rerun the hardware queue when a tag is freed. */ - if (!blk_mq_dispatch_wait_add(hctx)) + if (!blk_mq_dispatch_wait_add(hctx)) { + if (got_budget) + blk_mq_put_dispatch_budget(hctx); break; + } /* * It's possible that a tag was freed in the window * between the allocation failure and adding the * hardware queue to the wait queue. 
*/ - if (!blk_mq_get_driver_tag(rq, &hctx, false)) + if (!blk_mq_get_driver_tag(rq, &hctx, false)) { + if (got_budget) + blk_mq_put_dispatch_budget(hctx); + break; + } + } + + if (!got_budget) { + ret = blk_mq_get_dispatch_budget(hctx); + if (ret == BLK_STS_RESOURCE) break; + if (ret != BLK_STS_OK) + goto fail_rq; } list_del_init(&rq->queuelist); @@ -1111,6 +1128,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) break; } + fail_rq: if (unlikely(ret != BLK_STS_OK)) { errors++; blk_mq_end_request(rq, BLK_STS_IOERR); @@ -1169,6 +1187,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { int srcu_idx; + bool run_queue; /* * We should be running this queue from one of the CPUs that @@ -1185,15 +1204,18 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_sched_dispatch_requests(hctx); + run_queue = blk_mq_sched_dispatch_requests(hctx); rcu_read_unlock(); } else { might_sleep(); srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - blk_mq_sched_dispatch_requests(hctx); + run_queue = blk_mq_sched_dispatch_requests(hctx); srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); } + + if (run_queue) + blk_mq_run_hw_queue(hctx, true); } /* @@ -1582,6 +1604,13 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if (!blk_mq_get_driver_tag(rq, NULL, false)) goto insert; + ret = blk_mq_get_dispatch_budget(hctx); + if (ret == BLK_STS_RESOURCE) { + blk_mq_put_driver_tag(rq); + goto insert; + } else if (ret != BLK_STS_OK) + goto fail_rq; + new_cookie = request_to_qc_t(hctx, rq); /* @@ -1598,6 +1627,7 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, __blk_mq_requeue_request(rq); goto insert; default: + fail_rq: *cookie = BLK_QC_T_NONE; blk_mq_end_request(rq, ret); return; @@ -2582,6 +2612,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if 
(!set->ops->queue_rq) return -EINVAL; + if (!set->ops->get_budget ^ !set->ops->put_budget) + return -EINVAL; + if (set->queue_depth > BLK_MQ_MAX_DEPTH) { pr_info("blk-mq: reduced tag depth to %u\n", BLK_MQ_MAX_DEPTH); |