author     Ming Lei <ming.lei@redhat.com>      2018-01-17 11:25:57 -0500
committer  Jens Axboe <axboe@kernel.dk>        2018-01-17 09:46:54 -0700
commit     396eaf21ee17c476e8f66249fb1f4a39003d0ab4
tree       20b146db746cc279c734fd937128c45cb8ed5c76 /block
parent     0f95549c0ea1e8075ae049202088b2c6a0cb40ad
blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback
blk_insert_cloned_request() is called in the fast path of a dm-rq driver
(e.g. blk-mq request-based DM mpath). blk_insert_cloned_request() uses
blk_mq_request_bypass_insert() to directly append the request to the
blk-mq hctx->dispatch_list of the underlying queue.
1) This isn't efficient because the hctx spinlock is always taken (see
the sketch of the old fast path after this list).
2) With blk_insert_cloned_request(), we completely bypass the
underlying queue's elevator and depend on the upper-level dm-rq
driver's elevator to schedule IO. But dm-rq currently gets no dispatch
feedback from the underlying queue at all. Without knowing whether a
request was actually issued (e.g. because the underlying queue was
busy), the dm-rq elevator cannot provide effective IO merging; as a
side effect, dm-rq currently blindly destages a request from its
elevator only to requeue it after a delay, which kills any opportunity
for merging. This obviously causes very bad sequential IO performance.
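For reference, the old fast path amounted to the following simplified
excerpt of the pre-patch block/blk-core.c code (it matches the lines
removed by this patch; the explanatory comments are added here and are
not part of the original source):

	if (q->mq_ops) {
		/*
		 * Old behaviour: append the clone straight to
		 * hctx->dispatch under the hctx spinlock, bypassing any
		 * elevator on the bottom queue...
		 */
		blk_mq_request_bypass_insert(rq, true);
		/*
		 * ...and report success unconditionally, so the caller
		 * (dm-rq) never learns whether the bottom queue was busy.
		 */
		return BLK_STS_OK;
	}
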
Fix this by updating blk_insert_cloned_request() to use
blk_mq_request_direct_issue(). blk_mq_request_direct_issue() allows a
request to be issued directly to the underlying queue and returns the
dispatch feedback (blk_status_t). If blk_mq_request_direct_issue()
returns BLK_STS_RESOURCE, the dm-rq driver will now use DM_MAPIO_REQUEUE
to _not_ destage the request, thereby preserving the opportunity to
merge IO.
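The dm-rq side of that contract is not part of this block-only diff; a
minimal sketch of how a request-based DM caller might consume the new
feedback is shown below. This is illustrative only: dispatch_clone_sketch()
is a hypothetical helper, not the actual dm-rq change, and the real dm-rq
code also handles clone teardown and the non-blk-mq cases.

#include <linux/blkdev.h>
#include <linux/device-mapper.h>

/*
 * Hypothetical sketch: dispatch a prepared clone to the underlying
 * queue and translate the dispatch feedback into a DM_MAPIO_* code.
 * Not the actual dm-rq change; error handling is simplified.
 */
static int dispatch_clone_sketch(struct request *clone)
{
	blk_status_t ret;

	/* On blk-mq queues this now issues via blk_mq_request_direct_issue(). */
	ret = blk_insert_cloned_request(clone->q, clone);
	switch (ret) {
	case BLK_STS_OK:
		return DM_MAPIO_REMAPPED;	/* issued to the bottom device */
	case BLK_STS_RESOURCE:
		/*
		 * Underlying queue is busy: do NOT destage the original
		 * request; requeue it so it stays in the top-level
		 * elevator and can still be merged with later IO.
		 */
		return DM_MAPIO_REQUEUE;
	default:
		return DM_MAPIO_KILL;		/* hard error */
	}
}
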
With this, request-based DM's blk-mq sequential IO performance is vastly
improved (as much as 3X in mpath/virtio-scsi testing).
Signed-off-by: Ming Lei <ming.lei@redhat.com>
[blk-mq.c changes heavily influenced by Ming Lei's initial solution, but
they were refactored to make them less fragile and easier to read/review]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c |  3
-rw-r--r--  block/blk-mq.c   | 37
-rw-r--r--  block/blk-mq.h   |  3
3 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 7ba607527487..55f338020254 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		 * bypass a potential scheduler on the bottom device for
 		 * insert.
 		 */
-		blk_mq_request_bypass_insert(rq, true);
-		return BLK_STS_OK;
+		return blk_mq_request_direct_issue(rq);
 	}

 	spin_lock_irqsave(q->queue_lock, flags);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ddc46f215bfa..e383a20809f4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1775,15 +1775,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,

 static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
					 struct request *rq,
-					 bool run_queue)
+					 bool run_queue, bool bypass_insert)
 {
-	blk_mq_sched_insert_request(rq, false, run_queue, false,
-				    hctx->flags & BLK_MQ_F_BLOCKING);
+	if (!bypass_insert)
+		blk_mq_sched_insert_request(rq, false, run_queue, false,
+					    hctx->flags & BLK_MQ_F_BLOCKING);
+	else
+		blk_mq_request_bypass_insert(rq, run_queue);
 }

 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
						struct request *rq,
-						blk_qc_t *cookie)
+						blk_qc_t *cookie,
+						bool bypass_insert)
 {
 	struct request_queue *q = rq->q;
 	bool run_queue = true;
@@ -1794,7 +1798,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}

-	if (q->elevator)
+	if (q->elevator && !bypass_insert)
 		goto insert;

 	if (!blk_mq_get_driver_tag(rq, NULL, false))
@@ -1807,7 +1811,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

 	return __blk_mq_issue_directly(hctx, rq, cookie);
 insert:
-	__blk_mq_fallback_to_insert(hctx, rq, run_queue);
+	__blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
+	if (bypass_insert)
+		return BLK_STS_RESOURCE;

 	return BLK_STS_OK;
 }
@@ -1822,15 +1828,30 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

 	hctx_lock(hctx, &srcu_idx);

-	ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
+	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE)
-		__blk_mq_fallback_to_insert(hctx, rq, true);
+		__blk_mq_fallback_to_insert(hctx, rq, true, false);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);

 	hctx_unlock(hctx, srcu_idx);
 }

+blk_status_t blk_mq_request_direct_issue(struct request *rq)
+{
+	blk_status_t ret;
+	int srcu_idx;
+	blk_qc_t unused_cookie;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+
+	hctx_lock(hctx, &srcu_idx);
+	ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true);
+	hctx_unlock(hctx, srcu_idx);
+
+	return ret;
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 8591a54d989b..e3ebc93646ca 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
				struct list_head *list);

+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_direct_issue(struct request *rq);
+
 /*
  * CPU -> queue mappings
  */