diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-09 11:20:07 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-09 11:20:07 -0800 |
commit | 3e28850cbd359bed841b832200f9fc208a9ef040 (patch) | |
tree | ef4e5b294f934f58fc08feb89d24291b71c01d4a | |
parent | 1dc1f92e24d6a5479ae8ceea3e2fac69f8d9dab7 (diff) | |
parent | 26af1cd00364ce20dbec66b93ef42f9d42dc6953 (diff) | |
download | lwn-3e28850cbd359bed841b832200f9fc208a9ef040.tar.gz lwn-3e28850cbd359bed841b832200f9fc208a9ef040.zip |
Merge tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- Set of fixes for the batched tag allocation (Ming, me)
- add_disk() error handling fix (Luis)
- Nested queue quiesce fixes (Ming)
- Shared tags init error handling fix (Ye)
- Misc cleanups (Jean, Ming, me)
* tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block:
nvme: wait until quiesce is done
scsi: make sure that request queue quiesce and unquiesce are balanced
scsi: avoid to quiesce sdev->request_queue two times
blk-mq: add one API for waiting until quiesce is done
blk-mq: don't free tags if the tag_set is used by other device in queue initialization
block: fix device_add_disk() kobject_create_and_add() error handling
block: ensure cached plug request matches the current queue
block: move queue enter logic into blk_mq_submit_bio()
block: make bio_queue_enter() fast-path available inline
block: split request allocation components into helpers
block: have plug stored requests hold references to the queue
blk-mq: update hctx->nr_active in blk_mq_end_request_batch()
blk-mq: add RQF_ELV debug entry
blk-mq: only try to run plug merge if request has same queue with incoming bio
block: move RQF_ELV setting into allocators
dm: don't stop request queue after the dm device is suspended
block: replace always false argument with 'false'
block: assign correct tag before doing prefetch of request
blk-mq: fix redundant check of !e expression
-rw-r--r-- | block/blk-core.c | 61 | ||||
-rw-r--r-- | block/blk-merge.c | 6 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 1 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 15 | ||||
-rw-r--r-- | block/blk-mq.c | 187 | ||||
-rw-r--r-- | block/blk-mq.h | 12 | ||||
-rw-r--r-- | block/blk.h | 35 | ||||
-rw-r--r-- | block/genhd.c | 8 | ||||
-rw-r--r-- | drivers/md/dm.c | 10 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 4 | ||||
-rw-r--r-- | drivers/scsi/scsi_lib.c | 62 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 1 | ||||
-rw-r--r-- | include/scsi/scsi_device.h | 1 |
13 files changed, 263 insertions, 140 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index ac1de7d73a45..b043de2baaac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -386,30 +386,6 @@ void blk_cleanup_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_cleanup_queue); -static bool blk_try_enter_queue(struct request_queue *q, bool pm) -{ - rcu_read_lock(); - if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) - goto fail; - - /* - * The code that increments the pm_only counter must ensure that the - * counter is globally visible before the queue is unfrozen. - */ - if (blk_queue_pm_only(q) && - (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) - goto fail_put; - - rcu_read_unlock(); - return true; - -fail_put: - blk_queue_exit(q); -fail: - rcu_read_unlock(); - return false; -} - /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer @@ -442,10 +418,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) return 0; } -static inline int bio_queue_enter(struct bio *bio) +int __bio_queue_enter(struct request_queue *q, struct bio *bio) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - while (!blk_try_enter_queue(q, false)) { struct gendisk *disk = bio->bi_bdev->bd_disk; @@ -742,7 +716,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_OK; } -static noinline_for_stack bool submit_bio_checks(struct bio *bio) +noinline_for_stack bool submit_bio_checks(struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); @@ -860,22 +834,23 @@ end_io: return false; } -static void __submit_bio(struct bio *bio) +static void __submit_bio_fops(struct gendisk *disk, struct bio *bio) { - struct gendisk *disk = bio->bi_bdev->bd_disk; - if (unlikely(bio_queue_enter(bio) != 0)) return; + if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio)) + disk->fops->submit_bio(bio); + blk_queue_exit(disk->queue); +} - if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio)) - 
goto queue_exit; - if (!disk->fops->submit_bio) { +static void __submit_bio(struct bio *bio) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + + if (!disk->fops->submit_bio) blk_mq_submit_bio(bio); - return; - } - disk->fops->submit_bio(bio); -queue_exit: - blk_queue_exit(disk->queue); + else + __submit_bio_fops(disk, bio); } /* @@ -1615,7 +1590,13 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule) flush_plug_callbacks(plug, from_schedule); if (!rq_list_empty(plug->mq_list)) blk_mq_flush_plug_list(plug, from_schedule); - if (unlikely(!from_schedule && plug->cached_rq)) + /* + * Unconditionally flush out cached requests, even if the unplug + * event came from schedule. Since we know hold references to the + * queue for cached requests, we don't want a blocked task holding + * up a queue freeze/quiesce event. + */ + if (unlikely(!rq_list_empty(plug->cached_rq))) blk_mq_free_plug_rqs(plug); } diff --git a/block/blk-merge.c b/block/blk-merge.c index df69f4bb7717..893c1a60b701 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -1101,9 +1101,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, * the same queue, there should be only one such rq in a queue */ *same_queue_rq = true; + + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; } - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK) - return true; return false; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f5076c173477..4f2cf8399f3d 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -308,6 +308,7 @@ static const char *const rqf_name[] = { RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(ZONE_WRITE_LOCKED), RQF_NAME(MQ_POLL_SLEPT), + RQF_NAME(ELV), }; #undef RQF_NAME diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c62b966dfaba..4be652fa38e7 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -370,15 +370,20 @@ bool blk_mq_sched_bio_merge(struct request_queue 
*q, struct bio *bio, bool ret = false; enum hctx_type type; - if (e && e->type->ops.bio_merge) - return e->type->ops.bio_merge(q, bio, nr_segs); + if (bio_queue_enter(bio)) + return false; + + if (e && e->type->ops.bio_merge) { + ret = e->type->ops.bio_merge(q, bio, nr_segs); + goto out_put; + } ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || list_empty_careful(&ctx->rq_lists[type])) - return false; + goto out_put; /* default per sw-queue merge */ spin_lock(&ctx->lock); @@ -391,6 +396,8 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, ret = true; spin_unlock(&ctx->lock); +out_put: + blk_queue_exit(q); return ret; } @@ -497,7 +504,7 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) { + if (!hctx->dispatch_busy && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); if (list_empty(list)) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index 07eb1412760b..629cf421417f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -251,22 +251,18 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q) EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); /** - * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished + * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done * @q: request queue. * - * Note: this function does not prevent that the struct request end_io() - * callback function is invoked. Once this function is returned, we make - * sure no dispatch can happen until the queue is unquiesced via - * blk_mq_unquiesce_queue(). + * Note: it is driver's responsibility for making sure that quiesce has + * been started. 
*/ -void blk_mq_quiesce_queue(struct request_queue *q) +void blk_mq_wait_quiesce_done(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned int i; bool rcu = false; - blk_mq_quiesce_queue_nowait(q); - queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) synchronize_srcu(hctx->srcu); @@ -276,6 +272,22 @@ void blk_mq_quiesce_queue(struct request_queue *q) if (rcu) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done); + +/** + * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished + * @q: request queue. + * + * Note: this function does not prevent that the struct request end_io() + * callback function is invoked. Once this function is returned, we make + * sure no dispatch can happen until the queue is unquiesced via + * blk_mq_unquiesce_queue(). + */ +void blk_mq_quiesce_queue(struct request_queue *q) +{ + blk_mq_quiesce_queue_nowait(q); + blk_mq_wait_quiesce_done(q); +} EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); /* @@ -405,12 +417,15 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, for (i = 0; tag_mask; i++) { if (!(tag_mask & (1UL << i))) continue; - prefetch(tags->static_rqs[tag]); tag = tag_offset + i; + prefetch(tags->static_rqs[tag]); tag_mask &= ~(1UL << i); rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns); rq_list_add(data->cached_rq, rq); + nr++; } + /* caller already holds a reference, add for remainder */ + percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); data->nr_tags -= nr; return rq_list_pop(data->cached_rq); @@ -419,7 +434,6 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) { struct request_queue *q = data->q; - struct elevator_queue *e = q->elevator; u64 alloc_time_ns = 0; struct request *rq; unsigned int tag; @@ -431,7 +445,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) if (data->cmd_flags & REQ_NOWAIT) data->flags |= 
BLK_MQ_REQ_NOWAIT; - if (e) { + if (q->elevator) { + struct elevator_queue *e = q->elevator; + + data->rq_flags |= RQF_ELV; + /* * Flush/passthrough requests are special and go directly to the * dispatch list. Don't include reserved tags in the @@ -447,7 +465,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) retry: data->ctx = blk_mq_get_ctx(q); data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); - if (!e) + if (!(data->rq_flags & RQF_ELV)) blk_mq_tag_busy(data->hctx); /* @@ -490,7 +508,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, .q = q, .flags = flags, .cmd_flags = op, - .rq_flags = q->elevator ? RQF_ELV : 0, .nr_tags = 1, }; struct request *rq; @@ -520,7 +537,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, .q = q, .flags = flags, .cmd_flags = op, - .rq_flags = q->elevator ? RQF_ELV : 0, .nr_tags = 1, }; u64 alloc_time_ns = 0; @@ -561,6 +577,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (!q->elevator) blk_mq_tag_busy(data.hctx); + else + data.rq_flags |= RQF_ELV; ret = -EWOULDBLOCK; tag = blk_mq_get_tag(&data); @@ -627,10 +645,8 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug) { struct request *rq; - while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) { - percpu_ref_get(&rq->q->q_usage_counter); + while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) blk_mq_free_request(rq); - } } static void req_bio_endio(struct request *rq, struct bio *bio, @@ -815,6 +831,13 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; + /* + * All requests should have been marked as RQF_MQ_INFLIGHT, so + * update hctx->nr_active in batch + */ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_sub_active_requests(hctx, nr_tags); + blk_mq_put_tags(hctx->tags, tag_array, nr_tags); percpu_ref_put_many(&q->q_usage_counter, nr_tags); } @@ -2232,7 +2255,7 @@ void 
blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) plug->rq_count = 0; if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { - blk_mq_plug_issue_direct(plug, from_schedule); + blk_mq_plug_issue_direct(plug, false); if (rq_list_empty(plug->mq_list)) return; } @@ -2472,6 +2495,83 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) return BLK_MAX_REQUEST_COUNT; } +static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs, bool *same_queue_rq) +{ + if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { + if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq)) + return true; + if (blk_mq_sched_bio_merge(q, bio, nr_segs)) + return true; + } + return false; +} + +static struct request *blk_mq_get_new_requests(struct request_queue *q, + struct blk_plug *plug, + struct bio *bio, + unsigned int nsegs, + bool *same_queue_rq) +{ + struct blk_mq_alloc_data data = { + .q = q, + .nr_tags = 1, + .cmd_flags = bio->bi_opf, + }; + struct request *rq; + + if (unlikely(bio_queue_enter(bio))) + return NULL; + if (unlikely(!submit_bio_checks(bio))) + goto put_exit; + if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq)) + goto put_exit; + + rq_qos_throttle(q, bio); + + if (plug) { + data.nr_tags = plug->nr_ios; + plug->nr_ios = 1; + data.cached_rq = &plug->cached_rq; + } + + rq = __blk_mq_alloc_requests(&data); + if (rq) + return rq; + + rq_qos_cleanup(q, bio); + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); +put_exit: + blk_queue_exit(q); + return NULL; +} + +static inline struct request *blk_mq_get_request(struct request_queue *q, + struct blk_plug *plug, + struct bio *bio, + unsigned int nsegs, + bool *same_queue_rq) +{ + if (plug) { + struct request *rq; + + rq = rq_list_peek(&plug->cached_rq); + if (rq && rq->q == q) { + if (unlikely(!submit_bio_checks(bio))) + return NULL; + if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq)) + return NULL; + plug->cached_rq = 
rq_list_next(rq); + INIT_LIST_HEAD(&rq->queuelist); + rq_qos_throttle(q, bio); + return rq; + } + } + + return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq); +} + /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2495,47 +2595,20 @@ void blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs = 1; blk_status_t ret; + if (unlikely(!blk_crypto_bio_prep(&bio))) + return; + blk_queue_bounce(q, &bio); if (blk_may_split(q, bio)) __blk_queue_split(q, &bio, &nr_segs); if (!bio_integrity_prep(bio)) - goto queue_exit; - - if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { - if (blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq)) - goto queue_exit; - if (blk_mq_sched_bio_merge(q, bio, nr_segs)) - goto queue_exit; - } - - rq_qos_throttle(q, bio); + return; plug = blk_mq_plug(q, bio); - if (plug && plug->cached_rq) { - rq = rq_list_pop(&plug->cached_rq); - INIT_LIST_HEAD(&rq->queuelist); - } else { - struct blk_mq_alloc_data data = { - .q = q, - .nr_tags = 1, - .cmd_flags = bio->bi_opf, - .rq_flags = q->elevator ? RQF_ELV : 0, - }; - - if (plug) { - data.nr_tags = plug->nr_ios; - plug->nr_ios = 1; - data.cached_rq = &plug->cached_rq; - } - rq = __blk_mq_alloc_requests(&data); - if (unlikely(!rq)) { - rq_qos_cleanup(q, bio); - if (bio->bi_opf & REQ_NOWAIT) - bio_wouldblock_error(bio); - goto queue_exit; - } - } + rq = blk_mq_get_request(q, plug, bio, nr_segs, &same_queue_rq); + if (unlikely(!rq)) + return; trace_block_getrq(bio); @@ -2616,10 +2689,6 @@ void blk_mq_submit_bio(struct bio *bio) /* Default case. 
*/ blk_mq_sched_insert_request(rq, false, true, true); } - - return; -queue_exit: - blk_queue_exit(q); } static size_t order_to_size(unsigned int order) @@ -3605,7 +3674,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { - __blk_mq_free_map_and_rqs(set, j); blk_mq_exit_hctx(q, set, hctx, j); hctxs[j] = NULL; } @@ -4113,8 +4181,13 @@ fallback: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); if (q->nr_hw_queues != set->nr_hw_queues) { + int i = prev_nr_hw_queues; + pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", nr_hw_queues, prev_nr_hw_queues); + for (; i < set->nr_hw_queues; i++) + __blk_mq_free_map_and_rqs(set, i); + set->nr_hw_queues = prev_nr_hw_queues; blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); goto fallback; diff --git a/block/blk-mq.h b/block/blk-mq.h index 28859fc5faee..cb0b5482ca5e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -225,12 +225,18 @@ static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) atomic_inc(&hctx->nr_active); } -static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) +static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, + int val) { if (blk_mq_is_shared_tags(hctx->flags)) - atomic_dec(&hctx->queue->nr_active_requests_shared_tags); + atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags); else - atomic_dec(&hctx->nr_active); + atomic_sub(val, &hctx->nr_active); +} + +static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) +{ + __blk_mq_sub_active_requests(hctx, 1); } static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk.h b/block/blk.h index 7afffd548daf..b4fed2033e48 100644 --- a/block/blk.h +++ b/block/blk.h @@ -55,6 +55,41 @@ void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct 
request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); +int __bio_queue_enter(struct request_queue *q, struct bio *bio); +bool submit_bio_checks(struct bio *bio); + +static inline bool blk_try_enter_queue(struct request_queue *q, bool pm) +{ + rcu_read_lock(); + if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) + goto fail; + + /* + * The code that increments the pm_only counter must ensure that the + * counter is globally visible before the queue is unfrozen. + */ + if (blk_queue_pm_only(q) && + (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) + goto fail_put; + + rcu_read_unlock(); + return true; + +fail_put: + blk_queue_exit(q); +fail: + rcu_read_unlock(); + return false; +} + +static inline int bio_queue_enter(struct bio *bio) +{ + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + + if (blk_try_enter_queue(q, false)) + return 0; + return __bio_queue_enter(q, bio); +} #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, diff --git a/block/genhd.c b/block/genhd.c index febaaa55125a..a4e9e8ebd941 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -469,11 +469,15 @@ int device_add_disk(struct device *parent, struct gendisk *disk, disk->part0->bd_holder_dir = kobject_create_and_add("holders", &ddev->kobj); - if (!disk->part0->bd_holder_dir) + if (!disk->part0->bd_holder_dir) { + ret = -ENOMEM; goto out_del_integrity; + } disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); - if (!disk->slave_dir) + if (!disk->slave_dir) { + ret = -ENOMEM; goto out_put_holder_dir; + } ret = bd_register_pending_holders(disk); if (ret < 0) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8d3157241262..662742a310cb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1927,16 +1927,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, dm_table_event_callback(t, event_callback, md); - /* - * The queue hasn't been stopped yet, if the old table type wasn't - * 
for request-based during suspension. So stop it to prevent - * I/O mapping before resume. - * This must be done before setting the queue restrictions, - * because request-based dm may be run just after the setting. - */ - if (request_based) - dm_stop_queue(q); - if (request_based) { /* * Leverage the fact that request-based DM targets are diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 838b5e2058be..4b5de8f5435a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4518,6 +4518,8 @@ static void nvme_stop_ns_queue(struct nvme_ns *ns) { if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags)) blk_mq_quiesce_queue(ns->queue); + else + blk_mq_wait_quiesce_done(ns->queue); } /* @@ -4637,6 +4639,8 @@ void nvme_stop_admin_queue(struct nvme_ctrl *ctrl) { if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) blk_mq_quiesce_queue(ctrl->admin_q); + else + blk_mq_wait_quiesce_done(ctrl->admin_q); } EXPORT_SYMBOL_GPL(nvme_stop_admin_queue); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1344553afe70..b731c2983515 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2665,6 +2665,40 @@ scsi_target_resume(struct scsi_target *starget) } EXPORT_SYMBOL(scsi_target_resume); +static int __scsi_internal_device_block_nowait(struct scsi_device *sdev) +{ + if (scsi_device_set_state(sdev, SDEV_BLOCK)) + return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK); + + return 0; +} + +void scsi_start_queue(struct scsi_device *sdev) +{ + if (cmpxchg(&sdev->queue_stopped, 1, 0)) + blk_mq_unquiesce_queue(sdev->request_queue); +} + +static void scsi_stop_queue(struct scsi_device *sdev, bool nowait) +{ + /* + * The atomic variable of ->queue_stopped covers that + * blk_mq_quiesce_queue* is balanced with blk_mq_unquiesce_queue. + * + * However, we still need to wait until quiesce is done + * in case that queue has been stopped. 
+ */ + if (!cmpxchg(&sdev->queue_stopped, 0, 1)) { + if (nowait) + blk_mq_quiesce_queue_nowait(sdev->request_queue); + else + blk_mq_quiesce_queue(sdev->request_queue); + } else { + if (!nowait) + blk_mq_wait_quiesce_done(sdev->request_queue); + } +} + /** * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state * @sdev: device to block @@ -2681,24 +2715,16 @@ EXPORT_SYMBOL(scsi_target_resume); */ int scsi_internal_device_block_nowait(struct scsi_device *sdev) { - struct request_queue *q = sdev->request_queue; - int err = 0; - - err = scsi_device_set_state(sdev, SDEV_BLOCK); - if (err) { - err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK); - - if (err) - return err; - } + int ret = __scsi_internal_device_block_nowait(sdev); /* * The device has transitioned to SDEV_BLOCK. Stop the * block layer from calling the midlayer with this device's * request queue. */ - blk_mq_quiesce_queue_nowait(q); - return 0; + if (!ret) + scsi_stop_queue(sdev, true); + return ret; } EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); @@ -2719,25 +2745,17 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); */ static int scsi_internal_device_block(struct scsi_device *sdev) { - struct request_queue *q = sdev->request_queue; int err; mutex_lock(&sdev->state_mutex); - err = scsi_internal_device_block_nowait(sdev); + err = __scsi_internal_device_block_nowait(sdev); if (err == 0) - blk_mq_quiesce_queue(q); + scsi_stop_queue(sdev, false); mutex_unlock(&sdev->state_mutex); return err; } -void scsi_start_queue(struct scsi_device *sdev) -{ - struct request_queue *q = sdev->request_queue; - - blk_mq_unquiesce_queue(q); -} - /** * scsi_internal_device_unblock_nowait - resume a device after a block request * @sdev: device to resume diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8682663e7368..2949d9ac7484 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -798,6 +798,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void 
blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); +void blk_mq_wait_quiesce_done(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 79c3045611fa..83a7890f1479 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -207,6 +207,7 @@ struct scsi_device { * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ + unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ |