diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-12-20 13:37:58 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-12-20 13:37:58 -0800 |
commit | 11167b29e53b9a06635309445ead7edfd54e6616 (patch) | |
tree | dcce6411b09ea57feb7cd11ff8e76841b6558142 | |
parent | 7c05bd92305d13e18945270b7bfaf300d53f6ed2 (diff) | |
parent | 85672ca9ceeaa1dcf2777a7048af5f4aee3fd02b (diff) | |
download | lwn-11167b29e53b9a06635309445ead7edfd54e6616.tar.gz lwn-11167b29e53b9a06635309445ead7edfd54e6616.zip |
Merge tag 'block-6.13-20241220' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:
- Minor cleanups for bdev/nvme using the blk_validate_block_size() helper
- Revert of a deadlock fix that still needs more work
- Fix a UAF of hctx in the cpu hotplug code
* tag 'block-6.13-20241220' of git://git.kernel.dk/linux:
block: avoid to reuse `hctx` not removed from cpuhp callback list
block: Revert "block: Fix potential deadlock while freezing queue and acquiring sysfs_lock"
nvme: use blk_validate_block_size() for max LBA check
block/bdev: use helper for max block size check
-rw-r--r-- | block/bdev.c | 3 | ||||
-rw-r--r-- | block/blk-mq-sysfs.c | 16 | ||||
-rw-r--r-- | block/blk-mq.c | 40 | ||||
-rw-r--r-- | block/blk-sysfs.c | 4 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 2 |
5 files changed, 35 insertions, 30 deletions
diff --git a/block/bdev.c b/block/bdev.c index 738e3c8457e7..9d73a8fbf7f9 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -155,8 +155,7 @@ int set_blocksize(struct file *file, int size) struct inode *inode = file->f_mapping->host; struct block_device *bdev = I_BDEV(inode); - /* Size must be a power of two, and between 512 and PAGE_SIZE */ - if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) + if (blk_validate_block_size(size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index cd5ea6eaa76b..156e9bb07abf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -275,13 +275,15 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - lockdep_assert_held(&q->sysfs_dir_lock); - + mutex_lock(&q->sysfs_dir_lock); if (!q->mq_sysfs_init_done) - return; + goto unlock; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); + +unlock: + mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -290,10 +292,9 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - lockdep_assert_held(&q->sysfs_dir_lock); - + mutex_lock(&q->sysfs_dir_lock); if (!q->mq_sysfs_init_done) - return ret; + goto unlock; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -301,5 +302,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } +unlock: + mutex_unlock(&q->sysfs_dir_lock); + return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 6b6111513986..8ac19d4ae3c0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4412,6 +4412,15 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); +/* + * Only hctx removed from cpuhp list can be reused + */ +static bool blk_mq_hctx_is_reusable(struct blk_mq_hw_ctx *hctx) +{ + return 
hlist_unhashed(&hctx->cpuhp_online) && + hlist_unhashed(&hctx->cpuhp_dead); +} + static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( struct blk_mq_tag_set *set, struct request_queue *q, int hctx_idx, int node) @@ -4421,7 +4430,7 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( /* reuse dead hctx first */ spin_lock(&q->unused_hctx_lock); list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { - if (tmp->numa_node == node) { + if (tmp->numa_node == node && blk_mq_hctx_is_reusable(tmp)) { hctx = tmp; break; } @@ -4453,8 +4462,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, unsigned long i, j; /* protect against switching io scheduler */ - lockdep_assert_held(&q->sysfs_lock); - + mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int old_node; int node = blk_mq_get_hctx_node(set, i); @@ -4487,6 +4495,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); + mutex_unlock(&q->sysfs_lock); /* unregister cpuhp callbacks for exited hctxs */ blk_mq_remove_hw_queues_cpuhp(q); @@ -4518,14 +4527,10 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, xa_init(&q->hctx_table); - mutex_lock(&q->sysfs_lock); - blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) goto err_hctxs; - mutex_unlock(&q->sysfs_lock); - INIT_WORK(&q->timeout_work, blk_mq_timeout_work); blk_queue_rq_timeout(q, set->timeout ? 
set->timeout : 30 * HZ); @@ -4544,7 +4549,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, return 0; err_hctxs: - mutex_unlock(&q->sysfs_lock); blk_mq_release(q); err_exit: q->mq_ops = NULL; @@ -4925,12 +4929,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head, return false; /* q->elevator needs protection from ->sysfs_lock */ - lockdep_assert_held(&q->sysfs_lock); + mutex_lock(&q->sysfs_lock); /* the check has to be done with holding sysfs_lock */ if (!q->elevator) { kfree(qe); - goto out; + goto unlock; } INIT_LIST_HEAD(&qe->node); @@ -4940,7 +4944,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head, __elevator_get(qe->type); list_add(&qe->node, head); elevator_disable(q); -out: +unlock: + mutex_unlock(&q->sysfs_lock); + return true; } @@ -4969,9 +4975,11 @@ static void blk_mq_elv_switch_back(struct list_head *head, list_del(&qe->node); kfree(qe); + mutex_lock(&q->sysfs_lock); elevator_switch(q, t); /* drop the reference acquired in blk_mq_elv_switch_none */ elevator_put(t); + mutex_unlock(&q->sysfs_lock); } static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, @@ -4991,11 +4999,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; - list_for_each_entry(q, &set->tag_list, tag_set_list) { - mutex_lock(&q->sysfs_dir_lock); - mutex_lock(&q->sysfs_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue(q); - } /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. 
We will switch back once we are done @@ -5051,11 +5056,8 @@ switch_back: list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_elv_switch_back(&head, q); - list_for_each_entry(q, &set->tag_list, tag_set_list) { + list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); - mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); - } /* Free the excess tags when nr_hw_queues shrink. */ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 64f70c713d2f..767598e719ab 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -706,11 +706,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, if (entry->load_module) entry->load_module(disk, page, length); - mutex_lock(&q->sysfs_lock); blk_mq_freeze_queue(q); + mutex_lock(&q->sysfs_lock); res = entry->store(disk, page, length); - blk_mq_unfreeze_queue(q); mutex_unlock(&q->sysfs_lock); + blk_mq_unfreeze_queue(q); return res; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d169a30eb935..a970168a3014 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2034,7 +2034,7 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, * or smaller than a sector size yet, so catch this early and don't * allow block I/O. */ - if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) { + if (blk_validate_block_size(bs)) { bs = (1 << 9); valid = false; } |