summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2024-09-17 08:32:53 -0600
committerJens Axboe <axboe@kernel.dk>2024-09-17 08:32:53 -0600
commit42b16d3ac371a2fac9b6f08fd75f23f34ba3955a (patch)
treed15a2fe1f7441361b972bc787af5122adc3fcb71 /block
parent4208c562a27899212e8046080555e0f204e0579a (diff)
parent98f7e32f20d28ec452afb208f9cffc08448a2652 (diff)
downloadlwn-42b16d3ac371a2fac9b6f08fd75f23f34ba3955a.tar.gz
lwn-42b16d3ac371a2fac9b6f08fd75f23f34ba3955a.zip
Merge tag 'v6.11' into for-6.12/block
Merge in 6.11 final to get the fix for preventing deadlocks on an elevator switch, as there's a fixup for that patch. * tag 'v6.11': (1788 commits) Linux 6.11 Revert "KVM: VMX: Always honor guest PAT on CPUs that support self-snoop" pinctrl: pinctrl-cy8c95x0: Fix regcache cifs: Fix signature miscalculation mm: avoid leaving partial pfn mappings around in error case drm/xe/client: add missing bo locking in show_meminfo() drm/xe/client: fix deadlock in show_meminfo() drm/xe/oa: Enable Xe2+ PES disaggregation drm/xe/display: fix compat IS_DISPLAY_STEP() range end drm/xe: Fix access_ok check in user_fence_create drm/xe: Fix possible UAF in guc_exec_queue_process_msg drm/xe: Remove fence check from send_tlb_invalidation drm/xe/gt: Remove double include net: netfilter: move nf flowtable bpf initialization in nf_flow_table_module_init() PCI: Fix potential deadlock in pcim_intx() workqueue: Clear worker->pool in the worker thread context net: tighten bad gso csum offset check in virtio_net_hdr netlink: specs: mptcp: fix port endianness net: dpaa: Pad packets to ETH_ZLEN mptcp: pm: Fix uaf in __timer_delete_sync ...
Diffstat (limited to 'block')
-rw-r--r--block/bio-integrity.c4
-rw-r--r--block/blk-lib.c23
-rw-r--r--block/blk-mq-tag.c5
-rw-r--r--block/blk-sysfs.c22
-rw-r--r--block/blk-throttle.c11
-rw-r--r--block/elevator.c21
-rw-r--r--block/elevator.h2
7 files changed, 58 insertions, 30 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 357a022eed41..88e3ad73c385 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -167,10 +167,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_integrity_payload *bip = bio_integrity(bio);
- if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
- queue_max_hw_sectors(q))
- return 0;
-
if (bip->bip_vcnt > 0) {
struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
bool same_page = false;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f735efa6c94..4c9f20a689f7 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -111,13 +111,20 @@ static sector_t bio_write_zeroes_limit(struct block_device *bdev)
(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
}
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
static void __blkdev_issue_write_zeroes(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
+ struct bio **biop, unsigned flags, sector_t limit)
{
+
while (nr_sects) {
- unsigned int len = min_t(sector_t, nr_sects,
- bio_write_zeroes_limit(bdev));
+ unsigned int len = min(nr_sects, limit);
struct bio *bio;
if ((flags & BLKDEV_ZERO_KILLABLE) &&
@@ -141,12 +148,14 @@ static void __blkdev_issue_write_zeroes(struct block_device *bdev,
static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp, unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
struct bio *bio = NULL;
struct blk_plug plug;
int ret = 0;
blk_start_plug(&plug);
- __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags);
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+ flags, limit);
if (bio) {
if ((flags & BLKDEV_ZERO_KILLABLE) &&
fatal_signal_pending(current)) {
@@ -265,12 +274,14 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
+
if (bdev_read_only(bdev))
return -EPERM;
- if (bdev_write_zeroes_sectors(bdev)) {
+ if (limit) {
__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
- gfp_mask, biop, flags);
+ gfp_mask, biop, flags, limit);
} else {
if (flags & BLKDEV_ZERO_NOFALLBACK)
return -EOPNOTSUPP;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cc57e2dd9a0b..2cafcf11ee8b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
unsigned int users;
+ unsigned long flags;
struct blk_mq_tags *tags = hctx->tags;
/*
@@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
return;
}
- spin_lock_irq(&tags->lock);
+ spin_lock_irqsave(&tags->lock, flags);
users = tags->active_queues + 1;
WRITE_ONCE(tags->active_queues, users);
blk_mq_update_wake_batch(tags, users);
- spin_unlock_irq(&tags->lock);
+ spin_unlock_irqrestore(&tags->lock, flags);
}
/*
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 60116d13cb80..e85941bec857 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -23,6 +23,7 @@
struct queue_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct gendisk *disk, char *page);
+ int (*load_module)(struct gendisk *disk, const char *page, size_t count);
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
};
@@ -413,6 +414,14 @@ static struct queue_sysfs_entry _prefix##_entry = { \
.store = _prefix##_store, \
};
+#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
+static struct queue_sysfs_entry _prefix##_entry = { \
+ .attr = { .name = _name, .mode = 0644 }, \
+ .show = _prefix##_show, \
+ .load_module = _prefix##_load_module, \
+ .store = _prefix##_store, \
+}
+
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
@@ -420,7 +429,7 @@ QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
-QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+QUEUE_RW_LOAD_MODULE_ENTRY(elv_iosched, "scheduler");
QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
@@ -670,6 +679,17 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
+ /*
+ * If the attribute needs to load a module, do it before freezing the
+ * queue to ensure that the module file can be read when the request
+ * queue is the one for the device storing the module file.
+ */
+ if (entry->load_module) {
+ res = entry->load_module(disk, page, length);
+ if (res)
+ return res;
+ }
+
blk_mq_freeze_queue(q);
mutex_lock(&q->sysfs_lock);
res = entry->store(disk, page, length);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9c5bbd261724..2c4192e12efa 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue;
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
-/* We measure latency for request size from <= 4k to >= 1M */
-#define LATENCY_BUCKET_SIZE 9
-
-struct latency_bucket {
- unsigned long total_latency; /* ns / 1024 */
- int samples;
-};
-
struct throtl_data
{
/* service tree for active throtl groups */
@@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
return tg->iops[rw];
}
-#define request_bucket_index(sectors) \
- clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
-
/**
* throtl_log - log debug message via blktrace
* @sq: the service_queue being reported
diff --git a/block/elevator.c b/block/elevator.c
index f13d552a32c8..c355b55d0107 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -698,17 +698,26 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return 0;
e = elevator_find_get(q, elevator_name);
- if (!e) {
- request_module("%s-iosched", elevator_name);
- e = elevator_find_get(q, elevator_name);
- if (!e)
- return -EINVAL;
- }
+ if (!e)
+ return -EINVAL;
ret = elevator_switch(q, e);
elevator_put(e);
return ret;
}
+int elv_iosched_load_module(struct gendisk *disk, const char *buf,
+ size_t count)
+{
+ char elevator_name[ELV_NAME_MAX];
+
+ if (!elv_support_iosched(disk->queue))
+ return -EOPNOTSUPP;
+
+ strscpy(elevator_name, buf, sizeof(elevator_name));
+
+ return request_module("%s-iosched", strstrip(elevator_name));
+}
+
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
diff --git a/block/elevator.h b/block/elevator.h
index 3fe18e1a8692..2a78544bf201 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -148,6 +148,8 @@ extern void elv_unregister(struct elevator_type *);
* io scheduler sysfs switching
*/
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
+int elv_iosched_load_module(struct gendisk *disk, const char *page,
+ size_t count);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);