summary | refs | log | tree | commit | diff
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
author: Keith Busch <kbusch@kernel.org> 2020-08-27 10:38:57 -0700
committer: Sagi Grimberg <sagi@grimberg.me> 2020-08-28 16:43:57 -0700
commit: e83d776f9f98b4af18d67f05f9d1f3042dbe62c7 (patch)
tree: acff2f2d7843fb0f2a3d8dfebc4642e0f67df63c /drivers/nvme/host/core.c
parent: 192f6c29bb28bfd0a17e6ad331d09f1ec84143d0 (diff)
download: lwn-e83d776f9f98b4af18d67f05f9d1f3042dbe62c7.tar.gz
download: lwn-e83d776f9f98b4af18d67f05f9d1f3042dbe62c7.zip
nvme: only use power of two io boundaries
The kernel requires a power of two for boundaries because that's the only way it can efficiently split commands that cross them. A controller, however, may report a non-power of two boundary.

The driver had been rounding the controller's value to one the kernel can use, but splitting on the wrong boundary provides no benefit on the device side, and incurs additional submission overhead from non-optimal splits.

Don't provide any boundary hint if the controller's value can't be used and log a warning when first scanning a disk's unreported IO boundary. Since the chunk sector logic has grown, move it to a separate function.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- drivers/nvme/host/core.c 47
1 files changed, 38 insertions, 9 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d6186208abf9..5702a3843746 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
+static inline bool nvme_first_scan(struct gendisk *disk)
+{
+ /* nvme_alloc_ns() scans the disk prior to adding it */
+ return !(disk->flags & GENHD_FL_UP);
+}
+
+static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ u32 iob;
+
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+ is_power_of_2(ctrl->max_hw_sectors))
+ iob = ctrl->max_hw_sectors;
+ else
+ iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+
+ if (!iob)
+ return;
+
+ if (!is_power_of_2(iob)) {
+ if (nvme_first_scan(ns->disk))
+ pr_warn("%s: ignoring unaligned IO boundary:%u\n",
+ ns->disk->disk_name, iob);
+ return;
+ }
+
+ if (blk_queue_is_zoned(ns->disk->queue)) {
+ if (nvme_first_scan(ns->disk))
+ pr_warn("%s: ignoring zoned namespace IO boundary\n",
+ ns->disk->disk_name);
+ return;
+ }
+
+ blk_queue_chunk_sectors(ns->queue, iob);
+}
+
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
- u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
@@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return -ENODEV;
}
- if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
- is_power_of_2(ctrl->max_hw_sectors))
- iob = ctrl->max_hw_sectors;
- else
- iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
-
ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */
@@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
}
}
- if (iob && !blk_queue_is_zoned(ns->queue))
- blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
+ nvme_set_chunk_sectors(ns, id);
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {