summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2022-10-30 16:50:15 +0100
committerChristoph Hellwig <hch@lst.de>2022-11-15 10:50:31 +0100
commit1b96f862ecccb3e6f950eba584bebf22955cecc5 (patch)
tree85ca6d248f86146a2d487a0c90fbc1280e179124
parente4fbcf32c860f98103ca7f1dc8c0dc69e2219ec6 (diff)
downloadlwn-1b96f862ecccb3e6f950eba584bebf22955cecc5.tar.gz
lwn-1b96f862ecccb3e6f950eba584bebf22955cecc5.zip
nvme: implement the DEAC bit for the Write Zeroes command
While the specification allows devices to either deallocate data or to actually write zeroes on any Write Zeroes command, many SSDs only do the sensible thing and deallocate data when the DEAC bit is specified. Set it when it is supported and the caller doesn't explicitly opt out of deallocation. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Keith Busch <kbusch@kernel.org> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r--drivers/nvme/host/core.c13
-rw-r--r--drivers/nvme/host/nvme.h1
-rw-r--r--include/linux/nvme.h1
3 files changed, 14 insertions, 1 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f94b05c585cb..1a87a072fbed 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -850,8 +850,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
+
if (nvme_ns_has_pi(ns)) {
- cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
@@ -2003,6 +2006,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
}
}
+ /*
+ * Only set the DEAC bit if the device guarantees that reads from
+ * deallocated data return zeroes. While the DEAC bit does not
+ * require that, it must be a no-op if reads from deallocated data
+ * do not return zeroes.
+ */
+ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ ns->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f9df10653f3c..16b34a491495 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -455,6 +455,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
+ NVME_NS_DEAC = 1 << 2, /* DEAC bit in Write Zeroes supported; needs its own bit, an implicit value would be 3 and alias the two flags above */
};
struct nvme_ns {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1d102b662e88..d6be2a686100 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -964,6 +964,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_WZ_DEAC = 1 << 9,
};
struct nvme_dsm_cmd {