From 8e41226324e7c00f2087bfbc9f470d665e92df18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:54:27 +0200 Subject: nvme: switch to uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/nvme.h') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b625bacf37ef..e400a69fa1d3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,6 +16,7 @@ #define _LINUX_NVME_H #include +#include /* NQN names in commands fields specified one size */ #define NVMF_NQN_FIELD_LEN 256 @@ -843,7 +844,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - __u8 hostid[16]; + uuid_t hostid; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; -- cgit v1.2.3 From 39673e1995381b09a63cc7e9d0aea7cf871cb359 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 15:36:28 +0100 Subject: nvme.h: add struct nvme_host_mem_buf_desc and HMB flags Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- include/linux/nvme.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/nvme.h') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e400a69fa1d3..180a2fdbcaef 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -587,6 +587,11 @@ struct nvme_feat_auto_pst { __le64 entries[32]; }; +enum { + NVME_HOST_MEM_ENABLE = (1 << 0), + NVME_HOST_MEM_RETURN = (1 << 1), +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -671,6 +676,12 @@ struct nvme_features { __u32 rsvd12[4]; }; +struct nvme_host_mem_buf_desc { + __le64 addr; + __le32 size; + __u32 rsvd; +}; + struct nvme_create_cq { __u8 opcode; __u8 flags; -- cgit v1.2.3 From b85cf7348ab50e2042b732e19031b1d22eedc741 Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Fri, 12 May 2017 17:12:03 +0200 Subject: nvme.h: 
add dword 12 - 15 fields to struct nvme_features Signed-off-by: Arnav Dawn [hch: split from a larger patch, new changelog] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- include/linux/nvme.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/nvme.h') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 180a2fdbcaef..51ca4771be2c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -673,7 +673,10 @@ struct nvme_features { union nvme_data_ptr dptr; __le32 fid; __le32 dword11; - __u32 rsvd12[4]; + __le32 dword12; + __le32 dword13; + __le32 dword14; + __le32 dword15; }; struct nvme_host_mem_buf_desc { -- cgit v1.2.3 From 97f6ef6464dbd235a4d9bdfc05d949aab24fc927 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 24 May 2017 16:39:55 +0800 Subject: nvme-pci: remap BAR0 to cover admin CQ doorbell for large stride The existing driver initially maps 8192 bytes of BAR0 which is intended to cover doorbells of admin SQ and CQ. However, if a large stride, e.g. 10, is used, the doorbell of admin CQ will be out of 8192 bytes. Consequently, a page fault will be raised when the admin CQ doorbell is accessed in nvme_configure_admin_queue(). This patch fixes this issue by remapping BAR0 before accessing admin CQ doorbell if the initial mapping is not enough. 
Signed-off-by: Xu Yu Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 65 ++++++++++++++++++++++++++++++++----------------- include/linux/nvme.h | 1 + 2 files changed, 44 insertions(+), 22 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ebd5cdfc0174..5278ed9811a6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -95,6 +95,7 @@ struct nvme_dev { int q_depth; u32 db_stride; void __iomem *bar; + unsigned long bar_mapped_size; struct work_struct reset_work; struct work_struct remove_work; struct timer_list watchdog_timer; @@ -1320,6 +1321,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) return 0; } +static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) +{ + return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride); +} + +static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (size <= dev->bar_mapped_size) + return 0; + if (size > pci_resource_len(pdev, 0)) + return -ENOMEM; + if (dev->bar) + iounmap(dev->bar); + dev->bar = ioremap(pci_resource_start(pdev, 0), size); + if (!dev->bar) { + dev->bar_mapped_size = 0; + return -ENOMEM; + } + dev->bar_mapped_size = size; + dev->dbs = dev->bar + NVME_REG_DBS; + + return 0; +} + static int nvme_configure_admin_queue(struct nvme_dev *dev) { int result; @@ -1327,6 +1354,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; + result = nvme_remap_bar(dev, db_bar_size(dev, 0)); + if (result < 0) + return result; + dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? 
NVME_CAP_NSSRC(cap) : 0; @@ -1679,16 +1710,12 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) nvme_free_host_mem(dev); } -static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) -{ - return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); -} - static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, nr_io_queues, size; + int result, nr_io_queues; + unsigned long size; nr_io_queues = num_online_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); @@ -1707,20 +1734,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nvme_release_cmb(dev); } - size = db_bar_size(dev, nr_io_queues); - if (size > 8192) { - iounmap(dev->bar); - do { - dev->bar = ioremap(pci_resource_start(pdev, 0), size); - if (dev->bar) - break; - if (!--nr_io_queues) - return -ENOMEM; - size = db_bar_size(dev, nr_io_queues); - } while (1); - dev->dbs = dev->bar + 4096; - adminq->q_db = dev->dbs; - } + do { + size = db_bar_size(dev, nr_io_queues); + result = nvme_remap_bar(dev, size); + if (!result) + break; + if (!--nr_io_queues) + return -ENOMEM; + } while (1); + adminq->q_db = dev->dbs; /* Deregister the admin queue's interrupt */ pci_free_irq(pdev, 0, adminq); @@ -2240,8 +2262,7 @@ static int nvme_dev_map(struct nvme_dev *dev) if (pci_request_mem_regions(pdev, "nvme")) return -ENODEV; - dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); - if (!dev->bar) + if (nvme_remap_bar(dev, NVME_REG_DBS + 4096)) goto release; return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 51ca4771be2c..706a0fbfe28e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -102,6 +102,7 @@ enum { NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ }; #define 
NVME_CAP_MQES(cap) ((cap) & 0xffff) -- cgit v1.2.3 From 0add5e8e588c65c5ac6a3255f624260bf889128d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:29 +0200 Subject: nvmet: use NVME_IDENTIFY_DATA_SIZE Use NVME_IDENTIFY_DATA_SIZE define instead of hard coding the magic 4096 value. Signed-off-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke [hch: converted three more users] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/lightnvm.c | 2 +- drivers/nvme/host/pci.c | 4 ++-- drivers/nvme/target/admin-cmd.c | 4 ++-- drivers/nvme/target/discovery.c | 2 +- include/linux/nvme.h | 2 ++ 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 2d7a2889866f..e1ef8e9b41cb 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cd1725095531..63e5a3d3f0dc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -183,8 +183,8 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); - BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); - BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); + BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct 
nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index ff1f97006322..96c144325443 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -336,7 +336,7 @@ out: static void nvmet_execute_identify_nslist(struct nvmet_req *req) { - static const int buf_size = 4096; + static const int buf_size = NVME_IDENTIFY_DATA_SIZE; struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ns *ns; u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid); @@ -504,7 +504,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) } break; case nvme_admin_identify: - req->data_len = 4096; + req->data_len = NVME_IDENTIFY_DATA_SIZE; switch (cmd->identify.cns) { case NVME_ID_CNS_NS: req->execute = nvmet_execute_identify_ns; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 1aaf597e81fc..c7a90384dd75 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: - req->data_len = 4096; + req->data_len = NVME_IDENTIFY_DATA_SIZE; switch (cmd->identify.cns) { case NVME_ID_CNS_CTRL: req->execute = diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 706a0fbfe28e..782d557c5535 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -665,6 +665,8 @@ struct nvme_identify { __u32 rsvd11[5]; }; +#define NVME_IDENTIFY_DATA_SIZE 4096 + struct nvme_features { __u8 opcode; __u8 flags; -- cgit v1.2.3 From af8b86e9a7ffb9528e745b7ea25b18545699482c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:30 +0200 Subject: nvme: introduce NVMe Namespace Identification Descriptor structures Signed-off-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Hannes 
Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux/nvme.h') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 782d557c5535..f2344aa923e8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -290,6 +290,7 @@ enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01, NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, @@ -316,6 +317,22 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; +struct nvme_ns_id_desc { + __u8 nidt; + __u8 nidl; + __le16 reserved; +}; + +#define NVME_NIDT_EUI64_LEN 8 +#define NVME_NIDT_NGUID_LEN 16 +#define NVME_NIDT_UUID_LEN 16 + +enum { + NVME_NIDT_EUI64 = 0x01, + NVME_NIDT_NGUID = 0x02, + NVME_NIDT_UUID = 0x03, +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; -- cgit v1.2.3 From c61d788b8b1fe57aaf03ac0b5c636c7388ebfd20 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:36 +0200 Subject: nvmet: allow overriding the NVMe VS via configfs Allow overriding the announced NVMe Version of a subsystem via configfs. This is particularly helpful when debugging new features for the host or target side without bumping the hard coded version (as the target might not be fully compliant to the announced version yet).
Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 4 ++++ 2 files changed, 41 insertions(+) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 83bfe28fe7da..a358ecd93e11 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -650,8 +650,45 @@ out_unlock: CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host); +static ssize_t nvmet_subsys_version_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + + if (NVME_TERTIARY(subsys->ver)) + return snprintf(page, PAGE_SIZE, "%d.%d.%d\n", + (int)NVME_MAJOR(subsys->ver), + (int)NVME_MINOR(subsys->ver), + (int)NVME_TERTIARY(subsys->ver)); + else + return snprintf(page, PAGE_SIZE, "%d.%d\n", + (int)NVME_MAJOR(subsys->ver), + (int)NVME_MINOR(subsys->ver)); +} + +static ssize_t nvmet_subsys_version_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + int major, minor, tertiary = 0; + int ret; + + + ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); + if (ret != 2 && ret != 3) + return -EINVAL; + + down_write(&nvmet_config_sem); + subsys->ver = NVME_VS(major, minor, tertiary); + up_write(&nvmet_config_sem); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, version); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, + &nvmet_subsys_attr_version, NULL, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f2344aa923e8..acb484935603 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1085,4 +1085,8 @@ struct nvme_completion { #define NVME_VS(major, minor, tertiary) \ (((major) << 16) | ((minor) << 8) | (tertiary)) +#define NVME_MAJOR(ver) ((ver) >> 16) +#define NVME_MINOR(ver) 
(((ver) >> 8) & 0xff) +#define NVME_TERTIARY(ver) ((ver) & 0xff) + #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 435e809058bafaa8f0bf8f55f37508b01734c9a5 Mon Sep 17 00:00:00 2001 From: Guan Junxiong Date: Tue, 13 Jun 2017 09:26:15 +0800 Subject: nvme: add fields into identify controller data structure Add the fields that are new in NVMe 1.3 (EDSTT, DSTO, FWUG, HCTMA, MNTMT, MXTMT, and SANICAP) to the identify controller data structure. Signed-off-by: Guan Junxiong Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index acb484935603..6d476f242ee6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -209,9 +209,15 @@ struct nvme_id_ctrl { __u8 tnvmcap[16]; __u8 unvmcap[16]; __le32 rpmbs; - __u8 rsvd316[4]; + __le16 edstt; + __u8 dsto; + __u8 fwug; __le16 kas; - __u8 rsvd322[190]; + __le16 hctma; + __le16 mntmt; + __le16 mxtmt; + __le32 sanicap; + __u8 rsvd332[180]; __u8 sqes; __u8 cqes; __le16 maxcmd; -- cgit v1.2.3 From 6b8190d61a622e095f04451437953acd2d74b371 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 15 Jun 2017 10:44:30 -0600 Subject: nvme: implement NS Optimal IO Boundary from 1.3 Spec The NVMe 1.3 spec introduces Namespace Optimal IO Boundaries (NOIOB), which standardizes the stripe mechanism we currently have quirks for. This patch implements the necessary logic to handle this new feature.
Signed-off-by: Scott Bauer Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 +++++++++ drivers/nvme/host/nvme.h | 1 + include/linux/nvme.h | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4ff5114f467d..0ddd6b9af7fc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1080,6 +1080,12 @@ static void nvme_init_integrity(struct nvme_ns *ns) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ +static void nvme_set_chunk_size(struct nvme_ns *ns) +{ + u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); +} + static void nvme_config_discard(struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1139,12 +1145,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->lba_shift == 0) ns->lba_shift = 9; bs = 1 << ns->lba_shift; + ns->noiob = le16_to_cpu(id->noiob); blk_mq_freeze_queue(disk->queue); if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); + if (ns->noiob) + nvme_set_chunk_size(ns); if (ns->ms && !blk_get_integrity(disk) && !ns->ext) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f27c58b860f4..ec8c7363934d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -202,6 +202,7 @@ struct nvme_ns { bool ext; u8 pi_type; unsigned long flags; + u16 noiob; #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6d476f242ee6..291587a0743f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -282,7 +282,7 @@ struct nvme_id_ns { __le16 nabsn; __le16 nabo; __le16 nabspf; - __u16 rsvd46; + __le16 noiob; __u8 nvmcap[16]; __u8 rsvd64[40]; __u8 
nguid[16]; -- cgit v1.2.3 From f5d118406247acfc4fc481e441e01ea4d6318fdc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jun 2017 12:03:06 -0600 Subject: nvme: add support for streams and directives This adds support for Directives in NVMe, in particular for the Streams directive. Support for Directives is a new feature in NVMe 1.3. It allows a user to pass in information about where to store the data, so that the device can do so most efficiently. If an application is managing and writing data with different life times, mixing differently retentioned data onto the same locations on flash can cause write amplification to grow. This, in turn, will reduce performance and life time of the device. Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 151 +++++++++++++++++++++++++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++ include/linux/nvme.h | 48 +++++++++++++++ 3 files changed, 199 insertions(+), 4 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aee37b73231d..5c50f53e32f3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -65,6 +65,10 @@ static bool force_apst; module_param(force_apst, bool, 0644); MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); +static bool streams; +module_param(streams, bool, 0644); +MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); + struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); @@ -297,6 +301,105 @@ struct request *nvme_alloc_request(struct request_queue *q, } EXPORT_SYMBOL_GPL(nvme_alloc_request); +static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + + c.directive.opcode = nvme_admin_directive_send; + c.directive.nsid = cpu_to_le32(0xffffffff); + c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE; + c.directive.dtype = NVME_DIR_IDENTIFY; +
c.directive.tdtype = NVME_DIR_STREAMS; + c.directive.endir = enable ? NVME_DIR_ENDIR : 0; + + return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0); +} + +static int nvme_disable_streams(struct nvme_ctrl *ctrl) +{ + return nvme_toggle_streams(ctrl, false); +} + +static int nvme_enable_streams(struct nvme_ctrl *ctrl) +{ + return nvme_toggle_streams(ctrl, true); +} + +static int nvme_get_stream_params(struct nvme_ctrl *ctrl, + struct streams_directive_params *s, u32 nsid) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + memset(s, 0, sizeof(*s)); + + c.directive.opcode = nvme_admin_directive_recv; + c.directive.nsid = cpu_to_le32(nsid); + c.directive.numd = sizeof(*s); + c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; + c.directive.dtype = NVME_DIR_STREAMS; + + return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s)); +} + +static int nvme_configure_directives(struct nvme_ctrl *ctrl) +{ + struct streams_directive_params s; + int ret; + + if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)) + return 0; + if (!streams) + return 0; + + ret = nvme_enable_streams(ctrl); + if (ret) + return ret; + + ret = nvme_get_stream_params(ctrl, &s, 0xffffffff); + if (ret) + return ret; + + ctrl->nssa = le16_to_cpu(s.nssa); + if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { + dev_info(ctrl->device, "too few streams (%u) available\n", + ctrl->nssa); + nvme_disable_streams(ctrl); + return 0; + } + + ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); + dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); + return 0; +} + +/* + * Check if 'req' has a write hint associated with it. If it does, assign + * a valid namespace stream to the write. 
+ */ +static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, + struct request *req, u16 *control, + u32 *dsmgmt) +{ + enum rw_hint streamid = req->write_hint; + + if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE) + streamid = 0; + else { + streamid--; + if (WARN_ON_ONCE(streamid > ctrl->nr_streams)) + return; + + *control |= NVME_RW_DTYPE_STREAMS; + *dsmgmt |= streamid << 16; + } + + if (streamid < ARRAY_SIZE(req->q->write_hints)) + req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; +} + static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { @@ -348,6 +451,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + struct nvme_ctrl *ctrl = ns->ctrl; u16 control = 0; u32 dsmgmt = 0; @@ -375,6 +479,9 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) + nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); + if (ns->ms) { switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE3: @@ -1094,8 +1201,15 @@ static void nvme_config_discard(struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - ns->queue->limits.discard_alignment = logical_block_size; - ns->queue->limits.discard_granularity = logical_block_size; + if (ctrl->nr_streams && ns->sws && ns->sgs) { + unsigned int sz = logical_block_size * ns->sws * ns->sgs; + + ns->queue->limits.discard_alignment = sz; + ns->queue->limits.discard_granularity = sz; + } else { + ns->queue->limits.discard_alignment = logical_block_size; + ns->queue->limits.discard_granularity = logical_block_size; + } blk_queue_max_discard_sectors(ns->queue, UINT_MAX); blk_queue_max_discard_segments(ns->queue, 
NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); @@ -1135,6 +1249,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; u16 bs; /* @@ -1149,7 +1264,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) blk_mq_freeze_queue(disk->queue); - if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); if (ns->noiob) @@ -1161,7 +1276,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); - if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) + if (ctrl->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); } @@ -1766,6 +1881,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_hide_latency_tolerance(ctrl->device); nvme_configure_apst(ctrl); + nvme_configure_directives(ctrl); ctrl->identified = true; @@ -2158,6 +2274,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) return ret; } +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +{ + struct streams_directive_params s; + int ret; + + if (!ctrl->nr_streams) + return 0; + + ret = nvme_get_stream_params(ctrl, &s, ns->ns_id); + if (ret) + return ret; + + ns->sws = le32_to_cpu(s.sws); + ns->sgs = le16_to_cpu(s.sgs); + + if (ns->sws) { + unsigned int bs = 1 << ns->lba_shift; + + blk_queue_io_min(ns->queue, bs * ns->sws); + if (ns->sgs) + blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); + } + + return 0; +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -2187,6 +2329,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, 
unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); + nvme_setup_streams_ns(ctrl, ns); sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ec8c7363934d..f616835afc4c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -147,6 +147,8 @@ struct nvme_ctrl { u16 oncs; u16 vid; u16 oacs; + u16 nssa; + u16 nr_streams; atomic_t abort_limit; u8 event_limit; u8 vwc; @@ -199,6 +201,8 @@ struct nvme_ns { unsigned ns_id; int lba_shift; u16 ms; + u16 sgs; + u32 sws; bool ext; u8 pi_type; unsigned long flags; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 291587a0743f..f516a975bb21 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -253,6 +253,7 @@ enum { NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, }; @@ -303,6 +304,19 @@ enum { NVME_ID_CNS_CTRL_LIST = 0x13, }; +enum { + NVME_DIR_IDENTIFY = 0x00, + NVME_DIR_STREAMS = 0x01, + NVME_DIR_SND_ID_OP_ENABLE = 0x01, + NVME_DIR_SND_ST_OP_REL_ID = 0x01, + NVME_DIR_SND_ST_OP_REL_RSC = 0x02, + NVME_DIR_RCV_ID_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_STATUS = 0x02, + NVME_DIR_RCV_ST_OP_RESOURCE = 0x03, + NVME_DIR_ENDIR = 0x01, +}; + enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, @@ -560,6 +574,7 @@ enum { NVME_RW_PRINFO_PRCHK_APP = 1 << 11, NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, + NVME_RW_DTYPE_STREAMS = 1 << 4, }; struct nvme_dsm_cmd { @@ -634,6 +649,8 @@ enum nvme_admin_opcode { nvme_admin_download_fw = 0x11, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, + nvme_admin_directive_send = 0x19, + nvme_admin_directive_recv = 0x1a, nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, @@ -797,6 
+814,24 @@ struct nvme_get_log_page_command { __u32 rsvd14[2]; }; +struct nvme_directive_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __le32 numd; + __u8 doper; + __u8 dtype; + __le16 dspec; + __u8 endir; + __u8 tdtype; + __u16 rsvd15; + + __u32 rsvd16[3]; +}; + /* * Fabrics subcommands. */ @@ -927,6 +962,18 @@ struct nvme_dbbuf { __u32 rsvd12[6]; }; +struct streams_directive_params { + __u16 msl; + __u16 nssa; + __u16 nsso; + __u8 rsvd[10]; + __u32 sws; + __u16 sgs; + __u16 nsa; + __u16 nso; + __u8 rsvd2[6]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -947,6 +994,7 @@ struct nvme_command { struct nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; struct nvme_dbbuf dbbuf; + struct nvme_directive_cmd directive; }; }; -- cgit v1.2.3 From 7aa1f42752f0d31a5bb6d0d5bac92fc8c2044ce2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 18 Jun 2017 16:15:59 +0300 Subject: nvme: use a single NVME_AQ_DEPTH and relax it to 32 No need to differentiate fabrics from pci/loop, also lower it to 32 as we don't really need 256 inflight admin commands. Signed-off-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/fabrics.c | 8 +------- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/pci.c | 1 - drivers/nvme/host/rdma.c | 10 +++++----- drivers/nvme/target/discovery.c | 2 +- drivers/nvme/target/loop.c | 4 +--- drivers/nvme/target/rdma.c | 2 +- include/linux/nvme.h | 2 +- 8 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ca2d4d70aec..a59a243b81c6 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -392,13 +392,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) cmd.connect.opcode = nvme_fabrics_command; cmd.connect.fctype = nvme_fabrics_type_connect; cmd.connect.qid = 0; - - /* - * fabrics spec sets a minimum of depth 32 for admin queue, - * so set the queue with this depth always until - * justification otherwise. - */ - cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); /* * Set keep-alive timeout in seconds granularity (ms * 1000) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5165007e86a6..5d5ecefd8dbe 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -36,7 +36,7 @@ */ #define NVME_FC_NR_AEN_COMMANDS 1 #define NVME_FC_AQ_BLKMQ_DEPTH \ - (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) + (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) #define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) enum nvme_fc_queue_flags { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2a9ee769ce9e..32a98e2740ad 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -36,7 +36,6 @@ #include "nvme.h" #define NVME_Q_DEPTH 1024 -#define NVME_AQ_DEPTH 256 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) diff --git 
a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 01dc723e6acf..bc0322bf7d27 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -48,7 +48,7 @@ */ #define NVME_RDMA_NR_AEN_COMMANDS 1 #define NVME_RDMA_AQ_BLKMQ_DEPTH \ - (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) + (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) struct nvme_rdma_device { struct ib_device *dev; @@ -719,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ret) goto requeue; - ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); + ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); if (ret) goto requeue; @@ -1291,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) * specified by the Fabrics standard. */ if (priv.qid == 0) { - priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); - priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH); + priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); } else { /* * current interpretation of the fabrics spec @@ -1530,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) { int error; - error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); + error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); if (error) return error; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index c7a90384dd75..8f3b57b4c97b 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, e->portid = port->disc_addr.portid; /* we support only dynamic controllers */ e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); - e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH); + e->asqsz = cpu_to_le16(NVME_AQ_DEPTH); e->subtype = type; memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 
f67606523724..86c09e2a1490 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -21,8 +21,6 @@ #include "../host/nvme.h" #include "../host/fabrics.h" -#define NVME_LOOP_AQ_DEPTH 256 - #define NVME_LOOP_MAX_SEGMENTS 256 /* @@ -31,7 +29,7 @@ */ #define NVME_LOOP_NR_AEN_COMMANDS 1 #define NVME_LOOP_AQ_BLKMQ_DEPTH \ - (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) + (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) struct nvme_loop_iod { struct nvme_request nvme_req; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9e45cde63376..32aa10b521c8 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn, queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; queue->send_queue_size = le16_to_cpu(req->hrqsize); - if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) + if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH) return NVME_RDMA_CM_INVALID_HSQSIZE; /* XXX: Should we enforce some kind of max for IO queues? 
*/ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f516a975bb21..6b8ee9e628e1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,7 +87,7 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; -#define NVMF_AQ_DEPTH 32 +#define NVME_AQ_DEPTH 32 enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ -- cgit v1.2.3 From dc1a0afbacaeaced8f5679a99047c0467f1099e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jul 2017 11:12:09 +0200 Subject: nvme: fix byte swapping in the streams code Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- include/linux/nvme.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/nvme.h') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cb96f4a7ae3a..3b77cfe5aa1e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = sizeof(*s); + c.directive.numd = cpu_to_le32(sizeof(*s)); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6b8ee9e628e1..bc74da018bdc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -963,14 +963,14 @@ struct nvme_dbbuf { }; struct streams_directive_params { - __u16 msl; - __u16 nssa; - __u16 nsso; + __le16 msl; + __le16 nssa; + __le16 nsso; __u8 rsvd[10]; - __u32 sws; - __u16 sgs; - __u16 nsa; - __u16 nso; + __le32 sws; + __le16 sgs; + __le16 nsa; + __le16 nso; __u8 rsvd2[6]; }; -- cgit v1.2.3