From d9d53ed3f77ff4057ce714c0d169c28a653504e7 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 13 Mar 2019 18:54:54 +0100 Subject: nvme: add get-feature to admin cmds tracer This will print the get-feature cmd in a more informative way. For example, running "nvme get-feature /dev/nvme0 -n 1 -f 0x9 -c 10" will trace: nvme-3907 [008] .... 1763.635054: nvme_setup_cmd: nvme0: qid=0, cmdid=6, nsid=1, flags=0x0, meta=0x0, cmd=(nvme_admin_get_features fid=0x9 sel=0x0 cdw11=0xa) -0 [001] d.h. 1763.635112: nvme_sq: nvme0: qid=0, head=27, tail=27 -0 [008] ..s. 1763.635121: nvme_complete_rq: nvme0: qid=0, cmdid=6, res=10, retries=0, flags=0x2, status=0 Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/trace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 58456de78bb2..5f24ea7a28eb 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -50,7 +50,19 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10) return ret; } +static const char *nvme_trace_admin_get_features(struct trace_seq *p, + u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 fid = cdw10[0]; + u8 sel = cdw10[1] & 0x7; + u32 cdw11 = get_unaligned_le32(cdw10 + 4); + + trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11); + trace_seq_putc(p, 0); + return ret; +} static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10) { @@ -101,6 +113,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, return nvme_trace_create_cq(p, cdw10); case nvme_admin_identify: return nvme_trace_admin_identify(p, cdw10); + case nvme_admin_get_features: + return nvme_trace_admin_get_features(p, cdw10); default: return nvme_trace_common(p, cdw10); } -- cgit v1.2.3 From 415df90b437f2b026ed37af2f812e41fc06c7f90 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Mar 2019 18:54:55 +0100 Subject: nvme: don't warn on block content change effects A write or flush IO passthrough command is expected to change the logical block content, so don't warn on these, as no additional handling is necessary. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 07bf2bff3a76..dc1641247b17 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1250,7 +1250,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (ns) { if (ctrl->effects) effects = le32_to_cpu(ctrl->effects->iocs[opcode]); - if (effects & ~NVME_CMD_EFFECTS_CSUPP) + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) dev_warn(ctrl->device, "IO command:%02x has unhandled effects:%08x\n", opcode, effects); -- cgit v1.2.3 From 81fe92849928d65159d707b7b28febffbef94559 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Mar 2019 18:54:56 +0100 Subject: nvme-trace: fix cdw10 buffer overrun The field is defined to be a 24 byte array, so we don't need to multiply the sizeof() of that field by the number of dwords it covers. 
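The arithmetic behind the overrun is easy to demonstrate outside the kernel. A minimal userspace sketch (illustration only; struct entry below is a hypothetical stand-in for the TRACE_EVENT()-generated entry, not the real one):

#include <stdio.h>

/* Hypothetical stand-in for the generated trace entry. */
struct entry {
	unsigned char cdw10[24];	/* six dwords: cdw10..cdw15 */
};

int main(void)
{
	struct entry e;

	/* sizeof() of an array field already yields its full byte size... */
	printf("sizeof(e.cdw10)     = %zu\n", sizeof(e.cdw10));		/* 24 */
	/* ...so the old expression asked memcpy() to copy 144 bytes,
	 * overrunning the 24-byte source and destination by 120 bytes. */
	printf("6 * sizeof(e.cdw10) = %zu\n", 6 * sizeof(e.cdw10));	/* 144 */
	return 0;
}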
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 244d7c177e5a..97d3c77365b8 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -108,7 +108,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->metadata = le64_to_cpu(cmd->common.metadata); __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, - 6 * sizeof(__entry->cdw10)); + sizeof(__entry->cdw10)); ), TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", __entry->ctrl_id, __print_disk_name(__entry->disk), -- cgit v1.2.3 From a63b83700ba89c300f705167d06bf122f3666287 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Mar 2019 18:54:57 +0100 Subject: nvme: put ns_head ref if namespace fails allocation If nvme_alloc_ns fails after we initialize ns_head but before we add the ns to the controller's namespaces list, we need to explicitly put the ns_head reference, because when we tear down the controller we won't find it, eventually causing us to leak a dangling subsystem. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc1641247b17..d57a84f45ed0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3304,6 +3304,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); mutex_unlock(&ctrl->subsys->lock); + nvme_put_ns_head(ns->head); out_free_id: kfree(id); out_free_queue: -- cgit v1.2.3 From 01fc08ff1f2f3f17d5947f18e62ed93c391aa3ce Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Wed, 13 Mar 2019 18:54:58 +0100 Subject: nvme: update comment to make the code easier to read After commit a686ed75c0fb ("nvme: introduce a helper function for controller deletion"), nvme_delete_ctrl_sync no longer uses flush_work. Update the comment accordingly. Signed-off-by: Yufen Yu Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d57a84f45ed0..b92fab434066 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -179,8 +179,8 @@ static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) int ret = 0; /* - * Keep a reference until the work is flushed since ->delete_ctrl - * can free the controller. + * Keep a reference until nvme_do_delete_ctrl() complete, + * since ->delete_ctrl can free the controller. */ nvme_get_ctrl(ctrl); if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) -- cgit v1.2.3 From d11de63f2b519f0a162b834013b6d3a46dbf3886 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Wed, 13 Mar 2019 18:54:59 +0100 Subject: nvme-loop: init nvmet_ctrl fatal_err_work when allocate After commit 4d43d395fe ("workqueue: Try to catch flush_work() without INIT_WORK()"), deleting an nvme-loop device can trigger a warning, with a trace like: [ 76.601272] Call Trace: [ 76.601646] ? del_timer+0x72/0xa0 [ 76.602156] __cancel_work_timer+0x1ae/0x270 [ 76.602791] cancel_work_sync+0x14/0x20 [ 76.603407] nvmet_ctrl_free+0x1b7/0x2f0 [nvmet] [ 76.604091] ? 
free_percpu+0x168/0x300 [ 76.604652] nvmet_sq_destroy+0x106/0x240 [nvmet] [ 76.605346] nvme_loop_destroy_admin_queue+0x30/0x60 [nvme_loop] [ 76.606220] nvme_loop_shutdown_ctrl+0xc3/0xf0 [nvme_loop] [ 76.607026] nvme_loop_delete_ctrl_host+0x19/0x30 [nvme_loop] [ 76.607871] nvme_do_delete_ctrl+0x75/0xb0 [ 76.608477] nvme_sysfs_delete+0x7d/0xc0 [ 76.609057] dev_attr_store+0x24/0x40 [ 76.609603] sysfs_kf_write+0x4c/0x60 [ 76.610144] kernfs_fop_write+0x19a/0x260 [ 76.610742] __vfs_write+0x1c/0x60 [ 76.611246] vfs_write+0xfa/0x280 [ 76.611739] ksys_write+0x6e/0x120 [ 76.612238] __x64_sys_write+0x1e/0x30 [ 76.612787] do_syscall_64+0xbf/0x3a0 [ 76.613329] entry_SYSCALL_64_after_hwframe+0x44/0xa9 We fix it by moving the fatal_err_work init to nvmet_alloc_ctrl(), which is more reasonable. Signed-off-by: Yufen Yu Reviewed-by: Sagi Grimberg Reviewed-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index d44ede147263..2d73b66e3686 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1163,6 +1163,15 @@ static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl) put_device(ctrl->p2p_client); } +static void nvmet_fatal_error_handler(struct work_struct *work) +{ + struct nvmet_ctrl *ctrl = + container_of(work, struct nvmet_ctrl, fatal_err_work); + + pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); + ctrl->ops->delete_ctrl(ctrl); +} + u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp) { @@ -1205,6 +1214,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); INIT_LIST_HEAD(&ctrl->async_events); INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); + INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); @@ -1308,21 +1318,11 @@ void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) kref_put(&ctrl->ref, nvmet_ctrl_free); } -static void nvmet_fatal_error_handler(struct work_struct *work) -{ - struct nvmet_ctrl *ctrl = - container_of(work, struct nvmet_ctrl, fatal_err_work); - - pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); - ctrl->ops->delete_ctrl(ctrl); -} - void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) { mutex_lock(&ctrl->lock); if (!(ctrl->csts & NVME_CSTS_CFS)) { ctrl->csts |= NVME_CSTS_CFS; - INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); schedule_work(&ctrl->fatal_err_work); } mutex_unlock(&ctrl->lock); -- cgit v1.2.3 From 9f7d8ae2f79479ce13d987c8f3b1500b8937fc5d Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 13 Mar 2019 18:55:00 +0100 Subject: nvme-fc: use nr_phys_segments to determine existence of sgl For some nvme commands issued by the nvme core layer, there is an internal buffer which can cause blk_rq_payload_bytes() to return a non-zero value even though there is no actual/real command payload or sg list. An example is the WRITE ZEROES command. To address this, when making choices on whether to dma map an sgl, use blk_rq_nr_phys_segments() instead of blk_rq_payload_bytes(). When there is an sgl, blk_rq_payload_bytes() will return the amount of data to be transferred by the sgl. 
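The decision the patch keys on can be distilled into a few lines. A minimal userspace sketch of the fixed logic (illustration only; toy_rq is a hypothetical stand-in for struct request, not a real block-layer type):

#include <stdio.h>

/* Toy stand-in for the request fields the patch consults. */
struct toy_rq {
	unsigned int nr_phys_segments;	/* 0 for WRITE ZEROES: no sg list */
	unsigned int payload_bytes;	/* non-zero even for WRITE ZEROES */
};

/* Mirror of the fixed decision: key the data direction on the presence
 * of physical segments, not on payload bytes. */
static const char *fcp_io_dir(const struct toy_rq *rq, unsigned int *data_len)
{
	if (rq->nr_phys_segments) {
		*data_len = rq->payload_bytes;
		return "FCP_WRITE/FCP_READ";
	}
	*data_len = 0;
	return "FCP_NODATA";
}

int main(void)
{
	struct toy_rq write_zeroes = { .nr_phys_segments = 0, .payload_bytes = 4096 };
	unsigned int len;
	const char *dir = fcp_io_dir(&write_zeroes, &len);

	/* The old code keyed on payload_bytes and would have tried to map
	 * a non-existent sg list for this command. */
	printf("WRITE ZEROES: dir=%s data_len=%u\n", dir, len);
	return 0;
}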
Signed-off-by: Chaitanya Kulkarni Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b29b12498a1a..ba8f2a9cbdaf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2107,7 +2107,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, freq->sg_cnt = 0; - if (!blk_rq_payload_bytes(rq)) + if (!blk_rq_nr_phys_segments(rq)) return 0; freq->sg_table.sgl = freq->first_sgl; @@ -2304,12 +2304,23 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; - data_len = blk_rq_payload_bytes(rq); - if (data_len) + /* + * nvme core doesn't quite treat the rq opaquely. Commands such + * as WRITE ZEROES will return a non-zero rq payload_bytes yet + * there is no actual payload to be transferred. + * To get it right, key data transmission on there being 1 or + * more physical segments in the sg list. If there is no + * physical segments, there is no payload. + */ + if (blk_rq_nr_phys_segments(rq)) { + data_len = blk_rq_payload_bytes(rq); io_dir = ((rq_data_dir(rq) == WRITE) ? NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); - else + } else { + data_len = 0; io_dir = NVMEFC_FCP_NODATA; + } + return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); } -- cgit v1.2.3 From 06f3d71ea071b70e62bcc146cd9ff7ed1f9d4e43 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 13 Mar 2019 18:55:01 +0100 Subject: nvme-fc: fix numa_node when dev is null A recent change added a numa_node field to the nvme controller and has the transport assign the node using dev_to_node(). However, fcloop registers with a NULL device struct, so the dev_to_node() call oopses. Revise the assignment to assign no node when the device struct is NULL. Fixes: 103e515efa89b ("nvme: add a numa_node field to struct nvme_ctrl") Reported-by: Mike Snitzer Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer [hch: small coding style fixup] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ba8f2a9cbdaf..23f6bad19274 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3017,7 +3017,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; - ctrl->ctrl.numa_node = dev_to_node(lport->dev); + if (lport->dev) + ctrl->ctrl.numa_node = dev_to_node(lport->dev); + else + ctrl->ctrl.numa_node = NUMA_NO_NODE; INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; -- cgit v1.2.3 From 834d3710a093aa18c8aa88e6e1892180abadebaf Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 13 Mar 2019 18:55:02 +0100 Subject: nvme-fc: reject reconnect if io queue count is reduced to zero If: - A successful connect has occurred with an io queue count greater than zero, and namespaces have been detected and are running. - An error occurs which terminates the prior association and then starts a reconnect. - The reconnect then creates a new controller, but for whatever reason, nvme_set_queue_count() results in an io queue count of zero. This will skip io queue and tag set changes. - But... 
the controller will transition to live, calling nvme_start_ctrl, which calls nvme_start_queues(), which then releases I/Os into the transport, which then sends them to the driver. As there are no queues, things eventually hit the driver looking for a handle, which was cleared when the original controller was reset, and it can't proceed. Worst case, things progress, but everything fails. In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES) command actually failed with an NVME_SC_INTERNAL error. For some reason, although nvme_set_queue_count() sees the error and sets the io queue count to zero, it does not return a failure status to the transport, which allows the transport to continue using the controller. Fix the problem by simply rejecting the new association if at least 1 I/O queue can't be created. The association reject will fail the reconnect attempt and fall into the reconnect retry policy. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 23f6bad19274..f3b9d91ba0df 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2475,6 +2475,7 @@ static int nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1; unsigned int nr_io_queues; int ret; @@ -2487,6 +2488,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) return ret; } + if (!nr_io_queues && prior_ioq_cnt) { + dev_info(ctrl->ctrl.device, + "Fail Reconnect: At least 1 io queue " + "required (was %d)\n", prior_ioq_cnt); + return -ENOSPC; + } + ctrl->ctrl.queue_count = nr_io_queues + 1; /* check for io queues existing */ if (ctrl->ctrl.queue_count == 1) @@ -2500,6 +2508,10 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; + if (prior_ioq_cnt != nr_io_queues) + dev_info(ctrl->ctrl.device, + "reconnect: revising io queue count from %d to %d\n", + prior_ioq_cnt, nr_io_queues); blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); return 0; -- cgit v1.2.3 From 0191e7405b687339a5540c1562acdecefd70eb3f Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 13 Mar 2019 18:55:03 +0100 Subject: nvmet-fc: fix issues with targetport assoc_list list walking There are two changes: 1) The logic in the __nvmet_fc_free_assoc() routine is bad. It uses "safe" routines assuming pointers will come back valid. However, the intervening next structure being linked can be removed from the list, and the resulting safe pointers are bad, resulting in NULL pointers being hit. Correct this by scheduling a work element to perform the association delete, which can be done while under lock. 2) A prior patch that added the work element scheduling left a possible reference on the object if the work element couldn't be scheduled. Correct this by doing the put on a failing schedule_work() call. Signed-off-by: Nigel Kirkland Signed-off-by: James Smart Reviewed-by: Ewan D. 
Milne Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1e9654f04c60..6b7bbf39fa06 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1143,10 +1143,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); - spin_lock_irqsave(&tgtport->lock, flags); + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1185,7 +1183,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - schedule_work(&assoc->del_work); + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); return; } -- cgit v1.2.3 From 404ec31df434fdae515202952b5e230c1b983ee1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 13 Mar 2019 18:55:04 +0100 Subject: nvmet-fc: bring Disconnect into compliance with FC-NVME spec The FC-NVME spec, when finally approved, modified the disconnect LS such that the only scope available is the association. Rework the Disconnect LS processing to be in accordance with the change. Signed-off-by: Nigel Kirkland Signed-off-by: James Smart Reviewed-by: Ewan D. Milne Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 6b7bbf39fa06..98b7b1f4ee96 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1502,10 +1502,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; struct fcnvme_ls_disconnect_acc *acc = (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; - struct nvmet_fc_tgt_queue *queue = NULL; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; - bool del_assoc = false; memset(acc, 0, sizeof(*acc)); @@ -1536,18 +1534,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; - if (assoc) { - if (rqst->discon_cmd.scope == - FCNVME_DISCONN_CONNECTION) { - queue = nvmet_fc_find_target_queue(tgtport, - be64_to_cpu( - rqst->discon_cmd.id)); - if (!queue) { - nvmet_fc_tgt_a_put(assoc); - ret = VERR_NO_CONN; - } - } - } else + if (!assoc) ret = VERR_NO_ASSOC; } @@ -1575,26 +1562,10 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_acc)), FCNVME_LS_DISCONNECT); - - /* are we to delete a Connection ID (queue) */ - if (queue) { - int qid = queue->qid; - - nvmet_fc_delete_target_queue(queue); - - /* release the get taken by find_target_queue */ - nvmet_fc_tgt_q_put(queue); - - /* tear association down if io queue terminated */ - if (!qid) - del_assoc = true; - } - /* release get taken in nvmet_fc_find_target_assoc */ nvmet_fc_tgt_a_put(iod->assoc); - if (del_assoc) - nvmet_fc_delete_target_assoc(iod->assoc); + nvmet_fc_delete_target_assoc(iod->assoc); } -- cgit v1.2.3 From 7b210e4ed5e281728243799c5e2b84d3f70d4dd1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:55:05 +0100 Subject: nvme: disable Write Zeroes for qemu 
controllers Qemu started out with a broken implementation of Write Zeroes written by yours truly. Disable Write Zeroes on qemu for now; eventually we need to go back and make all the qemu quirks version-specific, but that is left for another time. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b92fab434066..951e9f31b57c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1531,7 +1531,8 @@ static inline void nvme_config_write_zeroes(struct nvme_ns *ns) u32 max_sectors; unsigned short bs = 1 << ns->lba_shift; - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES)) + if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || + (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) return; /* * Even though NVMe spec explicitly states that MDTS is not diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b91f1838bbd5..527d64545023 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -87,6 +87,11 @@ enum nvme_quirks { * Ignore device provided subnqn. */ NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8), + + /* + * Broken Write Zeroes. + */ + NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f54718b63637..3a2377888a46 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2975,7 +2975,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ - .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, + .driver_data = NVME_QUIRK_IDENTIFY_CNS | + NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ -- cgit v1.2.3 From b1aafb35b45b1d734c670059c125a4ff111a47bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:55:06 +0100 Subject: nvme: remove nvme_ns_config_oncs Just opencode the two function calls in the caller. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni Tested-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 951e9f31b57c..26ae805fc958 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1552,12 +1552,6 @@ static inline void nvme_config_write_zeroes(struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(ns->queue, max_sectors); } -static inline void nvme_ns_config_oncs(struct nvme_ns *ns) -{ - nvme_config_discard(ns); - nvme_config_write_zeroes(ns); -} - static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, struct nvme_id_ns *id, struct nvme_ns_ids *ids) { @@ -1611,7 +1605,9 @@ static void nvme_update_disk_info(struct gendisk *disk, capacity = 0; set_capacity(disk, capacity); - nvme_ns_config_oncs(ns); + + nvme_config_discard(ns); + nvme_config_write_zeroes(ns); if (id->nsattr & (1 << 0)) set_disk_ro(disk, true); -- cgit v1.2.3 From 2631857160ecbea04e54423f5053133fe2b6ea45 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:55:07 +0100 Subject: nvme: add proper discard setup for the multipath device Add a gendisk argument to nvme_config_discard so that the call to nvme_update_disk_info for the multipath device node updates the proper request_queue. Signed-off-by: Christoph Hellwig Reported-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy Tested-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 26ae805fc958..6a57ece7d76b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1495,10 +1495,10 @@ static void nvme_set_chunk_size(struct nvme_ns *ns) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } -static void nvme_config_discard(struct nvme_ns *ns) +static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; - struct request_queue *queue = ns->queue; + struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { @@ -1606,7 +1606,7 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity(disk, capacity); - nvme_config_discard(ns); + nvme_config_discard(disk, ns); nvme_config_write_zeroes(ns); if (id->nsattr & (1 << 0)) -- cgit v1.2.3 From 9f0916ab932f676c042d4592a235a895847484f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:55:08 +0100 Subject: nvme: add proper write zeroes setup for the multipath device Add a gendisk argument to nvme_config_write_zeroes so that the call to nvme_update_disk_info for the multipath device node updates the proper request_queue. 
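The effect of threading the gendisk through these two helpers can be sketched with toy types (illustration only; every name below is a hypothetical stand-in, since the real queues live behind struct nvme_ns and the multipath ns_head's gendisk):

#include <stdio.h>

/* Toy model: a namespace has a per-path queue, and with multipath there
 * is a separate queue behind the ns_head's disk. */
struct toy_queue { unsigned int max_discard_sectors; };
struct toy_disk  { struct toy_queue *queue; };
struct toy_ns    { struct toy_queue *queue; };

/* After the change: limits land on the queue of whichever disk is passed. */
static void config_discard(struct toy_disk *disk, struct toy_ns *ns)
{
	disk->queue->max_discard_sectors = 1 << 16;
	(void)ns;	/* still consulted for ctrl/ns parameters */
}

int main(void)
{
	struct toy_queue path_q = { 0 }, head_q = { 0 };
	struct toy_ns ns = { .queue = &path_q };
	struct toy_disk path_disk = { .queue = &path_q };
	struct toy_disk head_disk = { .queue = &head_q };

	config_discard(&path_disk, &ns);	/* per-path node */
	config_discard(&head_disk, &ns);	/* multipath head node */

	/* The old signature could only reach path_q via ns->queue,
	 * leaving head_q (the multipath node) unconfigured. */
	printf("path=%u head=%u\n",
	       path_q.max_discard_sectors, head_q.max_discard_sectors);
	return 0;
}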
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy Tested-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6a57ece7d76b..470601980794 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1526,7 +1526,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static inline void nvme_config_write_zeroes(struct nvme_ns *ns) +static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) { u32 max_sectors; unsigned short bs = 1 << ns->lba_shift; @@ -1549,7 +1549,7 @@ static inline void nvme_config_write_zeroes(struct nvme_ns *ns) else max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; - blk_queue_max_write_zeroes_sectors(ns->queue, max_sectors); + blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); } static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1607,7 +1607,7 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(ns); + nvme_config_write_zeroes(disk, ns); if (id->nsattr & (1 << 0)) set_disk_ro(disk, true); -- cgit v1.2.3 From 005c674f705ee308e23b8e4e7047419d12122fde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:55:09 +0100 Subject: nvmet: ignore EOPNOTSUPP for discard NVMe DSM is a pure hint, so if the underlying device / file system does not support discard-like operations, we should not fail the operation but rather return success. Fixes: 3b031d15995f ("nvmet: add error log support for bdev backend") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Tested-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/io-cmd-bdev.c | 8 ++++---- drivers/nvme/target/io-cmd-file.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 71dfedbadc26..a065dbfc43b1 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -194,11 +194,11 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, le64_to_cpu(range->slba) << (ns->blksize_shift - 9), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), GFP_KERNEL, 0, bio); - - if (ret) + if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); - - return blk_to_nvme_status(req, errno_to_blk_status(ret)); + return blk_to_nvme_status(req, errno_to_blk_status(ret)); + } + return NVME_SC_SUCCESS; } static void nvmet_bdev_execute_discard(struct nvmet_req *req) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 517522305e5c..3e43212d3c1c 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -297,7 +297,7 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) } ret = vfs_fallocate(req->ns->file, mode, offset, len); - if (ret && ret != -EOPNOTSUPP) { + if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range.slba); status = errno_to_nvme_status(req, ret); break; -- cgit v1.2.3 From 602d674ce90f64ac135452fb9b2b058acb53b226 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Mar 2019 18:55:10 +0100 Subject: nvme-tcp: support C2HData with SUCCESS flag A C2HData PDU with the SUCCESS flag set indicates 
that the I/O was completed successfully by the controller and that a subsequent completion response capsule PDU will be omitted. If we see this flag, first we check that the LAST_PDU flag is set as well, and then we complete the request when the data transfer (and data digest verification if it's on) is done. While we're at it, reuse a bit of code with nvme_tcp_fail_request. Reported-by: Steve Blightman Suggested-by: Oliver Smith-Denny Signed-off-by: Sagi Grimberg Reviewed-by: Oliver Smith-Denny Tested-by: Oliver Smith-Denny Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 208ee518af65..e7e08889865e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -463,6 +463,15 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, queue->data_remaining = le32_to_cpu(pdu->data_length); + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS && + unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag %#x SUCCESS set but not last PDU\n", + nvme_tcp_queue_id(queue), rq->tag); + nvme_tcp_error_recovery(&queue->ctrl->ctrl); + return -EPROTO; + } + return 0; } @@ -618,6 +627,14 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return ret; } +static inline void nvme_tcp_end_request(struct request *rq, __le16 status) +{ + union nvme_result res = {}; + + nvme_end_request(rq, cpu_to_le16(status << 1), res); +} + + static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -685,6 +702,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; } else { + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) + nvme_tcp_end_request(rq, NVME_SC_SUCCESS); nvme_tcp_init_recv_ctx(queue); } } @@ -695,6 +714,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { + struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; char *ddgst = (char *)&queue->recv_ddgst; size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; @@ -718,6 +738,13 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, return -EIO; } + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { + struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), + pdu->command_id); + + nvme_tcp_end_request(rq, NVME_SC_SUCCESS); + } + nvme_tcp_init_recv_ctx(queue); return 0; } @@ -815,10 +842,7 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static void nvme_tcp_fail_request(struct nvme_tcp_request *req) { - union nvme_result res = {}; - - nvme_end_request(blk_mq_rq_from_pdu(req), - cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res); + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR); } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) -- cgit v1.2.3
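The receive-path rule these hunks enforce can be summarized in a standalone sketch (illustration only; the flag bit positions below are assumptions to be checked against include/linux/nvme-tcp.h, not authoritative values):

#include <stdio.h>

/* Assumed C2HData PDU header flag bits; verify against the real header. */
#define TCP_F_DATA_LAST		(1 << 2)
#define TCP_F_DATA_SUCCESS	(1 << 3)

/* SUCCESS promises that no completion capsule will follow, so it is only
 * valid on the last C2HData PDU of the transfer. */
static int c2h_data_flags_valid(unsigned char flags)
{
	if ((flags & TCP_F_DATA_SUCCESS) && !(flags & TCP_F_DATA_LAST))
		return 0;	/* protocol error: trigger error recovery */
	return 1;
}

int main(void)
{
	printf("SUCCESS|LAST -> %s\n",
	       c2h_data_flags_valid(TCP_F_DATA_SUCCESS | TCP_F_DATA_LAST) ?
	       "complete request once data (and digest) is done" : "-EPROTO");
	printf("SUCCESS only -> %s\n",
	       c2h_data_flags_valid(TCP_F_DATA_SUCCESS) ? "ok" : "-EPROTO");
	return 0;
}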