From 6fb271f1bc4ebc59bac0ff835c2e5197eac4b075 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Jul 2022 08:33:58 +0800 Subject: nvme-fc: restart admin queue if the caller needs to restart queue Without restarting admin queue in __nvme_fc_abort_outstanding_ios(), it leaves controller not capable of handling admin pt request, and causes io hang. Fixes it by restarting admin queue if the caller of __nvme_fc_abort_outstanding_ios requires to restart queue. Signed-off-by: Ming Lei Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9987797620b6..8d14df8eeab8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2533,6 +2533,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); + if (start_queues) + nvme_start_admin_queue(&ctrl->ctrl); } static void -- cgit v1.2.3 From 9317d0014499182c77a03cd095e83bcfb0f53750 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Aug 2022 10:29:55 +0200 Subject: nvme-fc: fix the fc_appid_store return value "nvme-fc: fold t fc_update_appid into fc_appid_store" accidentally changed the userspace interface for the appid attribute, because the code that decrements "count" to remove a trailing '\n' in the parsing results in the decremented value being incorrectly be returned from the sysfs write. Fix this by keeping an orig_count variable for the full length of the write. Fixes: c814153c83a8 ("nvme-fc: fold t fc_update_appid into fc_appid_store") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ewan D. Milne Reviewed-by: James Smart Tested-by: Muneendra Kumar M --- drivers/nvme/host/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8d14df8eeab8..127abaf9ba5d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3880,6 +3880,7 @@ static int fc_parse_cgrpid(const char *buf, u64 *id) static ssize_t fc_appid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + size_t orig_count = count; u64 cgrp_id; int appid_len = 0; int cgrpid_len = 0; @@ -3904,7 +3905,7 @@ static ssize_t fc_appid_store(struct device *dev, ret = blkcg_set_fc_appid(app_id, cgrp_id, sizeof(app_id)); if (ret < 0) return ret; - return count; + return orig_count; } static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); #endif /* CONFIG_BLK_CGROUP_FC_APPID */ -- cgit v1.2.3 From 14446f9abd609791064d222ccf3c7b3af1772358 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 26 Jul 2022 10:56:32 +0800 Subject: nvmet-auth: use kmemdup instead of kmalloc + memcpy For code neat purpose, we can use kmemdup to replace kmalloc + memcpy. Signed-off-by: Zhang Xiaoxu Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd-auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index c851814d6cb0..ebdf9aa81041 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -160,10 +160,10 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d host authenticated\n", __func__, ctrl->cntlid, req->sq->qid); if (data->cvalid) { - req->sq->dhchap_c2 = kmalloc(data->hl, GFP_KERNEL); + req->sq->dhchap_c2 = kmemdup(data->rval + data->hl, data->hl, + GFP_KERNEL); if (!req->sq->dhchap_c2) return NVME_AUTH_DHCHAP_FAILURE_FAILED; - memcpy(req->sq->dhchap_c2, data->rval + data->hl, data->hl); pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, -- cgit v1.2.3 From ec9e96b5230148294c7abcaf3a4c592d3720b62d Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Mon, 1 Aug 2022 21:40:39 +0300 Subject: nvme-fabrics: parse nvme connect Linux error codes This fixes the assumption that errval is an unsigned nvme error Signed-off-by: Amit Engel Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5207a2348257..83b505358859 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -270,6 +270,12 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, { int err_sctype = errval & ~NVME_SC_DNR; + if (errval < 0) { + dev_err(ctrl->device, + "Connect command failed, errno: %d\n", errval); + return; + } + switch (err_sctype) { case NVME_SC_CONNECT_INVALID_PARAM: if (offset >> 16) { -- cgit v1.2.3 From c50cd03dbebd1d5d34a2eb361f315b0bd833d6c1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Aug 2022 22:15:01 +0200 Subject: nvme-fabrics: Fix a typo in an error message A 'c' is missing. s/fabris/fabrics/ Signed-off-by: Christophe JAILLET Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 83b505358859..10cc4a814602 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1236,7 +1236,7 @@ static int __init nvmf_init(void) nvmf_device = device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); if (IS_ERR(nvmf_device)) { - pr_err("couldn't create nvme-fabris device!\n"); + pr_err("couldn't create nvme-fabrics device!\n"); ret = PTR_ERR(nvmf_device); goto out_destroy_class; } -- cgit v1.2.3 From 2bff487f9a9085c9c877b55579b153691f0e5245 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 1 Aug 2022 10:09:00 +0200 Subject: nvme-tcp: check if the queue is allocated before stopping it When an error is detected and the host reconnects, the nvme_tcp_error_recovery_work() function is called and starts tearing down the io queues and de-allocating them; If at the same time the "nvme" process deletes the controller via sysfs, the nvme_tcp_delete_ctrl() gets called and waits until the nvme_tcp_error_recovery_work() finishes its job; then starts tearing down the io queues, but at this point they have already been freed and the mutexes are destroyed. Calling mutex_lock() against a destroyed mutex triggers a warning: [ 1299.025575] nvme nvme1: Reconnecting in 10 seconds... [ 1299.636449] nvme nvme1: Removing ctrl: NQN "blktests-subsystem-1" [ 1299.645262] ------------[ cut here ]------------ [ 1299.649949] DEBUG_LOCKS_WARN_ON(lock->magic != lock) [ 1299.649971] WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 __mutex_lock+0x2d0/0x7dc [ 1299.717934] CPU: 4 PID: 104150 Comm: nvme [ 1299.828075] Call trace: [ 1299.830526] __mutex_lock+0x2d0/0x7dc [ 1299.834203] mutex_lock_nested+0x64/0xd4 [ 1299.838139] nvme_tcp_stop_queue+0x54/0xe0 [nvme_tcp] [ 1299.843211] nvme_tcp_teardown_io_queues.part.0+0x90/0x280 [nvme_tcp] [ 1299.849672] nvme_tcp_delete_ctrl+0x6c/0xf0 [nvme_tcp] [ 1299.854831] nvme_do_delete_ctrl+0x108/0x120 [nvme_core] [ 1299.860181] nvme_sysfs_delete+0xec/0xf0 [nvme_core] [ 1299.865179] dev_attr_store+0x40/0x70 Fix the warning by checking if the queues are allocated in the nvme_tcp_stop_queue(). If they are not, it makes no sense to try to stop them. Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e82dcfcda29b..044da18c06f5 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1660,6 +1660,9 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) + return; + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) __nvme_tcp_stop_queue(queue); -- cgit v1.2.3 From f37527a09dac324c74bb341c841096395a2f2566 Mon Sep 17 00:00:00 2001 From: "Dennis P. Kliem" Date: Thu, 11 Aug 2022 12:56:57 +0200 Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S70 ADATA XPG GAMMIX S70 reports bogus eui64 values that appear to be the same across all drives. Quirk them out so they are not marked as "non globally unique" duplicates. Signed-off-by: Dennis P. Kliem Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 71a4f26ba476..a222caa1ab00 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3516,6 +3516,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ -- cgit v1.2.3 From aa0c680c3aa96a5f9f160d90dd95402ad578e2b0 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Thu, 11 Aug 2022 20:23:37 -0300 Subject: block: Do not call blk_put_queue() if gendisk allocation fails Commit 6f8191fdf41d ("block: simplify disk shutdown") removed the call to blk_get_queue() during gendisk allocation but missed to remove the corresponding cleanup code blk_put_queue() for it. Thus, if the gendisk allocation fails, the request_queue refcount gets decremented and reaches 0, causing blk_mq_release() to be called with a hctx still alive. That triggers a WARNING report, as found by syzkaller: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 23016 at block/blk-mq.c:3881 blk_mq_release+0xf8/0x3e0 block/blk-mq.c:3881 [...] stripped RIP: 0010:blk_mq_release+0xf8/0x3e0 block/blk-mq.c:3881 [...] stripped Call Trace: blk_release_queue+0x153/0x270 block/blk-sysfs.c:780 kobject_cleanup lib/kobject.c:673 [inline] kobject_release lib/kobject.c:704 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x1c8/0x540 lib/kobject.c:721 __alloc_disk_node+0x4f7/0x610 block/genhd.c:1388 __blk_mq_alloc_disk+0x13b/0x1f0 block/blk-mq.c:3961 loop_add+0x3e2/0xaf0 drivers/block/loop.c:1978 loop_control_ioctl+0x133/0x620 drivers/block/loop.c:2150 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] stripped Fixes: 6f8191fdf41d ("block: simplify disk shutdown") Reported-by: syzbot+31c9594f6e43b9289b25@syzkaller.appspotmail.com Suggested-by: Hillf Danton Signed-off-by: Rafael Mendonca Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220811232338.254673-1-rafaelmendsr@gmail.com Signed-off-by: Jens Axboe --- block/genhd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index b901fea1d55a..d36fabf0abc1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1341,7 +1341,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (!disk) - goto out_put_queue; + return NULL; if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0)) goto out_free_disk; @@ -1390,8 +1390,6 @@ out_free_bioset: bioset_exit(&disk->bio_split); out_free_disk: kfree(disk); -out_put_queue: - blk_put_queue(q); return NULL; } -- cgit v1.2.3