From 6fb271f1bc4ebc59bac0ff835c2e5197eac4b075 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 21 Jul 2022 08:33:58 +0800
Subject: nvme-fc: restart admin queue if the caller needs to restart queue

Without restarting admin queue in __nvme_fc_abort_outstanding_ios(),
it leaves controller not capable of handling admin pt request, and
causes io hang.

Fixes it by restarting admin queue if the caller of __nvme_fc_abort_outstanding_ios
requires to restart queue.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9987797620b6..8d14df8eeab8 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2533,6 +2533,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+	if (start_queues)
+		nvme_start_admin_queue(&ctrl->ctrl);
 }
 
 static void
-- 
cgit v1.2.3


From 9317d0014499182c77a03cd095e83bcfb0f53750 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 6 Aug 2022 10:29:55 +0200
Subject: nvme-fc: fix the fc_appid_store return value

"nvme-fc: fold t fc_update_appid into fc_appid_store" accidentally
changed the userspace interface for the appid attribute, because the code
that decrements "count" to remove a trailing '\n' in the parsing results
in the decremented value being incorrectly be returned from the sysfs
write.  Fix this by keeping an orig_count variable for the full length
of the write.

Fixes: c814153c83a8 ("nvme-fc: fold t fc_update_appid into fc_appid_store")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Tested-by:  Muneendra Kumar M <muneendra.kumar@broadcom.com>
---
 drivers/nvme/host/fc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8d14df8eeab8..127abaf9ba5d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3880,6 +3880,7 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
 static ssize_t fc_appid_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
+	size_t orig_count = count;
 	u64 cgrp_id;
 	int appid_len = 0;
 	int cgrpid_len = 0;
@@ -3904,7 +3905,7 @@ static ssize_t fc_appid_store(struct device *dev,
 	ret = blkcg_set_fc_appid(app_id, cgrp_id, sizeof(app_id));
 	if (ret < 0)
 		return ret;
-	return count;
+	return orig_count;
 }
 static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
 #endif /* CONFIG_BLK_CGROUP_FC_APPID */
-- 
cgit v1.2.3


From 14446f9abd609791064d222ccf3c7b3af1772358 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 26 Jul 2022 10:56:32 +0800
Subject: nvmet-auth: use kmemdup instead of kmalloc + memcpy

For code neat purpose, we can use kmemdup to replace
kmalloc + memcpy.

Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fabrics-cmd-auth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c
index c851814d6cb0..ebdf9aa81041 100644
--- a/drivers/nvme/target/fabrics-cmd-auth.c
+++ b/drivers/nvme/target/fabrics-cmd-auth.c
@@ -160,10 +160,10 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d)
 	pr_debug("%s: ctrl %d qid %d host authenticated\n",
 		 __func__, ctrl->cntlid, req->sq->qid);
 	if (data->cvalid) {
-		req->sq->dhchap_c2 = kmalloc(data->hl, GFP_KERNEL);
+		req->sq->dhchap_c2 = kmemdup(data->rval + data->hl, data->hl,
+					     GFP_KERNEL);
 		if (!req->sq->dhchap_c2)
 			return NVME_AUTH_DHCHAP_FAILURE_FAILED;
-		memcpy(req->sq->dhchap_c2, data->rval + data->hl, data->hl);
 
 		pr_debug("%s: ctrl %d qid %d challenge %*ph\n",
 			 __func__, ctrl->cntlid, req->sq->qid, data->hl,
-- 
cgit v1.2.3


From ec9e96b5230148294c7abcaf3a4c592d3720b62d Mon Sep 17 00:00:00 2001
From: Amit Engel <amit.engel@dell.com>
Date: Mon, 1 Aug 2022 21:40:39 +0300
Subject: nvme-fabrics: parse nvme connect Linux error codes

This fixes the assumption that errval is an unsigned nvme error

Signed-off-by: Amit Engel <amit.engel@dell.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5207a2348257..83b505358859 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -270,6 +270,12 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 {
 	int err_sctype = errval & ~NVME_SC_DNR;
 
+	if (errval < 0) {
+		dev_err(ctrl->device,
+			"Connect command failed, errno: %d\n", errval);
+		return;
+	}
+
 	switch (err_sctype) {
 	case NVME_SC_CONNECT_INVALID_PARAM:
 		if (offset >> 16) {
-- 
cgit v1.2.3


From c50cd03dbebd1d5d34a2eb361f315b0bd833d6c1 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 6 Aug 2022 22:15:01 +0200
Subject: nvme-fabrics: Fix a typo in an error message

A 'c' is missing.
s/fabris/fabrics/

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 83b505358859..10cc4a814602 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1236,7 +1236,7 @@ static int __init nvmf_init(void)
 	nvmf_device =
 		device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
 	if (IS_ERR(nvmf_device)) {
-		pr_err("couldn't create nvme-fabris device!\n");
+		pr_err("couldn't create nvme-fabrics device!\n");
 		ret = PTR_ERR(nvmf_device);
 		goto out_destroy_class;
 	}
-- 
cgit v1.2.3


From 2bff487f9a9085c9c877b55579b153691f0e5245 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Mon, 1 Aug 2022 10:09:00 +0200
Subject: nvme-tcp: check if the queue is allocated before stopping it

When an error is detected and the host reconnects, the
nvme_tcp_error_recovery_work() function is called and starts
tearing down the io queues and de-allocating them;
If at the same time the "nvme" process deletes the controller via sysfs,
the nvme_tcp_delete_ctrl() gets called and waits until the
nvme_tcp_error_recovery_work() finishes its job; then starts
tearing down the io queues, but at this point they have already
been freed and the mutexes are destroyed.

Calling mutex_lock() against a destroyed mutex triggers a warning:

[ 1299.025575] nvme nvme1: Reconnecting in 10 seconds...
[ 1299.636449] nvme nvme1: Removing ctrl: NQN "blktests-subsystem-1"
[ 1299.645262] ------------[ cut here ]------------
[ 1299.649949] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
[ 1299.649971] WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 __mutex_lock+0x2d0/0x7dc

[ 1299.717934] CPU: 4 PID: 104150 Comm: nvme
[ 1299.828075] Call trace:
[ 1299.830526]  __mutex_lock+0x2d0/0x7dc
[ 1299.834203]  mutex_lock_nested+0x64/0xd4
[ 1299.838139]  nvme_tcp_stop_queue+0x54/0xe0 [nvme_tcp]
[ 1299.843211]  nvme_tcp_teardown_io_queues.part.0+0x90/0x280 [nvme_tcp]
[ 1299.849672]  nvme_tcp_delete_ctrl+0x6c/0xf0 [nvme_tcp]
[ 1299.854831]  nvme_do_delete_ctrl+0x108/0x120 [nvme_core]
[ 1299.860181]  nvme_sysfs_delete+0xec/0xf0 [nvme_core]
[ 1299.865179]  dev_attr_store+0x40/0x70

Fix the warning by checking if the queues are allocated
in the nvme_tcp_stop_queue(). If they are not, it makes no
sense to try to stop them.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e82dcfcda29b..044da18c06f5 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1660,6 +1660,9 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
 
+	if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+		return;
+
 	mutex_lock(&queue->queue_lock);
 	if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		__nvme_tcp_stop_queue(queue);
-- 
cgit v1.2.3


From f37527a09dac324c74bb341c841096395a2f2566 Mon Sep 17 00:00:00 2001
From: "Dennis P. Kliem" <dpkliem@gmail.com>
Date: Thu, 11 Aug 2022 12:56:57 +0200
Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S70

ADATA XPG GAMMIX S70 reports bogus eui64 values that appear to be the same
across all drives. Quirk them out so they are not marked as "non globally
unique" duplicates.

Signed-off-by: Dennis P. Kliem <dpkliem@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 71a4f26ba476..a222caa1ab00 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3516,6 +3516,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1cc1, 0x5350),   /* ADATA XPG GAMMIX S50 */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
+	{ PCI_DEVICE(0x1dbe, 0x5236),   /* ADATA XPG GAMMIX S70 */
+		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1e49, 0x0041),   /* ZHITAI TiPro7000 NVMe SSD */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
 	{ PCI_DEVICE(0xc0a9, 0x540a),   /* Crucial P2 */
-- 
cgit v1.2.3


From aa0c680c3aa96a5f9f160d90dd95402ad578e2b0 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Thu, 11 Aug 2022 20:23:37 -0300
Subject: block: Do not call blk_put_queue() if gendisk allocation fails

Commit 6f8191fdf41d ("block: simplify disk shutdown") removed the call
to blk_get_queue() during gendisk allocation but missed to remove the
corresponding cleanup code blk_put_queue() for it. Thus, if the gendisk
allocation fails, the request_queue refcount gets decremented and
reaches 0, causing blk_mq_release() to be called with a hctx still
alive. That triggers a WARNING report, as found by syzkaller:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 23016 at block/blk-mq.c:3881
blk_mq_release+0xf8/0x3e0 block/blk-mq.c:3881
[...] stripped
RIP: 0010:blk_mq_release+0xf8/0x3e0 block/blk-mq.c:3881
[...] stripped
Call Trace:
 <TASK>
 blk_release_queue+0x153/0x270 block/blk-sysfs.c:780
 kobject_cleanup lib/kobject.c:673 [inline]
 kobject_release lib/kobject.c:704 [inline]
 kref_put include/linux/kref.h:65 [inline]
 kobject_put+0x1c8/0x540 lib/kobject.c:721
 __alloc_disk_node+0x4f7/0x610 block/genhd.c:1388
 __blk_mq_alloc_disk+0x13b/0x1f0 block/blk-mq.c:3961
 loop_add+0x3e2/0xaf0 drivers/block/loop.c:1978
 loop_control_ioctl+0x133/0x620 drivers/block/loop.c:2150
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:870 [inline]
 __se_sys_ioctl fs/ioctl.c:856 [inline]
 __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:856
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
[...] stripped

Fixes: 6f8191fdf41d ("block: simplify disk shutdown")
Reported-by: syzbot+31c9594f6e43b9289b25@syzkaller.appspotmail.com
Suggested-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220811232338.254673-1-rafaelmendsr@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index b901fea1d55a..d36fabf0abc1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1341,7 +1341,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 
 	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
 	if (!disk)
-		goto out_put_queue;
+		return NULL;
 
 	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
 		goto out_free_disk;
@@ -1390,8 +1390,6 @@ out_free_bioset:
 	bioset_exit(&disk->bio_split);
 out_free_disk:
 	kfree(disk);
-out_put_queue:
-	blk_put_queue(q);
 	return NULL;
 }
 
-- 
cgit v1.2.3