Merge tag 'nfs-for-7.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "New features:
   - XPRTRDMA: Decouple req recycling from RPC completion
   - NFS: Expose FMODE_NOWAIT for read-only files

  Bugfixes:
   - SUNRPC:
      - Fix sunrpc sysfs error handling
      - Fix uninitialized xprt_create_args structure
   - XPRTRDMA:
      - Harden connect and reply handling
   - NFS:
      - Fix EOF updates after fallocate/zero-range
      - Keep PG_UPTODATE clear after read errors in page groups
      - Use nfsi->rwsem to protect traversal of the file lock list
      - Prevent resource leak in nfs_alloc_server()
   - NFSv4:
      - Clear exception state on successful mkdir retry
      - Don't skip revalidate when holding a dir delegation and attrs
        are stale
   - pNFS:
      - Fix use-after-free in pnfs_update_layout()
      - Defer return_range callbacks until after inode unlock
      - Fix LAYOUTCOMMIT retry loop on OLD_STATEID
      - Reject zero-length r_addr in nfs4_decode_mp_ds_addr
   - NFS/flexfiles:
      - Reject zero-length filehandle version arrays
      - Fix checking if a layout is striped
      - Fixes for honoring FF_FLAGS_NO_IO_THRU_MDS

  Other cleanups and improvements:
   - Remove the fileid field from struct nfs_inode
   - Move long-delayed xprtrdma work onto the system_dfl_long_wq
   - Convert xprtrdma send buffer free list to an llist
   - Show "<redacted>" for cert_serial and privkey_serial mount options"

* tag 'nfs-for-7.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (42 commits)
  NFS: Use common error handling code in nfs_alloc_server()
  NFS: Prevent resource leak in nfs_alloc_server()
  NFSv4/pNFS: reject zero-length r_addr in nfs4_decode_mp_ds_addr
  nfs: don't skip revalidate on directory delegation when attrs flagged stale
  xprtrdma: Return sendctx slot after Send preparation failure
  xprtrdma: Repost Receive buffers for malformed replies
  xprtrdma: Sanitize the reply credit grant after parsing
  xprtrdma: Fix bcall rep leak and unbounded peek
  xprtrdma: Resize reply buffers before reposting receives
  xprtrdma: Check frwr_wp_create() during connect
  xprtrdma: Initialize re_id before removal registration
  xprtrdma: Fix ep kref imbalance on ADDR_CHANGE
  xprtrdma: Convert send buffer free list to llist
  NFS: correct CONFIG_NFS_V4 macro name in #endif comment
  nfs: use nfsi->rwsem to protect traversal of the file lock list
  NFSv4.1/pNFS: fix LAYOUTCOMMIT retry loop on OLD_STATEID
  nfs: expose FMODE_NOWAIT for read-only files
  nfs: add nowait version of nfs_start_io_direct
  NFSv4/flexfiles: honor FF_FLAGS_NO_IO_THRU_MDS in pg_get_mirror_count_write
  NFSv4/flexfiles: honor FF_FLAGS_NO_IO_THRU_MDS on fatal DS connect errors
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-23 18:36:41 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-23 18:36:41 -0700
commit: 840ef6c78e6a2f694b578ecb9063241c992aaa9e (patch)
tree: d5915e31458e709297d3487482288cde25dcca70
parent: 09ca8dc7d634f69d0b43f82c244add44cf7885b4 (diff)
parent: 284ea3fb4f6715201e1d9ef3474c25e817ad70e9 (diff)
download: lwn-840ef6c78e6a2f694b578ecb9063241c992aaa9e.tar.gz
lwn-840ef6c78e6a2f694b578ecb9063241c992aaa9e.zip
35 files changed, 703 insertions, 326 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a68003c3599c..b5493a7f8f22 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4288,13 +4288,6 @@ Kernel parameters
 			Only applies if the softerr mount option is enabled,
 			and the specified value is >= 0.
 
-	nfs.enable_ino64=
-			[NFS] enable 64-bit inode numbers.
-			If zero, the NFS client will fake up a 32-bit inode
-			number for the readdir() and stat() syscalls instead
-			of returning the full 64-bit number.
-			The default is to return 64-bit inode numbers.
-
 	nfs.idmap_cache_timeout=
 			[NFS] set the maximum lifetime for idmapper cache
 			entries.
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4ea9221ded42..10f2354ba304 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -257,6 +257,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 	struct pnfs_layout_hdr *lo;
 	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
 	LIST_HEAD(free_me_list);
+	bool return_range = false;
 
 	ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid);
 	if (IS_ERR(ino)) {
@@ -301,13 +302,13 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 		/* Embrace your forgetfulness! */
 		rv = NFS4ERR_NOMATCHING_LAYOUT;
 
-		if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
-			NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
-				&args->cbl_range);
-		}
+		return_range = true;
 	}
 unlock:
 	spin_unlock(&ino->i_lock);
+	if (return_range && NFS_SERVER(ino)->pnfs_curr_ld->return_range)
+		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
+			&args->cbl_range);
 	pnfs_free_lseg_list(&free_me_list);
 	/* Free all lsegs that are attached to commit buckets */
 	nfs_commit_inode(ino, 0);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 73b95318ba48..4dcb91ab3039 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1063,10 +1063,8 @@ struct nfs_server *nfs_alloc_server(void)
 		return NULL;
 
 	server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL);
-	if (server->s_sysfs_id < 0) {
-		kfree(server);
-		return NULL;
-	}
+	if (server->s_sysfs_id < 0)
+		goto free_server;
 
 	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
@@ -1088,8 +1086,8 @@ struct nfs_server *nfs_alloc_server(void)
 
 	server->io_stats = nfs_alloc_iostats();
 	if (!server->io_stats) {
-		kfree(server);
-		return NULL;
+		ida_free(&s_sysfs_ids, server->s_sysfs_id);
+		goto free_server;
 	}
 
 	server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
@@ -1103,6 +1101,10 @@ struct nfs_server *nfs_alloc_server(void)
 	rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
 
 	return server;
+
+free_server:
+	kfree(server);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(nfs_alloc_server);
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 122fb3f14ffb..9546d2195c25 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
 static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct inode *inode = state->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct file_lock *fl;
 	struct file_lock_context *flctx = locks_inode_context(inode);
 	struct list_head *list;
@@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
 		goto out;
 
 	list = &flctx->flc_posix;
+
+	/* Guard against reclaim and new lock/unlock calls */
+	down_write(&nfsi->rwsem);
 	spin_lock(&flctx->flc_lock);
 restart:
 	for_each_file_lock(fl, list) {
@@ -189,8 +193,10 @@ restart:
 			continue;
 		spin_unlock(&flctx->flc_lock);
 		status = nfs4_lock_delegation_recall(fl, state, stateid);
-		if (status < 0)
+		if (status < 0) {
+			up_write(&nfsi->rwsem);
 			goto out;
+		}
 		spin_lock(&flctx->flc_lock);
 	}
 	if (list == &flctx->flc_posix) {
@@ -198,6 +204,7 @@ restart:
 		goto restart;
 	}
 	spin_unlock(&flctx->flc_lock);
+	up_write(&nfsi->rwsem);
 out:
 	return status;
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2f5f26f93238..c7b723c18620 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -650,7 +650,7 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 		return 0;
 
 	nfsi = NFS_I(inode);
-	if (entry->fattr->fileid != nfsi->fileid)
+	if (entry->fattr->fileid != inode->i_ino)
 		return 0;
 	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
 		return 0;
@@ -1105,7 +1105,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
 
 		ent = &array->array[i];
 		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
-		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
+		    ent->ino, ent->d_type)) {
 			desc->eob = true;
 			break;
 		}
@@ -2301,7 +2301,7 @@ full_reval:
 	return nfs_do_lookup_revalidate(dir, name, dentry, flags);
 }
 
-#endif /* CONFIG_NFSV4 */
+#endif /* CONFIG_NFS_V4 */
 
 int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
 			struct file *file, unsigned int open_flags,
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 48d89716193a..e626c72495e6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -466,14 +466,22 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		goto out_release;
 	}
 	dreq->l_ctx = l_ctx;
-	if (!is_sync_kiocb(iocb))
+	if (!is_sync_kiocb(iocb)) {
 		dreq->iocb = iocb;
+	} else if (iocb->ki_flags & IOCB_NOWAIT) {
+		result = -EAGAIN;
+		nfs_direct_req_release(dreq);
+		goto out_release;
+	}
 
 	if (user_backed_iter(iter))
 		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
 
 	if (!swap) {
-		result = nfs_start_io_direct(inode);
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			result = nfs_start_io_direct_nowait(inode);
+		else
+			result = nfs_start_io_direct(inode);
 		if (result) {
 			/* release the reference that would usually be
 			 * consumed by nfs_direct_read_schedule_iovec()
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index a10dd5f9d078..8fb08bce0623 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -49,14 +49,14 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
 		return FILEID_INVALID;
 	}
 
-	p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
-	p[FILEID_LOW_OFF] = NFS_FILEID(inode);
+	p[FILEID_HIGH_OFF] = inode->i_ino >> 32;
+	p[FILEID_LOW_OFF] = inode->i_ino;
 	p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
 	p[len - 1] = 0; /* Padding */
 	nfs_copy_fh(clnt_fh, server_fh);
 	*max_len = len;
 	dprintk("%s: result fh fileid %llu mode %u size %d\n",
-		__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
+		__func__, inode->i_ino, inode->i_mode, *max_len);
 	return *max_len;
 }
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 25048a3c2364..a0d8f1c1cf10 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -72,8 +72,12 @@ nfs_file_open(struct inode *inode, struct file *filp)
 		return res;
 
 	res = nfs_open(inode, filp);
-	if (res == 0)
+	if (res == 0) {
 		filp->f_mode |= FMODE_CAN_ODIRECT;
+		/* flag NOWAIT on read-only files only */
+		if (!(filp->f_mode & FMODE_WRITE))
+			filp->f_mode |= FMODE_NOWAIT;
+	}
 	return res;
 }
 
@@ -166,6 +170,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 	if (iocb->ki_flags & IOCB_DIRECT)
 		return nfs_file_direct_read(iocb, to, false);
 
+	/* NOWAIT only supported on direct reads */
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EAGAIN;
+
 	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
@@ -705,6 +713,12 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 
 	trace_nfs_file_write(iocb, from);
 
+	/*
+	 * FMODE_NOWAIT is not set for writable files
+	 */
+	if (WARN_ON_ONCE(iocb->ki_flags & IOCB_NOWAIT))
+		return -EAGAIN;
+
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index e85380e3b11d..72e20b56fbc7 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -95,7 +95,7 @@ static void filelayout_reset_write(struct nfs_pgio_header *hdr)
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
 			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(hdr->inode),
+			(unsigned long long)hdr->inode->i_ino,
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
@@ -112,7 +112,7 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr)
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
 			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(hdr->inode),
+			(unsigned long long)hdr->inode->i_ino,
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
@@ -778,6 +778,8 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 static bool
 filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
 {
+	if (flseg->dsaddr)
+		return flseg->dsaddr->stripe_count > 1;
 	return flseg->num_fh > 1;
 }
 
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 8b1559171fe3..c4aa995026f6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -551,6 +551,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 			if (!p)
 				goto out_err_free;
 			fh_count = be32_to_cpup(p);
+			if (fh_count == 0) {
+				rc = -EINVAL;
+				goto out_err_free;
+			}
 
 			dss_info->fh_versions =
 			    kzalloc_objs(struct nfs_fh, fh_count, gfp_flags);
@@ -632,6 +636,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 	if (!p)
 		goto out_sort_mirrors;
 	fls->flags = be32_to_cpup(p);
+	if (fls->flags & FF_FLAGS_NO_IO_THRU_MDS)
+		set_bit(NFS4_FF_HDR_NO_IO_THRU_MDS,
+			&FF_LAYOUT_FROM_HDR(lh)->flags);
 
 	p = xdr_inline_decode(&stream, 4);
 	if (!p)
@@ -1181,6 +1188,16 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
 			0, NFS4_MAX_UINT64, IOMODE_RW,
 			NFS_I(pgio->pg_inode)->layout,
 			pgio->pg_lseg);
+	if (NFS_I(pgio->pg_inode)->layout &&
+	    ff_layout_hdr_no_fallback_to_mds(NFS_I(pgio->pg_inode)->layout)) {
+		/*
+		 * FF_FLAGS_NO_IO_THRU_MDS: no current lseg but the server's
+		 * policy forbids MDS fallback.  Surface -EAGAIN so writeback
+		 * retries rather than silently issuing the WRITE via MDS.
+		 */
+		pgio->pg_error = -EAGAIN;
+		goto out;
+	}
 	/* no lseg means that pnfs is not in use, so no mirroring here */
 	nfs_pageio_reset_write_mds(pgio);
 out:
@@ -1230,7 +1247,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
 			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(hdr->inode),
+			(unsigned long long)hdr->inode->i_ino,
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
@@ -1243,7 +1260,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
 			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(hdr->inode),
+			(unsigned long long)hdr->inode->i_ino,
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
@@ -1283,7 +1300,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
 			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(hdr->inode),
+			(unsigned long long)hdr->inode->i_ino,
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
@@ -2200,6 +2217,14 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 out_failed:
 	if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
 		return PNFS_TRY_AGAIN;
+	if (ff_layout_no_fallback_to_mds(lseg)) {
+		/*
+		 * FF_FLAGS_NO_IO_THRU_MDS: force fresh LAYOUTGET,
+		 * never fall through to MDS I/O.
+		 */
+		pnfs_error_mark_layout_for_return(hdr->inode, lseg);
+		return PNFS_TRY_AGAIN;
+	}
 	trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
 			hdr->args.offset, hdr->args.count,
 			IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
@@ -2285,6 +2310,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 out_failed:
 	if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
 		return PNFS_TRY_AGAIN;
+	if (ff_layout_no_fallback_to_mds(lseg)) {
+		/*
+		 * FF_FLAGS_NO_IO_THRU_MDS: force fresh LAYOUTGET,
+		 * never fall through to MDS I/O.
+		 */
+		pnfs_error_mark_layout_for_return(hdr->inode, lseg);
+		return PNFS_TRY_AGAIN;
+	}
 	trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
 			hdr->args.offset, hdr->args.count,
 			IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 17a008c8e97c..a5bd00f69e82 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -112,12 +112,16 @@ struct nfs4_ff_layout_segment {
 	struct nfs4_ff_layout_mirror	*mirror_array[] __counted_by(mirror_array_cnt);
 };
 
+/* nfs4_flexfile_layout::flags bit indices */
+#define NFS4_FF_HDR_NO_IO_THRU_MDS  0   /* any lseg has had FF_FLAGS_NO_IO_THRU_MDS */
+
 struct nfs4_flexfile_layout {
 	struct pnfs_layout_hdr generic_hdr;
 	struct pnfs_ds_commit_info commit_info;
 	struct list_head	mirrors;
 	struct list_head	error_list; /* nfs4_ff_layout_ds_err */
 	ktime_t			last_report_time; /* Layoutstat report times */
+	unsigned long		flags;
 };
 
 struct nfs4_flexfile_layoutreturn_args {
@@ -184,6 +188,18 @@ ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg)
 	return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS;
 }
 
+/*
+ * Sticky hdr-level mirror of FF_FLAGS_NO_IO_THRU_MDS so callers that have
+ * no current lseg (e.g. between LAYOUTRETURN and the next LAYOUTGET) can
+ * still honor the no-MDS-fallback policy.
+ */
+static inline bool
+ff_layout_hdr_no_fallback_to_mds(struct pnfs_layout_hdr *lo)
+{
+	return test_bit(NFS4_FF_HDR_NO_IO_THRU_MDS,
+			&FF_LAYOUT_FROM_HDR(lo)->flags);
+}
+
 static inline bool
 ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
 {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6227df9ae6f1..5bcd4027d203 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -58,21 +58,23 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-#define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
+static bool enable_ino64;
 
-/* Default is to see 64-bit inode numbers */
-static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+static int param_set_enable_ino64(const char *val, const struct kernel_param *kp)
+{
+	pr_notice("enable_ino64 is deprecated and has no effect\n");
+	return 0;
+}
+
+static const struct kernel_param_ops param_ops_enable_ino64 = {
+	.set = param_set_enable_ino64,
+	.get = param_get_bool,
+};
 
 static int nfs_update_inode(struct inode *, struct nfs_fattr *);
 
 static struct kmem_cache * nfs_inode_cachep;
 
-static inline unsigned long
-nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
-{
-	return nfs_fileid_to_ino_t(fattr->fileid);
-}
-
 int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
 	if (unlikely(nfs_current_task_exiting()))
@@ -84,29 +86,6 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
 }
 EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
 
-/**
- * nfs_compat_user_ino64 - returns the user-visible inode number
- * @fileid: 64-bit fileid
- *
- * This function returns a 32-bit inode number if the boot parameter
- * nfs.enable_ino64 is zero.
- */
-u64 nfs_compat_user_ino64(u64 fileid)
-{
-#ifdef CONFIG_COMPAT
-	compat_ulong_t ino;
-#else	
-	unsigned long ino;
-#endif
-
-	if (enable_ino64)
-		return fileid;
-	ino = fileid;
-	if (sizeof(ino) < sizeof(fileid))
-		ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
-	return ino;
-}
-
 int nfs_drop_inode(struct inode *inode)
 {
 	return NFS_STALE(inode) || inode_generic_drop(inode);
@@ -314,8 +293,7 @@ struct nfs_find_desc {
 };
 
 /*
- * In NFSv3 we can have 64bit inode numbers. In order to support
- * this, and re-exported directories (also seen in NFSv2)
+ * For re-exported directories (also seen in NFSv2)
  * we are forced to allow 2 different inodes to have the same
  * i_ino.
  */
@@ -326,7 +304,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
 	struct nfs_fh		*fh = desc->fh;
 	struct nfs_fattr	*fattr = desc->fattr;
 
-	if (NFS_FILEID(inode) != fattr->fileid)
+	if (inode->i_ino != fattr->fileid)
 		return 0;
 	if (inode_wrong_type(inode, fattr->mode))
 		return 0;
@@ -343,7 +321,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
 	struct nfs_find_desc	*desc = opaque;
 	struct nfs_fattr	*fattr = desc->fattr;
 
-	set_nfs_fileid(inode, fattr->fileid);
+	inode->i_ino = fattr->fileid;
 	inode->i_mode = fattr->mode;
 	nfs_copy_fh(NFS_FH(inode), desc->fh);
 	return 0;
@@ -414,13 +392,13 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
 		.fattr	= fattr,
 	};
 	struct inode *inode;
-	unsigned long hash;
+	u64 hash;
 
 	if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) ||
 	    !(fattr->valid & NFS_ATTR_FATTR_TYPE))
 		return NULL;
 
-	hash = nfs_fattr_to_ino_t(fattr);
+	hash = fattr->fileid;
 	inode = ilookup5(sb, hash, nfs_find_actor, &desc);
 
 	dprintk("%s: returning %p\n", __func__, inode);
@@ -457,7 +435,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 	};
 	struct inode *inode = ERR_PTR(-ENOENT);
 	u64 fattr_supported = NFS_SB(sb)->fattr_valid;
-	unsigned long hash;
+	u64 hash;
 
 	nfs_attr_check_mountpoint(sb, fattr);
 
@@ -468,7 +446,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
 		goto out_no_inode;
 
-	hash = nfs_fattr_to_ino_t(fattr);
+	hash = fattr->fileid;
 
 	inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
 	if (inode == NULL) {
@@ -480,10 +458,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		struct nfs_inode *nfsi = NFS_I(inode);
 		unsigned long now = jiffies;
 
-		/* We set i_ino for the few things that still rely on it,
-		 * such as stat(2) */
-		inode->i_ino = hash;
-
 		/* We can't support update_atime(), since the server will reset it */
 		inode->i_flags |= S_NOATIME|S_NOCMTIME;
 		inode->i_mode = fattr->mode;
@@ -607,7 +581,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 	}
 	dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
 		inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(inode),
+		(unsigned long long)inode->i_ino,
 		nfs_display_fhandle_hash(fh),
 		icount_read_once(inode));
 
@@ -1067,7 +1041,6 @@ out_no_revalidate:
 	stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
 
 	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
-	stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	stat->change_cookie = inode_peek_iversion_raw(inode);
 	stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
 	if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED)
@@ -1385,7 +1358,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n",
-		inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode));
+		inode->i_sb->s_id, (unsigned long long)inode->i_ino);
 
 	trace_nfs_revalidate_inode_enter(inode);
 
@@ -1399,7 +1372,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		status = pnfs_sync_inode(inode, false);
 		if (status)
 			goto out;
-	} else if (nfs_have_directory_delegation(inode)) {
+	} else if (nfs_have_directory_delegation(inode) &&
+		   !(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)) {
 		status = 0;
 		goto out;
 	}
@@ -1415,7 +1389,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (status != 0) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
 			 inode->i_sb->s_id,
-			 (unsigned long long)NFS_FILEID(inode), status);
+			 (unsigned long long)inode->i_ino, status);
 		switch (status) {
 		case -ETIMEDOUT:
 			/* A soft timeout occurred. Use cached information? */
@@ -1435,7 +1409,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (status) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n",
 			 inode->i_sb->s_id,
-			 (unsigned long long)NFS_FILEID(inode), status);
+			 (unsigned long long)inode->i_ino, status);
 		goto out;
 	}
 
@@ -1446,7 +1420,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 
 	dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
 		inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(inode));
+		(unsigned long long)inode->i_ino);
 
 out:
 	nfs_free_fattr(fattr);
@@ -1495,7 +1469,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 
 	dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n",
 			inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(inode));
+			(unsigned long long)inode->i_ino);
 	return 0;
 }
 
@@ -1687,10 +1661,10 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 		if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
 			return 0;
 	/* Has the inode gone and changed behind our back? */
-	} else if (nfsi->fileid != fattr->fileid) {
+	} else if (inode->i_ino != fattr->fileid) {
 		/* Is this perhaps the mounted-on fileid? */
 		if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
-		    nfsi->fileid == fattr->mounted_on_fileid)
+		    inode->i_ino == fattr->mounted_on_fileid)
 			return 0;
 		return -ESTALE;
 	}
@@ -2277,15 +2251,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
 			return 0;
 	/* Has the inode gone and changed behind our back? */
-	} else if (nfsi->fileid != fattr->fileid) {
+	} else if (inode->i_ino != fattr->fileid) {
 		/* Is this perhaps the mounted-on fileid? */
 		if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) &&
-		    nfsi->fileid == fattr->mounted_on_fileid)
+		    inode->i_ino == fattr->mounted_on_fileid)
 			return 0;
 		printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 			"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
 			NFS_SERVER(inode)->nfs_client->cl_hostname,
-			inode->i_sb->s_id, (long long)nfsi->fileid,
+			inode->i_sb->s_id, (long long)inode->i_ino,
 			(long long)fattr->fileid);
 		goto out_err;
 	}
@@ -2813,7 +2787,7 @@ static void __exit exit_nfs_fs(void)
 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
 MODULE_DESCRIPTION("NFS client support");
 MODULE_LICENSE("GPL");
-module_param(enable_ino64, bool, 0644);
+module_param_cb(enable_ino64, &param_ops_enable_ino64, &enable_ino64, 0644);
 
 module_init(init_nfs_fs)
 module_exit(exit_nfs_fs)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ec2b3d984398..acaeff7ddfdf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -535,6 +535,7 @@ extern void nfs_end_io_read(struct inode *inode);
 extern  __must_check int nfs_start_io_write(struct inode *inode);
 extern void nfs_end_io_write(struct inode *inode);
 extern __must_check int nfs_start_io_direct(struct inode *inode);
+extern __must_check int nfs_start_io_direct_nowait(struct inode *inode);
 extern void nfs_end_io_direct(struct inode *inode);
 
 static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 8337f0ae852d..2faf2003faf6 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -109,6 +109,16 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
 	}
 }
 
+static int nfs_block_buffered_nowait(struct nfs_inode *nfsi, struct inode *inode)
+{
+	if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
+		if (inode->i_mapping->nrpages != 0)
+			return 1;
+		set_bit(NFS_INO_ODIRECT, &nfsi->flags);
+	}
+	return 0;
+}
+
 /**
  * nfs_start_io_direct - declare the file is being used for direct i/o
  * @inode: file inode
@@ -150,6 +160,37 @@ nfs_start_io_direct(struct inode *inode)
 }
 
 /**
+ * nfs_start_io_direct_nowait - non-blocking variant of nfs_start_io_direct()
+ * @inode: file inode
+ *
+ * Try to declare that a direct I/O operation is about to start without
+ * blocking.
+ * Ensure all buffered I/O is blocked.
+ * If this could not be done without blocking then returns -EAGAIN.
+ */
+int
+nfs_start_io_direct_nowait(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (!down_read_trylock(&inode->i_rwsem))
+		return -EAGAIN;
+	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags))
+		return 0;
+	up_read(&inode->i_rwsem);
+
+	/* Slow path: try to flip NFS_INO_ODIRECT without blocking. */
+	if (!down_write_trylock(&inode->i_rwsem))
+		return -EAGAIN;
+	if (nfs_block_buffered_nowait(nfsi, inode)) {
+		up_write(&inode->i_rwsem);
+		return -EAGAIN;
+	}
+	downgrade_write(&inode->i_rwsem);
+	return 0;
+}
+
+/**
  * nfs_end_io_direct - declare that the direct i/o operation is done
  * @inode: file inode
  *
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7602ede6f75f..ab86246fc364 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -81,12 +81,17 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 	status = nfs4_call_sync(server->client, server, msg,
 				&args.seq_args, &res.seq_res, 0);
 	if (status == 0) {
-		if (nfs_should_remove_suid(inode)) {
-			spin_lock(&inode->i_lock);
+		loff_t newsize = offset + len;
+
+		spin_lock(&inode->i_lock);
+		if (newsize > i_size_read(inode))
+			i_size_write(inode, newsize);
+		nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
+		if (nfs_should_remove_suid(inode))
 			nfs_set_cache_invalid(inode,
-				NFS_INO_REVAL_FORCED | NFS_INO_INVALID_MODE);
-			spin_unlock(&inode->i_lock);
-		}
+					      NFS_INO_REVAL_FORCED |
+					      NFS_INO_INVALID_MODE);
+		spin_unlock(&inode->i_lock);
 		status = nfs_post_op_update_inode_force_wcc(inode,
 							    res.falloc_fattr);
 	}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c48281db3868..1360409d8de9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -377,7 +377,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 		*p++ = htonl(attrs);                           /* bitmap */
 		*p++ = htonl(12);             /* attribute buffer length */
 		*p++ = htonl(NF4DIR);
-		p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
+		p = xdr_encode_hyper(p, d_inode(dentry)->i_ino);
 	}
 	
 	*p++ = xdr_one;                                  /* next */
@@ -391,7 +391,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	*p++ = htonl(12);             /* attribute buffer length */
 	*p++ = htonl(NF4DIR);
 	spin_lock(&dentry->d_lock);
-	p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
+	p = xdr_encode_hyper(p, d_inode(dentry->d_parent)->i_ino);
 	spin_unlock(&dentry->d_lock);
 
 	readdir->pgbase = (char *)p - (char *)start;
@@ -5304,10 +5304,9 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	do {
 		alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err);
 		trace_nfs4_mkdir(dir, &dentry->d_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
 		if (err)
-			alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
-							      err,
-							      &exception));
+			alias = ERR_PTR(err);
 	} while (exception.retry);
 	nfs4_label_release_security(label);
 
@@ -7087,7 +7086,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 	switch (task->tk_status) {
 		case 0:
 			renew_lease(calldata->server, calldata->timestamp);
-			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
 			if (nfs4_update_lock_stateid(calldata->lsp,
 					&calldata->res.stateid))
 				break;
@@ -7355,11 +7353,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 	case 0:
 		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
 				data->timestamp);
-		if (data->arg.new_lock && !data->cancelled) {
-			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
-			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
-				goto out_restart;
-		}
 		if (data->arg.new_lock_owner != 0) {
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
 			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
@@ -7470,11 +7463,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
-	if (recovery_type > NFS_LOCK_NEW) {
-		if (recovery_type == NFS_LOCK_RECLAIM)
-			data->arg.reclaim = NFS_LOCK_RECLAIM;
-	} else
-		data->arg.new_lock = 1;
+
+	if (recovery_type == NFS_LOCK_RECLAIM)
+		data->arg.reclaim = NFS_LOCK_RECLAIM;
+
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -7584,6 +7576,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	up_read(&nfsi->rwsem);
 	mutex_unlock(&sp->so_delegreturn_mutex);
 	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+	if (status)
+		goto out;
+
+	down_read(&nfsi->rwsem);
+	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
+	status = locks_lock_inode_wait(state->inode, request);
+	up_read(&nfsi->rwsem);
 out:
 	request->c.flc_flags = flags;
 	return status;
@@ -9991,6 +9990,38 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_GRACE:	    /* loca_recalim always false */
 		task->tk_status = 0;
 		break;
+	case -NFS4ERR_OLD_STATEID: {
+		u32 old_seqid = be32_to_cpu(data->args.stateid.seqid);
+		struct pnfs_layout_range range = {
+			.iomode = IOMODE_ANY,
+			.offset = 0,
+			.length = NFS4_MAX_UINT64,
+		};
+
+		if (nfs4_layout_refresh_old_stateid(&data->args.stateid,
+						    &range,
+						    data->args.inode)) {
+			struct pnfs_layout_hdr *lo;
+
+			spin_lock(&data->args.inode->i_lock);
+			lo = NFS_I(data->args.inode)->layout;
+			if (lo && pnfs_layout_is_valid(lo) &&
+			    nfs4_stateid_match_other(&data->args.stateid,
+						     &lo->plh_stateid))
+				pnfs_set_layout_stateid(lo, &data->args.stateid,
+							NULL, false);
+			spin_unlock(&data->args.inode->i_lock);
+
+			dprintk("%s: refreshed OLD_STATEID inode %llu seq %u->%u\n",
+				__func__, data->args.inode->i_ino,
+				old_seqid,
+				be32_to_cpu(data->args.stateid.seqid));
+
+			rpc_restart_call_prepare(task);
+			return;
+		}
+		fallthrough;
+	}
 	case 0:
 		break;
 	default:
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index c939533b9881..1ed677810d9d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -597,13 +597,13 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
 				__entry->openstateid_hash = 0;
 			}
 			if (inode != NULL) {
-				__entry->fileid = NFS_FILEID(inode);
+				__entry->fileid = inode->i_ino;
 				__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			} else {
 				__entry->fileid = 0;
 				__entry->fhandle = 0;
 			}
-			__entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
+			__entry->dir = d_inode(ctx->dentry->d_parent)->i_ino;
 			__assign_str(name);
 		),
 
@@ -658,7 +658,7 @@ TRACE_EVENT(nfs4_cached_open,
 			const struct inode *inode = state->inode;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->fmode = (__force unsigned int)state->state;
 			__entry->stateid_seq =
@@ -703,7 +703,7 @@ TRACE_EVENT(nfs4_close,
 			const struct inode *inode = state->inode;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->fmode = (__force unsigned int)state->state;
 			__entry->error = error < 0 ? -error : 0;
@@ -759,7 +759,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
 			__entry->start = request->fl_start;
 			__entry->end = request->fl_end;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
@@ -831,7 +831,7 @@ TRACE_EVENT(nfs4_set_lock,
 			__entry->start = request->fl_start;
 			__entry->end = request->fl_end;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
@@ -922,7 +922,7 @@ TRACE_EVENT(nfs4_state_lock_reclaim,
 			const struct inode *inode = state->inode;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->state_flags = state->flags;
 			__entry->lock_flags = lock->ls_flags;
@@ -960,7 +960,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->fmode = (__force unsigned int)fmode;
 		),
@@ -1087,7 +1087,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
 
 			__entry->error = error < 0 ? -error : 0;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->stateid_seq =
 				be32_to_cpu(state->stateid.seqid);
@@ -1137,7 +1137,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->error = -error;
 			__assign_str(name);
 		),
@@ -1185,7 +1185,7 @@ TRACE_EVENT(nfs4_lookupp,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->ino = NFS_FILEID(inode);
+			__entry->ino = inode->i_ino;
 			__entry->error = error < 0 ? -error : 0;
 		),
 
@@ -1220,8 +1220,8 @@ TRACE_EVENT(nfs4_rename,
 
 		TP_fast_assign(
 			__entry->dev = olddir->i_sb->s_dev;
-			__entry->olddir = NFS_FILEID(olddir);
-			__entry->newdir = NFS_FILEID(newdir);
+			__entry->olddir = olddir->i_ino;
+			__entry->newdir = newdir->i_ino;
 			__entry->error = error < 0 ? -error : 0;
 			__assign_str(oldname);
 			__assign_str(newname);
@@ -1258,7 +1258,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->error = error < 0 ? -error : 0;
 		),
@@ -1311,7 +1311,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->error = error < 0 ? -error : 0;
 			__entry->stateid_seq =
@@ -1421,7 +1421,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
 			__entry->error = error < 0 ? -error : 0;
 			__entry->fhandle = nfs_fhandle_hash(fhandle);
 			if (!IS_ERR_OR_NULL(inode)) {
-				__entry->fileid = NFS_FILEID(inode);
+				__entry->fileid = inode->i_ino;
 				__entry->dev = inode->i_sb->s_dev;
 			} else {
 				__entry->fileid = 0;
@@ -1478,7 +1478,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
 			__entry->error = error < 0 ? -error : 0;
 			__entry->fhandle = nfs_fhandle_hash(fhandle);
 			if (!IS_ERR_OR_NULL(inode)) {
-				__entry->fileid = NFS_FILEID(inode);
+				__entry->fileid = inode->i_ino;
 				__entry->dev = inode->i_sb->s_dev;
 			} else {
 				__entry->fileid = 0;
@@ -1655,7 +1655,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 			const struct pnfs_layout_segment *lseg = hdr->lseg;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = hdr->args.offset;
 			__entry->arg_count = hdr->args.count;
@@ -1727,7 +1727,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 			const struct pnfs_layout_segment *lseg = hdr->lseg;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = hdr->args.offset;
 			__entry->arg_count = hdr->args.count;
@@ -1795,7 +1795,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event,
 			const struct pnfs_layout_segment *lseg = data->lseg;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = data->args.offset;
 			__entry->count = data->args.count;
@@ -1857,7 +1857,7 @@ TRACE_EVENT(nfs4_layoutget,
 			const struct inode *inode = d_inode(ctx->dentry);
 			const struct nfs4_state *state = ctx->state;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->iomode = args->iomode;
 			__entry->offset = args->offset;
@@ -1957,7 +1957,7 @@ TRACE_EVENT(pnfs_update_layout,
 		),
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->pos = pos;
 			__entry->count = count;
@@ -2012,7 +2012,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event,
 		),
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->pos = pos;
 			__entry->count = count;
@@ -2194,7 +2194,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 			__entry->error = -error;
 			__entry->nfs_error = hdr->res.op_status;
 			__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->offset = hdr->args.offset;
 			__entry->count = hdr->args.count;
@@ -2258,7 +2258,7 @@ TRACE_EVENT(ff_layout_commit_error,
 			__entry->error = -error;
 			__entry->nfs_error = data->res.op_status;
 			__entry->fhandle = nfs_fhandle_hash(data->args.fh);
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->offset = data->args.offset;
 			__entry->count = data->args.count;
@@ -2423,7 +2423,7 @@ TRACE_EVENT(nfs4_llseek,
 		TP_STRUCT__entry(
 			__field(unsigned long, error)
 			__field(u32, fhandle)
-			__field(u32, fileid)
+			__field(u64, fileid)
 			__field(dev_t, dev)
 			__field(int, stateid_seq)
 			__field(u32, stateid_hash)
@@ -2434,10 +2434,9 @@ TRACE_EVENT(nfs4_llseek,
 		),
 
 		TP_fast_assign(
-			const struct nfs_inode *nfsi = NFS_I(inode);
 			const struct nfs_fh *fh = args->sa_fh;
 
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset_s = args->sa_offset;
@@ -2499,7 +2498,7 @@ DECLARE_EVENT_CLASS(nfs4_sparse_event,
 			__entry->offset = args->falloc_offset;
 			__entry->len = args->falloc_length;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__entry->stateid_seq =
 				be32_to_cpu(args->falloc_stateid.seqid);
@@ -2568,14 +2567,11 @@ TRACE_EVENT(nfs4_copy,
 		),
 
 		TP_fast_assign(
-			const struct nfs_inode *src_nfsi = NFS_I(src_inode);
-			const struct nfs_inode *dst_nfsi = NFS_I(dst_inode);
-
-			__entry->src_fileid = src_nfsi->fileid;
+			__entry->src_fileid = src_inode->i_ino;
 			__entry->src_dev = src_inode->i_sb->s_dev;
 			__entry->src_fhandle = nfs_fhandle_hash(args->src_fh);
 			__entry->src_offset = args->src_pos;
-			__entry->dst_fileid = dst_nfsi->fileid;
+			__entry->dst_fileid = dst_inode->i_ino;
 			__entry->dst_dev = dst_inode->i_sb->s_dev;
 			__entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh);
 			__entry->dst_offset = args->dst_pos;
@@ -2666,14 +2662,11 @@ TRACE_EVENT(nfs4_clone,
 		),
 
 		TP_fast_assign(
-			const struct nfs_inode *src_nfsi = NFS_I(src_inode);
-			const struct nfs_inode *dst_nfsi = NFS_I(dst_inode);
-
-			__entry->src_fileid = src_nfsi->fileid;
+			__entry->src_fileid = src_inode->i_ino;
 			__entry->src_dev = src_inode->i_sb->s_dev;
 			__entry->src_fhandle = nfs_fhandle_hash(args->src_fh);
 			__entry->src_offset = args->src_offset;
-			__entry->dst_fileid = dst_nfsi->fileid;
+			__entry->dst_fileid = dst_inode->i_ino;
 			__entry->dst_dev = dst_inode->i_sb->s_dev;
 			__entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh);
 			__entry->dst_offset = args->dst_offset;
@@ -2724,7 +2717,7 @@ TRACE_EVENT(nfs4_copy_notify,
 		TP_STRUCT__entry(
 			__field(unsigned long, error)
 			__field(u32, fhandle)
-			__field(u32, fileid)
+			__field(u64, fileid)
 			__field(dev_t, dev)
 			__field(int, stateid_seq)
 			__field(u32, stateid_hash)
@@ -2733,9 +2726,7 @@ TRACE_EVENT(nfs4_copy_notify,
 		),
 
 		TP_fast_assign(
-			const struct nfs_inode *nfsi = NFS_I(inode);
-
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fhandle = nfs_fhandle_hash(args->cna_src_fh);
 			__entry->stateid_seq =
@@ -2830,7 +2821,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event,
 		TP_fast_assign(
 			__entry->error = error < 0 ? -error : 0;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
 			__assign_str(name);
 		),
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index ff467959f733..4ada21f4eebd 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event,
 		TP_fast_assign(
 			const struct nfs_inode *nfsi = NFS_I(inode);
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->cache_validity = nfsi->cache_validity;
@@ -121,7 +121,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 			__entry->error = error < 0 ? -error : 0;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->type = nfs_umode_to_dtype(inode->i_mode);
 			__entry->version = inode_peek_iversion_raw(inode);
@@ -211,7 +211,7 @@ TRACE_EVENT(nfs_access_exit,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 			__entry->error = error < 0 ? -error : 0;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->type = nfs_umode_to_dtype(inode->i_mode);
 			__entry->version = inode_peek_iversion_raw(inode);
@@ -265,7 +265,7 @@ DECLARE_EVENT_CLASS(nfs_update_size_class,
 
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->cur_size = i_size_read(inode);
 			__entry->new_size = new_size;
@@ -317,7 +317,7 @@ DECLARE_EVENT_CLASS(nfs_inode_range_event,
 
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->range_start = range_start;
 			__entry->range_end = range_end;
@@ -371,7 +371,7 @@ DECLARE_EVENT_CLASS(nfs_readdir_event,
 			const struct nfs_inode *nfsi = NFS_I(dir);
 
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = dir->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(dir);
 			if (cookie != 0)
@@ -429,9 +429,9 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->flags = flags;
-			__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
+			__entry->fileid = d_is_negative(dentry) ? 0 : d_inode(dentry)->i_ino;
 			__assign_str(name);
 		),
 
@@ -476,10 +476,10 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->error = error < 0 ? -error : 0;
 			__entry->flags = flags;
-			__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
+			__entry->fileid = d_is_negative(dentry) ? 0 : d_inode(dentry)->i_ino;
 			__assign_str(name);
 		),
 
@@ -532,7 +532,7 @@ TRACE_EVENT(nfs_atomic_open_enter,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->flags = flags;
 			__entry->fmode = (__force unsigned long)ctx->mode;
 			__assign_str(name);
@@ -571,7 +571,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
 		TP_fast_assign(
 			__entry->error = -error;
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->flags = flags;
 			__entry->fmode = (__force unsigned long)ctx->mode;
 			__assign_str(name);
@@ -608,7 +608,7 @@ TRACE_EVENT(nfs_create_enter,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->flags = flags;
 			__assign_str(name);
 		),
@@ -644,7 +644,7 @@ TRACE_EVENT(nfs_create_exit,
 		TP_fast_assign(
 			__entry->error = -error;
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->flags = flags;
 			__assign_str(name);
 		),
@@ -676,7 +676,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__assign_str(name);
 		),
 
@@ -714,7 +714,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
 
 		TP_fast_assign(
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->error = error < 0 ? -error : 0;
 			__assign_str(name);
 		),
@@ -768,8 +768,8 @@ TRACE_EVENT(nfs_link_enter,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
-			__entry->dir = NFS_FILEID(dir);
+			__entry->fileid = inode->i_ino;
+			__entry->dir = dir->i_ino;
 			__assign_str(name);
 		),
 
@@ -803,8 +803,8 @@ TRACE_EVENT(nfs_link_exit,
 
 		TP_fast_assign(
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = NFS_FILEID(inode);
-			__entry->dir = NFS_FILEID(dir);
+			__entry->fileid = inode->i_ino;
+			__entry->dir = dir->i_ino;
 			__entry->error = error < 0 ? -error : 0;
 			__assign_str(name);
 		),
@@ -840,8 +840,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event,
 
 		TP_fast_assign(
 			__entry->dev = old_dir->i_sb->s_dev;
-			__entry->old_dir = NFS_FILEID(old_dir);
-			__entry->new_dir = NFS_FILEID(new_dir);
+			__entry->old_dir = old_dir->i_ino;
+			__entry->new_dir = new_dir->i_ino;
 			__assign_str(old_name);
 			__assign_str(new_name);
 		),
@@ -889,8 +889,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
 		TP_fast_assign(
 			__entry->dev = old_dir->i_sb->s_dev;
 			__entry->error = -error;
-			__entry->old_dir = NFS_FILEID(old_dir);
-			__entry->new_dir = NFS_FILEID(new_dir);
+			__entry->old_dir = old_dir->i_ino;
+			__entry->new_dir = new_dir->i_ino;
 			__assign_str(old_name);
 			__assign_str(new_name);
 		),
@@ -943,7 +943,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
 			struct inode *dir = d_inode(data->dentry->d_parent);
 			size_t len = data->args.name.len;
 			__entry->dev = dir->i_sb->s_dev;
-			__entry->dir = NFS_FILEID(dir);
+			__entry->dir = dir->i_ino;
 			__entry->error = -error;
 			memcpy(__get_str(name),
 				data->args.name.name, len);
@@ -981,7 +981,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->offset = offset;
@@ -1031,7 +1031,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->offset = offset;
@@ -1109,7 +1109,7 @@ DECLARE_EVENT_CLASS(nfs_kiocb_event,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->offset = iocb->ki_pos;
@@ -1160,7 +1160,7 @@ TRACE_EVENT(nfs_aop_readahead,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->offset = pos;
@@ -1199,7 +1199,7 @@ TRACE_EVENT(nfs_aop_readahead_done,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->version = inode_peek_iversion_raw(inode);
 			__entry->nr_pages = nr_pages;
@@ -1239,7 +1239,7 @@ TRACE_EVENT(nfs_initiate_read,
 			__entry->offset = hdr->args.offset;
 			__entry->count = hdr->args.count;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1284,7 +1284,7 @@ TRACE_EVENT(nfs_readpage_done,
 			__entry->res_count = hdr->res.count;
 			__entry->eof = hdr->res.eof;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1330,7 +1330,7 @@ TRACE_EVENT(nfs_readpage_short,
 			__entry->res_count = hdr->res.count;
 			__entry->eof = hdr->res.eof;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1377,7 +1377,7 @@ TRACE_EVENT(nfs_pgio_error,
 		__entry->arg_count = hdr->args.count;
 		__entry->res_count = hdr->res.count;
 		__entry->dev = inode->i_sb->s_dev;
-		__entry->fileid = nfsi->fileid;
+		__entry->fileid = inode->i_ino;
 		__entry->fhandle = nfs_fhandle_hash(fh);
 	),
 
@@ -1416,7 +1416,7 @@ TRACE_EVENT(nfs_initiate_write,
 			__entry->count = hdr->args.count;
 			__entry->stable = hdr->args.stable;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1467,7 +1467,7 @@ TRACE_EVENT(nfs_writeback_done,
 				&verf->verifier,
 				NFS4_VERIFIER_SIZE);
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1507,7 +1507,7 @@ DECLARE_EVENT_CLASS(nfs_page_class,
 			const struct nfs_inode *nfsi = NFS_I(inode);
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->req = req;
 			__entry->offset = req_offset(req);
@@ -1555,7 +1555,7 @@ DECLARE_EVENT_CLASS(nfs_page_error_class,
 		TP_fast_assign(
 			const struct nfs_inode *nfsi = NFS_I(inode);
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
 			__entry->offset = req_offset(req);
 			__entry->count = req->wb_bytes;
@@ -1609,7 +1609,7 @@ TRACE_EVENT(nfs_initiate_commit,
 			__entry->offset = data->args.offset;
 			__entry->count = data->args.count;
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1655,7 +1655,7 @@ TRACE_EVENT(nfs_commit_done,
 				&verf->verifier,
 				NFS4_VERIFIER_SIZE);
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 		),
 
@@ -1701,7 +1701,7 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class,
 			const struct nfs_fh *fh = &nfsi->fh;
 
 			__entry->dev = inode->i_sb->s_dev;
-			__entry->fileid = nfsi->fileid;
+			__entry->fileid = inode->i_ino;
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = dreq->io_start;
 			__entry->count = dreq->count;
@@ -1765,7 +1765,7 @@ DECLARE_EVENT_CLASS(nfs_local_dio_class,
 		const struct nfs_fh *fh = &nfsi->fh;
 
 		__entry->dev = inode->i_sb->s_dev;
-		__entry->fileid = nfsi->fileid;
+		__entry->fileid = inode->i_ino;
 		__entry->fhandle = nfs_fhandle_hash(fh);
 		__entry->offset = offset;
 		__entry->count = count;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4a87b2fdb2e6..7dd478ffc2fa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -759,7 +759,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
 	dprintk("NFS: initiated pgio call "
 		"(req %s/%llu, %u bytes @ offset %llu)\n",
 		hdr->inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(hdr->inode),
+		(unsigned long long)hdr->inode->i_ino,
 		hdr->args.count,
 		(unsigned long long)hdr->args.offset);
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 743467e9ba20..7715e2bd5871 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1463,8 +1463,6 @@ _pnfs_return_layout(struct inode *ino)
 	pnfs_clear_layoutcommit(ino, &tmp_list);
 	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
 
-	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
-		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
 
 	/* Don't send a LAYOUTRETURN if list was initially empty */
 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
@@ -1476,6 +1474,8 @@ _pnfs_return_layout(struct inode *ino)
 
 	send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
 	spin_unlock(&ino->i_lock);
+	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
+		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
 	if (send)
 		status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY,
 						0);
@@ -2229,11 +2229,11 @@ lookup_again:
 		dprintk("%s wait for layoutreturn\n", __func__);
 		lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo));
 		if (!IS_ERR(lseg)) {
-			pnfs_put_layout_hdr(lo);
 			dprintk("%s retrying\n", __func__);
 			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
 						 lseg,
 						 PNFS_UPDATE_LAYOUT_RETRY);
+			pnfs_put_layout_hdr(lo);
 			goto lookup_again;
 		}
 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
@@ -2373,7 +2373,7 @@ out:
 	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
 			"(%s, offset: %llu, length: %llu)\n",
 			__func__, ino->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(ino),
+			(unsigned long long)ino->i_ino,
 			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
 			iomode==IOMODE_RW ?  "read/write" : "read-only",
 			(unsigned long long)pos,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 12632a706da8..0ff43dbcb7cd 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -1075,14 +1075,14 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	/* r_netid */
 	nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
 					    gfp_flags);
-	if (unlikely(nlen < 0))
+	if (unlikely(nlen <= 0))
 		goto out_err;
 
 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 	/* port is ".ABC.DEF", 8 chars max */
 	rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
 					    IPV6_SCOPE_ID_LEN + 8, gfp_flags);
-	if (unlikely(rlen < 0))
+	if (unlikely(rlen <= 0))
 		goto out_free_netid;
 
 	/* replace port '.' with '-' */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e1fe78d7b8d0..2b70bd2b934b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -132,10 +132,32 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
 
 static void nfs_page_group_set_uptodate(struct nfs_page *req)
 {
-	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+	bool uptodate = false;	/* becomes true only if the folio may be marked uptodate */
+
+	nfs_page_group_lock(req);	/* flag test and bit sync share one lock hold */
+	if (!test_bit(PG_READ_FAILED, &req->wb_head->wb_flags) && /* failure flag on the head vetoes */
+	    nfs_page_group_sync_on_bit_locked(req, PG_UPTODATE))
+		uptodate = true;
+	nfs_page_group_unlock(req);
+
+	if (uptodate)	/* the folio is marked only after the group lock is dropped */
 		folio_mark_uptodate(nfs_page_to_folio(req));
 }
 
+static void nfs_page_group_mark_read_failed(struct nfs_page *req)
+{	/* Flag req's page group as failed and clear PG_UPTODATE on its requests. */
+	struct nfs_page *tmp;	/* cursor for the walk along wb_this_page */
+
+	nfs_page_group_lock(req);
+	set_bit(PG_READ_FAILED, &req->wb_head->wb_flags); /* recorded on the group head */
+	tmp = req;
+	do {
+		clear_bit(PG_UPTODATE, &tmp->wb_flags);
+		tmp = tmp->wb_this_page;
+	} while (tmp != req);	/* stop once the walk is back at the starting request */
+	nfs_page_group_unlock(req);
+}
+
 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
@@ -172,6 +194,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
 			if (bytes <= hdr->good_bytes)
 				nfs_page_group_set_uptodate(req);
 			else {
+				nfs_page_group_mark_read_failed(req);
 				error = hdr->error;
 				xchg(&nfs_req_openctx(req)->error, error);
 			}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8f8a03a68d3d..cb19f1540d98 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -509,6 +509,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	default:
 		break;
 	}
+	if (clp->cl_xprtsec.cert_serial)
+		seq_puts(m, ",cert_serial=<redacted>");
+	if (clp->cl_xprtsec.privkey_serial)
+		seq_puts(m, ",privkey_serial=<redacted>");
 
 	if (version != 4)
 		nfs_show_mountd_options(m, nfss, showdefaults);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 43ea897943c0..b57cfaa4d516 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -460,7 +460,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto out;
 
-	fileid = NFS_FILEID(d_inode(dentry));
+	fileid = d_inode(dentry)->i_ino;
 
 	sdentry = NULL;
 	do {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d7c399763ad9..fcffb8c9e9df 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1817,7 +1817,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 
 		dprintk("NFS:       commit (%s/%llu %d@%lld)",
 			nfs_req_openctx(req)->dentry->d_sb->s_id,
-			(unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
+			(unsigned long long)d_inode(nfs_req_openctx(req)->dentry)->i_ino,
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (status < 0) {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4623262da3c0..ec17e602c979 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -146,11 +146,6 @@ struct nfs4_xattr_cache;
  */
 struct nfs_inode {
 	/*
-	 * The 64bit 'inode number'
-	 */
-	__u64 fileid;
-
-	/*
 	 * NFS file handle
 	 */
 	struct nfs_fh		fh;
@@ -394,16 +389,6 @@ static inline int NFS_STALE(const struct inode *inode)
 	return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 }
 
-static inline __u64 NFS_FILEID(const struct inode *inode)
-{
-	return NFS_I(inode)->fileid;
-}
-
-static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
-{
-	NFS_I(inode)->fileid = fileid;
-}
-
 static inline void nfs_mark_for_revalidate(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -473,7 +458,6 @@ extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context
 extern void nfs_file_clear_open_context(struct file *flip);
 extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
 extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
-extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
 extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
 extern unsigned long nfs_inc_attr_generation_counter(void);
@@ -668,15 +652,6 @@ static inline loff_t nfs_size_to_loff_t(__u64 size)
 	return min_t(u64, size, OFFSET_MAX);
 }
 
-static inline ino_t
-nfs_fileid_to_ino_t(u64 fileid)
-{
-	ino_t ino = (ino_t) fileid;
-	if (sizeof(ino_t) < sizeof(u64))
-		ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
-	return ino;
-}
-
 static inline void nfs_ooo_clear(struct nfs_inode *nfsi)
 {
 	nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index afe1d8f09d89..4b9a35dbc062 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,7 @@ enum {
 	PG_TEARDOWN,		/* page group sync for destroy */
 	PG_UNLOCKPAGE,		/* page group sync bit in read path */
 	PG_UPTODATE,		/* page group sync bit in read path */
+	PG_READ_FAILED,		/* page group saw a read error */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
 	PG_CONTENDED1,		/* Is someone waiting for a lock? */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 35ea18a40b66..11c5b31cfc7d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -582,7 +582,6 @@ struct nfs_lock_args {
 	struct nfs_lowner	lock_owner;
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
-	unsigned char		new_lock : 1;
 	unsigned char		new_lock_owner : 1;
 };
 
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index a90480f80154..e638b92b7ad1 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -327,7 +327,7 @@ static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj,
 {
 	struct rpc_xprt_switch *xprt_switch =
 		rpc_sysfs_xprt_switch_kobj_get_xprt(kobj);
-	struct xprt_create xprt_create_args;
+	struct xprt_create xprt_create_args = {};
 	struct rpc_xprt *xprt, *new;
 
 	if (!xprt_switch)
@@ -348,7 +348,7 @@ static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj,
 	xprt_create_args.reconnect_timeout = xprt->max_reconnect_timeout;
 
 	new = xprt_create_transport(&xprt_create_args);
-	if (IS_ERR_OR_NULL(new)) {
+	if (IS_ERR(new)) {
 		count = PTR_ERR(new);
 		goto out_put_xprt;
 	}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2f0f9618dd05..e5b3463da25f 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -159,9 +159,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 	rpcrdma_rep_put(&r_xprt->rx_buf, rep);
 	req->rl_reply = NULL;
 
-	spin_lock(&xprt->bc_pa_lock);
-	list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-	spin_unlock(&xprt->bc_pa_lock);
+	rpcrdma_req_put(req);
 	xprt_put(xprt);
 }
 
@@ -203,6 +201,7 @@ create_req:
 	rqst->rq_xprt = xprt;
 	__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 	xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size);
+	kref_init(&req->rl_kref);
 	return rqst;
 }
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 7f79a0a2601e..e5c71cf705a3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -474,7 +474,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 		++num_wrs;
 	}
 
-	if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+	if (req->rl_sendctx->sc_unmap_count || num_wrs > ep->re_send_count) {
 		send_wr->send_flags |= IB_SEND_SIGNALED;
 		ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
 					  num_wrs - ep->re_send_count);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0e0f21974710..1285f04cdac1 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -467,29 +467,11 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 	return 0;
 }
 
-static void rpcrdma_sendctx_done(struct kref *kref)
-{
-	struct rpcrdma_req *req =
-		container_of(kref, struct rpcrdma_req, rl_kref);
-	struct rpcrdma_rep *rep = req->rl_reply;
-
-	rpcrdma_complete_rqst(rep);
-	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
-}
-
-/**
- * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
- * @sc: sendctx containing SGEs to unmap
- *
- */
-void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
+static void rpcrdma_sendctx_dma_unmap(struct rpcrdma_sendctx *sc)
 {
 	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
 	struct ib_sge *sge;
 
-	if (!sc->sc_unmap_count)
-		return;
-
 	/* The first two SGEs contain the transport header and
 	 * the inline buffer. These are always left mapped so
 	 * they can be cheaply re-used.
@@ -498,8 +480,33 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
 	     ++sge, --sc->sc_unmap_count)
 		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
 				  DMA_TO_DEVICE);
+}
+
+/**
+ * rpcrdma_sendctx_unmap - DMA-unmap Send buffer and release Send owner
+ * @sc: sendctx containing SGEs to unmap
+ *
+ */
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
+{
+	struct rpcrdma_req *req = sc->sc_req;
 
-	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
+	rpcrdma_sendctx_dma_unmap(sc);
+	sc->sc_req = NULL;
+	req->rl_sendctx = NULL;
+	rpcrdma_req_put(req);
+}
+
+/* No Send was posted. Release DMA mappings prepared for this
+ * sendctx, but leave the request reference count alone.
+ */
+static void rpcrdma_sendctx_cancel(struct rpcrdma_sendctx *sc)
+{
+	struct rpcrdma_req *req = sc->sc_req;
+
+	rpcrdma_sendctx_dma_unmap(sc);
+	sc->sc_req = NULL;
+	req->rl_sendctx = NULL;
 }
 
 /* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -691,8 +698,6 @@ static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
 					      tail->iov_len))
 			return false;
 
-	if (req->rl_sendctx->sc_unmap_count)
-		kref_get(&req->rl_kref);
 	return true;
 }
 
@@ -722,7 +727,6 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
 		len -= len & 3;
 		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
 			return false;
-		kref_get(&req->rl_kref);
 	}
 
 	return true;
@@ -743,6 +747,7 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
 				     struct xdr_buf *xdr,
 				     enum rpcrdma_chunktype rtype)
 {
+	struct rpcrdma_sendctx *sc;
 	int ret;
 
 	ret = -EAGAIN;
@@ -751,7 +756,6 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
 		goto out_nosc;
 	req->rl_sendctx->sc_unmap_count = 0;
 	req->rl_sendctx->sc_req = req;
-	kref_init(&req->rl_kref);
 	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
 	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
 	req->rl_wr.num_sge = 0;
@@ -779,10 +783,16 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
 		goto out_unmap;
 	}
 
+	/* The Send-side owner releases this reference when the
+	 * Send has completed.
+	 */
+	kref_get(&req->rl_kref);
 	return 0;
 
 out_unmap:
-	rpcrdma_sendctx_unmap(req->rl_sendctx);
+	sc = req->rl_sendctx;
+	rpcrdma_sendctx_cancel(sc);
+	rpcrdma_sendctx_unget_locked(r_xprt, sc);
 out_nosc:
 	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
 	return ret;
@@ -1081,6 +1091,8 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
 
 	/* Peek at stream contents without advancing. */
 	p = xdr_inline_decode(xdr, 0);
+	if ((char *)xdr->end - (char *)p < 5 * XDR_UNIT)
+		return false;
 
 	/* Chunk lists */
 	if (xdr_item_is_present(p++))
@@ -1105,7 +1117,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
 	 */
 	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
 	if (unlikely(!p))
-		return true;
+		return false;
 
 	rpcrdma_bc_receive_call(r_xprt, rep);
 	return true;
@@ -1329,6 +1341,11 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
 	struct rpc_rqst *rqst = rep->rr_rqst;
 	int status;
 
+	/* I3: rl_registered has been drained by frwr_unmap before
+	 * complete_rqst runs.
+	 */
+	WARN_ON_ONCE(!list_empty(&rpcr_to_rdmar(rqst)->rl_registered));
+
 	switch (rep->rr_proc) {
 	case rdma_msg:
 		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
@@ -1360,13 +1377,69 @@ out_badheader:
 	goto out;
 }
 
-static void rpcrdma_reply_done(struct kref *kref)
-{
-	struct rpcrdma_req *req =
-		container_of(kref, struct rpcrdma_req, rl_kref);
-
-	rpcrdma_complete_rqst(req->rl_reply);
-}
+/* Reply-side ownership invariants
+ *
+ * I1 (Receive WR ownership).  A struct rpcrdma_rep is owned by the
+ *    HCA between ib_post_recv() and the matching Receive completion.
+ *    After ib_dma_sync_single_for_cpu() in rpcrdma_wc_receive() it is
+ *    owned by the CPU until rpcrdma_rep_put() returns it to
+ *    rb_free_reps; a rep on rb_free_reps is not re-posted until
+ *    rpcrdma_post_recvs() pulls it off.  Asserted: rpcrdma_post_recvs()
+ *    WARNs that a pulled rep has rr_rqst == NULL.
+ *
+ * I2 (rep attachment).  While req->rl_reply == rep, the rep cannot be
+ *    re-posted.  rpcrdma_reply_put() NULLs req->rl_reply before handing
+ *    the rep to rpcrdma_rep_put().  Asserted: rpcrdma_reply_put() WARNs
+ *    that rl_reply is NULL after the put.
+ *
+ * I3 (Registered-MR fence).  On entry to rpcrdma_complete_rqst() every
+ *    MR that was on req->rl_registered has had its rkey invalidated
+ *    (remotely via IB_WC_WITH_INVALIDATE or locally via IB_WR_LOCAL_INV)
+ *    and its pages ib_dma_unmap_sg()'d.  The LocalInv chain is posted
+ *    on a single QP; strong send-queue ordering makes the last
+ *    completion (frwr_wc_localinv_done) observe the
+ *    ib_dma_unmap_sg() that ran from each earlier completion's
+ *    frwr_mr_put() before complete_rqst is called.  The inline
+ *    frwr_reminv() path unmaps its one MR synchronously before
+ *    rpcrdma_reply_handler() reaches complete_rqst.  Asserted:
+ *    rpcrdma_complete_rqst() WARNs that rl_registered is empty.
+ *
+ * I4 (Send-buffer release).  req->rl_kref carries two unconditional
+ *    owners while a Send is outstanding: the RPC-layer reference (set
+ *    at xprt_rdma_alloc_slot / xprt_rdma_bc_rqst_get / rpcrdma_req_release
+ *    pool-entry) and the Send-side reference (kref_get() in
+ *    rpcrdma_prepare_send_sges()).  rpcrdma_req_release() runs only
+ *    after both have dropped, so the req does not return to its free
+ *    pool until rpcrdma_sendctx_unmap() has fired -- the HCA has
+ *    released the send buffer before the req can be reused.  Asserted:
+ *    rpcrdma_req_release() WARNs that rl_sendctx is NULL.
+ *
+ * I5 (req lifecycle).  A req is owned by the RPC layer between slot
+ *    acquisition and the matching xprt_rdma_free_slot() (or, for the
+ *    backchannel, xprt_rdma_bc_free_rqst()).  While owned, rl_kref >= 1.
+ *    The pools (rb_send_bufs, bc_pa_list, backlog wake target) never
+ *    contain a req with outstanding Send-side or Reply-side work.
+ *
+ * Non-hazards.  The following claims have been raised by adversarial
+ * review and are each closed by the invariants above:
+ *
+ *   * "Reply completes the RPC while the HCA still holds the send
+ *     buffer" -- excluded by I4.  The Send-side kref reference is held
+ *     until rpcrdma_sendctx_unmap() runs from Send completion.
+ *
+ *   * "Signal-driven release races the in-flight Send" -- same
+ *     resolution.  xprt_rdma_free() does not touch rl_kref; the
+ *     Send-side reference keeps the req out of its pool until Send
+ *     completion fires.
+ *
+ *   * "Receive completion races rep reuse" -- excluded by I1.  A rep
+ *     is on rb_free_reps only after rpcrdma_rep_put() has been called
+ *     and rpcrdma_post_recvs() owns the next transition back to the HCA.
+ *
+ *   * "Pages still DMA-mapped when call_decode reads them" -- excluded
+ *     by I3.  The matching ib_dma_unmap_sg() for every MR has run on
+ *     the same CPU thread that calls rpcrdma_complete_rqst().
+ */
 
 /**
  * rpcrdma_reply_handler - Process received RPC/RDMA messages
@@ -1402,6 +1475,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	credits = be32_to_cpu(*p++);
 	rep->rr_proc = *p++;
 
+	/* The credit grant from the wire is not trustworthy;
+	 * sanitize it before any code path consumes it.
+	 */
+	if (credits == 0)
+		credits = 1;	/* don't deadlock */
+	else if (credits > r_xprt->rx_ep->re_max_requests)
+		credits = r_xprt->rx_ep->re_max_requests;
+
 	if (rep->rr_vers != rpcrdma_version)
 		goto out_badversion;
 
@@ -1418,10 +1499,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	xprt_pin_rqst(rqst);
 	spin_unlock(&xprt->queue_lock);
 
-	if (credits == 0)
-		credits = 1;	/* don't deadlock */
-	else if (credits > r_xprt->rx_ep->re_max_requests)
-		credits = r_xprt->rx_ep->re_max_requests;
 	if (buf->rb_credits != credits)
 		rpcrdma_update_cwnd(r_xprt, credits);
 
@@ -1439,7 +1516,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 		frwr_unmap_async(r_xprt, req);
 		/* LocalInv completion will complete the RPC */
 	else
-		kref_put(&req->rl_kref, rpcrdma_reply_done);
+		rpcrdma_complete_rqst(rep);
 
 out_post:
 	rpcrdma_post_recvs(r_xprt,
@@ -1454,11 +1531,13 @@ out_norqst:
 
 out_badversion:
 	trace_xprtrdma_reply_vers_err(rep);
-	goto out;
+	rpcrdma_rep_put(buf, rep);
+	credits = buf->rb_credits;
+	goto out_post;
 
 out_shortreply:
 	trace_xprtrdma_reply_short_err(rep);
-
-out:
 	rpcrdma_rep_put(buf, rep);
+	credits = buf->rb_credits;
+	goto out_post;
 }
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 61706df5e485..d4e6746d8ecd 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -279,6 +279,13 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
 	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
 
 	rpcrdma_xprt_disconnect(r_xprt);
+
+	/* The disconnect's sendctx drain can return bc_prealloc reqs
+	 * to bc_pa_list after xprt_destroy_backchannel() emptied it.
+	 */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+	xprt_rdma_bc_destroy(xprt, 0);
+#endif
 	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
 
 	xprt_rdma_free_addresses(xprt);
@@ -484,7 +491,52 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 		xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
 	}
 	trace_xprtrdma_op_connect(r_xprt, delay);
-	queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay);
+	queue_delayed_work(system_dfl_long_wq, &r_xprt->rx_connect_worker,
+			   delay);
+}
+
+/* rl_kref has two owners while a Send is outstanding: the rpc_rqst
+ * owner and the sendctx. Replies complete the RPC but do not drop
+ * either reference. The req returns to its free pool only after
+ * xprt_rdma_free_slot() or xprt_rdma_bc_free_rqst() has dropped the
+ * RPC-layer reference and rpcrdma_sendctx_unmap() has dropped the
+ * Send-side reference.
+ */
+static void rpcrdma_req_release(struct kref *kref)
+{
+	struct rpcrdma_req *req =
+		container_of(kref, struct rpcrdma_req, rl_kref);
+	struct rpc_rqst *rqst = &req->rl_slot;
+	struct rpc_xprt *xprt = rqst->rq_xprt;
+	struct rpcrdma_xprt *r_xprt;
+
+	/* I4: both the RPC-layer and Send-side owners have dropped,
+	 * so rpcrdma_sendctx_unmap() has cleared rl_sendctx.
+	 */
+	WARN_ON_ONCE(req->rl_sendctx);
+
+	kref_init(&req->rl_kref);
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+	if (bc_prealloc(rqst)) {
+		spin_lock(&xprt->bc_pa_lock);
+		list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+		spin_unlock(&xprt->bc_pa_lock);
+		return;
+	}
+#endif
+
+	if (xprt_wake_up_backlog(xprt, rqst))
+		return;
+
+	r_xprt = rpcx_to_rdmax(xprt);
+	memset(rqst, 0, sizeof(*rqst));
+	rpcrdma_buffer_put(&r_xprt->rx_buf, req);
+}
+
+void rpcrdma_req_put(struct rpcrdma_req *req)
+{
+	kref_put(&req->rl_kref, rpcrdma_req_release);
 }
 
 /**
@@ -505,6 +557,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 	if (!req)
 		goto out_sleep;
+	kref_init(&req->rl_kref);
 	task->tk_rqstp = &req->rl_slot;
 	task->tk_status = 0;
 	return;
@@ -520,6 +573,7 @@ out_sleep:
 	if (req) {
 		struct rpc_rqst *rqst = &req->rl_slot;
 
+		kref_init(&req->rl_kref);
 		if (!xprt_wake_up_backlog(xprt, rqst)) {
 			memset(rqst, 0, sizeof(*rqst));
 			rpcrdma_buffer_put(&r_xprt->rx_buf, req);
@@ -540,10 +594,7 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
 
 	rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
-	if (!xprt_wake_up_backlog(xprt, rqst)) {
-		memset(rqst, 0, sizeof(*rqst));
-		rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
-	}
+	rpcrdma_req_put(rpcr_to_rdmar(rqst));
 }
 
 static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
@@ -607,10 +658,10 @@ xprt_rdma_free(struct rpc_task *task)
 		frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req);
 	}
 
-	/* XXX: If the RPC is completing because of a signal and
-	 * not because a reply was received, we ought to ensure
-	 * that the Send completion has fired, so that memory
-	 * involved with the Send is not still visible to the NIC.
+	/* The Send-side rl_kref owner keeps req out of its free pool
+	 * until rpcrdma_sendctx_unmap() has fired -- see I4 above
+	 * rpcrdma_reply_handler() -- so signal-driven release here
+	 * does not let the HCA touch a recycled send buffer.
 	 */
 }
 
@@ -716,7 +767,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 		   r_xprt->rx_stats.mrs_allocated,
 		   r_xprt->rx_stats.local_inv_needed,
 		   r_xprt->rx_stats.empty_sendctx_q,
-		   r_xprt->rx_stats.reply_waits_for_send);
+		   0LU); /* was reply_waits_for_send; column preserved */
 }
 
 static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index aecf9c0a153f..04b286223b24 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -65,6 +65,8 @@
 
 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
+static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
+					  unsigned long item);
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
 				       struct rpcrdma_sendctx *sc);
 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
@@ -79,6 +81,8 @@ rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
 			  int node);
 static struct rpcrdma_regbuf *
 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
+static bool rpcrdma_regbuf_realloc_node(struct rpcrdma_regbuf *rb,
+					size_t size, gfp_t flags, int node);
 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
 
@@ -243,8 +247,17 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		complete(&ep->re_done);
 		return 0;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
-		ep->re_connect_status = -ENODEV;
-		goto disconnected;
+		switch (xchg(&ep->re_connect_status, -ENODEV)) {
+		case 0:
+			goto wake_connect_worker;
+		case 1:
+			/* The later DISCONNECTED event balances the
+			 * ESTABLISHED get; do not put here.
+			 */
+			rpcrdma_force_disconnect(ep);
+			return 0;
+		}
+		return 0;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		rpcrdma_ep_get(ep);
 		ep->re_connect_status = 1;
@@ -267,7 +280,6 @@ wake_connect_worker:
 		return 0;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		ep->re_connect_status = -ECONNABORTED;
-disconnected:
 		rpcrdma_force_disconnect(ep);
 		return rpcrdma_ep_put(ep);
 	default:
@@ -324,6 +336,7 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
 	if (rc)
 		goto out;
 
+	ep->re_id = id;
 	rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
 	if (rc)
 		goto out;
@@ -396,7 +409,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 	}
 	__module_get(THIS_MODULE);
 	device = id->device;
-	ep->re_id = id;
 	reinit_completion(&ep->re_done);
 
 	ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
@@ -539,7 +551,17 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
 		goto out;
 	}
 	rpcrdma_mrs_create(r_xprt);
-	frwr_wp_create(r_xprt);
+
+	/*
+	 * rpcrdma_encode_write_list() dereferences the write-pad
+	 * MR with no NULL check, so fail the connect rather than
+	 * publish a transport whose write-pad MR is NULL.
+	 */
+	rc = frwr_wp_create(r_xprt);
+	if (rc) {
+		rc = -ENOTCONN;
+		goto out;
+	}
 
 out:
 	trace_xprtrdma_connect(r_xprt, rc);
@@ -571,9 +593,9 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
 
 	rpcrdma_xprt_drain(r_xprt);
 	rpcrdma_reps_unmap(r_xprt);
+	rpcrdma_sendctxs_destroy(r_xprt);
 	rpcrdma_reqs_reset(r_xprt);
 	rpcrdma_mrs_destroy(r_xprt);
-	rpcrdma_sendctxs_destroy(r_xprt);
 
 	if (rpcrdma_ep_put(ep))
 		rdma_destroy_id(id);
@@ -605,6 +627,25 @@ static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt)
 
 	if (!buf->rb_sc_ctxs)
 		return;
+
+	/* The QP is drained, but the final unsignaled Sends might not
+	 * have been walked by a signaled Send completion. Release those
+	 * Send owners before request buffers are reset.
+	 *
+	 * Unlike the completion sweep, this walk can visit slots with
+	 * no Send posted: after a partial rpcrdma_sendctxs_create()
+	 * failure on reconnect, rb_sc_head and rb_sc_tail are stale,
+	 * and slots between them can be NULL or have sc_req clear.
+	 */
+	for (i = rpcrdma_sendctx_next(buf, buf->rb_sc_tail);
+	     i != rpcrdma_sendctx_next(buf, buf->rb_sc_head);
+	     i = rpcrdma_sendctx_next(buf, i)) {
+		struct rpcrdma_sendctx *sc = buf->rb_sc_ctxs[i];
+
+		if (sc && sc->sc_req)
+			rpcrdma_sendctx_unmap(sc);
+	}
+
 	for (i = 0; i <= buf->rb_sc_last; i++)
 		kfree(buf->rb_sc_ctxs[i]);
 	kfree(buf->rb_sc_ctxs);
@@ -667,6 +708,12 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
 	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
 }
 
+static unsigned long rpcrdma_sendctx_prev(struct rpcrdma_buffer *buf,
+					  unsigned long item)
+{
+	return item > 0 ? item - 1 : buf->rb_sc_last;
+}
+
 /**
  * rpcrdma_sendctx_get_locked - Acquire a send context
  * @r_xprt: controlling transport instance
@@ -724,6 +771,29 @@ out_emptyq:
 }
 
 /**
+ * rpcrdma_sendctx_unget_locked - Release an unposted send context
+ * @r_xprt: controlling transport instance
+ * @sc: send context to release
+ *
+ * Usage: Called when no Send is posted for the sendctx most
+ * recently returned by rpcrdma_sendctx_get_locked().
+ *
+ * The caller serializes calls to this function and to
+ * rpcrdma_sendctx_get_locked() (per transport).
+ */
+void rpcrdma_sendctx_unget_locked(struct rpcrdma_xprt *r_xprt,
+				  struct rpcrdma_sendctx *sc)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+	if (WARN_ON_ONCE(buf->rb_sc_ctxs[buf->rb_sc_head] != sc))
+		return;
+
+	buf->rb_sc_head = rpcrdma_sendctx_prev(buf, buf->rb_sc_head);
+	xprt_write_space(&r_xprt->rx_xprt);
+}
+
+/**
  * rpcrdma_sendctx_put_locked - Release a send context
  * @r_xprt: controlling transport instance
  * @sc: send context to release
@@ -739,15 +809,18 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	unsigned long next_tail;
 
-	/* Unmap SGEs of previously completed but unsignaled
-	 * Sends by walking up the queue until @sc is found.
+	/* Release previously completed but unsignaled Sends by walking
+	 * up the queue until @sc is found.
 	 */
 	next_tail = buf->rb_sc_tail;
 	do {
+		struct rpcrdma_sendctx *cur;
+
 		next_tail = rpcrdma_sendctx_next(buf, next_tail);
 
 		/* ORDER: item must be accessed _before_ tail is updated */
-		rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
+		cur = buf->rb_sc_ctxs[next_tail];
+		rpcrdma_sendctx_unmap(cur);
 
 	} while (buf->rb_sc_ctxs[next_tail] != sc);
 
@@ -1022,9 +1095,15 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
  * @buf: buffer pool
  * @rep: rep to release
  *
+ * The rep's transient association with an rpc_rqst, established
+ * by rpcrdma_reply_handler() and torn down here, must not survive
+ * onto rb_free_reps: rpcrdma_post_recvs() pulls reps from the free
+ * list to re-post them, and a non-NULL rr_rqst on a free-listed rep
+ * would imply the rep is still referenced by a req.
  */
 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
 {
+	rep->rr_rqst = NULL;
 	llist_add(&rep->rr_node, &buf->rb_free_reps);
 }
 
@@ -1059,6 +1138,22 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
 	spin_unlock(&buf->rb_lock);
 }
 
+static unsigned int rpcrdma_req_pool_slack(unsigned int max_reqs)
+{
+	/* The sendctx ring can hold up to one Send-signaling batch
+	 * (re_send_batch, set by frwr_open() to re_max_requests >> 3)
+	 * of unfinished Sends. Each pins its req until a signaled Send
+	 * completion releases the sendctx. Size the pool above max_reqs
+	 * by that batch so the recycle delay does not stall a slot
+	 * allocation that the RPC/RDMA credit window would admit.
+	 *
+	 * Round up: re_max_requests >> 3 is zero when max_reqs < 8, but
+	 * a single unsignaled Send is still enough to pin one req. One
+	 * slack slot covers that case.
+	 */
+	return DIV_ROUND_UP(max_reqs, 8);
+}
+
 /**
  * rpcrdma_buffer_create - Create initial set of req/rep objects
  * @r_xprt: transport instance to (re)initialize
@@ -1068,6 +1163,7 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
 int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	unsigned int max_reqs;
 	int i, rc;
 
 	buf->rb_bc_srv_max_requests = 0;
@@ -1076,19 +1172,21 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 	INIT_LIST_HEAD(&buf->rb_all_mrs);
 	INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
 
-	INIT_LIST_HEAD(&buf->rb_send_bufs);
+	init_llist_head(&buf->rb_send_bufs);
 	INIT_LIST_HEAD(&buf->rb_allreqs);
 	INIT_LIST_HEAD(&buf->rb_all_reps);
 
 	rc = -ENOMEM;
-	for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
+	max_reqs = r_xprt->rx_xprt.max_reqs;
+	max_reqs += rpcrdma_req_pool_slack(max_reqs);
+	for (i = 0; i < max_reqs; i++) {
 		struct rpcrdma_req *req;
 
 		req = rpcrdma_req_create(r_xprt,
 					 RPCRDMA_V1_DEF_INLINE_SIZE * 2);
 		if (!req)
 			goto out;
-		list_add(&req->rl_list, &buf->rb_send_bufs);
+		llist_add(&req->rl_node, &buf->rb_send_bufs);
 	}
 
 	init_llist_head(&buf->rb_free_reps);
@@ -1168,16 +1266,14 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
-	rpcrdma_reps_destroy(buf);
+	struct rpcrdma_req *req, *next;
+	struct llist_node *node;
 
-	while (!list_empty(&buf->rb_send_bufs)) {
-		struct rpcrdma_req *req;
+	rpcrdma_reps_destroy(buf);
 
-		req = list_first_entry(&buf->rb_send_bufs,
-				       struct rpcrdma_req, rl_list);
-		list_del(&req->rl_list);
+	node = llist_del_all(&buf->rb_send_bufs);
+	llist_for_each_entry_safe(req, next, node, rl_node)
 		rpcrdma_req_destroy(req);
-	}
 }
 
 /**
@@ -1207,9 +1303,11 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
  */
 void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
-	if (req->rl_reply) {
-		rpcrdma_rep_put(buffers, req->rl_reply);
+	struct rpcrdma_rep *rep = req->rl_reply;
+
+	if (rep) {
 		req->rl_reply = NULL;
+		rpcrdma_rep_put(buffers, rep);
 	}
 }
 
@@ -1222,15 +1320,15 @@ void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
-	struct rpcrdma_req *req;
+	struct llist_node *node;
 
+	/* Calls to llist_del_first are required to be serialized */
 	spin_lock(&buffers->rb_lock);
-	req = list_first_entry_or_null(&buffers->rb_send_bufs,
-				       struct rpcrdma_req, rl_list);
-	if (req)
-		list_del_init(&req->rl_list);
+	node = llist_del_first(&buffers->rb_send_bufs);
 	spin_unlock(&buffers->rb_lock);
-	return req;
+	if (!node)
+		return NULL;
+	return llist_entry(node, struct rpcrdma_req, rl_node);
 }
 
 /**
@@ -1243,9 +1341,7 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
 	rpcrdma_reply_put(buffers, req);
 
-	spin_lock(&buffers->rb_lock);
-	list_add(&req->rl_list, &buffers->rb_send_bufs);
-	spin_unlock(&buffers->rb_lock);
+	llist_add(&req->rl_node, &buffers->rb_send_bufs);
 }
 
 /* Returns a pointer to a rpcrdma_regbuf object, or NULL.
@@ -1292,9 +1388,15 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
  */
 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
 {
+	return rpcrdma_regbuf_realloc_node(rb, size, flags, NUMA_NO_NODE);
+}
+
+static bool rpcrdma_regbuf_realloc_node(struct rpcrdma_regbuf *rb,
+					size_t size, gfp_t flags, int node)
+{
 	void *buf;
 
-	buf = kmalloc(size, flags);
+	buf = kmalloc_node(size, flags, node);
 	if (!buf)
 		return false;
 
@@ -1306,6 +1408,23 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
 	return true;
 }
 
+static bool rpcrdma_rep_resize(struct rpcrdma_xprt *r_xprt,
+			       struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_regbuf *rb = rep->rr_rdmabuf;
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
+	size_t size = ep->re_inline_recv;
+
+	if (likely(rdmab_length(rb) >= size))
+		return true;
+	if (!rpcrdma_regbuf_realloc_node(rb, size, XPRTRDMA_GFP_FLAGS,
+					 ibdev_to_node(ep->re_id->device)))
+		return false;
+
+	xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rb), rdmab_length(rb));
+	return true;
+}
+
 /**
  * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
  * @r_xprt: controlling transport instance
@@ -1387,6 +1506,12 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
 			rep = rpcrdma_rep_create(r_xprt);
 		if (!rep)
 			break;
+		/* I1: a rep on rb_free_reps must carry no rqst pointer. */
+		WARN_ON_ONCE(rep->rr_rqst);
+		if (!rpcrdma_rep_resize(r_xprt, rep)) {
+			rpcrdma_rep_put(buf, rep);
+			break;
+		}
 		if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
 			rpcrdma_rep_put(buf, rep);
 			break;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f53a77472724..4cbc941e4a3e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -332,7 +332,7 @@ enum {
 
 struct rpcrdma_buffer;
 struct rpcrdma_req {
-	struct list_head	rl_list;
+	struct llist_node	rl_node;
 	struct rpc_rqst		rl_slot;
 	struct rpcrdma_rep	*rl_reply;
 	struct xdr_stream	rl_stream;
@@ -374,14 +374,14 @@ rpcrdma_mr_pop(struct list_head *list)
 }
 
 /*
- * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
- * inline requests/replies, and client/server credits.
+ * struct rpcrdma_buffer -- holds pre-registered memory for inline
+ * requests/replies, and client/server credits.
  *
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
 	spinlock_t		rb_lock;
-	struct list_head	rb_send_bufs;
+	struct llist_head	rb_send_bufs;
 	struct list_head	rb_mrs;
 
 	unsigned long		rb_sc_head;
@@ -427,7 +427,6 @@ struct rpcrdma_stats {
 	/* accessed when receiving a reply */
 	unsigned long long	total_rdma_reply;
 	unsigned long long	fixup_copy_count;
-	unsigned long		reply_waits_for_send;
 	unsigned long		local_inv_needed;
 	unsigned long		nomsg_call_count;
 	unsigned long		bcall_count;
@@ -496,6 +495,8 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_sendctx_unget_locked(struct rpcrdma_xprt *r_xprt,
+				  struct rpcrdma_sendctx *sc);
 
 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
@@ -505,6 +506,7 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
 			struct rpcrdma_req *req);
 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
 void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
+void rpcrdma_req_put(struct rpcrdma_req *req);
 
 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
 			    gfp_t flags);
author	Linus Torvalds <torvalds@linux-foundation.org>	2026-06-23 18:36:41 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-06-23 18:36:41 -0700
commit	840ef6c78e6a2f694b578ecb9063241c992aaa9e (patch)
tree	d5915e31458e709297d3487482288cde25dcca70
parent	09ca8dc7d634f69d0b43f82c244add44cf7885b4 (diff)
parent	284ea3fb4f6715201e1d9ef3474c25e817ad70e9 (diff)
download	lwn-840ef6c78e6a2f694b578ecb9063241c992aaa9e.tar.gz lwn-840ef6c78e6a2f694b578ecb9063241c992aaa9e.zip