From 2289e87b5951f97783f07fc895e6c5e804b53668 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Sep 2020 17:22:49 -0400 Subject: SUNRPC: Make trace_svc_process() display the RPC procedure symbolically The next few patches will employ these strings to help make server- side trace logs more human-readable. A similar technique is already in use in kernel RPC client code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 34c2a69820e9..31ee3b6047c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -463,6 +463,7 @@ struct svc_procedure { unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ + const char * pc_name; /* for display */ }; /* -- cgit v1.2.3 From 81d217474326b25d7f14274b02fe3da1e85ad934 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 27 Nov 2020 17:37:02 -0500 Subject: SUNRPC: Move definition of XDR_UNIT Clean up: The unit of XDR alignment is defined by RFC 4506, not as part of the RPC message header. Thus it belongs in include/linux/sunrpc/xdr.h. Signed-off-by: Chuck Lever --- include/linux/sunrpc/msg_prot.h | 3 --- include/linux/sunrpc/xdr.h | 13 ++++++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 43f854487539..938c2bf29db8 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -10,9 +10,6 @@ #define RPC_VERSION 2 -/* size of an XDR encoding unit in bytes, i.e. 32bit */ -#define XDR_UNIT (4) - /* spec defines authentication flavor as an unsigned 32 bit integer */ typedef u32 rpc_authflavor_t; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 19b6dea27367..dbba537caab6 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -19,6 +19,13 @@ struct bio_vec; struct rpc_rqst; +/* + * Size of an XDR encoding unit in bytes, i.e. 32 bits, + * as defined in Section 3 of RFC 4506. All encoded + * XDR data items are aligned on a boundary of 32 bits. + */ +#define XDR_UNIT sizeof(__be32) + /* * Buffer adjustment */ @@ -330,7 +337,7 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, static inline size_t xdr_align_size(size_t n) { - const size_t mask = sizeof(__u32) - 1; + const size_t mask = XDR_UNIT - 1; return (n + mask) & ~mask; } @@ -360,7 +367,7 @@ static inline size_t xdr_pad_size(size_t n) */ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) { - const size_t len = sizeof(__be32); + const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) @@ -379,7 +386,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) */ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) { - const size_t len = sizeof(__be32); + const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) -- cgit v1.2.3 From 6bb844b4eb6e3b109a2fdaffb60e6da722dc4356 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Nov 2020 10:38:46 -0500 Subject: NFSD: Add an xdr_stream-based decoder for NFSv2/3 ACLs Signed-off-by: Chuck Lever --- fs/nfs_common/nfsacl.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfsacl.h | 3 +++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index d056ad2fdefd..79c563c1a5e8 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -295,3 +295,55 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, nfsacl_desc.desc.array_len; } EXPORT_SYMBOL_GPL(nfsacl_decode); + +/** + * nfs_stream_decode_acl - Decode an NFSv3 ACL + * + * @xdr: an xdr_stream positioned at an encoded ACL + * @aclcnt: OUT: count of ACEs in decoded posix_acl + * @pacl: OUT: a dynamically-allocated buffer containing the decoded posix_acl + * + * Return values: + * %false: The encoded ACL is not valid + * %true: @pacl contains a decoded ACL, and @xdr is advanced + * + * On a successful return, caller must release *pacl using posix_acl_release(). + */ +bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + const size_t elem_size = XDR_UNIT * 3; + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = elem_size, + .xcode = pacl ? xdr_nfsace_decode : NULL, + }, + }; + unsigned int base; + u32 entries; + + if (xdr_stream_decode_u32(xdr, &entries) < 0) + return false; + if (entries > NFS_ACL_MAX_ENTRIES) + return false; + + base = xdr_stream_pos(xdr); + if (!xdr_inline_decode(xdr, XDR_UNIT + elem_size * entries)) + return false; + nfsacl_desc.desc.array_maxlen = entries; + if (xdr_decode_array2(xdr->buf, base, &nfsacl_desc.desc)) + return false; + + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return false; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return true; +} +EXPORT_SYMBOL_GPL(nfs_stream_decode_acl); diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 103d44695323..0ba99c513649 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -38,5 +38,8 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, extern int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, struct posix_acl **pacl); +extern bool +nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, + struct posix_acl **pacl); #endif /* __LINUX_NFSACL_H */ -- cgit v1.2.3 From df971cd853c05778ae1175e8aeb80a04bb9d4be5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Dec 2020 15:47:44 -0500 Subject: svcrdma: Convert rdma_stat_recv to a per-CPU counter Receives are frequent events. Avoid the overhead of a memory bus lock cycle for counting a value that is hardly every used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 +- net/sunrpc/xprtrdma/svc_rdma.c | 55 ++++++++++++++++++++++++++++++--- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 +- 3 files changed, 54 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 294b56e61522..ff32c59a27e7 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -49,6 +49,7 @@ #include #include +#include #include #include @@ -65,7 +66,7 @@ extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; -extern atomic_t rdma_stat_recv; +extern struct percpu_counter svcrdma_stat_recv; extern atomic_t rdma_stat_read; extern atomic_t rdma_stat_write; extern atomic_t rdma_stat_sq_starve; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 1fc1d5cbeb9b..3e5e622bad81 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -63,7 +63,7 @@ unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH; static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH; static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH; -atomic_t rdma_stat_recv; +struct percpu_counter svcrdma_stat_recv; atomic_t rdma_stat_read; atomic_t rdma_stat_write; atomic_t rdma_stat_sq_starve; @@ -110,6 +110,42 @@ static int read_reset_stat(struct ctl_table *table, int write, return 0; } +enum { + SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long), +}; + +static int svcrdma_counter_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct percpu_counter *stat = (struct percpu_counter *)table->data; + char tmp[SVCRDMA_COUNTER_BUFSIZ + 1]; + int len; + + if (write) { + percpu_counter_set(stat, 0); + return 0; + } + + len = snprintf(tmp, SVCRDMA_COUNTER_BUFSIZ, "%lld\n", + percpu_counter_sum_positive(stat)); + if (len >= SVCRDMA_COUNTER_BUFSIZ) + return -EFAULT; + len = strlen(tmp); + if (*ppos > len) { + *lenp = 0; + return 0; + } + len -= *ppos; + if (len > *lenp) + len = *lenp; + if (len) + memcpy(buffer, tmp, len); + *lenp = len; + *ppos += len; + + return 0; +} + static struct ctl_table_header *svcrdma_table_header; static struct ctl_table svcrdma_parm_table[] = { { @@ -149,10 +185,10 @@ static struct ctl_table svcrdma_parm_table[] = { }, { .procname = "rdma_stat_recv", - .data = &rdma_stat_recv, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_recv, + .maxlen = SVCRDMA_COUNTER_BUFSIZ, .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = svcrdma_counter_handler, }, { .procname = "rdma_stat_write", @@ -230,15 +266,26 @@ static void svc_rdma_proc_cleanup(void) return; unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; + + percpu_counter_destroy(&svcrdma_stat_recv); } static int svc_rdma_proc_init(void) { + int rc; + if (svcrdma_table_header) return 0; + rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL); + if (rc) + goto out_err; + svcrdma_table_header = register_sysctl_table(svcrdma_root_table); return 0; + +out_err: + return rc; } void svc_rdma_cleanup(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cbdb71247755..7d14a74df716 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -845,8 +845,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } list_del(&ctxt->rc_list); spin_unlock(&rdma_xprt->sc_rq_dto_lock); - - atomic_inc(&rdma_stat_recv); + percpu_counter_inc(&svcrdma_stat_recv); svc_rdma_build_arg_xdr(rqstp, ctxt); -- cgit v1.2.3 From 22df5a22462e836ccb30634c3a52602091179a73 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Dec 2020 15:55:17 -0500 Subject: svcrdma: Convert rdma_stat_sq_starve to a per-CPU counter Avoid the overhead of a memory bus lock cycle for counting a value that is hardly every used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma.c | 13 +++++++++---- net/sunrpc/xprtrdma/svc_rdma_rw.c | 1 + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index ff32c59a27e7..c06b16ccf83e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -69,7 +69,7 @@ extern unsigned int svcrdma_max_req_size; extern struct percpu_counter svcrdma_stat_recv; extern atomic_t rdma_stat_read; extern atomic_t rdma_stat_write; -extern atomic_t rdma_stat_sq_starve; +extern struct percpu_counter svcrdma_stat_sq_starve; extern atomic_t rdma_stat_rq_starve; extern atomic_t rdma_stat_rq_poll; extern atomic_t rdma_stat_rq_prod; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 3e5e622bad81..ee768d4c3c90 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -66,7 +66,7 @@ static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH; struct percpu_counter svcrdma_stat_recv; atomic_t rdma_stat_read; atomic_t rdma_stat_write; -atomic_t rdma_stat_sq_starve; +struct percpu_counter svcrdma_stat_sq_starve; atomic_t rdma_stat_rq_starve; atomic_t rdma_stat_rq_poll; atomic_t rdma_stat_rq_prod; @@ -199,10 +199,10 @@ static struct ctl_table svcrdma_parm_table[] = { }, { .procname = "rdma_stat_sq_starve", - .data = &rdma_stat_sq_starve, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_sq_starve, + .maxlen = SVCRDMA_COUNTER_BUFSIZ, .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = svcrdma_counter_handler, }, { .procname = "rdma_stat_rq_starve", @@ -267,6 +267,7 @@ static void svc_rdma_proc_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; + percpu_counter_destroy(&svcrdma_stat_sq_starve); percpu_counter_destroy(&svcrdma_stat_recv); } @@ -278,6 +279,9 @@ static int svc_rdma_proc_init(void) return 0; rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL); + if (rc) + goto out_err; + rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL); if (rc) goto out_err; @@ -285,6 +289,7 @@ static int svc_rdma_proc_init(void) return 0; out_err: + percpu_counter_destroy(&svcrdma_stat_recv); return rc; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 0b63e1321d74..d7d98b2df00b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -364,6 +364,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) return 0; } + percpu_counter_inc(&svcrdma_stat_sq_starve); trace_svcrdma_sq_full(rdma); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 68af79d4f04f..52c759a8543e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -317,7 +317,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) /* If the SQ is full, wait until an SQ entry is available */ while (1) { if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { - atomic_inc(&rdma_stat_sq_starve); + percpu_counter_inc(&svcrdma_stat_sq_starve); trace_svcrdma_sq_full(rdma); atomic_inc(&rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, -- cgit v1.2.3 From 1e7e55731628c90d8c701c45f9c3a3b8718840d6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Dec 2020 16:09:36 -0500 Subject: svcrdma: Restore read and write stats Now that we have an efficient mechanism to update these two stats, let's start maintaining them again. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ++-- net/sunrpc/xprtrdma/svc_rdma.c | 26 ++++++++++++++++++-------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 ++ 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c06b16ccf83e..c882816cba8e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -66,10 +66,10 @@ extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct percpu_counter svcrdma_stat_read; extern struct percpu_counter svcrdma_stat_recv; -extern atomic_t rdma_stat_read; -extern atomic_t rdma_stat_write; extern struct percpu_counter svcrdma_stat_sq_starve; +extern struct percpu_counter svcrdma_stat_write; extern atomic_t rdma_stat_rq_starve; extern atomic_t rdma_stat_rq_poll; extern atomic_t rdma_stat_rq_prod; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index ee768d4c3c90..c8336df7a142 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -63,10 +63,10 @@ unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH; static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH; static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH; +struct percpu_counter svcrdma_stat_read; struct percpu_counter svcrdma_stat_recv; -atomic_t rdma_stat_read; -atomic_t rdma_stat_write; struct percpu_counter svcrdma_stat_sq_starve; +struct percpu_counter svcrdma_stat_write; atomic_t rdma_stat_rq_starve; atomic_t rdma_stat_rq_poll; atomic_t rdma_stat_rq_prod; @@ -178,10 +178,10 @@ static struct ctl_table svcrdma_parm_table[] = { { .procname = "rdma_stat_read", - .data = &rdma_stat_read, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_read, + .maxlen = SVCRDMA_COUNTER_BUFSIZ, .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = svcrdma_counter_handler, }, { .procname = "rdma_stat_recv", @@ -192,10 +192,10 @@ static struct ctl_table svcrdma_parm_table[] = { }, { .procname = "rdma_stat_write", - .data = &rdma_stat_write, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_write, + .maxlen = SVCRDMA_COUNTER_BUFSIZ, .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = svcrdma_counter_handler, }, { .procname = "rdma_stat_sq_starve", @@ -267,8 +267,10 @@ static void svc_rdma_proc_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; + percpu_counter_destroy(&svcrdma_stat_write); percpu_counter_destroy(&svcrdma_stat_sq_starve); percpu_counter_destroy(&svcrdma_stat_recv); + percpu_counter_destroy(&svcrdma_stat_read); } static int svc_rdma_proc_init(void) @@ -278,10 +280,16 @@ static int svc_rdma_proc_init(void) if (svcrdma_table_header) return 0; + rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL); + if (rc) + goto out_err; rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL); if (rc) goto out_err; rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL); + if (rc) + goto out_err; + rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL); if (rc) goto out_err; @@ -289,7 +297,9 @@ static int svc_rdma_proc_init(void) return 0; out_err: + percpu_counter_destroy(&svcrdma_stat_sq_starve); percpu_counter_destroy(&svcrdma_stat_recv); + percpu_counter_destroy(&svcrdma_stat_read); return rc; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index d7d98b2df00b..693d139a8633 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -469,6 +469,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, DMA_TO_DEVICE); if (ret < 0) return -EIO; + percpu_counter_inc(&svcrdma_stat_write); list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; @@ -719,6 +720,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, segment->rs_handle, DMA_FROM_DEVICE); if (ret < 0) return -EIO; + percpu_counter_inc(&svcrdma_stat_read); list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; -- cgit v1.2.3 From c6226ff9a62a17182b8092883ca201df5cd47f59 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Dec 2020 14:53:09 -0500 Subject: svcrdma: Deprecate stat variables that are no longer used Clean up. We are not permitted to remove old proc files. Instead, convert these variables to stubs that are only ever allowed to display a value of zero. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 --- net/sunrpc/xprtrdma/svc_rdma.c | 84 +++++++++++++---------------------------- 2 files changed, 27 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c882816cba8e..1e76ed688044 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -70,11 +70,6 @@ extern struct percpu_counter svcrdma_stat_read; extern struct percpu_counter svcrdma_stat_recv; extern struct percpu_counter svcrdma_stat_sq_starve; extern struct percpu_counter svcrdma_stat_write; -extern atomic_t rdma_stat_rq_starve; -extern atomic_t rdma_stat_rq_poll; -extern atomic_t rdma_stat_rq_prod; -extern atomic_t rdma_stat_sq_poll; -extern atomic_t rdma_stat_sq_prod; struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index c8336df7a142..5bc20e9d09cd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -62,53 +62,13 @@ static unsigned int max_max_requests = 16384; unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH; static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH; static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH; +static unsigned int svcrdma_stat_unused; +static unsigned int zero; struct percpu_counter svcrdma_stat_read; struct percpu_counter svcrdma_stat_recv; struct percpu_counter svcrdma_stat_sq_starve; struct percpu_counter svcrdma_stat_write; -atomic_t rdma_stat_rq_starve; -atomic_t rdma_stat_rq_poll; -atomic_t rdma_stat_rq_prod; -atomic_t rdma_stat_sq_poll; -atomic_t rdma_stat_sq_prod; - -/* - * This function implements reading and resetting an atomic_t stat - * variable through read/write to a proc file. Any write to the file - * resets the associated statistic to zero. Any read returns it's - * current value. - */ -static int read_reset_stat(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - atomic_t *stat = (atomic_t *)table->data; - - if (!stat) - return -EINVAL; - - if (write) - atomic_set(stat, 0); - else { - char str_buf[32]; - int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat)); - if (len >= 32) - return -EFAULT; - len = strlen(str_buf); - if (*ppos > len) { - *lenp = 0; - return 0; - } - len -= *ppos; - if (len > *lenp) - len = *lenp; - if (len) - memcpy(buffer, str_buf, len); - *lenp = len; - *ppos += len; - } - return 0; -} enum { SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long), @@ -206,38 +166,48 @@ static struct ctl_table svcrdma_parm_table[] = { }, { .procname = "rdma_stat_rq_starve", - .data = &rdma_stat_rq_starve, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_unused, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &zero, }, { .procname = "rdma_stat_rq_poll", - .data = &rdma_stat_rq_poll, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_unused, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &zero, }, { .procname = "rdma_stat_rq_prod", - .data = &rdma_stat_rq_prod, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_unused, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &zero, }, { .procname = "rdma_stat_sq_poll", - .data = &rdma_stat_sq_poll, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_unused, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &zero, }, { .procname = "rdma_stat_sq_prod", - .data = &rdma_stat_sq_prod, - .maxlen = sizeof(atomic_t), + .data = &svcrdma_stat_unused, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = read_reset_stat, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &zero, }, { }, }; -- cgit v1.2.3 From 43042b90cae11cc2d9827c91df6d6b5fe498d5ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 8 Dec 2020 13:14:15 -0500 Subject: svcrdma: Reduce Receive doorbell rate This is similar to commit e340c2d6ef2a ("xprtrdma: Reduce the doorbell rate (Receive)") which added Receive batching to the client. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 82 +++++++++++++++++---------------- 2 files changed, 44 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1e76ed688044..7c693b31965e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -104,6 +104,7 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; + u32 sc_pending_recvs; struct list_head sc_read_complete_q; struct work_struct sc_work; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7d14a74df716..ab0b7e9777bc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -266,33 +266,46 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma, ctxt); } -static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, - struct svc_rdma_recv_ctxt *ctxt) +static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, + unsigned int wanted, bool temp) { + const struct ib_recv_wr *bad_wr = NULL; + struct svc_rdma_recv_ctxt *ctxt; + struct ib_recv_wr *recv_chain; int ret; - trace_svcrdma_post_recv(ctxt); - ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); + recv_chain = NULL; + while (wanted--) { + ctxt = svc_rdma_recv_ctxt_get(rdma); + if (!ctxt) + break; + + trace_svcrdma_post_recv(ctxt); + ctxt->rc_temp = temp; + ctxt->rc_recv_wr.next = recv_chain; + recv_chain = &ctxt->rc_recv_wr; + rdma->sc_pending_recvs++; + } + if (!recv_chain) + return false; + + ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); if (ret) goto err_post; - return 0; + return true; err_post: - trace_svcrdma_rq_post_err(rdma, ret); - svc_rdma_recv_ctxt_put(rdma, ctxt); - return ret; -} - -static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) -{ - struct svc_rdma_recv_ctxt *ctxt; + while (bad_wr) { + ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt, + rc_recv_wr); + bad_wr = bad_wr->next; + svc_rdma_recv_ctxt_put(rdma, ctxt); + } - if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) - return 0; - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return -ENOMEM; - return __svc_rdma_post_recv(rdma, ctxt); + trace_svcrdma_rq_post_err(rdma, ret); + /* Since we're destroying the xprt, no need to reset + * sc_pending_recvs. */ + return false; } /** @@ -303,20 +316,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - struct svc_rdma_recv_ctxt *ctxt; - unsigned int i; - int ret; - - for (i = 0; i < rdma->sc_max_requests; i++) { - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return false; - ctxt->rc_temp = true; - ret = __svc_rdma_post_recv(rdma, ctxt); - if (ret) - return false; - } - return true; + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); } /** @@ -324,8 +324,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) * @cq: Completion Queue context * @wc: Work Completion object * - * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that - * the Receive completion handler could be running. */ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { @@ -333,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_recv_ctxt *ctxt; + rdma->sc_pending_recvs--; + /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); @@ -340,9 +340,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) goto flushed; - if (svc_rdma_post_recv(rdma)) - goto post_err; - /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdma->sc_pd->device, @@ -356,11 +353,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) spin_unlock(&rdma->sc_rq_dto_lock); if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) svc_xprt_enqueue(&rdma->sc_xprt); + + if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) && + rdma->sc_pending_recvs < rdma->sc_max_requests) + if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH, + false)) + goto post_err; + return; flushed: -post_err: svc_rdma_recv_ctxt_put(rdma, ctxt); +post_err: set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_xprt_enqueue(&rdma->sc_xprt); } -- cgit v1.2.3 From 3cc55f4434b421d37300aa9a167ace7d60b45ccf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 29 Jan 2021 14:26:29 -0500 Subject: nfs: use change attribute for NFS re-exports When exporting NFS, we may as well use the real change attribute returned by the original server instead of faking up a change attribute from the ctime. Note we can't do that by setting I_VERSION--that would also turn on the logic in iversion.h which treats the lower bit specially, and that doesn't make sense for NFS. So instead we define a new export operation for filesystems like NFS that want to manage the change attribute themselves. Signed-off-by: J. Bruce Fields Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- fs/nfs/export.c | 18 ++++++++++++++++++ fs/nfsd/nfsfh.h | 5 ++++- include/linux/exportfs.h | 1 + 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 7412bb164fa7..f2b34cfe286c 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -167,10 +167,28 @@ out: return parent; } +static u64 nfs_fetch_iversion(struct inode *inode) +{ + struct nfs_server *server = NFS_SERVER(inode); + + /* Is this the right call?: */ + nfs_revalidate_inode(server, inode); + /* + * Also, note we're ignoring any returned error. That seems to be + * the practice for cache consistency information elsewhere in + * the server, but I'm not sure why. + */ + if (server->nfs_client->rpc_ops->version >= 4) + return inode_peek_iversion_raw(inode); + else + return time_to_chattr(&inode->i_ctime); +} + const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, + .fetch_iversion = nfs_fetch_iversion, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| EXPORT_OP_NOATOMIC_ATTR, diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index cb20c2cd3469..f58933519f38 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -12,6 +12,7 @@ #include #include #include +#include static inline __u32 ino_t_to_u32(ino_t ino) { @@ -264,7 +265,9 @@ fh_clear_wcc(struct svc_fh *fhp) static inline u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) { - if (IS_I_VERSION(inode)) { + if (inode->i_sb->s_export_op->fetch_iversion) + return inode->i_sb->s_export_op->fetch_iversion(inode); + else if (IS_I_VERSION(inode)) { u64 chattr; chattr = stat->ctime.tv_sec; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 9f4d4bcbf251..fe848901fcc3 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,7 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + u64 (*fetch_iversion)(struct inode *); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3