From 77f18f5e4ebdea35ec3d92343b0ed7546dc87637 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 11 Feb 2009 17:16:58 -0800 Subject: nfs: replace uses of __constant_{endian} The base versions handle constant folding now, none of these headers are exported to userspace, so the __ prefixed versions are not necessary. Signed-off-by: Harvey Harrison Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 49e1eb454465..d8910b68e1bd 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -69,27 +69,27 @@ struct xdr_buf { * pre-xdr'ed macros. */ -#define xdr_zero __constant_htonl(0) -#define xdr_one __constant_htonl(1) -#define xdr_two __constant_htonl(2) - -#define rpc_success __constant_htonl(RPC_SUCCESS) -#define rpc_prog_unavail __constant_htonl(RPC_PROG_UNAVAIL) -#define rpc_prog_mismatch __constant_htonl(RPC_PROG_MISMATCH) -#define rpc_proc_unavail __constant_htonl(RPC_PROC_UNAVAIL) -#define rpc_garbage_args __constant_htonl(RPC_GARBAGE_ARGS) -#define rpc_system_err __constant_htonl(RPC_SYSTEM_ERR) -#define rpc_drop_reply __constant_htonl(RPC_DROP_REPLY) - -#define rpc_auth_ok __constant_htonl(RPC_AUTH_OK) -#define rpc_autherr_badcred __constant_htonl(RPC_AUTH_BADCRED) -#define rpc_autherr_rejectedcred __constant_htonl(RPC_AUTH_REJECTEDCRED) -#define rpc_autherr_badverf __constant_htonl(RPC_AUTH_BADVERF) -#define rpc_autherr_rejectedverf __constant_htonl(RPC_AUTH_REJECTEDVERF) -#define rpc_autherr_tooweak __constant_htonl(RPC_AUTH_TOOWEAK) -#define rpcsec_gsserr_credproblem __constant_htonl(RPCSEC_GSS_CREDPROBLEM) -#define rpcsec_gsserr_ctxproblem __constant_htonl(RPCSEC_GSS_CTXPROBLEM) -#define rpc_autherr_oldseqnum __constant_htonl(101) +#define xdr_zero cpu_to_be32(0) +#define xdr_one cpu_to_be32(1) 
+#define xdr_two cpu_to_be32(2) + +#define rpc_success cpu_to_be32(RPC_SUCCESS) +#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) +#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) +#define rpc_proc_unavail cpu_to_be32(RPC_PROC_UNAVAIL) +#define rpc_garbage_args cpu_to_be32(RPC_GARBAGE_ARGS) +#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) +#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) + +#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) +#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) +#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) +#define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) +#define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) +#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) +#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) +#define rpc_autherr_oldseqnum cpu_to_be32(101) /* * Miscellaneous XDR helper functions -- cgit v1.2.3 From 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:35 +1100 Subject: knfsd: avoid overloading the CPU scheduler with enormous load averages Avoid overloading the CPU scheduler with enormous load averages when handling high call-rate NFS loads. When the knfsd bottom half is made aware of an incoming call by the socket layer, it tries to choose an nfsd thread and wake it up. As long as there are idle threads, one will be woken up. If there are a lot of nfsd threads (a sensible configuration when the server is disk-bound or is running an HSM), there will be many more nfsd threads than CPUs to run them. Under a high call-rate low service-time workload, the result is that almost every nfsd is runnable, but only a handful are actually able to run. This situation causes two significant problems: 1. The CPU scheduler takes over 10% of each CPU, which is robbing the nfsd threads of valuable CPU time. 2. 
At a high enough load, the nfsd threads starve userspace threads of CPU time, to the point where daemons like portmap and rpc.mountd do not schedule for tens of seconds at a time. Clients attempting to mount an NFS filesystem time out at the very first step (opening a TCP connection to portmap) because portmap cannot wake up from select() and call accept() in time. Disclaimer: these effects were observed on a SLES9 kernel; modern kernels' schedulers may behave more gracefully. The solution is simple: keep in each svc_pool a counter of the number of threads which have been woken but have not yet run, and do not wake any more if that count reaches an arbitrary small threshold. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fit in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. The server was running 128 nfsds. Profiling showed schedule() taking 6.7% of every CPU, and __wake_up() taking 5.2%. This patch drops those contributions to 3.0% and 2.2%. Load average was over 120 before the patch, and 20.9 after. This patch is a forward-ported version of knfsd-avoid-nfsd-overload which has been shipping in the SGI "Enhanced NFS" product since 2006. It has been posted before: http://article.gmane.org/gmane.linux.nfs/10374 Signed-off-by: Greg Banks Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc.h | 2 ++ net/sunrpc/svc_xprt.c | 25 ++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3435d24bfe55..39ec186a492d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -41,6 +41,7 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + int sp_nwaking; /* number of threads woken but not yet active */ } ____cacheline_aligned_in_smp; /* @@ -264,6 +265,7 @@ struct svc_rqst { * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ + int rq_waking; /* 1 if thread is being woken */ }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5d6b34..0551b6b6cf8c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -14,6 +14,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +#define SVC_MAX_WAKING 5 + static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -298,6 +300,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; + int thread_avail; if (!(xprt->xpt_flags & ((1<sp_lock); - if (!list_empty(&pool->sp_threads) && - !list_empty(&pool->sp_sockets)) - printk(KERN_ERR - "svc_xprt_enqueue: " - "threads and transports both waiting??\n"); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); @@ -353,7 +350,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - if (!list_empty(&pool->sp_threads)) { + /* Work out whether threads are available */ + thread_avail = 
!list_empty(&pool->sp_threads); /* threads are asleep */ + if (pool->sp_nwaking >= SVC_MAX_WAKING) { + /* too many threads are runnable and trying to wake up */ + thread_avail = 0; + } + + if (thread_avail) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -368,6 +372,8 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + rqstp->rq_waking = 1; + pool->sp_nwaking++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { @@ -633,6 +639,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); + if (rqstp->rq_waking) { + rqstp->rq_waking = 0; + pool->sp_nwaking--; + BUG_ON(pool->sp_nwaking < 0); + } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; -- cgit v1.2.3 From 03cf6c9f49a8fea953d38648d016e3f46e814991 Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:36 +1100 Subject: knfsd: add file to export stats about nfsd pools Add /proc/fs/nfsd/pool_stats to export to userspace various statistics about the operation of rpc server thread pools. This patch is based on a forward-ported version of knfsd-add-pool-thread-stats which has been shipping in the SGI "Enhanced NFS" product since 2006 and which was previously posted: http://article.gmane.org/gmane.linux.nfs/10375 It has also been updated thus: * moved EXPORT_SYMBOL() to near the function it exports * made the new struct seq_operations const * used SEQ_START_TOKEN instead of ((void *)1) * merged fix from SGI PV 990526 "sunrpc: use dprintk instead of printk in svc_pool_stats_*()" by Harshula Jayasuriya. * merged fix from SGI PV 964001 "Crash reading pool_stats before nfsds are started". Signed-off-by: Greg Banks Signed-off-by: Harshula Jayasuriya Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 12 ++++++ fs/nfsd/nfssvc.c | 7 ++++ include/linux/sunrpc/svc.h | 11 +++++ net/sunrpc/svc_xprt.c | 100 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 129 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3d93b2064ce5..4adebb6312c4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -60,6 +60,7 @@ enum { NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, + NFSD_Pool_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -172,6 +173,16 @@ static const struct file_operations exports_operations = { .owner = THIS_MODULE, }; +extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); + +static struct file_operations pool_stats_operations = { + .open = nfsd_pool_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -1246,6 +1257,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c3eb0759fd57..ef0a3686639d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -546,3 +546,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); return 1; } + +int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + if (nfsd_serv == NULL) + return -ENODEV; + 
return svc_pool_stats_open(nfsd_serv, file); +} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 39ec186a492d..9f9f699dd469 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -24,6 +24,15 @@ */ typedef int (*svc_thread_fn)(void *); +/* statistics for svc_pool structures */ +struct svc_pool_stats { + unsigned long packets; + unsigned long sockets_queued; + unsigned long threads_woken; + unsigned long overloads_avoided; + unsigned long threads_timedout; +}; + /* * * RPC service thread pool. @@ -42,6 +51,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ int sp_nwaking; /* number of threads woken but not yet active */ + struct svc_pool_stats sp_stats; /* statistics on pool operation */ } ____cacheline_aligned_in_smp; /* @@ -396,6 +406,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, sa_family_t, void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); int svc_register(const struct svc_serv *, const unsigned short, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0551b6b6cf8c..1e66f2491460 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -318,6 +318,8 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) goto out_unlock; } + pool->sp_stats.packets++; + /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. 
We update XPT_BUSY * atomically because it also guards against trying to enqueue @@ -355,6 +357,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (pool->sp_nwaking >= SVC_MAX_WAKING) { /* too many threads are runnable and trying to wake up */ thread_avail = 0; + pool->sp_stats.overloads_avoided++; } if (thread_avail) { @@ -374,11 +377,13 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); rqstp->rq_waking = 1; pool->sp_nwaking++; + pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; BUG_ON(xprt->xpt_pool != pool); } @@ -591,6 +596,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); + long time_left; dprintk("svc: server %p waiting for data (to = %ld)\n", rqstp, timeout); @@ -676,12 +682,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); - schedule_timeout(timeout); + time_left = schedule_timeout(timeout); try_to_freeze(); spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); + if (!time_left) + pool->sp_stats.threads_timedout++; xprt = rqstp->rq_xprt; if (!xprt) { @@ -1114,3 +1122,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) return totlen; } EXPORT_SYMBOL_GPL(svc_xprt_names); + + +/*----------------------------------------------------------------------------*/ + +static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) +{ + unsigned int pidx = (unsigned int)*pos; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + + lock_kernel(); + /* bump up the pseudo refcount while traversing */ + svc_get(serv); + unlock_kernel(); + + if (!pidx) + return SEQ_START_TOKEN; + return (pidx > 
serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); +} + +static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct svc_pool *pool = p; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); + + if (p == SEQ_START_TOKEN) { + pool = &serv->sv_pools[0]; + } else { + unsigned int pidx = (pool - &serv->sv_pools[0]); + if (pidx < serv->sv_nrpools-1) + pool = &serv->sv_pools[pidx+1]; + else + pool = NULL; + } + ++*pos; + return pool; +} + +static void svc_pool_stats_stop(struct seq_file *m, void *p) +{ + struct svc_serv *serv = m->private; + + lock_kernel(); + /* this function really, really should have been called svc_put() */ + svc_destroy(serv); + unlock_kernel(); +} + +static int svc_pool_stats_show(struct seq_file *m, void *p) +{ + struct svc_pool *pool = p; + + if (p == SEQ_START_TOKEN) { + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + return 0; + } + + seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + pool->sp_id, + pool->sp_stats.packets, + pool->sp_stats.sockets_queued, + pool->sp_stats.threads_woken, + pool->sp_stats.overloads_avoided, + pool->sp_stats.threads_timedout); + + return 0; +} + +static const struct seq_operations svc_pool_stats_seq_ops = { + .start = svc_pool_stats_start, + .next = svc_pool_stats_next, + .stop = svc_pool_stats_stop, + .show = svc_pool_stats_show, +}; + +int svc_pool_stats_open(struct svc_serv *serv, struct file *file) +{ + int err; + + err = seq_open(file, &svc_pool_stats_seq_ops); + if (!err) + ((struct seq_file *) file->private_data)->private = serv; + return err; +} +EXPORT_SYMBOL(svc_pool_stats_open); + +/*----------------------------------------------------------------------------*/ -- cgit v1.2.3 From 2795e53b4ed5d1f49d2283f416c922f55ec7d461 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Mar 2009 12:07:14 -0400 Subject: SUNRPC: Clean up static inline functions in svc_xprt.h Clean up: Enable the 
use of const arguments in higher level svc_ APIs by adding const to the arguments of the helper functions in svc_xprt.h Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0127daca4354..959b931b6053 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -88,29 +88,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt) kref_get(&xprt->xpt_ref); } static inline void svc_xprt_set_local(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_local, sa, salen); xprt->xpt_locallen = salen; } static inline void svc_xprt_set_remote(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; } -static inline unsigned short svc_addr_port(struct sockaddr *sa) +static inline unsigned short svc_addr_port(const struct sockaddr *sa) { - unsigned short ret = 0; + const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa; + switch (sa->sa_family) { case AF_INET: - ret = ntohs(((struct sockaddr_in *)sa)->sin_port); - break; + return ntohs(sin->sin_port); case AF_INET6: - ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); - break; + return ntohs(sin6->sin6_port); } - return ret; + + return 0; } static inline size_t svc_addr_len(struct sockaddr *sa) @@ -124,36 +127,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa) return -EAFNOSUPPORT; } -static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct 
sockaddr *)&xprt->xpt_local); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_local); } -static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote); } -static inline char *__svc_print_addr(struct sockaddr *addr, - char *buf, size_t len) +static inline char *__svc_print_addr(const struct sockaddr *addr, + char *buf, const size_t len) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)addr; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr; + switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%pI4, port=%u", - &((struct sockaddr_in *)addr)->sin_addr, - ntohs(((struct sockaddr_in *) addr)->sin_port)); + snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr, + ntohs(sin->sin_port)); break; case AF_INET6: snprintf(buf, len, "%pI6, port=%u", - &((struct sockaddr_in6 *)addr)->sin6_addr, - ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + &sin6->sin6_addr, + ntohs(sin6->sin6_port)); break; default: snprintf(buf, len, "unknown address type: %d", addr->sa_family); break; } + return buf; } #endif /* SUNRPC_SVC_XPRT_H */ -- cgit v1.2.3 From 2f425878b6a71571341dcd3f9e9d1a6f6355da9c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:27:32 +0300 Subject: nfsd: don't use the deferral service, return NFS4ERR_DELAY On an NFSv4.1 server cache miss that causes an upcall, NFS4ERR_DELAY will be returned. It is up to the NFSv4.1 client to resend only the operations that have not been processed. Initialize rq_usedeferral to 1 in svc_process(). It will be turned off in nfsd4_proc_compound() only when NFSv4.1 Sessions are used. Note: this isn't an adequate solution on its own. 
It's acceptable as a way to get some minimal 4.1 up and working, but we're going to have to find a way to avoid returning DELAY in all common cases before 4.1 can really be considered ready. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: reverse rq_nodeferral negative logic] Signed-off-by: Benny Halevy [sunrpc: initialize rq_usedeferral] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 8 ++++++++ include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 2 ++ net/sunrpc/svc_xprt.c | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 249dad987a16..ded469ff08b3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -854,6 +854,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->cstate.replay_owner = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + /* Use the deferral mechanism only for NFSv4.0 compounds */ + rqstp->rq_usedeferral = (args->minorversion == 0); /* * According to RFC3010, this takes precedence over all other errors. 
@@ -933,12 +935,18 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } + if (!rqstp->rq_usedeferral && status == nfserr_dropit) { + dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); + status = nfserr_jukebox; + } fh_put(&resp->cstate.current_fh); fh_put(&resp->cstate.save_fh); BUG_ON(resp->cstate.replay_owner); out: nfsd4_release_compoundargs(args); + /* Reset deferral mechanism for RPC deferrals */ + rqstp->rq_usedeferral = 1; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9f9f699dd469..815dd589d4db 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -230,6 +230,7 @@ struct svc_rqst { struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ + int rq_usedeferral; /* use deferral */ size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index fff09a2d8960..45984cbe1bfa 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1023,6 +1023,8 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; + /* Will be turned off only when NFSv4 Sessions are used */ + rqstp->rq_usedeferral = 1; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1e66f2491460..600d0918e3ae 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -974,7 +974,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_deferred_req *dr; - if (rqstp->rq_arg.page_len) + if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral) return NULL; /* if more than a page, give up FIXME */ if (rqstp->rq_deferred) { dr = 
rqstp->rq_deferred; -- cgit v1.2.3 From c3d06f9ce8544fecfe13e377d1e2c2e47fe18dbc Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:18 +0300 Subject: nfsd41: hard page limit for DRC Use no more than 1/128th of the number of free pages at nfsd startup for the v4.1 DRC. This is an arbitrary default which should probably end up under the control of an administrator. Signed-off-by: Andy Adamson [moved added fields in struct svc_serv under CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy [fix set_max_drc calculation of sv_drc_max_pages] [moved NFSD_DRC_SIZE_SHIFT's declaration up in header file] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 23 +++++++++++++++++++++++ include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 ++ 3 files changed, 28 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b5168d1898ec..b53a098e97a4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,26 @@ void nfsd_reset_versions(void) } } +/* + * Each session guarantees a negotiated per slot memory cache for replies + * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated + * NFSv4.1 server might want to use more memory for a DRC than a machine + * with multiple services. + * + * Impose a hard limit on the number of pages for the DRC which varies + * according to the machine's free pages. This is of course only a default. + * + * For now this is a #defined shift which could be under admin control + * in the future. 
+ */ +static void set_max_drc(void) +{ + nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT; + nfsd_serv->sv_drc_pages_used = 0; + dprintk("%s svc_drc_max_pages %u\n", __func__, + nfsd_serv->sv_drc_max_pages); +} int nfsd_create_serv(void) { @@ -229,6 +250,8 @@ int nfsd_create_serv(void) nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; + else + set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index ab9616d09204..1f063d495159 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -331,6 +331,9 @@ extern struct timeval nfssvc_boot; #define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ +/* Right shift applied to nr_free_buffer_pages to size the V4.1 server DRC (7 => 1/128th) */ +#define NFSD_DRC_SIZE_SHIFT 7 + /* * The following attributes are currently not supported by the NFSv4 server: * ARCHIVE (deprecated anyway) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 815dd589d4db..d209c630a4a1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -95,6 +95,8 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ + unsigned int sv_drc_max_pages; /* Total pages for DRC */ + unsigned int sv_drc_pages_used;/* DRC pages used */ }; /* -- cgit v1.2.3