Diffstat (limited to 'include/linux/sunrpc/svc.h')
-rw-r--r--   include/linux/sunrpc/svc.h | 137
1 file changed, 91 insertions, 46 deletions
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 74658cca0f38..4be6204f6630 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -20,7 +20,7 @@
 #include <linux/lwq.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
 #include <linux/kthread.h>
 
 /*
@@ -35,8 +35,10 @@
  */
 struct svc_pool {
 	unsigned int		sp_id;		/* pool id; also node id on NUMA */
+	unsigned int		sp_nrthreads;	/* # of threads currently running in pool */
+	unsigned int		sp_nrthrmin;	/* Min number of threads to run per pool */
+	unsigned int		sp_nrthrmax;	/* Max requested number of threads in pool */
 	struct lwq		sp_xprts;	/* pending transports */
-	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct llist_head	sp_idle_threads; /* idle server threads */
 
@@ -53,6 +55,7 @@ enum {
 	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
 	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
 	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
+	SP_TASK_STARTING,	/* Task has started but not added to idle yet */
 };
 
@@ -71,7 +74,7 @@ struct svc_serv {
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
 	unsigned int		sv_nprogs;	/* Number of sv_programs */
-	unsigned int		sv_nrthreads;	/* # of server threads */
+	unsigned int		sv_nrthreads;	/* # of running server threads */
 	unsigned int		sv_max_payload;	/* datagram payload size */
 	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
@@ -119,49 +122,74 @@ void svc_destroy(struct svc_serv **svcp);
  * Linux limit; someone who cares more about NFS/UDP performance
  * can test a larger number.
  *
- * For TCP transports we have more freedom. A size of 1MB is
- * chosen to match the client limit. Other OSes are known to
- * have larger limits, but those numbers are probably beyond
- * the point of diminishing returns.
+ * For non-UDP transports we have more freedom. A size of 4MB is
+ * chosen to accommodate clients that support larger I/O sizes.
  */
-#define RPCSVC_MAXPAYLOAD	(1*1024*1024u)
-#define RPCSVC_MAXPAYLOAD_TCP	RPCSVC_MAXPAYLOAD
-#define RPCSVC_MAXPAYLOAD_UDP	(32*1024u)
+enum {
+	RPCSVC_MAXPAYLOAD	= 4 * 1024 * 1024,
+	RPCSVC_MAXPAYLOAD_TCP	= RPCSVC_MAXPAYLOAD,
+	RPCSVC_MAXPAYLOAD_UDP	= 32 * 1024,
+};
 
 extern u32 svc_max_payload(const struct svc_rqst *rqstp);
 
 /*
- * RPC Requests and replies are stored in one or more pages.
- * We maintain an array of pages for each server thread.
- * Requests are copied into these pages as they arrive. Remaining
- * pages are available to write the reply into.
+ * RPC Call and Reply messages each have their own page array.
+ * rq_pages holds the incoming Call message; rq_respages holds
+ * the outgoing Reply message. Both arrays are sized to
+ * svc_serv_maxpages() entries and are allocated dynamically.
+ *
+ * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each
+ * server thread needs to allocate more to replace those used in
+ * sending.
  *
- * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
- * needs to allocate more to replace those used in sending. To help keep track
- * of these pages we have a receive list where all pages initialy live, and a
- * send list where pages are moved to when there are to be part of a reply.
+ * rq_pages request page contract:
  *
- * We use xdr_buf for holding responses as it fits well with NFS
- * read responses (that have a header, and some data pages, and possibly
- * a tail) and means we can share some client side routines.
+ * Transport receive paths that move request data pages out of
+ * rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages)
+ * and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those
+ * entries to prevent svc_rqst_release_pages() from freeing pages
+ * still in transport use, and set rq_pages_nfree to the count.
+ * svc_alloc_arg() refills only that many rq_pages entries.
  *
- * The xdr_buf.head kvec always points to the first page in the rq_*pages
- * list. The xdr_buf.pages pointer points to the second page on that
- * list. xdr_buf.tail points to the end of the first page.
- * This assumes that the non-page part of an rpc reply will fit
- * in a page - NFSd ensures this. lockd also has no trouble.
+ * For rq_respages, svc_rqst_release_pages() NULLs entries in
+ * [rq_respages, rq_next_page) after each RPC. svc_alloc_arg()
+ * refills only that range.
  *
- * Each request/reply pair can have at most one "payload", plus two pages,
- * one for the request, and one for the reply.
- * We using ->sendfile to return read data, we might need one extra page
- * if the request is not page-aligned. So add another '1'.
+ * xdr_buf holds responses; the structure fits NFS read responses
+ * (header, data pages, optional tail) and enables sharing of
+ * client-side routines.
+ *
+ * The xdr_buf.head kvec always points to the first page in the
+ * rq_*pages list. The xdr_buf.pages pointer points to the second
+ * page on that list. xdr_buf.tail points to the end of the first
+ * page. This assumes that the non-page part of an rpc reply will
+ * fit in a page - NFSd ensures this. lockd also has no trouble.
  */
-#define RPCSVC_MAXPAGES		((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \
-				+ 2 + 1)
+
+/**
+ * svc_serv_maxpages - maximum count of pages needed for one RPC message
+ * @serv: RPC service context
+ *
+ * Returns a count of pages or vectors that can hold the maximum
+ * size RPC message for @serv.
+ *
+ * Each page array can hold at most one payload plus two
+ * overhead pages (one for the RPC header, one for tail data).
+ * nfsd_splice_actor() might need an extra page when a READ
+ * payload is not page-aligned.
+ */
+static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
+{
+	return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1;
+}
 
 /*
  * The context of a single thread, including the request currently being
  * processed.
+ *
+ * RPC programs are free to use rq_private to stash thread-local information.
+ * The sunrpc layer will not access it.
  */
 struct svc_rqst {
	struct list_head	rq_all;		/* all threads list */
@@ -186,23 +214,23 @@ struct svc_rqst {
	struct xdr_buf		rq_arg;
	struct xdr_stream	rq_arg_stream;
	struct xdr_stream	rq_res_stream;
-	struct page		*rq_scratch_page;
+	struct folio		*rq_scratch_folio;
	struct xdr_buf		rq_res;
-	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
-	struct page *		*rq_respages;	/* points into rq_pages */
+	unsigned long		rq_maxpages;	/* entries per page array */
+	unsigned long		rq_pages_nfree;	/* rq_pages entries NULLed by transport */
+	struct page *		*rq_pages;	/* Call buffer pages */
+	struct page *		*rq_respages;	/* Reply buffer pages */
	struct page *		*rq_next_page;	/* next reply page to use */
-	struct page *		*rq_page_end;	/* one past the last page */
+	struct page *		*rq_page_end;	/* one past the last reply page */
 
	struct folio_batch	rq_fbatch;
-	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
-	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
+	struct bio_vec		*rq_bvec;
 
	__be32			rq_xid;		/* transmission id */
	u32			rq_prog;	/* program number */
	u32			rq_vers;	/* program version */
	u32			rq_proc;	/* procedure number */
	u32			rq_prot;	/* IP protocol */
-	int			rq_cachetype;	/* catering to nfsd */
 
	unsigned long		rq_flags;	/* flags field */
	ktime_t			rq_qtime;	/* enqueue time */
@@ -235,10 +263,10 @@ struct svc_rqst {
	 * initialisation success.
	 */
 
-	unsigned long		bc_to_initval;
-	unsigned int		bc_to_retries;
-	void **			rq_lease_breaker; /* The v4 client breaking a lease */
+	unsigned long		bc_to_initval;
+	unsigned int		bc_to_retries;
	unsigned int		rq_status_counter; /* RPC processing counter */
+	void			*rq_private;	/* For use by the service thread */
 };
 
 /* bits for rq_flags */
@@ -430,13 +458,17 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
 bool		   svc_rqst_replace_page(struct svc_rqst *rqstp,
					 struct page *page);
 void		   svc_rqst_release_pages(struct svc_rqst *rqstp);
+int		   svc_new_thread(struct svc_serv *serv, struct svc_pool *pool);
 void		   svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *prog,
				     unsigned int nprog,
				     struct svc_stat *stats,
				     unsigned int bufsize,
				     int (*threadfn)(void *data));
-int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+int		   svc_set_pool_threads(struct svc_serv *serv, struct svc_pool *pool,
					unsigned int min_threads, unsigned int max_threads);
+int		   svc_set_num_threads(struct svc_serv *serv, unsigned int min_threads,
				       unsigned int nrservs);
 int		   svc_pool_stats_open(struct svc_info *si, struct file *file);
 void		   svc_process(struct svc_rqst *rqstp);
 void		   svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp);
@@ -452,8 +484,6 @@ const char *	   svc_proc_name(const struct svc_rqst *rqstp);
 int		   svc_encode_result_payload(struct svc_rqst *rqstp,
					     unsigned int offset,
					     unsigned int length);
-unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
					 struct xdr_buf *payload);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
					     struct kvec *first, void *p,
					     size_t total);
@@ -468,6 +498,21 @@ int svc_generic_rpcbind_set(struct net *net,
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
+/**
+ * svc_rqst_page_release - release a page associated with an RPC transaction
+ * @rqstp: RPC transaction context
+ * @page: page to release
+ *
+ * Released pages are batched and freed together, reducing
+ * allocator pressure under heavy RPC workloads.
+ */
+static inline void svc_rqst_page_release(struct svc_rqst *rqstp,
+					 struct page *page)
+{
+	if (!folio_batch_add(&rqstp->rq_fbatch, page_folio(page)))
+		__folio_batch_release(&rqstp->rq_fbatch);
+}
+
 /*
  * When we want to reduce the size of the reserved space in the response
  * buffer, we need to take into account the size of any checksum data that
@@ -495,7 +540,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
 
	buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len;
	xdr_init_decode(xdr, buf, argv->iov_base, NULL);
-	xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
+	xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio);
 }
 
 /**
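
A few illustrative notes on the interfaces above. First, the svc_serv_maxpages() arithmetic: with 4 KiB pages, and assuming sv_max_mesg is max_payload plus one overhead page (as the sv_max_mesg comment suggests), the 4MB maximum payload works out to 1025 + 2 + 1 = 1028 entries per page array. A small standalone sketch of that calculation; the PAGE_SIZE value and the sv_max_mesg formula are assumptions here, not taken from the patch:

/* Standalone sketch of the svc_serv_maxpages() arithmetic, assuming
 * 4 KiB pages and sv_max_mesg == max_payload + one overhead page.
 */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define RPCSVC_MAXPAYLOAD	(4 * 1024 * 1024UL)

int main(void)
{
	unsigned long sv_max_mesg = RPCSVC_MAXPAYLOAD + PAGE_SIZE;
	unsigned long maxpages = DIV_ROUND_UP(sv_max_mesg, PAGE_SIZE) + 2 + 1;

	/* One payload's worth of pages, plus two overhead pages, plus
	 * one spare for a non-page-aligned splice: 1025 + 2 + 1 = 1028.
	 */
	printf("entries per page array: %lu\n", maxpages);
	return 0;
}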
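
Because rq_pages, rq_respages, and rq_bvec are now pointers rather than fixed-size arrays, a per-thread setup path has to size them from svc_serv_maxpages(). The following is a hedged sketch that assumes only the declarations in this header; the helper name example_alloc_rqst_buffers() is hypothetical and is not the actual sunrpc allocation code:

/* Hypothetical helper, for illustration only: size the per-thread
 * arrays from svc_serv_maxpages().  The real allocation site in
 * sunrpc may differ.
 */
static int example_alloc_rqst_buffers(struct svc_serv *serv,
				       struct svc_rqst *rqstp, int node)
{
	rqstp->rq_maxpages = svc_serv_maxpages(serv);

	rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages,
				       sizeof(struct page *), GFP_KERNEL, node);
	rqstp->rq_respages = kcalloc_node(rqstp->rq_maxpages,
					  sizeof(struct page *), GFP_KERNEL, node);
	rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages,
				      sizeof(struct bio_vec), GFP_KERNEL, node);

	if (!rqstp->rq_pages || !rqstp->rq_respages || !rqstp->rq_bvec)
		return -ENOMEM;	/* caller frees any partial allocations */
	return 0;
}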
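
The rq_pages contract described in the header comment amounts to: a transport receive path that keeps Call pages for its own use clears those rq_pages slots and records how many need refilling in rq_pages_nfree. A hedged sketch of that pattern follows; it is illustrative, not the body of svc_tcp_save_pages() or svc_rdma_clear_rqst_pages():

/* Illustrative only: a transport takes ownership of the first
 * @count request pages and records how many slots need refilling.
 */
static void example_take_request_pages(struct svc_rqst *rqstp,
				       struct page **save, unsigned long count)
{
	unsigned long i;

	if (count > rqstp->rq_maxpages)
		count = rqstp->rq_maxpages;

	for (i = 0; i < count; i++) {
		save[i] = rqstp->rq_pages[i];	/* transport now owns this page */
		rqstp->rq_pages[i] = NULL;	/* keep svc_rqst_release_pages() away */
	}
	rqstp->rq_pages_nfree = count;		/* svc_alloc_arg() refills this many */
}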
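
svc_rqst_page_release() defers freeing to the per-thread folio batch rq_fbatch. Below is a sketch of how Reply pages might be handed back after a send completes; the existing svc_rqst_release_pages() is the real consumer of rq_fbatch, so this is illustration only:

/* Illustrative only: push each consumed Reply page into the folio
 * batch; pages are actually freed when the batch fills or is drained.
 */
static void example_release_reply_pages(struct svc_rqst *rqstp)
{
	struct page **pp;

	for (pp = rqstp->rq_respages; pp < rqstp->rq_next_page; pp++) {
		if (*pp) {
			svc_rqst_page_release(rqstp, *pp);
			*pp = NULL;	/* slot refilled later by svc_alloc_arg() */
		}
	}
	folio_batch_release(&rqstp->rq_fbatch);	/* drain any remainder */
}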
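
Finally, the reworked thread-count interfaces. The sketch below shows how a consumer such as nfsd might call them; the interpretation of min_threads and of the second argument to svc_set_num_threads() is an assumption based on the parameter names and the new sp_nrthrmin/sp_nrthrmax fields, not something the patch states:

/* Illustrative only: configure service-wide and per-pool thread counts
 * using the new interfaces.  Policy details live in the sunrpc
 * implementation, not here.
 */
static int example_configure_threads(struct svc_serv *serv)
{
	int err;

	/* Service-wide: request 64 threads, with an assumed floor of 4. */
	err = svc_set_num_threads(serv, 4, 64);
	if (err)
		return err;

	/* Per-pool: let pool 0 float between 2 and 16 threads. */
	return svc_set_pool_threads(serv, &serv->sv_pools[0], 2, 16);
}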
