From d585158b608248a6ba8ae75e234672e048d3fde9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2007 22:17:02 -0700 Subject: NFS: Fix nfs_set_page_dirty() Be more careful about testing page->mapping. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 797558941745..8593965a35ef 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1531,10 +1531,18 @@ int nfs_wb_page(struct inode *inode, struct page* page) int nfs_set_page_dirty(struct page *page) { - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct address_space *mapping = page->mapping; + struct inode *inode; + spinlock_t *req_lock; struct nfs_page *req; int ret; + if (!mapping) + goto out_raced; + inode = mapping->host; + if (!inode) + goto out_raced; + req_lock = &NFS_I(inode)->req_lock; spin_lock(req_lock); req = nfs_page_find_request_locked(page); if (req != NULL) { @@ -1547,6 +1555,8 @@ int nfs_set_page_dirty(struct page *page) ret = __set_page_dirty_nobuffers(page); spin_unlock(req_lock); return ret; +out_raced: + return !TestSetPageDirty(page); } -- cgit v1.2.3 From 1a0ba9ae485c5fd17d0bff2f14d9dd75b8985593 Mon Sep 17 00:00:00 2001 From: Amnon Aaronsohn Date: Mon, 9 Apr 2007 22:05:26 -0700 Subject: NFS: statfs error-handling fix The nfs statfs function returns a success code on error, and fills the output buffer with invalid values. The attached patch makes it return a correct error code instead. Signed-off-by: Amnon Aaronsohn Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust (Modified patch to reinstate the dprintk()) --- fs/nfs/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1eae44b9a1a..719464a04dda 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -204,9 +204,9 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) lock_kernel(); error = server->nfs_client->rpc_ops->statfs(server, fh, &res); - buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; + buf->f_type = NFS_SUPER_MAGIC; /* * Current versions of glibc do not correctly handle the @@ -233,15 +233,14 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = res.afiles; buf->f_namelen = server->namelen; - out: + unlock_kernel(); return 0; out_err: dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); - buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; - goto out; - + unlock_kernel(); + return error; } /* -- cgit v1.2.3 From 91e59c368c6ba5eed0369a085c42c9f270b97aa8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Apr 2007 13:12:46 -0400 Subject: NFS: Don't wait for congestion in nfs_update_request() It is redundant, and will interfere with the call to balance_dirty_pages_ratelimited_nr in generic_file_write(). Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8593965a35ef..dbad89c8e427 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -594,34 +594,6 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un } #endif -static int nfs_wait_on_write_congestion(struct address_space *mapping) -{ - struct inode *inode = mapping->host; - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - - might_sleep(); - - if (!bdi_write_congested(bdi)) - return 0; - - nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); - - do { - struct rpc_clnt *clnt = NFS_CLIENT(inode); - sigset_t oldset; - - rpc_clnt_sigmask(clnt, &oldset); - ret = congestion_wait_interruptible(WRITE, HZ/10); - rpc_clnt_sigunmask(clnt, &oldset); - if (ret == -ERESTARTSYS) - break; - ret = 0; - } while (bdi_write_congested(bdi)); - - return ret; -} - /* * Try to update any existing write request, or create one if there is none. * In order to match, the request's credentials must match those of @@ -640,8 +612,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, end = offset + bytes; - if (nfs_wait_on_write_congestion(mapping)) - return ERR_PTR(-ERESTARTSYS); for (;;) { /* Loop over all inode entries and see if we find * A request for the page we wish to update -- cgit v1.2.3 From d8a5ad75cc4d577987964e37a4c43b1c648c201e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Apr 2007 18:48:28 -0400 Subject: NFS: Cleanup the coalescing code Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 117 ++++++++++++++++++++++++++++++++++------------- fs/nfs/read.c | 24 +++++----- fs/nfs/write.c | 11 ++--- include/linux/nfs_page.h | 13 +++++- 4 files changed, 114 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ca4b1d4ff42b..7017eb0e0bc8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -223,48 +223,101 @@ out: } /** - * nfs_coalesce_requests - Split coalesced requests out from a list. + * nfs_pageio_init - initialise a page io descriptor + * @desc: pointer to descriptor + * @iosize: io block size + */ +void nfs_pageio_init(struct nfs_pageio_descriptor *desc, unsigned int bsize) +{ + INIT_LIST_HEAD(&desc->pg_list); + desc->pg_count = 0; + desc->pg_bsize = bsize; + desc->pg_base = 0; +} + +/** + * nfs_can_coalesce_requests - test two requests for compatibility + * @prev: pointer to nfs_page + * @req: pointer to nfs_page + * + * The nfs_page structures 'prev' and 'req' are compared to ensure that the + * page data area they describe is contiguous, and that their RPC + * credentials, NFSv4 open state, and lockowners are the same. + * + * Return 'true' if this is the case, else return 'false'. + */ +static int nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req) +{ + if (req->wb_context->cred != prev->wb_context->cred) + return 0; + if (req->wb_context->lockowner != prev->wb_context->lockowner) + return 0; + if (req->wb_context->state != prev->wb_context->state) + return 0; + if (req->wb_index != (prev->wb_index + 1)) + return 0; + if (req->wb_pgbase != 0) + return 0; + if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return 0; + return 1; +} + +/** + * nfs_pageio_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +static int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + size_t newlen = req->wb_bytes; + + if (desc->pg_count != 0) { + struct nfs_page *prev; + + /* + * FIXME: ideally we should be able to coalesce all requests + * that are not block boundary aligned, but currently this + * is problematic for the case of bsize < PAGE_CACHE_SIZE, + * since nfs_flush_multi and nfs_pagein_multi assume you + * can have only one struct nfs_page. + */ + newlen += desc->pg_count; + if (desc->pg_base + newlen > desc->pg_bsize) + return 0; + prev = nfs_list_entry(desc->pg_list.prev); + if (!nfs_can_coalesce_requests(prev, req)) + return 0; + } else + desc->pg_base = req->wb_pgbase; + nfs_list_remove_request(req); + nfs_list_add_request(req, &desc->pg_list); + desc->pg_count = newlen; + return 1; +} + +/** + * nfs_pageio_add_list - Split coalesced requests out from a list. + * @desc: destination io descriptor * @head: source list - * @dst: destination list - * @nmax: maximum number of requests to coalesce * * Moves a maximum of 'nmax' elements from one list to another. * The elements are checked to ensure that they form a contiguous set * of pages, and that the RPC credentials are the same. */ -int -nfs_coalesce_requests(struct list_head *head, struct list_head *dst, - unsigned int nmax) +void nfs_pageio_add_list(struct nfs_pageio_descriptor *desc, + struct list_head *head) { - struct nfs_page *req = NULL; - unsigned int npages = 0; - while (!list_empty(head)) { - struct nfs_page *prev = req; - - req = nfs_list_entry(head->next); - if (prev) { - if (req->wb_context->cred != prev->wb_context->cred) - break; - if (req->wb_context->lockowner != prev->wb_context->lockowner) - break; - if (req->wb_context->state != prev->wb_context->state) - break; - if (req->wb_index != (prev->wb_index + 1)) - break; - - if (req->wb_pgbase != 0) - break; - } - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - npages++; - if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE) - break; - if (npages >= nmax) + struct nfs_page *req = nfs_list_entry(head->next); + if (!nfs_pageio_add_request(desc, req)) break; } - return npages; } #define NFS_SCAN_MAXENTRIES 16 diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6ab4d5a9edf2..97f0f42e136d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -328,24 +328,26 @@ out_bad: } static int -nfs_pagein_list(struct list_head *head, int rpages) +nfs_pagein_list(struct list_head *head, unsigned int rsize) { - LIST_HEAD(one_request); - struct nfs_page *req; - int error = 0; - unsigned int pages = 0; + struct nfs_pageio_descriptor desc; + struct nfs_page *req; + unsigned int pages = 0; + int error = 0; while (!list_empty(head)) { - pages += nfs_coalesce_requests(head, &one_request, rpages); - req = nfs_list_entry(one_request.next); - error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode); + nfs_pageio_init(&desc, rsize); + nfs_pageio_add_list(&desc, head); + req = nfs_list_entry(desc.pg_list.next); + error = nfs_pagein_one(&desc.pg_list, req->wb_context->dentry->d_inode); if (error < 0) break; + pages += (desc.pg_count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; } - if (error >= 0) - return pages; nfs_async_read_error(head); + if (error >= 0) + return pages; return error; } @@ -595,7 +597,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, filp->private_data); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { - int err = nfs_pagein_list(&head, server->rpages); + int err = nfs_pagein_list(&head, server->rsize); if (!ret) nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dbad89c8e427..b03ec1ba4d75 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -945,9 +945,8 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { - LIST_HEAD(one_request); + struct nfs_pageio_descriptor desc; int (*flush_one)(struct inode *, struct list_head *, int); - struct nfs_page *req; int wpages = NFS_SERVER(inode)->wpages; int wsize = NFS_SERVER(inode)->wsize; int error; @@ -961,16 +960,16 @@ static int nfs_flush_list(struct inode *inode, struct list_head *head, int npage how |= FLUSH_STABLE; do { - nfs_coalesce_requests(head, &one_request, wpages); - req = nfs_list_entry(one_request.next); - error = flush_one(inode, &one_request, how); + nfs_pageio_init(&desc, wsize); + nfs_pageio_add_list(&desc, head); + error = flush_one(inode, &desc.pg_list, how); if (error < 0) goto out_err; } while (!list_empty(head)); return 0; out_err: while (!list_empty(head)) { - req = nfs_list_entry(head->next); + struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 16b0266b14fd..3ef8e0441473 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -48,6 +48,13 @@ struct nfs_page { struct nfs_writeverf wb_verf; /* Commit cookie */ }; +struct nfs_pageio_descriptor { + struct list_head pg_list; + size_t pg_count; + size_t pg_bsize; + unsigned int pg_base; +}; + #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, @@ -64,8 +71,10 @@ extern long nfs_scan_dirty(struct address_space *mapping, struct list_head *dst); extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, unsigned long idx_start, unsigned int npages); -extern int nfs_coalesce_requests(struct list_head *, struct list_head *, - unsigned int); +extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + size_t iosize); +extern void nfs_pageio_add_list(struct nfs_pageio_descriptor *, + struct list_head *); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_writeback_locked(struct nfs_page *req); -- cgit v1.2.3 From bcb71bba7e64f0442d0ca339d7d3117a7060589f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Apr 2007 18:48:28 -0400 Subject: NFS: Another cleanup of the read/write request coalescing code Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 71 ++++++++++++++++++++++++++++++++++++++++++++---- fs/nfs/read.c | 62 ++++++++++++++++++------------------------ fs/nfs/write.c | 53 ++++++++++++++---------------------- include/linux/nfs_page.h | 14 ++++++++-- 4 files changed, 125 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7017eb0e0bc8..528128545d66 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -225,14 +225,26 @@ out: /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor - * @iosize: io block size + * @inode: pointer to inode + * @doio: pointer to io function + * @bsize: io block size + * @io_flags: extra parameters for the io function */ -void nfs_pageio_init(struct nfs_pageio_descriptor *desc, unsigned int bsize) +void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + struct inode *inode, + int (*doio)(struct inode *, struct list_head *, size_t, int), + unsigned int bsize, + int io_flags) { INIT_LIST_HEAD(&desc->pg_list); + desc->pg_bytes_written = 0; desc->pg_count = 0; desc->pg_bsize = bsize; desc->pg_base = 0; + desc->pg_inode = inode; + desc->pg_doio = doio; + desc->pg_ioflags = io_flags; + desc->pg_error = 0; } /** @@ -265,15 +277,15 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, } /** - * nfs_pageio_add_request - Attempt to coalesce a request into a page list. + * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list. * @desc: destination io descriptor * @req: request * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ -static int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, - struct nfs_page *req) +static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) { size_t newlen = req->wb_bytes; @@ -301,6 +313,46 @@ static int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; } +/* + * Helper for nfs_pageio_add_request and nfs_pageio_complete + */ +static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) +{ + if (!list_empty(&desc->pg_list)) { + int error = desc->pg_doio(desc->pg_inode, + &desc->pg_list, + desc->pg_count, + desc->pg_ioflags); + if (error < 0) + desc->pg_error = error; + else + desc->pg_bytes_written += desc->pg_count; + } + if (list_empty(&desc->pg_list)) { + desc->pg_count = 0; + desc->pg_base = 0; + } +} + +/** + * nfs_pageio_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +static int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + while (!nfs_pageio_do_add_request(desc, req)) { + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + } + return 1; +} + /** * nfs_pageio_add_list - Split coalesced requests out from a list. * @desc: destination io descriptor @@ -320,6 +372,15 @@ void nfs_pageio_add_list(struct nfs_pageio_descriptor *desc, } } +/** + * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor + * @desc: pointer to io descriptor + */ +void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) +{ + nfs_pageio_doio(desc); +} + #define NFS_SCAN_MAXENTRIES 16 /** * nfs_scan_dirty - Scan the radix tree for dirty requests diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 97f0f42e136d..0effa74992df 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -27,7 +27,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_one(struct list_head *, struct inode *); +static int nfs_pagein_multi(struct inode *, struct list_head *, size_t, int); +static int nfs_pagein_one(struct inode *, struct list_head *, size_t, int); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -133,7 +134,10 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); nfs_list_add_request(new, &one_request); - nfs_pagein_one(&one_request, inode); + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) + nfs_pagein_multi(inode, &one_request, len, 0); + else + nfs_pagein_one(inode, &one_request, len, 0); return 0; } @@ -230,7 +234,7 @@ static void nfs_execute_read(struct nfs_read_data *data) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct list_head *head, struct inode *inode) +static int nfs_pagein_multi(struct inode *inode, struct list_head *head, size_t count, int flags) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -242,7 +246,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) nfs_list_remove_request(req); - nbytes = req->wb_bytes; + nbytes = count; do { size_t len = min(nbytes,rsize); @@ -258,23 +262,19 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) ClearPageError(page); offset = 0; - nbytes = req->wb_bytes; + nbytes = count; do { data = list_entry(list.next, struct nfs_read_data, pages); list_del_init(&data->pages); data->pagevec[0] = page; - if (nbytes > rsize) { - nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset); - offset += rsize; - nbytes -= rsize; - } else { - nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - nbytes, offset); - nbytes = 0; - } + if (nbytes < rsize) + rsize = nbytes; + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); + offset += rsize; + nbytes -= rsize; nfs_execute_read(data); } while (nbytes != 0); @@ -291,30 +291,24 @@ out_bad: return -ENOMEM; } -static int nfs_pagein_one(struct list_head *head, struct inode *inode) +static int nfs_pagein_one(struct inode *inode, struct list_head *head, size_t count, int flags) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; - unsigned int count; - - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize); + data = nfs_readdata_alloc(count); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); pages = data->pagevec; - count = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); ClearPageError(req->wb_page); *pages++ = req->wb_page; - count += req->wb_bytes; } req = nfs_list_entry(data->pages.next); @@ -328,22 +322,20 @@ out_bad: } static int -nfs_pagein_list(struct list_head *head, unsigned int rsize) +nfs_pagein_list(struct inode *inode, struct list_head *head, unsigned int rsize) { struct nfs_pageio_descriptor desc; - struct nfs_page *req; unsigned int pages = 0; int error = 0; - while (!list_empty(head)) { - nfs_pageio_init(&desc, rsize); - nfs_pageio_add_list(&desc, head); - req = nfs_list_entry(desc.pg_list.next); - error = nfs_pagein_one(&desc.pg_list, req->wb_context->dentry->d_inode); - if (error < 0) - break; - pages += (desc.pg_count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - } + if (rsize < PAGE_CACHE_SIZE) + nfs_pageio_init(&desc, inode, nfs_pagein_multi, rsize, 0); + else + nfs_pageio_init(&desc, inode, nfs_pagein_one, rsize, 0); + + nfs_pageio_add_list(&desc, head); + nfs_pageio_complete(&desc); + pages += (desc.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; nfs_async_read_error(head); if (error >= 0) @@ -597,7 +589,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, filp->private_data); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { - int err = nfs_pagein_list(&head, server->rsize); + int err = nfs_pagein_list(inode, &head, server->rsize); if (!ret) nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b03ec1ba4d75..b6749967eead 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -835,7 +835,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, size_t count, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -847,7 +847,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); - nbytes = req->wb_bytes; + nbytes = count; do { size_t len = min(nbytes, wsize); @@ -862,23 +862,19 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) ClearPageError(page); offset = 0; - nbytes = req->wb_bytes; + nbytes = count; do { data = list_entry(list.next, struct nfs_write_data, pages); list_del_init(&data->pages); data->pagevec[0] = page; - if (nbytes > wsize) { - nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, how); - offset += wsize; - nbytes -= wsize; - } else { - nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - nbytes, offset, how); - nbytes = 0; - } + if (nbytes < wsize) + wsize = nbytes; + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); + offset += wsize; + nbytes -= wsize; nfs_execute_write(data); } while (nbytes != 0); @@ -904,26 +900,23 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, size_t count, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; - unsigned int count; - data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); + data = nfs_writedata_alloc(count); if (!data) goto out_bad; pages = data->pagevec; - count = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); ClearPageError(req->wb_page); *pages++ = req->wb_page; - count += req->wb_bytes; } req = nfs_list_entry(data->pages.next); @@ -946,28 +939,22 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { struct nfs_pageio_descriptor desc; - int (*flush_one)(struct inode *, struct list_head *, int); int wpages = NFS_SERVER(inode)->wpages; int wsize = NFS_SERVER(inode)->wsize; - int error; - flush_one = nfs_flush_one; - if (wsize < PAGE_CACHE_SIZE) - flush_one = nfs_flush_multi; /* For single writes, FLUSH_STABLE is more efficient */ if (npages <= wpages && npages == NFS_I(inode)->npages && nfs_list_entry(head->next)->wb_bytes <= wsize) how |= FLUSH_STABLE; - do { - nfs_pageio_init(&desc, wsize); - nfs_pageio_add_list(&desc, head); - error = flush_one(inode, &desc.pg_list, how); - if (error < 0) - goto out_err; - } while (!list_empty(head)); - return 0; -out_err: + if (wsize < PAGE_CACHE_SIZE) + nfs_pageio_init(&desc, inode, nfs_flush_multi, wsize, how); + else + nfs_pageio_init(&desc, inode, nfs_flush_one, wsize, how); + nfs_pageio_add_list(&desc, head); + nfs_pageio_complete(&desc); + if (desc.pg_error == 0) + return 0; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -975,7 +962,7 @@ out_err: nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } - return error; + return desc.pg_error; } /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3ef8e0441473..91c7b18c47d8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -50,9 +50,15 @@ struct nfs_page { struct nfs_pageio_descriptor { struct list_head pg_list; + unsigned long pg_bytes_written; size_t pg_count; size_t pg_bsize; unsigned int pg_base; + + struct inode *pg_inode; + int (*pg_doio)(struct inode *, struct list_head *, size_t, int); + int pg_ioflags; + int pg_error; }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) @@ -71,10 +77,14 @@ extern long nfs_scan_dirty(struct address_space *mapping, struct list_head *dst); extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, unsigned long idx_start, unsigned int npages); -extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, - size_t iosize); +extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + struct inode *inode, + int (*doio)(struct inode *, struct list_head *, size_t, int), + size_t bsize, + int how); extern void nfs_pageio_add_list(struct nfs_pageio_descriptor *, struct list_head *); +extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_writeback_locked(struct nfs_page *req); -- cgit v1.2.3 From 8b09bee3083897e375bd0bf9d60f48daedfab3e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Apr 2007 18:48:28 -0400 Subject: NFS: Cleanup for nfs_readpages() Do the coalescing of read requests into block sized requests at start of I/O as we scan through the pages instead of going through a second pass. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ++-- fs/nfs/read.c | 47 +++++++++++++++-------------------------------- include/linux/nfs_page.h | 2 ++ 3 files changed, 19 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 528128545d66..094537ddd344 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -342,8 +342,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ -static int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, - struct nfs_page *req) +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { nfs_pageio_doio(desc); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0effa74992df..f0016062340d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -321,28 +321,6 @@ out_bad: return -ENOMEM; } -static int -nfs_pagein_list(struct inode *inode, struct list_head *head, unsigned int rsize) -{ - struct nfs_pageio_descriptor desc; - unsigned int pages = 0; - int error = 0; - - if (rsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&desc, inode, nfs_pagein_multi, rsize, 0); - else - nfs_pageio_init(&desc, inode, nfs_pagein_one, rsize, 0); - - nfs_pageio_add_list(&desc, head); - nfs_pageio_complete(&desc); - pages += (desc.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - nfs_async_read_error(head); - if (error >= 0) - return pages; - return error; -} - /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -532,7 +510,7 @@ out_error: } struct nfs_readdesc { - struct list_head *head; + struct nfs_pageio_descriptor *pgio; struct nfs_open_context *ctx; }; @@ -556,19 +534,21 @@ readpage_async_filler(void *data, struct page *page) } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_list_add_request(new, desc->head); + nfs_pageio_add_request(desc->pgio, new); return 0; } int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - LIST_HEAD(head); + struct nfs_pageio_descriptor pgio; struct nfs_readdesc desc = { - .head = &head, + .pgio = &pgio, }; struct inode *inode = mapping->host; struct nfs_server *server = NFS_SERVER(inode); + size_t rsize = server->rsize; + unsigned long npages; int ret = -ESTALE; dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", @@ -587,13 +567,16 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, } else desc.ctx = get_nfs_open_context((struct nfs_open_context *) filp->private_data); + if (rsize < PAGE_CACHE_SIZE) + nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); + else + nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); - if (!list_empty(&head)) { - int err = nfs_pagein_list(inode, &head, server->rsize); - if (!ret) - nfs_add_stats(inode, NFSIOS_READPAGES, err); - ret = err; - } + + nfs_pageio_complete(&pgio); + npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nfs_add_stats(inode, NFSIOS_READPAGES, npages); put_nfs_open_context(desc.ctx); out: return ret; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91c7b18c47d8..b8b7bca3bac8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -82,6 +82,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, int (*doio)(struct inode *, struct list_head *, size_t, int), size_t bsize, int how); +extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, + struct nfs_page *); extern void nfs_pageio_add_list(struct nfs_pageio_descriptor *, struct list_head *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); -- cgit v1.2.3 From c63c7b051395368573779c8309aa5c990dcf2f96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Apr 2007 19:29:52 -0400 Subject: NFS: Fix a race when doing NFS write coalescing Currently we do write coalescing in a very inefficient manner: one pass in generic_writepages() in order to lock the pages for writing, then one pass in nfs_flush_mapping() and/or nfs_sync_mapping_wait() in order to gather the locked pages for coalescing into RPC requests of size "wsize". In fact, it turns out there is actually a deadlock possible here since we only start I/O on the second pass. If the user signals the process while we're in nfs_sync_mapping_wait(), for instance, then we may exit before starting I/O on all the requests that have been queued up. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 92 ---------------------------- fs/nfs/write.c | 149 +++++++++++++++------------------------------- include/linux/nfs_page.h | 8 +-- include/linux/writeback.h | 2 + 4 files changed, 50 insertions(+), 201 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 094537ddd344..ea1a85df9ab1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -17,7 +17,6 @@ #include #include #include -#include #define NFS_PARANOIA 1 @@ -353,25 +352,6 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; } -/** - * nfs_pageio_add_list - Split coalesced requests out from a list. - * @desc: destination io descriptor - * @head: source list - * - * Moves a maximum of 'nmax' elements from one list to another. - * The elements are checked to ensure that they form a contiguous set - * of pages, and that the RPC credentials are the same. - */ -void nfs_pageio_add_list(struct nfs_pageio_descriptor *desc, - struct list_head *head) -{ - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - if (!nfs_pageio_add_request(desc, req)) - break; - } -} - /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor @@ -382,78 +362,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) } #define NFS_SCAN_MAXENTRIES 16 -/** - * nfs_scan_dirty - Scan the radix tree for dirty requests - * @mapping: pointer to address space - * @wbc: writeback_control structure - * @dst: Destination list - * - * Moves elements from one of the inode request lists. - * If the number of requests is set to 0, the entire address_space - * starting at index idx_start, is scanned. - * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's req_lock when calling this function - */ -long nfs_scan_dirty(struct address_space *mapping, - struct writeback_control *wbc, - struct list_head *dst) -{ - struct nfs_inode *nfsi = NFS_I(mapping->host); - struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; - struct nfs_page *req; - pgoff_t idx_start, idx_end; - long res = 0; - int found, i; - - if (nfsi->ndirty == 0) - return 0; - if (wbc->range_cyclic) { - idx_start = 0; - idx_end = ULONG_MAX; - } else if (wbc->range_end == 0) { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = ULONG_MAX; - } else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - } - - for (;;) { - unsigned int toscan = NFS_SCAN_MAXENTRIES; - - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, toscan, - NFS_PAGE_TAG_DIRTY); - - /* Did we make progress? */ - if (found <= 0) - break; - - for (i = 0; i < found; i++) { - req = pgvec[i]; - if (!wbc->range_cyclic && req->wb_index > idx_end) - goto out; - - /* Try to lock request and mark it for writeback */ - if (!nfs_set_page_writeback_locked(req)) - goto next; - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfsi->ndirty--; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; - if (res == LONG_MAX) - goto out; -next: - idx_start = req->wb_index + 1; - } - } -out: - WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)); - return res; -} - /** * nfs_scan_list - Scan a list for matching requests * @nfsi: NFS inode diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b6749967eead..6ce2d94e7b3f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -38,7 +38,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); -static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, + struct inode *inode, int ioflags); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; @@ -201,7 +202,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) - return FLUSH_HIGHPRI; + return FLUSH_HIGHPRI | FLUSH_STABLE; if (wbc->for_kupdate) return FLUSH_LOWPRI; return 0; @@ -251,7 +252,8 @@ static void nfs_end_page_writeback(struct page *page) * was not tagged. * May also return an error if the user signalled nfs_wait_on_request(). */ -static int nfs_page_mark_flush(struct page *page) +static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + struct page *page) { struct nfs_page *req; struct nfs_inode *nfsi = NFS_I(page->mapping->host); @@ -273,6 +275,8 @@ static int nfs_page_mark_flush(struct page *page) * request as dirty (in which case we don't care). */ spin_unlock(req_lock); + /* Prevent deadlock! */ + nfs_pageio_complete(pgio); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) @@ -283,21 +287,18 @@ static int nfs_page_mark_flush(struct page *page) /* This request is marked for commit */ spin_unlock(req_lock); nfs_unlock_request(req); + nfs_pageio_complete(pgio); return 1; } - if (nfs_set_page_writeback(page) == 0) { - nfs_list_remove_request(req); - /* add the request to the inode's dirty list. */ - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfs_list_add_request(req, &nfsi->dirty); - nfsi->ndirty++; - spin_unlock(req_lock); - __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES); - } else + if (nfs_set_page_writeback(page) != 0) { spin_unlock(req_lock); + BUG(); + } + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_WRITEBACK); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - nfs_unlock_request(req); + spin_unlock(req_lock); + nfs_pageio_add_request(pgio, req); return ret; } @@ -306,6 +307,7 @@ static int nfs_page_mark_flush(struct page *page) */ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) { + struct nfs_pageio_descriptor mypgio, *pgio; struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; unsigned offset; @@ -314,7 +316,14 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - err = nfs_page_mark_flush(page); + if (wbc->for_writepages) + pgio = wbc->fs_private; + else { + nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); + pgio = &mypgio; + } + + err = nfs_page_async_flush(pgio, page); if (err <= 0) goto out; err = 0; @@ -331,12 +340,12 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc put_nfs_open_context(ctx); if (err != 0) goto out; - err = nfs_page_mark_flush(page); + err = nfs_page_async_flush(pgio, page); if (err > 0) err = 0; out: if (!wbc->for_writepages) - nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc)); + nfs_pageio_complete(pgio); return err; } @@ -352,20 +361,20 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + struct nfs_pageio_descriptor pgio; int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); + wbc->fs_private = &pgio; err = generic_writepages(mapping, wbc); + nfs_pageio_complete(&pgio); if (err) return err; - err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); - if (err < 0) - goto out; - nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); - err = 0; -out: - return err; + if (pgio.pg_error) + return pgio.pg_error; + return 0; } /* @@ -536,18 +545,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st return res; } -static void nfs_cancel_dirty_list(struct list_head *head) -{ - struct nfs_page *req; - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_inode_remove_request(req); - nfs_clear_page_writeback(req); - } -} - static void nfs_cancel_commit_list(struct list_head *head) { struct nfs_page *req; @@ -936,33 +933,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, size_t cou return -ENOMEM; } -static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) { - struct nfs_pageio_descriptor desc; - int wpages = NFS_SERVER(inode)->wpages; int wsize = NFS_SERVER(inode)->wsize; - /* For single writes, FLUSH_STABLE is more efficient */ - if (npages <= wpages && npages == NFS_I(inode)->npages - && nfs_list_entry(head->next)->wb_bytes <= wsize) - how |= FLUSH_STABLE; - if (wsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&desc, inode, nfs_flush_multi, wsize, how); + nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); else - nfs_pageio_init(&desc, inode, nfs_flush_one, wsize, how); - nfs_pageio_add_list(&desc, head); - nfs_pageio_complete(&desc); - if (desc.pg_error == 0) - return 0; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); - } - return desc.pg_error; + nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); } /* @@ -1286,31 +1265,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; -#else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) -{ - return 0; -} -#endif - -static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - struct nfs_inode *nfsi = NFS_I(mapping->host); - LIST_HEAD(head); - long res; - - spin_lock(&nfsi->req_lock); - res = nfs_scan_dirty(mapping, wbc, &head); - spin_unlock(&nfsi->req_lock); - if (res) { - int error = nfs_flush_list(mapping->host, &head, res, how); - if (error < 0) - return error; - } - return res; -} -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1327,6 +1282,11 @@ int nfs_commit_inode(struct inode *inode, int how) } return res; } +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) @@ -1360,19 +1320,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - pages = nfs_scan_dirty(mapping, wbc, &head); - if (pages != 0) { - spin_unlock(&nfsi->req_lock); - if (how & FLUSH_INVALIDATE) { - nfs_cancel_dirty_list(&head); - ret = pages; - } else - ret = nfs_flush_list(inode, &head, pages, how); - spin_lock(&nfsi->req_lock); - continue; - } - if (wbc->pages_skipped != 0) - continue; if (nocommit) break; pages = nfs_scan_commit(inode, &head, idx_start, npages); @@ -1412,7 +1359,7 @@ int nfs_wb_all(struct inode *inode) }; int ret; - ret = generic_writepages(mapping, &wbc); + ret = nfs_writepages(mapping, &wbc); if (ret < 0) goto out; ret = nfs_sync_mapping_wait(mapping, &wbc, 0); @@ -1435,11 +1382,9 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo }; int ret; - if (!(how & FLUSH_NOWRITEPAGE)) { - ret = generic_writepages(mapping, &wbc); - if (ret < 0) - goto out; - } + ret = nfs_writepages(mapping, &wbc); + if (ret < 0) + goto out; ret = nfs_sync_mapping_wait(mapping, &wbc, how); if (ret >= 0) return 0; @@ -1462,7 +1407,7 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) int ret; BUG_ON(!PageLocked(page)); - if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) { + if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index b8b7bca3bac8..e556e57ef7ad 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -21,8 +21,7 @@ /* * Valid flags for the radix tree */ -#define NFS_PAGE_TAG_DIRTY 0 -#define NFS_PAGE_TAG_WRITEBACK 1 +#define NFS_PAGE_TAG_WRITEBACK 0 /* * Valid flags for a dirty buffer @@ -72,9 +71,6 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern long nfs_scan_dirty(struct address_space *mapping, - struct writeback_control *wbc, - struct list_head *dst); extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, unsigned long idx_start, unsigned int npages); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, @@ -84,8 +80,6 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); -extern void nfs_pageio_add_list(struct nfs_pageio_descriptor *, - struct list_head *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0c78f7f4a976..daa6c125f66e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -59,6 +59,8 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ + + void *fs_private; /* For use by ->writepages() */ }; /* -- cgit v1.2.3 From 8d5658c949e6d89edc579a1f112aeee3bc232a8e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Apr 2007 09:26:35 -0400 Subject: NFS: Fix a buffer overflow in the allocation of struct nfs_read/writedata Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++-- fs/nfs/internal.h | 12 ++++++++++++ fs/nfs/pagelist.c | 10 ++++++++-- fs/nfs/read.c | 19 +++++++++---------- fs/nfs/write.c | 11 +++++------ include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_page.h | 4 ++-- 7 files changed, 41 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2877744cb606..889de60f8a84 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -54,6 +54,7 @@ #include #include +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -271,7 +272,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo bytes = min(rsize,count); result = -ENOMEM; - data = nfs_readdata_alloc(pgbase + bytes); + data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); if (unlikely(!data)) break; @@ -602,7 +603,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l bytes = min(wsize,count); result = -ENOMEM; - data = nfs_writedata_alloc(pgbase + bytes); + data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); if (unlikely(!data)) break; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6610f2b02077..ad2b40db1e65 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -231,3 +231,15 @@ unsigned int nfs_page_length(struct page *page) } return 0; } + +/* + * Determine the number of pages in an array of length 'len' and + * with a base offset of 'base' + */ +static inline +unsigned int nfs_page_array_len(unsigned int base, size_t len) +{ + return ((unsigned long)len + (unsigned long)base + + PAGE_SIZE - 1) >> PAGE_SHIFT; +} + diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ea1a85df9ab1..096efd73eb4c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -18,6 +18,8 @@ #include #include +#include "internal.h" + #define NFS_PARANOIA 1 static struct kmem_cache *nfs_page_cachep; @@ -231,7 +233,7 @@ out: */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, size_t, int), + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), unsigned int bsize, int io_flags) { @@ -298,8 +300,10 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, * since nfs_flush_multi and nfs_pagein_multi assume you * can have only one struct nfs_page. */ + if (desc->pg_bsize < PAGE_SIZE) + return 0; newlen += desc->pg_count; - if (desc->pg_base + newlen > desc->pg_bsize) + if (newlen > desc->pg_bsize) return 0; prev = nfs_list_entry(desc->pg_list.prev); if (!nfs_can_coalesce_requests(prev, req)) @@ -320,6 +324,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) if (!list_empty(&desc->pg_list)) { int error = desc->pg_doio(desc->pg_inode, &desc->pg_list, + nfs_page_array_len(desc->pg_base, + desc->pg_count), desc->pg_count, desc->pg_ioflags); if (error < 0) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f0016062340d..9a55807b2a70 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -27,8 +27,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct inode *, struct list_head *, size_t, int); -static int nfs_pagein_one(struct inode *, struct list_head *, size_t, int); +static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); +static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -37,9 +37,8 @@ static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -struct nfs_read_data *nfs_readdata_alloc(size_t len) +struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) { - unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); if (p) { @@ -135,9 +134,9 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(inode, &one_request, len, 0); + nfs_pagein_multi(inode, &one_request, 1, len, 0); else - nfs_pagein_one(inode, &one_request, len, 0); + nfs_pagein_one(inode, &one_request, 1, len, 0); return 0; } @@ -234,7 +233,7 @@ static void nfs_execute_read(struct nfs_read_data *data) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct inode *inode, struct list_head *head, size_t count, int flags) +static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -250,7 +249,7 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, size_t do { size_t len = min(nbytes,rsize); - data = nfs_readdata_alloc(len); + data = nfs_readdata_alloc(1); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); @@ -291,13 +290,13 @@ out_bad: return -ENOMEM; } -static int nfs_pagein_one(struct inode *inode, struct list_head *head, size_t count, int flags) +static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; - data = nfs_readdata_alloc(count); + data = nfs_readdata_alloc(npages); if (!data) goto out_bad; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6ce2d94e7b3f..0a8bbc399689 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -72,9 +72,8 @@ void nfs_commit_free(struct nfs_write_data *wdata) call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); } -struct nfs_write_data *nfs_writedata_alloc(size_t len) +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { - unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { @@ -832,7 +831,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct inode *inode, struct list_head *head, size_t count, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -848,7 +847,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, size_t c do { size_t len = min(nbytes, wsize); - data = nfs_writedata_alloc(len); + data = nfs_writedata_alloc(1); if (!data) goto out_bad; list_add(&data->pages, &list); @@ -897,13 +896,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct inode *inode, struct list_head *head, size_t count, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; - data = nfs_writedata_alloc(count); + data = nfs_writedata_alloc(npages); if (!data) goto out_bad; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e9ae0c6e2c62..0543439a97af 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -455,7 +455,7 @@ nfs_have_writebacks(struct inode *inode) /* * Allocate nfs_write_data structures */ -extern struct nfs_write_data *nfs_writedata_alloc(size_t len); +extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); /* * linux/fs/nfs/read.c @@ -469,7 +469,7 @@ extern void nfs_readdata_release(void *data); /* * Allocate nfs_read_data structures */ -extern struct nfs_read_data *nfs_readdata_alloc(size_t len); +extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); /* * linux/fs/nfs3proc.c diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index e556e57ef7ad..8e9e7bceda48 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,7 +55,7 @@ struct nfs_pageio_descriptor { unsigned int pg_base; struct inode *pg_inode; - int (*pg_doio)(struct inode *, struct list_head *, size_t, int); + int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int); int pg_ioflags; int pg_error; }; @@ -75,7 +75,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct unsigned long idx_start, unsigned int npages); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, size_t, int), + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, -- cgit v1.2.3 From 724c439c204b12a3537b71289fb4c0a42c3aa566 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Apr 2007 17:22:13 -0400 Subject: NFS: Clean up nfs_sync_mapping_wait() It has no business touching wbc->pages_skipped. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0a8bbc399689..424c4cea1208 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1315,18 +1315,14 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr how &= ~FLUSH_NOCOMMIT; spin_lock(&nfsi->req_lock); do { - wbc->pages_skipped = 0; ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; if (nocommit) break; pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) { - if (wbc->pages_skipped != 0) - continue; + if (pages == 0) break; - } if (how & FLUSH_INVALIDATE) { spin_unlock(&nfsi->req_lock); nfs_cancel_commit_list(&head); -- cgit v1.2.3 From ca52fec152282ef73e5e882b847b36b1febbb1c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Apr 2007 17:22:13 -0400 Subject: NFS: Use pgoff_t in structures and functions that pass page cache offsets Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ++-- fs/nfs/write.c | 18 +++++++++--------- include/linux/nfs_page.h | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 096efd73eb4c..d846d39a31ef 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -383,12 +383,12 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) * You must be holding the inode's req_lock when calling this function */ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, - struct list_head *dst, unsigned long idx_start, + struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; - unsigned long idx_end; + pgoff_t idx_end; int found, i; int res; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 424c4cea1208..5d44b8bd1070 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -139,7 +139,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c { struct inode *inode = page->mapping->host; loff_t end, i_size = i_size_read(inode); - unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; if (i_size > 0 && page->index < end_index) return; @@ -511,11 +511,11 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; - unsigned long idx_end, next; + pgoff_t idx_end, next; unsigned int res = 0; int error; @@ -570,7 +570,7 @@ static void nfs_cancel_commit_list(struct list_head *head) * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); int res = 0; @@ -584,7 +584,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st return res; } #else -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { return 0; } @@ -604,7 +604,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; - unsigned long rqend, end; + pgoff_t rqend, end; end = offset + bytes; @@ -1292,7 +1292,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr { struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); - unsigned long idx_start, idx_end; + pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; @@ -1305,10 +1305,10 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; if (idx_end > idx_start) { - unsigned long l_npages = 1 + idx_end - idx_start; + pgoff_t l_npages = 1 + idx_end - idx_start; npages = l_npages; if (sizeof(npages) != sizeof(l_npages) && - (unsigned long)npages != l_npages) + (pgoff_t)npages != l_npages) npages = 0; } } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 8e9e7bceda48..41afab6b5f09 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -38,7 +38,7 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ - unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ + pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ @@ -72,7 +72,7 @@ extern void nfs_release_request(struct nfs_page *req); extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, - unsigned long idx_start, unsigned int npages); + pgoff_t idx_start, unsigned int npages); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), -- cgit v1.2.3 From 511d2e8855a065c8251d0c140ebc353854f1929e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 29 Mar 2007 16:47:47 -0400 Subject: NLM: Shrink the maximum request size of NLM4 requests NLM version 4 requests estimate the call and reply header sizes rather conservatively, using the very maximum size allowed in the protocol even though Linux always uses only a small fraction of the allowable space. Reduce the size of caller and lock arguments to conserve RPC buffer space while XDR encoding NLM4 arguments. Add compile-time checks to ensure the hostname string won't overflow NLM protocol maximums. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/xdr.c | 13 ++++++++----- fs/lockd/xdr4.c | 17 ++++++++++++----- include/linux/lockd/lockd.h | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 34dae5d70738..6aac4b2c9ff0 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -510,17 +510,20 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) return 0; } +#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) +# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!" +#endif + /* * Buffer requirements for NLM */ #define NLM_void_sz 0 #define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) -#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename)) -#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) -/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */ +#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) +#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) #define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) -#define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz -#define NLM_holder_sz 4+NLM_netobj_sz +#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz +#define NLM_holder_sz 4+NLM_owner_sz #define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz #define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index a78240551219..7c8b679c394c 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -516,17 +516,24 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) return 0; } +#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) +# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!" +#endif + +#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN) +# error "NLM host name cannot be larger than NLM's maximum string length!" +#endif + /* * Buffer requirements for NLM */ #define NLM4_void_sz 0 #define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) -#define NLM4_caller_sz 1+XDR_QUADLEN(NLM_MAXSTRLEN) -#define NLM4_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) -/* #define NLM4_owner_sz 1+XDR_QUADLEN(NLM4_MAXOWNER) */ +#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) +#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) #define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE) -#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_netobj_sz+NLM4_fhandle_sz -#define NLM4_holder_sz 6+NLM4_netobj_sz +#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz +#define NLM4_holder_sz 6+NLM4_owner_sz #define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz #define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ac25b5649c59..f6a81e0b1b93 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -88,7 +88,7 @@ struct nlm_wait; /* * Memory chunk for NLM client RPC request. */ -#define NLMCLNT_OHSIZE (sizeof(utsname()->nodename)+10) +#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) struct nlm_rqst { unsigned int a_flags; /* initial RPC task flags */ struct nlm_host * a_host; /* host handle */ -- cgit v1.2.3 From 2bea90d43a050bbc4021d44e59beb34f384438db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 29 Mar 2007 16:47:53 -0400 Subject: SUNRPC: RPC buffer size estimates are too large The RPC buffer size estimation logic in net/sunrpc/clnt.c always significantly overestimates the requirements for the buffer size. A little instrumentation demonstrated that in fact rpc_malloc was never allocating the buffer from the mempool, but almost always called kmalloc. To compute the size of the RPC buffer more precisely, split p_bufsiz into two fields; one for the argument size, and one for the result size. Then, compute the sum of the exact call and reply header sizes, and split the RPC buffer precisely between the two. That should keep almost all RPC buffers within the 2KiB buffer mempool limit. And, we can finally be rid of RPC_SLACK_SPACE! Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 10 +++----- fs/lockd/xdr.c | 7 ++--- fs/lockd/xdr4.c | 7 ++--- fs/nfs/mount_clnt.c | 7 +++-- fs/nfs/nfs2xdr.c | 7 ++--- fs/nfs/nfs3xdr.c | 13 +++++----- fs/nfs/nfs4xdr.c | 7 ++--- fs/nfsd/nfs4callback.c | 7 ++--- include/linux/sunrpc/clnt.h | 3 ++- include/linux/sunrpc/xprt.h | 4 ++- net/sunrpc/clnt.c | 62 +++++++++++++++++++++++++++------------------ net/sunrpc/pmap_clnt.c | 9 ++++--- net/sunrpc/xprt.c | 1 - 13 files changed, 74 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index eb243edf8932..2102e2d0134d 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -225,16 +225,13 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) #define SM_monres_sz 2 #define SM_unmonres_sz 1 -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - static struct rpc_procinfo nsm_procedures[] = { [SM_MON] = { .p_proc = SM_MON, .p_encode = (kxdrproc_t) xdr_encode_mon, .p_decode = (kxdrproc_t) xdr_decode_stat_res, - .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, + .p_arglen = SM_mon_sz, + .p_replen = SM_monres_sz, .p_statidx = SM_MON, .p_name = "MONITOR", }, @@ -242,7 +239,8 @@ static struct rpc_procinfo nsm_procedures[] = { .p_proc = SM_UNMON, .p_encode = (kxdrproc_t) xdr_encode_unmon, .p_decode = (kxdrproc_t) xdr_decode_stat, - .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, + .p_arglen = SM_mon_id_sz, + .p_replen = SM_unmonres_sz, .p_statidx = SM_UNMON, .p_name = "UNMONITOR", }, diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 6aac4b2c9ff0..9702956d206c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -534,10 +534,6 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) #define NLM_res_sz NLM_cookie_sz+1 #define NLM_norep_sz 0 -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - /* * For NLM, a void procedure really returns nothing */ @@ -548,7 +544,8 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ - .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ + .p_arglen = NLM_##argtype##_sz, \ + .p_replen = NLM_##restype##_sz, \ .p_statidx = NLMPROC_##proc, \ .p_name = #proc, \ } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 7c8b679c394c..ce1efdbe1b3a 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -544,10 +544,6 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) #define NLM4_res_sz NLM4_cookie_sz+1 #define NLM4_norep_sz 0 -#ifndef MAX -# define MAX(a,b) (((a) > (b))? (a) : (b)) -#endif - /* * For NLM, a void procedure really returns nothing */ @@ -558,7 +554,8 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ - .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ + .p_arglen = NLM4_##argtype##_sz, \ + .p_replen = NLM4_##restype##_sz, \ .p_statidx = NLMPROC_##proc, \ .p_name = #proc, \ } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index f75fe72b4160..ca5a266a3140 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -133,13 +133,15 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) #define MNT_dirpath_sz (1 + 256) #define MNT_fhstatus_sz (1 + 8) +#define MNT_fhstatus3_sz (1 + 16) static struct rpc_procinfo mnt_procedures[] = { [MNTPROC_MNT] = { .p_proc = MNTPROC_MNT, .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_bufsiz = MNT_dirpath_sz << 2, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus_sz, .p_statidx = MNTPROC_MNT, .p_name = "MOUNT", }, @@ -150,7 +152,8 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_proc = MOUNTPROC3_MNT, .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_bufsiz = MNT_dirpath_sz << 2, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus3_sz, .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 3be4e72a0227..abd9f8b48943 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -687,16 +687,13 @@ nfs_stat_to_errno(int stat) return nfs_errtbl[i].errno; } -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype, timer) \ [NFSPROC_##proc] = { \ .p_proc = NFSPROC_##proc, \ .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ - .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + .p_arglen = NFS_##argtype##_sz, \ + .p_replen = NFS_##restype##_sz, \ .p_timer = timer, \ .p_statidx = NFSPROC_##proc, \ .p_name = #proc, \ diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 0ace092d126f..b51df8eb9f01 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1102,16 +1102,13 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) } #endif /* CONFIG_NFS_V3_ACL */ -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ - .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + .p_arglen = NFS3_##argtype##_sz, \ + .p_replen = NFS3_##restype##_sz, \ .p_timer = timer, \ .p_statidx = NFS3PROC_##proc, \ .p_name = #proc, \ @@ -1153,7 +1150,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_proc = ACLPROC3_GETACL, .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, - .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_arglen = ACL3_getaclargs_sz, + .p_replen = ACL3_getaclres_sz, .p_timer = 1, .p_name = "GETACL", }, @@ -1161,7 +1159,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_proc = ACLPROC3_SETACL, .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, - .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_arglen = ACL3_setaclargs_sz, + .p_replen = ACL3_setaclres_sz, .p_timer = 0, .p_name = "SETACL", }, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f02d522fd788..b8c28f2380a5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4546,16 +4546,13 @@ nfs4_stat_to_errno(int stat) return stat; } -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype) \ [NFSPROC4_CLNT_##proc] = { \ .p_proc = NFSPROC4_COMPOUND, \ .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ - .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_arglen = NFS4_##argtype##_sz, \ + .p_replen = NFS4_##restype##_sz, \ .p_statidx = NFSPROC4_CLNT_##proc, \ .p_name = #proc, \ } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index fb14d68eacab..32ffea033c7a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -315,16 +315,13 @@ out: /* * RPC procedure tables */ -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, call, argtype, restype) \ [NFSPROC4_CLNT_##proc] = { \ .p_proc = NFSPROC4_CB_##call, \ .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ - .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_arglen = NFS4_##argtype##_sz, \ + .p_replen = NFS4_##restype##_sz, \ .p_statidx = NFSPROC4_CB_##call, \ .p_name = #proc, \ } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c7a78eef2b4f..32c48a0b0d71 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -84,7 +84,8 @@ struct rpc_procinfo { u32 p_proc; /* RPC procedure number */ kxdrproc_t p_encode; /* XDR encode function */ kxdrproc_t p_decode; /* XDR decode function */ - unsigned int p_bufsiz; /* req. buffer size */ + unsigned int p_arglen; /* argument hdr length (u32) */ + unsigned int p_replen; /* reply hdr length (u32) */ unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f780e72fc417..7aa29502b187 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,7 +84,9 @@ struct rpc_rqst { struct list_head rq_list; __u32 * rq_buffer; /* XDR encode buffer */ - size_t rq_bufsize; + size_t rq_bufsize, + rq_callsize, + rq_rcvsize; struct xdr_buf rq_private_buf; /* The receive buffer * used in the softirq. diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 396cdbe249d1..12487aafaab5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,8 +36,6 @@ #include -#define RPC_SLACK_SPACE (1024) /* total overkill */ - #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -747,21 +745,37 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { + unsigned int slack = task->tk_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; - unsigned int bufsiz; + struct rpc_procinfo *proc = task->tk_msg.rpc_proc; dprint_status(task); + task->tk_status = 0; task->tk_action = call_bind; + if (req->rq_buffer) return; - /* FIXME: compute buffer requirements more exactly using - * auth->au_wslack */ - bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; + if (proc->p_proc != 0) { + BUG_ON(proc->p_arglen == 0); + if (proc->p_decode != NULL) + BUG_ON(proc->p_replen == 0); + } - if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) + /* + * Calculate the size (in quads) of the RPC call + * and reply headers, and convert both values + * to byte sizes. + */ + req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen; + req->rq_callsize <<= 2; + req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; + req->rq_rcvsize <<= 2; + + xprt->ops->buf_alloc(task, req->rq_callsize + req->rq_rcvsize); + if (req->rq_buffer != NULL) return; dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -788,6 +802,17 @@ rpc_task_force_reencode(struct rpc_task *task) task->tk_rqstp->rq_snd_buf.len = 0; } +static inline void +rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = len; +} + /* * 3. Encode arguments of an RPC call */ @@ -795,28 +820,17 @@ static void call_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct xdr_buf *sndbuf = &req->rq_snd_buf; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - unsigned int bufsiz; kxdrproc_t encode; __be32 *p; dprint_status(task); - /* Default buffer setup */ - bufsiz = req->rq_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)req->rq_buffer; - sndbuf->head[0].iov_len = bufsiz; - sndbuf->tail[0].iov_len = 0; - sndbuf->page_len = 0; - sndbuf->len = 0; - sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); - rcvbuf->head[0].iov_len = bufsiz; - rcvbuf->tail[0].iov_len = 0; - rcvbuf->page_len = 0; - rcvbuf->len = 0; - rcvbuf->buflen = bufsiz; + rpc_xdr_buf_init(&req->rq_snd_buf, + req->rq_buffer, + req->rq_callsize); + rpc_xdr_buf_init(&req->rq_rcv_buf, + (char *)req->rq_buffer + req->rq_callsize, + req->rq_rcvsize); /* Encode header and provided arguments */ encode = task->tk_msg.rpc_proc->p_encode; diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index d9f765344589..c45fc4c99513 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -335,7 +335,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_proc = PMAP_SET, .p_encode = (kxdrproc_t) xdr_encode_mapping, .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, + .p_arglen = 4, + .p_replen = 1, .p_count = 1, .p_statidx = PMAP_SET, .p_name = "SET", @@ -344,7 +345,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_proc = PMAP_UNSET, .p_encode = (kxdrproc_t) xdr_encode_mapping, .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, + .p_arglen = 4, + .p_replen = 1, .p_count = 1, .p_statidx = PMAP_UNSET, .p_name = "UNSET", @@ -353,7 +355,8 @@ static struct rpc_procinfo pmap_procedures[] = { .p_proc = PMAP_GETPORT, .p_encode = (kxdrproc_t) xdr_encode_mapping, .p_decode = (kxdrproc_t) xdr_decode_port, - .p_bufsiz = 4, + .p_arglen = 4, + .p_replen = 1, .p_count = 1, .p_statidx = PMAP_GETPORT, .p_name = "GETPORT", diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 456a14510308..432ee92cf262 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -823,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; - req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); -- cgit v1.2.3 From df8b172a8880521396d2048ecef7e75df43b5bc4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 29 Mar 2007 16:48:22 -0400 Subject: NFS: switch NFSROOT to use new rpcbind client It is arguable whether NFSROOT will support IPv6, and thus whether rpcb_getport_external needs to support rpcbind versions greater than 2. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 75f819dc0255..49d1008ce1d7 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpc_getport_external(&sin, program, version, proto); + return rpcb_getport_external(&sin, program, version, proto); } -- cgit v1.2.3 From 00a6e7bbf990e3a5e59a9a1e6a68e99c94fe001c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 29 Mar 2007 16:48:33 -0400 Subject: SUNRPC: RPC client should retry with different versions of rpcbind Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/Kconfig | 12 ++++++++++++ net/sunrpc/clnt.c | 6 ++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index a42f767dcdd5..20bec7767dd8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1734,6 +1734,18 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_BIND34 + bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + help + Provides kernel support for querying rpcbind servers via versions 3 + and 4 of the rpcbind protocol. The kernel automatically falls back + to version 2 if a remote rpcbind service does not support versions + 3 or 4. + + If unsure, say N to get traditional behavior (version 2 rpcbind + requests only). + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e7dc09ecc470..d8fbee40a19c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -902,9 +902,11 @@ call_bind_status(struct rpc_task *task) task->tk_pid); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version 2 unavailable\n", + dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", task->tk_pid); - break; + task->tk_status = 0; + task->tk_action = call_bind; + return; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); -- cgit v1.2.3 From 74dd34e6e8bb127ff4c182423154b294729b663b Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Sat, 14 Apr 2007 17:01:15 -0400 Subject: NFS: Added support to turn off the NFSv3 READDIRPLUS RPC. READDIRPLUS can be a performance hindrance when the client is working with large directories. In addition, some servers still have bugs in their implementations (e.g. Tru64 returns wrong values for the fsid). Add a mount flag to enable users to turn it off at mount time following the implementation in Apple's NFS client. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 ++- fs/nfs/super.c | 1 + include/linux/nfs_mount.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2190e6c2792e..5bd03b97002e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -618,7 +618,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat if (clp->cl_nfsversion == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - server->caps |= NFS_CAP_READDIRPLUS; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 719464a04dda..ca20d3cc2609 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -290,6 +290,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c75438454..cc8b9c59acb8 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -61,6 +61,7 @@ struct nfs_mount_data { #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif -- cgit v1.2.3 From 1f4eab7e7c1d90dcd8ca4d7c064ee78dfbb345eb Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 16 Apr 2007 09:35:27 +1000 Subject: NFS: Set meaningful value for fattr->time_start in readdirplus results. Don't use uninitialsed value for fattr->time_start in readdirplus results. The 'fattr' structure filled in by nfs3_decode_direct does not get a value for ->time_start set. Thus if an entry is for an inode that we already have in cache, when nfs_readdir_lookup calls nfs_fhget, it will call nfs_refresh_inode and may update the inode with out-of-date information. Directories are read a page at a time, so each page could have a different timestamp that "should" be used to set the time_start for the fattr for info in that page. However storing the timestamp per page is awkward. (We could stick in the first 4 bytes and only read 4092 bytes, but that is a bigger code change than I am interested it). This patch ignores the readdir_plus attributes if a readdir finds the information already in cache, and otherwise sets ->time_start to the time the readdir request was sent to the server. It might be nice to store - in the directory inode - the time stamp for the earliest readdir request that is still in the page cache, so that we don't ignore attribute data that we don't have to. This patch doesn't do that. Signed-off-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cd3469720cbf..d971547ce609 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -154,6 +154,8 @@ typedef struct { decode_dirent_t decode; int plus; int error; + unsigned long timestamp; + int timestamp_valid; } nfs_readdir_descriptor_t; /* Now we cache directories properly, by stuffing the dirent @@ -195,6 +197,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) } goto error; } + desc->timestamp = timestamp; + desc->timestamp_valid = 1; SetPageUptodate(page); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; @@ -225,6 +229,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) if (IS_ERR(p)) return PTR_ERR(p); desc->ptr = p; + if (desc->timestamp_valid) + desc->entry->fattr->time_start = desc->timestamp; + else + desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; return 0; } @@ -316,6 +324,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) __FUNCTION__, desc->page_index, (long long) *desc->dir_cookie); + /* If we find the page in the page_cache, we cannot be sure + * how fresh the data is, so we will ignore readdir_plus attributes. + */ + desc->timestamp_valid = 0; page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -468,6 +480,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; + unsigned long timestamp; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -477,6 +490,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, status = -ENOMEM; goto out; } + timestamp = jiffies; desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, @@ -487,6 +501,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { + desc->timestamp = timestamp; + desc->timestamp_valid = 1; if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = *desc->dir_cookie; } else -- cgit v1.2.3 From 83672d392f7bcf556f7920d6715e4174d9373ee0 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 26 Feb 2007 12:48:25 +1100 Subject: NFS: Fix directory caching problem - with test case and patch. Try running this script in an NFS mounted directory (Client relatively recent - 2.6.18 has the problem as does 2.6.20). ------------------------------------------------------ #!/bin/bash # # This script will produce the following errormessage from tar: # # tar: newdir/innerdir/innerfile: file changed as we read it # create dirs rm -rf nfstest mkdir -p nfstest/dir/innerdir # create files (should not be empty) echo "Hello World!" >nfstest/dir/file echo "Hello World!" >nfstest/dir/innerdir/innerfile # problem only happens if we sleep before chmod sleep 1 # change file modes chmod -R a+r nfstest # rename dir mv nfstest/dir nfstest/newdir # tar it tar -cf nfstest/nfstest.tar -C nfstest newdir # restore old dir name mv nfstest/newdir nfstest/dir -------------------------------------------------------- What happens: The 'chmod -R' does a readdir_plus in each directory and the results get cached in the page cache. It then updates the ctime on each file by one second. When this happens, the post-op attributes are used to update the ctime stored on the client to match the value in the kernel. The 'mv' calls shrink_dcache_parent on the directory tree which flushes all the dentries (so a new lookup will be required) but doesn't flush the inodes or pagecache. The 'tar' does a readdir on each directory, but (in the case of 'innerdir' at least) satisfies it from the pagecache and uses the READDIRPLUS data to update all the inodes. In the case of 'innerdir/innerfile', the ctime is out of date. 'tar' then calls 'lstat' on innerdir/innerfile getting an old ctime. It then opens the file (triggering a GETATTR), reads the content, and then calls fstat to see if anything has changed. It finds that ctime has changed and so complains. The problem seems to be that the cache readdirplus info is kept around for too long. My patch below discards pagecache data for directories when dentry_iput is called on them. This effectively removes the symptom which convinces me that I correctly understand the problem. However I'm not convinced that is a proper solution, as there could easily be other races that trigger the same problem without being affected by this 'fix'. One possibility would be to require that readdirplus pagecache data be only used *once* to instantiate an inode. Somehow it should then be invalidated so that if the dentry subsequently disappears, it will cause a new request to the server to fill in the stat data. Another possibility is to compare the cache_change_attribute on the inode with something similar for the readdirplus info and reject the info from readdirplus if it is too old. I haven't tried to implement these and would value other opinions before I do. Thanks, NeilBrown Signed-off-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d971547ce609..e59fd31c9a22 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -865,6 +865,10 @@ static int nfs_dentry_delete(struct dentry *dentry) static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { nfs_inode_return_delegation(inode); + if (S_ISDIR(inode->i_mode)) + /* drop any readdir cache as it could easily be old */ + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); drop_nlink(inode); -- cgit v1.2.3 From 08efa202eb398ce7939885a4a01df370fd392068 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 1 May 2007 10:56:25 -0400 Subject: NFS4: invalidate cached acl on setacl The ACL that the server sets may not be exactly the one we set--for example, it may silently turn off bits that it does not support. So we should remove any cached ACL so that any subsequent request for the ACL will go to the server. Signed-off-by: "J. Bruce Fields" Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f52cf5c33c6c..3b5ca1b15fe9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2647,8 +2647,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (ret == 0) - nfs4_write_cached_acl(inode, buf, buflen); + nfs_zap_caches(inode); return ret; } -- cgit v1.2.3 From a19b89cad51b6f0da8f4bafdfdcfb10264cbcdea Mon Sep 17 00:00:00 2001 From: Jason Uhlenkott Date: Thu, 26 Apr 2007 17:25:51 -0700 Subject: NFS: Clean up nfs_create_request comments Remove some stale comments about hard limits which went away in 2.5. Signed-off-by: Jason Uhlenkott Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d846d39a31ef..fe90130bd7a9 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -51,9 +51,7 @@ nfs_page_free(struct nfs_page *p) * @count: number of bytes to read/write * * The page must be locked by the caller. This makes sure we never - * create two different requests for the same page, and avoids - * a possible deadlock when we reach the hard limit on the number - * of dirty pages. + * create two different requests for the same page. * User should ensure it is safe to sleep in this function. */ struct nfs_page * @@ -64,16 +62,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct nfs_server *server = NFS_SERVER(inode); struct nfs_page *req; - /* Deal with hard limits. */ for (;;) { /* try to allocate the request struct */ req = nfs_page_alloc(); if (req != NULL) break; - /* Try to free up at least one request in order to stay - * below the hard limit - */ if (signalled() && (server->flags & NFS_MOUNT_INTR)) return ERR_PTR(-ERESTARTSYS); yield(); -- cgit v1.2.3 From 84dde76c4a2d99ed2d7de6ec82c53b56620900a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 May 2007 14:44:06 -0400 Subject: NFS: Fix a compile glitch on 64-bit systems fs/nfs/pagelist.c:226: error: conflicting types for 'nfs_pageio_init' include/linux/nfs_page.h:80: error: previous declaration of 'nfs_pageio_init' was here Thanks to Andrew for spotting this... Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fe90130bd7a9..388950118f59 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -228,7 +228,7 @@ out: void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), - unsigned int bsize, + size_t bsize, int io_flags) { INIT_LIST_HEAD(&desc->pg_list); -- cgit v1.2.3