From 9cccef95052c7169040c3577e17d4f6fa230cc28 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 22 Jul 2007 17:09:05 -0400 Subject: NFS: Clean up write code... The addition of nfs_page_mkwrite means that We should no longer need to create requests inside nfs_writepage() Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - include/linux/nfs_page.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7250eeadd7b5..32bd5d38cdfd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -420,7 +420,6 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); -extern int nfs_set_page_dirty(struct page *); /* * Try to write back everything synchronously (but check the diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 78e60798d10e..30dbcc185e69 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -30,7 +30,6 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 -#define PG_NEED_FLUSH 3 struct nfs_inode; struct nfs_page { -- cgit v1.2.3 From 90e9a3f9b0a14198a8ae5a0a5c13ad30f0b8b40d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 22 Jul 2007 19:27:46 -0400 Subject: VFS: Remove writeback_control->fs_private The only user of this field was NFS. Signed-off-by: Trond Myklebust --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b4af6bcb7b7a..a111d3de2a57 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -61,8 +61,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ - - void *fs_private; /* For use by ->writepages() */ }; /* -- cgit v1.2.3 From ed90ef51a33f572fa7d00c8b05f7457be727e74f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jul 2007 13:13:28 -0400 Subject: NFS: Clean up NFS writeback flush code The only user of nfs_sync_mapping_range() is nfs_getattr(), which uses it to flush out the entire inode without sending a commit. We therefore replace nfs_sync_mapping_range with a more appropriate helper. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/write.c | 41 ++++++++++++++--------------------------- include/linux/nfs_fs.h | 2 +- 3 files changed, 16 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 71a49c3acabd..119fefef13f3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -431,7 +431,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ if (S_ISREG(inode->i_mode)) - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + nfs_wb_nocommit(inode); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 667ff27ef235..75adb8e78db9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1325,12 +1325,8 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) +static int nfs_write_mapping(struct address_space *mapping, int how) { - struct address_space *mapping = inode->i_mapping; struct writeback_control wbc = { .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, @@ -1343,35 +1339,26 @@ int nfs_wb_all(struct inode *inode) ret = nfs_writepages(mapping, &wbc); if (ret < 0) goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; + ret = nfs_sync_mapping_wait(mapping, &wbc, how); + if (ret < 0) + goto out; + return 0; out: __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return ret; } -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - .for_writepages = 1, - }; - int ret; + return nfs_write_mapping(inode->i_mapping, 0); +} - ret = nfs_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; +int nfs_wb_nocommit(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 32bd5d38cdfd..15eec27cb60b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -426,8 +426,8 @@ extern void nfs_writedata_release(void *); * return value!) */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); -extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From 7b159fc18d417980f57aef64cab3417ee6af70f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Jul 2007 14:09:54 -0400 Subject: NFS: Fall back to synchronous writes when a background write errors... This helps prevent huge queues of background writes from building up whenever the server runs out of disk or quota space, or if someone changes the file access modes behind our backs. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 64 ++++++++++++++++++++++++++++++++++---------------- fs/nfs/write.c | 13 +++++++--- include/linux/nfs_fs.h | 3 +++ 3 files changed, 57 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 07f43791f696..5595b32c0915 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -176,6 +176,31 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return remote_llseek(filp, offset, origin); } +/* + * Helper for nfs_file_flush() and nfs_fsync() + * + * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to + * disk, but it retrieves and clears ctx->error after synching, despite + * the two being set at the same time in nfs_context_set_write_error(). + * This is because the former is used to notify the _next_ call to + * nfs_file_write() that a write error occured, and hence cause it to + * fall back to doing a synchronous write. + */ +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) +{ + int have_error, status; + int ret = 0; + + have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + status = nfs_wb_all(inode); + have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + if (have_error) + ret = xchg(&ctx->error, 0); + if (!ret) + ret = status; + return ret; +} + /* * Flush all dirty pages, and check for write errors. * @@ -192,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - lock_kernel(); + /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - } - unlock_kernel(); + status = nfs_do_fsync(ctx, inode); + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); return status; } @@ -278,19 +298,11 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = dentry->d_inode; - int status; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - lock_kernel(); - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - } - unlock_kernel(); - return status; + return nfs_do_fsync(ctx, inode); } /* @@ -377,6 +389,18 @@ static struct vm_operations_struct nfs_file_vm_ops = { .page_mkwrite = nfs_vm_page_mkwrite, }; +static int nfs_need_sync_write(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx; + + if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + return 1; + ctx = filp->private_data; + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + return 1; + return 0; +} + static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -413,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { - int err = nfs_fsync(iocb->ki_filp, dentry, 1); + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = nfs_do_fsync(iocb->ki_filp->private_data, inode); if (err < 0) result = err; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b3c5f5db73a4..fb396ea5accf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata) nfs_writedata_free(wdata); } +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} + static struct nfs_page *nfs_page_find_request_locked(struct page *page) { struct nfs_page *req = NULL; @@ -945,7 +952,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto out; } @@ -1008,7 +1015,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto remove_request; } @@ -1222,7 +1229,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 15eec27cb60b..1d1343fd2ddd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,9 @@ struct nfs_open_context { struct nfs4_state *state; fl_owner_t lockowner; int mode; + + unsigned long flags; +#define NFS_CONTEXT_ERROR_WRITE (0) int error; struct list_head list; -- cgit v1.2.3 From c7e15961115028b99f6142266b5fb08acca0e8dd Mon Sep 17 00:00:00 2001 From: Fabio Olive Leite Date: Thu, 26 Jul 2007 22:59:00 -0300 Subject: Re: [NFS] [PATCH] Attribute timeout handling and wrapping u32 jiffies I would like to discuss the idea that the current checks for attribute timeout using time_after are inadequate for 32bit architectures, since time_after works correctly only when the two timestamps being compared are within 2^31 jiffies of each other. The signed overflow caused by comparing values more than 2^31 jiffies apart will flip the result, causing incorrect assumptions of validity. 2^31 jiffies is a fairly large period of time (~25 days) when compared to the lifetime of most kernel data structures, but for long lived NFS mounts that can sit idle for months (think that for some reason autofs cannot be used), it is easy to compare inode attribute timestamps with very disparate or even bogus values (as in when jiffies have wrapped many times, where the comparison doesn't even make sense). Currently the code tests for attribute timeout by simply adding the desired amount of jiffies to the stored timestamp and comparing that with the current timestamp of obtained attribute data with time_after. This is incorrect, as it returns true for the desired timeout period and another full 2^31 range of jiffies. In testing with artificial jumps (several small jumps, not one big crank) of the jiffies I was able to reproduce a problem found in a server with very long lived NFS mounts, where attributes would not be refreshed even after touching files and directories in the server: Initial uptime: 03:42:01 up 6 min, 0 users, load average: 0.01, 0.12, 0.07 NFS volume is mounted and time is advanced: 03:38:09 up 25 days, 2 min, 0 users, load average: 1.22, 1.05, 1.08 # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Dec 17 03:38 /local/A/foo/bar -rw-r--r-- 1 root root 0 Nov 22 00:36 /nfs/A/foo/bar # touch /local/A/foo/bar # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Dec 17 03:47 /local/A/foo/bar -rw-r--r-- 1 root root 0 Nov 22 00:36 /nfs/A/foo/bar We can see the local mtime is updated, but the NFS mount still shows the old value. The patch below makes it work: Initial setup... 07:11:02 up 25 days, 1 min, 0 users, load average: 0.15, 0.03, 0.04 # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:11 /local/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:11 /nfs/A/foo/bar # touch /local/A/foo/bar # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:14 /local/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:14 /nfs/A/foo/bar Signed-off-by: Fabio Olive Leite Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 4 ++-- include/linux/jiffies.h | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dd02db43cbe6..6e0aa04451dd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1855,7 +1855,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3ad938cecd73..7c8ca175d87b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -656,7 +656,7 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } /** @@ -1055,7 +1055,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + } else if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c080f61fb024..f1c87ad24fea 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -115,6 +115,10 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64() */ -- cgit v1.2.3 From fbfe3cc677c1a62ca6472abf24d03d4bf9f03a55 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 6 Aug 2007 11:57:02 -0400 Subject: SUNRPC: Add hex-formatted address support to rpc_peeraddr2str() Add support for the NFS client's need to export volume information with IP addresses formatted in hex instead of decimal. This isn't used yet, but subsequent patches (not in this series) will change the NFS client to use this functionality. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprtsock.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d11cedd14f0f..4596c26f97aa 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -53,6 +53,8 @@ enum rpc_display_format_t { RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, RPC_DISPLAY_ALL, + RPC_DISPLAY_HEX_ADDR, + RPC_DISPLAY_HEX_PORT, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b3f40b8dbcba..8a684d873111 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -296,6 +296,20 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) -- cgit v1.2.3 From 756805e7a76bcd2aae07fe31786fe453375e60b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Aug 2007 16:03:26 -0400 Subject: SUNRPC: Add support for formatted universal addresses "Universal addresses" are a string representation of an IP address and port. They are described fully in RFC 3530, section 2.2. Add support for generating them in the RPC client's socket transport module. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprtsock.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4596c26f97aa..902a9c0a2d9a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -55,6 +55,7 @@ enum rpc_display_format_t { RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 01121a4f0851..7a154e4b70f5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -328,6 +328,15 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) ntohs(addr->sin_port)); } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; } static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) @@ -380,6 +389,15 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) ntohs(addr->sin6_port)); } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) -- cgit v1.2.3 From 89eb21c35b61b5157940e1b78c2c6d0529d11c63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 11 Sep 2007 18:00:09 -0400 Subject: SUNRPC: fix a signed v. unsigned comparison nit in rpc_bind_new_program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /home/cel/linux/net/sunrpc/clnt.c: In function ‘rpc_bind_new_program’: /home/cel/linux/net/sunrpc/clnt.c:445: warning: comparison between signed and unsigned RPC version numbers are u32, not int. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c0d9d14983b3..d9d5c5ad826c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -117,7 +117,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - struct rpc_program *, int); + struct rpc_program *, u32); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c796e2fd2708..c3d571abcded 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -436,7 +436,7 @@ rpc_release_client(struct rpc_clnt *clnt) */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, struct rpc_program *program, - int vers) + u32 vers) { struct rpc_clnt *clnt; struct rpc_version *version; -- cgit v1.2.3 From 4f40ee4a02a2d017b714d5b2faaf5c25bf9eae47 Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:42:38 -0400 Subject: SUNRPC: move per-transport rpcbind netid's Move the TCP/UDP rpcbind netid's from the rpcbind client to a global header. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 13 +++++++++++++ net/sunrpc/rpcb_clnt.c | 26 +++----------------------- 2 files changed, 16 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 784d4c3ef651..c4beb5775111 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -138,6 +138,19 @@ typedef __be32 rpc_fraghdr; #define RPC_MAX_HEADER_WITH_AUTH \ (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) +/* + * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + */ +#define RPCBIND_NETID_UDP "udp" +#define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_UDP6 "udp6" +#define RPCBIND_NETID_TCP6 "tcp6" + +/* + * Note that RFC 1833 does not put any size restrictions on the + * netid string, but all currently defined netid's fit in 4 bytes. + */ +#define RPCBIND_MAXNETIDLEN (4u) #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d7b9f02489e6..b028a0ecd593 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -92,26 +92,6 @@ enum { */ #define RPCB_MAXADDRLEN (128u) -/* - * r_netid - * - * Quoting RFC 3530, section 2.2: - * - * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP - * over IPv4 the value of r_netid is the string "udp". - * - * ... - * - * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP - * over IPv6 the value of r_netid is the string "udp6". - */ -#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ -#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ -#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ -#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ - -#define RPCB_MAXNETIDLEN (4u) - /* * r_owner * @@ -408,8 +388,8 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : - RPCB_NETID_UDP; + map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCBIND_NETID_TCP : + RPCBIND_NETID_UDP; memcpy(map->r_addr, rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), sizeof(map->r_addr)); @@ -587,7 +567,7 @@ out_err: #define RPCB_port_sz (1u) #define RPCB_boolean_sz (1u) -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) -- cgit v1.2.3 From 4417c8c41a51a2ae95b2a2fa2811640b368c4151 Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:43:05 -0400 Subject: SUNRPC: export per-transport rpcbind netid's The rpcbind (v3+) netid is provided by each RPC client transport. This fixes an omission in IPv6 rpcbind client support, and enables future extension. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/rpcb_clnt.c | 3 +-- net/sunrpc/xprtsock.c | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 902a9c0a2d9a..513b0657e14d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -56,6 +56,7 @@ enum rpc_display_format_t { RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, RPC_DISPLAY_UNIVERSAL_ADDR, + RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b028a0ecd593..6f0af08a51dc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -388,8 +388,7 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCBIND_NETID_TCP : - RPCBIND_NETID_UDP; + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); memcpy(map->r_addr, rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), sizeof(map->r_addr)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7a154e4b70f5..5bdce8fa2d56 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -337,6 +337,10 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) ntohs(addr->sin_port) & 0xff); } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); } static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) @@ -398,6 +402,10 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) ntohs(addr->sin6_port) & 0xff); } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) -- cgit v1.2.3 From 4f22ccc3460ef65e9899ec271d36fc4ef795c68d Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:44:58 -0400 Subject: SUNRPC: mark bulk read/write data in xdrbuf Adds a flag word to the xdrbuf struct which indicates any bulk disposition of the data. This enables RPC transport providers to marshal it efficiently/appropriately, and may enable other optimizations. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 2 ++ fs/nfs/nfs3xdr.c | 2 ++ fs/nfs/nfs4xdr.c | 2 ++ include/linux/sunrpc/xdr.h | 5 ++++- net/sunrpc/clnt.c | 1 + 5 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1c570948fc16..668ab96c7b59 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -251,6 +251,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -313,6 +314,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 8a225fbe9ee2..616d3267b7e7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -346,6 +346,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -367,6 +368,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4dbbf44727ea..5f353d4686b6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1856,6 +1856,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->count); + req->rq_rcv_buf.flags |= XDRBUF_READ; out: return status; } @@ -1932,6 +1933,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea status = encode_write(&xdr, args); if (status) goto out; + req->rq_snd_buf.flags |= XDRBUF_WRITE; status = encode_getfattr(&xdr, args->bitmask); out: return status; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index c6b53d181bfa..0751c9464d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -70,7 +70,10 @@ struct xdr_buf { struct page ** pages; /* Array of contiguous pages */ unsigned int page_base, /* Start of page data */ - page_len; /* Length of page data */ + page_len, /* Length of page data */ + flags; /* Flags for data disposition */ +#define XDRBUF_READ 0x01 /* target of file read */ +#define XDRBUF_WRITE 0x02 /* source of file write */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b19bacf42564..19c129ed6467 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -874,6 +874,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->page_len = 0; + buf->flags = 0; buf->len = 0; buf->buflen = len; } -- cgit v1.2.3 From 81c098af3da7981902e9f8163aeccc2467c4ba6d Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:46:00 -0400 Subject: SUNRPC: Provide a new API for registering transport implementations To allow transport capabilities to be loaded dynamically, provide an API for registering and unregistering the transports with the RPC client. Eventually xprt_create_transport() will be changed to search the list of registered transports when initializing a fresh transport. Signed-off-by: Chuck Lever Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 11 +++++++ net/sunrpc/xprt.c | 75 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 513b0657e14d..7b6b137eca97 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -208,6 +208,15 @@ struct rpc_xprtsock_create { struct rpc_timeout * timeout; /* optional timeout parameters */ }; +struct xprt_class { + struct list_head list; + unsigned short family; + int protocol; + struct rpc_xprt * (*setup)(struct rpc_xprtsock_create *); + struct module *owner; + char name[32]; +}; + /* * Transport operations used by ULPs */ @@ -239,6 +248,8 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * /* * Transport switch helper functions */ +int xprt_register_transport(struct xprt_class *type); +int xprt_unregister_transport(struct xprt_class *type); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index bc13616e7fdc..520859622af5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(xprt_list); + /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion @@ -80,6 +83,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) +/** + * xprt_register_transport - register a transport implementation + * @transport: transport to register + * + * If a transport implementation is loaded as a kernel module, it can + * call this interface to make itself known to the RPC client. + * + * Returns: + * 0: transport successfully registered + * -EEXIST: transport already registered + * -EINVAL: transport module being unloaded + */ +int xprt_register_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = -EEXIST; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + /* don't register the same transport class twice */ + if (t == transport) + goto out; + } + + result = -EINVAL; + if (try_module_get(THIS_MODULE)) { + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; + } + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_register_transport); + +/** + * xprt_unregister_transport - unregister a transport implementation + * transport: transport to unregister + * + * Returns: + * 0: transport successfully unregistered + * -ENOENT: transport never registered + */ +int xprt_unregister_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t == transport) { + printk(KERN_INFO + "RPC: Unregistered %s transport module.\n", + transport->name); + list_del_init(&transport->list); + module_put(THIS_MODULE); + goto out; + } + } + result = -ENOENT; + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_unregister_transport); + /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport -- cgit v1.2.3 From 3c341b0b925eee01daae2c594b81e673f659d7cd Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:47:07 -0400 Subject: SUNRPC: rename the rpc_xprtsock_create structure To prepare for including non-sockets-based RPC transports, change the overly suggestive name of the transport creation arguments struct. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 10 +++++----- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 7b6b137eca97..6992ff02d737 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -200,7 +200,7 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -struct rpc_xprtsock_create { +struct xprt_create { int proto; /* IPPROTO_UDP or IPPROTO_TCP */ struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ @@ -212,7 +212,7 @@ struct xprt_class { struct list_head list; unsigned short family; int protocol; - struct rpc_xprt * (*setup)(struct rpc_xprtsock_create *); + struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; }; @@ -225,7 +225,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); +struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -265,8 +265,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); -struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); +struct rpc_xprt *xs_setup_udp(struct xprt_create *args); +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); int init_socket_xprt(void); void cleanup_socket_xprt(void); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 19c129ed6467..e86958c61a20 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -240,7 +240,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - struct rpc_xprtsock_create xprtargs = { + struct xprt_create xprtargs = { .proto = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7f8c60b84396..473b48ff4523 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -979,7 +979,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) +struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3bc9c921c82f..4f1a57b28a49 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1784,7 +1784,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, +static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, unsigned int slot_table_size) { struct rpc_xprt *xprt; @@ -1826,7 +1826,7 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; @@ -1892,7 +1892,7 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; -- cgit v1.2.3 From 49c36fcc441baf6a4d698e3645d1adf28edaf57b Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:47:31 -0400 Subject: SUNRPC: rearrange RPC sockets definitions To prepare for including non-sockets-based RPC transports, move the sockets-dependent definitions into their own file. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 22 ---------------------- include/linux/sunrpc/xprtsock.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/sunrpc/sunrpc_syms.c | 2 +- net/sunrpc/xprtsock.c | 1 + 4 files changed, 42 insertions(+), 23 deletions(-) create mode 100644 include/linux/sunrpc/xprtsock.h (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6992ff02d737..d7b8fcd312cc 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -19,24 +19,10 @@ #ifdef __KERNEL__ -extern unsigned int xprt_udp_slot_table_entries; -extern unsigned int xprt_tcp_slot_table_entries; - #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -/* - * Parameters for choosing a free port - */ -extern unsigned int xprt_min_resvport; -extern unsigned int xprt_max_resvport; - -#define RPC_MIN_RESVPORT (1U) -#define RPC_MAX_RESVPORT (65535U) -#define RPC_DEF_MIN_RESVPORT (665U) -#define RPC_DEF_MAX_RESVPORT (1023U) - /* * This describes a timeout strategy */ @@ -262,14 +248,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); -/* - * Socket transport setup operations - */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args); -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); -int init_socket_xprt(void); -void cleanup_socket_xprt(void); - /* * Reserved bit positions in xprt->state */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h new file mode 100644 index 000000000000..9bde77061e4f --- /dev/null +++ b/include/linux/sunrpc/xprtsock.h @@ -0,0 +1,40 @@ +/* + * linux/include/linux/sunrpc/xprtsock.h + * + * Declarations for the RPC transport socket provider. + */ + +#ifndef _LINUX_SUNRPC_XPRTSOCK_H +#define _LINUX_SUNRPC_XPRTSOCK_H + +#ifdef __KERNEL__ + +/* + * Socket transport setup operations + */ +struct rpc_xprt *xs_setup_udp(struct xprt_create *args); +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); + +int init_socket_xprt(void); +void cleanup_socket_xprt(void); + +/* + * RPC slot table sizes for UDP, TCP transports + */ +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (665U) +#define RPC_DEF_MAX_RESVPORT (1023U) + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 384c4ad5ab86..33d89e842c85 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -20,7 +20,7 @@ #include #include #include - +#include /* RPC scheduler */ EXPORT_SYMBOL(rpc_execute); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4f1a57b28a49..192a06e3b8c5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 4fa016eb248cac875541fa199af550a8aefa0e90 Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:47:57 -0400 Subject: NFS/SUNRPC: support transport protocol naming To prepare for including non-sockets-based RPC transports, select RPC transports by an identifier (to be used in following patches). Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++--- include/linux/sunrpc/xprtsock.h | 11 +++++++++++ net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 8 +++----- net/sunrpc/xprtsock.c | 6 ++---- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d7b8fcd312cc..30b17b3bc1a9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -187,7 +187,7 @@ struct rpc_xprt { }; struct xprt_create { - int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + int ident; /* XPRT_TRANSPORT identifier */ struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; @@ -196,8 +196,7 @@ struct xprt_create { struct xprt_class { struct list_head list; - unsigned short family; - int protocol; + int ident; /* XPRT_TRANSPORT identifier */ struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 9bde77061e4f..2c6c2c2783d8 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -18,6 +18,17 @@ struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); int init_socket_xprt(void); void cleanup_socket_xprt(void); +/* + * RPC transport identifiers for UDP, TCP + * + * To preserve compatibility with the historical use of raw IP protocol + * id's for transport selection, these are specified with the previous + * values. No such restriction exists for new transports, except that + * they may not collide with these values (17 and 6, respectively). + */ +#define XPRT_TRANSPORT_UDP IPPROTO_UDP +#define XPRT_TRANSPORT_TCP IPPROTO_TCP + /* * RPC slot table sizes for UDP, TCP transports */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e86958c61a20..6cdf53c489b7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -241,7 +241,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) struct rpc_xprt *xprt; struct rpc_clnt *clnt; struct xprt_create xprtargs = { - .proto = args->protocol, + .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 473b48ff4523..282a9a2ec90c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -104,7 +104,7 @@ int xprt_register_transport(struct xprt_class *transport) spin_lock(&xprt_list_lock); list_for_each_entry(t, &xprt_list, list) { /* don't register the same transport class twice */ - if (t == transport) + if (t->ident == transport->ident) goto out; } @@ -987,15 +987,13 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) spin_lock(&xprt_list_lock); list_for_each_entry(t, &xprt_list, list) { - if ((t->family == args->dstaddr->sa_family) && - (t->protocol == args->proto)) { + if (t->ident == args->ident) { spin_unlock(&xprt_list_lock); goto found; } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%u/%d) not supported\n", - args->dstaddr->sa_family, args->proto); + printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 192a06e3b8c5..b81494a97a5e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1955,8 +1955,7 @@ static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", .owner = THIS_MODULE, - .family = AF_INET, - .protocol = IPPROTO_UDP, + .ident = IPPROTO_UDP, .setup = xs_setup_udp, }; @@ -1964,8 +1963,7 @@ static struct xprt_class xs_tcp_transport = { .list = LIST_HEAD_INIT(xs_tcp_transport.list), .name = "tcp", .owner = THIS_MODULE, - .family = AF_INET, - .protocol = IPPROTO_TCP, + .ident = IPPROTO_TCP, .setup = xs_setup_tcp, }; -- cgit v1.2.3 From c3a57ed7471a17b07844d531534d970b84b69faf Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:49:15 -0400 Subject: RPCRDMA: Kconfig and header file with rpcrdma protocol definitions This file implements the configuration target, protocol template and constants for the rpcrdma transport framing, for use by the xprtrdma rpc transport implementation. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- fs/Kconfig | 8 +++ include/linux/sunrpc/rpc_rdma.h | 116 ++++++++++++++++++++++++++++++++++++++++ include/linux/sunrpc/xprtrdma.h | 85 +++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 include/linux/sunrpc/rpc_rdma.h create mode 100644 include/linux/sunrpc/xprtrdma.h (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index f9eed6d79066..b9808bba5722 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1728,6 +1728,14 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_XPRT_RDMA + tristate "RDMA transport for sunrpc (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + default m + help + Adds a client RPC transport for supporting kernel NFS over RDMA + mounts, including Infiniband and iWARP. Experimental. + config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h new file mode 100644 index 000000000000..0013a0d8dc6b --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_RPC_RDMA_H +#define _LINUX_SUNRPC_RPC_RDMA_H + +struct rpcrdma_segment { + uint32_t rs_handle; /* Registered memory handle */ + uint32_t rs_length; /* Length of the chunk in bytes */ + uint64_t rs_offset; /* Chunk virtual address or offset */ +}; + +/* + * read chunk(s), encoded as a linked list. + */ +struct rpcrdma_read_chunk { + uint32_t rc_discrim; /* 1 indicates presence */ + uint32_t rc_position; /* Position in XDR stream */ + struct rpcrdma_segment rc_target; +}; + +/* + * write chunk, and reply chunk. + */ +struct rpcrdma_write_chunk { + struct rpcrdma_segment wc_target; +}; + +/* + * write chunk(s), encoded as a counted array. + */ +struct rpcrdma_write_array { + uint32_t wc_discrim; /* 1 indicates presence */ + uint32_t wc_nchunks; /* Array count */ + struct rpcrdma_write_chunk wc_array[0]; +}; + +struct rpcrdma_msg { + uint32_t rm_xid; /* Mirrors the RPC header xid */ + uint32_t rm_vers; /* Version of this protocol */ + uint32_t rm_credit; /* Buffers requested/granted */ + uint32_t rm_type; /* Type of message (enum rpcrdma_proc) */ + union { + + struct { /* no chunks */ + uint32_t rm_empty[3]; /* 3 empty chunk lists */ + } rm_nochunks; + + struct { /* no chunks and padded */ + uint32_t rm_align; /* Padding alignment */ + uint32_t rm_thresh; /* Padding threshold */ + uint32_t rm_pempty[3]; /* 3 empty chunk lists */ + } rm_padded; + + uint32_t rm_chunks[0]; /* read, write and reply chunks */ + + } rm_body; +}; + +#define RPCRDMA_HDRLEN_MIN 28 + +enum rpcrdma_errcode { + ERR_VERS = 1, + ERR_CHUNK = 2 +}; + +struct rpcrdma_err_vers { + uint32_t rdma_vers_low; /* Version range supported by peer */ + uint32_t rdma_vers_high; +}; + +enum rpcrdma_proc { + RDMA_MSG = 0, /* An RPC call or reply msg */ + RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ + RDMA_MSGP = 2, /* An RPC call or reply msg with padding */ + RDMA_DONE = 3, /* Client signals reply completion */ + RDMA_ERROR = 4 /* An RPC RDMA encoding error */ +}; + +#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h new file mode 100644 index 000000000000..4de56b1d372b --- /dev/null +++ b/include/linux/sunrpc/xprtrdma.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRTRDMA_H +#define _LINUX_SUNRPC_XPRTRDMA_H + +/* + * RPC transport identifier for RDMA + */ +#define XPRT_TRANSPORT_RDMA 256 + +/* + * rpcbind (v3+) RDMA netid. + */ +#define RPCBIND_NETID_RDMA "rdma" + +/* + * Constants. Max RPC/NFS header is big enough to account for + * additional marshaling buffers passed down by Linux client. + * + * RDMA header is currently fixed max size, and is big enough for a + * fully-chunked NFS message (read chunks are the largest). Note only + * a single chunk type per message is supported currently. + */ +#define RPCRDMA_MIN_SLOT_TABLE (2U) +#define RPCRDMA_DEF_SLOT_TABLE (32U) +#define RPCRDMA_MAX_SLOT_TABLE (256U) + +#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ + +#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ + +#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ +#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ + +/* memory registration strategies */ +#define RPCRDMA_PERSISTENT_REGISTRATION (1) + +enum rpcrdma_memreg { + RPCRDMA_BOUNCEBUFFERS = 0, + RPCRDMA_REGISTER, + RPCRDMA_MEMWINDOWS, + RPCRDMA_MEMWINDOWS_ASYNC, + RPCRDMA_MTHCAFMR, + RPCRDMA_ALLPHYSICAL, + RPCRDMA_LAST +}; + +#endif /* _LINUX_SUNRPC_XPRTRDMA_H */ -- cgit v1.2.3 From f58851e6b0f148fb4b2a1c6f70beb2f125863c0f Mon Sep 17 00:00:00 2001 From: "\\\"Talpey, Thomas\\" Date: Mon, 10 Sep 2007 13:50:12 -0400 Subject: RPCRDMA: rpc rdma transport switch This implements the configuration and building of the core transport switch implementation of the rpcrdma transport. Stubs are provided for the rpcrdma protocol handling, and the infiniband/iwarp verbs interface. These are provided in following patches. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 5 + net/sunrpc/Makefile | 1 + net/sunrpc/xprtrdma/Makefile | 3 + net/sunrpc/xprtrdma/rpc_rdma.c | 9 + net/sunrpc/xprtrdma/transport.c | 800 ++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/verbs.c | 37 ++ net/sunrpc/xprtrdma/xprt_rdma.h | 330 +++++++++++++++++ 7 files changed, 1185 insertions(+) create mode 100644 net/sunrpc/xprtrdma/Makefile create mode 100644 net/sunrpc/xprtrdma/rpc_rdma.c create mode 100644 net/sunrpc/xprtrdma/transport.c create mode 100644 net/sunrpc/xprtrdma/verbs.c create mode 100644 net/sunrpc/xprtrdma/xprt_rdma.h (limited to 'include') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361e..3347c72b848a 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -88,6 +88,11 @@ enum { CTL_SLOTTABLE_TCP, CTL_MIN_RESVPORT, CTL_MAX_RESVPORT, + CTL_SLOTTABLE_RDMA, + CTL_RDMA_MAXINLINEREAD, + CTL_RDMA_MAXINLINEWRITE, + CTL_RDMA_WRITEPADDING, + CTL_RDMA_MEMREG, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8ebfc4db7f51..5c69a725e530 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile new file mode 100644 index 000000000000..264f0feeb513 --- /dev/null +++ b/net/sunrpc/xprtrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o + +xprtrdma-y := transport.o rpc_rdma.o verbs.o diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c new file mode 100644 index 000000000000..b0587f3a5d77 --- /dev/null +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -0,0 +1,9 @@ +/* + * Placeholders for subsequent patches + */ + +#include "xprt_rdma.h" + +void rpcrdma_conn_func(struct rpcrdma_ep *a) { } +void rpcrdma_reply_handler(struct rpcrdma_rep *a) { } +int rpcrdma_marshal_req(struct rpc_rqst *a) { return EINVAL; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c new file mode 100644 index 000000000000..dc55cc974c90 --- /dev/null +++ b/net/sunrpc/xprtrdma/transport.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * transport.c + * + * This file contains the top-level implementation of an RPC RDMA + * transport. + * + * Naming convention: functions beginning with xprt_ are part of the + * transport switch. All others are RPC RDMA internal. + */ + +#include +#include +#include + +#include "xprt_rdma.h" + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); +MODULE_AUTHOR("Network Appliance, Inc."); + +/* + * tunables + */ + +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_inline_write_padding; +#if !RPCRDMA_PERSISTENT_REGISTRATION +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ +#else +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; +#endif + +#ifdef RPC_DEBUG + +static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; +static unsigned int zero; +static unsigned int max_padding = PAGE_SIZE; +static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; +static unsigned int max_memreg = RPCRDMA_LAST - 1; + +static struct ctl_table_header *sunrpc_table_header; + +static ctl_table xr_tunables_table[] = { + { + .ctl_name = CTL_SLOTTABLE_RDMA, + .procname = "rdma_slot_table_entries", + .data = &xprt_rdma_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_RDMA_MAXINLINEREAD, + .procname = "rdma_max_inline_read", + .data = &xprt_rdma_max_inline_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_MAXINLINEWRITE, + .procname = "rdma_max_inline_write", + .data = &xprt_rdma_max_inline_write, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_WRITEPADDING, + .procname = "rdma_inline_write_padding", + .data = &xprt_rdma_inline_write_padding, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &max_padding, + }, + { + .ctl_name = CTL_RDMA_MEMREG, + .procname = "rdma_memreg_strategy", + .data = &xprt_rdma_memreg_strategy, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_memreg, + .extra2 = &max_memreg, + }, + { + .ctl_name = 0, + }, +}; + +static ctl_table sunrpc_table[] = { + { + .ctl_name = CTL_SUNRPC, + .procname = "sunrpc", + .mode = 0555, + .child = xr_tunables_table + }, + { + .ctl_name = 0, + }, +}; + +#endif + +static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ + +static void +xprt_rdma_format_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) + &rpcx_to_rdmad(xprt).addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%u", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), "rdma"); + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + /* netid */ + xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; +} + +static void +xprt_rdma_free_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); +} + +static void +xprt_rdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_xprt *r_xprt = + container_of(work, struct rpcrdma_xprt, rdma_connect.work); + struct rpc_xprt *xprt = &r_xprt->xprt; + int rc = 0; + + if (!xprt->shutdown) { + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + goto out; + } + goto out_clear; + +out: + xprt_wake_pending_tasks(xprt, rc); + +out_clear: + dprintk("RPC: %s: exit\n", __func__); + xprt_clear_connecting(xprt); +} + +/* + * xprt_rdma_destroy + * + * Destroy the xprt. + * Free all memory associated with the object, including its own. + * NOTE: none of the *destroy methods free memory for their top-level + * objects, even though they may have allocated it (they do free + * private memory). It's up to the caller to handle it. In this + * case (RDMA transport), all structure memory is inlined with the + * struct rpcrdma_xprt. + */ +static void +xprt_rdma_destroy(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; + + dprintk("RPC: %s: called\n", __func__); + + cancel_delayed_work(&r_xprt->rdma_connect); + flush_scheduled_work(); + + xprt_clear_connected(xprt); + + rpcrdma_buffer_destroy(&r_xprt->rx_buf); + rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", + __func__, rc); + rpcrdma_ia_close(&r_xprt->rx_ia); + + xprt_rdma_free_addresses(xprt); + + kfree(xprt->slot); + xprt->slot = NULL; + kfree(xprt); + + dprintk("RPC: %s: returning\n", __func__); + + module_put(THIS_MODULE); +} + +/** + * xprt_setup_rdma - Set up transport to use RDMA + * + * @args: rpc transport arguments + */ +static struct rpc_xprt * +xprt_setup_rdma(struct xprt_create *args) +{ + struct rpcrdma_create_data_internal cdata; + struct rpc_xprt *xprt; + struct rpcrdma_xprt *new_xprt; + struct rpcrdma_ep *new_ep; + struct sockaddr_in *sin; + int rc; + + if (args->addrlen > sizeof(xprt->addr)) { + dprintk("RPC: %s: address too large\n", __func__); + return ERR_PTR(-EBADF); + } + + xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + if (xprt == NULL) { + dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + xprt->max_reqs = xprt_rdma_slot_table_entries; + xprt->slot = kcalloc(xprt->max_reqs, + sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) { + kfree(xprt); + dprintk("RPC: %s: couldn't allocate %d slots\n", + __func__, xprt->max_reqs); + return ERR_PTR(-ENOMEM); + } + + /* 60 second timeout, no retries */ + xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); + xprt->bind_timeout = (60U * HZ); + xprt->connect_timeout = (60U * HZ); + xprt->reestablish_timeout = (5U * HZ); + xprt->idle_timeout = (5U * 60 * HZ); + + xprt->resvport = 0; /* privileged port not needed */ + xprt->tsh_size = 0; /* RPC-RDMA handles framing */ + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; + xprt->ops = &xprt_rdma_procs; + + /* + * Set up RDMA-specific connect data. + */ + + /* Put server RDMA address in local cdata */ + memcpy(&cdata.addr, args->dstaddr, args->addrlen); + + /* Ensure xprt->addr holds valid server TCP (not RDMA) + * address, for any side protocols which peek at it */ + xprt->prot = IPPROTO_TCP; + xprt->addrlen = args->addrlen; + memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); + + sin = (struct sockaddr_in *)&cdata.addr; + if (ntohs(sin->sin_port) != 0) + xprt_set_bound(xprt); + + dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, + NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + + /* Set max requests */ + cdata.max_requests = xprt->max_reqs; + + /* Set some length limits */ + cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ + cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ + + cdata.inline_wsize = xprt_rdma_max_inline_write; + if (cdata.inline_wsize > cdata.wsize) + cdata.inline_wsize = cdata.wsize; + + cdata.inline_rsize = xprt_rdma_max_inline_read; + if (cdata.inline_rsize > cdata.rsize) + cdata.inline_rsize = cdata.rsize; + + cdata.padding = xprt_rdma_inline_write_padding; + + /* + * Create new transport instance, which includes initialized + * o ia + * o endpoint + * o buffers + */ + + new_xprt = rpcx_to_rdmax(xprt); + + rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, + xprt_rdma_memreg_strategy); + if (rc) + goto out1; + + /* + * initialize and create ep + */ + new_xprt->rx_data = cdata; + new_ep = &new_xprt->rx_ep; + new_ep->rep_remote_addr = cdata.addr; + + rc = rpcrdma_ep_create(&new_xprt->rx_ep, + &new_xprt->rx_ia, &new_xprt->rx_data); + if (rc) + goto out2; + + /* + * Allocate pre-registered send and receive buffers for headers and + * any inline data. Also specify any padding which will be provided + * from a preregistered zero buffer. + */ + rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, + &new_xprt->rx_data); + if (rc) + goto out3; + + /* + * Register a callback for connection events. This is necessary because + * connection loss notification is async. We also catch connection loss + * when reaping receives. + */ + INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); + new_ep->rep_func = rpcrdma_conn_func; + new_ep->rep_xprt = xprt; + + xprt_rdma_format_addresses(xprt); + + if (!try_module_get(THIS_MODULE)) + goto out4; + + return xprt; + +out4: + xprt_rdma_free_addresses(xprt); + rc = -EINVAL; +out3: + (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); +out2: + rpcrdma_ia_close(&new_xprt->rx_ia); +out1: + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(rc); +} + +/* + * Close a connection, during shutdown or timeout/reconnect + */ +static void +xprt_rdma_close(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + dprintk("RPC: %s: closing\n", __func__); + xprt_disconnect(xprt); + (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); +} + +static void +xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) +{ + struct sockaddr_in *sap; + + sap = (struct sockaddr_in *)&xprt->addr; + sap->sin_port = htons(port); + sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; + sap->sin_port = htons(port); + dprintk("RPC: %s: %u\n", __func__, port); +} + +static void +xprt_rdma_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + if (!xprt_test_and_set_connecting(xprt)) { + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } + } +} + +static int +xprt_rdma_reserve_xprt(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int credits = atomic_read(&r_xprt->rx_buf.rb_credits); + + /* == RPC_CWNDSCALE @ init, but *after* setup */ + if (r_xprt->rx_buf.rb_cwndscale == 0UL) { + r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; + dprintk("RPC: %s: cwndscale %lu\n", __func__, + r_xprt->rx_buf.rb_cwndscale); + BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); + } + xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; + return xprt_reserve_xprt_cong(task); +} + +/* + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual send/recv + * sequence. For this reason, the recv buffers are attached to send + * buffers for portions of the RPC. Note that the RPC layer allocates + * both send and receive buffers in the same call. We may register + * the receive buffer portion when using reply chunks. + */ +static void * +xprt_rdma_allocate(struct rpc_task *task, size_t size) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req, *nreq; + + req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + BUG_ON(NULL == req); + + if (size > req->rl_size) { + dprintk("RPC: %s: size %zd too large for buffer[%zd]: " + "prog %d vers %d proc %d\n", + __func__, size, req->rl_size, + task->tk_client->cl_prog, task->tk_client->cl_vers, + task->tk_msg.rpc_proc->p_proc); + /* + * Outgoing length shortage. Our inline write max must have + * been configured to perform direct i/o. + * + * This is therefore a large metadata operation, and the + * allocate call was made on the maximum possible message, + * e.g. containing long filename(s) or symlink data. In + * fact, while these metadata operations *might* carry + * large outgoing payloads, they rarely *do*. However, we + * have to commit to the request here, so reallocate and + * register it now. The data path will never require this + * reallocation. + * + * If the allocation or registration fails, the RPC framework + * will (doggedly) retry. + */ + if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == + RPCRDMA_BOUNCEBUFFERS) { + /* forced to "pure inline" */ + dprintk("RPC: %s: too much data (%zd) for inline " + "(r/w max %d/%d)\n", __func__, size, + rpcx_to_rdmad(xprt).inline_rsize, + rpcx_to_rdmad(xprt).inline_wsize); + size = req->rl_size; + rpc_exit(task, -EIO); /* fail the operation */ + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + goto out; + } + if (task->tk_flags & RPC_TASK_SWAPPER) + nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); + else + nreq = kmalloc(sizeof *req + size, GFP_NOFS); + if (nreq == NULL) + goto outfail; + + if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, + nreq->rl_base, size + sizeof(struct rpcrdma_req) + - offsetof(struct rpcrdma_req, rl_base), + &nreq->rl_handle, &nreq->rl_iov)) { + kfree(nreq); + goto outfail; + } + rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; + nreq->rl_size = size; + nreq->rl_niovs = 0; + nreq->rl_nchunks = 0; + nreq->rl_buffer = (struct rpcrdma_buffer *)req; + nreq->rl_reply = req->rl_reply; + memcpy(nreq->rl_segments, + req->rl_segments, sizeof nreq->rl_segments); + /* flag the swap with an unused field */ + nreq->rl_iov.length = 0; + req->rl_reply = NULL; + req = nreq; + } + dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); +out: + return req->rl_xdr_buf; + +outfail: + rpcrdma_buffer_put(req); + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + return NULL; +} + +/* + * This function returns all RDMA resources to the pool. + */ +static void +xprt_rdma_free(void *buffer) +{ + struct rpcrdma_req *req; + struct rpcrdma_xprt *r_xprt; + struct rpcrdma_rep *rep; + int i; + + if (buffer == NULL) + return; + + req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); + rep = req->rl_reply; + + dprintk("RPC: %s: called on 0x%p%s\n", + __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + + /* + * Finish the deregistration. When using mw bind, this was + * begun in rpcrdma_reply_handler(). In all other modes, we + * do it here, in thread context. The process is considered + * complete when the rr_func vector becomes NULL - this + * was put in place during rpcrdma_reply_handler() - the wait + * call below will not block if the dereg is "done". If + * interrupted, our framework will clean up. + */ + for (i = 0; req->rl_nchunks;) { + --req->rl_nchunks; + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + } + + if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { + rep->rr_func = NULL; /* abandon the callback */ + req->rl_reply = NULL; + } + + if (req->rl_iov.length == 0) { /* see allocate above */ + struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; + oreq->rl_reply = req->rl_reply; + (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, + req->rl_handle, + &req->rl_iov); + kfree(req); + req = oreq; + } + + /* Put back request+reply buffers */ + rpcrdma_buffer_put(req); +} + +/* + * send_request invokes the meat of RPC RDMA. It must do the following: + * 1. Marshal the RPC request into an RPC RDMA request, which means + * putting a header in front of data, and creating IOVs for RDMA + * from those in the request. + * 2. In marshaling, detect opportunities for RDMA, and use them. + * 3. Post a recv message to set up asynch completion, then send + * the request (rpcrdma_ep_post). + * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + */ + +static int +xprt_rdma_send_request(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + /* marshal the send itself */ + if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed\n", + __func__); + return -EIO; + } + + if (req->rl_reply == NULL) /* e.g. reconnection */ + rpcrdma_recv_buffer_get(req); + + if (req->rl_reply) { + req->rl_reply->rr_func = rpcrdma_reply_handler; + /* this need only be done once, but... */ + req->rl_reply->rr_xprt = xprt; + } + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { + xprt_disconnect(xprt); + return -ENOTCONN; /* implies disconnect */ + } + + rqst->rq_bytes_sent = 0; + return 0; +} + +static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, + "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " + "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", + + 0, /* need a local port? */ + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u, + + r_xprt->rx_stats.read_chunk_count, + r_xprt->rx_stats.write_chunk_count, + r_xprt->rx_stats.reply_chunk_count, + r_xprt->rx_stats.total_rdma_request, + r_xprt->rx_stats.total_rdma_reply, + r_xprt->rx_stats.pullup_copy_count, + r_xprt->rx_stats.fixup_copy_count, + r_xprt->rx_stats.hardway_register_count, + r_xprt->rx_stats.failed_marshal_count, + r_xprt->rx_stats.bad_reply_count); +} + +/* + * Plumbing for rpc transport switch and kernel module + */ + +static struct rpc_xprt_ops xprt_rdma_procs = { + .reserve_xprt = xprt_rdma_reserve_xprt, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .release_request = xprt_release_rqst_cong, /* ditto */ + .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ + .set_port = xprt_rdma_set_port, + .connect = xprt_rdma_connect, + .buf_alloc = xprt_rdma_allocate, + .buf_free = xprt_rdma_free, + .send_request = xprt_rdma_send_request, + .close = xprt_rdma_close, + .destroy = xprt_rdma_destroy, + .print_stats = xprt_rdma_print_stats +}; + +static struct xprt_class xprt_rdma = { + .list = LIST_HEAD_INIT(xprt_rdma.list), + .name = "rdma", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_RDMA, + .setup = xprt_setup_rdma, +}; + +static void __exit xprt_rdma_cleanup(void) +{ + int rc; + + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); +#ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +#endif + rc = xprt_unregister_transport(&xprt_rdma); + if (rc) + dprintk("RPC: %s: xprt_unregister returned %i\n", + __func__, rc); +} + +static int __init xprt_rdma_init(void) +{ + int rc; + + rc = xprt_register_transport(&xprt_rdma); + + if (rc) + return rc; + + dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + + dprintk(KERN_INFO "Defaults:\n"); + dprintk(KERN_INFO "\tSlots %d\n" + "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", + xprt_rdma_slot_table_entries, + xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); + dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + +#ifdef RPC_DEBUG + if (!sunrpc_table_header) + sunrpc_table_header = register_sysctl_table(sunrpc_table); +#endif + return 0; +} + +module_init(xprt_rdma_init); +module_exit(xprt_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c new file mode 100644 index 000000000000..0baf53381987 --- /dev/null +++ b/net/sunrpc/xprtrdma/verbs.c @@ -0,0 +1,37 @@ +/* + * Placeholders for subsequent patches + */ + +#include "xprt_rdma.h" + +int rpcrdma_ia_open(struct rpcrdma_xprt *a, struct sockaddr *b, int c) +{ return EINVAL; } +void rpcrdma_ia_close(struct rpcrdma_ia *a) { } +int rpcrdma_ep_create(struct rpcrdma_ep *a, struct rpcrdma_ia *b, +struct rpcrdma_create_data_internal *c) { return EINVAL; } +int rpcrdma_ep_destroy(struct rpcrdma_ep *a, struct rpcrdma_ia *b) +{ return EINVAL; } +int rpcrdma_ep_connect(struct rpcrdma_ep *a, struct rpcrdma_ia *b) +{ return EINVAL; } +int rpcrdma_ep_disconnect(struct rpcrdma_ep *a, struct rpcrdma_ia *b) +{ return EINVAL; } +int rpcrdma_ep_post(struct rpcrdma_ia *a, struct rpcrdma_ep *b, +struct rpcrdma_req *c) { return EINVAL; } +int rpcrdma_ep_post_recv(struct rpcrdma_ia *a, struct rpcrdma_ep *b, +struct rpcrdma_rep *c) { return EINVAL; } +int rpcrdma_buffer_create(struct rpcrdma_buffer *a, struct rpcrdma_ep *b, +struct rpcrdma_ia *c, struct rpcrdma_create_data_internal *d) { return EINVAL; } +void rpcrdma_buffer_destroy(struct rpcrdma_buffer *a) { } +struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *a) +{ return NULL; } +void rpcrdma_buffer_put(struct rpcrdma_req *a) { } +void rpcrdma_recv_buffer_get(struct rpcrdma_req *a) { } +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *a) { } +int rpcrdma_register_internal(struct rpcrdma_ia *a, void *b, int c, +struct ib_mr **d, struct ib_sge *e) { return EINVAL; } +int rpcrdma_deregister_internal(struct rpcrdma_ia *a, struct ib_mr *b, +struct ib_sge *c) { return EINVAL; } +int rpcrdma_register_external(struct rpcrdma_mr_seg *a, int b, int c, +struct rpcrdma_xprt *d) { return EINVAL; } +int rpcrdma_deregister_external(struct rpcrdma_mr_seg *a, +struct rpcrdma_xprt *b, void *c) { return EINVAL; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h new file mode 100644 index 000000000000..2427822f8bd4 --- /dev/null +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRT_RDMA_H +#define _LINUX_SUNRPC_XPRT_RDMA_H + +#include /* wait_queue_head_t, etc */ +#include /* spinlock_t, etc */ +#include /* atomic_t, etc */ + +#include /* RDMA connection api */ +#include /* RDMA verbs api */ + +#include /* rpc_xprt */ +#include /* RPC/RDMA protocol */ +#include /* xprt parameters */ + +/* + * Interface Adapter -- one per transport instance + */ +struct rpcrdma_ia { + struct rdma_cm_id *ri_id; + struct ib_pd *ri_pd; + struct ib_mr *ri_bind_mem; + struct completion ri_done; + int ri_async_rc; + enum rpcrdma_memreg ri_memreg_strategy; +}; + +/* + * RDMA Endpoint -- one per transport instance + */ + +struct rpcrdma_ep { + atomic_t rep_cqcount; + int rep_cqinit; + int rep_connected; + struct rpcrdma_ia *rep_ia; + struct ib_cq *rep_cq; + struct ib_qp_init_attr rep_attr; + wait_queue_head_t rep_connect_wait; + struct ib_sge rep_pad; /* holds zeroed pad */ + struct ib_mr *rep_pad_mr; /* holds zeroed pad */ + void (*rep_func)(struct rpcrdma_ep *); + struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; +}; + +#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) + +/* + * struct rpcrdma_rep -- this structure encapsulates state required to recv + * and complete a reply, asychronously. It needs several pieces of + * state: + * o recv buffer (posted to provider) + * o ib_sge (also donated to provider) + * o status of reply (length, success or not) + * o bookkeeping state to get run by tasklet (list, etc) + * + * These are allocated during initialization, per-transport instance; + * however, the tasklet execution list itself is global, as it should + * always be pretty short. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + */ + +/* temporary static scatter/gather max */ +#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ +#define MAX_RPCRDMAHDR (\ + /* max supported RPC/RDMA header */ \ + sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ + (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) + +struct rpcrdma_buffer; + +struct rpcrdma_rep { + unsigned int rr_len; /* actual received reply length */ + struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ + struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ + void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ + struct list_head rr_list; /* tasklet list */ + wait_queue_head_t rr_unbind; /* optional unbind wait */ + struct ib_sge rr_iov; /* for posting */ + struct ib_mr *rr_handle; /* handle for mem in rr_iov */ + char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ +}; + +/* + * struct rpcrdma_req -- structure central to the request/reply sequence. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + * + * It includes pre-registered buffer memory for send AND recv. + * The recv buffer, however, is not owned by this structure, and + * is "donated" to the hardware when a recv is posted. When a + * reply is handled, the recv buffer used is given back to the + * struct rpcrdma_req associated with the request. + * + * In addition to the basic memory, this structure includes an array + * of iovs for send operations. The reason is that the iovs passed to + * ib_post_{send,recv} must not be modified until the work request + * completes. + * + * NOTES: + * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we + * marshal. The number needed varies depending on the iov lists that + * are passed to us, the memory registration mode we are in, and if + * physical addressing is used, the layout. + */ + +struct rpcrdma_mr_seg { /* chunk descriptors */ + union { /* chunk memory handles */ + struct ib_mr *rl_mr; /* if registered directly */ + struct rpcrdma_mw { /* if registered from region */ + union { + struct ib_mw *mw; + struct ib_fmr *fmr; + } r; + struct list_head mw_list; + } *rl_mw; + } mr_chunk; + u64 mr_base; /* registration result */ + u32 mr_rkey; /* registration result */ + u32 mr_len; /* length of chunk or segment */ + int mr_nsegs; /* number of segments in chunk or 0 */ + enum dma_data_direction mr_dir; /* segment mapping direction */ + dma_addr_t mr_dma; /* segment mapping address */ + size_t mr_dmalen; /* segment mapping length */ + struct page *mr_page; /* owning page, if any */ + char *mr_offset; /* kva if no page, else offset */ +}; + +struct rpcrdma_req { + size_t rl_size; /* actual length of buffer */ + unsigned int rl_niovs; /* 0, 2 or 4 */ + unsigned int rl_nchunks; /* non-zero if chunks */ + struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ + struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ + struct ib_sge rl_send_iov[4]; /* for active requests */ + struct ib_sge rl_iov; /* for posting */ + struct ib_mr *rl_handle; /* handle for mem in rl_iov */ + char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ + __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ +}; +#define rpcr_to_rdmar(r) \ + container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +/* + * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for + * inline requests/replies, and client/server credits. + * + * One of these is associated with a transport instance + */ +struct rpcrdma_buffer { + spinlock_t rb_lock; /* protects indexes */ + atomic_t rb_credits; /* most recent server credits */ + unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ + int rb_max_requests;/* client max requests */ + struct list_head rb_mws; /* optional memory windows/fmrs */ + int rb_send_index; + struct rpcrdma_req **rb_send_bufs; + int rb_recv_index; + struct rpcrdma_rep **rb_recv_bufs; + char *rb_pool; +}; +#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) + +/* + * Internal structure for transport instance creation. This + * exists primarily for modularity. + * + * This data should be set with mount options + */ +struct rpcrdma_create_data_internal { + struct sockaddr_storage addr; /* RDMA server address */ + unsigned int max_requests; /* max requests (slots) in flight */ + unsigned int rsize; /* mount rsize - max read hdr+data */ + unsigned int wsize; /* mount wsize - max write hdr+data */ + unsigned int inline_rsize; /* max non-rdma read data payload */ + unsigned int inline_wsize; /* max non-rdma write data payload */ + unsigned int padding; /* non-rdma write header padding */ +}; + +#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + +#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + +#define RPCRDMA_INLINE_PAD_VALUE(rq)\ + rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + +/* + * Statistics for RPCRDMA + */ +struct rpcrdma_stats { + unsigned long read_chunk_count; + unsigned long write_chunk_count; + unsigned long reply_chunk_count; + + unsigned long long total_rdma_request; + unsigned long long total_rdma_reply; + + unsigned long long pullup_copy_count; + unsigned long long fixup_copy_count; + unsigned long hardway_register_count; + unsigned long failed_marshal_count; + unsigned long bad_reply_count; +}; + +/* + * RPCRDMA transport -- encapsulates the structures above for + * integration with RPC. + * + * The contained structures are embedded, not pointers, + * for convenience. This structure need not be visible externally. + * + * It is allocated and initialized during mount, and released + * during unmount. + */ +struct rpcrdma_xprt { + struct rpc_xprt xprt; + struct rpcrdma_ia rx_ia; + struct rpcrdma_ep rx_ep; + struct rpcrdma_buffer rx_buf; + struct rpcrdma_create_data_internal rx_data; + struct delayed_work rdma_connect; + struct rpcrdma_stats rx_stats; +}; + +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) + +/* + * Interface Adapter calls - xprtrdma/verbs.c + */ +int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); +void rpcrdma_ia_close(struct rpcrdma_ia *); + +/* + * Endpoint calls - xprtrdma/verbs.c + */ +int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); + +int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_req *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_rep *); + +/* + * Buffer calls - xprtrdma/verbs.c + */ +int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, + struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); + +struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); +void rpcrdma_buffer_put(struct rpcrdma_req *); +void rpcrdma_recv_buffer_get(struct rpcrdma_req *); +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); + +int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, + struct ib_mr **, struct ib_sge *); +int rpcrdma_deregister_internal(struct rpcrdma_ia *, + struct ib_mr *, struct ib_sge *); + +int rpcrdma_register_external(struct rpcrdma_mr_seg *, + int, int, struct rpcrdma_xprt *); +int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, + struct rpcrdma_xprt *, void *); + +/* + * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c + */ +void rpcrdma_conn_func(struct rpcrdma_ep *); +void rpcrdma_reply_handler(struct rpcrdma_rep *); + +/* + * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c + */ +int rpcrdma_marshal_req(struct rpc_rqst *); + +#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ -- cgit v1.2.3 From c03025d55540bd648f2546659090140ecc835572 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Aug 2007 17:44:28 -0400 Subject: NFS: Add a helper to extract the nfs_open_context from a struct file Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1d1343fd2ddd..51e4a77030f1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -331,6 +331,11 @@ extern const struct inode_operations nfs3_file_inode_operations; extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; +static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) +{ + return filp->private_data; +} + static inline struct rpc_cred *nfs_file_cred(struct file *file) { if (file != NULL) { -- cgit v1.2.3 From cd3758e37ddea66fccca7d93c4b601e8a2e51926 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Aug 2007 17:44:32 -0400 Subject: NFS: Replace file->private_data with calls to nfs_file_open_context() Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/dir.c | 4 ++-- fs/nfs/direct.c | 4 ++-- fs/nfs/file.c | 8 ++++---- fs/nfs/inode.c | 2 +- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4state.c | 2 +- fs/nfs/read.c | 6 ++---- fs/nfs/write.c | 4 ++-- include/linux/nfs_fs.h | 8 ++------ 10 files changed, 21 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c55a761c22bb..7a1b6e869f9c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) + if (nfs_file_open_context(fl->fl_file) != ctx) continue; status = nfs4_lock_delegation_recall(state, fl); if (status >= 0) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dde545925b4e..b332c527d95d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -558,7 +558,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; + desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -623,7 +623,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) } if (offset != filp->f_pos) { filp->f_pos = offset; - ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + nfs_file_open_context(filp)->dir_cookie = 0; } out: mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fcf4d384610e..28c8e1b65db0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return -ENOMEM; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -718,7 +718,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz sync = FLUSH_STABLE; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5595b32c0915..c664bb921425 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -208,7 +208,7 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file->f_path.dentry->d_inode; int status; @@ -296,7 +296,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); @@ -395,7 +395,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) return 1; - ctx = filp->private_data; + ctx = nfs_file_open_context(filp); if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) return 1; return 0; @@ -438,7 +438,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = nfs_do_fsync(iocb->ki_filp->private_data, inode); + int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7c8ca175d87b..45633f9ad851 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -538,7 +538,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_path.dentry->d_inode; - struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { filp->private_data = NULL; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d856e9f5913b..2919271a983a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1390,7 +1390,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); ctx->state = state; return 0; } @@ -3303,7 +3303,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = -ENOMEM; if (seqid == NULL) goto out; - task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); + task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; @@ -3447,7 +3447,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f int ret; dprintk("%s: begin!\n", __FUNCTION__); - data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, + data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner); if (data == NULL) return -ENOMEM; @@ -3573,7 +3573,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int status; /* verify open state */ - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e4adf8c8312..bfb36261cecb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; status = ops->recover_lock(state, fl); if (status >= 0) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 19e05633f4e3..d6e62d7afc72 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -497,8 +497,7 @@ int nfs_readpage(struct file *file, struct page *page) if (ctx == NULL) goto out_unlock; } else - ctx = get_nfs_open_context((struct nfs_open_context *) - file->private_data); + ctx = get_nfs_open_context(nfs_file_open_context(file)); error = nfs_readpage_async(ctx, inode, page); @@ -576,8 +575,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (desc.ctx == NULL) return -EBADF; } else - desc.ctx = get_nfs_open_context((struct nfs_open_context *) - filp->private_data); + desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fb396ea5accf..3e9e268b887e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -667,7 +667,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_page *req; int do_flush, status; /* @@ -701,7 +701,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = page->mapping->host; int status = 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 51e4a77030f1..5c200fd8c652 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -338,12 +338,8 @@ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) static inline struct rpc_cred *nfs_file_cred(struct file *file) { - if (file != NULL) { - struct nfs_open_context *ctx; - - ctx = (struct nfs_open_context*)file->private_data; - return ctx->cred; - } + if (file != NULL) + return nfs_file_open_context(file)->cred; return NULL; } -- cgit v1.2.3 From af22f94ae02ab9dd4fd7fe628c8434a59cc293be Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Aug 2007 17:45:10 -0400 Subject: NFSv4: Simplify _nfs4_do_access() Currently, _nfs4_do_access() is just a copy of nfs_do_access() with added conversion of the open flags into an access mask. This patch merges the duplicate functionality. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 22 ++++++++++++++++++++-- fs/nfs/nfs4proc.c | 35 ++--------------------------------- include/linux/nfs_fs.h | 5 ++--- 3 files changed, 24 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b332c527d95d..2b5e611352c5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1815,7 +1815,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st return NULL; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -1882,7 +1882,7 @@ found: nfs_access_free_entry(entry); } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -1930,6 +1930,24 @@ out: return -EACCES; } +static int nfs_open_permission_mask(int openflags) +{ + int mask = 0; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + return mask; +} + +int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); +} + int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2919271a983a..5aa0dd1e1bdf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,7 +65,6 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -454,7 +453,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); lock_kernel(); - ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); unlock_kernel(); if (ret != 0) goto out; @@ -948,36 +947,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) -{ - struct nfs_access_entry cache; - int mask = 0; - int status; - - if (openflags & FMODE_READ) - mask |= MAY_READ; - if (openflags & FMODE_WRITE) - mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) - mask |= MAY_EXEC; - status = nfs_access_get_cached(inode, cred, &cache); - if (status == 0) - goto out; - - /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; - cache.cred = cred; - cache.jiffies = jiffies; - status = _nfs4_proc_access(inode, &cache); - if (status != 0) - return status; - nfs_access_add_cache(inode, &cache); -out: - if ((cache.mask & mask) == mask) - return 0; - return -EACCES; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; @@ -1381,7 +1350,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct /* If the open_intent is for execute, we have an extra check to make */ if (nd->intent.open.flags & FMODE_EXEC) { - ret = _nfs4_do_access(state->inode, + ret = nfs_may_open(state->inode, state->owner->so_cred, nd->intent.open.flags); if (ret < 0) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5c200fd8c652..5b42fef0baf0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -292,9 +292,6 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); -extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); @@ -382,6 +379,8 @@ extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); +extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); +extern void nfs_access_zap_cache(struct inode *inode); /* * linux/fs/nfs/symlink.c -- cgit v1.2.3 From 76b32999dfff6e59252a8af17a5671a4cf3bcf9b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Aug 2007 17:45:11 -0400 Subject: NFSv4: Make NFSv4 ACCESS calls return attributes too... It doesn't really make sense to cache an access call without also revalidating the attributes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 +++++++++-- fs/nfs/nfs4xdr.c | 27 ++++++++++++++++++++------- include/linux/nfs_xdr.h | 3 +++ 3 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5aa0dd1e1bdf..0e366a31f63b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -62,7 +62,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -1726,10 +1725,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; struct nfs4_accessargs args = { .fh = NFS_FH(inode), + .bitmask = server->attr_bitmask, + }; + struct nfs4_accessres res = { + .server = server, + .fattr = &fattr, }; - struct nfs4_accessres res = { 0 }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, @@ -1755,6 +1760,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { entry->mask = 0; @@ -1764,6 +1770,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry entry->mask |= MAY_WRITE; if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; + nfs_refresh_inode(inode, &fattr); } return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f353d4686b6..51dd3804866f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int); decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_access_maxsz) + encode_access_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_access_maxsz) + decode_access_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -1375,14 +1377,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_access(&xdr, args->access); + status = encode_putfh(&xdr, args->fh); + if (status != 0) + goto out; + status = encode_access(&xdr, args->access); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -3784,8 +3792,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - if ((status = decode_putfh(&xdr)) == 0) - status = decode_access(&xdr, res); + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_access(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cf74a4db84a5..03032017ffaa 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -538,10 +538,13 @@ typedef u64 clientid4; struct nfs4_accessargs { const struct nfs_fh * fh; + const u32 * bitmask; u32 access; }; struct nfs4_accessres { + const struct nfs_server * server; + struct nfs_fattr * fattr; u32 supported; u32 access; }; -- cgit v1.2.3 From 7957c1418f4b6c66e28d4ac3c4d7a8c19d526c48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2007 14:20:12 -0400 Subject: NFS: fix nfs_verify_change_attribute We always want to check that the verifier and directory cache_change_attribute match. This also allows us to remove the 'wraparound hack' for the cache_change_attribute. If we're only checking for equality, then we don't care about wraparound issues. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ---- include/linux/nfs_fs.h | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0d98074d0766..ed035a81eea2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -972,10 +972,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->read_cache_jiffies = fattr->time_start; nfsi->last_updated = now; - /* Fix a wraparound issue with nfsi->cache_change_attribute */ - if (time_before(now, nfsi->cache_change_attribute)) - nfsi->cache_change_attribute = now - 600*HZ; - /* Are we racing with known updates of the metadata on the server? */ data_stable = nfs_verify_change_attribute(inode, fattr->time_start); nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5b42fef0baf0..f45161363be2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -277,7 +277,7 @@ static inline long nfs_save_change_attribute(struct inode *inode) static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) { return !nfs_caches_unstable(inode) - && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute); + && chattr == NFS_I(inode)->cache_change_attribute; } /* -- cgit v1.2.3 From 17cadc95372e28024be0874e67329c1862912c5d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Sep 2007 10:07:31 -0400 Subject: NFS: Don't force a dcache revalidation if nfs_wcc_update_inode succeeds The reason is that if the weak cache consistency update was successful, then we know that our client must be the only one that changed the directory, and we've already updated the dcache to reflect the change. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 11 +++-------- fs/nfs/nfs4proc.c | 7 +++++-- include/linux/nfs_fs.h | 4 +++- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f1f6639f52b5..7e73edc1751f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -791,24 +791,18 @@ void nfs_end_data_update(struct inode *inode) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - unsigned long now = jiffies; /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - nfsi->cache_change_attribute = now; - } if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = now; } - if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { + if (inode->i_size == fattr->pre_size && nfsi->npages == 0) inode->i_size = fattr->size; - nfsi->cache_change_attribute = now; - } } } @@ -919,6 +913,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_change_attribute = jiffies; spin_unlock(&inode->i_lock); goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e366a31f63b..871c102d9bdf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -208,9 +208,12 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) struct nfs_inode *nfsi = NFS_I(dir); spin_lock(&dir->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; - if (cinfo->before == nfsi->change_attr && cinfo->atomic) + if (cinfo->after != nfsi->change_attr) { + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + if (!cinfo->atomic || cinfo->before != nfsi->change_attr) + nfsi->cache_change_attribute = jiffies; nfsi->change_attr = cinfo->after; + } spin_unlock(&dir->i_lock); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f45161363be2..fd2c5c8158cf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -235,8 +235,10 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute = jiffies; + } spin_unlock(&inode->i_lock); } -- cgit v1.2.3 From c4812998398d9cbce8646494704c52297359ede0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2007 17:11:45 -0400 Subject: NFS: Fix atime revalidation in readdir() NFSv3 will correctly update atime on a readdir call, so there is no need to set the NFS_INO_INVALID_ATIME flag unless the call to nfs_refresh_inode() fails. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 6 ------ fs/nfs/inode.c | 7 +++++++ fs/nfs/nfs3proc.c | 3 +++ fs/nfs/nfs4proc.c | 3 +++ fs/nfs/proc.c | 2 ++ include/linux/nfs_fs.h | 1 + 6 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9f8ec3c3e6a7..07df192e23a0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -200,9 +200,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) desc->timestamp = timestamp; desc->timestamp_valid = 1; SetPageUptodate(page); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_mutex or some other mechanism. @@ -490,9 +487,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cd57f795229e..e37faa3a7bac 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -156,6 +156,13 @@ static void nfs_zap_acl_cache(struct inode *inode) spin_unlock(&inode->i_lock); } +void nfs_invalidate_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); +} + /* * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c7ca5d70870b..0ae263cdedc6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -607,6 +607,9 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + nfs_invalidate_atime(dir); + nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); return status; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 871c102d9bdf..9c27a6ed1a62 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2197,6 +2197,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + + nfs_invalidate_atime(dir); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 845cdde1d8b7..cfc4130f2aec 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -476,6 +476,8 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_invalidate_atime(dir); + dprintk("NFS reply readdir: %d\n", status); return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fd2c5c8158cf..63850a884902 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -288,6 +288,7 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); +extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); -- cgit v1.2.3 From 70ca88521fc7bee8ef0fc22033a439d4b9a2c70d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 30 Sep 2007 15:21:24 -0400 Subject: NFS: Fake up 'wcc' attributes to prevent cache invalidation after write NFSv2 and v4 don't offer weak cache consistency attributes on WRITE calls. In NFSv3, returning wcc data is optional. In all cases, we want to prevent the client from invalidating our cached data whenever ->write_done() attempts to update the inode attributes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 34 ++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/proc.c | 2 +- include/linux/nfs_fs.h | 1 + include/linux/nfs_xdr.h | 3 ++- 6 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 23feb9e3d8b0..c5f4e0567533 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -793,6 +793,12 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && + nfsi->change_attr == fattr->pre_change_attr) { + nfsi->change_attr = fattr->change_attr; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) @@ -923,6 +929,34 @@ out: return status; } +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + (fattr->valid & NFS_ATTR_WCC_V4) == 0) { + fattr->pre_change_attr = NFS_I(inode)->change_attr; + fattr->valid |= NFS_ATTR_WCC_V4; + } + if ((fattr->valid & NFS_ATTR_FATTR) != 0 && + (fattr->valid & NFS_ATTR_WCC) == 0) { + memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->pre_size = inode->i_size; + fattr->valid |= NFS_ATTR_WCC; + } + return nfs_post_op_update_inode(inode, fattr); +} + /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index fc6b1193a631..ce1fb99e67e1 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -750,7 +750,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d311984d2c88..796bc8ea7194 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2427,7 +2427,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); } return 0; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ec3ede890bf4..97669ed05500 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -579,7 +579,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data) static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (task->tk_status >= 0) - nfs_post_op_update_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 63850a884902..9449286c5867 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -293,6 +293,7 @@ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); +extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_open(struct inode *, struct file *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 03032017ffaa..daab252f2e5c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -62,7 +62,8 @@ struct nfs_fattr { #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */ +#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ +#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ /* * Info on the file system -- cgit v1.2.3 From 8edb01828837302055a8f0afddb2256659480bc5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2007 17:14:03 -0400 Subject: NFS: Fix the sign of the return value of nfs_save_change_attribute() Also fix up the comments. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9449286c5867..a765dd456922 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -260,13 +260,13 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode) /** * nfs_save_change_attribute - Returns the inode attribute change cookie - * @inode - pointer to inode + * @dir - pointer to parent directory inode * The "change attribute" is updated every time we finish an operation * that will result in a metadata change on the server. */ -static inline long nfs_save_change_attribute(struct inode *inode) +static inline unsigned long nfs_save_change_attribute(struct inode *dir) { - return NFS_I(inode)->cache_change_attribute; + return NFS_I(dir)->cache_change_attribute; } /** -- cgit v1.2.3 From 4b841736bc16b320bcdb1e8ece585b3ced9a8811 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2007 17:15:01 -0400 Subject: NFS: Fix nfs_verify_change_attribute() We don't care about whether or not some other process on our client is changing the directory while we're in nfs_lookup_revalidate(), because the dcache will take care of ensuring local atomicity. We can therefore remove the test for nfs_caches_unstable(). Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- include/linux/nfs_fs.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 07df192e23a0..e275a6eb0a7c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -646,7 +646,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if (dentry->d_time == NFS_I(dir)->cache_change_attribute) + if (nfs_verify_change_attribute(dir, dentry->d_time)) return 1; return 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a765dd456922..0197b1243fe0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -270,16 +270,16 @@ static inline unsigned long nfs_save_change_attribute(struct inode *dir) } /** - * nfs_verify_change_attribute - Detects NFS inode cache updates - * @inode - pointer to inode + * nfs_verify_change_attribute - Detects NFS remote directory changes + * @dir - pointer to parent directory inode * @chattr - previously saved change attribute - * Return "false" if metadata has been updated (or is in the process of - * being updated) since the change attribute was saved. + * Return "false" if the verifiers doesn't match the change attribute. + * This would usually indicate that the directory contents have changed on + * the server, and that any dentries need revalidating. */ -static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) +static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr) { - return !nfs_caches_unstable(inode) - && chattr == NFS_I(inode)->cache_change_attribute; + return chattr == NFS_I(dir)->cache_change_attribute; } /* -- cgit v1.2.3 From f38211100d4823be530577dc3452f838861222ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2007 10:00:23 -0400 Subject: NFS: nfs_mark_for_revalidate don't update cache_change_attribute Just let the subsequent inode revalidation do the update... Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0197b1243fe0..35d61924e0b9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -235,10 +235,8 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = jiffies; - } spin_unlock(&inode->i_lock); } -- cgit v1.2.3 From a1643a92f6de92074116922a2d2906dd33499ff4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2007 17:25:43 -0400 Subject: NFS: NFS_CACHEINV() should not test for nfs_caches_unstable() The fact that we're in the process of modifying the inode does not mean that we should not invalidate the attribute and data caches. The defensive thing is to always invalidate when we're confronted with inode mtime/ctime or change_attribute updates that we do not immediately recognise. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- include/linux/nfs_fs.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 35b447d79dbe..a03ed2f85047 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -788,7 +788,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_zap_parent: nfs_zap_caches(dir); out_bad: - NFS_CACHEINV(dir); + nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 35d61924e0b9..c947803a7ba4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -240,12 +240,6 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_unlock(&inode->i_lock); } -static inline void NFS_CACHEINV(struct inode *inode) -{ - if (!nfs_caches_unstable(inode)) - nfs_mark_for_revalidate(inode); -} - static inline int nfs_server_capable(struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; -- cgit v1.2.3 From 80eb209def76d375677840800eb838abce1e6639 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2007 17:34:46 -0400 Subject: NFS: Remove NFS_I(inode)->data_updates We have no more users... Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 21 +-------------------- include/linux/nfs_fs.h | 14 ++++++-------- 2 files changed, 7 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d722a0e84361..1d507a2a96d6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -85,7 +85,6 @@ void nfs_clear_inode(struct inode *inode) */ BUG_ON(nfs_have_writebacks(inode)); BUG_ON(!list_empty(&NFS_I(inode)->open_files)); - BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); } @@ -756,16 +755,6 @@ out: return ret; } -/** - * nfs_begin_data_update - * @inode - pointer to inode - * Declare that a set of operations will update file data on the server - */ -void nfs_begin_data_update(struct inode *inode) -{ - atomic_inc(&NFS_I(inode)->data_updates); -} - /** * nfs_end_data_update * @inode - pointer to inode @@ -775,15 +764,12 @@ void nfs_begin_data_update(struct inode *inode) */ void nfs_end_data_update(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - /* Directories: invalidate page cache */ if (S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); } - atomic_dec(&nfsi->data_updates); } static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) @@ -823,7 +809,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; - int data_unstable; /* Has the inode gone and changed behind our back? */ @@ -832,9 +817,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return -EIO; } - /* Are we in the process of updating data on the server? */ - data_unstable = nfs_caches_unstable(inode); - /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -1162,7 +1144,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - atomic_set(&nfsi->data_updates, 0); nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c947803a7ba4..1b4edc6f7fd7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -136,11 +136,6 @@ struct nfs_inode { * server. */ unsigned long cache_change_attribute; - /* - * Counter indicating the number of outstanding requests that - * will cause a file data update. - */ - atomic_t data_updates; struct rb_root access_cache; struct list_head access_cache_entry_lru; @@ -224,9 +219,13 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) -static inline int nfs_caches_unstable(struct inode *inode) +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +static inline void nfs_begin_data_update(struct inode *inode) { - return atomic_read(&NFS_I(inode)->data_updates) != 0; } static inline void nfs_mark_for_revalidate(struct inode *inode) @@ -299,7 +298,6 @@ extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); -extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3 From 60ccd4ec4170c9487e3792322626acd160197bce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2007 17:48:19 -0400 Subject: NFS: Remove nfs_begin_data_update/nfs_end_data_update The lower level routines in fs/nfs/proc.c, fs/nfs/nfs3proc.c and fs/nfs/nfs4proc.c should already be dealing with the revalidation issues. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 35 +---------------------------------- fs/nfs/direct.c | 4 ---- fs/nfs/inode.c | 19 ------------------- fs/nfs/nfs3acl.c | 2 -- fs/nfs/unlink.c | 3 --- fs/nfs/write.c | 2 -- include/linux/nfs_fs.h | 12 ------------ 7 files changed, 1 insertion(+), 76 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a03ed2f85047..34da48586829 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1001,12 +1001,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto out; } - if (nd->intent.open.flags & O_CREAT) { - nfs_begin_data_update(dir); - res = nfs4_atomic_open(dir, dentry, nd); - nfs_end_data_update(dir); - } else - res = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(res)) { error = PTR_ERR(res); @@ -1224,9 +1219,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, open_flags = nd->intent.open.flags; lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); - nfs_end_data_update(dir); if (error != 0) goto out_err; unlock_kernel(); @@ -1256,9 +1249,7 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_begin_data_update(dir); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); - nfs_end_data_update(dir); if (status != 0) goto out_err; unlock_kernel(); @@ -1284,9 +1275,7 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) attr.ia_mode = mode | S_IFDIR; lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); - nfs_end_data_update(dir); if (error != 0) goto out_err; unlock_kernel(); @@ -1305,12 +1294,10 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); - nfs_end_data_update(dir); unlock_kernel(); return error; @@ -1368,17 +1355,13 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) qsilly.name = silly; qsilly.len = strlen(silly); - nfs_begin_data_update(dir); if (dentry->d_inode) { - nfs_begin_data_update(dentry->d_inode); error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); nfs_mark_for_revalidate(dentry->d_inode); - nfs_end_data_update(dentry->d_inode); } else error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); - nfs_end_data_update(dir); if (!error) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); @@ -1412,19 +1395,15 @@ static int nfs_safe_remove(struct dentry *dentry) goto out; } - nfs_begin_data_update(dir); if (inode != NULL) { nfs_inode_return_delegation(inode); - nfs_begin_data_update(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) drop_nlink(inode); nfs_mark_for_revalidate(inode); - nfs_end_data_update(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - nfs_end_data_update(dir); out: return error; } @@ -1516,9 +1495,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr, KM_USER0); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); - nfs_end_data_update(dir); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, @@ -1558,15 +1535,11 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) dentry->d_parent->d_name.name, dentry->d_name.name); lock_kernel(); - nfs_begin_data_update(dir); - nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { atomic_inc(&inode->i_count); d_instantiate(dentry, inode); } - nfs_end_data_update(inode); - nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1669,15 +1642,9 @@ go_ahead: d_delete(new_dentry); } - nfs_begin_data_update(old_dir); - nfs_begin_data_update(new_dir); - nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); nfs_mark_for_revalidate(old_inode); - nfs_end_data_update(old_inode); - nfs_end_data_update(new_dir); - nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 28c8e1b65db0..32fe97211eea 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -510,7 +510,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: - nfs_end_data_update(inode); if (dreq->commit_data != NULL) nfs_commit_free(dreq->commit_data); nfs_direct_free_writedata(dreq); @@ -533,7 +532,6 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_end_data_update(inode); nfs_direct_free_writedata(dreq); nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); @@ -724,8 +722,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); - nfs_begin_data_update(inode); - rpc_clnt_sigmask(clnt, &oldset); result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); if (!result) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1d507a2a96d6..1c23d3a67c85 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -344,7 +344,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) return 0; lock_kernel(); - nfs_begin_data_update(inode); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { filemap_write_and_wait(inode->i_mapping); @@ -358,7 +357,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); - nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -755,23 +753,6 @@ out: return ret; } -/** - * nfs_end_data_update - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will mark the inode as immediately needing revalidation - * of its attribute cache. - */ -void nfs_end_data_update(struct inode *inode) -{ - /* Directories: invalidate page cache */ - if (S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); - } -} - static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7322da4d2055..9b7362565c0c 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -317,13 +317,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } dprintk("NFS call setacl\n"); - nfs_begin_data_update(inode); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); - nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 045ab805c17f..1aed850d18f2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -66,7 +66,6 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) .rpc_cred = data->cred, }; - nfs_begin_data_update(dir); NFS_PROTO(dir)->unlink_setup(&msg, dir); rpc_call_setup(task, &msg, 0); } @@ -84,8 +83,6 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) if (!NFS_PROTO(dir)->unlink_done(task, dir)) rpc_restart_call(task); - else - nfs_end_data_update(dir); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3e9e268b887e..e2bb66c34406 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -378,7 +378,6 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) return error; if (!nfsi->npages) { igrab(inode); - nfs_begin_data_update(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } @@ -406,7 +405,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; if (!nfsi->npages) { spin_unlock(&inode->i_lock); - nfs_end_data_update(inode); iput(inode); } else spin_unlock(&inode->i_lock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1b4edc6f7fd7..8f632bd854af 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,15 +219,6 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) -/** - * nfs_begin_data_update - * @inode - pointer to inode - * Declare that a set of operations will update file data on the server - */ -static inline void nfs_begin_data_update(struct inode *inode) -{ -} - static inline void nfs_mark_for_revalidate(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -296,9 +287,6 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); -extern void nfs_begin_attr_update(struct inode *); -extern void nfs_end_attr_update(struct inode *); -extern void nfs_end_data_update(struct inode *); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -- cgit v1.2.3 From d75340cc4de5c187fbf0bba234309ca86cf0a2fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2007 21:42:01 -0400 Subject: NFSv4: Fix nfs_atomic_open() to set the verifier on negative dentries too Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 -------- fs/nfs/nfs4proc.c | 7 ++++++- include/linux/nfs_fs.h | 5 +++++ 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 41b063c98822..82878a19538d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -656,11 +656,6 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) return 1; } -static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) -{ - dentry->d_time = verf; -} - /* * Return the intent data that applies to this particular path component * @@ -1016,7 +1011,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } } else if (res != NULL) dentry = res; - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: return res; no_open: @@ -1060,8 +1054,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) */ lock_kernel(); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); - if (ret == 1) - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); out: dput(parent); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0748c700301d..52af5a7d679e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1399,13 +1399,16 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { - if (PTR_ERR(state) == -ENOENT) + if (PTR_ERR(state) == -ENOENT) { d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } return (struct dentry *)state; } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) path.dentry = res; + nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return res; } @@ -1439,6 +1442,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return 1; } @@ -1885,6 +1889,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, goto out; } d_add(dentry, igrab(state->inode)); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (flags & O_EXCL) { struct nfs_fattr fattr; status = nfs4_do_setattr(state->inode, &fattr, sattr, state); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8f632bd854af..992931cd301e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -240,6 +240,11 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode) return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_time = verf; +} + /** * nfs_save_change_attribute - Returns the inode attribute change cookie * @dir - pointer to parent directory inode -- cgit v1.2.3 From c7c209730d635226b81e9aeae63b6dc8f445569f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2007 19:22:40 -0400 Subject: NFS: Get rid of some obsolete macros - NFS_READTIME, NFS_CHANGE_ATTR are completely unused. - Inline the few remaining uses of NFS_ATTRTIMEO, and remove. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 4 ++-- include/linux/nfs_fs.h | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bdf4ba42abd7..a4fdc4cc306c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1762,7 +1762,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9d012a6ee4cc..469387920dd9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -117,8 +117,8 @@ static void nfs_zap_caches_locked(struct inode *inode) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 992931cd301e..12fe97bac9ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -203,16 +203,12 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) -#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) -#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) -#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ : NFS_SERVER(inode)->acregmin) #define NFS_MAXATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \ : NFS_SERVER(inode)->acregmax) -#define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp) #define NFS_FLAGS(inode) (NFS_I(inode)->flags) #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) -- cgit v1.2.3 From 58eaab93376cb524fd0f1531a56902d2b3eaa619 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 21 Jul 2007 17:03:28 +0200 Subject: [23/37] Clean up duplicate includes in Hi, This patch cleans up duplicate includes in include/linux/nfs_fs.h Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 12fe97bac9ff..0e0c2e0d3aa3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -47,10 +47,8 @@ #include #include #include - #include -#include #include /* -- cgit v1.2.3 From f43bf0bebed7c33b698a8a25f95812f9e87c3843 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Oct 2007 12:01:04 -0400 Subject: NFS: Add a boot parameter to disable 64 bit inode numbers This boot parameter will allow legacy 32-bit applications which call stat() to continue to function even if the NFSv3/v4 server uses 64-bit inode numbers. Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 7 +++++++ fs/nfs/dir.c | 3 ++- fs/nfs/inode.c | 27 ++++++++++++++++++++++++++- include/linux/nfs_fs.h | 1 + 4 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4d175c751246..83e21045114e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1073,6 +1073,13 @@ and is between 256 and 4096 characters. It is defined in the file [NFS] set the maximum lifetime for idmapper cache entries. + nfs.enable_ino64= + [NFS] enable 64-bit inode numbers. + If zero, the NFS client will fake up a 32-bit inode + number for the readdir() and stat() syscalls instead + of returning the full 64-bit number. + The default is to return 64-bit inode numbers. + nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels no387 [BUGS=X86-32] Tells the kernel to use the 387 maths diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a4fdc4cc306c..8ec7fbd8240c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -427,7 +427,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } res = filldir(dirent, entry->name, entry->len, - file->f_pos, fileid, d_type); + file->f_pos, nfs_compat_user_ino64(fileid), + d_type); if (res < 0) break; file->f_pos++; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cad1246bf575..035c769b715e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -49,6 +49,11 @@ #define NFSDBG_FACILITY NFSDBG_VFS +#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 + +/* Default is to see 64-bit inode numbers */ +static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; + static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); @@ -62,6 +67,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } +/** + * nfs_compat_user_ino64 - returns the user-visible inode number + * @fileid: 64-bit fileid + * + * This function returns a 32-bit inode number if the boot parameter + * nfs.enable_ino64 is zero. + */ +u64 nfs_compat_user_ino64(u64 fileid) +{ + int ino; + + if (enable_ino64) + return fileid; + ino = fileid; + if (sizeof(ino) < sizeof(fileid)) + ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; + return ino; +} + int nfs_write_inode(struct inode *inode, int sync) { int ret; @@ -456,7 +480,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) err = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!err) { generic_fillattr(inode, stat); - stat->ino = NFS_FILEID(inode); + stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } return err; } @@ -1235,6 +1259,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch "); MODULE_LICENSE("GPL"); +module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) module_exit(exit_nfs_fs) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0e0c2e0d3aa3..c5164c257f71 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -289,6 +289,7 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); +extern u64 nfs_compat_user_ino64(u64 fileid); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ -- cgit v1.2.3