From 2c1740098c708b465e87637b237feb2fd98f129a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:27 -0500 Subject: NFS: Fix a bug in nfs_fscache_release_page() Not having an fscache cookie is perfectly valid if the user didn't mount with the fscache option. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=15234 Signed-off-by: Trond Myklebust Acked-by: David Howells Cc: stable@kernel.org --- fs/nfs/fscache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index fa588006588d..237874f1af23 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) */ int nfs_fscache_release_page(struct page *page, gfp_t gfp) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; - - BUG_ON(!cookie); - if (PageFsCache(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); -- cgit v1.2.3 From 7549ad5f9b6eda49bbac4b14c5b8f37bf464f922 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:34 -0500 Subject: NFS: Remove a redundant check for PageFsCache in nfs_migrate_page() Signed-off-by: Trond Myklebust Acked-by: David Howells --- fs/nfs/write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b54b8bb101f..d63d964a0392 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1598,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct nfs_page *req; int ret; - if (PageFsCache(page)) - nfs_fscache_release_page(page, GFP_KERNEL); + nfs_fscache_release_page(page, GFP_KERNEL); req = nfs_find_and_lock_request(page); ret = PTR_ERR(req); -- cgit v1.2.3 From fdcb45777a3d1689c5541e1f85ee3ebbd197d2c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:40 -0500 Subject: NFS: Fix the mapping of the NFSERR_SERVERFAULT error It was recently pointed out that the NFSERR_SERVERFAULT error, which is designed to inform the user of a serious internal error on the server, was being mapped to an error value that is internal to the kernel. This patch maps it to the error EREMOTEIO, which is exported to userland through errno.h. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/mount_clnt.c | 2 +- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs4xdr.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0adefc40cc89..59047f8d7d72 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -120,7 +120,7 @@ static struct { { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, - { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, }, }; struct mountres { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5e078b222b4e..7bc2da8efd4a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -699,7 +699,7 @@ static struct { { NFSERR_BAD_COOKIE, -EBADCOOKIE }, { NFSERR_NOTSUPP, -ENOTSUPP }, { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -ESERVERFAULT }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, { NFSERR_BADTYPE, -EBADTYPE }, { NFSERR_JUKEBOX, -EJUKEBOX }, { -1, -EIO } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e437fd6a819f..5cd5184b56db 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr, * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. */ - status = -ESERVERFAULT; + status = -EREMOTEIO; if (memcmp(id.data, res->sr_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5774,7 +5774,7 @@ static struct { { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, { NFS4ERR_BADTYPE, -EBADTYPE }, { NFS4ERR_LOCKED, -EAGAIN }, { NFS4ERR_SYMLINK, -ELOOP }, @@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat) } if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. */ - return -ESERVERFAULT; + return -EREMOTEIO; } /* If we cannot translate the error, the recovery routines should * handle it. -- cgit v1.2.3 From 65d269538a1129495ac45a14a777cd11cfe881d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 Feb 2010 12:19:53 -0500 Subject: NFS: Too many GETATTR and ACCESS calls after direct I/O The cached read and write paths initialize fattr->time_start in their setup procedures. The value of fattr->time_start is propagated to read_cache_jiffies by nfs_update_inode(). Subsequent calls to nfs_attribute_timeout() will then use a good time stamp when computing the attribute cache timeout, and squelch unneeded GETATTR calls. Since the direct I/O paths erroneously leave the inode's fattr->time_start field set to zero, read_cache_jiffies for that inode is set to zero after any direct read or write operation. This triggers an otw GETATTR or ACCESS call to update the file's attribute and access caches properly, even when the NFS READ or WRITE replies have usable post-op attributes. Make sure the direct read and write setup code performs the same fattr initialization as the cached I/O paths to prevent unnecessary GETATTR calls. This was likely introduced by commit 0e574af1 in 2.6.15, which appears to add new nfs_fattr_init() call sites in the cached read and write paths, but not in the equivalent places in fs/nfs/direct.c. A subsequent commit in the same series, 33801147, introduces the fattr->time_start field. Interestingly, the direct write reschedule path already has a call to nfs_fattr_init() in the right place. Reported-by: Quentin Barnes Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e1d415e97849..0d289823e856 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.eof = 0; data->res.count = bytes; + nfs_fattr_init(&data->fattr); msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(data->inode)->commit_setup(data, &msg); @@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.count = bytes; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); task_setup_data.task = &data->task; task_setup_data.callback_data = data; -- cgit v1.2.3 From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:01 +0900 Subject: percpu: add __percpu sparse annotations to fs Add __percpu sparse annotations to fs. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Trond Myklebust Cc: Alex Elder Cc: Christoph Hellwig Cc: Alexander Viro --- fs/ext4/ext4.h | 2 +- fs/nfs/iostat.h | 4 ++-- fs/xfs/xfs_mount.h | 2 +- include/linux/mount.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169a193e..4cedc91ec59d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1014,7 +1014,7 @@ struct ext4_sb_info { atomic_t s_lock_busy; /* locality groups */ - struct ext4_locality_group *s_locality_groups; + struct ext4_locality_group __percpu *s_locality_groups; /* for write statistics */ unsigned long s_sectors_written_start; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 46d779abafd3..1d8d5c813b01 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode, } #endif -static inline struct nfs_iostats *nfs_alloc_iostats(void) +static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); } -static inline void nfs_free_iostats(struct nfs_iostats *stats) +static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { if (stats != NULL) free_percpu(stats); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1df7e4502967..24c88870cdb2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -243,7 +243,7 @@ typedef struct xfs_mount { struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB - xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ + xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ unsigned long m_icsb_counters; /* disabled per-cpu counters */ struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ struct mutex m_icsb_mutex; /* balancer sync lock */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d5275364867..b5f43a34ef88 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -66,7 +66,7 @@ struct vfsmount { int mnt_pinned; int mnt_ghosts; #ifdef CONFIG_SMP - int *mnt_writers; + int __percpu *mnt_writers; #else int mnt_writers; #endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 34fc6be5bfcf..6a2e44fd75e2 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -105,7 +105,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ - struct nfs_iostats * io_stats; /* I/O statistics */ + struct nfs_iostats __percpu *io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ -- cgit v1.2.3 From 4912002fffa377e66c5caefc2c311732a4ad5fb8 Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Fri, 26 Feb 2010 17:25:14 +0000 Subject: Remove EXPERIMENTAL from NFS_FSCACHE There's currently an open Ubuntu bug[0], with the intent to compile NFS_FSCACHE (and possibly AFS_FSCACHE, 9P_FSCACHE) into the standard Ubuntu kernel. However, since *_FSCACHE still depends on EXPERIMENTAL, this won't happen. As Arjan van de Ven pointed out[1], the EXPERIMENTAL flag doesn't mean that much any more, I propose the following patch to fs/nfs/Kconfig. I'd do the same for fs/9p/Kconfig and fs/afs/Kconfig, but as I did not test 9p or AFS, I feel it would not be appropriate for me to remove the flag. [0] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/440522/comments/5 [1] http://lkml.org/lkml/2010/1/23/145 Signed-off-by: Christian Kujau Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/nfs/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 59e5673b4597..a43d07e7b924 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -95,8 +95,7 @@ config ROOT_NFS Most people say N here. config NFS_FSCACHE - bool "Provide NFS client caching support (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Provide NFS client caching support" depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y help Say Y here if you want NFS data to be cached locally on disc through -- cgit v1.2.3 From 6eae7974d0490a9dbc3091f702ea1650871652a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:44:07 -0500 Subject: Switch alloc_nfs_open_context() to struct path Signed-off-by: Al Viro --- fs/nfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756a..7570573bdb30 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -574,14 +574,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - ctx->path.dentry = dget(dentry); - ctx->path.mnt = mntget(mnt); + ctx->path = *path; + path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -686,7 +686,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; -- cgit v1.2.3 From f694869709cc39a5fbde21aa40f22999ddad0e6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:51:04 -0500 Subject: a couple of mntget+dget -> path_get in nfs4proc Signed-off-by: Al Viro --- fs/nfs/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 375f0fae2c6a..84d83be25a98 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -724,8 +724,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - p->path.mnt = mntget(path->mnt); - p->path.dentry = dget(path->dentry); + path_get(path); + p->path = *path; p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -1944,8 +1944,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; - calldata->path.mnt = mntget(path->mnt); - calldata->path.dentry = dget(path->dentry); + path_get(path); + calldata->path = *path; msg.rpc_argp = &calldata->arg, msg.rpc_resp = &calldata->res, -- cgit v1.2.3 From 26821ed40b4230259e770c9911180f38fcaa6f59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:21 +0100 Subject: make sure data is on disk before calling ->write_inode Similar to the fsync issue fixed a while ago in commit 2daea67e966dc0c42067ebea015ddac6834cef88 we need to write for data to actually hit the disk before writing out the metadata to guarantee data integrity for filesystems that modify the inode in the data I/O completion path. Currently XFS and NFS handle this manually, and AFS has a write_inode method that does nothing but waiting for data, while others are possibly missing out on this. Fortunately this change has a lot less impact than the fsync change as none of the write_inode methods starts data writeout of any form by itself. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/afs/internal.h | 1 - fs/afs/super.c | 1 - fs/afs/write.c | 21 --------------------- fs/fs-writeback.c | 15 ++++++++++----- fs/nfs/inode.c | 7 +------ fs/xfs/linux-2.6/xfs_super.c | 4 ---- 6 files changed, 11 insertions(+), 38 deletions(-) (limited to 'fs/nfs') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6ece2a13bf71..c54dad4e6063 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -733,7 +733,6 @@ extern int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); -extern int afs_write_inode(struct inode *, int); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/fs/afs/super.c b/fs/afs/super.c index e1ea1c240b6a..14f6431598ad 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,7 +48,6 @@ struct file_system_type afs_fs_type = { static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, - .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, .put_super = afs_put_super, diff --git a/fs/afs/write.c b/fs/afs/write.c index 5e15a21dbf9f..3bed54a294d4 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -584,27 +584,6 @@ int afs_writepages(struct address_space *mapping, return ret; } -/* - * write an inode back - */ -int afs_write_inode(struct inode *inode, int sync) -{ - struct afs_vnode *vnode = AFS_FS_I(inode); - int ret; - - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); - - ret = 0; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret < 0) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - } - - _leave(" = %d", ret); - return ret; -} - /* * completion of write to server */ diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1a7c42c64ff4..5f2721b1e4be 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -461,15 +461,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = do_writepages(mapping, wbc); - /* Don't write the inode if only I_DIRTY_PAGES was set */ - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + /* + * Make sure to wait on the data before writing out the metadata. + * This is important for filesystems that modify metadata on data + * I/O completion. + */ + if (wait) { + int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; } - if (wait) { - int err = filemap_fdatawait(mapping); + /* Don't write the inode if only I_DIRTY_PAGES was set */ + if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + int err = write_inode(inode, wait); if (ret == 0) ret = err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7570573bdb30..5ecd952cae1d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -101,12 +101,7 @@ int nfs_write_inode(struct inode *inode, int sync) { int ret; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret == 0) - ret = nfs_commit_inode(inode, FLUSH_SYNC); - } else - ret = nfs_commit_inode(inode, 0); + ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 25ea2408118f..8f117db6070e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1075,10 +1075,6 @@ xfs_fs_write_inode( return XFS_ERROR(EIO); if (sync) { - error = xfs_wait_on_pages(ip, 0, -1); - if (error) - goto out; - /* * Make sure the inode has hit stable storage. By using the * log and the fsync transactions we reduce the IOs we have -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/adfs.h | 2 +- fs/adfs/inode.c | 5 +++-- fs/affs/affs.h | 3 ++- fs/affs/inode.c | 2 +- fs/bfs/inode.c | 5 +++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/exofs/exofs.h | 2 +- fs/exofs/inode.c | 4 ++-- fs/ext2/ext2.h | 2 +- fs/ext2/inode.c | 11 +++++++++-- fs/ext3/inode.c | 4 ++-- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 6 +++--- fs/fat/inode.c | 9 +++++++-- fs/fs-writeback.c | 11 +++++------ fs/gfs2/super.c | 5 +++-- fs/hfs/hfs_fs.h | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/super.c | 3 ++- fs/jfs/inode.c | 5 ++++- fs/jfs/jfs_inode.h | 2 +- fs/minix/inode.c | 8 +++++--- fs/nfs/inode.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/inode.c | 2 +- fs/ntfs/inode.h | 4 ++-- fs/ntfs/super.c | 8 ++++++++ fs/omfs/inode.c | 10 ++++++++-- fs/reiserfs/inode.c | 4 ++-- fs/sysv/inode.c | 10 ++++++++-- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 2 +- fs/ubifs/file.c | 8 ++++---- fs/ubifs/super.c | 2 +- fs/udf/inode.c | 4 ++-- fs/udf/udfdecl.h | 2 +- fs/ufs/inode.c | 5 +++-- fs/ufs/ufs.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 4 ++-- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- include/linux/reiserfs_fs.h | 2 +- 45 files changed, 115 insertions(+), 72 deletions(-) (limited to 'fs/nfs') diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 9cc18775b832..2ff622f6f547 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -121,7 +121,7 @@ struct adfs_discmap { /* Inode stuff */ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); -int adfs_write_inode(struct inode *inode,int unused); +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 3f57ce4bee5d..0f5e30978135 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -9,6 +9,7 @@ */ #include #include +#include #include "adfs.h" /* @@ -360,7 +361,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int wait) +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj, wait); + ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e40caaba456..861dae68ac12 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -175,7 +175,8 @@ extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, unsigned long ino); -extern int affs_write_inode(struct inode *inode, int); +extern int affs_write_inode(struct inode *inode, + struct writeback_control *wbc); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); /* file.c */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3c4ec7d864c4..c9744d771d98 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -166,7 +166,7 @@ bad_inode: } int -affs_write_inode(struct inode *inode, int unused) +affs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8f3d9fd89604..f22a7d3dc362 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "bfs.h" @@ -98,7 +99,7 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int wait) +static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; @@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) err = -EIO; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2aa8ec6a0981..8b5cfdd4bfc1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); -int btrfs_write_inode(struct inode *inode, int wait); +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f8969..c41db6d45ab6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3968,7 +3968,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 59b8bf2825c7..8442e353309f 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -261,7 +261,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); extern struct inode *exofs_iget(struct super_block *, unsigned long); struct inode *exofs_new_inode(struct inode *, int); -extern int exofs_write_inode(struct inode *, int); +extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); extern void exofs_delete_inode(struct inode *); /* dir.c: */ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5514f3c2c2f4..a17e4b733e35 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1280,9 +1280,9 @@ out: return ret; } -int exofs_write_inode(struct inode *inode, int wait) +int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) { - return exofs_update_inode(inode, wait); + return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } /* diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 061914add3cf..0b038e47ad2f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ extern struct inode *ext2_iget (struct super_block *, unsigned long); -extern int ext2_write_inode (struct inode *, int); +extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71b032c65a02..36ae1cac767c 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +static int __ext2_write_inode(struct inode *inode, int do_sync); + /* * Test whether an inode is a fast symlink. */ @@ -64,7 +66,7 @@ void ext2_delete_inode (struct inode * inode) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_write_inode(inode, inode_needs_sync(inode)); + __ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1335,7 +1337,7 @@ bad_inode: return ERR_PTR(ret); } -int ext2_write_inode(struct inode *inode, int do_sync) +static int __ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1440,6 +1442,11 @@ int ext2_write_inode(struct inode *inode, int do_sync) return err; } +int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..7aca55fcc976 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3096,7 +3096,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext3_write_inode(struct inode *inode, int wait) +int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) { if (current->flags & PF_MEMALLOC) return 0; @@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; return ext3_force_commit(inode->i_sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cedc91ec59d..50af1a2c65e7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1416,7 +1416,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); +extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..d01a6cdbf854 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5177,7 +5177,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext4_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; @@ -5191,7 +5191,7 @@ int ext4_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; err = ext4_force_commit(inode->i_sb); @@ -5201,7 +5201,7 @@ int ext4_write_inode(struct inode *inode, int wait) err = ext4_get_inode_loc(inode, &iloc); if (err) return err; - if (wait) + if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { ext4_error(inode->i_sb, __func__, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 14da530b05ca..fbeecdc194dc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, return i_pos; } -static int fat_write_inode(struct inode *inode, int wait) +static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -634,9 +634,14 @@ retry: return err; } +static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int fat_sync_inode(struct inode *inode) { - return fat_write_inode(inode, 1); + return __fat_write_inode(inode, 1); } EXPORT_SYMBOL_GPL(fat_sync_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2721b1e4be..76fc4d594acb 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } -static int write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, struct writeback_control *wbc) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, sync); + return inode->i_sb->s_op->write_inode(inode, wbc); return 0; } @@ -421,7 +421,6 @@ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; - int wait = wbc->sync_mode == WB_SYNC_ALL; unsigned dirty; int ret; @@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * We'll have another go at writing back this inode when we * completed a full scan of b_io. */ - if (!wait) { + if (wbc->sync_mode != WB_SYNC_ALL) { requeue_io(inode); return 0; } @@ -466,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * This is important for filesystems that modify metadata on data * I/O completion. */ - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; @@ -474,7 +473,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + int err = write_inode(inode, wbc); if (ret == 0) ret = err; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5e22629da67..ca87598ead7f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) * Returns: errno */ -static int gfs2_write_inode(struct inode *inode, int sync) +static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) do_unlock: gfs2_glock_dq_uninit(&gh); do_flush: - if (sync != 0) + if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); return ret; } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 052387e11671..fe35e3b626c4 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops; extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); -extern int hfs_write_inode(struct inode *, int); +extern int hfs_write_inode(struct inode *, struct writeback_control *); extern int hfs_inode_setattr(struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a1cbff2b4d99..14f5cb1b9fdc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext, HFS_SB(inode->i_sb)->alloc_blksz); } -int hfs_write_inode(struct inode *inode, int unused) +int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct inode *main_inode = inode; struct hfs_find_data fd; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 43022f3d5148..74b473a8ef92 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -87,7 +87,8 @@ bad_inode: return ERR_PTR(err); } -static int hfsplus_write_inode(struct inode *inode, int unused) +static int hfsplus_write_inode(struct inode *inode, + struct writeback_control *wbc) { struct hfsplus_vh *vhdr; int ret = 0; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b2ae190a77ba..182b78cc3e62 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait) return rc; } -int jfs_write_inode(struct inode *inode, int wait) +int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { + int wait = wbc->sync_mode == WB_SYNC_ALL; + if (test_cflag(COMMIT_Nolink, inode)) return 0; /* diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 1eff7db34d63..15902b03c2a7 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode*, int); +extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_delete_inode(struct inode *); extern void jfs_dirty_inode(struct inode *); extern void jfs_truncate(struct inode *); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 74ea82d72164..756f8c93780c 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -17,8 +17,10 @@ #include #include #include +#include -static int minix_write_inode(struct inode * inode, int wait); +static int minix_write_inode(struct inode *inode, + struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static int minix_write_inode(struct inode *inode, int wait) +static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct buffer_head *bh; @@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait) bh = V2_minix_update_inode(inode); if (!bh) return -EIO; - if (wait && buffer_dirty(bh)) { + if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ecd952cae1d..7f9ecc46f3fb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,11 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b32..11f82f03c5de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 5a9e34475e37..9173e82a45d1 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, write_inode_now(bmp_vi, !datasync); iput(bmp_vi); } - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 43179ddd336f..b681c71d7069 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(S_ISDIR(vi->i_mode)); if (!datasync || !NInoNonResident(NTFS_I(vi))) - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); /* * NOTE: If we were to use mapping->private_list (see ext2 and diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc2505abb6d7..4b57fb1eac2a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2957,7 +2957,7 @@ out: * * Return 0 on success and -errno on error. */ -int ntfs_write_inode(struct inode *vi, int sync) +int __ntfs_write_inode(struct inode *vi, int sync) { sle64 nt; ntfs_inode *ni = NTFS_I(vi); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 117eaf8032a3..9a113544605d 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi); extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -extern int ntfs_write_inode(struct inode *vi, int sync); +extern int __ntfs_write_inode(struct inode *vi, int sync); static inline void ntfs_commit_inode(struct inode *vi) { if (!is_bad_inode(vi)) - ntfs_write_inode(vi, 1); + __ntfs_write_inode(vi, 1); return; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 80b04770e8e9..1cf39dfaee7a 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -39,6 +39,7 @@ #include "dir.h" #include "debug.h" #include "index.h" +#include "inode.h" #include "aops.h" #include "layout.h" #include "malloc.h" @@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) return 0; } +#ifdef NTFS_RW +static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) +{ + return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL); +} +#endif + /** * The complete super operations. */ diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index f3b7c1541f3a..75d9b5ba1d45 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "omfs.h" @@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi) oi->i_head.h_check_xor = xor; } -static int omfs_write_inode(struct inode *inode, int wait) +static int __omfs_write_inode(struct inode *inode, int wait) { struct omfs_inode *oi; struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); @@ -162,9 +163,14 @@ out: return ret; } +static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int omfs_sync_inode(struct inode *inode) { - return omfs_write_inode(inode, 1); + return __omfs_write_inode(inode, 1); } /* diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2df0f5c7c60b..0d651f980a8d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1615,7 +1615,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, ** to properly mark inodes for datasync and such, but only actually ** does something when called for a synchronous update. */ -int reiserfs_write_inode(struct inode *inode, int do_sync) +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct reiserfs_transaction_handle th; int jbegin_count = 1; @@ -1627,7 +1627,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync) ** inode needs to reach disk for safety, and they can safely be ** ignored because the altered inode has already been logged. */ - if (do_sync && !(current->flags & PF_MEMALLOC)) { + if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { reiserfs_write_lock(inode->i_sb); if (!journal_begin(&th, inode->i_sb, jbegin_count)) { reiserfs_update_sd(&th, inode); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9824743832a7..4573734d723d 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "sysv.h" @@ -246,7 +247,7 @@ bad_inode: return ERR_PTR(-EIO); } -int sysv_write_inode(struct inode *inode, int wait) +static int __sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait) return 0; } +int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int sysv_sync_inode(struct inode *inode) { - return sysv_write_inode(inode, 1); + return __sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 53786eb5cf60..94cb9b4d76c2 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, /* inode.c */ extern struct inode *sysv_iget(struct super_block *, unsigned int); -extern int sysv_write_inode(struct inode *, int); +extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 552fb0111fff..401e503d44a1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); return err; out_cancel: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 16a6444330ec..e26c02ab6cd5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) kunmap_atomic(kaddr, KM_USER0); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; out: @@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 43f9d19a6f33..4d2f2157dd3f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode) /* * Note, Linux write-back code calls this without 'i_mutex'. */ -static int ubifs_write_inode(struct inode *inode, int wait) +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 378a7592257c..b02089247296 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1373,12 +1373,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) return mode; } -int udf_write_inode(struct inode *inode, int sync) +int udf_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = udf_update_inode(inode, sync); + ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8d46f4294ee7..4223ac855da9 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); -extern int udf_write_inode(struct inode *, int); +extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); extern int udf_extend_file(struct inode *, struct extent_position *, struct kernel_long_ad *, sector_t); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cf33379fd46..0a627e08610b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -890,11 +891,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -int ufs_write_inode (struct inode * inode, int wait) +int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = ufs_update_inode (inode, wait); + ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 01d0e2a3b230..43f9f5d5670e 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern int ufs_write_inode (struct inode *, int); +extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8f117db6070e..71345a370d9f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1063,7 +1063,7 @@ xfs_log_inode( STATIC int xfs_fs_write_inode( struct inode *inode, - int sync) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1074,7 +1074,7 @@ xfs_fs_write_inode( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (sync) { + if (wbc->sync_mode == WB_SYNC_ALL) { /* * Make sure the inode has hit stable storage. By using the * log and the fsync transactions we reduce the IOs we have diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 6b049030fbe6..deac2566450e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -877,7 +877,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, int create); extern struct inode *ext3_iget(struct super_block *, unsigned long); -extern int ext3_write_inode (struct inode *, int); +extern int ext3_write_inode (struct inode *, struct writeback_control *); extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_delete_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b3182c7eb5f..45689621a851 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1557,7 +1557,7 @@ struct super_operations { void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, int); + int (*write_inode) (struct inode *, struct writeback_control *wbc); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 1ba3cf6edfbb..3b603f474186 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2034,7 +2034,7 @@ void reiserfs_read_locked_inode(struct inode *inode, int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); void reiserfs_delete_inode(struct inode *inode); -int reiserfs_write_inode(struct inode *inode, int); +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, -- cgit v1.2.3 From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:46:56 -0800 Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c The sole purpose of nfs_write_inode is to commit unstable writes, so move it into fs/nfs/write.c, and make nfs_commit_inode static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ fs/nfs/write.c | 24 +++++++++++++++++++++++- include/linux/nfs_fs.h | 7 ------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f9ecc46f3fb..89e98312599d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret; - - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d63d964a0392..09e97097baaa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int res; @@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how) } return res; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} #else static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { return 0; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + return 0; +} #endif +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return nfs_commit_unstable_pages(inode, wbc); +} + long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d09db1bc9083..384ea3ef2863 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} #endif static inline int -- cgit v1.2.3 From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:53:39 -0800 Subject: NFS: Add a count of the number of unstable writes carried by an inode In order to know when we should do opportunistic commits of the unstable writes, when the VM is doing a background flush, we add a field to count the number of unstable writes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/write.c | 14 ++++++++++---- include/linux/nfs_fs.h | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 89e98312599d..aa5a831001ab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1404,6 +1404,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 09e97097baaa..dc08a6fbde67 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req) radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -573,11 +574,15 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; if (!nfs_need_commit(nfsi)) return 0; - return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + return ret; } #else static inline int nfs_need_commit(struct nfs_inode *nfsi) @@ -642,9 +647,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req)) - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT); + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 384ea3ef2863..309217f46e28 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,6 +166,7 @@ struct nfs_inode { struct radix_tree_root nfs_page_tree; unsigned long npages; + unsigned long ncommit; /* Open contexts for shared mmap writes */ struct list_head open_files; -- cgit v1.2.3 From 420e3646bb7d93a571734034249fbb1ae1a7a5c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:00:02 -0800 Subject: NFS: Reduce the number of unnecessary COMMIT calls If the caller is doing a non-blocking flush, and there are still writebacks pending on the wire, we can usually defer the COMMIT call until those writes are done. Also ensure that we honour the wbc->nonblocking flag. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc08a6fbde67..fc05e35da6a9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1415,12 +1415,30 @@ static int nfs_commit_inode(struct inode *inode, int how) static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { - int ret; + struct nfs_inode *nfsi = NFS_I(inode); + int flags = FLUSH_SYNC; + int ret = 0; - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) + /* Don't commit yet if this is a non-blocking flush and there are + * lots of outstanding writes for this mapping. + */ + if (wbc->sync_mode == WB_SYNC_NONE && + nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; + + if (wbc->nonblocking) + flags = 0; + ret = nfs_commit_inode(inode, flags); + if (ret >= 0) { + if (wbc->sync_mode == WB_SYNC_NONE) { + if (ret < wbc->nr_to_write) + wbc->nr_to_write -= ret; + else + wbc->nr_to_write = 0; + } return 0; + } +out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -- cgit v1.2.3 From 5bad5abec4058c5214bfc72cec418348d6747977 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:02:24 -0800 Subject: NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set Signed-off-by: Trond Myklebust Acked-by: Peter Zijlstra Acked-by: Wu Fengguang --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fc05e35da6a9..704e67d392e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1426,7 +1426,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr nfsi->ncommit <= (nfsi->npages >> 1)) goto out_mark_dirty; - if (wbc->nonblocking) + if (wbc->nonblocking || wbc->for_background) flags = 0; ret = nfs_commit_inode(inode, flags); if (ret >= 0) { -- cgit v1.2.3 From 2928db1ffeacc9717c2d5c230d450bcc377b3ae9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:18 -0800 Subject: NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages Since nfs_scan_list() doesn't wait for locked pages, we have a race in which it is possible to end up with an inode that needs to send a COMMIT, but which does not have the I_DIRTY_DATASYNC flag set. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 704e67d392e5..e40e949598fd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -582,6 +582,8 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); if (ret > 0) nfsi->ncommit -= ret; + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } #else -- cgit v1.2.3 From c988950eb6dd6f8e6d98503ca094622729e9aa13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:21 -0800 Subject: NFS: Simplify nfs_wb_page_cancel() In all cases we should be able to just remove the request and call cancel_dirty_page(). Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 39 +-------------------------------------- include/linux/nfs_fs.h | 2 -- 2 files changed, 1 insertion(+), 40 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e40e949598fd..dc7f5e9a23b4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -540,19 +540,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u return res; } -static void nfs_cancel_commit_list(struct list_head *head) -{ - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1495,13 +1482,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&inode->i_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&inode->i_lock); - continue; - } pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); @@ -1558,26 +1538,13 @@ int nfs_wb_nocommit(struct inode *inode) int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { req = nfs_page_find_request(page); if (req == NULL) - goto out; - if (test_bit(PG_CLEAN, &req->wb_flags)) { - nfs_release_request(req); break; - } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* @@ -1591,12 +1558,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret < 0) - goto out; + break; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); -out: return ret; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 309217f46e28..1083134c02ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -34,8 +34,6 @@ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ #define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ -#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */ -#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */ #ifdef __KERNEL__ -- cgit v1.2.3 From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:26 -0800 Subject: NFS: Replace __nfs_write_mapping with sync_inode() Now that we have correct COMMIT semantics in writeback_single_inode, we can reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a call to filemap_write_and_wait(), which doesn't need to hold the inode->i_mutex. With that done, we can eliminate nfs_write_mapping() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++---------- fs/nfs/write.c | 42 +++++------------------------------------- include/linux/nfs_fs.h | 2 -- 3 files changed, 10 insertions(+), 49 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa5a831001ab..443772df9b17 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc7f5e9a23b4..0b323091b481 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,7 +1454,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; long pages, ret; /* FIXME */ @@ -1471,14 +1470,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr npages = 0; } } - how &= ~FLUSH_NOCOMMIT; spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - if (nocommit) - break; pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; @@ -1492,47 +1488,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - int ret; - - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); - if (ret < 0) - goto out; - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; - return __nfs_write_mapping(mapping, &wbc, how); -} - -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, 0); -} - -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + return sync_inode(inode, &wbc); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1083134c02ff..93f439e7c5bf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -33,7 +33,6 @@ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ -#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #ifdef __KERNEL__ @@ -478,7 +477,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -- cgit v1.2.3 From 7f2f12d963e7c33a93bfb0b22f0178eb1e6a4196 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:28 -0800 Subject: NFS: Simplify nfs_wb_page() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 120 ++++++++++--------------------------------------- include/linux/nfs_fs.h | 1 - 2 files changed, 23 insertions(+), 98 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b323091b481..53ff70e23993 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -502,44 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by fatal signals only. - */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&inode->i_lock); - if (error < 0) - return error; - res++; - } - return res; -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1432,7 +1394,7 @@ out_mark_dirty: return ret; } #else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +static int nfs_commit_inode(struct inode *inode, int how) { return 0; } @@ -1448,46 +1410,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - struct inode *inode = mapping->host; - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; - } - } - spin_lock(&inode->i_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&inode->i_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&inode->i_lock); - - } while (ret >= 0); - spin_unlock(&inode->i_lock); - return ret; -} - /* * flush the inode to disk. */ @@ -1531,45 +1453,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) return ret; } -static int nfs_wb_page_priority(struct inode *inode, struct page *page, - int how) +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; + struct nfs_page *req; + int need_commit; int ret; - do { + while(PagePrivate(page)) { if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; - } else if (!PagePrivate(page)) + } + req = nfs_find_and_lock_request(page); + if (!req) break; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret < 0) + if (IS_ERR(req)) { + ret = PTR_ERR(req); goto out_error; - } while (PagePrivate(page)); + } + need_commit = test_bit(PG_CLEAN, &req->wb_flags); + nfs_clear_page_tag_locked(req); + if (need_commit) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out_error; + } + } return 0; out_error: - __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 93f439e7c5bf..b789d85bff82 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -475,7 +475,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); * Try to write back everything synchronously (but check the * return value!) */ -extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From 5cf95214ccb915591e2214f81de4659302d3e452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:29 -0800 Subject: NFS: Clean up nfs_sync_mapping Remove the redundant call to filemap_write_and_wait(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 443772df9b17..e8b41170d295 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -114,16 +114,12 @@ void nfs_clear_inode(struct inode *inode) */ int nfs_sync_mapping(struct address_space *mapping) { - int ret; + int ret = 0; - if (mapping->nrpages == 0) - return 0; - unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_write_and_wait(mapping); - if (ret != 0) - goto out; - ret = nfs_wb_all(mapping->host); -out: + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); + } return ret; } -- cgit v1.2.3 From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:30 -0800 Subject: NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 41 +---------------------------------------- fs/nfs/symlink.c | 2 +- include/linux/nfs_fs.h | 1 - 4 files changed, 3 insertions(+), 43 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3c7f03b669fb..a1f6b4438fb1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8b41170d295..dbaaf7d2a188 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 412738dbfbc7..2ea9e5c27e55 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page; void *err; - err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b789d85bff82..1a0b85aa151e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -347,7 +347,6 @@ extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); -extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3