summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJoseph Qi <joseph.qi@huawei.com>2015-09-04 15:43:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-04 16:54:41 -0700
commit512f62acbdf1ee81ce4882c85835f5420a1c304c (patch)
tree0c34ad3fd96408a4c572634a939cb2769f129bd2 /fs
parent81cf09edc793688cbf53c3082802571e2018f3ac (diff)
downloadlwn-512f62acbdf1ee81ce4882c85835f5420a1c304c.tar.gz
lwn-512f62acbdf1ee81ce4882c85835f5420a1c304c.zip
ocfs2: fix race between dio and recover orphan
During direct io the inode will be added to orphan first and then deleted from orphan. There is a race window that the orphan entry will be deleted twice and thus trigger the BUG when validating OCFS2_DIO_ORPHANED_FL in ocfs2_del_inode_from_orphan. ocfs2_direct_IO_write ... ocfs2_add_inode_to_orphan >>>>>>>> race window. 1) another node may rm the file and then down, this node take care of orphan recovery and clear flag OCFS2_DIO_ORPHANED_FL. 2) since rw lock is unlocked, it may race with another orphan recovery and append dio. ocfs2_del_inode_from_orphan So take inode mutex lock when recovering orphans and make rw unlock at the end of aio write in case of append dio. Signed-off-by: Joseph Qi <joseph.qi@huawei.com> Reported-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Weiwei Wang <wangww631@huawei.com> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/aops.c9
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/namei.c42
-rw-r--r--fs/ocfs2/super.c2
6 files changed, 24 insertions, 41 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0f5fd9db8194..1e88ff483702 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -627,10 +627,13 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}
- ocfs2_iocb_clear_rw_locked(iocb);
+ /* Let rw unlock to be done later to protect append direct io write */
+ if (offset + bytes <= i_size_read(inode)) {
+ ocfs2_iocb_clear_rw_locked(iocb);
- level = ocfs2_iocb_rw_locked_level(iocb);
- ocfs2_rw_unlock(inode, level);
+ level = ocfs2_iocb_rw_locked_level(iocb);
+ ocfs2_rw_unlock(inode, level);
+ }
}
static int ocfs2_releasepage(struct page *page, gfp_t wait)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2eb11363b1f7..5d384a6cd696 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2416,7 +2416,7 @@ relock:
}
no_sync:
- if (unaligned_dio) {
+ if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
ocfs2_iocb_clear_unaligned_aio(iocb);
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 5e86b247c821..ca3431ee7f24 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -81,8 +81,6 @@ struct ocfs2_inode_info
tid_t i_sync_tid;
tid_t i_datasync_tid;
- wait_queue_head_t append_dio_wq;
-
struct dquot *i_dquot[MAXQUOTAS];
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7c099f7032fd..5e5626884433 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2170,6 +2170,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
iter = oi->ip_next_orphan;
oi->ip_next_orphan = NULL;
+ mutex_lock(&inode->i_mutex);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
@@ -2206,17 +2207,16 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
if (ret)
mlog_errno(ret);
-
- wake_up(&OCFS2_I(inode)->append_dio_wq);
} /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
unlock_inode:
ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ di_bh = NULL;
unlock_rw:
ocfs2_rw_unlock(inode, 1);
next:
+ mutex_unlock(&inode->i_mutex);
iput(inode);
- brelse(di_bh);
- di_bh = NULL;
inode = iter;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 948681e37cfd..e9ea7f23da12 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2601,27 +2601,6 @@ leave:
return status;
}
-static int ocfs2_dio_orphan_recovered(struct inode *inode)
-{
- int ret;
- struct buffer_head *di_bh = NULL;
- struct ocfs2_dinode *di = NULL;
-
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
- mlog_errno(ret);
- return 0;
- }
-
- di = (struct ocfs2_dinode *) di_bh->b_data;
- ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
- ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
-
- return ret;
-}
-
-#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
struct inode *inode)
{
@@ -2633,7 +2612,6 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
handle_t *handle = NULL;
struct ocfs2_dinode *di = NULL;
-restart:
status = ocfs2_inode_lock(inode, &di_bh, 1);
if (status < 0) {
mlog_errno(status);
@@ -2643,15 +2621,21 @@ restart:
di = (struct ocfs2_dinode *) di_bh->b_data;
/*
* Another append dio crashed?
- * If so, wait for recovery first.
+ * If so, manually recover it first.
*/
if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
- ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
- ocfs2_dio_orphan_recovered(inode),
- msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
- goto restart;
+ status = ocfs2_truncate_file(inode, di_bh, i_size_read(inode));
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ status = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
}
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 403c5660b306..4474ef2bbc96 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1746,8 +1746,6 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres);
- init_waitqueue_head(&oi->append_dio_wq);
-
ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
&ocfs2_inode_caching_ops);