summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBernard Blackham <bernard@blackham.com.au>2005-04-16 15:25:45 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:25:45 -0700
commite072c6f2af57fb8ad9e0f29bfff3f79edf7bdd55 (patch)
tree9d72262a63754b39df4ebfed5bc74855f0408c3a
parent614a7d6a76b7fb37bb399047eb3ccf86cafbf60d (diff)
downloadlwn-e072c6f2af57fb8ad9e0f29bfff3f79edf7bdd55.tar.gz
lwn-e072c6f2af57fb8ad9e0f29bfff3f79edf7bdd55.zip
[PATCH] ext2 corruption - regression between 2.6.9 and 2.6.10
Whilst trying to stress test a Promise SX8 card, we stumbled across some nasty filesystem corruption in ext2. Our tests involved creating an ext2 partition, mounting, running several concurrent fsx's over it, umounting, and fsck'ing, all scripted[1]. The fsck would always return with errors. This regression was traced back to a change between 2.6.9 and 2.6.10, which moved the functionality of ext2_put_inode into ext2_clear_inode. The attached patch reverses this change, and eliminates the source of corruption. Mingming Cao <cmm@us.ibm.com> said: I think his patch for ext2 is correct. The corruption on ext3 is not the same issue he saw on ext2. I believe that's the race between discard reservation and reservation in-use that we already fixed in 2.6.12-rc1. For the problem related to ext2: at the time when we designed reservation for ext3, we decided we only need to discard the reservation at the last file close, so we have ext3_discard_reservation on iput_final->ext3_clear_inode. ext2 handled discarding preallocation differently at that time: it discarded the preallocation at each iput(), not in iput_final(), so we thought it was unnecessary to thrash it so frequently, and that the right thing to do, as we did for ext3 reservation, was to discard preallocation on the last iput(). So we moved the ext2_discard_prealloc() call from ext2_put_inode() to ext2_clear_inode(). Since ext2 preallocation is done on disk, it is possible that at unmount time someone still holds a reference to the inode, so the preallocation for a file has not been discarded yet; we then still mark those blocks as allocated on disk while they are not actually in the inode's block map, so fsck will catch/fix that error later. This is not an issue for ext3, as ext3 reservation (pre-allocation) is done in memory. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/ext2/ext2.h 1
-rw-r--r-- fs/ext2/inode.c 13
-rw-r--r-- fs/ext2/super.c 4
3 files changed, 15 insertions, 3 deletions
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9f1a40e7945c..8f0fd726c3f1 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -116,6 +116,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
/* inode.c */
extern void ext2_read_inode (struct inode *);
extern int ext2_write_inode (struct inode *, int);
+extern void ext2_put_inode (struct inode *);
extern void ext2_delete_inode (struct inode *);
extern int ext2_sync_inode (struct inode *);
extern void ext2_discard_prealloc (struct inode *);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b890be022496..a50d9db4b6e4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -53,6 +53,19 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
}
/*
+ * Called at each iput().
+ *
+ * The inode may be "bad" if ext2_read_inode() saw an error from
+ * ext2_get_inode(), so we need to check that to avoid freeing random disk
+ * blocks.
+ */
+void ext2_put_inode(struct inode *inode)
+{
+ if (!is_bad_inode(inode))
+ ext2_discard_prealloc(inode);
+}
+
+/*
* Called at the last iput() if i_nlink is zero.
*/
void ext2_delete_inode (struct inode * inode)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 37ca77a157ba..661c3d98d946 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -198,11 +198,8 @@ static void ext2_clear_inode(struct inode *inode)
ei->i_default_acl = EXT2_ACL_NOT_CACHED;
}
#endif
- if (!is_bad_inode(inode))
- ext2_discard_prealloc(inode);
}
-
#ifdef CONFIG_QUOTA
static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
@@ -213,6 +210,7 @@ static struct super_operations ext2_sops = {
.destroy_inode = ext2_destroy_inode,
.read_inode = ext2_read_inode,
.write_inode = ext2_write_inode,
+ .put_inode = ext2_put_inode,
.delete_inode = ext2_delete_inode,
.put_super = ext2_put_super,
.write_super = ext2_write_super,