summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
authorEric Sandeen <sandeen@redhat.com>2012-10-28 22:24:57 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2012-11-05 09:50:41 +0100
commit0d819068496793d2f8ef48a6fcec4c7d329d8b8a (patch)
tree4c99864fbba503190f68fc918ab1d7efc7422de9 /fs/ext4
parent5390967b2f98e6aa46f20eae09580e7db73826b9 (diff)
downloadlwn-0d819068496793d2f8ef48a6fcec4c7d329d8b8a.tar.gz
lwn-0d819068496793d2f8ef48a6fcec4c7d329d8b8a.zip
ext4: fix unjournaled inode bitmap modification
commit ffb5387e85d528fb6d0d924abfa3fbf0fc484071 upstream. commit 119c0d4460b001e44b41dcf73dc6ee794b98bd31 changed ext4_new_inode() such that the inode bitmap was being modified outside a transaction, which could lead to corruption, and was discovered when journal_checksum found a bad checksum in the journal during log replay. Nix ran into this when using the journal_async_commit mount option, which enables journal checksumming. The ensuing journal replay failures due to the bad checksums led to filesystem corruption reported as the now infamous "Apparent serious progressive ext4 data corruption bug" [ Changed by tytso to only call ext4_journal_get_write_access() only when we're fairly certain that we're going to allocate the inode. ] I've tested this by mounting with journal_checksum and running fsstress then dropping power; I've also tested by hacking DM to create snapshots w/o first quiescing, which allows me to test journal replay repeatedly w/o actually power-cycling the box. Without the patch I hit a journal checksum error every time. With this fix it survives many iterations. Reported-by: Nix <nix@esperi.org.uk> Signed-off-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ialloc.c19
1 files changed, 9 insertions, 10 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee374d27fdb..556cc821652c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -697,6 +697,10 @@ repeat_in_this_group:
"inode=%lu", ino + 1);
continue;
}
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err)
+ goto fail;
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
ext4_unlock_group(sb, group);
@@ -710,6 +714,11 @@ repeat_in_this_group:
goto out;
got:
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
/* We may have to initialize the block bitmap if it isn't already */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -742,11 +751,6 @@ got:
goto fail;
}
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, group_desc_bh);
if (err)
@@ -789,11 +793,6 @@ got:
ext4_unlock_group(sb, group);
}
- BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
if (err)