summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2011-10-31 20:52:39 -0400
committerChris Mason <chris.mason@oracle.com>2011-11-06 03:03:48 -0500
commite688b7252f784c2479d559f9f70ca8354752c5e7 (patch)
tree3934b0a9c348b2900e08e8fc9c0e6819e80d0fff /fs/btrfs/tree-log.c
parent1eae31e918972bbeefc119d23c1d67674f49a301 (diff)
downloadlwn-e688b7252f784c2479d559f9f70ca8354752c5e7.tar.gz
lwn-e688b7252f784c2479d559f9f70ca8354752c5e7.zip
Btrfs: fix extent pinning bugs in the tree log
The tree log had two important bugs that could cause corruptions after a crash. Sometimes we were allowing tree log blocks to be reused after the tree log was committed but before the transaction commit was done. This allowed a future metadata write to overwrite the tree log data. It is fixed by adding a new variant of freeing reserved extents that always pins them. Credit goes to Stefan Behrens and Arne Jansen for many many hours spent tracking this bug down. During tree log replay, we do a pass through the tree log and pin all the extents we find. This makes sure the replay code won't go in and use any of those blocks for new allocations during replay. The problem is the free space cache isn't honoring these pinned extents. So the allocator can end up handing them out, leading to all kinds of problems during replay. The fix here is to force any free space cache to load while we pin the extents, and then to make sure we remove the pinned extents from the free space rbtree. Signed-off-by: Chris Mason <chris.mason@oracle.com> Reported-by: Stefan Behrens <sbehrens@giantdisaster.de>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c11
1 files changed, 6 insertions, 5 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 310ab22cfe58..8ca1b6b83bd1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log,
struct walk_control *wc, u64 gen)
{
if (wc->pin)
- btrfs_pin_extent(log->fs_info->extent_root,
- eb->start, eb->len, 0);
+ btrfs_pin_extent_for_log_replay(wc->trans,
+ log->fs_info->extent_root,
+ eb->start, eb->len);
if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write)
@@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(root,
+ ret = btrfs_free_and_pin_reserved_extent(root,
bytenr, blocksize);
BUG_ON(ret);
}
@@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(next);
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(root,
+ ret = btrfs_free_and_pin_reserved_extent(root,
path->nodes[*level]->start,
path->nodes[*level]->len);
BUG_ON(ret);
@@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(log, next->start,
+ ret = btrfs_free_and_pin_reserved_extent(log, next->start,
next->len);
BUG_ON(ret);
}