summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2013-04-05 16:51:15 -0400
committerJosef Bacik <jbacik@fusionio.com>2013-05-06 15:54:34 -0400
commit09a2a8f96e3009273bed1833b3f210e2c68728a5 (patch)
treef4742a6e962991e9f7b0252805186466f5a005c6 /fs/btrfs/inode.c
parentcc95bef635a649d595cf8d1cd4fcff5b6bf13023 (diff)
downloadlwn-09a2a8f96e3009273bed1833b3f210e2c68728a5.tar.gz
lwn-09a2a8f96e3009273bed1833b3f210e2c68728a5.zip
Btrfs: fix bad extent logging
A user sent me a btrfs-image of a file system that was panicing on mount during the log recovery. I had originally thought these problems were from a bug in the free space cache code, but that was just a symptom of the problem. The problem is if your application does something like this [prealloc][prealloc][prealloc] the internal extent maps will merge those all together into one extent map, even though on disk they are 3 separate extents. So if you go to write into one of these ranges the extent map will be right since we use the physical extent when doing the write, but when we log the extents they will use the wrong sizes for the remainder prealloc space. If this doesn't happen to trip up the free space cache (which it won't in a lot of cases) then you will get bogus entries in your extent tree which will screw stuff up later. The data and such will still work, but everything else is broken. This patch fixes this by not allowing extents that are on the modified list to be merged. This has the side effect that we are no longer adding everything to the modified list all the time, which means we now have to call btrfs_drop_extents every time we log an extent into the tree. So this allows me to drop all this speciality code I was using to get around calling btrfs_drop_extents. With this patch the testcase I've created no longer creates a bogus file system after replaying the log. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c34
1 files changed, 8 insertions, 26 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 456667b04fe6..c41637a1ed38 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -732,10 +732,7 @@ retry:
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -941,10 +938,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -1387,10 +1381,7 @@ out_check:
em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -4467,10 +4458,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
write_lock(&em_tree->lock);
- err = add_extent_mapping(em_tree, hole_em);
- if (!err)
- list_move(&hole_em->list,
- &em_tree->modified_extents);
+ err = add_extent_mapping(em_tree, hole_em, 1);
write_unlock(&em_tree->lock);
if (err != -EEXIST)
break;
@@ -5989,7 +5977,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
em->block_start += start_diff;
em->block_len -= start_diff;
}
- return add_extent_mapping(em_tree, em);
+ return add_extent_mapping(em_tree, em, 0);
}
static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6283,7 +6271,7 @@ insert:
err = 0;
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
@@ -6706,10 +6694,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
btrfs_drop_extent_cache(inode, em->start,
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
@@ -8593,10 +8578,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST)
break;