summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2020-11-05 10:45:21 -0500
committerDavid Sterba <dsterba@suse.com>2020-12-08 15:54:07 +0100
commite114c545bb699b2e97e8661d41f34a1651b43f50 (patch)
tree61a7c10304b731ebd5922e89605a3d971d76a58c /fs
parent3fbaf25817f7013fad3ccf76279f0bd5719a5205 (diff)
downloadlwn-e114c545bb699b2e97e8661d41f34a1651b43f50.tar.gz
lwn-e114c545bb699b2e97e8661d41f34a1651b43f50.zip
btrfs: set the lockdep class for extent buffers on creation
Both Filipe and Fedora QA recently hit the following lockdep splat: WARNING: possible recursive locking detected 5.10.0-0.rc1.20201028gited8780e3f2ec.57.fc34.x86_64 #1 Not tainted -------------------------------------------- rsync/2610 is trying to acquire lock: ffff89617ed48f20 (&eb->lock){++++}-{2:2}, at: btrfs_tree_read_lock_atomic+0x34/0x140 but task is already holding lock: ffff8961757b1130 (&eb->lock){++++}-{2:2}, at: btrfs_tree_read_lock_atomic+0x34/0x140 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&eb->lock); lock(&eb->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by rsync/2610: #0: ffff896107212b90 (&type->i_mutex_dir_key#10){++++}-{3:3}, at: walk_component+0x10c/0x190 #1: ffff8961757b1130 (&eb->lock){++++}-{2:2}, at: btrfs_tree_read_lock_atomic+0x34/0x140 stack backtrace: CPU: 1 PID: 2610 Comm: rsync Not tainted 5.10.0-0.rc1.20201028gited8780e3f2ec.57.fc34.x86_64 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x8b/0xb0 __lock_acquire.cold+0x12d/0x2a4 ? kvm_sched_clock_read+0x14/0x30 ? sched_clock+0x5/0x10 lock_acquire+0xc8/0x400 ? btrfs_tree_read_lock_atomic+0x34/0x140 ? read_block_for_search.isra.0+0xdd/0x320 _raw_read_lock+0x3d/0xa0 ? btrfs_tree_read_lock_atomic+0x34/0x140 btrfs_tree_read_lock_atomic+0x34/0x140 btrfs_search_slot+0x616/0x9a0 btrfs_lookup_dir_item+0x6c/0xb0 btrfs_lookup_dentry+0xa8/0x520 ? lockdep_init_map_waits+0x4c/0x210 btrfs_lookup+0xe/0x30 __lookup_slow+0x10f/0x1e0 walk_component+0x11b/0x190 path_lookupat+0x72/0x1c0 filename_lookup+0x97/0x180 ? strncpy_from_user+0x96/0x1e0 ? getname_flags.part.0+0x45/0x1a0 vfs_statx+0x64/0x100 ? lockdep_hardirqs_on_prepare+0xff/0x180 ? _raw_spin_unlock_irqrestore+0x41/0x50 __do_sys_newlstat+0x26/0x40 ? lockdep_hardirqs_on_prepare+0xff/0x180 ? syscall_enter_from_user_mode+0x27/0x80 ? syscall_enter_from_user_mode+0x27/0x80 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 I have also seen a report of lockdep complaining about the lock class that was looked up being the same as the lock class on the lock we were using, but I can't find the report. These are problems that occur because we do not have the lockdep class set on the extent buffer until _after_ we read the eb in properly. This is problematic for concurrent readers, because we will create the extent buffer, lock it, and then attempt to read the extent buffer. If a second thread comes in and tries to do a search down the same path they'll get the above lockdep splat because the class isn't set properly on the extent buffer. There was a good reason for this, we generally didn't know the real owner of the eb until we read it, specifically in refcounted roots. However now all refcounted roots have the same class name, so we no longer need to worry about this. For non-refcounted trees we know which root we're on based on the parent. Fix this by setting the lockdep class on the eb at creation time instead of read time. This will fix the splat and the weirdness where the class changes in the middle of locking the block. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/extent_io.c1
-rw-r--r--fs/btrfs/volumes.c1
4 files changed, 6 insertions, 7 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6abe3c8f025d..f0161d2ae2a4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -554,9 +554,6 @@ static int validate_extent_buffer(struct extent_buffer *eb)
goto out;
}
- btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
- eb, found_level);
-
csum_tree_block(eb, result);
if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0d1b12e5b166..56ea380f5a17 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4634,6 +4634,11 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ERR_PTR(-EUCLEAN);
}
+ /*
+ * This needs to stay, because we could allocate a freed block from an
+ * old tree into a new tree, so we need to make sure this new block is
+ * set to the appropriate level and owner.
+ */
btrfs_set_buffer_lockdep_class(owner, buf, level);
__btrfs_tree_lock(buf, nest);
btrfs_clean_tree_block(buf);
@@ -5022,9 +5027,6 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
root->root_key.objectid, level - 1);
if (IS_ERR(next))
return PTR_ERR(next);
-
- btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
- level - 1);
reada = 1;
}
btrfs_tree_lock(next);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bd0d4d195d76..ecca6d6ec90a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5205,6 +5205,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return ERR_PTR(-ENOMEM);
+ btrfs_set_buffer_lockdep_class(owner_root, eb, level);
num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cf9a48e27ab0..998f25cddf2b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6908,7 +6908,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
if (IS_ERR(sb))
return PTR_ERR(sb);
set_extent_buffer_uptodate(sb);
- btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
* The sb extent buffer is artificial and just used to read the system array.
* set_extent_buffer_uptodate() call does not properly mark all it's