diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-12-03 11:02:17 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-12-03 11:02:17 -0800 |
commit | feffde684ac29a3b7aec82d2df850fbdbdee55e4 (patch) | |
tree | 9c6ba0ad3c03b11d0cdf918f9fa28129f12b3907 | |
parent | 3d2469490912122b1e619c46b720d9cde047b2a7 (diff) | |
parent | 22d2e48e318564f8c9b09faf03ecb4f03fb44dd5 (diff) | |
download | lwn-feffde684ac29a3b7aec82d2df850fbdbdee55e4.tar.gz lwn-feffde684ac29a3b7aec82d2df850fbdbdee55e4.zip |
Merge tag 'for-6.13-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- add lockdep annotations for io_uring/encoded read integration, as the inode
lock is held when returning to userspace
- properly reflect the experimental config option in sysfs
- handle NULL root in case the rescue mode accepts invalid/damaged tree
roots (rescue=ibadroots)
- regression fix of a deadlock between transaction and extent locks
- fix pending bio accounting bug in encoded read ioctl
- fix NOWAIT mode when checking references for NOCOW files
- fix use-after-free in an rb-tree cleanup in the ref-verify debugging tool
* tag 'for-6.13-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: fix lockdep warnings on io_uring encoded reads
btrfs: ref-verify: fix use-after-free after invalid ref action
btrfs: add a sanity check for btrfs root in btrfs_search_slot()
btrfs: don't loop for nowait writes when checking for cross references
btrfs: sysfs: advertise experimental features only if CONFIG_BTRFS_EXPERIMENTAL=y
btrfs: fix deadlock between transaction commits and extent locks
btrfs: fix use-after-free in btrfs_encoded_read_endio()
-rw-r--r-- | fs/btrfs/ctree.c | 6 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 18 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 10 | ||||
-rw-r--r-- | fs/btrfs/locking.h | 10 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 1 | ||||
-rw-r--r-- | fs/btrfs/sysfs.c | 4 |
7 files changed, 43 insertions, 8 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 148648ea1c8b..693dc27ffb89 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2046,7 +2046,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info; struct extent_buffer *b; int slot; int ret; @@ -2059,6 +2059,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, int min_write_lock_level; int prev_cmp; + if (!root) + return -EINVAL; + + fs_info = root->fs_info; might_sleep(); lowest_level = p->lowest_level; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 412e318e4a22..bd09dd3ad1a0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2422,7 +2422,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, goto out; ret = check_delayed_ref(root, path, objectid, offset, bytenr); - } while (ret == -EAGAIN); + } while (ret == -EAGAIN && !path->nowait); out: btrfs_release_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03fe0de2cd0d..fa648ab6fe80 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3063,6 +3063,19 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) goto out; } + /* + * If it's a COW write we need to lock the extent range as we will be + * inserting/replacing file extent items and unpinning an extent map. + * This must be taken before joining a transaction, as it's a higher + * level lock (like the inode's VFS lock), otherwise we can run into an + * ABBA deadlock with other tasks (transactions work like a lock, + * depending on their current state). 
+ */ + if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + clear_bits |= EXTENT_LOCKED; + lock_extent(io_tree, start, end, &cached_state); + } + if (freespace_inode) trans = btrfs_join_transaction_spacecache(root); else @@ -3099,9 +3112,6 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) goto out; } - clear_bits |= EXTENT_LOCKED; - lock_extent(io_tree, start, end, &cached_state); - if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { @@ -9089,7 +9099,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) */ WRITE_ONCE(priv->status, bbio->bio.bi_status); } - if (atomic_dec_return(&priv->pending) == 0) { + if (atomic_dec_and_test(&priv->pending)) { int err = blk_status_to_errno(READ_ONCE(priv->status)); if (priv->uring_ctx) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c9302d193187..3af8bb0c8d75 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4751,6 +4751,9 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss size_t page_offset; ssize_t ret; + /* The inode lock has already been acquired in btrfs_uring_read_extent. */ + btrfs_lockdep_inode_acquire(inode, i_rwsem); + if (priv->err) { ret = priv->err; goto out; @@ -4859,6 +4862,13 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, * and inode and freeing the allocations. */ + /* + * We're returning to userspace with the inode lock held, and that's + * okay - it'll get unlocked in a worker thread. Call + * btrfs_lockdep_inode_release() to avoid confusing lockdep. 
+ */ + btrfs_lockdep_inode_release(inode, i_rwsem); + return -EIOCBQUEUED; out_fail: diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 46c8be2afab1..35036b151bf5 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -129,6 +129,16 @@ enum btrfs_lockdep_trans_states { rwsem_release(&owner->lock##_map, _THIS_IP_) /* + * Used to account for the fact that when doing io_uring encoded I/O, we can + * return to userspace with the inode lock still held. + */ +#define btrfs_lockdep_inode_acquire(owner, lock) \ + rwsem_acquire_read(&owner->vfs_inode.lock.dep_map, 0, 0, _THIS_IP_) + +#define btrfs_lockdep_inode_release(owner, lock) \ + rwsem_release(&owner->vfs_inode.lock.dep_map, _THIS_IP_) + +/* * Macros for the transaction states wait events, similar to the generic wait * event macros. */ diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 9522a8b79d22..2928abf7eb82 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -857,6 +857,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + rb_erase(&ref->node, &be->refs); kfree(ref); kfree(ra); goto out_unlock; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index b843308e2bc6..fdcbf650ac31 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -295,7 +295,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(simple_quota, SIMPLE_QUOTA); #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED); #endif -#ifdef CONFIG_BTRFS_DEBUG +#ifdef CONFIG_BTRFS_EXPERIMENTAL /* Remove once support for extent tree v2 is feature complete */ BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2); /* Remove once support for raid stripe tree is feature complete. 
*/ @@ -329,7 +329,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_PTR(zoned), #endif -#ifdef CONFIG_BTRFS_DEBUG +#ifdef CONFIG_BTRFS_EXPERIMENTAL BTRFS_FEAT_ATTR_PTR(extent_tree_v2), BTRFS_FEAT_ATTR_PTR(raid_stripe_tree), #endif |