summaryrefslogtreecommitdiff
path: root/fs/bcachefs/alloc_background.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-06-04 19:14:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-06-04 19:14:24 -0700
commitff0905bbf991f4337b5ebc19c0d43525ebb0d96b (patch)
treef1799243f7aee145176342f2eff90e7890662e3a /fs/bcachefs/alloc_background.c
parent16b70698aa3ae7888826d0c84567c72241cf6713 (diff)
parent3d11125ff624b540334f7134d98b94f3b980e85d (diff)
downloadlwn-ff0905bbf991f4337b5ebc19c0d43525ebb0d96b.tar.gz
lwn-ff0905bbf991f4337b5ebc19c0d43525ebb0d96b.zip
Merge tag 'bcachefs-2025-06-04' of git://evilpiepirate.org/bcachefs
Pull more bcachefs updates from Kent Overstreet: "More bcachefs updates: - More stack usage improvements (~600 bytes) - Define CLASS()es for some commonly used types, and convert most rcu_read_lock() uses to the new lock guards - New introspection: - Superblock error counters are now available in sysfs: previously, they were only visible with 'show-super', which doesn't provide a live view - New tracepoint, error_throw(), which is called any time we return an error and start to unwind - Repair - check_fix_ptrs() can now repair btree node roots - We can now repair when we've somehow ended up with the journal using a superblock bucket - Revert some leftovers from the aborted directory i_size feature, and add repair code: some userspace programs (e.g. sshfs) were getting confused It seems in 6.15 there's a bug where i_nlink on the vfs inode has been getting incorrectly set to 0, with some unfortunate results; list_journal analysis showed bch2_inode_rm() being called (by bch2_evict_inode()) when it clearly should not have been. - bch2_inode_rm() now runs "should we be deleting this inode?" checks that were previously only run when deleting unlinked inodes in recovery - check_subvol() was treating a dangling subvol (pointing to a missing root inode) like a dangling dirent, and deleting it. This was the really unfortunate one: check_subvol() will now recreate the root inode if necessary This took longer to debug than it should have, and we lost several filesystems unnecessarily, because users have been ignoring the release notes and blindly running 'fsck -y'. Debugging required reconstructing what happened through analyzing the journal, when ideally someone would have noticed 'hey, fsck is asking me if I want to repair this: it usually doesn't, maybe I should run this in dry run mode and check what's going on?' As a reminder, fsck errors are being marked as autofix once we've verified, in real world usage, that they're working correctly; blindly running 'fsck -y' on an experimental filesystem is playing with fire Up to this incident we've had an excellent track record of not losing data, so let's try to learn from this one This is a community effort, I wouldn't be able to get this done without the help of all the people QAing and providing excellent bug reports and feedback based on real world usage. But please don't ignore advice and expect me to pick up the pieces If an error isn't marked as autofix, and it /is/ happening in the wild, that's also something I need to know about so we can check it out and add it to the autofix list if repair looks good. I haven't been getting those reports, and I should be; since we don't have any sort of telemetry yet I am absolutely dependent on user reports Now I'll be spending the weekend working on new repair code to see if I can get a filesystem back for a user who didn't have backups" * tag 'bcachefs-2025-06-04' of git://evilpiepirate.org/bcachefs: (69 commits) bcachefs: add cond_resched() to handle_overwrites() bcachefs: Make journal read log message a bit quieter bcachefs: Fix subvol to missing root repair bcachefs: Run may_delete_deleted_inode() checks in bch2_inode_rm() bcachefs: delete dead code from may_delete_deleted_inode() bcachefs: Add flags to subvolume_to_text() bcachefs: Fix oops in btree_node_seq_matches() bcachefs: Fix dirent_casefold_mismatch repair bcachefs: Fix bch2_fsck_rename_dirent() for casefold bcachefs: Redo bch2_dirent_init_name() bcachefs: Fix -Wc23-extensions in bch2_check_dirents() bcachefs: Run check_dirents second time if required bcachefs: Run snapshot deletion out of system_long_wq bcachefs: Make check_key_has_snapshot safer bcachefs: BCH_RECOVERY_PASS_NO_RATELIMIT bcachefs: bch2_require_recovery_pass() bcachefs: bch_err_throw() bcachefs: Repair code for directory i_size bcachefs: Kill un-reverted directory i_size code bcachefs: Delete redundant fsck_err() ...
Diffstat (limited to 'fs/bcachefs/alloc_background.c')
-rw-r--r--fs/bcachefs/alloc_background.c79
1 files changed, 40 insertions, 39 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 173e81c2bbcb..b228a5a64479 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -21,7 +21,6 @@
#include "error.h"
#include "lru.h"
#include "recovery.h"
-#include "trace.h"
#include "varint.h"
#include <linux/kthread.h>
@@ -337,11 +336,10 @@ void bch2_alloc_v4_swab(struct bkey_s k)
a->stripe_sectors = swab32(a->stripe_sectors);
}
-void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c,
+ unsigned dev, const struct bch_alloc_v4 *a)
{
- struct bch_alloc_v4 _a;
- const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
- struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL;
+ struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, dev) : NULL;
prt_newline(out);
printbuf_indent_add(out, 2);
@@ -369,6 +367,19 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
bch2_dev_put(ca);
}
+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
+
+ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, a);
+}
+
+void bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, bkey_s_c_to_alloc_v4(k).v);
+}
+
void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
if (k.k->type == KEY_TYPE_alloc_v4) {
@@ -697,8 +708,8 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
set ? "" : "un",
bch2_btree_id_str(btree),
buf.buf);
- if (ret == -BCH_ERR_fsck_ignore ||
- ret == -BCH_ERR_fsck_errors_not_fixed)
+ if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) ||
+ bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed))
ret = 0;
printbuf_exit(&buf);
@@ -854,7 +865,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
if (!ca)
- return -BCH_ERR_trigger_alloc;
+ return bch_err_throw(c, trigger_alloc);
struct bch_alloc_v4 old_a_convert;
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
@@ -988,14 +999,11 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}
if (new_a->gen != old_a->gen) {
- rcu_read_lock();
+ guard(rcu)();
u8 *gen = bucket_gen(ca, new.k->p.offset);
- if (unlikely(!gen)) {
- rcu_read_unlock();
+ if (unlikely(!gen))
goto invalid_bucket;
- }
*gen = new_a->gen;
- rcu_read_unlock();
}
#define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; })
@@ -1021,15 +1029,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}
if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
- rcu_read_lock();
+ guard(rcu)();
struct bucket *g = gc_bucket(ca, new.k->p.offset);
- if (unlikely(!g)) {
- rcu_read_unlock();
+ if (unlikely(!g))
goto invalid_bucket;
- }
g->gen_valid = 1;
g->gen = new_a->gen;
- rcu_read_unlock();
}
err:
fsck_err:
@@ -1039,7 +1044,7 @@ fsck_err:
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
- ret = -BCH_ERR_trigger_alloc;
+ ret = bch_err_throw(c, trigger_alloc);
goto err;
}
@@ -1105,13 +1110,12 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck
bucket->offset = 0;
}
- rcu_read_lock();
+ guard(rcu)();
*ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
if (*ca) {
*bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
bch2_dev_get(*ca);
}
- rcu_read_unlock();
return *ca != NULL;
}
@@ -1454,7 +1458,7 @@ delete:
ret = bch2_btree_bit_mod_iter(trans, iter, false) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_commit;
+ bch_err_throw(c, transaction_restart_commit);
goto out;
} else {
/*
@@ -1777,14 +1781,16 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
+ struct bch_fs *c = ca->fs;
int ret;
mutex_lock(&ca->discard_buckets_in_flight_lock);
- darray_for_each(ca->discard_buckets_in_flight, i)
- if (i->bucket == bucket) {
- ret = -BCH_ERR_EEXIST_discard_in_flight_add;
- goto out;
- }
+ struct discard_in_flight *i =
+ darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
+ if (i) {
+ ret = bch_err_throw(c, EEXIST_discard_in_flight_add);
+ goto out;
+ }
ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
.in_progress = in_progress,
@@ -1798,14 +1804,11 @@ out:
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
mutex_lock(&ca->discard_buckets_in_flight_lock);
- darray_for_each(ca->discard_buckets_in_flight, i)
- if (i->bucket == bucket) {
- BUG_ON(!i->in_progress);
- darray_remove_item(&ca->discard_buckets_in_flight, i);
- goto found;
- }
- BUG();
-found:
+ struct discard_in_flight *i =
+ darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
+ BUG_ON(!i || !i->in_progress);
+
+ darray_remove_item(&ca->discard_buckets_in_flight, i);
mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
@@ -2504,7 +2507,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
lockdep_assert_held(&c->state_lock);
- rcu_read_lock();
+ guard(rcu)();
for_each_member_device_rcu(c, ca, NULL) {
struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev);
if (bdev)
@@ -2549,7 +2552,6 @@ void bch2_recalc_capacity(struct bch_fs *c)
bucket_size_max = max_t(unsigned, bucket_size_max,
ca->mi.bucket_size);
}
- rcu_read_unlock();
bch2_set_ra_pages(c, ra_pages);
@@ -2574,10 +2576,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
u64 ret = U64_MAX;
- rcu_read_lock();
+ guard(rcu)();
for_each_rw_member_rcu(c, ca)
ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
- rcu_read_unlock();
return ret;
}