summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-12-07 23:28:26 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-01-01 11:47:40 -0500
commit27b2df982fa3343552e8a98a744581da69064207 (patch)
treefba3296b6d73abb39a85a89c1ce6916958e291a1
parent8c066edeb43b067badac984b6a0493d07d15c00a (diff)
downloadlwn-27b2df982fa3343552e8a98a744581da69064207.tar.gz
lwn-27b2df982fa3343552e8a98a744581da69064207.zip
bcachefs: Kill for_each_btree_key()
for_each_btree_key() handles transaction restarts, like for_each_btree_key2(), but only calls bch2_trans_begin() after a transaction restart - for_each_btree_key2() wraps every loop iteration in a transaction. The for_each_btree_key() behaviour is problematic when it leads to holding the SRCU lock that prevents key cache reclaim for an unbounded amount of time - there's no real need to keep it around. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/alloc_background.c31
-rw-r--r--fs/bcachefs/btree_gc.c37
-rw-r--r--fs/bcachefs/btree_iter.h2
-rw-r--r--fs/bcachefs/ec.c46
-rw-r--r--fs/bcachefs/fsck.c130
-rw-r--r--fs/bcachefs/inode.c38
-rw-r--r--fs/bcachefs/move.c11
-rw-r--r--fs/bcachefs/recovery.c3
-rw-r--r--fs/bcachefs/snapshot.c31
9 files changed, 152 insertions, 177 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index ad4ad795b3bc..7ebf5516266e 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen;
int ret;
- for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
/*
* Not a fsck error because this is checked/repaired by
* bch2_check_alloc_key() which runs later:
@@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
}
g.v.gens[offset] = gen;
- }
- bch2_trans_iter_exit(trans, &iter);
+ 0;
+ }));
if (have_bucket_gens_key && !ret)
ret = commit_do(trans, NULL, NULL,
@@ -582,8 +582,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
bch2_trans_put(trans);
- if (ret)
- bch_err_fn(c, ret);
+ bch_err_fn(c, ret);
return ret;
}
@@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c)
const struct bch_bucket_gens *g;
u64 b;
- for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
@@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c)
b < min_t(u64, ca->mi.nbuckets, end);
b++)
*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
- }
- bch2_trans_iter_exit(trans, &iter);
+ 0;
+ }));
} else {
struct bch_alloc_v4 a;
- for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
/*
* Not a fsck error because this is checked/repaired by
* bch2_check_alloc_key() which runs later:
@@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c)
ca = bch_dev_bkey_exists(c, k.k->p.inode);
*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
- }
- bch2_trans_iter_exit(trans, &iter);
+ 0;
+ }));
}
bch2_trans_put(trans);
up_read(&c->gc_lock);
- if (ret)
- bch_err_fn(c, ret);
-
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 5a7b72a461df..6a44427d2ab2 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
static int bch2_gc_reflink_start(struct bch_fs *c,
bool metadata_only)
{
- struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
@@ -1674,30 +1673,30 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
if (metadata_only)
return 0;
- trans = bch2_trans_get(c);
c->reflink_gc_nr = 0;
- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- const __le64 *refcount = bkey_refcount_c(k);
+ ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ const __le64 *refcount = bkey_refcount_c(k);
- if (!refcount)
- continue;
+ if (!refcount)
+ continue;
- r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
- GFP_KERNEL);
- if (!r) {
- ret = -BCH_ERR_ENOMEM_gc_reflink_start;
- break;
- }
+ r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+ GFP_KERNEL);
+ if (!r) {
+ ret = -BCH_ERR_ENOMEM_gc_reflink_start;
+ break;
+ }
- r->offset = k.k->p.offset;
- r->size = k.k->size;
- r->refcount = 0;
- }
- bch2_trans_iter_exit(trans, &iter);
+ r->offset = k.k->p.offset;
+ r->size = k.k->size;
+ r->refcount = 0;
+ 0;
+ })));
- bch2_trans_put(trans);
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 83a4ff0b91e1..10d059cfd36d 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -777,7 +777,7 @@ transaction_restart: \
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
-#define for_each_btree_key(_trans, _iter, _btree_id, \
+#define for_each_btree_key_old(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index bc8b556f19a9..8e5237661cce 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c)
int bch2_stripes_read(struct bch_fs *c)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
const struct bch_stripe *s;
@@ -1841,36 +1840,33 @@ int bch2_stripes_read(struct bch_fs *c)
unsigned i;
int ret;
- for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- if (k.k->type != KEY_TYPE_stripe)
- continue;
-
- ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
- if (ret)
- break;
-
- s = bkey_s_c_to_stripe(k).v;
+ ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ if (k.k->type != KEY_TYPE_stripe)
+ continue;
- m = genradix_ptr(&c->stripes, k.k->p.offset);
- m->sectors = le16_to_cpu(s->sectors);
- m->algorithm = s->algorithm;
- m->nr_blocks = s->nr_blocks;
- m->nr_redundant = s->nr_redundant;
- m->blocks_nonempty = 0;
+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+ if (ret)
+ break;
- for (i = 0; i < s->nr_blocks; i++)
- m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+ s = bkey_s_c_to_stripe(k).v;
- bch2_stripes_heap_insert(c, m, k.k->p.offset);
- }
- bch2_trans_iter_exit(trans, &iter);
+ m = genradix_ptr(&c->stripes, k.k->p.offset);
+ m->sectors = le16_to_cpu(s->sectors);
+ m->algorithm = s->algorithm;
+ m->nr_blocks = s->nr_blocks;
+ m->nr_redundant = s->nr_redundant;
+ m->blocks_nonempty = 0;
- bch2_trans_put(trans);
+ for (i = 0; i < s->nr_blocks; i++)
+ m->blocks_nonempty += !!stripe_blockcount_get(s, i);
- if (ret)
- bch_err_fn(c, ret);
+ bch2_stripes_heap_insert(c, m, k.k->p.offset);
+ 0;
+ })));
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index bc6b56628fdf..39f529eb4a4d 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -589,14 +589,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
- u32 restart_count = trans->restart_count;
int ret;
w->recalculate_sums = false;
w->inodes.nr = 0;
- for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->p.offset != inum)
break;
@@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
return ret;
w->first_this_inode = true;
-
- return trans_was_restarted(trans, restart_count);
+ return 0;
}
static struct inode_walker_entry *
@@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c)
pathbuf path = { 0, };
int ret;
- for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (!bkey_is_inode(k.k))
continue;
- ret = bch2_inode_unpack(k, &u);
- if (ret) {
- /* Should have been caught earlier in fsck: */
- bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
- break;
- }
+ BUG_ON(bch2_inode_unpack(k, &u));
if (u.bi_flags & BCH_INODE_unlinked)
continue;
@@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
darray_exit(&path);
+
bch_err_fn(c, ret);
return ret;
}
@@ -2255,47 +2249,42 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct nlink_table *t,
u64 start, u64 *end)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
- int ret = 0;
- for_each_btree_key(trans, iter, BTREE_ID_inodes,
- POS(0, start),
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- if (!bkey_is_inode(k.k))
- continue;
-
- /* Should never fail, checked by bch2_inode_invalid: */
- BUG_ON(bch2_inode_unpack(k, &u));
-
- /*
- * Backpointer and directory structure checks are sufficient for
- * directories, since they can't have hardlinks:
- */
- if (S_ISDIR(u.bi_mode))
- continue;
+ int ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter, BTREE_ID_inodes,
+ POS(0, start),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ if (!bkey_is_inode(k.k))
+ continue;
- if (!u.bi_nlink)
- continue;
+ /* Should never fail, checked by bch2_inode_invalid: */
+ BUG_ON(bch2_inode_unpack(k, &u));
- ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
- if (ret) {
- *end = k.k->p.offset;
- ret = 0;
- break;
- }
+ /*
+ * Backpointer and directory structure checks are sufficient for
+ * directories, since they can't have hardlinks:
+ */
+ if (S_ISDIR(u.bi_mode))
+ continue;
- }
- bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
+ if (!u.bi_nlink)
+ continue;
- if (ret)
- bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
+ ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
+ if (ret) {
+ *end = k.k->p.offset;
+ ret = 0;
+ break;
+ }
+ 0;
+ })));
+ bch_err_fn(c, ret);
return ret;
}
@@ -2303,42 +2292,39 @@ noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
u64 range_start, u64 range_end)
{
- struct btree_trans *trans = bch2_trans_get(c);
struct snapshots_seen s;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_dirent d;
- int ret;
snapshots_seen_init(&s);
- for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
- if (ret)
- break;
-
- switch (k.k->type) {
- case KEY_TYPE_dirent:
- d = bkey_s_c_to_dirent(k);
+ int ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
+ if (ret)
+ break;
- if (d.v->d_type != DT_DIR &&
- d.v->d_type != DT_SUBVOL)
- inc_link(c, &s, links, range_start, range_end,
- le64_to_cpu(d.v->d_inum),
- bch2_snapshot_equiv(c, d.k->p.snapshot));
- break;
- }
- }
- bch2_trans_iter_exit(trans, &iter);
+ switch (k.k->type) {
+ case KEY_TYPE_dirent:
+ d = bkey_s_c_to_dirent(k);
- if (ret)
- bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
+ if (d.v->d_type != DT_DIR &&
+ d.v->d_type != DT_SUBVOL)
+ inc_link(c, &s, links, range_start, range_end,
+ le64_to_cpu(d.v->d_inum),
+ bch2_snapshot_equiv(c, d.k->p.snapshot));
+ break;
+ }
+ 0;
+ })));
- bch2_trans_put(trans);
snapshots_seen_exit(&s);
+
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 0d2bdc7575a8..7ee9ac5e4479 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1168,29 +1168,29 @@ again:
* but we can't retry because the btree write buffer won't have been
* flushed and we'd spin:
*/
- for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = commit_do(trans, NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_lazy_rw,
- may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
- if (ret < 0)
- break;
-
- if (ret) {
- if (!test_bit(BCH_FS_rw, &c->flags)) {
- bch2_trans_unlock(trans);
- bch2_fs_lazy_rw(c);
- }
-
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
+ if (ret > 0) {
bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
- if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
- break;
+ /*
+ * We don't want to loop here: a transaction restart
+ * error here means we handled a transaction restart and
+ * we're actually done, but if we loop we'll retry the
+ * same key because the write buffer hasn't been flushed
+ * yet
+ */
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ ret = 0;
+ continue;
+ }
}
- }
- bch2_trans_iter_exit(trans, &iter);
+
+ ret;
+ }));
if (!ret && need_another_pass) {
ret = bch2_btree_write_buffer_flush_sync(trans);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 847ab0eba2da..b2985e730650 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
io_opts->d.nr = 0;
- for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
if (k.k->p.offset != extent_k.k->p.inode)
break;
@@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
- ret = darray_push(&io_opts->d, e);
- if (ret)
- break;
- }
- bch2_trans_iter_exit(trans, &iter);
+ darray_push(&io_opts->d, e);
+ }));
io_opts->cur_inum = extent_k.k->p.inode;
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 629ddbb5850f..ed366b35a1f2 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
keys->gap = keys->nr;
set_bit(BCH_FS_may_go_rw, &c->flags);
- if (keys->nr || c->opts.fsck)
+
+ if (keys->nr || c->opts.fsck || !c->sb.clean)
return bch2_fs_read_write_early(c);
return 0;
}
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index b2d216fa7182..48307f79f6ad 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
- for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k, ({
if (k.k->type != KEY_TYPE_snapshot)
continue;
snap = bkey_s_c_to_snapshot(k);
- if (BCH_SNAPSHOT_DELETED(snap.v)) {
- ret = snapshot_list_add(c, &deleted, k.k->p.offset);
- if (ret)
- break;
- }
- }
- bch2_trans_iter_exit(trans, &iter);
+ BCH_SNAPSHOT_DELETED(snap.v)
+ ? snapshot_list_add(c, &deleted, k.k->p.offset)
+ : 0;
+ }));
if (ret) {
bch_err_msg(c, ret, "walking snapshots");
@@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
bch2_trans_unlock(trans);
down_write(&c->snapshot_create_lock);
- for_each_btree_key(trans, iter, BTREE_ID_snapshots,
- POS_MIN, 0, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k, ({
u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot);
- if (equiv != snapshot)
- snapshot_list_add(c, &deleted_interior, snapshot);
- }
- bch2_trans_iter_exit(trans, &iter);
+ equiv != snapshot
+ ? snapshot_list_add(c, &deleted_interior, snapshot)
+ : 0;
+ }));
- if (ret)
+ if (ret) {
+ bch_err_msg(c, ret, "walking snapshots");
goto err_create_lock;
+ }
/*
* Fixing children of deleted snapshots can't be done completely