diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-07 23:28:26 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2024-01-01 11:47:40 -0500 |
commit | 27b2df982fa3343552e8a98a744581da69064207 (patch) | |
tree | fba3296b6d73abb39a85a89c1ce6916958e291a1 | |
parent | 8c066edeb43b067badac984b6a0493d07d15c00a (diff) | |
download | lwn-27b2df982fa3343552e8a98a744581da69064207.tar.gz lwn-27b2df982fa3343552e8a98a744581da69064207.zip |
bcachefs: Kill for_each_btree_key()
for_each_btree_key() handles transaction restarts, like
for_each_btree_key2(), but only calls bch2_trans_begin() after a
transaction restart - for_each_btree_key2() wraps every loop iteration
in a transaction.
The for_each_btree_key() behaviour is problematic when it leads to
holding the SRCU lock that prevents key cache reclaim for an unbounded
amount of time - there's no real need to keep it around.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 31 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 37 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/ec.c | 46 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 130 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 11 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/snapshot.c | 31 |
9 files changed, 152 insertions, 177 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ad4ad795b3bc..7ebf5516266e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) u8 gen; int ret; - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) } g.v.gens[offset] = gen; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); if (have_bucket_gens_key && !ret) ret = commit_do(trans, NULL, NULL, @@ -582,8 +582,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c) const struct bch_bucket_gens *g; u64 b; - for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; @@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c) b < min_t(u64, ca->mi.nbuckets, end); b++) *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } else { struct bch_alloc_v4 a; - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c) ca = bch_dev_bkey_exists(c, k.k->p.inode); *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } bch2_trans_put(trans); up_read(&c->gc_lock); - if (ret) - bch_err_fn(c, ret); - + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 5a7b72a461df..6a44427d2ab2 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) static int bch2_gc_reflink_start(struct bch_fs *c, bool metadata_only) { - struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; @@ -1674,30 +1673,30 @@ static int bch2_gc_reflink_start(struct bch_fs *c, if (metadata_only) return 0; - trans = bch2_trans_get(c); c->reflink_gc_nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - const __le64 *refcount = bkey_refcount_c(k); + ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ + const __le64 *refcount = bkey_refcount_c(k); - if (!refcount) - continue; + if (!refcount) + continue; - r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, - GFP_KERNEL); - if (!r) { - ret = -BCH_ERR_ENOMEM_gc_reflink_start; - break; - } + r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, + GFP_KERNEL); + if (!r) { + ret = -BCH_ERR_ENOMEM_gc_reflink_start; + break; + } - r->offset = k.k->p.offset; - r->size = k.k->size; - r->refcount = 0; - } - bch2_trans_iter_exit(trans, &iter); + r->offset = k.k->p.offset; + r->size = k.k->size; + r->refcount = 0; + 0; + }))); - bch2_trans_put(trans); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 83a4ff0b91e1..10d059cfd36d 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -777,7 +777,7 @@ transaction_restart: \ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) -#define for_each_btree_key(_trans, _iter, _btree_id, \ +#define for_each_btree_key_old(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index bc8b556f19a9..8e5237661cce 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; const struct bch_stripe *s; @@ -1841,36 +1840,33 @@ int bch2_stripes_read(struct bch_fs *c) unsigned i; int ret; - for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - if (k.k->type != KEY_TYPE_stripe) - continue; - - ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); - if (ret) - break; - - s = bkey_s_c_to_stripe(k).v; + ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ + if (k.k->type != KEY_TYPE_stripe) + continue; - m = genradix_ptr(&c->stripes, k.k->p.offset); - m->sectors = le16_to_cpu(s->sectors); - m->algorithm = s->algorithm; - m->nr_blocks = s->nr_blocks; - m->nr_redundant = s->nr_redundant; - m->blocks_nonempty = 0; + ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); + if (ret) + break; - for (i = 0; i < s->nr_blocks; i++) - m->blocks_nonempty += !!stripe_blockcount_get(s, i); + s = bkey_s_c_to_stripe(k).v; - bch2_stripes_heap_insert(c, m, k.k->p.offset); - } - bch2_trans_iter_exit(trans, &iter); + m = genradix_ptr(&c->stripes, k.k->p.offset); + m->sectors = le16_to_cpu(s->sectors); + m->algorithm = s->algorithm; + m->nr_blocks = s->nr_blocks; + m->nr_redundant = s->nr_redundant; + m->blocks_nonempty = 0; - bch2_trans_put(trans); + for (i = 0; i < s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(s, i); - if (ret) - bch_err_fn(c, ret); + bch2_stripes_heap_insert(c, m, k.k->p.offset); + 0; + }))); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index bc6b56628fdf..39f529eb4a4d 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -589,14 +589,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - u32 restart_count = trans->restart_count; int ret; w->recalculate_sums = false; w->inodes.nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (k.k->p.offset != inum) break; @@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, return ret; w->first_this_inode = true; - - return trans_was_restarted(trans, restart_count); + return 0; } static struct inode_walker_entry * @@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c) pathbuf path = { 0, }; int ret; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (!bkey_is_inode(k.k)) continue; - ret = bch2_inode_unpack(k, &u); - if (ret) { - /* Should have been caught earlier in fsck: */ - bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret); - break; - } + BUG_ON(bch2_inode_unpack(k, &u)); if (u.bi_flags & BCH_INODE_unlinked) continue; @@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c) bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); darray_exit(&path); + bch_err_fn(c, ret); return ret; } @@ -2255,47 +2249,42 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct nlink_table *t, u64 start, u64 *end) { - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; - int ret = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, - POS(0, start), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (!bkey_is_inode(k.k)) - continue; - - /* Should never fail, checked by bch2_inode_invalid: */ - BUG_ON(bch2_inode_unpack(k, &u)); - - /* - * Backpointer and directory structure checks are sufficient for - * directories, since they can't have hardlinks: - */ - if (S_ISDIR(u.bi_mode)) - continue; + int ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_inodes, + POS(0, start), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + if (!bkey_is_inode(k.k)) + continue; - if (!u.bi_nlink) - continue; + /* Should never fail, checked by bch2_inode_invalid: */ + BUG_ON(bch2_inode_unpack(k, &u)); - ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); - if (ret) { - *end = k.k->p.offset; - ret = 0; - break; - } + /* + * Backpointer and directory structure checks are sufficient for + * directories, since they can't have hardlinks: + */ + if (S_ISDIR(u.bi_mode)) + continue; - } - bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); + if (!u.bi_nlink) + continue; - if (ret) - bch_err(c, "error in fsck: btree error %i while walking inodes", ret); + ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); + if (ret) { + *end = k.k->p.offset; + ret = 0; + break; + } + 0; + }))); + bch_err_fn(c, ret); return ret; } @@ -2303,42 +2292,39 @@ noinline_for_stack static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct btree_trans *trans = bch2_trans_get(c); struct snapshots_seen s; struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent d; - int ret; snapshots_seen_init(&s); - for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); - if (ret) - break; - - switch (k.k->type) { - case KEY_TYPE_dirent: - d = bkey_s_c_to_dirent(k); + int ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); + if (ret) + break; - if (d.v->d_type != DT_DIR && - d.v->d_type != DT_SUBVOL) - inc_link(c, &s, links, range_start, range_end, - le64_to_cpu(d.v->d_inum), - bch2_snapshot_equiv(c, d.k->p.snapshot)); - break; - } - } - bch2_trans_iter_exit(trans, &iter); + switch (k.k->type) { + case KEY_TYPE_dirent: + d = bkey_s_c_to_dirent(k); - if (ret) - bch_err(c, "error in fsck: btree error %i while walking dirents", ret); + if (d.v->d_type != DT_DIR && + d.v->d_type != DT_SUBVOL) + inc_link(c, &s, links, range_start, range_end, + le64_to_cpu(d.v->d_inum), + bch2_snapshot_equiv(c, d.k->p.snapshot)); + break; + } + 0; + }))); - bch2_trans_put(trans); snapshots_seen_exit(&s); + + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 0d2bdc7575a8..7ee9ac5e4479 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1168,29 +1168,29 @@ again: * but we can't retry because the btree write buffer won't have been * flushed and we'd spin: */ - for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, - may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass)); - if (ret < 0) - break; - - if (ret) { - if (!test_bit(BCH_FS_rw, &c->flags)) { - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - } - + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + if (ret > 0) { bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); - if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - break; + /* + * We don't want to loop here: a transaction restart + * error here means we handled a transaction restart and + * we're actually done, but if we loop we'll retry the + * same key because the write buffer hasn't been flushed + * yet + */ + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } } - } - bch2_trans_iter_exit(trans, &iter); + + ret; + })); if (!ret && need_another_pass) { ret = bch2_btree_write_buffer_flush_sync(trans); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 847ab0eba2da..b2985e730650 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, io_opts->d.nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ({ if (k.k->p.offset != extent_k.k->p.inode) break; @@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; bch2_inode_opts_get(&e.io_opts, trans->c, &inode); - ret = darray_push(&io_opts->d, e); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); + darray_push(&io_opts->d, e); + })); io_opts->cur_inum = extent_k.k->p.inode; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 629ddbb5850f..ed366b35a1f2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c) keys->gap = keys->nr; set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || c->opts.fsck) + + if (keys->nr || c->opts.fsck || !c->sb.clean) return bch2_fs_read_write_early(c); return 0; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b2d216fa7182..48307f79f6ad 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } - for_each_btree_key(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ if (k.k->type != KEY_TYPE_snapshot) continue; snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v)) { - ret = snapshot_list_add(c, &deleted, k.k->p.offset); - if (ret) - break; - } - } - bch2_trans_iter_exit(trans, &iter); + BCH_SNAPSHOT_DELETED(snap.v) + ? snapshot_list_add(c, &deleted, k.k->p.offset) + : 0; + })); if (ret) { bch_err_msg(c, ret, "walking snapshots"); @@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); - for_each_btree_key(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ u32 snapshot = k.k->p.offset; u32 equiv = bch2_snapshot_equiv(c, snapshot); - if (equiv != snapshot) - snapshot_list_add(c, &deleted_interior, snapshot); - } - bch2_trans_iter_exit(trans, &iter); + equiv != snapshot + ? snapshot_list_add(c, &deleted_interior, snapshot) + : 0; + })); - if (ret) + if (ret) { + bch_err_msg(c, ret, "walking snapshots"); goto err_create_lock; + } /* * Fixing children of deleted snapshots can't be done completely |