diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-08-21 14:29:43 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:40 -0400 |
commit | ca7d8fcabf29fae627babb72bda9b51763f9a145 (patch) | |
tree | ed2ff585e6590f8d16c911837b8e1e1b27e19e6b /fs | |
parent | 546180874ade7225676bc0cd5ea4e2388e2374bc (diff) | |
download | lwn-ca7d8fcabf29fae627babb72bda9b51763f9a145.tar.gz lwn-ca7d8fcabf29fae627babb72bda9b51763f9a145.zip |
bcachefs: New locking functions
In the future, with the new deadlock cycle detector, we won't be using
bare six_lock_* anymore: lock wait entries will all be embedded in
btree_trans, and we will need a btree_trans context whenever locking a
btree node.
This patch plumbs a btree_trans to the few places that need it, and adds
two new locking functions:
- btree_node_lock_nopath, which may fail, returning a transaction
restart error, and
- btree_node_lock_nopath_nofail, to be used in places where we know we
cannot deadlock (i.e. because we're holding no other locks).
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/btree_cache.c | 21 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 40 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 82 | ||||
-rw-r--r-- | fs/bcachefs/btree_locking.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_locking.h | 18 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 95 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 7 |
10 files changed, 182 insertions, 96 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index e09fbf36ebc2..a0e9e14e3fa5 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -959,12 +959,13 @@ lock_node: return b; } -struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, +struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, const struct bkey_i *k, enum btree_id btree_id, unsigned level, bool nofill) { + struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; @@ -998,9 +999,14 @@ retry: goto out; } else { lock_node: - ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); - if (ret) - goto retry; + ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) + goto retry; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ERR_PTR(ret); + BUG(); + } if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.btree_id != btree_id || @@ -1062,8 +1068,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c, return PTR_ERR_OR_ZERO(b); } -void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) +void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) { + struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; @@ -1079,8 +1086,8 @@ wait_on_io: __bch2_btree_node_wait_on_read(b); __bch2_btree_node_wait_on_write(b); - six_lock_intent(&b->c.lock, NULL, NULL); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); if (btree_node_dirty(b)) { __bch2_btree_node_write(c, b, 0); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 83723805f12a..a4df3e866bb8 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -26,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct 
btree_path *, const struct bkey_i *, unsigned, enum six_lock_type, unsigned long); -struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, +struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *, enum btree_id, unsigned, bool); int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *, const struct bkey_i *, enum btree_id, unsigned); -void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); +void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *); void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 239eda57bf02..77a1fe81ac35 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) } } -static void bch2_btree_node_update_key_early(struct bch_fs *c, +static void bch2_btree_node_update_key_early(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_i *new) { + struct bch_fs *c = trans->c; struct btree *b; struct bkey_buf tmp; int ret; @@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c, bch2_bkey_buf_init(&tmp); bch2_bkey_buf_reassemble(&tmp, c, old); - b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true); + b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); if (!IS_ERR_OR_NULL(b)) { mutex_lock(&c->btree_cache.lock); @@ -352,8 +353,9 @@ fsck_err: return ret; } -static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) +static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b) { + struct bch_fs *c = trans->c; struct btree_and_journal_iter iter; struct bkey_s_c k; struct bkey_buf prev_k, cur_k; @@ -378,7 +380,7 @@ again: bch2_btree_and_journal_iter_advance(&iter); bch2_bkey_buf_reassemble(&cur_k, 
c, k); - cur = bch2_btree_node_get_noiter(c, cur_k.k, + cur = bch2_btree_node_get_noiter(trans, cur_k.k, b->c.btree_id, b->c.level - 1, false); ret = PTR_ERR_OR_ZERO(cur); @@ -392,7 +394,7 @@ again: bch2_btree_ids[b->c.btree_id], b->c.level - 1, buf.buf)) { - bch2_btree_node_evict(c, cur_k.k); + bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); cur = NULL; @@ -411,7 +413,7 @@ again: if (ret == DROP_THIS_NODE) { six_unlock_read(&cur->c.lock); - bch2_btree_node_evict(c, cur_k.k); + bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); cur = NULL; @@ -425,7 +427,7 @@ again: prev = NULL; if (ret == DROP_PREV_NODE) { - bch2_btree_node_evict(c, prev_k.k); + bch2_btree_node_evict(trans, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, prev_k.k->k.p); if (ret) @@ -465,7 +467,7 @@ again: bch2_bkey_buf_reassemble(&cur_k, c, k); bch2_btree_and_journal_iter_advance(&iter); - cur = bch2_btree_node_get_noiter(c, cur_k.k, + cur = bch2_btree_node_get_noiter(trans, cur_k.k, b->c.btree_id, b->c.level - 1, false); ret = PTR_ERR_OR_ZERO(cur); @@ -476,12 +478,12 @@ again: goto err; } - ret = bch2_btree_repair_topology_recurse(c, cur); + ret = bch2_btree_repair_topology_recurse(trans, cur); six_unlock_read(&cur->c.lock); cur = NULL; if (ret == DROP_THIS_NODE) { - bch2_btree_node_evict(c, cur_k.k); + bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); dropped_children = true; @@ -522,17 +524,20 @@ fsck_err: static int bch2_repair_topology(struct bch_fs *c) { + struct btree_trans trans; struct btree *b; unsigned i; int ret = 0; + bch2_trans_init(&trans, c, 0, 0); + for (i = 0; i < BTREE_ID_NR && !ret; i++) { b = c->btree_roots[i].b; if (btree_node_fake(b)) continue; - six_lock_read(&b->c.lock, NULL, NULL); - ret = bch2_btree_repair_topology_recurse(c, b); + 
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + ret = bch2_btree_repair_topology_recurse(&trans, b); six_unlock_read(&b->c.lock); if (ret == DROP_THIS_NODE) { @@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c) } } + bch2_trans_exit(&trans); + return ret; } -static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, +static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id, unsigned level, bool is_root, struct bkey_s_c *k) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k); const union bch_extent_entry *entry; struct extent_ptr_decoded p = { 0 }; @@ -747,7 +755,7 @@ found: } if (level) - bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new); + bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new); if (c->opts.verbose) { printbuf_reset(&buf); @@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, BUG_ON(bch2_journal_seq_verify && k->k->version.lo > atomic64_read(&c->journal.seq)); - ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k); + ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); if (ret) goto err; @@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b bch2_bkey_buf_reassemble(&cur, c, k); bch2_btree_and_journal_iter_advance(&iter); - child = bch2_btree_node_get_noiter(c, cur.k, + child = bch2_btree_node_get_noiter(trans, cur.k, b->c.btree_id, b->c.level - 1, false); ret = PTR_ERR_OR_ZERO(child); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index b3dc8b43298e..c63cb70836cc 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1652,9 +1652,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b) static void btree_node_write_done(struct bch_fs *c, struct btree *b) { - six_lock_read(&b->c.lock, NULL, NULL); + struct btree_trans trans; + + bch2_trans_init(&trans, c, 0, 0); + + 
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); __btree_node_write_done(c, b); six_unlock_read(&b->c.lock); + + bch2_trans_exit(&trans); } static void btree_node_write_work(struct work_struct *work) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 7349c70f8445..38a66302d6e9 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -95,25 +95,14 @@ static void bkey_cached_free(struct btree_key_cache *bc, six_unlock_intent(&ck->c.lock); } -static void bkey_cached_free_fast(struct btree_key_cache *bc, - struct bkey_cached *ck) +static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, + struct bkey_cached *ck) { - struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct btree_key_cache_freelist *f; bool freed = false; BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); - ck->btree_trans_barrier_seq = - start_poll_synchronize_srcu(&c->btree_trans_barrier); - - list_del_init(&ck->list); - atomic_long_inc(&bc->nr_freed); - - kfree(ck->k); - ck->k = NULL; - ck->u64s = 0; - preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); @@ -138,13 +127,32 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc, list_move_tail(&ck->list, &bc->freed); mutex_unlock(&bc->lock); } +} + +static void bkey_cached_free_fast(struct btree_key_cache *bc, + struct bkey_cached *ck) +{ + struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); + + ck->btree_trans_barrier_seq = + start_poll_synchronize_srcu(&c->btree_trans_barrier); + + list_del_init(&ck->list); + atomic_long_inc(&bc->nr_freed); + + kfree(ck->k); + ck->k = NULL; + ck->u64s = 0; + + bkey_cached_move_to_freelist(bc, ck); six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); } static struct bkey_cached * -bkey_cached_alloc(struct btree_key_cache *c) +bkey_cached_alloc(struct btree_trans *trans, + struct btree_key_cache *c) { struct bkey_cached *ck = NULL; struct btree_key_cache_freelist *f; @@ -173,8 +181,21 @@ 
bkey_cached_alloc(struct btree_key_cache *c) } if (ck) { - six_lock_intent(&ck->c.lock, NULL, NULL); - six_lock_write(&ck->c.lock, NULL, NULL); + int ret; + + ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent); + if (unlikely(ret)) { + bkey_cached_move_to_freelist(c, ck); + return ERR_PTR(ret); + } + + ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write); + if (unlikely(ret)) { + six_unlock_intent(&ck->c.lock); + bkey_cached_move_to_freelist(c, ck); + return ERR_PTR(ret); + } + return ck; } @@ -216,15 +237,18 @@ bkey_cached_reuse(struct btree_key_cache *c) } static struct bkey_cached * -btree_key_cache_create(struct bch_fs *c, +btree_key_cache_create(struct btree_trans *trans, enum btree_id btree_id, struct bpos pos) { + struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; struct bkey_cached *ck; bool was_new = true; - ck = bkey_cached_alloc(bc); + ck = bkey_cached_alloc(trans, bc); + if (unlikely(IS_ERR(ck))) + return ck; if (unlikely(!ck)) { ck = bkey_cached_reuse(bc); @@ -370,7 +394,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { - ck = btree_key_cache_create(c, path->btree_id, path->pos); + ck = btree_key_cache_create(trans, path->btree_id, path->pos); ret = PTR_ERR_OR_ZERO(ck); if (ret) goto err; @@ -519,10 +543,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, evict: BUG_ON(!btree_node_intent_locked(c_iter.path, 0)); - mark_btree_node_unlocked(c_iter.path, 0); - c_iter.path->l[0].b = NULL; + /* + * XXX: holding a lock that is not marked in btree_trans, not + * ideal: + */ + six_lock_increment(&ck->c.lock, SIX_LOCK_intent); + bch2_trans_unlock(trans); - six_lock_write(&ck->c.lock, NULL, NULL); + /* Will not fail because we are holding no other locks: */ + btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { 
clear_bit(BKEY_CACHED_DIRTY, &ck->flags); @@ -546,11 +575,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; + struct btree_trans trans; + int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; - int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + bch2_trans_init(&trans, c, 0, 0); - six_lock_read(&ck->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read); key = ck->key; if (ck->journal.seq != seq || @@ -567,12 +598,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, } six_unlock_read(&ck->c.lock); - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, btree_key_cache_flush_pos(&trans, key, seq, BTREE_INSERT_JOURNAL_RECLAIM, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + bch2_trans_exit(&trans); return ret; } diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 301311763d59..24d0ea903380 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -61,7 +61,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) * locked: */ six_lock_readers_add(&b->c.lock, -readers); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); six_lock_readers_add(&b->c.lock, readers); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index ab3161c1b1f4..32c28c1341e9 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -185,6 +185,24 @@ void bch2_btree_node_unlock_write(struct btree_trans *, /* lock: */ +static inline int __must_check +btree_node_lock_nopath(struct btree_trans *trans, + struct btree_bkey_cached_common *b, + enum six_lock_type type) +{ + six_lock_type(&b->lock, type, NULL, NULL); + return 0; +} + +static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans, + struct 
btree_bkey_cached_common *b, + enum six_lock_type type) +{ + int ret = btree_node_lock_nopath(trans, b, type); + + BUG_ON(ret); +} + static inline int btree_node_lock_type(struct btree_trans *trans, struct btree_path *path, struct btree_bkey_cached_common *b, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 1f5b98a3d0a2..6fe49766c6c8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -169,7 +169,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, BUG_ON(path->l[b->c.level].b == b && path->l[b->c.level].lock_seq == b->c.lock.state.seq); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); bch2_btree_node_hash_remove(&c->btree_cache, b); __btree_node_free(c, b); @@ -259,7 +259,9 @@ mem_alloc: return b; } -static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level) +static struct btree *bch2_btree_node_alloc(struct btree_update *as, + struct btree_trans *trans, + unsigned level) { struct bch_fs *c = as->c; struct btree *b; @@ -271,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b = p->b[--p->nr]; - six_lock_intent(&b->c.lock, NULL, NULL); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); set_btree_node_accessed(b); set_btree_node_dirty_acct(c, b); @@ -323,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos) } struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as, + struct btree_trans *trans, struct btree *b, struct bkey_format format) { struct btree *n; - n = bch2_btree_node_alloc(as, b->c.level); + n = bch2_btree_node_alloc(as, trans, b->c.level); SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); @@ -347,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct 
btree_update *as, } static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, + struct btree_trans *trans, struct btree *b) { struct bkey_format new_f = bch2_btree_calc_format(b); @@ -358,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, if (!bch2_btree_node_format_fits(as->c, b, &new_f)) new_f = b->format; - return __bch2_btree_node_alloc_replacement(as, b, new_f); + return __bch2_btree_node_alloc_replacement(as, trans, b, new_f); } -static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) +static struct btree *__btree_root_alloc(struct btree_update *as, + struct btree_trans *trans, unsigned level) { - struct btree *b = bch2_btree_node_alloc(as, level); + struct btree *b = bch2_btree_node_alloc(as, trans, level); btree_set_min(b, POS_MIN); btree_set_max(b, SPOS_MAX); @@ -378,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) return b; } -static void bch2_btree_reserve_put(struct btree_update *as) +static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans) { struct bch_fs *c = as->c; struct prealloc_nodes *p; @@ -405,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as) mutex_unlock(&c->btree_reserve_cache_lock); - six_lock_intent(&b->c.lock, NULL, NULL); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); __btree_node_free(c, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -460,7 +465,7 @@ err: /* Asynchronous interior node update machinery */ -static void bch2_btree_update_free(struct btree_update *as) +static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans) { struct bch_fs *c = as->c; @@ -473,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as) bch2_journal_pin_drop(&c->journal, &as->journal); 
bch2_journal_pin_flush(&c->journal, &as->journal); bch2_disk_reservation_put(c, &as->disk_res); - bch2_btree_reserve_put(as); + bch2_btree_reserve_put(as, trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], as->start_time); @@ -551,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; - struct btree *b = as->b; + struct btree *b; struct btree_trans trans; u64 journal_seq = 0; unsigned i; int ret; + bch2_trans_init(&trans, c, 0, 512); /* * If we're already in an error state, it might be because a btree node * was never written, and we might be trying to free that same btree @@ -573,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as) * on disk: */ for (i = 0; i < as->nr_old_nodes; i++) { - struct btree *old = as->old_nodes[i]; __le64 seq; - six_lock_read(&old->c.lock, NULL, NULL); - seq = old->data ? old->data->keys.seq : 0; - six_unlock_read(&old->c.lock); + b = as->old_nodes[i]; + + btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + seq = b->data ? b->data->keys.seq : 0; + six_unlock_read(&b->c.lock); if (seq == as->old_nodes_seq[i]) - wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner, + wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, TASK_UNINTERRUPTIBLE); } @@ -598,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as) * journal reclaim does btree updates when flushing bkey_cached entries, * which may require allocations as well. 
*/ - bch2_trans_init(&trans, c, 0, 512); ret = commit_do(&trans, &as->disk_res, &journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_JOURNAL_RECLAIM| - JOURNAL_WATERMARK_reserved, - btree_update_nodes_written_trans(&trans, as)); - bch2_trans_exit(&trans); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_JOURNAL_RECLAIM| + JOURNAL_WATERMARK_reserved, + btree_update_nodes_written_trans(&trans, as)); + bch2_trans_unlock(&trans); bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, "error %i in btree_update_nodes_written()", ret); err: - if (b) { + if (as->b) { + b = as->b; /* * @b is the node we did the final insert into: * @@ -623,8 +630,8 @@ err: * we're in journal error state: */ - six_lock_intent(&b->c.lock, NULL, NULL); - six_lock_write(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent); + btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write); mutex_lock(&c->btree_interior_update_lock); list_del(&as->write_blocked_list); @@ -681,7 +688,7 @@ err: for (i = 0; i < as->nr_new_nodes; i++) { b = as->new_nodes[i]; - six_lock_read(&b->c.lock, NULL, NULL); + btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); btree_node_write_if_need(c, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); } @@ -689,7 +696,8 @@ err: for (i = 0; i < as->nr_open_buckets; i++) bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); - bch2_btree_update_free(as); + bch2_btree_update_free(as, &trans); + bch2_trans_exit(&trans); } static void btree_interior_update_work(struct work_struct *work) @@ -936,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as, as->nr_old_nodes++; } -static void bch2_btree_update_done(struct btree_update *as) +static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans) { struct bch_fs *c = as->c; u64 start_time = as->start_time; @@ -947,7 +955,7 @@ static void bch2_btree_update_done(struct 
btree_update *as) up_read(&as->c->gc_lock); as->took_gc_lock = false; - bch2_btree_reserve_put(as); + bch2_btree_reserve_put(as, trans); continue_at(&as->cl, btree_update_set_nodes_written, as->c->btree_interior_update_worker); @@ -1102,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, bch2_trans_verify_not_restarted(trans, restart_count); return as; err: - bch2_btree_update_free(as); + bch2_btree_update_free(as, trans); return ERR_PTR(ret); } @@ -1254,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, * node) */ static struct btree *__btree_split_node(struct btree_update *as, + struct btree_trans *trans, struct btree *n1) { struct bkey_format_state s; @@ -1263,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as, struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL; struct bpos n1_pos; - n2 = bch2_btree_node_alloc(as, n1->c.level); + n2 = bch2_btree_node_alloc(as, trans, n1->c.level); n2->data->max_key = n1->data->max_key; n2->data->format = n1->format; @@ -1427,7 +1436,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, bch2_btree_interior_update_will_free_node(as, b); - n1 = bch2_btree_node_alloc_replacement(as, b); + n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) btree_split_insert_keys(as, trans, path, n1, keys); @@ -1435,7 +1444,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { trace_and_count(c, btree_node_split, c, b); - n2 = __btree_split_node(as, n1); + n2 = __btree_split_node(as, trans, n1); bch2_btree_build_aux_trees(n2); bch2_btree_build_aux_trees(n1); @@ -1457,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, if (!parent) { /* Depth increases, make a new root */ - n3 = __btree_root_alloc(as, b->c.level + 1); + n3 = __btree_root_alloc(as, trans, b->c.level + 1); n3->sib_u64s[0] = 
U16_MAX; n3->sib_u64s[1] = U16_MAX; @@ -1622,7 +1631,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, return PTR_ERR(as); btree_split(as, trans, path, b, NULL, flags); - bch2_btree_update_done(as); + bch2_btree_update_done(as, trans); for (l = path->level + 1; btree_path_node(path, l) && !ret; l++) ret = bch2_foreground_maybe_merge(trans, path, l, flags); @@ -1741,7 +1750,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_interior_update_will_free_node(as, b); bch2_btree_interior_update_will_free_node(as, m); - n = bch2_btree_node_alloc(as, b->c.level); + n = bch2_btree_node_alloc(as, trans, b->c.level); SET_BTREE_NODE_SEQ(n->data, max(BTREE_NODE_SEQ(b->data), @@ -1788,7 +1797,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, six_unlock_intent(&n->c.lock); - bch2_btree_update_done(as); + bch2_btree_update_done(as, trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time); out: @@ -1822,7 +1831,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_interior_update_will_free_node(as, b); - n = bch2_btree_node_alloc_replacement(as, b); + n = bch2_btree_node_alloc_replacement(as, trans, b); bch2_btree_update_add_new_node(as, n); bch2_btree_build_aux_trees(n); @@ -1847,7 +1856,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_node_free_inmem(trans, b); six_unlock_intent(&n->c.lock); - bch2_btree_update_done(as); + bch2_btree_update_done(as, trans); out: bch2_btree_path_downgrade(trans, iter->path); return ret; diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index adfc6c24a7a4..7af810df8348 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -117,6 +117,7 @@ struct btree_update { }; struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, + struct btree_trans *, struct btree *, struct bkey_format); diff --git a/fs/bcachefs/btree_update_leaf.c 
b/fs/bcachefs/btree_update_leaf.c index a8306b16956d..d414cbefa3c9 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); + struct btree_trans trans; unsigned long old, new, v; unsigned idx = w - b->writes; - six_lock_read(&b->c.lock, NULL, NULL); + bch2_trans_init(&trans, c, 0, 0); + + btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); v = READ_ONCE(b->flags); do { @@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, btree_node_write_if_need(c, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); + + bch2_trans_exit(&trans); return 0; } |