summaryrefslogtreecommitdiff
path: root/fs/bcachefs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2022-08-21 14:29:43 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:09:40 -0400
commit: ca7d8fcabf29fae627babb72bda9b51763f9a145 (patch)
tree: ed2ff585e6590f8d16c911837b8e1e1b27e19e6b /fs/bcachefs
parent: 546180874ade7225676bc0cd5ea4e2388e2374bc (diff)
download: lwn-ca7d8fcabf29fae627babb72bda9b51763f9a145.tar.gz
          lwn-ca7d8fcabf29fae627babb72bda9b51763f9a145.zip
bcachefs: New locking functions
In the future, with the new deadlock cycle detector, we won't be using bare six_lock_* anymore: lock wait entries will all be embedded in btree_trans, and we will need a btree_trans context whenever locking a btree node.

This patch plumbs a btree_trans to the few places that need it, and adds two new locking functions:
 - btree_node_lock_nopath, which may fail, returning a transaction restart, and
 - btree_node_lock_nopath_nofail, to be used in places where we know we cannot deadlock (i.e. because we're holding no other locks).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/btree_cache.c | 21
-rw-r--r-- fs/bcachefs/btree_cache.h | 4
-rw-r--r-- fs/bcachefs/btree_gc.c | 40
-rw-r--r-- fs/bcachefs/btree_io.c | 8
-rw-r--r-- fs/bcachefs/btree_key_cache.c | 82
-rw-r--r-- fs/bcachefs/btree_locking.c | 2
-rw-r--r-- fs/bcachefs/btree_locking.h | 18
-rw-r--r-- fs/bcachefs/btree_update_interior.c | 95
-rw-r--r-- fs/bcachefs/btree_update_interior.h | 1
-rw-r--r-- fs/bcachefs/btree_update_leaf.c | 7
10 files changed, 182 insertions, 96 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index e09fbf36ebc2..a0e9e14e3fa5 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -959,12 +959,13 @@ lock_node:
return b;
}
-struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
const struct bkey_i *k,
enum btree_id btree_id,
unsigned level,
bool nofill)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
@@ -998,9 +999,14 @@ retry:
goto out;
} else {
lock_node:
- ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
- if (ret)
- goto retry;
+ ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
+ goto retry;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ERR_PTR(ret);
+ BUG();
+ }
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->c.btree_id != btree_id ||
@@ -1062,8 +1068,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c,
return PTR_ERR_OR_ZERO(b);
}
-void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
+void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@@ -1079,8 +1086,8 @@ wait_on_io:
__bch2_btree_node_wait_on_read(b);
__bch2_btree_node_wait_on_write(b);
- six_lock_intent(&b->c.lock, NULL, NULL);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, 0);
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 83723805f12a..a4df3e866bb8 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -26,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
const struct bkey_i *, unsigned,
enum six_lock_type, unsigned long);
-struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
enum btree_id, unsigned, bool);
int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
const struct bkey_i *, enum btree_id, unsigned);
-void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
+void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 239eda57bf02..77a1fe81ac35 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
}
}
-static void bch2_btree_node_update_key_early(struct bch_fs *c,
+static void bch2_btree_node_update_key_early(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_i *new)
{
+ struct bch_fs *c = trans->c;
struct btree *b;
struct bkey_buf tmp;
int ret;
@@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c,
bch2_bkey_buf_init(&tmp);
bch2_bkey_buf_reassemble(&tmp, c, old);
- b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
+ b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
if (!IS_ERR_OR_NULL(b)) {
mutex_lock(&c->btree_cache.lock);
@@ -352,8 +353,9 @@ fsck_err:
return ret;
}
-static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
{
+ struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf prev_k, cur_k;
@@ -378,7 +380,7 @@ again:
bch2_btree_and_journal_iter_advance(&iter);
bch2_bkey_buf_reassemble(&cur_k, c, k);
- cur = bch2_btree_node_get_noiter(c, cur_k.k,
+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
@@ -392,7 +394,7 @@ again:
bch2_btree_ids[b->c.btree_id],
b->c.level - 1,
buf.buf)) {
- bch2_btree_node_evict(c, cur_k.k);
+ bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL;
@@ -411,7 +413,7 @@ again:
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
- bch2_btree_node_evict(c, cur_k.k);
+ bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL;
@@ -425,7 +427,7 @@ again:
prev = NULL;
if (ret == DROP_PREV_NODE) {
- bch2_btree_node_evict(c, prev_k.k);
+ bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p);
if (ret)
@@ -465,7 +467,7 @@ again:
bch2_bkey_buf_reassemble(&cur_k, c, k);
bch2_btree_and_journal_iter_advance(&iter);
- cur = bch2_btree_node_get_noiter(c, cur_k.k,
+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
@@ -476,12 +478,12 @@ again:
goto err;
}
- ret = bch2_btree_repair_topology_recurse(c, cur);
+ ret = bch2_btree_repair_topology_recurse(trans, cur);
six_unlock_read(&cur->c.lock);
cur = NULL;
if (ret == DROP_THIS_NODE) {
- bch2_btree_node_evict(c, cur_k.k);
+ bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
dropped_children = true;
@@ -522,17 +524,20 @@ fsck_err:
static int bch2_repair_topology(struct bch_fs *c)
{
+ struct btree_trans trans;
struct btree *b;
unsigned i;
int ret = 0;
+ bch2_trans_init(&trans, c, 0, 0);
+
for (i = 0; i < BTREE_ID_NR && !ret; i++) {
b = c->btree_roots[i].b;
if (btree_node_fake(b))
continue;
- six_lock_read(&b->c.lock, NULL, NULL);
- ret = bch2_btree_repair_topology_recurse(c, b);
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ ret = bch2_btree_repair_topology_recurse(&trans, b);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
@@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c)
}
}
+ bch2_trans_exit(&trans);
+
return ret;
}
-static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
+static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
unsigned level, bool is_root,
struct bkey_s_c *k)
{
+ struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p = { 0 };
@@ -747,7 +755,7 @@ found:
}
if (level)
- bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
+ bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
if (c->opts.verbose) {
printbuf_reset(&buf);
@@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > atomic64_read(&c->journal.seq));
- ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
if (ret)
goto err;
@@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
bch2_bkey_buf_reassemble(&cur, c, k);
bch2_btree_and_journal_iter_advance(&iter);
- child = bch2_btree_node_get_noiter(c, cur.k,
+ child = bch2_btree_node_get_noiter(trans, cur.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(child);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index b3dc8b43298e..c63cb70836cc 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1652,9 +1652,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
- six_lock_read(&b->c.lock, NULL, NULL);
+ struct btree_trans trans;
+
+ bch2_trans_init(&trans, c, 0, 0);
+
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
__btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
+
+ bch2_trans_exit(&trans);
}
static void btree_node_write_work(struct work_struct *work)
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 7349c70f8445..38a66302d6e9 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -95,25 +95,14 @@ static void bkey_cached_free(struct btree_key_cache *bc,
six_unlock_intent(&ck->c.lock);
}
-static void bkey_cached_free_fast(struct btree_key_cache *bc,
- struct bkey_cached *ck)
+static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
{
- struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
struct btree_key_cache_freelist *f;
bool freed = false;
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
- ck->btree_trans_barrier_seq =
- start_poll_synchronize_srcu(&c->btree_trans_barrier);
-
- list_del_init(&ck->list);
- atomic_long_inc(&bc->nr_freed);
-
- kfree(ck->k);
- ck->k = NULL;
- ck->u64s = 0;
-
preempt_disable();
f = this_cpu_ptr(bc->pcpu_freed);
@@ -138,13 +127,32 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
list_move_tail(&ck->list, &bc->freed);
mutex_unlock(&bc->lock);
}
+}
+
+static void bkey_cached_free_fast(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+ ck->btree_trans_barrier_seq =
+ start_poll_synchronize_srcu(&c->btree_trans_barrier);
+
+ list_del_init(&ck->list);
+ atomic_long_inc(&bc->nr_freed);
+
+ kfree(ck->k);
+ ck->k = NULL;
+ ck->u64s = 0;
+
+ bkey_cached_move_to_freelist(bc, ck);
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
}
static struct bkey_cached *
-bkey_cached_alloc(struct btree_key_cache *c)
+bkey_cached_alloc(struct btree_trans *trans,
+ struct btree_key_cache *c)
{
struct bkey_cached *ck = NULL;
struct btree_key_cache_freelist *f;
@@ -173,8 +181,21 @@ bkey_cached_alloc(struct btree_key_cache *c)
}
if (ck) {
- six_lock_intent(&ck->c.lock, NULL, NULL);
- six_lock_write(&ck->c.lock, NULL, NULL);
+ int ret;
+
+ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
+ if (unlikely(ret)) {
+ bkey_cached_move_to_freelist(c, ck);
+ return ERR_PTR(ret);
+ }
+
+ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write);
+ if (unlikely(ret)) {
+ six_unlock_intent(&ck->c.lock);
+ bkey_cached_move_to_freelist(c, ck);
+ return ERR_PTR(ret);
+ }
+
return ck;
}
@@ -216,15 +237,18 @@ bkey_cached_reuse(struct btree_key_cache *c)
}
static struct bkey_cached *
-btree_key_cache_create(struct bch_fs *c,
+btree_key_cache_create(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos)
{
+ struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck;
bool was_new = true;
- ck = bkey_cached_alloc(bc);
+ ck = bkey_cached_alloc(trans, bc);
+ if (unlikely(IS_ERR(ck)))
+ return ck;
if (unlikely(!ck)) {
ck = bkey_cached_reuse(bc);
@@ -370,7 +394,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck) {
- ck = btree_key_cache_create(c, path->btree_id, path->pos);
+ ck = btree_key_cache_create(trans, path->btree_id, path->pos);
ret = PTR_ERR_OR_ZERO(ck);
if (ret)
goto err;
@@ -519,10 +543,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
evict:
BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
- mark_btree_node_unlocked(c_iter.path, 0);
- c_iter.path->l[0].b = NULL;
+ /*
+ * XXX: holding a lock that is not marked in btree_trans, not
+ * ideal:
+ */
+ six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
+ bch2_trans_unlock(trans);
- six_lock_write(&ck->c.lock, NULL, NULL);
+ /* Will not fail because we are holding no other locks: */
+ btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
@@ -546,11 +575,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
+ struct btree_trans trans;
+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int ret = 0;
- int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ bch2_trans_init(&trans, c, 0, 0);
- six_lock_read(&ck->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
key = ck->key;
if (ck->journal.seq != seq ||
@@ -567,12 +598,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
}
six_unlock_read(&ck->c.lock);
- ret = bch2_trans_do(c, NULL, NULL, 0,
+ ret = commit_do(&trans, NULL, NULL, 0,
btree_key_cache_flush_pos(&trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+ bch2_trans_exit(&trans);
return ret;
}
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 301311763d59..24d0ea903380 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -61,7 +61,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
* locked:
*/
six_lock_readers_add(&b->c.lock, -readers);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
six_lock_readers_add(&b->c.lock, readers);
}
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index ab3161c1b1f4..32c28c1341e9 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -185,6 +185,24 @@ void bch2_btree_node_unlock_write(struct btree_trans *,
/* lock: */
+static inline int __must_check
+btree_node_lock_nopath(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ enum six_lock_type type)
+{
+ six_lock_type(&b->lock, type, NULL, NULL);
+ return 0;
+}
+
+static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ enum six_lock_type type)
+{
+ int ret = btree_node_lock_nopath(trans, b, type);
+
+ BUG_ON(ret);
+}
+
static inline int btree_node_lock_type(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 1f5b98a3d0a2..6fe49766c6c8 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -169,7 +169,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
BUG_ON(path->l[b->c.level].b == b &&
path->l[b->c.level].lock_seq == b->c.lock.state.seq);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
bch2_btree_node_hash_remove(&c->btree_cache, b);
__btree_node_free(c, b);
@@ -259,7 +259,9 @@ mem_alloc:
return b;
}
-static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
+static struct btree *bch2_btree_node_alloc(struct btree_update *as,
+ struct btree_trans *trans,
+ unsigned level)
{
struct bch_fs *c = as->c;
struct btree *b;
@@ -271,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b = p->b[--p->nr];
- six_lock_intent(&b->c.lock, NULL, NULL);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
set_btree_node_accessed(b);
set_btree_node_dirty_acct(c, b);
@@ -323,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos)
}
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
+ struct btree_trans *trans,
struct btree *b,
struct bkey_format format)
{
struct btree *n;
- n = bch2_btree_node_alloc(as, b->c.level);
+ n = bch2_btree_node_alloc(as, trans, b->c.level);
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
@@ -347,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
}
static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
+ struct btree_trans *trans,
struct btree *b)
{
struct bkey_format new_f = bch2_btree_calc_format(b);
@@ -358,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
if (!bch2_btree_node_format_fits(as->c, b, &new_f))
new_f = b->format;
- return __bch2_btree_node_alloc_replacement(as, b, new_f);
+ return __bch2_btree_node_alloc_replacement(as, trans, b, new_f);
}
-static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
+static struct btree *__btree_root_alloc(struct btree_update *as,
+ struct btree_trans *trans, unsigned level)
{
- struct btree *b = bch2_btree_node_alloc(as, level);
+ struct btree *b = bch2_btree_node_alloc(as, trans, level);
btree_set_min(b, POS_MIN);
btree_set_max(b, SPOS_MAX);
@@ -378,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
return b;
}
-static void bch2_btree_reserve_put(struct btree_update *as)
+static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
struct prealloc_nodes *p;
@@ -405,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as)
mutex_unlock(&c->btree_reserve_cache_lock);
- six_lock_intent(&b->c.lock, NULL, NULL);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
__btree_node_free(c, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -460,7 +465,7 @@ err:
/* Asynchronous interior node update machinery */
-static void bch2_btree_update_free(struct btree_update *as)
+static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
@@ -473,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as)
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
bch2_disk_reservation_put(c, &as->disk_res);
- bch2_btree_reserve_put(as);
+ bch2_btree_reserve_put(as, trans);
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
as->start_time);
@@ -551,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
- struct btree *b = as->b;
+ struct btree *b;
struct btree_trans trans;
u64 journal_seq = 0;
unsigned i;
int ret;
+ bch2_trans_init(&trans, c, 0, 512);
/*
* If we're already in an error state, it might be because a btree node
* was never written, and we might be trying to free that same btree
@@ -573,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as)
* on disk:
*/
for (i = 0; i < as->nr_old_nodes; i++) {
- struct btree *old = as->old_nodes[i];
__le64 seq;
- six_lock_read(&old->c.lock, NULL, NULL);
- seq = old->data ? old->data->keys.seq : 0;
- six_unlock_read(&old->c.lock);
+ b = as->old_nodes[i];
+
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+ seq = b->data ? b->data->keys.seq : 0;
+ six_unlock_read(&b->c.lock);
if (seq == as->old_nodes_seq[i])
- wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
TASK_UNINTERRUPTIBLE);
}
@@ -598,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as)
* journal reclaim does btree updates when flushing bkey_cached entries,
* which may require allocations as well.
*/
- bch2_trans_init(&trans, c, 0, 512);
ret = commit_do(&trans, &as->disk_res, &journal_seq,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_NOCHECK_RW|
- BTREE_INSERT_JOURNAL_RECLAIM|
- JOURNAL_WATERMARK_reserved,
- btree_update_nodes_written_trans(&trans, as));
- bch2_trans_exit(&trans);
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_JOURNAL_RECLAIM|
+ JOURNAL_WATERMARK_reserved,
+ btree_update_nodes_written_trans(&trans, as));
+ bch2_trans_unlock(&trans);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"error %i in btree_update_nodes_written()", ret);
err:
- if (b) {
+ if (as->b) {
+ b = as->b;
/*
* @b is the node we did the final insert into:
*
@@ -623,8 +630,8 @@ err:
* we're in journal error state:
*/
- six_lock_intent(&b->c.lock, NULL, NULL);
- six_lock_write(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
@@ -681,7 +688,7 @@ err:
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
- six_lock_read(&b->c.lock, NULL, NULL);
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@@ -689,7 +696,8 @@ err:
for (i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
- bch2_btree_update_free(as);
+ bch2_btree_update_free(as, &trans);
+ bch2_trans_exit(&trans);
}
static void btree_interior_update_work(struct work_struct *work)
@@ -936,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
as->nr_old_nodes++;
}
-static void bch2_btree_update_done(struct btree_update *as)
+static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
@@ -947,7 +955,7 @@ static void bch2_btree_update_done(struct btree_update *as)
up_read(&as->c->gc_lock);
as->took_gc_lock = false;
- bch2_btree_reserve_put(as);
+ bch2_btree_reserve_put(as, trans);
continue_at(&as->cl, btree_update_set_nodes_written,
as->c->btree_interior_update_worker);
@@ -1102,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
bch2_trans_verify_not_restarted(trans, restart_count);
return as;
err:
- bch2_btree_update_free(as);
+ bch2_btree_update_free(as, trans);
return ERR_PTR(ret);
}
@@ -1254,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
* node)
*/
static struct btree *__btree_split_node(struct btree_update *as,
+ struct btree_trans *trans,
struct btree *n1)
{
struct bkey_format_state s;
@@ -1263,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
struct bpos n1_pos;
- n2 = bch2_btree_node_alloc(as, n1->c.level);
+ n2 = bch2_btree_node_alloc(as, trans, n1->c.level);
n2->data->max_key = n1->data->max_key;
n2->data->format = n1->format;
@@ -1427,7 +1436,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
- n1 = bch2_btree_node_alloc_replacement(as, b);
+ n1 = bch2_btree_node_alloc_replacement(as, trans, b);
if (keys)
btree_split_insert_keys(as, trans, path, n1, keys);
@@ -1435,7 +1444,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
trace_and_count(c, btree_node_split, c, b);
- n2 = __btree_split_node(as, n1);
+ n2 = __btree_split_node(as, trans, n1);
bch2_btree_build_aux_trees(n2);
bch2_btree_build_aux_trees(n1);
@@ -1457,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (!parent) {
/* Depth increases, make a new root */
- n3 = __btree_root_alloc(as, b->c.level + 1);
+ n3 = __btree_root_alloc(as, trans, b->c.level + 1);
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
@@ -1622,7 +1631,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
return PTR_ERR(as);
btree_split(as, trans, path, b, NULL, flags);
- bch2_btree_update_done(as);
+ bch2_btree_update_done(as, trans);
for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
ret = bch2_foreground_maybe_merge(trans, path, l, flags);
@@ -1741,7 +1750,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
bch2_btree_interior_update_will_free_node(as, m);
- n = bch2_btree_node_alloc(as, b->c.level);
+ n = bch2_btree_node_alloc(as, trans, b->c.level);
SET_BTREE_NODE_SEQ(n->data,
max(BTREE_NODE_SEQ(b->data),
@@ -1788,7 +1797,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
six_unlock_intent(&n->c.lock);
- bch2_btree_update_done(as);
+ bch2_btree_update_done(as, trans);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
out:
@@ -1822,7 +1831,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
- n = bch2_btree_node_alloc_replacement(as, b);
+ n = bch2_btree_node_alloc_replacement(as, trans, b);
bch2_btree_update_add_new_node(as, n);
bch2_btree_build_aux_trees(n);
@@ -1847,7 +1856,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_node_free_inmem(trans, b);
six_unlock_intent(&n->c.lock);
- bch2_btree_update_done(as);
+ bch2_btree_update_done(as, trans);
out:
bch2_btree_path_downgrade(trans, iter->path);
return ret;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index adfc6c24a7a4..7af810df8348 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -117,6 +117,7 @@ struct btree_update {
};
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
+ struct btree_trans *,
struct btree *,
struct bkey_format);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a8306b16956d..d414cbefa3c9 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
+ struct btree_trans trans;
unsigned long old, new, v;
unsigned idx = w - b->writes;
- six_lock_read(&b->c.lock, NULL, NULL);
+ bch2_trans_init(&trans, c, 0, 0);
+
+ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
v = READ_ONCE(b->flags);
do {
@@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
+
+ bch2_trans_exit(&trans);
return 0;
}