-rw-r--r--   fs/bcachefs/btree_cache.c     |  9
-rw-r--r--   fs/bcachefs/btree_cache.h     |  2
-rw-r--r--   fs/bcachefs/btree_iter.c      | 72
-rw-r--r--   fs/bcachefs/btree_key_cache.c |  2
-rw-r--r--   fs/bcachefs/btree_locking.h   |  8
-rw-r--r--   fs/bcachefs/trace.h           | 43
6 files changed, 98 insertions, 38 deletions
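The patch below threads a trace_ip argument (the calling code's instruction pointer, captured with _THIS_IP_ or _RET_IP_) through the btree locking path, and expands the trans_restart_would_deadlock tracepoint to report that call site, a numeric reason code, and the btree IDs and iterator types involved in the conflict. As background, here is a minimal userspace sketch of the instruction-pointer capture mechanism the patch relies on; the two macro bodies match the kernel's definitions, but the surrounding program and the lock_path() helper are illustrative stand-ins, not bcachefs code:

#include <stdio.h>

/* Macro bodies as defined by the kernel (GCC/Clang extensions). */
#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
#define _RET_IP_   ((unsigned long)__builtin_return_address(0))

/* Hypothetical stand-in for the locking path: reports who called it. */
static void lock_path(unsigned long trace_ip)
{
	/*
	 * trace_ip identifies the original call site even when this
	 * function is buried several frames deep; _RET_IP_ only names
	 * the immediate caller.
	 */
	printf("call site 0x%lx, immediate caller 0x%lx\n",
	       trace_ip, _RET_IP_);
}

int main(void)
{
	lock_path(_THIS_IP_);	/* callers pass their own IP down */
	return 0;
}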
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index c503d76bab3b..02a2f558cf4d 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -706,7 +706,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 				  const struct bkey_i *k, unsigned level,
-				  enum six_lock_type lock_type)
+				  enum six_lock_type lock_type,
+				  unsigned long trace_ip)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -768,7 +769,7 @@ lock_node:
 			btree_node_unlock(iter, level + 1);
 
 		if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-				     lock_node_check_fn, (void *) k)) {
+				     lock_node_check_fn, (void *) k, trace_ip)) {
 			if (b->hash_val != btree_ptr_hash_val(k))
 				goto retry;
 			return ERR_PTR(-EINTR);
@@ -936,7 +937,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 	bch2_bkey_unpack(parent, &tmp.k, k);
 
 	ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-				  SIX_LOCK_intent);
+				  SIX_LOCK_intent, _THIS_IP_);
 
 	if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
 		struct btree_iter *linked;
@@ -956,7 +957,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 			btree_node_unlock(iter, level);
 
 		ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-					  SIX_LOCK_intent);
+					  SIX_LOCK_intent, _THIS_IP_);
 
 		/*
 		 * before btree_iter_relock() calls btree_iter_verify_locks():
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index d0d3a85bb8be..8a19e60e9258 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -23,7 +23,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
 				  const struct bkey_i *, unsigned,
-				  enum six_lock_type);
+				  enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
 					 enum btree_id, unsigned);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index b561d0353d77..a76e13000d11 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -197,13 +197,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			    unsigned level, struct btree_iter *iter,
 			    enum six_lock_type type,
-			    six_lock_should_sleep_fn should_sleep_fn,
-			    void *p)
+			    six_lock_should_sleep_fn should_sleep_fn, void *p,
+			    unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
-	struct btree_iter *linked;
+	struct btree_iter *linked, *deadlock_iter = NULL;
 	u64 start_time = local_clock();
-	bool ret = true;
+	unsigned reason = 9;
 
 	/* Check if it's safe to block: */
 	trans_for_each_iter(trans, linked) {
@@ -228,10 +228,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			linked->locks_want = max_t(unsigned,
 					linked->locks_want,
 					__fls(linked->nodes_locked) + 1);
-			if (!btree_iter_get_locks(linked, true, false))
-				ret = false;
+			if (!btree_iter_get_locks(linked, true, false)) {
+				deadlock_iter = linked;
+				reason = 1;
+			}
 		} else {
-			ret = false;
+			deadlock_iter = linked;
+			reason = 2;
 		}
 	}
 
@@ -247,23 +250,30 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 				max(level + 1, max_t(unsigned,
 						     linked->locks_want,
 						     iter->locks_want));
-			if (!btree_iter_get_locks(linked, true, false))
-				ret = false;
+			if (!btree_iter_get_locks(linked, true, false)) {
+				deadlock_iter = linked;
+				reason = 3;
+			}
 		} else {
-			ret = false;
+			deadlock_iter = linked;
+			reason = 4;
 		}
 
 		/* Must lock btree nodes in key order: */
 		if ((cmp_int(iter->btree_id, linked->btree_id) ?:
-		     -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
-			ret = false;
+		     -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) {
+			deadlock_iter = linked;
+			reason = 5;
+		}
 
 		if (iter->btree_id == linked->btree_id &&
 		    btree_node_locked(linked, level) &&
 		    bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b,
-						 btree_iter_type(linked))) <= 0)
-			ret = false;
+						 btree_iter_type(linked))) <= 0) {
+			deadlock_iter = linked;
+			reason = 6;
+		}
 
 		/*
 		 * Recheck if this is a node we already have locked - since one
@@ -277,8 +287,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 		}
 	}
 
-	if (unlikely(!ret)) {
-		trace_trans_restart_would_deadlock(iter->trans->ip);
+	if (unlikely(deadlock_iter)) {
+		trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+				reason,
+				deadlock_iter->btree_id,
+				btree_iter_type(deadlock_iter),
+				iter->btree_id,
+				btree_iter_type(iter));
 		return false;
 	}
 
@@ -945,7 +960,8 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
 }
 
 static inline int btree_iter_lock_root(struct btree_iter *iter,
-				       unsigned depth_want)
+				       unsigned depth_want,
+				       unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
@@ -974,7 +990,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 		lock_type = __btree_lock_want(iter, iter->level);
 		if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
 					      iter, lock_type,
-					      lock_root_check_fn, rootp)))
+					      lock_root_check_fn, rootp,
+					      trace_ip)))
 			return -EINTR;
 
 		if (likely(b == READ_ONCE(*rootp) &&
@@ -1046,7 +1063,8 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
 		btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter)
+static __always_inline int btree_iter_down(struct btree_iter *iter,
+					   unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree_iter_level *l = &iter->l[iter->level];
@@ -1060,7 +1078,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter)
 	bch2_bkey_unpack(l->b, &tmp.k,
 			 bch2_btree_node_iter_peek(&l->iter, l->b));
 
-	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
+	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
 	if (unlikely(IS_ERR(b)))
 		return PTR_ERR(b);
 
@@ -1084,7 +1102,7 @@ static void btree_iter_up(struct btree_iter *iter)
 	btree_node_unlock(iter, iter->level++);
 }
 
-static int btree_iter_traverse_one(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *, unsigned long);
 
 static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 {
@@ -1109,6 +1127,7 @@ retry_all:
 	bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
 #undef btree_iter_cmp_by_idx
 	bch2_trans_unlock(trans);
+	cond_resched();
 
 	if (unlikely(ret == -ENOMEM)) {
 		struct closure cl;
@@ -1139,7 +1158,7 @@ retry_all:
 		if (!(trans->iters_linked & (1ULL << idx)))
 			continue;
 
-		ret = btree_iter_traverse_one(&trans->iters[idx]);
+		ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
 		if (ret)
 			goto retry_all;
 	}
@@ -1202,7 +1221,8 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
  * stashed in the iterator and returned from bch2_trans_exit().
  */
-static int btree_iter_traverse_one(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter,
+				   unsigned long trace_ip)
 {
 	unsigned depth_want = iter->level;
 
@@ -1249,8 +1269,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
 	 */
 	while (iter->level > depth_want) {
 		int ret = btree_iter_node(iter, iter->level)
-			? btree_iter_down(iter)
-			: btree_iter_lock_root(iter, depth_want);
+			? btree_iter_down(iter, trace_ip)
+			: btree_iter_lock_root(iter, depth_want, trace_ip);
 		if (unlikely(ret)) {
 			if (ret == 1)
 				return 0;
@@ -1281,7 +1301,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 	int ret;
 
 	ret =   bch2_trans_cond_resched(trans) ?:
-		btree_iter_traverse_one(iter);
+		btree_iter_traverse_one(iter, _RET_IP_);
 	if (unlikely(ret))
 		ret = __btree_iter_traverse_all(trans, ret);
 
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 52b657030755..9a93b6d26878 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -242,7 +242,7 @@ retry:
 		enum six_lock_type lock_want = __btree_lock_want(iter, 0);
 
 		if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
-				     bkey_cached_check_fn, iter)) {
+				     bkey_cached_check_fn, iter, _THIS_IP_)) {
 			if (ck->key.btree_id != iter->btree_id ||
 			    bkey_cmp(ck->key.pos, iter->pos)) {
 				goto retry;
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index ffee6f2d7d4b..cc07ef2938ae 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -175,13 +175,15 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
 			    struct btree_iter *, enum six_lock_type,
-			    six_lock_should_sleep_fn, void *);
+			    six_lock_should_sleep_fn, void *,
+			    unsigned long);
 
 static inline bool btree_node_lock(struct btree *b,
 			struct bpos pos, unsigned level,
 			struct btree_iter *iter,
 			enum six_lock_type type,
-			six_lock_should_sleep_fn should_sleep_fn, void *p)
+			six_lock_should_sleep_fn should_sleep_fn, void *p,
+			unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
 	bool ret;
@@ -199,7 +201,7 @@ static inline bool btree_node_lock(struct btree *b,
 	ret = likely(six_trylock_type(&b->c.lock, type)) ||
 		btree_node_lock_increment(trans, b, level, type) ||
 		__bch2_btree_node_lock(b, pos, level, iter, type,
-				       should_sleep_fn, p);
+				       should_sleep_fn, p, ip);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 	trans->locking = NULL;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 5c57b6efaaf3..c30fb4d74a95 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -536,9 +536,46 @@ DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
 	TP_ARGS(ip)
 );
 
-DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock,
-	TP_PROTO(unsigned long ip),
-	TP_ARGS(ip)
+TRACE_EVENT(trans_restart_would_deadlock,
+	TP_PROTO(unsigned long	trans_ip,
+		 unsigned long	caller_ip,
+		 unsigned	reason,
+		 enum btree_id	have_btree_id,
+		 unsigned	have_iter_type,
+		 enum btree_id	want_btree_id,
+		 unsigned	want_iter_type),
+	TP_ARGS(trans_ip, caller_ip, reason,
+		have_btree_id, have_iter_type,
+		want_btree_id, want_iter_type),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,		trans_ip	)
+		__field(unsigned long,		caller_ip	)
+		__field(u8,			reason		)
+		__field(u8,			have_btree_id	)
+		__field(u8,			have_iter_type	)
+		__field(u8,			want_btree_id	)
+		__field(u8,			want_iter_type	)
+	),
+
+	TP_fast_assign(
+		__entry->trans_ip		= trans_ip;
+		__entry->caller_ip		= caller_ip;
+		__entry->reason			= reason;
+		__entry->have_btree_id		= have_btree_id;
+		__entry->have_iter_type		= have_iter_type;
+		__entry->want_btree_id		= want_btree_id;
+		__entry->want_iter_type		= want_iter_type;
+	),
+
+	TP_printk("%pF %pF because %u have %u:%u want %u:%u",
+		  (void *) __entry->trans_ip,
+		  (void *) __entry->caller_ip,
+		  __entry->reason,
+		  __entry->have_btree_id,
+		  __entry->have_iter_type,
+		  __entry->want_btree_id,
+		  __entry->want_iter_type)
 );
 
 TRACE_EVENT(trans_restart_iters_realloced,
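For context on reason codes 5 and 6 in the btree_iter.c hunk above: a transaction holding locks through several iterators must take btree node locks in a single global order, first by btree ID, then by iterator type (negated, so the higher-valued type sorts first), then by key position; taking a lock that sorts before one already held risks deadlocking against a transaction locking in the opposite order, so the transaction restarts instead. A rough userspace mock of that ordering check, using simplified stand-in types rather than bcachefs's real structures:

#include <stdio.h>

/* Three-way compare, as in the kernel's cmp_int(). */
#define cmp_int(a, b)  (((a) > (b)) - ((a) < (b)))

struct mock_iter {
	int	btree_id;
	int	iter_type;	/* negated in the compare below */
	long	pos;		/* stand-in for struct bpos */
};

/*
 * Returns 0 if taking a lock at want_pos respects the global order,
 * else the reason code the tracepoint would report.
 */
static int would_deadlock(const struct mock_iter *held,
			  const struct mock_iter *want, long want_pos)
{
	/* reason 5: wrong btree ID / iterator type order */
	if ((cmp_int(want->btree_id, held->btree_id) ?:
	     -cmp_int(want->iter_type, held->iter_type)) < 0)
		return 5;

	/* reason 6: going backwards in key order on the same btree */
	if (want->btree_id == held->btree_id && want_pos <= held->pos)
		return 6;

	return 0;
}

int main(void)
{
	struct mock_iter held = { .btree_id = 1, .iter_type = 0, .pos = 100 };
	struct mock_iter want = { .btree_id = 1, .iter_type = 0, .pos = 0 };

	/* pos 50 is behind the held lock (reason 6); pos 200 is fine */
	printf("lock pos 50:  reason %d\n", would_deadlock(&held, &want, 50));
	printf("lock pos 200: reason %d\n", would_deadlock(&held, &want, 200));
	return 0;
}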