diff options
| | | |
|---|---|---|
| author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-07-13 16:12:00 -0400 |
| committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:08 -0400 |
| commit | 9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81 (patch) | |
| tree | 96aa18224902e9320df210bbafbed5ae576a5909 /fs | |
| parent | 8d3445878166ea726bc24326003ea7b9739cdc00 (diff) | |
| download | lwn-9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81.tar.gz lwn-9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81.zip | |
bcachefs: Fix an allocator shutdown deadlock
On fstest generic/388, we were seeing sporadic deadlocks in the
emergency shutdown, where we'd get stuck shutting down the allocator
because bch2_btree_update_start() -> bch2_btree_reserve_get() allocated
and then deallocated some btree nodes, putting them back on the
btree_reserve_cache, after the allocator shutdown code had already
cleared out that cache.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 27 |
1 files changed, 16 insertions, 11 deletions
```diff
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 37dadbae41e5..0b78fb9d3561 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -948,13 +948,6 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level,
 
 	closure_init_stack(&cl);
 retry:
-	/*
-	 * This check isn't necessary for correctness - it's just to potentially
-	 * prevent us from doing a lot of work that'll end up being wasted:
-	 */
-	ret = bch2_journal_error(&c->journal);
-	if (ret)
-		return ERR_PTR(ret);
 
 	/*
 	 * XXX: figure out how far we might need to split,
@@ -995,6 +988,22 @@ retry:
 	bch2_keylist_init(&as->new_keys, as->_new_keys);
 	bch2_keylist_init(&as->parent_keys, as->inline_keys);
 
+	mutex_lock(&c->btree_interior_update_lock);
+	list_add_tail(&as->list, &c->btree_interior_update_list);
+	mutex_unlock(&c->btree_interior_update_lock);
+
+	/*
+	 * We don't want to allocate if we're in an error state, that can cause
+	 * deadlock on emergency shutdown due to open buckets getting stuck in
+	 * the btree_reserve_cache after allocator shutdown has cleared it out.
+	 * This check needs to come after adding us to the btree_interior_update
+	 * list but before calling bch2_btree_reserve_get, to synchronize with
+	 * __bch2_fs_read_only().
+	 */
+	ret = bch2_journal_error(&c->journal);
+	if (ret)
+		goto err;
+
 	ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
 				      BTREE_UPDATE_JOURNAL_RES,
 				      journal_flags|JOURNAL_RES_GET_NONBLOCK);
@@ -1046,10 +1055,6 @@ retry:
 				  atomic64_read(&c->journal.seq),
 				  &as->journal, NULL);
 
-	mutex_lock(&c->btree_interior_update_lock);
-	list_add_tail(&as->list, &c->btree_interior_update_list);
-	mutex_unlock(&c->btree_interior_update_lock);
-
 	return as;
 err:
 	bch2_btree_update_free(as);
```