summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2021-07-13 16:12:00 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:09:08 -0400
commit: 9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81 (patch)
tree: 96aa18224902e9320df210bbafbed5ae576a5909 /fs
parent: 8d3445878166ea726bc24326003ea7b9739cdc00 (diff)
download: lwn-9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81.tar.gz
          lwn-9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81.zip
bcachefs: Fix an allocator shutdown deadlock
On fstest generic/388, we were seeing sporadic deadlocks in the
emergency shutdown, where we'd get stuck shutting down the allocator
because bch2_btree_update_start() -> bch2_btree_reserve_get() allocated
and then deallocated some btree nodes, putting them back on the
btree_reserve_cache, after the allocator shutdown code had already
cleared out that cache.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/bcachefs/btree_update_interior.c  27
1 files changed, 16 insertions, 11 deletions
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 37dadbae41e5..0b78fb9d3561 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -948,13 +948,6 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level,
closure_init_stack(&cl);
retry:
- /*
- * This check isn't necessary for correctness - it's just to potentially
- * prevent us from doing a lot of work that'll end up being wasted:
- */
- ret = bch2_journal_error(&c->journal);
- if (ret)
- return ERR_PTR(ret);
/*
* XXX: figure out how far we might need to split,
@@ -995,6 +988,22 @@ retry:
bch2_keylist_init(&as->new_keys, as->_new_keys);
bch2_keylist_init(&as->parent_keys, as->inline_keys);
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->list, &c->btree_interior_update_list);
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ /*
+ * We don't want to allocate if we're in an error state, that can cause
+ * deadlock on emergency shutdown due to open buckets getting stuck in
+ * the btree_reserve_cache after allocator shutdown has cleared it out.
+ * This check needs to come after adding us to the btree_interior_update
+ * list but before calling bch2_btree_reserve_get, to synchronize with
+ * __bch2_fs_read_only().
+ */
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ goto err;
+
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags|JOURNAL_RES_GET_NONBLOCK);
@@ -1046,10 +1055,6 @@ retry:
atomic64_read(&c->journal.seq),
&as->journal, NULL);
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->list, &c->btree_interior_update_list);
- mutex_unlock(&c->btree_interior_update_lock);
-
return as;
err:
bch2_btree_update_free(as);