diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-01-03 22:38:14 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:34 -0400 |
commit | 31ba2cd33037e1011947b7abbfd70921c735841d (patch) | |
tree | e384a35b2b00b5aa37b706a47c59cd1181ee2c3d | |
parent | 3e548da8f57ef41523f6f7fe72f812116af48ba1 (diff) | |
download | lwn-31ba2cd33037e1011947b7abbfd70921c735841d.tar.gz lwn-31ba2cd33037e1011947b7abbfd70921c735841d.zip |
bcachefs: Hacky fixes for device removal
The device remove test was sporadically failing, because we hadn't
finished dropping btree sector counts for the device when
bch2_replicas_gc2() was called - mainly due to in flight journal writes.
We don't yet have a good mechanism for flushing the counts that
correspend to open journal entries yet.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/migrate.c | 58 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 43 |
2 files changed, 50 insertions, 51 deletions
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 0e3f63c1d65c..1ef62a189e33 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -53,9 +53,6 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { if (!bch2_bkey_has_device(k, dev_idx)) { - ret = bch2_mark_bkey_replicas(c, k); - if (ret) - break; bch2_btree_iter_next(iter); continue; } @@ -129,34 +126,27 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) struct bkey_i_btree_ptr *new_key; retry: if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), - dev_idx)) { - /* - * we might have found a btree node key we - * needed to update, and then tried to update it - * but got -EINTR after upgrading the iter, but - * then raced and the node is now gone: - */ - bch2_btree_iter_downgrade(iter); - - ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); - if (ret) - goto err; - } else { - bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_btree_ptr(&tmp.k); - - ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i), - dev_idx, flags, true); - if (ret) - goto err; - - ret = bch2_btree_node_update_key(c, iter, b, new_key); - if (ret == -EINTR) { - b = bch2_btree_iter_peek_node(iter); - goto retry; - } - if (ret) - goto err; + dev_idx)) + continue; + + bkey_copy(&tmp.k, &b->key); + new_key = bkey_i_to_btree_ptr(&tmp.k); + + ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i), + dev_idx, flags, true); + if (ret) { + bch_err(c, "Cannot drop device without losing data"); + goto err; + } + + ret = bch2_btree_node_update_key(c, iter, b, new_key); + if (ret == -EINTR) { + b = bch2_btree_iter_peek_node(iter); + goto retry; + } + if (ret) { + bch_err(c, "Error updating btree node key: %i", ret); + goto err; } } bch2_trans_iter_free(&trans, iter); @@ -167,9 +157,10 @@ retry: closure_wait_event(&c->btree_interior_update_wait, !bch2_btree_interior_updates_nr_pending(c) || c->btree_roots_dirty); + if (c->btree_roots_dirty) + bch2_journal_meta(&c->journal); if (!bch2_btree_interior_updates_nr_pending(c)) break; - bch2_journal_meta(&c->journal); } ret = 0; @@ -184,6 +175,5 @@ err: int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) { return bch2_dev_usrdata_drop(c, dev_idx, flags) ?: - bch2_dev_metadata_drop(c, dev_idx, flags) ?: - bch2_replicas_gc2(c); + bch2_dev_metadata_drop(c, dev_idx, flags); } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index cd02e5a5f305..586636a4c204 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1381,7 +1381,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) mutex_lock(&c->state_lock); - percpu_ref_put(&ca->ref); /* XXX */ + /* + * We consume a reference to ca->ref, regardless of whether we succeed + * or fail: + */ + percpu_ref_put(&ca->ref); if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { bch_err(ca, "Cannot remove without losing data"); @@ -1390,11 +1394,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) __bch2_dev_read_only(c, ca); - /* - * XXX: verify that dev_idx is really not in use anymore, anywhere - * - * flag_data_bad() does not check btree pointers - */ ret = bch2_dev_data_drop(c, ca->dev_idx, flags); if (ret) { bch_err(ca, "Remove failed: error %i dropping data", ret); @@ -1407,17 +1406,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) goto err; } - data = bch2_dev_has_data(c, ca); - if (data) { - char data_has_str[100]; - - bch2_flags_to_text(&PBUF(data_has_str), - bch2_data_types, data); - bch_err(ca, "Remove failed, still has data (%s)", data_has_str); - ret = -EBUSY; - goto err; - } - ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), POS(ca->dev_idx + 1, 0), @@ -1432,12 +1420,33 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) * (overwritten) keys that point to the device we're removing: */ bch2_journal_flush_all_pins(&c->journal); + /* + * hack to ensure bch2_replicas_gc2() clears out entries to this device + */ + bch2_journal_meta(&c->journal); ret = bch2_journal_error(&c->journal); if (ret) { bch_err(ca, "Remove failed, journal error"); goto err; } + ret = bch2_replicas_gc2(c); + if (ret) { + bch_err(ca, "Remove failed: error %i from replicas gc", ret); + goto err; + } + + data = bch2_dev_has_data(c, ca); + if (data) { + char data_has_str[100]; + + bch2_flags_to_text(&PBUF(data_has_str), + bch2_data_types, data); + bch_err(ca, "Remove failed, still has data (%s)", data_has_str); + ret = -EBUSY; + goto err; + } + __bch2_dev_offline(c, ca); mutex_lock(&c->sb_lock); |