summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2020-01-03 22:38:14 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:08:34 -0400
commit31ba2cd33037e1011947b7abbfd70921c735841d (patch)
treee384a35b2b00b5aa37b706a47c59cd1181ee2c3d
parent3e548da8f57ef41523f6f7fe72f812116af48ba1 (diff)
downloadlwn-31ba2cd33037e1011947b7abbfd70921c735841d.tar.gz
lwn-31ba2cd33037e1011947b7abbfd70921c735841d.zip
bcachefs: Hacky fixes for device removal
The device remove test was sporadically failing, because we hadn't finished dropping btree sector counts for the device when bch2_replicas_gc2() was called - mainly due to in flight journal writes. We don't yet have a good mechanism for flushing the counts that correspend to open journal entries yet. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/migrate.c58
-rw-r--r--fs/bcachefs/super.c43
2 files changed, 50 insertions, 51 deletions
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 0e3f63c1d65c..1ef62a189e33 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -53,9 +53,6 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
if (!bch2_bkey_has_device(k, dev_idx)) {
- ret = bch2_mark_bkey_replicas(c, k);
- if (ret)
- break;
bch2_btree_iter_next(iter);
continue;
}
@@ -129,34 +126,27 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
struct bkey_i_btree_ptr *new_key;
retry:
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
- dev_idx)) {
- /*
- * we might have found a btree node key we
- * needed to update, and then tried to update it
- * but got -EINTR after upgrading the iter, but
- * then raced and the node is now gone:
- */
- bch2_btree_iter_downgrade(iter);
-
- ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
- if (ret)
- goto err;
- } else {
- bkey_copy(&tmp.k, &b->key);
- new_key = bkey_i_to_btree_ptr(&tmp.k);
-
- ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
- dev_idx, flags, true);
- if (ret)
- goto err;
-
- ret = bch2_btree_node_update_key(c, iter, b, new_key);
- if (ret == -EINTR) {
- b = bch2_btree_iter_peek_node(iter);
- goto retry;
- }
- if (ret)
- goto err;
+ dev_idx))
+ continue;
+
+ bkey_copy(&tmp.k, &b->key);
+ new_key = bkey_i_to_btree_ptr(&tmp.k);
+
+ ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
+ dev_idx, flags, true);
+ if (ret) {
+ bch_err(c, "Cannot drop device without losing data");
+ goto err;
+ }
+
+ ret = bch2_btree_node_update_key(c, iter, b, new_key);
+ if (ret == -EINTR) {
+ b = bch2_btree_iter_peek_node(iter);
+ goto retry;
+ }
+ if (ret) {
+ bch_err(c, "Error updating btree node key: %i", ret);
+ goto err;
}
}
bch2_trans_iter_free(&trans, iter);
@@ -167,9 +157,10 @@ retry:
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c) ||
c->btree_roots_dirty);
+ if (c->btree_roots_dirty)
+ bch2_journal_meta(&c->journal);
if (!bch2_btree_interior_updates_nr_pending(c))
break;
- bch2_journal_meta(&c->journal);
}
ret = 0;
@@ -184,6 +175,5 @@ err:
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
- bch2_dev_metadata_drop(c, dev_idx, flags) ?:
- bch2_replicas_gc2(c);
+ bch2_dev_metadata_drop(c, dev_idx, flags);
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index cd02e5a5f305..586636a4c204 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1381,7 +1381,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
mutex_lock(&c->state_lock);
- percpu_ref_put(&ca->ref); /* XXX */
+ /*
+ * We consume a reference to ca->ref, regardless of whether we succeed
+ * or fail:
+ */
+ percpu_ref_put(&ca->ref);
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
bch_err(ca, "Cannot remove without losing data");
@@ -1390,11 +1394,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
__bch2_dev_read_only(c, ca);
- /*
- * XXX: verify that dev_idx is really not in use anymore, anywhere
- *
- * flag_data_bad() does not check btree pointers
- */
ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
if (ret) {
bch_err(ca, "Remove failed: error %i dropping data", ret);
@@ -1407,17 +1406,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
goto err;
}
- data = bch2_dev_has_data(c, ca);
- if (data) {
- char data_has_str[100];
-
- bch2_flags_to_text(&PBUF(data_has_str),
- bch2_data_types, data);
- bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
- ret = -EBUSY;
- goto err;
- }
-
ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC,
POS(ca->dev_idx, 0),
POS(ca->dev_idx + 1, 0),
@@ -1432,12 +1420,33 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
* (overwritten) keys that point to the device we're removing:
*/
bch2_journal_flush_all_pins(&c->journal);
+ /*
+ * hack to ensure bch2_replicas_gc2() clears out entries to this device
+ */
+ bch2_journal_meta(&c->journal);
ret = bch2_journal_error(&c->journal);
if (ret) {
bch_err(ca, "Remove failed, journal error");
goto err;
}
+ ret = bch2_replicas_gc2(c);
+ if (ret) {
+ bch_err(ca, "Remove failed: error %i from replicas gc", ret);
+ goto err;
+ }
+
+ data = bch2_dev_has_data(c, ca);
+ if (data) {
+ char data_has_str[100];
+
+ bch2_flags_to_text(&PBUF(data_has_str),
+ bch2_data_types, data);
+ bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
+ ret = -EBUSY;
+ goto err;
+ }
+
__bch2_dev_offline(c, ca);
mutex_lock(&c->sb_lock);