summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2022-01-04 19:41:23 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:09:21 -0400
commit9b6e2f1e7036d639ca07434fdb27a739b37beb76 (patch)
treefc8face8364f78b93caad20a59ada370d578e9b7
parent03ea3962ab99adf0cf7de9949716e6baeda230f3 (diff)
downloadlwn-9b6e2f1e7036d639ca07434fdb27a739b37beb76.tar.gz
lwn-9b6e2f1e7036d639ca07434fdb27a739b37beb76.zip
Revert "bcachefs: Delete some obsolete journal_seq_blacklist code"
This reverts commit f95b61228efd04c9c158123da5827c96e9773b29. It turns out, we're seeing filesystems in the wild end up with blacklisted btree node bsets - this should not be happening, and until we understand why and fix it we need to keep this code around. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c78
-rw-r--r--fs/bcachefs/journal_seq_blacklist.h2
-rw-r--r--fs/bcachefs/recovery.c22
-rw-r--r--fs/bcachefs/super.c5
5 files changed, 100 insertions, 8 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 8ef874b3afbb..4ebaefd408a4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -755,6 +755,7 @@ struct bch_fs {
/* JOURNAL SEQ BLACKLIST */
struct journal_seq_blacklist_table *
journal_seq_blacklist_table;
+ struct work_struct journal_seq_blacklist_gc_work;
/* ALLOCATOR */
spinlock_t freelist_lock;
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index e10b2c7c7bae..3cc63fc202ab 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -241,3 +241,81 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
.validate = bch2_sb_journal_seq_blacklist_validate,
.to_text = bch2_sb_journal_seq_blacklist_to_text
};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs,
+ journal_seq_blacklist_gc_work);
+ struct journal_seq_blacklist_table *t;
+ struct bch_sb_field_journal_seq_blacklist *bl;
+ struct journal_seq_blacklist_entry *src, *dst;
+ struct btree_trans trans;
+ unsigned i, nr, new_nr;
+ int ret;
+
+ bch2_trans_init(&trans, c, 0, 0);
+
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct btree_iter iter;
+ struct btree *b;
+
+ bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN,
+ 0, 0, BTREE_ITER_PREFETCH);
+retry:
+ bch2_trans_begin(&trans);
+
+ b = bch2_btree_iter_peek_node(&iter);
+
+ while (!(ret = PTR_ERR_OR_ZERO(b)) &&
+ b &&
+ !test_bit(BCH_FS_STOPPING, &c->flags))
+ b = bch2_btree_iter_next_node(&iter);
+
+ if (ret == -EINTR)
+ goto retry;
+
+ bch2_trans_iter_exit(&trans, &iter);
+ }
+
+ bch2_trans_exit(&trans);
+ if (ret)
+ return;
+
+ mutex_lock(&c->sb_lock);
+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+ if (!bl)
+ goto out;
+
+ nr = blacklist_nr_entries(bl);
+ dst = bl->start;
+
+ t = c->journal_seq_blacklist_table;
+ BUG_ON(nr != t->nr);
+
+ for (src = bl->start, i = eytzinger0_first(t->nr);
+ src < bl->start + nr;
+ src++, i = eytzinger0_next(i, nr)) {
+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
+
+ if (t->entries[i].dirty)
+ *dst++ = *src;
+ }
+
+ new_nr = dst - bl->start;
+
+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+ if (new_nr != nr) {
+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+ new_nr ? sb_blacklist_u64s(new_nr) : 0);
+ BUG_ON(new_nr && !bl);
+
+ if (!new_nr)
+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
+
+ bch2_write_super(c);
+ }
+out:
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h
index b4f876a04586..afb886ec8e25 100644
--- a/fs/bcachefs/journal_seq_blacklist.h
+++ b/fs/bcachefs/journal_seq_blacklist.h
@@ -17,4 +17,6 @@ int bch2_blacklist_table_initialize(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+void bch2_blacklist_entries_gc(struct work_struct *);
+
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 7003cf77fdcd..b818093eab39 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1065,6 +1065,16 @@ use_clean:
if (ret)
goto err;
+ /*
+ * After an unclean shutdown, skip then next few journal sequence
+ * numbers as they may have been referenced by btree writes that
+ * happened before their corresponding journal writes - those btree
+ * writes need to be ignored, by skipping and blacklisting the next few
+ * journal sequence numbers:
+ */
+ if (!c->sb.clean)
+ journal_seq += 8;
+
if (blacklist_seq != journal_seq) {
ret = bch2_journal_seq_blacklist_add(c,
blacklist_seq, journal_seq);
@@ -1210,14 +1220,6 @@ use_clean:
}
mutex_lock(&c->sb_lock);
- /*
- * With journal replay done, we can clear the journal seq blacklist
- * table:
- */
- BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
- if (le16_to_cpu(c->sb.version_min) >= bcachefs_metadata_version_btree_ptr_sectors_written)
- bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 0);
-
if (c->opts.version_upgrade) {
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
@@ -1259,6 +1261,10 @@ use_clean:
bch_info(c, "scanning for old btree nodes done");
}
+ if (c->journal_seq_blacklist_table &&
+ c->journal_seq_blacklist_table->nr > 128)
+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
+
ret = 0;
out:
set_bit(BCH_FS_FSCK_DONE, &c->flags);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 47de774d18b8..55bb263a0906 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -535,6 +535,8 @@ void __bch2_fs_stop(struct bch_fs *c)
set_bit(BCH_FS_STOPPING, &c->flags);
+ cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
down_write(&c->state_lock);
bch2_fs_read_only(c);
up_write(&c->state_lock);
@@ -698,6 +700,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->btree_write_error_lock);
+ INIT_WORK(&c->journal_seq_blacklist_gc_work,
+ bch2_blacklist_entries_gc);
+
INIT_LIST_HEAD(&c->journal_entries);
INIT_LIST_HEAD(&c->journal_iters);