author    Kent Overstreet <kent.overstreet@gmail.com>    2021-12-27 23:10:06 -0500
committer Kent Overstreet <kent.overstreet@linux.dev>    2023-10-22 17:09:21 -0400
commit    d8601afca840d36203d0cf2da94ce4f92003956e (patch)
tree      77291db56c593cd0413c3827fc61aabb506db99c /fs
parent    8e432d98a5011de5b1304fa9c8591588bea59b96 (diff)
bcachefs: Simplify journal replay
With BTREE_ITER_WITH_JOURNAL, there are no longer any restrictions on the
order we have to replay keys from the journal in, and we can also start up
journal reclaim right away - and delete a bunch of code.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
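In practice, replay collapses to a single pass over the keys sorted only by
journal sequence. Below is a minimal, self-contained sketch of that shape;
the struct fields and the replay_one_key() callback are simplified,
illustrative stand-ins, not the bcachefs types touched in the diff that
follows.

/*
 * Illustrative sketch: with no ordering constraints beyond journal
 * sequence, replay is one sort plus one loop over the keys.
 * Simplified stand-in types, not the kernel structures.
 */
#include <stddef.h>
#include <stdlib.h>

struct journal_key {
	unsigned long long	journal_seq;
	int			allocated;	/* synthesized during recovery rather than read from the journal */
};

/* Sort by journal sequence only, mirroring the simplified comparator in the patch */
static int journal_sort_seq_cmp(const void *_l, const void *_r)
{
	const struct journal_key *l = *((const struct journal_key **)_l);
	const struct journal_key *r = *((const struct journal_key **)_r);

	return (l->journal_seq > r->journal_seq) -
	       (l->journal_seq < r->journal_seq);
}

/* replay_one_key() stands in for the transactional replay of a single key */
static int replay_all(struct journal_key **keys, size_t nr,
		      int (*replay_one_key)(struct journal_key *))
{
	size_t i;
	int ret;

	qsort(keys, nr, sizeof(*keys), journal_sort_seq_cmp);

	for (i = 0; i < nr; i++) {
		ret = replay_one_key(keys[i]);
		if (ret)
			return ret;
	}
	return 0;
}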
Diffstat (limited to 'fs')
-rw-r--r--  fs/bcachefs/alloc_background.c      |   3
-rw-r--r--  fs/bcachefs/bcachefs.h              |   2
-rw-r--r--  fs/bcachefs/btree_key_cache.h       |   3
-rw-r--r--  fs/bcachefs/btree_update_interior.c |   5
-rw-r--r--  fs/bcachefs/btree_update_leaf.c     |   3
-rw-r--r--  fs/bcachefs/journal_reclaim.c       |   5
-rw-r--r--  fs/bcachefs/journal_types.h         |   1
-rw-r--r--  fs/bcachefs/recovery.c              | 114
8 files changed, 22 insertions, 114 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index cb4b059e796c..ab7d972aac3a 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -902,8 +902,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
static bool allocator_thread_running(struct bch_dev *ca)
{
unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
- test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
- test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
+ test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
? ALLOCATOR_running
: ALLOCATOR_stopped;
alloc_thread_set_state(ca, state);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 431cf25b38db..7771b4a4bb87 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -510,8 +510,6 @@ enum {
BCH_FS_INITIAL_GC_DONE,
BCH_FS_INITIAL_GC_UNFIXED,
BCH_FS_TOPOLOGY_REPAIR_DONE,
- BCH_FS_ALLOC_REPLAY_DONE,
- BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
BCH_FS_RW,
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index 0768ef3ca776..b3d241b13453 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
size_t max_dirty = 4096 + (nr_keys * 3) / 4;
- return nr_dirty > max_dirty &&
- test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
+ return nr_dirty > max_dirty;
}
int bch2_btree_key_cache_journal_flush(struct journal *,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 17111c4228bd..51a2ea2c5cd6 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -45,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
BUG_ON(!b->c.level);
- if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
return;
bch2_btree_node_iter_init_from_start(&iter, b);
@@ -1851,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{
struct async_btree_rewrite *a;
- if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
- return;
-
if (!percpu_ref_tryget(&c->writes))
return;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index e95940ffad6b..1072acb0c9af 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
- EBUG_ON(!insert->level &&
- !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
-
if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
&insert_l(insert)->iter, insert->k)))
return false;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 4462beb52461..d72b17dc935a 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
u64 seq;
int err;
- if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
- return 0;
-
lockdep_assert_held(&j->reclaim_lock);
while (1) {
@@ -689,8 +686,6 @@ static int bch2_journal_reclaim_thread(void *arg)
set_freezable();
- kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
-
j->last_flushed = jiffies;
while (!ret && !kthread_should_stop()) {
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 0c4df603280d..73e7fbc4f109 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -148,7 +148,6 @@ enum journal_space_from {
enum {
JOURNAL_REPLAY_DONE,
JOURNAL_STARTED,
- JOURNAL_RECLAIM_STARTED,
JOURNAL_NEED_WRITE,
JOURNAL_MAY_GET_UNRESERVED,
JOURNAL_MAY_SKIP_FLUSH,
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 57311ad283c7..cb0ba84711aa 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -474,8 +474,8 @@ static void replay_now_at(struct journal *j, u64 seq)
bch2_journal_pin_put(j, j->replay_journal_seq++);
}
-static int __bch2_journal_replay_key(struct btree_trans *trans,
- struct journal_key *k)
+static int bch2_journal_replay_key(struct btree_trans *trans,
+ struct journal_key *k)
{
struct btree_iter iter;
unsigned iter_flags =
@@ -484,7 +484,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
int ret;
if (!k->level && k->btree_id == BTREE_ID_alloc)
- iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
+ iter_flags |= BTREE_ITER_CACHED;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
@@ -503,29 +503,12 @@ out:
return ret;
}
-static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
-{
- unsigned commit_flags =
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_RESERVED;
-
- if (!k->allocated)
- commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
-
- return bch2_trans_do(c, NULL, NULL, commit_flags,
- __bch2_journal_replay_key(&trans, k));
-}
-
static int journal_sort_seq_cmp(const void *_l, const void *_r)
{
const struct journal_key *l = *((const struct journal_key **)_l);
const struct journal_key *r = *((const struct journal_key **)_r);
- return cmp_int(r->level, l->level) ?:
- cmp_int(l->journal_seq, r->journal_seq) ?:
- cmp_int(l->btree_id, r->btree_id) ?:
- bpos_cmp(l->k->k.p, r->k->k.p);
+ return cmp_int(l->journal_seq, r->journal_seq);
}
static int bch2_journal_replay(struct bch_fs *c)
@@ -533,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
struct journal_keys *keys = &c->journal_keys;
struct journal_key **keys_sorted, *k;
struct journal *j = &c->journal;
- struct bch_dev *ca;
- unsigned idx;
size_t i;
- u64 seq;
int ret;
keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
@@ -555,73 +535,25 @@ static int bch2_journal_replay(struct bch_fs *c)
replay_now_at(j, keys->journal_seq_base);
}
- seq = j->replay_journal_seq;
-
- /*
- * First replay updates to the alloc btree - these will only update the
- * btree key cache:
- */
- for (i = 0; i < keys->nr; i++) {
- k = keys_sorted[i];
-
- cond_resched();
-
- if (!k->level && k->btree_id == BTREE_ID_alloc) {
- j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
- ret = bch2_journal_replay_key(c, k);
- if (ret)
- goto err;
- }
- }
-
- /* Now we can start the allocator threads: */
- set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
- for_each_member_device(ca, c, idx)
- bch2_wake_allocator(ca);
-
- /*
- * Next replay updates to interior btree nodes:
- */
- for (i = 0; i < keys->nr; i++) {
- k = keys_sorted[i];
-
- cond_resched();
-
- if (k->level) {
- j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
- ret = bch2_journal_replay_key(c, k);
- if (ret)
- goto err;
- }
- }
-
- /*
- * Now that the btree is in a consistent state, we can start journal
- * reclaim (which will be flushing entries from the btree key cache back
- * to the btree:
- */
- set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
- set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
- journal_reclaim_kick(j);
-
- j->replay_journal_seq = seq;
-
- /*
- * Now replay leaf node updates:
- */
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];
cond_resched();
- if (k->level || k->btree_id == BTREE_ID_alloc)
- continue;
-
- replay_now_at(j, keys->journal_seq_base + k->journal_seq);
+ if (!k->allocated)
+ replay_now_at(j, keys->journal_seq_base + k->journal_seq);
- ret = bch2_journal_replay_key(c, k);
- if (ret)
+ ret = bch2_trans_do(c, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_JOURNAL_RESERVED|
+ (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+ bch2_journal_replay_key(&trans, k));
+ if (ret) {
+ bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+ ret, bch2_btree_ids[k->btree_id], k->level);
goto err;
+ }
}
replay_now_at(j, j->replay_journal_seq_end);
@@ -629,14 +561,9 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_set_replay_done(j);
bch2_journal_flush_all_pins(j);
- kfree(keys_sorted);
-
- return bch2_journal_error(j);
+ ret = bch2_journal_error(j);
err:
- bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
- ret, bch2_btree_ids[k->btree_id], k->level);
kfree(keys_sorted);
-
return ret;
}
@@ -1215,7 +1142,8 @@ use_clean:
ret = bch2_journal_replay(c);
if (ret)
goto err;
- bch_verbose(c, "journal replay done");
+ if (c->opts.verbose || !c->sb.clean)
+ bch_info(c, "journal replay done");
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
!c->opts.nochanges) {
@@ -1385,10 +1313,6 @@ int bch2_fs_initialize(struct bch_fs *c)
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
- set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
- set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
- set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
-
err = "unable to allocate journal buckets";
for_each_online_member(ca, c, i) {
ret = bch2_dev_journal_alloc(ca);