diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-03-21 00:15:53 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:30 -0400 |
commit | ce6201c456571d919e722eec3c17f868f0575b05 (patch) | |
tree | fa7281175f4d7d823da49cff85596b2d7b53e92f /fs/bcachefs/recovery.c | |
parent | 95752a02cb5d38bc97d76625de2607510ac94e69 (diff) | |
download | lwn-ce6201c456571d919e722eec3c17f868f0575b05.tar.gz lwn-ce6201c456571d919e722eec3c17f868f0575b05.zip |
bcachefs: Use a genradix for reading journal entries
Previously, the journal read path used a linked list for storing the
journal entries we read from disk. But there's been a bug that's been
causing journal_flush_delay to incorrectly be set to 0, leading to far
more journal entries than is normal being written out, which then means
filesystems are no longer able to start due to the O(n^2) behaviour of
inserting into/searching that linked list.
Fix this by switching to a radix tree.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs/bcachefs/recovery.c')
-rw-r--r-- | fs/bcachefs/recovery.c | 68 |
1 files changed, 37 insertions, 31 deletions
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9120ed26250e..07ce6a540856 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -433,16 +433,16 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i /* sort and dedup all keys in the journal: */ -void bch2_journal_entries_free(struct list_head *list) +void bch2_journal_entries_free(struct bch_fs *c) { - - while (!list_empty(list)) { - struct journal_replay *i = - list_first_entry(list, struct journal_replay, list); - list_del(&i->list); - kvpfree(i, offsetof(struct journal_replay, j) + - vstruct_bytes(&i->j)); - } + struct journal_replay **i; + struct genradix_iter iter; + + genradix_for_each(&c->journal_entries, iter, i) + if (*i) + kvpfree(*i, offsetof(struct journal_replay, j) + + vstruct_bytes(&(*i)->j)); + genradix_free(&c->journal_entries); } /* @@ -476,15 +476,18 @@ void bch2_journal_keys_free(struct journal_keys *keys) static int journal_keys_sort(struct bch_fs *c) { - struct journal_replay *i; + struct genradix_iter iter; + struct journal_replay *i, **_i; struct jset_entry *entry; struct bkey_i *k, *_n; struct journal_keys *keys = &c->journal_keys; struct journal_key *src, *dst; size_t nr_keys = 0; - list_for_each_entry(i, &c->journal_entries, list) { - if (i->ignore) + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) continue; if (!keys->journal_seq_base) @@ -503,8 +506,10 @@ static int journal_keys_sort(struct bch_fs *c) if (!keys->d) return -ENOMEM; - list_for_each_entry(i, &c->journal_entries, list) { - if (i->ignore) + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) continue; BUG_ON(le64_to_cpu(i->j.seq) - keys->journal_seq_base > U32_MAX); @@ -751,10 +756,8 @@ static int journal_replay_entry_early(struct bch_fs *c, } static int journal_replay_early(struct bch_fs *c, - struct bch_sb_field_clean *clean, - struct list_head *journal) + struct bch_sb_field_clean *clean) { - struct journal_replay *i; struct jset_entry *entry; int ret; @@ -767,8 +770,13 @@ static int journal_replay_early(struct bch_fs *c, return ret; } } else { - list_for_each_entry(i, journal, list) { - if (i->ignore) + struct genradix_iter iter; + struct journal_replay *i, **_i; + + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (!i || i->ignore) continue; vstruct_for_each(&i->j, entry) { @@ -1093,17 +1101,17 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { - struct journal_replay *i; + struct genradix_iter iter; + struct journal_replay **i; bch_verbose(c, "starting journal read"); - ret = bch2_journal_read(c, &c->journal_entries, - &blacklist_seq, &journal_seq); + ret = bch2_journal_read(c, &blacklist_seq, &journal_seq); if (ret) goto err; - list_for_each_entry_reverse(i, &c->journal_entries, list) - if (!i->ignore) { - last_journal_entry = &i->j; + genradix_for_each_reverse(&c->journal_entries, iter, i) + if (*i && !(*i)->ignore) { + last_journal_entry = &(*i)->j; break; } @@ -1152,7 +1160,7 @@ use_clean: zero_out_btree_mem_ptr(&c->journal_keys); - ret = journal_replay_early(c, clean, &c->journal_entries); + ret = journal_replay_early(c, clean); if (ret) goto err; @@ -1175,8 +1183,7 @@ use_clean: } } - ret = bch2_fs_journal_start(&c->journal, journal_seq, - &c->journal_entries); + ret = bch2_fs_journal_start(&c->journal, journal_seq); if (ret) goto err; @@ -1380,7 +1387,7 @@ out: if (!c->opts.keep_journal) { bch2_journal_keys_free(&c->journal_keys); - bch2_journal_entries_free(&c->journal_entries); + bch2_journal_entries_free(c); } kfree(clean); if (ret) @@ -1401,7 +1408,6 @@ int bch2_fs_initialize(struct bch_fs *c) struct qstr lostfound = QSTR("lost+found"); const char *err = "cannot allocate memory"; struct bch_dev *ca; - LIST_HEAD(journal); unsigned i; int ret; @@ -1441,7 +1447,7 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - bch2_fs_journal_start(&c->journal, 1, &journal); + bch2_fs_journal_start(&c->journal, 1); bch2_journal_set_replay_done(&c->journal); err = "error going read-write"; |