-rw-r--r--  fs/bcachefs/bkey_methods.c           |   2
-rw-r--r--  fs/bcachefs/btree_cache.c            |  81
-rw-r--r--  fs/bcachefs/btree_cache.h            |   3
-rw-r--r--  fs/bcachefs/btree_gc.c               | 113
-rw-r--r--  fs/bcachefs/btree_types.h            |   5
-rw-r--r--  fs/bcachefs/btree_update_interior.c  |   2
-rw-r--r--  fs/bcachefs/btree_update_leaf.c      |  22
-rw-r--r--  fs/bcachefs/recovery.c               | 161
-rw-r--r--  fs/bcachefs/recovery.h               |  21
9 files changed, 296 insertions(+), 114 deletions(-)
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index c064cf468a9b..0aa3d3b9a281 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -134,7 +134,7 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
{
- if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0)
+ if (bkey_cmp(k.k->p, b->data->min_key) < 0)
return "key before start of btree node";
if (bkey_cmp(k.k->p, b->data->max_key) > 0)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index cb843a362cb4..0711bde8d68c 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -589,6 +589,7 @@ err:
static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
struct btree_iter *iter,
const struct bkey_i *k,
+ enum btree_id btree_id,
unsigned level,
enum six_lock_type lock_type,
bool sync)
@@ -601,7 +602,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* Parent node must be locked, else we could read in a btree node that's
* been freed:
*/
- if (!bch2_btree_node_relock(iter, level + 1))
+ if (iter && !bch2_btree_node_relock(iter, level + 1))
return ERR_PTR(-EINTR);
b = bch2_btree_node_mem_alloc(c);
@@ -609,7 +610,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b;
bkey_copy(&b->key, k);
- if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) {
+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */
/* mark as unhashed... */
@@ -629,7 +630,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
*
* XXX: ideally should be dropping all btree node locks here
*/
- if (btree_node_read_locked(iter, level + 1))
+ if (iter && btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
bch2_btree_node_read(c, b, sync);
@@ -677,7 +678,8 @@ retry:
* else we could read in a btree node from disk that's been
* freed:
*/
- b = bch2_btree_node_fill(c, iter, k, level, lock_type, true);
+ b = bch2_btree_node_fill(c, iter, k, iter->btree_id,
+ level, lock_type, true);
/* We raced and found the btree node in the cache */
if (!b)
@@ -763,6 +765,74 @@ lock_node:
return b;
}
+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
+ const struct bkey_i *k,
+ enum btree_id btree_id,
+ unsigned level)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+ struct bset_tree *t;
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+ b = btree_node_mem_ptr(k);
+ if (b)
+ goto lock_node;
+retry:
+ b = btree_cache_find(bc, k);
+ if (unlikely(!b)) {
+ b = bch2_btree_node_fill(c, NULL, k, btree_id,
+ level, SIX_LOCK_read, true);
+
+ /* We raced and found the btree node in the cache */
+ if (!b)
+ goto retry;
+
+ if (IS_ERR(b))
+ return b;
+ } else {
+lock_node:
+ six_lock_read(&b->c.lock, NULL, NULL);
+
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+ b->c.btree_id != btree_id ||
+ b->c.level != level)) {
+ six_unlock_read(&b->c.lock);
+ goto retry;
+ }
+ }
+
+ /* XXX: waiting on IO with btree locks held: */
+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+ TASK_UNINTERRUPTIBLE);
+
+ prefetch(b->aux_data);
+
+ for_each_bset(b, t) {
+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+ prefetch(p + L1_CACHE_BYTES * 0);
+ prefetch(p + L1_CACHE_BYTES * 1);
+ prefetch(p + L1_CACHE_BYTES * 2);
+ }
+
+ /* avoid atomic set bit if it's not needed: */
+ if (!btree_node_accessed(b))
+ set_btree_node_accessed(b);
+
+ if (unlikely(btree_node_read_error(b))) {
+ six_unlock_read(&b->c.lock);
+ return ERR_PTR(-EIO);
+ }
+
+ EBUG_ON(b->c.btree_id != btree_id ||
+ BTREE_NODE_LEVEL(b->data) != level ||
+ bkey_cmp(b->data->max_key, k->k.p));
+
+ return b;
+}
+
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_iter *iter,
struct btree *b,
@@ -877,7 +947,8 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
if (b)
return;
- bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false);
+ bch2_btree_node_fill(c, iter, k, iter->btree_id,
+ level, SIX_LOCK_read, false);
}
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 5d85987457bf..abde6c2658c6 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -25,6 +25,9 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned,
enum six_lock_type);
+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
+ enum btree_id, unsigned);
+
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, enum btree_node_sibling);
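
Note: the new bch2_btree_node_get_noiter() looks up and read-locks a btree node directly from the node cache, without a btree_iter; the initial GC code below uses it to descend into child nodes during recovery. A minimal caller sketch (visit_child_node() is a hypothetical helper, modelled on bch2_gc_btree_init_recurse() in the next file):

static int visit_child_node(struct bch_fs *c, struct btree *parent,
			    const struct bkey_i *ptr)
{
	struct btree *child;

	/* returns the child read-locked, or an ERR_PTR on memory/IO errors */
	child = bch2_btree_node_get_noiter(c, ptr, parent->c.btree_id,
					   parent->c.level - 1);
	if (IS_ERR(child))
		return PTR_ERR(child);

	/* ... walk the child's keys here ... */

	six_unlock_read(&child->c.lock);	/* caller drops the read lock */
	return 0;
}
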
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index f85fbc057fb3..ee5eafdb1222 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -186,16 +186,8 @@ fsck_err:
return ret;
}
-static bool pos_in_journal_keys(struct journal_keys *journal_keys,
- enum btree_id id, struct bpos pos)
-{
- struct journal_key *k = journal_key_search(journal_keys, id, pos);
-
- return k && k->btree_id == id && !bkey_cmp(k->k->k.p, pos);
-}
-
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
- struct journal_keys *journal_keys, bool initial)
+ bool initial)
{
struct btree_node_iter iter;
struct bkey unpacked;
@@ -209,10 +201,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
- if (!b->c.level && journal_keys &&
- pos_in_journal_keys(journal_keys, b->c.btree_id, k.k->p))
- continue;
-
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, k, max_stale, initial);
@@ -224,7 +212,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
- struct journal_keys *journal_keys,
bool initial, bool metadata_only)
{
struct btree_trans trans;
@@ -252,8 +239,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
gc_pos_set(c, gc_pos_btree_node(b));
- ret = btree_gc_mark_node(c, b, &max_stale,
- journal_keys, initial);
+ ret = btree_gc_mark_node(c, b, &max_stale, initial);
if (ret)
break;
@@ -289,6 +275,78 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
return ret;
}
+static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
+ struct journal_keys *journal_keys,
+ unsigned target_depth)
+{
+ struct btree_and_journal_iter iter;
+ struct bkey_s_c k;
+ u8 max_stale = 0;
+ int ret = 0;
+
+ bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ bch2_bkey_debugcheck(c, b, k);
+
+ ret = bch2_gc_mark_key(c, k, &max_stale, true);
+ if (ret)
+ break;
+
+ if (b->c.level > target_depth) {
+ struct btree *child;
+ BKEY_PADDED(k) tmp;
+
+ bkey_reassemble(&tmp.k, k);
+
+ child = bch2_btree_node_get_noiter(c, &tmp.k,
+ b->c.btree_id, b->c.level - 1);
+ ret = PTR_ERR_OR_ZERO(child);
+ if (ret)
+ break;
+
+ bch2_gc_btree_init_recurse(c, child,
+ journal_keys, target_depth);
+ six_unlock_read(&child->c.lock);
+ }
+
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+
+ return ret;
+}
+
+static int bch2_gc_btree_init(struct bch_fs *c,
+ struct journal_keys *journal_keys,
+ enum btree_id btree_id,
+ bool metadata_only)
+{
+ struct btree *b;
+ unsigned target_depth = metadata_only ? 1
+ : expensive_debug_checks(c) ? 0
+ : !btree_node_type_needs_gc(btree_id) ? 1
+ : 0;
+ u8 max_stale = 0;
+ int ret = 0;
+
+ b = c->btree_roots[btree_id].b;
+
+ if (btree_node_fake(b))
+ return 0;
+
+ six_lock_read(&b->c.lock, NULL, NULL);
+ if (b->c.level >= target_depth)
+ ret = bch2_gc_btree_init_recurse(c, b,
+ journal_keys, target_depth);
+
+ if (!ret)
+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+ &max_stale, true);
+ six_unlock_read(&b->c.lock);
+
+ return ret;
+}
+
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
return (int) btree_id_to_gc_phase(l) -
@@ -307,27 +365,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
for (i = 0; i < BTREE_ID_NR; i++) {
enum btree_id id = ids[i];
- enum btree_node_type type = __btree_node_type(0, id);
-
- int ret = bch2_gc_btree(c, id, journal_keys,
- initial, metadata_only);
+ int ret = initial
+ ? bch2_gc_btree_init(c, journal_keys,
+ id, metadata_only)
+ : bch2_gc_btree(c, id, initial, metadata_only);
if (ret)
return ret;
-
- if (journal_keys && !metadata_only &&
- btree_node_type_needs_gc(type)) {
- struct journal_key *j;
- u8 max_stale;
- int ret;
-
- for_each_journal_key(*journal_keys, j)
- if (j->btree_id == id) {
- ret = bch2_gc_mark_key(c, bkey_i_to_s_c(j->k),
- &max_stale, initial);
- if (ret)
- return ret;
- }
- }
}
return 0;
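
Note: the target_depth chain in bch2_gc_btree_init() above is compact; spelled out, initial GC only descends to leaves when per-key marking is actually needed. A hedged restatement (gc_target_depth() is a hypothetical helper, equivalent to the ternary chain in the patch):

static unsigned gc_target_depth(struct bch_fs *c, enum btree_id btree_id,
				bool metadata_only)
{
	if (metadata_only)
		return 1;	/* stop at level 1: mark btree node pointers only */
	if (expensive_debug_checks(c))
		return 0;	/* always walk down to leaf keys */
	return btree_node_type_needs_gc(btree_id) ? 0 : 1;
}
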
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index fdfa7a265850..885cc9500f36 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -264,6 +264,11 @@ static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter)
return iter->flags & BTREE_ITER_TYPE;
}
+static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
+{
+ return iter->l + iter->level;
+}
+
struct btree_insert_entry {
unsigned trigger_flags;
unsigned trans_triggers_run:1;
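
Note: iter_l() returns the btree_iter_level for the iterator's current level, so call sites stop hardcoding the leaf level. The later hunks in this patch convert iter->l[0] users accordingly; a sketch of the pattern (node_at_iter_level() is hypothetical):

static inline struct btree *node_at_iter_level(struct btree_iter *iter)
{
	/* before this patch: iter->l[0].b   -- leaf level only  */
	/* after this patch:  iter_l(iter)->b -- current level   */
	return iter_l(iter)->b;
}
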
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index c1a4d6559d01..fa9c7f5e0bb9 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1630,7 +1630,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
unsigned flags)
{
struct btree_trans *trans = iter->trans;
- struct btree *b = iter->l[0].b;
+ struct btree *b = iter_l(iter)->b;
struct btree_update *as;
struct closure cl;
int ret = 0;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a5362ecb4f5d..a8487f8275b6 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -24,7 +24,7 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
return i != trans->updates2 &&
- i[0].iter->l[0].b == i[-1].iter->l[0].b;
+ iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
}
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
@@ -172,13 +172,12 @@ static void bch2_btree_journal_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
- struct btree *b = iter->l[0].b;
+ struct btree *b = iter_l(iter)->b;
struct btree_write *w = btree_current_write(b);
u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
? trans->journal_res.seq
: j->replay_journal_seq;
- EBUG_ON(iter->level || b->c.level);
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
@@ -205,17 +204,15 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
struct bkey_i *insert)
{
struct bch_fs *c = trans->c;
- struct btree *b = iter->l[0].b;
+ struct btree *b = iter_l(iter)->b;
struct bset_tree *t = bset_tree_last(b);
int old_u64s = bset_u64s(t);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
- EBUG_ON(iter->level);
-
insert->k.needs_whiteout = false;
- if (likely(bch2_btree_bset_insert_key(iter, b, &iter->l[0].iter, insert)))
+ if (likely(bch2_btree_bset_insert_key(iter, b, &iter_l(iter)->iter, insert)))
bch2_btree_journal_key(trans, iter, insert);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
@@ -241,7 +238,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
- BUG_ON(iter->level);
BUG_ON(bkey_cmp(insert->k.p, iter->pos));
BUG_ON(debug_check_bkeys(c) &&
bch2_bkey_invalid(c, bkey_i_to_s_c(insert), iter->btree_id));
@@ -290,7 +286,7 @@ btree_key_can_insert(struct btree_trans *trans,
unsigned *u64s)
{
struct bch_fs *c = trans->c;
- struct btree *b = iter->l[0].b;
+ struct btree *b = iter_l(iter)->b;
static enum btree_insert_ret ret;
if (unlikely(btree_node_fake(b)))
@@ -345,7 +341,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
- if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+ if (gc_visited(c, gc_pos_btree_node(iter_l(i->iter)->b)))
bch2_mark_update(trans, i->iter, i->k, NULL,
i->trigger_flags|BTREE_TRIGGER_GC);
}
@@ -461,7 +457,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
int ret;
trans_for_each_update2(trans, i)
- BUG_ON(!btree_node_intent_locked(i->iter, 0));
+ BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level));
ret = bch2_journal_preres_get(&trans->c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
@@ -495,13 +491,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
trans_for_each_update2(trans, i)
if (!same_leaf_as_prev(trans, i))
bch2_btree_node_lock_for_insert(trans->c,
- i->iter->l[0].b, i->iter);
+ iter_l(i->iter)->b, i->iter);
ret = bch2_trans_commit_write_locked(trans, stopped_at);
trans_for_each_update2(trans, i)
if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_unlock_write_inlined(i->iter->l[0].b,
+ bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
i->iter);
/*
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 02b381cb567b..0d4abaa3ba10 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -27,30 +27,78 @@
/* iterate over keys read from the journal: */
-struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
+static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
{
- while (iter->k) {
- if (iter->k->btree_id == iter->btree_id)
- return bkey_i_to_s_c(iter->k->k);
+ size_t l = 0, r = journal_keys->nr, m;
- iter->k++;
- if (iter->k == iter->keys->d + iter->keys->nr)
- iter->k = NULL;
+ while (l < r) {
+ m = l + ((r - l) >> 1);
+ if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
+ cmp_int(level, journal_keys->d[m].level) ?:
+ bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
+ l = m + 1;
+ else
+ r = m;
}
- return bkey_s_c_null;
+ BUG_ON(l < journal_keys->nr &&
+ (cmp_int(id, journal_keys->d[l].btree_id) ?:
+ cmp_int(level, journal_keys->d[l].level) ?:
+ bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
+
+ BUG_ON(l &&
+ (cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
+ cmp_int(level, journal_keys->d[l - 1].level) ?:
+ bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
+
+ return l < journal_keys->nr ? journal_keys->d + l : NULL;
+}
+
+static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
+{
+ if (iter->k &&
+ iter->k < iter->keys->d + iter->keys->nr &&
+ iter->k->btree_id == iter->btree_id &&
+ iter->k->level == iter->level)
+ return iter->k->k;
+
+ iter->k = NULL;
+ return NULL;
+}
+
+static void bch2_journal_iter_advance(struct journal_iter *iter)
+{
+ if (iter->k)
+ iter->k++;
}
-struct bkey_s_c bch2_journal_iter_next(struct journal_iter *iter)
+static void bch2_journal_iter_init(struct journal_iter *iter,
+ struct journal_keys *journal_keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
{
- if (!iter->k)
- return bkey_s_c_null;
+ iter->btree_id = id;
+ iter->level = level;
+ iter->keys = journal_keys;
+ iter->k = journal_key_search(journal_keys, id, level, pos);
+}
- iter->k++;
- if (iter->k == iter->keys->d + iter->keys->nr)
- iter->k = NULL;
+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
+{
+ return iter->btree
+ ? bch2_btree_iter_peek(iter->btree)
+ : bch2_btree_node_iter_peek_unpack(&iter->node_iter,
+ iter->b, &iter->unpacked);
+}
- return bch2_journal_iter_peek(iter);
+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
+{
+ if (iter->btree)
+ bch2_btree_iter_next(iter->btree);
+ else
+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
}
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
@@ -59,10 +107,10 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
case none:
break;
case btree:
- bch2_btree_iter_next(iter->btree);
+ bch2_journal_iter_advance_btree(iter);
break;
case journal:
- bch2_journal_iter_next(&iter->journal);
+ bch2_journal_iter_advance(&iter->journal);
break;
}
@@ -74,14 +122,16 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
struct bkey_s_c ret;
while (1) {
- struct bkey_s_c btree_k = bch2_btree_iter_peek(iter->btree);
- struct bkey_s_c journal_k = bch2_journal_iter_peek(&iter->journal);
+ struct bkey_s_c btree_k =
+ bch2_journal_iter_peek_btree(iter);
+ struct bkey_s_c journal_k =
+ bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal));
if (btree_k.k && journal_k.k) {
int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
if (!cmp)
- bch2_btree_iter_next(iter->btree);
+ bch2_journal_iter_advance_btree(iter);
iter->last = cmp < 0 ? btree : journal;
} else if (btree_k.k) {
@@ -94,6 +144,14 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
}
ret = iter->last == journal ? journal_k : btree_k;
+
+ if (iter->b &&
+ bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
+ iter->journal.k = NULL;
+ iter->last = none;
+ return bkey_s_c_null;
+ }
+
if (!bkey_deleted(ret.k))
break;
@@ -110,41 +168,32 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *
return bch2_btree_and_journal_iter_peek(iter);
}
-struct journal_key *journal_key_search(struct journal_keys *journal_keys,
- enum btree_id id, struct bpos pos)
-{
- size_t l = 0, r = journal_keys->nr, m;
-
- while (l < r) {
- m = l + ((r - l) >> 1);
- if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
- bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
- l = m + 1;
- else
- r = m;
- }
-
- BUG_ON(l < journal_keys->nr &&
- (cmp_int(id, journal_keys->d[l].btree_id) ?:
- bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
-
- BUG_ON(l &&
- (cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
- bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
-
- return l < journal_keys->nr ? journal_keys->d + l : NULL;
-}
-
void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
struct btree_trans *trans,
struct journal_keys *journal_keys,
enum btree_id id, struct bpos pos)
{
- iter->journal.keys = journal_keys;
- iter->journal.k = journal_key_search(journal_keys, id, pos);
- iter->journal.btree_id = id;
+ memset(iter, 0, sizeof(*iter));
iter->btree = bch2_trans_get_iter(trans, id, pos, 0);
+ bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos);
+}
+
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+ struct journal_keys *journal_keys,
+ struct btree *b)
+{
+ struct bpos start = b->data->min_key;
+
+ if (btree_node_type_is_extents(b->c.btree_id))
+ start = bkey_successor(start);
+
+ memset(iter, 0, sizeof(*iter));
+
+ iter->b = b;
+ bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
+ bch2_journal_iter_init(&iter->journal, journal_keys,
+ b->c.btree_id, b->c.level, start);
}
/* sort and dedup all keys in the journal: */
@@ -169,7 +218,8 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
const struct journal_key *l = _l;
const struct journal_key *r = _r;
- return cmp_int(l->btree_id, r->btree_id) ?:
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset);
@@ -180,9 +230,10 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
const struct journal_key *l = _l;
const struct journal_key *r = _r;
- return cmp_int(l->journal_seq, r->journal_seq) ?:
- cmp_int(l->btree_id, r->btree_id) ?:
- bkey_cmp(l->k->k.p, r->k->k.p);
+ return cmp_int(l->journal_seq, r->journal_seq) ?:
+ cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->level, r->level) ?:
+ bkey_cmp(l->k->k.p, r->k->k.p);
}
static void journal_keys_free(struct journal_keys *keys)
@@ -218,6 +269,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
for_each_jset_key(k, _n, entry, &p->j)
keys.d[keys.nr++] = (struct journal_key) {
.btree_id = entry->btree_id,
+ .level = entry->level,
.k = k,
.journal_seq = le64_to_cpu(p->j.seq) -
keys.journal_seq_base,
@@ -229,7 +281,8 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
src = dst = keys.d;
while (src < keys.d + keys.nr) {
while (src + 1 < keys.d + keys.nr &&
- src[0].btree_id == src[1].btree_id &&
+ src[0].btree_id == src[1].btree_id &&
+ src[0].level == src[1].level &&
!bkey_cmp(src[0].k->k.p, src[1].k->k.p))
src++;
@@ -864,7 +917,7 @@ int bch2_fs_recovery(struct bch_fs *c)
*/
bch_info(c, "starting metadata mark and sweep");
err = "error in mark and sweep";
- ret = bch2_gc(c, NULL, true, true);
+ ret = bch2_gc(c, &journal_keys, true, true);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
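
Note: journal_key_search() has moved into recovery.c, and journal keys are now sorted and binary-searched by (btree_id, level, pos), so keys for interior nodes can be overlaid as well. A hedged sketch of a point lookup under the new ordering (journal_has_key_at() is hypothetical, patterned on the pos_in_journal_keys() helper this patch removes from btree_gc.c):

static bool journal_has_key_at(struct journal_keys *keys, enum btree_id id,
			       unsigned level, struct bpos pos)
{
	struct journal_key *k = journal_key_search(keys, id, level, pos);

	return k && k->btree_id == id && k->level == level &&
		!bkey_cmp(k->k->k.p, pos);
}
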
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index c91309301563..fa1f2818817d 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -5,6 +5,7 @@
struct journal_keys {
struct journal_key {
enum btree_id btree_id:8;
+ unsigned level:8;
struct bkey_i *k;
u32 journal_seq;
u32 journal_offset;
@@ -17,15 +18,23 @@ struct journal_keys {
for (i = (keys).d; i < (keys).d + (keys).nr; (i)++)
struct journal_iter {
+ enum btree_id btree_id;
+ unsigned level;
struct journal_keys *keys;
struct journal_key *k;
- enum btree_id btree_id;
};
-struct btree_and_journal_iter {
- enum btree_id btree_id;
+/*
+ * Iterate over keys in the btree, with keys from the journal overlaid on top:
+ */
+struct btree_and_journal_iter {
struct btree_iter *btree;
+
+ struct btree *b;
+ struct btree_node_iter node_iter;
+ struct bkey unpacked;
+
struct journal_iter journal;
enum last_key_returned {
@@ -38,12 +47,14 @@ struct btree_and_journal_iter {
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
-struct journal_key *journal_key_search(struct journal_keys *,
- enum btree_id, struct bpos);
+
void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
struct btree_trans *,
struct journal_keys *,
enum btree_id, struct bpos);
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+ struct journal_keys *,
+ struct btree *);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
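
Note: with the header changes above, a btree_and_journal_iter can be positioned either from a btree_trans iterator or directly on an in-memory node. A minimal walk sketch for the node-scoped form (walk_node_with_journal() is hypothetical; the loop mirrors bch2_gc_btree_init_recurse()):

static void walk_node_with_journal(struct btree *b,
				   struct journal_keys *journal_keys)
{
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;

	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		/* k: keys from the node, with newer journal keys overlaid */
		bch2_btree_and_journal_iter_advance(&iter);
	}
}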