diff options
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.c | 81 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 113 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 22 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 161 | ||||
-rw-r--r-- | fs/bcachefs/recovery.h | 21 |
9 files changed, 296 insertions, 114 deletions
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index c064cf468a9b..0aa3d3b9a281 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -134,7 +134,7 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) { - if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0) + if (bkey_cmp(k.k->p, b->data->min_key) < 0) return "key before start of btree node"; if (bkey_cmp(k.k->p, b->data->max_key) > 0) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index cb843a362cb4..0711bde8d68c 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -589,6 +589,7 @@ err: static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, struct btree_iter *iter, const struct bkey_i *k, + enum btree_id btree_id, unsigned level, enum six_lock_type lock_type, bool sync) @@ -601,7 +602,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (!bch2_btree_node_relock(iter, level + 1)) + if (iter && !bch2_btree_node_relock(iter, level + 1)) return ERR_PTR(-EINTR); b = bch2_btree_node_mem_alloc(c); @@ -609,7 +610,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, return b; bkey_copy(&b->key, k); - if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) { + if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ /* mark as unhashed... */ @@ -629,7 +630,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * * XXX: ideally should be dropping all btree node locks here */ - if (btree_node_read_locked(iter, level + 1)) + if (iter && btree_node_read_locked(iter, level + 1)) btree_node_unlock(iter, level + 1); bch2_btree_node_read(c, b, sync); @@ -677,7 +678,8 @@ retry: * else we could read in a btree node from disk that's been * freed: */ - b = bch2_btree_node_fill(c, iter, k, level, lock_type, true); + b = bch2_btree_node_fill(c, iter, k, iter->btree_id, + level, lock_type, true); /* We raced and found the btree node in the cache */ if (!b) @@ -763,6 +765,74 @@ lock_node: return b; } +struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, + const struct bkey_i *k, + enum btree_id btree_id, + unsigned level) +{ + struct btree_cache *bc = &c->btree_cache; + struct btree *b; + struct bset_tree *t; + + EBUG_ON(level >= BTREE_MAX_DEPTH); + + b = btree_node_mem_ptr(k); + if (b) + goto lock_node; +retry: + b = btree_cache_find(bc, k); + if (unlikely(!b)) { + b = bch2_btree_node_fill(c, NULL, k, btree_id, + level, SIX_LOCK_read, true); + + /* We raced and found the btree node in the cache */ + if (!b) + goto retry; + + if (IS_ERR(b)) + return b; + } else { +lock_node: + six_lock_read(&b->c.lock, NULL, NULL); + + if (unlikely(b->hash_val != btree_ptr_hash_val(k) || + b->c.btree_id != btree_id || + b->c.level != level)) { + six_unlock_read(&b->c.lock); + goto retry; + } + } + + /* XXX: waiting on IO with btree locks held: */ + wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, + TASK_UNINTERRUPTIBLE); + + prefetch(b->aux_data); + + for_each_bset(b, t) { + void *p = (u64 *) b->aux_data + t->aux_data_offset; + + prefetch(p + L1_CACHE_BYTES * 0); + prefetch(p + L1_CACHE_BYTES * 1); + prefetch(p + L1_CACHE_BYTES * 2); + } + + /* avoid atomic set bit if it's not needed: */ + if (!btree_node_accessed(b)) + set_btree_node_accessed(b); + + if (unlikely(btree_node_read_error(b))) { + six_unlock_read(&b->c.lock); + return ERR_PTR(-EIO); + } + + EBUG_ON(b->c.btree_id != btree_id || + BTREE_NODE_LEVEL(b->data) != level || + bkey_cmp(b->data->max_key, k->k.p)); + + return b; +} + struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree_iter *iter, struct btree *b, @@ -877,7 +947,8 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, if (b) return; - bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false); + bch2_btree_node_fill(c, iter, k, iter->btree_id, + level, SIX_LOCK_read, false); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 5d85987457bf..abde6c2658c6 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -25,6 +25,9 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, const struct bkey_i *, unsigned, enum six_lock_type); +struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, + enum btree_id, unsigned); + struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, struct btree *, enum btree_node_sibling); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index f85fbc057fb3..ee5eafdb1222 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -186,16 +186,8 @@ fsck_err: return ret; } -static bool pos_in_journal_keys(struct journal_keys *journal_keys, - enum btree_id id, struct bpos pos) -{ - struct journal_key *k = journal_key_search(journal_keys, id, pos); - - return k && k->btree_id == id && !bkey_cmp(k->k->k.p, pos); -} - static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, - struct journal_keys *journal_keys, bool initial) + bool initial) { struct btree_node_iter iter; struct bkey unpacked; @@ -209,10 +201,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { - if (!b->c.level && journal_keys && - pos_in_journal_keys(journal_keys, b->c.btree_id, k.k->p)) - continue; - bch2_bkey_debugcheck(c, b, k); ret = bch2_gc_mark_key(c, k, max_stale, initial); @@ -224,7 +212,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, } static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, - struct journal_keys *journal_keys, bool initial, bool metadata_only) { struct btree_trans trans; @@ -252,8 +239,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, gc_pos_set(c, gc_pos_btree_node(b)); - ret = btree_gc_mark_node(c, b, &max_stale, - journal_keys, initial); + ret = btree_gc_mark_node(c, b, &max_stale, initial); if (ret) break; @@ -289,6 +275,78 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, return ret; } +static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, + struct journal_keys *journal_keys, + unsigned target_depth) +{ + struct btree_and_journal_iter iter; + struct bkey_s_c k; + u8 max_stale = 0; + int ret = 0; + + bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + bch2_bkey_debugcheck(c, b, k); + + ret = bch2_gc_mark_key(c, k, &max_stale, true); + if (ret) + break; + + if (b->c.level > target_depth) { + struct btree *child; + BKEY_PADDED(k) tmp; + + bkey_reassemble(&tmp.k, k); + + child = bch2_btree_node_get_noiter(c, &tmp.k, + b->c.btree_id, b->c.level - 1); + ret = PTR_ERR_OR_ZERO(child); + if (ret) + break; + + bch2_gc_btree_init_recurse(c, child, + journal_keys, target_depth); + six_unlock_read(&child->c.lock); + } + + bch2_btree_and_journal_iter_advance(&iter); + } + + return ret; +} + +static int bch2_gc_btree_init(struct bch_fs *c, + struct journal_keys *journal_keys, + enum btree_id btree_id, + bool metadata_only) +{ + struct btree *b; + unsigned target_depth = metadata_only ? 1 + : expensive_debug_checks(c) ? 0 + : !btree_node_type_needs_gc(btree_id) ? 1 + : 0; + u8 max_stale = 0; + int ret = 0; + + b = c->btree_roots[btree_id].b; + + if (btree_node_fake(b)) + return 0; + + six_lock_read(&b->c.lock, NULL, NULL); + if (b->c.level >= target_depth) + ret = bch2_gc_btree_init_recurse(c, b, + journal_keys, target_depth); + + if (!ret) + ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), + &max_stale, true); + six_unlock_read(&b->c.lock); + + return ret; +} + static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) { return (int) btree_id_to_gc_phase(l) - @@ -307,27 +365,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; - enum btree_node_type type = __btree_node_type(0, id); - - int ret = bch2_gc_btree(c, id, journal_keys, - initial, metadata_only); + int ret = initial + ? bch2_gc_btree_init(c, journal_keys, + id, metadata_only) + : bch2_gc_btree(c, id, initial, metadata_only); if (ret) return ret; - - if (journal_keys && !metadata_only && - btree_node_type_needs_gc(type)) { - struct journal_key *j; - u8 max_stale; - int ret; - - for_each_journal_key(*journal_keys, j) - if (j->btree_id == id) { - ret = bch2_gc_mark_key(c, bkey_i_to_s_c(j->k), - &max_stale, initial); - if (ret) - return ret; - } - } } return 0; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index fdfa7a265850..885cc9500f36 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -264,6 +264,11 @@ static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter) return iter->flags & BTREE_ITER_TYPE; } +static inline struct btree_iter_level *iter_l(struct btree_iter *iter) +{ + return iter->l + iter->level; +} + struct btree_insert_entry { unsigned trigger_flags; unsigned trans_triggers_run:1; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c1a4d6559d01..fa9c7f5e0bb9 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1630,7 +1630,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, unsigned flags) { struct btree_trans *trans = iter->trans; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; struct btree_update *as; struct closure cl; int ret = 0; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a5362ecb4f5d..a8487f8275b6 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -24,7 +24,7 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, struct btree_insert_entry *i) { return i != trans->updates2 && - i[0].iter->l[0].b == i[-1].iter->l[0].b; + iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; } inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, @@ -172,13 +172,12 @@ static void bch2_btree_journal_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct journal *j = &c->journal; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; struct btree_write *w = btree_current_write(b); u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) ? trans->journal_res.seq : j->replay_journal_seq; - EBUG_ON(iter->level || b->c.level); EBUG_ON(trans->journal_res.ref != !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); @@ -205,17 +204,15 @@ static void btree_insert_key_leaf(struct btree_trans *trans, struct bkey_i *insert) { struct bch_fs *c = trans->c; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; struct bset_tree *t = bset_tree_last(b); int old_u64s = bset_u64s(t); int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; - EBUG_ON(iter->level); - insert->k.needs_whiteout = false; - if (likely(bch2_btree_bset_insert_key(iter, b, &iter->l[0].iter, insert))) + if (likely(bch2_btree_bset_insert_key(iter, b, &iter_l(iter)->iter, insert))) bch2_btree_journal_key(trans, iter, insert); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; @@ -241,7 +238,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, { struct bch_fs *c = trans->c; - BUG_ON(iter->level); BUG_ON(bkey_cmp(insert->k.p, iter->pos)); BUG_ON(debug_check_bkeys(c) && bch2_bkey_invalid(c, bkey_i_to_s_c(insert), iter->btree_id)); @@ -290,7 +286,7 @@ btree_key_can_insert(struct btree_trans *trans, unsigned *u64s) { struct bch_fs *c = trans->c; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; static enum btree_insert_ret ret; if (unlikely(btree_node_fake(b))) @@ -345,7 +341,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) struct btree_insert_entry *i; trans_for_each_update(trans, i) - if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) + if (gc_visited(c, gc_pos_btree_node(iter_l(i->iter)->b))) bch2_mark_update(trans, i->iter, i->k, NULL, i->trigger_flags|BTREE_TRIGGER_GC); } @@ -461,7 +457,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, int ret; trans_for_each_update2(trans, i) - BUG_ON(!btree_node_intent_locked(i->iter, 0)); + BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level)); ret = bch2_journal_preres_get(&trans->c->journal, &trans->journal_preres, trans->journal_preres_u64s, @@ -495,13 +491,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) bch2_btree_node_lock_for_insert(trans->c, - i->iter->l[0].b, i->iter); + iter_l(i->iter)->b, i->iter); ret = bch2_trans_commit_write_locked(trans, stopped_at); trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(i->iter->l[0].b, + bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, i->iter); /* diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 02b381cb567b..0d4abaa3ba10 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -27,30 +27,78 @@ /* iterate over keys read from the journal: */ -struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +static struct journal_key *journal_key_search(struct journal_keys *journal_keys, + enum btree_id id, unsigned level, + struct bpos pos) { - while (iter->k) { - if (iter->k->btree_id == iter->btree_id) - return bkey_i_to_s_c(iter->k->k); + size_t l = 0, r = journal_keys->nr, m; - iter->k++; - if (iter->k == iter->keys->d + iter->keys->nr) - iter->k = NULL; + while (l < r) { + m = l + ((r - l) >> 1); + if ((cmp_int(id, journal_keys->d[m].btree_id) ?: + cmp_int(level, journal_keys->d[m].level) ?: + bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) + l = m + 1; + else + r = m; } - return bkey_s_c_null; + BUG_ON(l < journal_keys->nr && + (cmp_int(id, journal_keys->d[l].btree_id) ?: + cmp_int(level, journal_keys->d[l].level) ?: + bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); + + BUG_ON(l && + (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: + cmp_int(level, journal_keys->d[l - 1].level) ?: + bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); + + return l < journal_keys->nr ? journal_keys->d + l : NULL; +} + +static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) +{ + if (iter->k && + iter->k < iter->keys->d + iter->keys->nr && + iter->k->btree_id == iter->btree_id && + iter->k->level == iter->level) + return iter->k->k; + + iter->k = NULL; + return NULL; +} + +static void bch2_journal_iter_advance(struct journal_iter *iter) +{ + if (iter->k) + iter->k++; } -struct bkey_s_c bch2_journal_iter_next(struct journal_iter *iter) +static void bch2_journal_iter_init(struct journal_iter *iter, + struct journal_keys *journal_keys, + enum btree_id id, unsigned level, + struct bpos pos) { - if (!iter->k) - return bkey_s_c_null; + iter->btree_id = id; + iter->level = level; + iter->keys = journal_keys; + iter->k = journal_key_search(journal_keys, id, level, pos); +} - iter->k++; - if (iter->k == iter->keys->d + iter->keys->nr) - iter->k = NULL; +static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) +{ + return iter->btree + ? bch2_btree_iter_peek(iter->btree) + : bch2_btree_node_iter_peek_unpack(&iter->node_iter, + iter->b, &iter->unpacked); +} - return bch2_journal_iter_peek(iter); +static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) +{ + if (iter->btree) + bch2_btree_iter_next(iter->btree); + else + bch2_btree_node_iter_advance(&iter->node_iter, iter->b); } void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) @@ -59,10 +107,10 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) case none: break; case btree: - bch2_btree_iter_next(iter->btree); + bch2_journal_iter_advance_btree(iter); break; case journal: - bch2_journal_iter_next(&iter->journal); + bch2_journal_iter_advance(&iter->journal); break; } @@ -74,14 +122,16 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * struct bkey_s_c ret; while (1) { - struct bkey_s_c btree_k = bch2_btree_iter_peek(iter->btree); - struct bkey_s_c journal_k = bch2_journal_iter_peek(&iter->journal); + struct bkey_s_c btree_k = + bch2_journal_iter_peek_btree(iter); + struct bkey_s_c journal_k = + bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal)); if (btree_k.k && journal_k.k) { int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p); if (!cmp) - bch2_btree_iter_next(iter->btree); + bch2_journal_iter_advance_btree(iter); iter->last = cmp < 0 ? btree : journal; } else if (btree_k.k) { @@ -94,6 +144,14 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * } ret = iter->last == journal ? journal_k : btree_k; + + if (iter->b && + bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { + iter->journal.k = NULL; + iter->last = none; + return bkey_s_c_null; + } + if (!bkey_deleted(ret.k)) break; @@ -110,41 +168,32 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter * return bch2_btree_and_journal_iter_peek(iter); } -struct journal_key *journal_key_search(struct journal_keys *journal_keys, - enum btree_id id, struct bpos pos) -{ - size_t l = 0, r = journal_keys->nr, m; - - while (l < r) { - m = l + ((r - l) >> 1); - if ((cmp_int(id, journal_keys->d[m].btree_id) ?: - bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) - l = m + 1; - else - r = m; - } - - BUG_ON(l < journal_keys->nr && - (cmp_int(id, journal_keys->d[l].btree_id) ?: - bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); - - BUG_ON(l && - (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: - bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); - - return l < journal_keys->nr ? journal_keys->d + l : NULL; -} - void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, struct btree_trans *trans, struct journal_keys *journal_keys, enum btree_id id, struct bpos pos) { - iter->journal.keys = journal_keys; - iter->journal.k = journal_key_search(journal_keys, id, pos); - iter->journal.btree_id = id; + memset(iter, 0, sizeof(*iter)); iter->btree = bch2_trans_get_iter(trans, id, pos, 0); + bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); +} + +void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, + struct journal_keys *journal_keys, + struct btree *b) +{ + struct bpos start = b->data->min_key; + + if (btree_node_type_is_extents(b->c.btree_id)) + start = bkey_successor(start); + + memset(iter, 0, sizeof(*iter)); + + iter->b = b; + bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); + bch2_journal_iter_init(&iter->journal, journal_keys, + b->c.btree_id, b->c.level, start); } /* sort and dedup all keys in the journal: */ @@ -169,7 +218,8 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) const struct journal_key *l = _l; const struct journal_key *r = _r; - return cmp_int(l->btree_id, r->btree_id) ?: + return cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: bkey_cmp(l->k->k.p, r->k->k.p) ?: cmp_int(l->journal_seq, r->journal_seq) ?: cmp_int(l->journal_offset, r->journal_offset); @@ -180,9 +230,10 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = _l; const struct journal_key *r = _r; - return cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->btree_id, r->btree_id) ?: - bkey_cmp(l->k->k.p, r->k->k.p); + return cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: + bkey_cmp(l->k->k.p, r->k->k.p); } static void journal_keys_free(struct journal_keys *keys) @@ -218,6 +269,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) for_each_jset_key(k, _n, entry, &p->j) keys.d[keys.nr++] = (struct journal_key) { .btree_id = entry->btree_id, + .level = entry->level, .k = k, .journal_seq = le64_to_cpu(p->j.seq) - keys.journal_seq_base, @@ -229,7 +281,8 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) src = dst = keys.d; while (src < keys.d + keys.nr) { while (src + 1 < keys.d + keys.nr && - src[0].btree_id == src[1].btree_id && + src[0].btree_id == src[1].btree_id && + src[0].level == src[1].level && !bkey_cmp(src[0].k->k.p, src[1].k->k.p)) src++; @@ -864,7 +917,7 @@ int bch2_fs_recovery(struct bch_fs *c) */ bch_info(c, "starting metadata mark and sweep"); err = "error in mark and sweep"; - ret = bch2_gc(c, NULL, true, true); + ret = bch2_gc(c, &journal_keys, true, true); if (ret) goto err; bch_verbose(c, "mark and sweep done"); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index c91309301563..fa1f2818817d 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -5,6 +5,7 @@ struct journal_keys { struct journal_key { enum btree_id btree_id:8; + unsigned level:8; struct bkey_i *k; u32 journal_seq; u32 journal_offset; @@ -17,15 +18,23 @@ struct journal_keys { for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) struct journal_iter { + enum btree_id btree_id; + unsigned level; struct journal_keys *keys; struct journal_key *k; - enum btree_id btree_id; }; -struct btree_and_journal_iter { - enum btree_id btree_id; +/* + * Iterate over keys in the btree, with keys from the journal overlaid on top: + */ +struct btree_and_journal_iter { struct btree_iter *btree; + + struct btree *b; + struct btree_node_iter node_iter; + struct bkey unpacked; + struct journal_iter journal; enum last_key_returned { @@ -38,12 +47,14 @@ struct btree_and_journal_iter { void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -struct journal_key *journal_key_search(struct journal_keys *, - enum btree_id, struct bpos); + void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, struct btree_trans *, struct journal_keys *, enum btree_id, struct bpos); +void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, + struct journal_keys *, + struct btree *); int bch2_fs_recovery(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *); |