diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-11-26 17:26:04 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:33 -0400 |
commit | bcd6f3e06fe4f039e1526a0ff5bc3ebbc2405e10 (patch) | |
tree | a284f68b3448de48129ed3be534ef1f7a455f00a | |
parent | 1c3ff72c0fa94651a226d3351d9df89d5eafd2d7 (diff) | |
download | lwn-bcd6f3e06fe4f039e1526a0ff5bc3ebbc2405e10.tar.gz lwn-bcd6f3e06fe4f039e1526a0ff5bc3ebbc2405e10.zip |
bcachefs: Use KEY_TYPE_deleted whiteouts for extents
Previously, partial overwrites of existing extents were handled
implicitly by the btree code; when reading in a btree node, we'd do a
mergesort of the different bsets and detect and fix partially
overlapping extents during that mergesort.
That approach won't work with snapshots: this changes extents to work
like regular keys as far as the btree code is concerned, where a 0 size
KEY_TYPE_deleted whiteout will completely overwrite an existing extent.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 8 | ||||
-rw-r--r-- | fs/bcachefs/bkey_sort.c | 232 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 36 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/extent_update.c | 88 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 2 |
8 files changed, 244 insertions, 133 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 535ba2788315..0a623ed3caa6 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1286,6 +1286,7 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); * reflink: gates KEY_TYPE_reflink * inline_data: gates KEY_TYPE_inline_data * new_siphash: gates BCH_STR_HASH_SIPHASH + * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE */ #define BCH_SB_FEATURES() \ x(lz4, 0) \ @@ -1296,7 +1297,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); x(journal_seq_blacklist_v3, 5) \ x(reflink, 6) \ x(new_siphash, 7) \ - x(inline_data, 8) + x(inline_data, 8) \ + x(new_extent_overwrite, 9) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, @@ -1620,7 +1622,9 @@ struct btree_node { LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4); LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); -/* 8-32 unused */ +LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, + struct btree_node, flags, 8, 9); +/* 9-32 unused */ LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); struct btree_node_entry { diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 23b51ef57303..18f842012f05 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -130,24 +130,6 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, return nr; } -/* - * If keys compare equal, compare by pointer order: - * - * Necessary for sort_fix_overlapping() - if there are multiple keys that - * compare equal in different sets, we have to process them newest to oldest. 
- */ -static inline int extent_sort_fix_overlapping_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - struct bkey ul = bkey_unpack_key(b, l); - struct bkey ur = bkey_unpack_key(b, r); - - return bkey_cmp(bkey_start_pos(&ul), - bkey_start_pos(&ur)) ?: - cmp_int((unsigned long) r, (unsigned long) l); -} - static void extent_sort_advance_prev(struct bkey_format *f, struct btree_nr_keys *nr, struct bkey_packed *start, @@ -188,102 +170,6 @@ static void extent_sort_append(struct bch_fs *c, bkey_reassemble((void *) *prev, k.s_c); } -struct btree_nr_keys -bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, - struct sort_iter *iter) -{ - struct btree *b = iter->b; - struct bkey_format *f = &b->format; - struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; - struct bkey_packed *prev = NULL; - struct bkey l_unpacked, r_unpacked; - struct bkey_s l, r; - struct btree_nr_keys nr; - struct bkey_on_stack split; - - memset(&nr, 0, sizeof(nr)); - bkey_on_stack_init(&split); - - sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); - - while (!sort_iter_end(iter)) { - l = __bkey_disassemble(b, _l->k, &l_unpacked); - - if (iter->used == 1) { - extent_sort_append(c, f, &nr, dst->start, &prev, l); - sort_iter_advance(iter, - extent_sort_fix_overlapping_cmp); - continue; - } - - r = __bkey_disassemble(b, _r->k, &r_unpacked); - - /* If current key and next key don't overlap, just append */ - if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { - extent_sort_append(c, f, &nr, dst->start, &prev, l); - sort_iter_advance(iter, - extent_sort_fix_overlapping_cmp); - continue; - } - - /* Skip 0 size keys */ - if (!r.k->size) { - __sort_iter_advance(iter, 1, - extent_sort_fix_overlapping_cmp); - continue; - } - - /* - * overlap: keep the newer key and trim the older key so they - * don't overlap. comparing pointers tells us which one is - * newer, since the bsets are appended one after the other. 
- */ - - /* can't happen because of comparison func */ - BUG_ON(_l->k < _r->k && - !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); - - if (_l->k > _r->k) { - /* l wins, trim r */ - if (bkey_cmp(l.k->p, r.k->p) >= 0) { - __sort_iter_advance(iter, 1, - extent_sort_fix_overlapping_cmp); - } else { - bch2_cut_front_s(l.k->p, r); - extent_save(b, _r->k, r.k); - __sort_iter_sift(iter, 1, - extent_sort_fix_overlapping_cmp); - } - } else if (bkey_cmp(l.k->p, r.k->p) > 0) { - - /* - * r wins, but it overlaps in the middle of l - split l: - */ - bkey_on_stack_reassemble(&split, c, l.s_c); - bch2_cut_back(bkey_start_pos(r.k), split.k); - - bch2_cut_front_s(r.k->p, l); - extent_save(b, _l->k, l.k); - - __sort_iter_sift(iter, 0, - extent_sort_fix_overlapping_cmp); - - extent_sort_append(c, f, &nr, dst->start, - &prev, bkey_i_to_s(split.k)); - } else { - bch2_cut_back_s(bkey_start_pos(r.k), l); - extent_save(b, _l->k, l.k); - } - } - - extent_sort_advance_prev(f, &nr, dst->start, &prev); - - dst->u64s = cpu_to_le16((u64 *) prev - dst->_data); - - bkey_on_stack_exit(&split, c); - return nr; -} - /* Sort + repack in a new format: */ struct btree_nr_keys bch2_sort_repack(struct bset *dst, struct btree *src, @@ -354,7 +240,7 @@ static inline int sort_keys_cmp(struct btree *b, struct bkey_packed *r) { return bkey_cmp_packed(b, l, r) ?: - (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?: + (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: (int) l->needs_whiteout - (int) r->needs_whiteout; } @@ -399,6 +285,122 @@ unsigned bch2_sort_keys(struct bkey_packed *dst, return (u64 *) out - (u64 *) dst; } +/* Compat code for btree_node_old_extent_overwrite: */ + +/* + * If keys compare equal, compare by pointer order: + * + * Necessary for sort_fix_overlapping() - if there are multiple keys that + * compare equal in different sets, we have to process them newest to oldest. 
+ */ +static inline int extent_sort_fix_overlapping_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) +{ + struct bkey ul = bkey_unpack_key(b, l); + struct bkey ur = bkey_unpack_key(b, r); + + return bkey_cmp(bkey_start_pos(&ul), + bkey_start_pos(&ur)) ?: + cmp_int((unsigned long) r, (unsigned long) l); +} + +struct btree_nr_keys +bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, + struct sort_iter *iter) +{ + struct btree *b = iter->b; + struct bkey_format *f = &b->format; + struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; + struct bkey_packed *prev = NULL; + struct bkey l_unpacked, r_unpacked; + struct bkey_s l, r; + struct btree_nr_keys nr; + struct bkey_on_stack split; + + memset(&nr, 0, sizeof(nr)); + bkey_on_stack_init(&split); + + sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); + + while (!sort_iter_end(iter)) { + l = __bkey_disassemble(b, _l->k, &l_unpacked); + + if (iter->used == 1) { + extent_sort_append(c, f, &nr, dst->start, &prev, l); + sort_iter_advance(iter, + extent_sort_fix_overlapping_cmp); + continue; + } + + r = __bkey_disassemble(b, _r->k, &r_unpacked); + + /* If current key and next key don't overlap, just append */ + if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { + extent_sort_append(c, f, &nr, dst->start, &prev, l); + sort_iter_advance(iter, + extent_sort_fix_overlapping_cmp); + continue; + } + + /* Skip 0 size keys */ + if (!r.k->size) { + __sort_iter_advance(iter, 1, + extent_sort_fix_overlapping_cmp); + continue; + } + + /* + * overlap: keep the newer key and trim the older key so they + * don't overlap. comparing pointers tells us which one is + * newer, since the bsets are appended one after the other. 
+ */ + + /* can't happen because of comparison func */ + BUG_ON(_l->k < _r->k && + !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); + + if (_l->k > _r->k) { + /* l wins, trim r */ + if (bkey_cmp(l.k->p, r.k->p) >= 0) { + __sort_iter_advance(iter, 1, + extent_sort_fix_overlapping_cmp); + } else { + bch2_cut_front_s(l.k->p, r); + extent_save(b, _r->k, r.k); + __sort_iter_sift(iter, 1, + extent_sort_fix_overlapping_cmp); + } + } else if (bkey_cmp(l.k->p, r.k->p) > 0) { + + /* + * r wins, but it overlaps in the middle of l - split l: + */ + bkey_on_stack_reassemble(&split, c, l.s_c); + bch2_cut_back(bkey_start_pos(r.k), split.k); + + bch2_cut_front_s(r.k->p, l); + extent_save(b, _l->k, l.k); + + __sort_iter_sift(iter, 0, + extent_sort_fix_overlapping_cmp); + + extent_sort_append(c, f, &nr, dst->start, + &prev, bkey_i_to_s(split.k)); + } else { + bch2_cut_back_s(bkey_start_pos(r.k), l); + extent_save(b, _l->k, l.k); + } + } + + extent_sort_advance_prev(f, &nr, dst->start, &prev); + + dst->u64s = cpu_to_le16((u64 *) prev - dst->_data); + + bkey_on_stack_exit(&split, c); + return nr; +} + static inline int sort_extents_cmp(struct btree *b, struct bkey_packed *l, struct bkey_packed *r) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 209e20fbcd70..c5b5143ada05 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -22,7 +22,8 @@ static void verify_no_dups(struct btree *b, struct bkey_packed *start, - struct bkey_packed *end) + struct bkey_packed *end, + bool extents) { #ifdef CONFIG_BCACHEFS_DEBUG struct bkey_packed *k, *p; @@ -36,7 +37,7 @@ static void verify_no_dups(struct btree *b, struct bkey l = bkey_unpack_key(b, p); struct bkey r = bkey_unpack_key(b, k); - BUG_ON(btree_node_is_extents(b) + BUG_ON(extents ? 
bkey_cmp(l.p, bkey_start_pos(&r)) > 0 : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0); @@ -147,7 +148,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) } verify_no_dups(b, new_whiteouts, - (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); + (void *) ((u64 *) new_whiteouts + b->whiteout_u64s), + btree_node_old_extent_overwrite(b)); memcpy_u64s(unwritten_whiteouts_start(c, b), new_whiteouts, b->whiteout_u64s); @@ -297,7 +299,8 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c, verify_no_dups(b, unwritten_whiteouts_start(c, b), - unwritten_whiteouts_end(c, b)); + unwritten_whiteouts_end(c, b), + true); btree_bounce_free(c, order, used_mempool, whiteouts); @@ -377,7 +380,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, enum compact_mode mode) { - return !btree_node_is_extents(b) + return !btree_node_old_extent_overwrite(b) ? bch2_drop_whiteouts(b, mode) : bch2_compact_extent_whiteouts(c, b, mode); } @@ -417,10 +420,10 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, start_time = local_clock(); - if (btree_node_is_extents(b)) + if (btree_node_old_extent_overwrite(b)) filter_whiteouts = bset_written(b, start_bset); - u64s = (btree_node_is_extents(b) + u64s = (btree_node_old_extent_overwrite(b) ? 
bch2_sort_extents : bch2_sort_keys)(out->keys.start, &sort_iter, @@ -706,7 +709,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, bool have_retry) { struct bkey_packed *k, *prev = NULL; - struct bpos prev_pos = POS_MIN; + struct bpos prev_pos = POS_MIN; + struct bpos prev_data = POS_MIN; bool seen_non_whiteout = false; unsigned version; const char *err; @@ -839,7 +843,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) { *whiteout_u64s = k->_data - i->_data; seen_non_whiteout = true; - } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) { + } else if (bkey_cmp(prev_data, bkey_start_pos(u.k)) > 0 || + bkey_cmp(prev_pos, u.k->p) > 0) { btree_err(BTREE_ERR_FATAL, c, b, i, "keys out of order: %llu:%llu > %llu:%llu", prev_pos.inode, @@ -849,7 +854,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, /* XXX: repair this */ } + if (!bkey_deleted(u.k)) + prev_data = u.k->p; prev_pos = u.k->p; + prev = k; k = bkey_next_skip_noops(k, vstruct_last(i)); } @@ -908,6 +916,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry bset_encrypt(c, i, b->written << 9); + if (btree_node_is_extents(b) && + !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) + set_btree_node_old_extent_overwrite(b); + sectors = vstruct_sectors(b->data, c->block_bits); btree_node_set_format(b, b->data->format); @@ -971,7 +983,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry set_btree_bset(b, b->set, &b->data->keys); - b->nr = (btree_node_is_extents(b) + b->nr = (btree_node_old_extent_overwrite(b) ? 
bch2_extent_sort_fix_overlapping : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); @@ -1486,7 +1498,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, i->journal_seq = cpu_to_le64(seq); i->u64s = 0; - if (!btree_node_is_extents(b)) { + if (!btree_node_old_extent_overwrite(b)) { sort_iter_add(&sort_iter, unwritten_whiteouts_start(c, b), unwritten_whiteouts_end(c, b)); @@ -1501,7 +1513,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, b->whiteout_u64s = 0; - u64s = btree_node_is_extents(b) + u64s = btree_node_old_extent_overwrite(b) ? bch2_sort_extents(vstruct_last(i), &sort_iter, false) : bch2_sort_keys(i->start, &sort_iter, false); le16_add_cpu(&i->u64s, u64s); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 98451b3dd1a5..cc04cdbaf432 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -311,6 +311,7 @@ enum btree_flags { BTREE_NODE_just_written, BTREE_NODE_dying, BTREE_NODE_fake, + BTREE_NODE_old_extent_overwrite, }; BTREE_FLAG(read_in_flight); @@ -324,6 +325,7 @@ BTREE_FLAG(write_in_flight); BTREE_FLAG(just_written); BTREE_FLAG(dying); BTREE_FLAG(fake); +BTREE_FLAG(old_extent_overwrite); static inline struct btree_write *btree_current_write(struct btree *b) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c9be0d110c64..870eb0938c22 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -374,6 +374,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev SET_BTREE_NODE_LEVEL(b->data, level); b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0]; + if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite)) + SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true); + + if (btree_node_is_extents(b) && + !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) + set_btree_node_old_extent_overwrite(b); + bch2_btree_build_aux_trees(b); btree_node_will_make_reachable(as, b); diff 
--git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 09f5cd6493f4..78f5674394dc 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -267,6 +267,8 @@ static void btree_insert_key_leaf(struct btree_trans *trans, int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; + insert->k->k.needs_whiteout = false; + if (!btree_node_is_extents(b)) bch2_insert_fixup_key(trans, insert); else diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index e021e1623a91..d2f1414f28e2 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -186,11 +186,26 @@ bch2_extent_can_insert(struct btree_trans *trans, overlap = bch2_extent_overlap(&insert->k->k, k.k); + /* + * If we're overwriting an existing extent, we may need to emit + * a whiteout - unless we're inserting a new extent at the same + * position: + */ + if (k.k->needs_whiteout && + (!bkey_whiteout(&insert->k->k) || + bkey_cmp(k.k->p, insert->k->k.p))) + *u64s += BKEY_U64s; + + /* + * If we're partially overwriting an existing extent which has + * been written out to disk, we'll need to emit a new version of + * that extent: + */ if (bkey_written(l->b, _k) && overlap != BCH_EXTENT_OVERLAP_ALL) *u64s += _k->u64s; - /* account for having to split existing extent: */ + /* And we may be splitting an existing extent: */ if (overlap == BCH_EXTENT_OVERLAP_MIDDLE) *u64s += _k->u64s; @@ -286,6 +301,23 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s); } +static void pack_push_whiteout(struct bch_fs *c, struct btree *b, + struct bpos pos) +{ + struct bkey_packed k; + + if (!bkey_pack_pos(&k, pos, b)) { + struct bkey_i tmp; + + bkey_init(&tmp.k); + tmp.k.p = pos; + bkey_copy(&k, &tmp); + } + + k.needs_whiteout = true; + push_whiteout(c, b, &k); +} + static void extent_drop(struct bch_fs *c, struct btree_iter *iter, struct bkey_packed *_k, 
struct bkey_s k) @@ -297,7 +329,12 @@ extent_drop(struct bch_fs *c, struct btree_iter *iter, k.k->size = 0; k.k->type = KEY_TYPE_deleted; - k.k->needs_whiteout = false; + + if (!btree_node_old_extent_overwrite(l->b) && + k.k->needs_whiteout) { + pack_push_whiteout(c, l->b, k.k->p); + k.k->needs_whiteout = false; + } if (_k >= btree_bset_last(l->b)->start) { unsigned u64s = _k->u64s; @@ -322,12 +359,29 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, bkey_on_stack_init(&tmp); bkey_on_stack_init(&split); + if (!btree_node_old_extent_overwrite(l->b)) { + if (!bkey_whiteout(&insert->k) && + !bkey_cmp(k.k->p, insert->k.p)) { + insert->k.needs_whiteout = k.k->needs_whiteout; + k.k->needs_whiteout = false; + } + } else { + insert->k.needs_whiteout |= k.k->needs_whiteout; + } + switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: if (bkey_written(l->b, _k)) { bkey_on_stack_reassemble(&tmp, c, k.s_c); bch2_cut_front(insert->k.p, tmp.k); + /* + * needs_whiteout was propagated to new version of @k, + * @tmp: + */ + if (!btree_node_old_extent_overwrite(l->b)) + k.k->needs_whiteout = false; + extent_drop(c, iter, _k, k); extent_bset_insert(c, iter, tmp.k); } else { @@ -348,9 +402,26 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, bkey_on_stack_reassemble(&tmp, c, k.s_c); bch2_cut_back(bkey_start_pos(&insert->k), tmp.k); + /* + * @tmp has different position than @k, needs_whiteout + * should not be propagated: + */ + if (!btree_node_old_extent_overwrite(l->b)) + tmp.k->k.needs_whiteout = false; + extent_drop(c, iter, _k, k); extent_bset_insert(c, iter, tmp.k); } else { + /* + * position of @k is changing, emit a whiteout if + * needs_whiteout is set: + */ + if (!btree_node_old_extent_overwrite(l->b) && + k.k->needs_whiteout) { + pack_push_whiteout(c, l->b, k.k->p); + k.k->needs_whiteout = false; + } + btree_keys_account_val_delta(l->b, _k, bch2_cut_back_s(bkey_start_pos(&insert->k), k)); extent_save(l->b, _k, k.k); @@ -367,10 +438,17 @@ extent_squash(struct 
bch_fs *c, struct btree_iter *iter, bkey_on_stack_reassemble(&split, c, k.s_c); bch2_cut_back(bkey_start_pos(&insert->k), split.k); + if (!btree_node_old_extent_overwrite(l->b)) + split.k->k.needs_whiteout = false; + + /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */ if (bkey_written(l->b, _k)) { bkey_on_stack_reassemble(&tmp, c, k.s_c); bch2_cut_front(insert->k.p, tmp.k); + if (!btree_node_old_extent_overwrite(l->b)) + k.k->needs_whiteout = false; + extent_drop(c, iter, _k, k); extent_bset_insert(c, iter, tmp.k); } else { @@ -462,7 +540,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, bch2_cut_front(cur_end, insert); bch2_btree_iter_set_pos_same_leaf(iter, cur_end); } else { - insert->k.needs_whiteout |= k.k->needs_whiteout; extent_squash(c, iter, insert, _k, k, overlap); } @@ -480,7 +557,10 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, if (insert->k.type == KEY_TYPE_deleted) insert->k.type = KEY_TYPE_discard; - extent_bset_insert(c, iter, insert); + if (!bkey_whiteout(&insert->k) || + btree_node_old_extent_overwrite(l->b)) + extent_bset_insert(c, iter, insert); + bch2_btree_journal_key(trans, iter, insert); } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 97b367252e82..c7367a679b22 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -908,6 +908,7 @@ int bch2_fs_recovery(struct bch_fs *c) le16_to_cpu(bcachefs_metadata_version_min); c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash; + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; write_sb = true; } @@ -1027,6 +1028,7 @@ int bch2_fs_initialize(struct bch_fs *c) le16_to_cpu(bcachefs_metadata_version_current); c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash; + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; 
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); |