path: root/fs/bcachefs/btree_update_leaf.c
author     Kent Overstreet <kent.overstreet@gmail.com>   2021-02-02 17:09:10 -0500
committer  Kent Overstreet <kent.overstreet@linux.dev>   2023-10-22 17:09:12 -0400
commit     7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29 (patch)
tree       dc4c22912655b6b21e721d05baa07646cbf52396 /fs/bcachefs/btree_update_leaf.c
parent     8c6d298ab22fc1b2912ccef4ffd4a01b35f9c5b4 (diff)
download   lwn-7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29.tar.gz
           lwn-7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29.zip
bcachefs: Whiteouts for snapshots
This patch adds KEY_TYPE_whiteout, a new type of whiteout for snapshots, used when the key being deleted also exists in an ancestor snapshot - and updates the transaction update/commit path to emit it.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
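For readers skimming the diff below, here is a minimal, self-contained sketch of the decision this commit adds: when a delete lands in a snapshot whose ancestor still holds a live key at the same position, a plain deletion would let the ancestor's key show through again, so the delete must be recorded as an explicit whiteout instead. The toy snapshot table, is_ancestor(), need_whiteout() and struct toy_key below are illustrative assumptions only; the real code uses bch2_snapshot_parent(), bch2_snapshot_is_ancestor() and a btree iterator, as seen in need_whiteout_for_snapshot() in the diff.

/*
 * Toy model of the whiteout decision, not bcachefs code.
 * snap_parent[id] == 0 means "no parent snapshot".
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_SNAPSHOTS 16

static unsigned snap_parent[MAX_SNAPSHOTS];

/* walk up the toy snapshot tree from @id looking for @ancestor */
static bool is_ancestor(unsigned id, unsigned ancestor)
{
	while (id && id != ancestor)
		id = snap_parent[id];
	return id == ancestor;
}

struct toy_key {
	unsigned	snapshot;	/* snapshot the key was written in */
	bool		whiteout;	/* already a whiteout? */
};

/*
 * Deleting a key visible in snapshot @snap: if some ancestor snapshot still
 * has a live (non-whiteout) key at the same position, the deletion must be
 * written as a whiteout so the ancestor's key stays hidden in @snap.
 */
static bool need_whiteout(const struct toy_key *keys_at_pos, int nr,
			  unsigned snap)
{
	if (!snap_parent[snap])		/* no ancestors: plain delete is fine */
		return false;

	for (int i = 0; i < nr; i++)
		if (keys_at_pos[i].snapshot != snap &&
		    is_ancestor(snap, keys_at_pos[i].snapshot))
			return !keys_at_pos[i].whiteout;

	return false;
}

int main(void)
{
	/* snapshot 2 is a child of snapshot 1 */
	snap_parent[2] = 1;

	struct toy_key keys[] = {
		{ .snapshot = 1, .whiteout = false },	/* live key in ancestor */
	};

	printf("delete in snapshot 2 needs whiteout: %d\n",
	       need_whiteout(keys, 1, 2));		/* prints 1 */
	printf("delete in snapshot 1 needs whiteout: %d\n",
	       need_whiteout(keys, 1, 1));		/* prints 0: no parent */
	return 0;
}

In the patch itself this check happens in bch2_trans_update(): when an update is a deletion on a snapshot-filtering iterator and the check returns true, the key's type is switched to KEY_TYPE_whiteout before it is queued for commit.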
Diffstat (limited to 'fs/bcachefs/btree_update_leaf.c')
-rw-r--r--   fs/bcachefs/btree_update_leaf.c   113
1 file changed, 105 insertions(+), 8 deletions(-)
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 1922bf8236f7..2fc134e34572 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
goto next;
}
- if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k)))
+ if (!bkey_cmp(k.k->p, start))
goto next;
while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
+ bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
+ bool back_split = bkey_cmp(k.k->p, insert->k.p) > 0;
+
/*
* If we're going to be splitting a compressed extent, note it
* so that __bch2_trans_commit() can increase our disk
* reservation:
*/
- if (bkey_cmp(bkey_start_pos(k.k), start) < 0 &&
- bkey_cmp(k.k->p, insert->k.p) > 0 &&
+ if (((front_split && back_split) ||
+ ((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
(compressed_sectors = bch2_bkey_sectors_compressed(k)))
trans->extra_journal_res += compressed_sectors;
- if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
+ if (front_split) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&update_iter) ?:
+ bch2_trans_update(trans, &update_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ flags);
+ bch2_trans_iter_exit(trans, &update_iter);
+
+ if (ret)
+ goto err;
+ }
+
+ if (k.k->p.snapshot != insert->k.p.snapshot &&
+ (front_split || back_split)) {
+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ goto err;
+
+ bkey_reassemble(update, k);
+
+ bch2_cut_front(start, update);
+ bch2_cut_back(insert->k.p, update);
+
+ bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&update_iter) ?:
bch2_trans_update(trans, &update_iter, update,
@@ -1038,12 +1067,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
}
if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
- ret = bch2_btree_delete_at(trans, &iter, flags);
+ update = bch2_trans_kmalloc(trans, sizeof(*update));
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ goto err;
+
+ bkey_init(&update->k);
+ update->k.p = k.k->p;
+
+ if (insert->k.p.snapshot != k.k->p.snapshot) {
+ update->k.p.snapshot = insert->k.p.snapshot;
+ update->k.type = KEY_TYPE_whiteout;
+ }
+
+ bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&update_iter) ?:
+ bch2_trans_update(trans, &update_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ flags);
+ bch2_trans_iter_exit(trans, &update_iter);
+
if (ret)
goto err;
}
- if (bkey_cmp(k.k->p, insert->k.p) > 0) {
+ if (back_split) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
@@ -1051,10 +1100,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
bkey_reassemble(update, k);
bch2_cut_front(insert->k.p, update);
- ret = bch2_trans_update(trans, &iter, update, flags);
+ bch2_trans_copy_iter(&update_iter, &iter);
+ update_iter.pos = update->k.p;
+ ret = bch2_trans_update(trans, &update_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ flags);
+ bch2_trans_iter_exit(trans, &update_iter);
+
if (ret)
goto err;
-
goto out;
}
next:
@@ -1086,6 +1140,39 @@ err:
return ret;
}
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans,
+ enum btree_id btree_id, struct bpos pos)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u32 snapshot = pos.snapshot;
+ int ret;
+
+ if (!bch2_snapshot_parent(trans->c, pos.snapshot))
+ return 0;
+
+ pos.snapshot++;
+
+ for_each_btree_key(trans, iter, btree_id, pos,
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ if (bkey_cmp(k.k->p, pos))
+ break;
+
+ if (bch2_snapshot_is_ancestor(trans->c, snapshot,
+ k.k->p.snapshot)) {
+ ret = !bkey_whiteout(k.k);
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags)
{
@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
btree_insert_entry_cmp(i - 1, i) >= 0);
#endif
+ if (bkey_deleted(&n.k->k) &&
+ (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
+ int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (ret)
+ n.k->k.type = KEY_TYPE_whiteout;
+ }
+
/*
* Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites: