summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2019-11-29 14:08:51 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:08:32 -0400
commitc9bebae65eade6529f9d3068a6da42fc56664bfe (patch)
treeebf834840eb1ad862f684aae12ed3cb15f5d9388
parent183797e31d43ce2fbfc596ff3f4d034f1ba144d0 (diff)
downloadlwn-c9bebae65eade6529f9d3068a6da42fc56664bfe.tar.gz
lwn-c9bebae65eade6529f9d3068a6da42fc56664bfe.zip
bcachefs: Whiteout changes
More prep work for snapshots: extents will soon be using KEY_TYPE_deleted for whiteouts, with 0 size. But we wen't be able to keep these whiteouts with the rest of the extents in the btree node, due to sorting invariants breaking. We can deal with this by immediately moving the new whiteouts to the unwritten whiteouts area - this just means those whiteouts won't be sorted, so we need new code to sort them prior to merging them with the rest of the keys to be written. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/btree_cache.c1
-rw-r--r--fs/bcachefs/btree_io.c99
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/btree_update_interior.h29
-rw-r--r--fs/bcachefs/btree_update_leaf.c45
5 files changed, 119 insertions, 56 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 035da548737b..8eed82ac41f1 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -558,7 +558,6 @@ out:
b->sib_u64s[0] = 0;
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
- b->uncompacted_whiteout_u64s = 0;
bch2_btree_keys_init(b, &c->expensive_debug_checks);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 8532087f2754..9acf59c0710d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -79,6 +79,81 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
}
+static void sort_bkey_ptrs(const struct btree *bt,
+ struct bkey_packed **ptrs, unsigned nr)
+{
+ unsigned n = nr, a = nr / 2, b, c, d;
+
+ if (!a)
+ return;
+
+ /* Heap sort: see lib/sort.c: */
+ while (1) {
+ if (a)
+ a--;
+ else if (--n)
+ swap(ptrs[0], ptrs[n]);
+ else
+ break;
+
+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
+ b = bkey_cmp_packed(bt,
+ ptrs[c],
+ ptrs[d]) >= 0 ? c : d;
+ if (d == n)
+ b = c;
+
+ while (b != a &&
+ bkey_cmp_packed(bt,
+ ptrs[a],
+ ptrs[b]) >= 0)
+ b = (b - 1) / 2;
+ c = b;
+ while (b != a) {
+ b = (b - 1) / 2;
+ swap(ptrs[b], ptrs[c]);
+ }
+ }
+}
+
+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
+{
+ struct bkey_packed *new_whiteouts, **whiteout_ptrs, *k;
+ bool used_mempool1 = false, used_mempool2 = false;
+ unsigned order, i, nr = 0;
+
+ if (!b->whiteout_u64s)
+ return;
+
+ order = get_order(b->whiteout_u64s * sizeof(u64));
+
+ new_whiteouts = btree_bounce_alloc(c, order, &used_mempool1);
+ whiteout_ptrs = btree_bounce_alloc(c, order, &used_mempool2);
+
+ for (k = unwritten_whiteouts_start(c, b);
+ k != unwritten_whiteouts_end(c, b);
+ k = bkey_next(k))
+ whiteout_ptrs[nr++] = k;
+
+ sort_bkey_ptrs(b, whiteout_ptrs, nr);
+
+ k = new_whiteouts;
+
+ for (i = 0; i < nr; i++) {
+ bkey_copy(k, whiteout_ptrs[i]);
+ k = bkey_next(k);
+ }
+
+ verify_no_dups(b, new_whiteouts,
+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
+
+ memcpy_u64s(unwritten_whiteouts_start(c, b),
+ new_whiteouts, b->whiteout_u64s);
+
+ btree_bounce_free(c, order, used_mempool2, whiteout_ptrs);
+ btree_bounce_free(c, order, used_mempool1, new_whiteouts);
+}
+
static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
bool compacting,
enum compact_mode mode)
@@ -116,6 +191,8 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
if (!whiteout_u64s)
return false;
+ bch2_sort_whiteouts(c, b);
+
sort_iter_init(&sort_iter, b);
whiteout_u64s += b->whiteout_u64s;
@@ -171,11 +248,14 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
if (bkey_deleted(k) && btree_node_is_extents(b))
continue;
+ BUG_ON(bkey_whiteout(k) &&
+ k->needs_whiteout &&
+ bkey_written(b, k));
+
if (bkey_whiteout(k) && !k->needs_whiteout)
continue;
if (bkey_whiteout(k)) {
- unreserve_whiteout(b, k);
memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
set_bkeyp_val_u64s(f, u_pos, 0);
u_pos = bkey_next(u_pos);
@@ -1342,21 +1422,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
- /*
- * We can't block on six_lock_write() here; another thread might be
- * trying to get a journal reservation with read locks held, and getting
- * a journal reservation might be blocked on flushing the journal and
- * doing btree writes:
- */
- if (lock_type_held == SIX_LOCK_intent &&
- six_trylock_write(&b->c.lock)) {
- __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN);
- six_unlock_write(&b->c.lock);
- } else {
- __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN_NO_WRITE_LOCK);
- }
-
- BUG_ON(b->uncompacted_whiteout_u64s);
+ bch2_sort_whiteouts(c, b);
sort_iter_init(&sort_iter, b);
@@ -1545,7 +1611,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
return false;
BUG_ON(b->whiteout_u64s);
- BUG_ON(b->uncompacted_whiteout_u64s);
clear_btree_node_just_written(b);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 3a26a8802e86..e370474fd8c2 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -97,7 +97,6 @@ struct btree {
struct btree_nr_keys nr;
u16 sib_u64s[2];
u16 whiteout_u64s;
- u16 uncompacted_whiteout_u64s;
u8 page_order;
u8 unpack_fn_len;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 85f1320fa7b1..8f9d4a0b68ea 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -251,8 +251,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
void *end)
{
ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
- b->whiteout_u64s +
- b->uncompacted_whiteout_u64s;
+ b->whiteout_u64s;
ssize_t total = c->opts.btree_node_size << 6;
return total - used;
@@ -302,23 +301,19 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
return NULL;
}
-static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k)
+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
+ struct bkey_packed *k)
{
- if (bkey_written(b, k)) {
- EBUG_ON(b->uncompacted_whiteout_u64s <
- bkeyp_key_u64s(&b->format, k));
- b->uncompacted_whiteout_u64s -=
- bkeyp_key_u64s(&b->format, k);
- }
-}
+ unsigned u64s = bkeyp_key_u64s(&b->format, k);
+ struct bkey_packed *dst;
-static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
-{
- if (bkey_written(b, k)) {
- BUG_ON(!k->needs_whiteout);
- b->uncompacted_whiteout_u64s +=
- bkeyp_key_u64s(&b->format, k);
- }
+ BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b));
+
+ b->whiteout_u64s += bkeyp_key_u64s(&b->format, k);
+ dst = unwritten_whiteouts_start(c, b);
+ memcpy_u64s(dst, k, u64s);
+ dst->u64s = u64s;
+ dst->type = KEY_TYPE_deleted;
}
/*
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 6e5405f0b372..d13f1fc75bdf 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -104,38 +104,43 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
return true;
}
- insert->k.needs_whiteout = k->needs_whiteout;
-
btree_account_key_drop(b, k);
- if (k >= btree_bset_last(b)->start) {
- clobber_u64s = k->u64s;
+ if (bkey_whiteout(&insert->k)) {
+ unsigned clobber_u64s = k->u64s, new_u64s = k->u64s;
+
+ k->type = KEY_TYPE_deleted;
- /*
- * If we're deleting, and the key we're deleting doesn't
- * need a whiteout (it wasn't overwriting a key that had
- * been written to disk) - just delete it:
- */
- if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
+ if (k->needs_whiteout) {
+ push_whiteout(iter->trans->c, b, k);
+ k->needs_whiteout = false;
+ }
+
+ if (k >= btree_bset_last(b)->start) {
bch2_bset_delete(b, k, clobber_u64s);
- bch2_btree_node_iter_fix(iter, b, node_iter,
- k, clobber_u64s, 0);
- return true;
+ new_u64s = 0;
}
+ bch2_btree_node_iter_fix(iter, b, node_iter, k,
+ clobber_u64s, new_u64s);
+ return true;
+
+ }
+
+ if (k >= btree_bset_last(b)->start) {
+ clobber_u64s = k->u64s;
goto overwrite;
}
+ insert->k.needs_whiteout = k->needs_whiteout;
+ k->needs_whiteout = false;
k->type = KEY_TYPE_deleted;
+ /*
+ * XXX: we should be able to do this without two calls to
+ * bch2_btree_node_iter_fix:
+ */
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
-
- if (bkey_whiteout(&insert->k)) {
- reserve_whiteout(b, k);
- return true;
- } else {
- k->needs_whiteout = false;
- }
} else {
/*
* Deleting, but the key to delete wasn't found - nothing to do: