summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/btree_cache.c1
-rw-r--r--fs/bcachefs/btree_io.c99
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/btree_update_interior.h29
-rw-r--r--fs/bcachefs/btree_update_leaf.c45
5 files changed, 119 insertions, 56 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 035da548737b..8eed82ac41f1 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -558,7 +558,6 @@ out:
b->sib_u64s[0] = 0;
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
- b->uncompacted_whiteout_u64s = 0;
bch2_btree_keys_init(b, &c->expensive_debug_checks);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 8532087f2754..9acf59c0710d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -79,6 +79,81 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
}
+static void sort_bkey_ptrs(const struct btree *bt,
+ struct bkey_packed **ptrs, unsigned nr)
+{
+ unsigned n = nr, a = nr / 2, b, c, d;
+
+ if (!a)
+ return;
+
+ /* Heap sort: see lib/sort.c: */
+ while (1) {
+ if (a)
+ a--;
+ else if (--n)
+ swap(ptrs[0], ptrs[n]);
+ else
+ break;
+
+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
+ b = bkey_cmp_packed(bt,
+ ptrs[c],
+ ptrs[d]) >= 0 ? c : d;
+ if (d == n)
+ b = c;
+
+ while (b != a &&
+ bkey_cmp_packed(bt,
+ ptrs[a],
+ ptrs[b]) >= 0)
+ b = (b - 1) / 2;
+ c = b;
+ while (b != a) {
+ b = (b - 1) / 2;
+ swap(ptrs[b], ptrs[c]);
+ }
+ }
+}
+
+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
+{
+ struct bkey_packed *new_whiteouts, **whiteout_ptrs, *k;
+ bool used_mempool1 = false, used_mempool2 = false;
+ unsigned order, i, nr = 0;
+
+ if (!b->whiteout_u64s)
+ return;
+
+ order = get_order(b->whiteout_u64s * sizeof(u64));
+
+ new_whiteouts = btree_bounce_alloc(c, order, &used_mempool1);
+ whiteout_ptrs = btree_bounce_alloc(c, order, &used_mempool2);
+
+ for (k = unwritten_whiteouts_start(c, b);
+ k != unwritten_whiteouts_end(c, b);
+ k = bkey_next(k))
+ whiteout_ptrs[nr++] = k;
+
+ sort_bkey_ptrs(b, whiteout_ptrs, nr);
+
+ k = new_whiteouts;
+
+ for (i = 0; i < nr; i++) {
+ bkey_copy(k, whiteout_ptrs[i]);
+ k = bkey_next(k);
+ }
+
+ verify_no_dups(b, new_whiteouts,
+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
+
+ memcpy_u64s(unwritten_whiteouts_start(c, b),
+ new_whiteouts, b->whiteout_u64s);
+
+ btree_bounce_free(c, order, used_mempool2, whiteout_ptrs);
+ btree_bounce_free(c, order, used_mempool1, new_whiteouts);
+}
+
static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
bool compacting,
enum compact_mode mode)
@@ -116,6 +191,8 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
if (!whiteout_u64s)
return false;
+ bch2_sort_whiteouts(c, b);
+
sort_iter_init(&sort_iter, b);
whiteout_u64s += b->whiteout_u64s;
@@ -171,11 +248,14 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
if (bkey_deleted(k) && btree_node_is_extents(b))
continue;
+ BUG_ON(bkey_whiteout(k) &&
+ k->needs_whiteout &&
+ bkey_written(b, k));
+
if (bkey_whiteout(k) && !k->needs_whiteout)
continue;
if (bkey_whiteout(k)) {
- unreserve_whiteout(b, k);
memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
set_bkeyp_val_u64s(f, u_pos, 0);
u_pos = bkey_next(u_pos);
@@ -1342,21 +1422,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
- /*
- * We can't block on six_lock_write() here; another thread might be
- * trying to get a journal reservation with read locks held, and getting
- * a journal reservation might be blocked on flushing the journal and
- * doing btree writes:
- */
- if (lock_type_held == SIX_LOCK_intent &&
- six_trylock_write(&b->c.lock)) {
- __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN);
- six_unlock_write(&b->c.lock);
- } else {
- __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN_NO_WRITE_LOCK);
- }
-
- BUG_ON(b->uncompacted_whiteout_u64s);
+ bch2_sort_whiteouts(c, b);
sort_iter_init(&sort_iter, b);
@@ -1545,7 +1611,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
return false;
BUG_ON(b->whiteout_u64s);
- BUG_ON(b->uncompacted_whiteout_u64s);
clear_btree_node_just_written(b);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 3a26a8802e86..e370474fd8c2 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -97,7 +97,6 @@ struct btree {
struct btree_nr_keys nr;
u16 sib_u64s[2];
u16 whiteout_u64s;
- u16 uncompacted_whiteout_u64s;
u8 page_order;
u8 unpack_fn_len;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 85f1320fa7b1..8f9d4a0b68ea 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -251,8 +251,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
void *end)
{
ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
- b->whiteout_u64s +
- b->uncompacted_whiteout_u64s;
+ b->whiteout_u64s;
ssize_t total = c->opts.btree_node_size << 6;
return total - used;
@@ -302,23 +301,19 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
return NULL;
}
-static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k)
+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
+ struct bkey_packed *k)
{
- if (bkey_written(b, k)) {
- EBUG_ON(b->uncompacted_whiteout_u64s <
- bkeyp_key_u64s(&b->format, k));
- b->uncompacted_whiteout_u64s -=
- bkeyp_key_u64s(&b->format, k);
- }
-}
+ unsigned u64s = bkeyp_key_u64s(&b->format, k);
+ struct bkey_packed *dst;
-static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
-{
- if (bkey_written(b, k)) {
- BUG_ON(!k->needs_whiteout);
- b->uncompacted_whiteout_u64s +=
- bkeyp_key_u64s(&b->format, k);
- }
+ BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b));
+
+ b->whiteout_u64s += bkeyp_key_u64s(&b->format, k);
+ dst = unwritten_whiteouts_start(c, b);
+ memcpy_u64s(dst, k, u64s);
+ dst->u64s = u64s;
+ dst->type = KEY_TYPE_deleted;
}
/*
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 6e5405f0b372..d13f1fc75bdf 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -104,38 +104,43 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
return true;
}
- insert->k.needs_whiteout = k->needs_whiteout;
-
btree_account_key_drop(b, k);
- if (k >= btree_bset_last(b)->start) {
- clobber_u64s = k->u64s;
+ if (bkey_whiteout(&insert->k)) {
+ unsigned clobber_u64s = k->u64s, new_u64s = k->u64s;
+
+ k->type = KEY_TYPE_deleted;
- /*
- * If we're deleting, and the key we're deleting doesn't
- * need a whiteout (it wasn't overwriting a key that had
- * been written to disk) - just delete it:
- */
- if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
+ if (k->needs_whiteout) {
+ push_whiteout(iter->trans->c, b, k);
+ k->needs_whiteout = false;
+ }
+
+ if (k >= btree_bset_last(b)->start) {
bch2_bset_delete(b, k, clobber_u64s);
- bch2_btree_node_iter_fix(iter, b, node_iter,
- k, clobber_u64s, 0);
- return true;
+ new_u64s = 0;
}
+ bch2_btree_node_iter_fix(iter, b, node_iter, k,
+ clobber_u64s, new_u64s);
+ return true;
+
+ }
+
+ if (k >= btree_bset_last(b)->start) {
+ clobber_u64s = k->u64s;
goto overwrite;
}
+ insert->k.needs_whiteout = k->needs_whiteout;
+ k->needs_whiteout = false;
k->type = KEY_TYPE_deleted;
+ /*
+ * XXX: we should be able to do this without two calls to
+ * bch2_btree_node_iter_fix:
+ */
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
-
- if (bkey_whiteout(&insert->k)) {
- reserve_whiteout(b, k);
- return true;
- } else {
- k->needs_whiteout = false;
- }
} else {
/*
* Deleting, but the key to delete wasn't found - nothing to do: