author    Kent Overstreet <kent.overstreet@gmail.com>  2021-07-11 16:41:14 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-10-22 17:09:08 -0400
commit    0a70089062c63b0861217d9ffb76d3ac073d3fde (patch)
tree      6ad36372483f209b4145ec231000276888873d9f /fs/bcachefs/btree_io.c
parent    2680325b7803c336bb675addfe38c06c44e54273 (diff)
bcachefs: Kick off btree node writes from write completions
This is a performance improvement: it removes the need to wait for the
in-flight btree write to complete before kicking off the next one. That
will be needed to avoid a performance regression with the upcoming patch
to update btree ptrs after every btree write.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
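The core of the change is an atomic flags handshake in the write-completion
handler. Below is a minimal user-space sketch of that idiom (not the kernel
code): C11 atomics stand in for the kernel's READ_ONCE()/cmpxchg(), and the
flag names are abbreviated from the BTREE_NODE_* bits in the patch.

#include <stdatomic.h>
#include <stdbool.h>

#define NODE_need_write      (1U << 0)
#define NODE_write_in_flight (1U << 1)

/*
 * Runs in the write-completion path.  Returns true when another write
 * was requested while ours was in flight: the caller then restarts the
 * write immediately instead of clearing the in-flight bit and waking
 * waiters.
 */
static bool write_done_should_restart(_Atomic unsigned int *flags)
{
	unsigned int old = atomic_load(flags), new;

	do {
		if (old & NODE_need_write)
			return true;            /* keep in-flight set; go again */
		new = old & ~NODE_write_in_flight;
		/* on CAS failure, 'old' is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return false;                           /* node is clean; wake waiters */
}

The kernel version in the diff below does the same dance with cmpxchg() on
b->flags, then either calls wake_up_bit() or takes a read lock and re-checks
before restarting the write.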
Diffstat (limited to 'fs/bcachefs/btree_io.c')
-rw-r--r--  fs/bcachefs/btree_io.c | 61
1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2974b2ad6966..1d4b5fcd1e39 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1562,9 +1562,47 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
struct btree_write *w = btree_prev_write(b);
+ unsigned long old, new, v;
bch2_btree_complete_write(c, b, w);
- bch2_btree_node_io_unlock(b);
+
+ v = READ_ONCE(b->flags);
+ do {
+ old = new = v;
+
+ if (old & (1U << BTREE_NODE_need_write))
+ goto do_write;
+
+ new &= ~(1U << BTREE_NODE_write_in_flight);
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+ return;
+
+do_write:
+ six_lock_read(&b->c.lock, NULL, NULL);
+ v = READ_ONCE(b->flags);
+ do {
+ old = new = v;
+
+ if ((old & (1U << BTREE_NODE_dirty)) &&
+ (old & (1U << BTREE_NODE_need_write)) &&
+ !(old & (1U << BTREE_NODE_never_write)) &&
+ btree_node_may_write(b)) {
+ new &= ~(1U << BTREE_NODE_dirty);
+ new &= ~(1U << BTREE_NODE_need_write);
+ new |= (1U << BTREE_NODE_write_in_flight);
+ new |= (1U << BTREE_NODE_just_written);
+ new ^= (1U << BTREE_NODE_write_idx);
+ } else {
+ new &= ~(1U << BTREE_NODE_write_in_flight);
+ }
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ if (new & (1U << BTREE_NODE_write_in_flight))
+ __bch2_btree_node_write(c, b, true);
+
+ six_unlock_read(&b->c.lock);
}
static void bch2_btree_node_write_error(struct bch_fs *c,
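On the do_write path above, the handler takes a six read lock and runs a
second claim loop: the write is restarted only if the node is still dirty
and still needs a write (the real code additionally checks
BTREE_NODE_never_write and btree_node_may_write()). Claiming also toggles
the write-index bit, so keys inserted while this write is in flight
accumulate in the node's other buffer. Continuing the user-space sketch
from above, with the same caveats:

#define NODE_dirty     (1U << 2)
#define NODE_write_idx (1U << 3)

/*
 * Returns true if we claimed the next write, false if we lost the race
 * and simply cleared the in-flight bit.
 */
static bool claim_next_write(_Atomic unsigned int *flags)
{
	unsigned int old = atomic_load(flags), new;

	do {
		new = old;
		if ((old & NODE_dirty) && (old & NODE_need_write)) {
			new &= ~(NODE_dirty | NODE_need_write);
			new |= NODE_write_in_flight;
			new ^= NODE_write_idx;  /* flip double-buffer index */
		} else {
			new &= ~NODE_write_in_flight;
		}
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return new & NODE_write_in_flight;
}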
@@ -1729,7 +1767,7 @@ static void btree_write_submit(struct work_struct *work)
bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
}
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
{
struct btree_write_bio *wbio;
struct bset_tree *t;
@@ -1746,7 +1784,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
bool validate_before_checksum = false;
void *data;
- BUG_ON(btree_node_write_in_flight(b));
+ if (already_started)
+ goto do_write;
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
@@ -1770,14 +1809,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
if (old & (1 << BTREE_NODE_never_write))
return;
- if (old & (1 << BTREE_NODE_write_in_flight)) {
- /*
- * XXX waiting on btree writes with btree locks held -
- * this can deadlock, and we hit the write error path
- */
- bch2_btree_node_wait_on_write(b);
- continue;
- }
+ BUG_ON(old & (1 << BTREE_NODE_write_in_flight));
new &= ~(1 << BTREE_NODE_dirty);
new &= ~(1 << BTREE_NODE_need_write);
@@ -1786,6 +1818,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
new ^= (1 << BTREE_NODE_write_idx);
} while (cmpxchg_acquire(&b->flags, old, new) != old);
+ if (new & (1U << BTREE_NODE_need_write))
+ return;
+do_write:
atomic_dec(&c->btree_cache.dirty);
BUG_ON(btree_node_fake(b));
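Because the completion handler now restarts writes itself, a caller of
__bch2_btree_node_write() can never legitimately observe a write already in
flight when it claims one, so the old wait-with-locks-held workaround (the
XXX deadlock comment) becomes a BUG_ON(). The new already_started flag
gives the function two entry points: ordinary callers (which, as the hunks
below show, pass false) let it claim the write itself, while the completion
path has already done the flags transition and jumps straight to
submission. A minimal sketch of that shape, with hypothetical helper names:

#include <stdbool.h>
#include <stdio.h>

struct node { unsigned int flags; };

/* Hypothetical stand-ins for the flag transition and bio submission: */
static bool claim_write(struct node *n)  { (void)n; return true; }
static void submit_write(struct node *n) { (void)n; puts("submitted"); }

static void node_write(struct node *n, bool already_started)
{
	/*
	 * The completion handler claimed the write under its own
	 * cmpxchg loop, so it skips the claim step here; the claim
	 * must happen exactly once per write.
	 */
	if (!already_started && !claim_write(n))
		return;
	submit_write(n);
}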
@@ -2041,7 +2076,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (lock_type_held == SIX_LOCK_intent ||
(lock_type_held == SIX_LOCK_read &&
six_lock_tryupgrade(&b->c.lock))) {
- __bch2_btree_node_write(c, b);
+ __bch2_btree_node_write(c, b, false);
/* don't cycle lock unnecessarily: */
if (btree_node_just_written(b) &&
@@ -2053,7 +2088,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (lock_type_held == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
} else {
- __bch2_btree_node_write(c, b);
+ __bch2_btree_node_write(c, b, false);
if (lock_type_held == SIX_LOCK_write &&
btree_node_just_written(b))
bch2_btree_post_write_cleanup(c, b);