summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2022-10-28 17:08:41 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:09:45 -0400
commit46fee692eebb850b8478531e185fb5a5f942d3ea (patch)
tree332347a26a8887b98e5bcb40456aa69825413500 /fs
parent8852501fe570c4956c0e29246e1e5636f09b58fb (diff)
downloadlwn-46fee692eebb850b8478531e185fb5a5f942d3ea.tar.gz
lwn-46fee692eebb850b8478531e185fb5a5f942d3ea.zip
bcachefs: Improved btree write statistics
This replaces sysfs btree_avg_write_size with btree_write_stats, which now breaks out statistics by the source of the btree write. Btree writes that are too small are a source of inefficiency, and excessive btree resort overhead - this will let us see what's causing them. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/bcachefs.h29
-rw-r--r--fs/bcachefs/btree_cache.c11
-rw-r--r--fs/bcachefs/btree_io.c46
-rw-r--r--fs/bcachefs/btree_io.h10
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/btree_update_interior.c1
-rw-r--r--fs/bcachefs/btree_update_interior.h1
-rw-r--r--fs/bcachefs/btree_update_leaf.c2
-rw-r--r--fs/bcachefs/sysfs.c16
9 files changed, 91 insertions, 26 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 544621dd4af4..18fe09cdae4d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -596,6 +596,23 @@ typedef struct {
#define BCACHEFS_ROOT_SUBVOL_INUM \
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
+#define BCH_BTREE_WRITE_TYPES() \
+ x(initial, 0) \
+ x(init_next_bset, 1) \
+ x(cache_reclaim, 2) \
+ x(journal_reclaim, 3) \
+ x(interior, 4)
+
+enum btree_write_type {
+#define x(t, n) BTREE_WRITE_##t,
+ BCH_BTREE_WRITE_TYPES()
+#undef x
+ BTREE_WRITE_TYPE_NR,
+};
+
+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
+#define BTREE_WRITE_TYPE_BITS ilog2(BTREE_WRITE_TYPE_MASK)
+
struct bch_fs {
struct closure cl;
@@ -705,6 +722,13 @@ struct bch_fs {
struct workqueue_struct *btree_interior_update_worker;
struct work_struct btree_interior_update_work;
+ /* btree_io.c: */
+ spinlock_t btree_write_error_lock;
+ struct btree_write_stats {
+ atomic64_t nr;
+ atomic64_t bytes;
+ } btree_write_stats[BTREE_WRITE_TYPE_NR];
+
/* btree_iter.c: */
struct mutex btree_trans_lock;
struct list_head btree_trans_list;
@@ -880,11 +904,6 @@ mempool_t bio_bounce_pages;
struct bio_set dio_write_bioset;
struct bio_set dio_read_bioset;
-
- atomic64_t btree_writes_nr;
- atomic64_t btree_writes_sectors;
- spinlock_t btree_write_error_lock;
-
/* ERRORS */
struct list_head fsck_errors;
struct mutex fsck_error_lock;
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 135c3ea1377d..709453a909fc 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -241,9 +241,11 @@ wait_on_io:
* the post write cleanup:
*/
if (bch2_verify_btree_ondisk)
- bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
+ bch2_btree_node_write(c, b, SIX_LOCK_intent,
+ BTREE_WRITE_cache_reclaim);
else
- __bch2_btree_node_write(c, b, 0);
+ __bch2_btree_node_write(c, b,
+ BTREE_WRITE_cache_reclaim);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@@ -347,7 +349,7 @@ restart:
six_trylock_read(&b->c.lock)) {
list_move(&bc->live, &b->list);
mutex_unlock(&bc->lock);
- __bch2_btree_node_write(c, b, 0);
+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
six_unlock_read(&b->c.lock);
if (touched >= nr)
goto out_nounlock;
@@ -624,6 +626,7 @@ out:
b->flags = 0;
b->written = 0;
b->nsets = 0;
+ b->write_type = 0;
b->sib_u64s[0] = 0;
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
@@ -1067,7 +1070,7 @@ wait_on_io:
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
if (btree_node_dirty(b)) {
- __bch2_btree_node_write(c, b, 0);
+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
goto wait_on_io;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index a322a8367688..56f9637d2ca6 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -471,7 +471,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
};
if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
- bch2_btree_node_write(c, b, SIX_LOCK_write, 0);
+ bch2_btree_node_write(c, b, SIX_LOCK_write,
+ BTREE_WRITE_init_next_bset);
reinit_iter = true;
}
}
@@ -1646,7 +1647,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
} while ((v = cmpxchg(&b->flags, old, new)) != old);
if (new & (1U << BTREE_NODE_write_in_flight))
- __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
+ __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|b->write_type);
else
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}
@@ -1795,6 +1796,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
bool used_mempool;
unsigned long old, new;
bool validate_before_checksum = false;
+ enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
void *data;
int ret;
@@ -1841,6 +1843,12 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
if (new & (1U << BTREE_NODE_need_write))
return;
do_write:
+ if ((flags & BTREE_WRITE_ONLY_IF_NEED))
+ type = b->write_type;
+ b->write_type = 0;
+
+ BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
+
atomic_dec(&c->btree_cache.dirty);
BUG_ON(btree_node_fake(b));
@@ -2015,8 +2023,8 @@ do_write:
bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
cpu_to_le16(b->written);
- atomic64_inc(&c->btree_writes_nr);
- atomic64_add(sectors_to_write, &c->btree_writes_sectors);
+ atomic64_inc(&c->btree_write_stats[type].nr);
+ atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->io_complete_wq, &wbio->work);
@@ -2144,3 +2152,33 @@ bool bch2_btree_flush_all_writes(struct bch_fs *c)
{
return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
}
+
+const char * const bch2_btree_write_types[] = {
+#define x(t, n) [n] = #t,
+ BCH_BTREE_WRITE_TYPES()
+ NULL
+};
+
+void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 10);
+
+ prt_tab(out);
+ prt_str(out, "nr");
+ prt_tab(out);
+ prt_str(out, "size");
+ prt_newline(out);
+
+ for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) {
+ u64 nr = atomic64_read(&c->btree_write_stats[i].nr);
+ u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes);
+
+ prt_printf(out, "%s:", bch2_btree_write_types[i]);
+ prt_tab(out);
+ prt_u64(out, nr);
+ prt_tab(out);
+ prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0);
+ prt_newline(out);
+ }
+}
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 8af853642123..4b1810ad7d91 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -139,8 +139,12 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
-#define BTREE_WRITE_ONLY_IF_NEED (1U << 0)
-#define BTREE_WRITE_ALREADY_STARTED (1U << 1)
+enum btree_write_flags {
+ __BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
+ __BTREE_WRITE_ALREADY_STARTED,
+};
+#define BTREE_WRITE_ONLY_IF_NEED (1U << __BTREE_WRITE_ONLY_IF_NEED )
+#define BTREE_WRITE_ALREADY_STARTED (1U << __BTREE_WRITE_ALREADY_STARTED)
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
@@ -219,4 +223,6 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
bn->min_key = bpos_nosnap_successor(bn->min_key);
}
+void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
+
#endif /* _BCACHEFS_BTREE_IO_H */
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index ea844dd7a16b..38c4754dbd7e 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -77,6 +77,7 @@ struct btree {
u8 nsets;
u8 nr_key_bits;
u16 version_ondisk;
+ u8 write_type;
struct bkey_format format;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 0150943074fa..e0483abadd72 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1308,6 +1308,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
+ b->write_type = BTREE_WRITE_interior;
printbuf_exit(&buf);
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index dabe81596544..2e6d220c3bcd 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -282,6 +282,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
struct bkey_packed k;
BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
+ EBUG_ON(btree_node_just_written(b));
if (!bkey_pack_pos(&k, pos, b)) {
struct bkey *u = (void *) &k;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index fc53958e5619..8cc271030be6 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -181,6 +181,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
new |= 1 << BTREE_NODE_need_write;
} while ((v = cmpxchg(&b->flags, old, new)) != old);
+ b->write_type = BTREE_WRITE_journal_reclaim;
+
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 76301209898f..db3d377ba10c 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -183,7 +183,7 @@ read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
read_attribute(congested);
-read_attribute(btree_avg_write_size);
+read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
@@ -250,14 +250,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
return ret;
}
-static size_t bch2_btree_avg_write_size(struct bch_fs *c)
-{
- u64 nr = atomic64_read(&c->btree_writes_nr);
- u64 sectors = atomic64_read(&c->btree_writes_sectors);
-
- return nr ? div64_u64(sectors, nr) : 0;
-}
-
static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
{
long ret = 0;
@@ -396,7 +388,9 @@ SHOW(bch2_fs)
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
- sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
+
+ if (attr == &sysfs_btree_write_stats)
+ bch2_btree_write_stats_to_text(out, c);
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
@@ -557,7 +551,7 @@ SYSFS_OPS(bch2_fs);
struct attribute *bch2_fs_files[] = {
&sysfs_minor,
&sysfs_btree_cache_size,
- &sysfs_btree_avg_write_size,
+ &sysfs_btree_write_stats,
&sysfs_promote_whole_extents,