author    Kent Overstreet <kent.overstreet@gmail.com>  2018-07-24 14:54:39 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-10-22 17:08:08 -0400
commit    5b650fd11a00271b9d4c033d1d0780826e050137 (patch)
tree      4494f3164166d229041626880f99e6057836381b /fs/bcachefs/buckets.c
parent    09f3297ac90aae99d8f7e776c8df5dd0d32c1ba9 (diff)
bcachefs: Account for internal fragmentation better
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/buckets.c')
-rw-r--r--  fs/bcachefs/buckets.c | 115
1 file changed, 67 insertions(+), 48 deletions(-)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index c0dc0ce1f585..56b197bff4f0 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -73,6 +73,8 @@
#include <linux/preempt.h>
+static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+
#ifdef DEBUG_BUCKETS
#define lg_local_lock lg_global_lock
@@ -84,18 +86,24 @@ static void bch2_fs_stats_verify(struct bch_fs *c)
__bch2_fs_usage_read(c);
unsigned i, j;
- for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
- for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++)
- if ((s64) stats.s[i].data[j] < 0)
- panic("replicas %u %s underflow: %lli\n",
+ for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
+ for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++)
+ if ((s64) stats.replicas[i].data[j] < 0)
+ panic("replicas %u %s sectors underflow: %lli\n",
i + 1, bch_data_types[j],
- stats.s[i].data[j]);
+ stats.replicas[i].data[j]);
- if ((s64) stats.s[i].persistent_reserved < 0)
+ if ((s64) stats.replicas[i].persistent_reserved < 0)
panic("replicas %u reserved underflow: %lli\n",
- i + 1, stats.s[i].persistent_reserved);
+ i + 1, stats.replicas[i].persistent_reserved);
}
+ for (j = 0; j < ARRAY_SIZE(stats.buckets); j++)
+ if ((s64) stats.buckets[j] < 0)
+ panic("%s buckets underflow: %lli\n",
+ bch_data_types[j],
+ stats.buckets[j]);
+
if ((s64) stats.online_reserved < 0)
panic("sectors_online_reserved underflow: %lli\n",
stats.online_reserved);
@@ -238,6 +246,7 @@ bch2_fs_usage_read(struct bch_fs *c)
}
struct fs_usage_sum {
+ u64 hidden;
u64 data;
u64 reserved;
};
@@ -247,14 +256,21 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
struct fs_usage_sum sum = { 0 };
unsigned i, j;
- for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
- u64 a = 0;
+ /*
+ * For superblock and journal we count bucket usage, not sector usage,
+ * because any internal fragmentation should _not_ be counted as
+ * free space:
+ */
+ for (j = 1; j < BCH_DATA_BTREE; j++)
+ sum.hidden += stats.buckets[j];
- for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++)
- a += stats.s[i].data[j];
+ for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
+ for (j = BCH_DATA_BTREE;
+ j < ARRAY_SIZE(stats.replicas[i].data);
+ j++)
+ sum.data += stats.replicas[i].data[j] * (i + 1);
- sum.data += a * (i + 1);
- sum.reserved += stats.s[i].persistent_reserved * (i + 1);
+ sum.reserved += stats.replicas[i].persistent_reserved * (i + 1);
}
sum.reserved += stats.online_reserved;
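The hunk above is the core of the patch: superblock and journal space now feeds a separate sum.hidden term at whole-bucket granularity (stats.buckets[j]), while btree and user data are still summed per replica count at sector granularity, so a partially written journal bucket no longer shows up as free space. A minimal standalone sketch of that arithmetic, with toy sizes and field names borrowed from the patch (the real struct bch_fs_usage has more fields):

#include <stdio.h>
#include <stdint.h>

#define TOY_NR_TYPES 4		/* 0 none, 1 sb, 2 journal, 3 user (toy) */
#define TOY_BTREE    3		/* stand-in for BCH_DATA_BTREE */

struct toy_usage {
	uint64_t buckets[TOY_NR_TYPES];		/* sectors, bucket granularity */
	struct {
		uint64_t data[TOY_NR_TYPES];	/* sectors, per replica count */
		uint64_t persistent_reserved;
	} replicas[2];
};

int main(void)
{
	/* one 512-sector journal bucket, only partially written: */
	struct toy_usage u = { .buckets = { 0, 0, 512, 0 } };
	uint64_t hidden = 0, data = 0;
	unsigned i, j;

	u.replicas[1].data[TOY_BTREE] = 200;	/* 200 sectors stored 2x */

	for (j = 1; j < TOY_BTREE; j++)		/* sb + journal */
		hidden += u.buckets[j];		/* 512: fragmentation isn't free */
	for (i = 0; i < 2; i++)
		for (j = TOY_BTREE; j < TOY_NR_TYPES; j++)
			data += u.replicas[i].data[j] * (i + 1);	/* 400 */

	printf("hidden=%llu data=%llu used=%llu\n",
	       (unsigned long long)hidden,
	       (unsigned long long)data,
	       (unsigned long long)(hidden + data));	/* 512 400 912 */
	return 0;
}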
@@ -270,14 +286,14 @@ static u64 reserve_factor(u64 r)
static u64 avail_factor(u64 r)
{
- return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1;
+ return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}
-u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
+static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
{
struct fs_usage_sum sum = __fs_usage_sum(stats);
- return sum.data + reserve_factor(sum.reserved);
+ return sum.hidden + sum.data + reserve_factor(sum.reserved);
}
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
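Easy to miss above: the avail_factor() change is an operator-precedence fix, not a cleanup. In the old expression the + 1 bound to the quotient, so the function returned r + 1 rather than scaling r down. A standalone before/after comparison, assuming RESERVE_FACTOR is 6 (its value in buckets.c around this time, but verify against your tree):

#include <stdio.h>
#include <stdint.h>

#define RESERVE_FACTOR 6	/* assumption -- check your tree */

static uint64_t avail_old(uint64_t r)
{
	/* parses as ((r << 6) / (1 << 6)) + 1, i.e. r + 1 */
	return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1;
}

static uint64_t avail_new(uint64_t r)
{
	/* (r * 64) / 65: holds back roughly 1/65th of the space */
	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}

int main(void)
{
	uint64_t r = 1000000;

	printf("old=%llu new=%llu\n",
	       (unsigned long long)avail_old(r),	/* 1000001 */
	       (unsigned long long)avail_new(r));	/* 984615  */
	return 0;
}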
@@ -285,9 +301,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
return min(c->capacity, __bch2_fs_sectors_used(c, stats));
}
-u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
+static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
{
- return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats));
+ return c->capacity - bch2_fs_sectors_used(c, stats);
}
static inline int is_unavailable_bucket(struct bucket_mark m)
@@ -323,9 +339,9 @@ static bool bucket_became_unavailable(struct bch_fs *c,
}
void bch2_fs_usage_apply(struct bch_fs *c,
- struct bch_fs_usage *stats,
- struct disk_reservation *disk_res,
- struct gc_pos gc_pos)
+ struct bch_fs_usage *stats,
+ struct disk_reservation *disk_res,
+ struct gc_pos gc_pos)
{
struct fs_usage_sum sum = __fs_usage_sum(*stats);
s64 added = sum.data + sum.reserved;
@@ -358,6 +374,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
}
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+ struct bch_fs_usage *stats,
struct bucket_mark old, struct bucket_mark new)
{
struct bch_dev_usage *dev_usage;
@@ -374,6 +391,9 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_data_types[new.data_type]);
}
+ stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
+ stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
+
preempt_disable();
dev_usage = this_cpu_ptr(ca->usage_percpu);
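The two added lines above are where the new stats->buckets[] counters are maintained: every bucket state transition moves a whole bucket_size worth of sectors between slots, keyed by bucket_type() (a helper in buckets.h whose definition isn't shown in this diff). A toy model of one transition, with a stand-in bucket_type():

#include <stdio.h>
#include <stdint.h>

enum toy_type { T_NONE, T_SB, T_JOURNAL, T_NR };

struct toy_mark { enum toy_type data_type; };

/* stand-in for the real bucket_type(), which derives the type
 * from the full bucket mark */
static enum toy_type bucket_type(struct toy_mark m)
{
	return m.data_type;
}

int main(void)
{
	uint64_t bucket_size = 512;			/* sectors per bucket (toy) */
	uint64_t buckets[T_NR] = { [T_NONE] = 512 };	/* one free bucket */
	struct toy_mark old = { T_NONE }, new = { T_JOURNAL };

	/* mirrors the two lines added to bch2_dev_usage_update(): */
	buckets[bucket_type(old)] -= bucket_size;
	buckets[bucket_type(new)] += bucket_size;

	/* the whole 512-sector bucket now counts as journal, even if
	 * only a handful of its sectors have been written */
	printf("journal sectors accounted: %llu\n",
	       (unsigned long long)buckets[T_JOURNAL]);
	return 0;
}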
@@ -399,17 +419,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_dev_stats_verify(ca);
}
-#define bucket_data_cmpxchg(c, ca, g, new, expr) \
+#define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \
({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\
- bch2_dev_usage_update(c, ca, _old, new); \
+ bch2_dev_usage_update(c, ca, stats, _old, new); \
_old; \
})
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark *old)
{
+ struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
struct bucket *g;
struct bucket_mark new;
@@ -417,7 +438,7 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
g = bucket(ca, b);
- *old = bucket_data_cmpxchg(c, ca, g, new, ({
+ *old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
BUG_ON(!is_available_bucket(new));
new.owned_by_allocator = 1;
@@ -436,6 +457,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags)
{
+ struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
struct bucket *g;
struct bucket_mark old, new;
@@ -446,7 +468,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
gc_will_visit(c, pos))
return;
- old = bucket_data_cmpxchg(c, ca, g, new, ({
+ old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.owned_by_allocator = owned_by_allocator;
}));
@@ -466,10 +488,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
unsigned sectors, struct gc_pos pos,
unsigned flags)
{
+ struct bch_fs_usage *stats;
struct bucket *g;
struct bucket_mark old, new;
- BUG_ON(!type);
+ BUG_ON(type != BCH_DATA_SB &&
+ type != BCH_DATA_JOURNAL);
if (likely(c)) {
percpu_rwsem_assert_held(&c->usage_lock);
@@ -479,16 +503,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
return;
}
- rcu_read_lock();
+ preempt_disable();
+ stats = this_cpu_ptr(c->usage_percpu);
g = bucket(ca, b);
- old = bucket_data_cmpxchg(c, ca, g, new, ({
+ old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
- new.dirty_sectors += sectors;
}));
- rcu_read_unlock();
+ stats->replicas[0].data[type] += sectors;
+ preempt_enable();
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
bucket_became_unavailable(c, old, new));
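Note the locking swap above: rcu_read_lock() becomes preempt_disable() because the function now writes through this_cpu_ptr(), and a per-CPU pointer is only stable while the task cannot migrate; on preemptible kernels an RCU read-side critical section doesn't prevent migration. The pattern, as a minimal kernel-style sketch (module context assumed, names hypothetical):

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>

struct toy_stats { u64 sectors; };
static DEFINE_PER_CPU(struct toy_stats, toy_stats);

static void toy_account(u64 sectors)
{
	struct toy_stats *s;

	preempt_disable();		/* pin this task to the current CPU */
	s = this_cpu_ptr(&toy_stats);	/* safe: we can't migrate away */
	s->sectors += sectors;
	preempt_enable();
}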
@@ -589,7 +614,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
old.v.counter,
new.v.counter)) != old.v.counter);
- bch2_dev_usage_update(c, ca, old, new);
+ bch2_dev_usage_update(c, ca, stats, old, new);
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
bucket_became_unavailable(c, old, new));
@@ -601,6 +626,10 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
+ unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
+
+ BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
+
/*
* synchronization w.r.t. GC:
*
@@ -643,32 +672,22 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
- unsigned replicas = 0;
BUG_ON(!sectors);
- extent_for_each_ptr_crc(e, ptr, crc) {
+ extent_for_each_ptr_crc(e, ptr, crc)
bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
stats, journal_seq, flags);
- replicas += !ptr->cached;
- }
- if (replicas) {
- BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s));
- stats->s[replicas - 1].data[data_type] += sectors;
- }
+ if (replicas)
+ stats->replicas[replicas - 1].data[data_type] += sectors;
break;
}
- case BCH_RESERVATION: {
- struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-
- if (r.v->nr_replicas) {
- BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s));
- stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
- }
+ case BCH_RESERVATION:
+ if (replicas)
+ stats->replicas[replicas - 1].persistent_reserved += sectors;
break;
}
- }
percpu_up_read(&c->usage_lock);
}
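With the refactor above, both the extent and reservation cases index stats->replicas[] by the dirty-pointer count computed once at the top of bch2_mark_key(). The convention: replicas[n - 1] holds sectors stored n times, and __fs_usage_sum() multiplies back by (i + 1) to recover raw disk usage. A quick numeric check of that round trip:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t replicas_data[4] = { 0 };	/* toy stats->replicas[i].data[type] */
	unsigned nr_dirty_ptrs = 2;		/* hypothetical extent, 2 dirty pointers */
	uint64_t sectors = 128;			/* logical sectors being marked */
	uint64_t raw = 0;
	unsigned i;

	replicas_data[nr_dirty_ptrs - 1] += sectors;	/* as in bch2_mark_key() */

	for (i = 0; i < 4; i++)				/* as in __fs_usage_sum() */
		raw += replicas_data[i] * (i + 1);

	printf("logical=%llu raw=%llu\n",
	       (unsigned long long)sectors,
	       (unsigned long long)raw);		/* logical=128 raw=256 */
	return 0;
}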
@@ -681,7 +700,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c)
for_each_possible_cpu(cpu)
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;
- return bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
+ return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c)));
}
/* Used by gc when it's starting: */