diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-02-10 19:34:47 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:21 -0400 |
commit | 5e82a9a1f4f82e273530b90d107638a5969d1de0 (patch) | |
tree | 6e5c17c14f4f2a2589303b4d80a2bfc193818dd0 | |
parent | fca1223ccfac2a461d7d3e29fb09a1b2142bdd7f (diff) | |
download | lwn-5e82a9a1f4f82e273530b90d107638a5969d1de0.tar.gz lwn-5e82a9a1f4f82e273530b90d107638a5969d1de0.zip |
bcachefs: Write out fs usage consistently

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/bcachefs.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 28 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 24 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 217 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 27 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 12 | ||||
-rw-r--r-- | fs/bcachefs/chardev.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/replicas.c | 128 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 25 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/sysfs.c | 37 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 8 |
14 files changed, 310 insertions, 231 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 877ce788d413..68e2d3b1a9a6 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -646,11 +646,15 @@ struct bch_fs { struct percpu_rw_semaphore mark_lock; + seqcount_t usage_lock; + struct bch_fs_usage *usage_base; struct bch_fs_usage __percpu *usage[2]; + struct bch_fs_usage __percpu *usage_gc; + u64 __percpu *online_reserved; /* single element mempool: */ struct mutex usage_scratch_lock; - struct bch_fs_usage *usage_scratch; + struct bch_fs_usage_online *usage_scratch; /* * When we invalidate buckets, we use both the priority and the amount diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 3ba0910c2a47..5ad933ba049b 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -490,8 +490,8 @@ static void bch2_gc_free(struct bch_fs *c) ca->usage[1] = NULL; } - free_percpu(c->usage[1]); - c->usage[1] = NULL; + free_percpu(c->usage_gc); + c->usage_gc = NULL; } static int bch2_gc_done(struct bch_fs *c, @@ -587,14 +587,16 @@ static int bch2_gc_done(struct bch_fs *c, } }; + for (i = 0; i < ARRAY_SIZE(c->usage); i++) + bch2_fs_usage_acc_to_base(c, i); + bch2_dev_usage_from_buckets(c); { unsigned nr = fs_usage_u64s(c); - struct bch_fs_usage *dst = (void *) - bch2_acc_percpu_u64s((void *) c->usage[0], nr); + struct bch_fs_usage *dst = c->usage_base; struct bch_fs_usage *src = (void *) - bch2_acc_percpu_u64s((void *) c->usage[1], nr); + bch2_acc_percpu_u64s((void *) c->usage_gc, nr); copy_fs_field(hidden, "hidden"); copy_fs_field(btree, "btree"); @@ -647,11 +649,11 @@ static int bch2_gc_start(struct bch_fs *c, */ gc_pos_set(c, gc_phase(GC_PHASE_START)); - BUG_ON(c->usage[1]); + BUG_ON(c->usage_gc); - c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), + c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), sizeof(u64), GFP_KERNEL); - if (!c->usage[1]) + if (!c->usage_gc) return -ENOMEM; for_each_member_device(ca, c, i) { @@ -770,11 +772,17 @@ out: 
ret = -EINVAL; } - percpu_down_write(&c->mark_lock); + if (!ret) { + bch2_journal_block(&c->journal); - if (!ret) + percpu_down_write(&c->mark_lock); ret = bch2_gc_done(c, initial, metadata_only); + bch2_journal_unblock(&c->journal); + } else { + percpu_down_write(&c->mark_lock); + } + /* Indicates that gc is no longer in progress: */ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 35472cf5e9e0..cc0cd465b863 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1066,7 +1066,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; struct btree *old = btree_node_root(c, b); - struct bch_fs_usage *fs_usage; + struct bch_fs_usage_online *fs_usage; __bch2_btree_set_root_inmem(c, b); @@ -1075,7 +1075,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), - true, 0, fs_usage, 0, 0); + true, 0, &fs_usage->u, 0, 0); if (gc_visited(c, gc_pos_btree_root(b->btree_id))) bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, NULL, 0, @@ -1084,8 +1084,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) if (old && !btree_node_fake(old)) bch2_btree_node_free_index(as, NULL, bkey_i_to_s_c(&old->key), - fs_usage); - bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); + &fs_usage->u); + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0); bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); @@ -1160,7 +1160,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b struct btree_node_iter *node_iter) { struct bch_fs *c = as->c; - struct bch_fs_usage *fs_usage; + struct bch_fs_usage_online *fs_usage; struct bkey_packed *k; struct bkey tmp; @@ -1171,7 +1171,7 @@ static void 
bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(insert), - true, 0, fs_usage, 0, 0); + true, 0, &fs_usage->u, 0, 0); if (gc_visited(c, gc_pos_btree_node(b))) bch2_mark_key_locked(c, bkey_i_to_s_c(insert), @@ -1188,9 +1188,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b if (k && !bkey_cmp_packed(b, k, &insert->k)) bch2_btree_node_free_index(as, b, bkey_disassemble(b, k, &tmp), - fs_usage); + &fs_usage->u); - bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0); bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); @@ -1984,7 +1984,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bkey_copy(&b->key, &new_key->k_i); } } else { - struct bch_fs_usage *fs_usage; + struct bch_fs_usage_online *fs_usage; BUG_ON(btree_node_root(c, b) != b); @@ -1995,7 +1995,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), - true, 0, fs_usage, 0, 0); + true, 0, &fs_usage->u, 0, 0); if (gc_visited(c, gc_pos_btree_root(b->btree_id))) bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, NULL, 0, @@ -2003,8 +2003,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bch2_btree_node_free_index(as, NULL, bkey_i_to_s_c(&b->key), - fs_usage); - bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); + &fs_usage->u); + bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res, 0); bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a9d7905f3373..3425ad6f68b2 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -533,7 +533,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, struct 
btree_insert_entry **stopped_at) { struct bch_fs *c = trans->c; - struct bch_fs_usage *fs_usage = NULL; + struct bch_fs_usage_online *fs_usage = NULL; struct btree_insert_entry *i; struct btree_iter *linked; int ret; @@ -608,7 +608,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) { trans_for_each_update_iter(trans, i) - bch2_mark_update(trans, i, fs_usage, 0); + bch2_mark_update(trans, i, &fs_usage->u, 0); if (fs_usage) bch2_trans_fs_usage_apply(trans, fs_usage); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2488a2227bd9..fb5461df3bbf 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -120,8 +120,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c) unsigned i; percpu_down_write(&c->mark_lock); - usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], - fs_usage_u64s(c)); + usage = c->usage_base; + + for (i = 0; i < ARRAY_SIZE(c->usage); i++) + bch2_fs_usage_acc_to_base(c, i); for (i = 0; i < BCH_REPLICAS_MAX; i++) usage->reserved += usage->persistent_reserved[i]; @@ -146,7 +148,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c) percpu_up_write(&c->mark_lock); } -void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) +void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) { if (fs_usage == c->usage_scratch) mutex_unlock(&c->usage_scratch_lock); @@ -154,12 +156,12 @@ void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) kfree(fs_usage); } -struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) +struct bch_fs_usage_online *bch2_fs_usage_scratch_get(struct bch_fs *c) { - struct bch_fs_usage *ret; - unsigned bytes = fs_usage_u64s(c) * sizeof(u64); - - ret = kzalloc(bytes, GFP_NOWAIT); + struct bch_fs_usage_online *ret; + unsigned bytes = sizeof(struct bch_fs_usage_online) + sizeof(u64) * + READ_ONCE(c->replicas.nr); + ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN); if 
(ret) return ret; @@ -189,30 +191,117 @@ struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) return ret; } -struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) +static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, + unsigned journal_seq, + bool gc) { - struct bch_fs_usage *ret; - unsigned v, u64s = fs_usage_u64s(c); -retry: - ret = kzalloc(u64s * sizeof(u64), GFP_NOFS); - if (unlikely(!ret)) - return NULL; + return this_cpu_ptr(gc + ? c->usage_gc + : c->usage[journal_seq & 1]); +} + +u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) +{ + ssize_t offset = v - (u64 *) c->usage_base; + unsigned seq; + u64 ret; + + BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); + percpu_rwsem_assert_held(&c->mark_lock); + + do { + seq = read_seqcount_begin(&c->usage_lock); + ret = *v + + percpu_u64_get((u64 __percpu *) c->usage[0] + offset) + + percpu_u64_get((u64 __percpu *) c->usage[1] + offset); + } while (read_seqcount_retry(&c->usage_lock, seq)); + + return ret; +} + +struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) +{ + struct bch_fs_usage_online *ret; + unsigned seq, i, u64s; percpu_down_read(&c->mark_lock); - v = fs_usage_u64s(c); - if (unlikely(u64s != v)) { - u64s = v; + ret = kmalloc(sizeof(struct bch_fs_usage_online) + + sizeof(u64) + c->replicas.nr, GFP_NOFS); + if (unlikely(!ret)) { percpu_up_read(&c->mark_lock); - kfree(ret); - goto retry; + return NULL; } - acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s); + ret->online_reserved = percpu_u64_get(c->online_reserved); + + u64s = fs_usage_u64s(c); + do { + seq = read_seqcount_begin(&c->usage_lock); + memcpy(&ret->u, c->usage_base, u64s * sizeof(u64)); + for (i = 0; i < ARRAY_SIZE(c->usage); i++) + acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s); + } while (read_seqcount_retry(&c->usage_lock, seq)); return ret; } +void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) +{ + unsigned u64s = fs_usage_u64s(c); + + 
BUG_ON(idx >= ARRAY_SIZE(c->usage)); + + preempt_disable(); + write_seqcount_begin(&c->usage_lock); + + acc_u64s_percpu((u64 *) c->usage_base, + (u64 __percpu *) c->usage[idx], u64s); + percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); + + write_seqcount_end(&c->usage_lock); + preempt_enable(); +} + +void bch2_fs_usage_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_fs_usage_online *fs_usage) +{ + unsigned i; + + pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity); + + pr_buf(out, "hidden:\t\t\t\t%llu\n", + fs_usage->u.hidden); + pr_buf(out, "data:\t\t\t\t%llu\n", + fs_usage->u.data); + pr_buf(out, "cached:\t\t\t\t%llu\n", + fs_usage->u.cached); + pr_buf(out, "reserved:\t\t\t%llu\n", + fs_usage->u.reserved); + pr_buf(out, "nr_inodes:\t\t\t%llu\n", + fs_usage->u.nr_inodes); + pr_buf(out, "online reserved:\t\t%llu\n", + fs_usage->online_reserved); + + for (i = 0; + i < ARRAY_SIZE(fs_usage->u.persistent_reserved); + i++) { + pr_buf(out, "%u replicas:\n", i + 1); + pr_buf(out, "\treserved:\t\t%llu\n", + fs_usage->u.persistent_reserved[i]); + } + + for (i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + + pr_buf(out, "\t"); + bch2_replicas_entry_to_text(out, e); + pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]); + } +} + #define RESERVE_FACTOR 6 static u64 reserve_factor(u64 r) @@ -225,12 +314,12 @@ static u64 avail_factor(u64 r) return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); } -u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) +u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) { - return min(fs_usage->hidden + - fs_usage->btree + - fs_usage->data + - reserve_factor(fs_usage->reserved + + return min(fs_usage->u.hidden + + fs_usage->u.btree + + fs_usage->u.data + + reserve_factor(fs_usage->u.reserved + fs_usage->online_reserved), c->capacity); } @@ -242,17 +331,17 @@ __bch2_fs_usage_read_short(struct bch_fs *c) u64 data, reserved; 
ret.capacity = c->capacity - - percpu_u64_get(&c->usage[0]->hidden); + bch2_fs_usage_read_one(c, &c->usage_base->hidden); - data = percpu_u64_get(&c->usage[0]->data) + - percpu_u64_get(&c->usage[0]->btree); - reserved = percpu_u64_get(&c->usage[0]->reserved) + - percpu_u64_get(&c->usage[0]->online_reserved); + data = bch2_fs_usage_read_one(c, &c->usage_base->data) + + bch2_fs_usage_read_one(c, &c->usage_base->btree); + reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + + percpu_u64_get(c->online_reserved); ret.used = min(ret.capacity, data + reserve_factor(reserved)); ret.free = ret.capacity - ret.used; - ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes); + ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); return ret; } @@ -300,10 +389,12 @@ static bool bucket_became_unavailable(struct bucket_mark old, } int bch2_fs_usage_apply(struct bch_fs *c, - struct bch_fs_usage *fs_usage, - struct disk_reservation *disk_res) + struct bch_fs_usage_online *src, + struct disk_reservation *disk_res, + unsigned journal_seq) { - s64 added = fs_usage->data + fs_usage->reserved; + struct bch_fs_usage *dst = fs_usage_ptr(c, journal_seq, false); + s64 added = src->u.data + src->u.reserved; s64 should_not_have_added; int ret = 0; @@ -315,20 +406,22 @@ int bch2_fs_usage_apply(struct bch_fs *c, */ should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0); if (WARN_ONCE(should_not_have_added > 0, - "disk usage increased without a reservation")) { + "disk usage increased by %lli more than reservation of %llu", + added, disk_res ? 
disk_res->sectors : 0)) { atomic64_sub(should_not_have_added, &c->sectors_available); added -= should_not_have_added; ret = -1; } if (added > 0) { - disk_res->sectors -= added; - fs_usage->online_reserved -= added; + disk_res->sectors -= added; + src->online_reserved -= added; } + this_cpu_add(*c->online_reserved, src->online_reserved); + preempt_disable(); - acc_u64s((u64 *) this_cpu_ptr(c->usage[0]), - (u64 *) fs_usage, fs_usage_u64s(c)); + acc_u64s((u64 *) dst, (u64 *) &src->u, fs_usage_u64s(c)); preempt_enable(); return ret; @@ -371,10 +464,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, account_bucket(fs_usage, dev_usage, bucket_type(new), 1, ca->mi.bucket_size); - dev_usage->buckets_alloc += - (int) new.owned_by_allocator - (int) old.owned_by_allocator; - dev_usage->buckets_ec += - (int) new.stripe - (int) old.stripe; + dev_usage->buckets_ec += (int) new.stripe - (int) old.stripe; dev_usage->buckets_unavailable += is_unavailable_bucket(new) - is_unavailable_bucket(old); @@ -394,21 +484,12 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c) { struct bch_dev *ca; struct bucket_mark old = { .v.counter = 0 }; - struct bch_fs_usage *fs_usage; struct bucket_array *buckets; struct bucket *g; unsigned i; int cpu; - percpu_u64_set(&c->usage[0]->hidden, 0); - - /* - * This is only called during startup, before there's any multithreaded - * access to c->usage: - */ - preempt_disable(); - fs_usage = this_cpu_ptr(c->usage[0]); - preempt_enable(); + c->usage_base->hidden = 0; for_each_member_device(ca, c, i) { for_each_possible_cpu(cpu) @@ -418,7 +499,7 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c) buckets = bucket_array(ca); for_each_bucket(g, buckets) - bch2_dev_usage_update(c, ca, fs_usage, + bch2_dev_usage_update(c, ca, c->usage_base, old, g->mark, false); } } @@ -483,7 +564,7 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, struct bucket_mark *ret, bool gc) { - struct bch_fs_usage *fs_usage = 
this_cpu_ptr(c->usage[gc]); + struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); struct bucket *g = __bucket(ca, b, gc); struct bucket_mark old, new; @@ -522,7 +603,7 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, bool owned_by_allocator, bool gc) { - struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]); + struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc); struct bucket *g = __bucket(ca, b, gc); struct bucket_mark old, new; @@ -634,7 +715,7 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, old.dirty_sectors, sectors); if (c) - bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]), + bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc), old, new, gc); return 0; @@ -974,7 +1055,7 @@ int bch2_mark_key_locked(struct bch_fs *c, preempt_disable(); if (!fs_usage || gc) - fs_usage = this_cpu_ptr(c->usage[gc]); + fs_usage = fs_usage_ptr(c, journal_seq, gc); switch (k.k->type) { case KEY_TYPE_alloc: @@ -1133,7 +1214,7 @@ int bch2_mark_update(struct btree_trans *trans, } void bch2_trans_fs_usage_apply(struct btree_trans *trans, - struct bch_fs_usage *fs_usage) + struct bch_fs_usage_online *fs_usage) { struct bch_fs *c = trans->c; struct btree_insert_entry *i; @@ -1141,7 +1222,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, u64 disk_res_sectors = trans->disk_res ? 
trans->disk_res->sectors : 0; char buf[200]; - if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) || + if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res, + trans->journal_res.seq) || warned_disk_usage || xchg(&warned_disk_usage, 1)) return; @@ -1182,15 +1264,6 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, /* Disk reservations: */ -void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) -{ - percpu_down_read(&c->mark_lock); - this_cpu_sub(c->usage[0]->online_reserved, res->sectors); - percpu_up_read(&c->mark_lock); - - res->sectors = 0; -} - #define SECTORS_CACHE 1024 int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, @@ -1224,7 +1297,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, out: pcpu->sectors_available -= sectors; - this_cpu_add(c->usage[0]->online_reserved, sectors); + this_cpu_add(*c->online_reserved, sectors); res->sectors += sectors; preempt_enable(); @@ -1241,7 +1314,7 @@ recalculate: (flags & BCH_DISK_RESERVATION_NOFAIL)) { atomic64_set(&c->sectors_available, max_t(s64, 0, sectors_available - sectors)); - this_cpu_add(c->usage[0]->online_reserved, sectors); + this_cpu_add(*c->online_reserved, sectors); res->sectors += sectors; ret = 0; } else { diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index c51192fae503..86431cffb660 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -219,12 +219,19 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c) READ_ONCE(c->replicas.nr); } -void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *); -struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *); +void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage_online *); +struct bch_fs_usage_online *bch2_fs_usage_scratch_get(struct bch_fs *); -struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); +u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); -u64 bch2_fs_sectors_used(struct bch_fs *, 
struct bch_fs_usage *); +struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *); + +void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); + +void bch2_fs_usage_to_text(struct printbuf *, + struct bch_fs *, struct bch_fs_usage_online *); + +u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *); struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); @@ -251,25 +258,23 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, int bch2_mark_key(struct bch_fs *, struct bkey_s_c, bool, s64, struct bch_fs_usage *, u64, unsigned); -int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, - struct disk_reservation *); +int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage_online *, + struct disk_reservation *, unsigned); int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *, struct bkey_s_c, struct bkey_i *, struct bch_fs_usage *, unsigned); int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *, struct bch_fs_usage *, unsigned); -void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); +void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *); /* disk reservations: */ -void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); - static inline void bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) { - if (res->sectors) - __bch2_disk_reservation_put(c, res); + this_cpu_sub(*c->online_reserved, res->sectors); + res->sectors = 0; } #define BCH_DISK_RESERVATION_NOFAIL (1 << 0) diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index a98493dd2ba8..8e47b273360c 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -52,7 +52,6 @@ struct bucket_array { struct bch_dev_usage { u64 buckets[BCH_DATA_NR]; - u64 buckets_alloc; u64 buckets_ec; u64 buckets_unavailable; @@ -63,12 +62,6 @@ struct bch_dev_usage { struct bch_fs_usage { /* all fields are in units of 512 
byte sectors: */ - - u64 online_reserved; - - /* fields after online_reserved are cleared/recalculated by gc: */ - u64 gc_start[0]; - u64 hidden; u64 btree; u64 data; @@ -88,6 +81,11 @@ struct bch_fs_usage { u64 replicas[]; }; +struct bch_fs_usage_online { + u64 online_reserved; + struct bch_fs_usage u; +}; + struct bch_fs_usage_short { u64 capacity; u64 used; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index f7cfec9f00f9..2573376290bb 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -394,7 +394,7 @@ static long bch2_ioctl_usage(struct bch_fs *c, } { - struct bch_fs_usage *src; + struct bch_fs_usage_online *src; struct bch_ioctl_fs_usage dst = { .capacity = c->capacity, }; @@ -410,7 +410,7 @@ static long bch2_ioctl_usage(struct bch_fs *c, for (i = 0; i < BCH_REPLICAS_MAX; i++) { dst.persistent_reserved[i] = - src->persistent_reserved[i]; + src->u.persistent_reserved[i]; #if 0 for (j = 0; j < BCH_DATA_NR; j++) dst.sectors[j][i] = src.replicas[i].data[j]; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d207ff7b98f4..a3f07565efb0 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -404,13 +404,11 @@ static int journal_replay_entry_early(struct bch_fs *c, switch (entry->btree_id) { case FS_USAGE_RESERVED: if (entry->level < BCH_REPLICAS_MAX) - percpu_u64_set(&c->usage[0]-> - persistent_reserved[entry->level], - le64_to_cpu(u->v)); + c->usage_base->persistent_reserved[entry->level] = + le64_to_cpu(u->v); break; case FS_USAGE_INODES: - percpu_u64_set(&c->usage[0]->nr_inodes, - le64_to_cpu(u->v)); + c->usage_base->nr_inodes = le64_to_cpu(u->v); break; case FS_USAGE_KEY_VERSION: atomic64_set(&c->key_version, diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index b1df2c1ce4a4..cf13a628682f 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "buckets.h" #include "journal.h" #include "replicas.h" 
#include "super-io.h" @@ -235,20 +236,13 @@ bool bch2_replicas_marked(struct bch_fs *c, return marked; } -static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p, +static void __replicas_table_update(struct bch_fs_usage *dst, struct bch_replicas_cpu *dst_r, - struct bch_fs_usage __percpu *src_p, + struct bch_fs_usage *src, struct bch_replicas_cpu *src_r) { - unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; - struct bch_fs_usage *dst, *src = (void *) - bch2_acc_percpu_u64s((void *) src_p, src_nr); int src_idx, dst_idx; - preempt_disable(); - dst = this_cpu_ptr(dst_p); - preempt_enable(); - *dst = *src; for (src_idx = 0; src_idx < src_r->nr; src_idx++) { @@ -263,42 +257,75 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p, } } +static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, + struct bch_replicas_cpu *dst_r, + struct bch_fs_usage __percpu *src_p, + struct bch_replicas_cpu *src_r) +{ + unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; + struct bch_fs_usage *dst, *src = (void *) + bch2_acc_percpu_u64s((void *) src_p, src_nr); + + preempt_disable(); + dst = this_cpu_ptr(dst_p); + preempt_enable(); + + __replicas_table_update(dst, dst_r, src, src_r); +} + /* * Resize filesystem accounting: */ static int replicas_table_update(struct bch_fs *c, struct bch_replicas_cpu *new_r) { - struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL }; - struct bch_fs_usage *new_scratch = NULL; - unsigned bytes = sizeof(struct bch_fs_usage) + + struct bch_fs_usage __percpu *new_usage[2]; + struct bch_fs_usage_online *new_scratch = NULL; + struct bch_fs_usage __percpu *new_gc = NULL; + struct bch_fs_usage *new_base = NULL; + unsigned i, bytes = sizeof(struct bch_fs_usage) + + sizeof(u64) * new_r->nr; + unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) + sizeof(u64) * new_r->nr; int ret = -ENOMEM; - if (!(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64), - 
GFP_NOIO)) || - (c->usage[1] && - !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64), - GFP_NOIO))) || - !(new_scratch = kmalloc(bytes, GFP_NOIO))) - goto err; + memset(new_usage, 0, sizeof(new_usage)); - if (c->usage[0]) - __replicas_table_update(new_usage[0], new_r, - c->usage[0], &c->replicas); - if (c->usage[1]) - __replicas_table_update(new_usage[1], new_r, - c->usage[1], &c->replicas); + for (i = 0; i < ARRAY_SIZE(new_usage); i++) + if (!(new_usage[i] = __alloc_percpu_gfp(bytes, + sizeof(u64), GFP_NOIO))) + goto err; - swap(c->usage[0], new_usage[0]); - swap(c->usage[1], new_usage[1]); + if (!(new_base = kzalloc(bytes, GFP_NOIO)) || + !(new_scratch = kmalloc(scratch_bytes, GFP_NOIO)) || + (c->usage_gc && + !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) + goto err; + + for (i = 0; i < ARRAY_SIZE(new_usage); i++) + if (c->usage[i]) + __replicas_table_update_pcpu(new_usage[i], new_r, + c->usage[i], &c->replicas); + if (c->usage_base) + __replicas_table_update(new_base, new_r, + c->usage_base, &c->replicas); + if (c->usage_gc) + __replicas_table_update_pcpu(new_gc, new_r, + c->usage_gc, &c->replicas); + + for (i = 0; i < ARRAY_SIZE(new_usage); i++) + swap(c->usage[i], new_usage[i]); + swap(c->usage_base, new_base); swap(c->usage_scratch, new_scratch); + swap(c->usage_gc, new_gc); swap(c->replicas, *new_r); ret = 0; err: + free_percpu(new_gc); kfree(new_scratch); free_percpu(new_usage[1]); free_percpu(new_usage[0]); + kfree(new_base); return ret; } @@ -457,9 +484,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) lockdep_assert_held(&c->replicas_gc_lock); mutex_lock(&c->sb_lock); - - if (ret) - goto err; + percpu_down_write(&c->mark_lock); /* * this is kind of crappy; the replicas gc mechanism needs to be ripped @@ -470,26 +495,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); struct bch_replicas_cpu n; - u64 v; - if (__replicas_has_entry(&c->replicas_gc, e)) - 
continue; - - v = percpu_u64_get(&c->usage[0]->replicas[i]); - if (!v) - continue; + if (!__replicas_has_entry(&c->replicas_gc, e) && + (c->usage_base->replicas[i] || + percpu_u64_get(&c->usage[0]->replicas[i]) || + percpu_u64_get(&c->usage[1]->replicas[i]))) { + n = cpu_replicas_add_entry(&c->replicas_gc, e); + if (!n.entries) { + ret = -ENOSPC; + goto err; + } - n = cpu_replicas_add_entry(&c->replicas_gc, e); - if (!n.entries) { - ret = -ENOSPC; - goto err; + swap(n, c->replicas_gc); + kfree(n.entries); } - - percpu_down_write(&c->mark_lock); - swap(n, c->replicas_gc); - percpu_up_write(&c->mark_lock); - - kfree(n.entries); } if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) { @@ -497,19 +516,18 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) goto err; } - bch2_write_super(c); - - /* don't update in memory replicas until changes are persistent */ + ret = replicas_table_update(c, &c->replicas_gc); err: - percpu_down_write(&c->mark_lock); - if (!ret) - ret = replicas_table_update(c, &c->replicas_gc); - kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; + percpu_up_write(&c->mark_lock); + if (!ret) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + return ret; } @@ -576,7 +594,7 @@ int bch2_replicas_set_usage(struct bch_fs *c, BUG_ON(ret < 0); } - percpu_u64_set(&c->usage[0]->replicas[idx], sectors); + c->usage_base->replicas[idx] = sectors; return 0; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 0fe8ea22c6a1..6e69a4f74ca0 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "buckets.h" #include "checksum.h" #include "disk_groups.h" #include "ec.h" @@ -978,13 +979,16 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, mutex_unlock(&c->btree_root_lock); - if (journal_seq) - return entry; + percpu_down_read(&c->mark_lock); - percpu_down_write(&c->mark_lock); + if (!journal_seq) { + for (i = 0; i < 
ARRAY_SIZE(c->usage); i++) + bch2_fs_usage_acc_to_base(c, i); + } else { + bch2_fs_usage_acc_to_base(c, journal_seq & 1); + } { - u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes); struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); @@ -992,7 +996,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1; u->entry.type = BCH_JSET_ENTRY_usage; u->entry.btree_id = FS_USAGE_INODES; - u->v = cpu_to_le64(nr_inodes); + u->v = cpu_to_le64(c->usage_base->nr_inodes); entry = vstruct_next(entry); } @@ -1013,17 +1017,13 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, for (i = 0; i < BCH_REPLICAS_MAX; i++) { struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); - u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]); - - if (!sectors) - continue; memset(u, 0, sizeof(*u)); u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1; u->entry.type = BCH_JSET_ENTRY_usage; u->entry.btree_id = FS_USAGE_RESERVED; u->entry.level = i; - u->v = sectors; + u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); entry = vstruct_next(entry); } @@ -1031,7 +1031,6 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); - u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]); struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); @@ -1039,14 +1038,14 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.u64s = DIV_ROUND_UP(sizeof(*u) + e->nr_devs, sizeof(u64)) - 1; u->entry.type = BCH_JSET_ENTRY_data_usage; - u->v = cpu_to_le64(sectors); + u->v = cpu_to_le64(c->usage_base->replicas[i]); unsafe_memcpy(&u->r, e, replicas_entry_bytes(e), "embedded variable length struct"); entry = vstruct_next(entry); } - percpu_up_write(&c->mark_lock); + percpu_up_read(&c->mark_lock); return entry; } 
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b954a4e47e15..959638c986a0 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -464,8 +464,11 @@ static void bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); percpu_free_rwsem(&c->mark_lock); + free_percpu(c->online_reserved); kfree(c->usage_scratch); + free_percpu(c->usage[1]); free_percpu(c->usage[0]); + kfree(c->usage_base); free_percpu(c->pcpu); mempool_exit(&c->btree_iters_pool); mempool_exit(&c->btree_bounce_pool); @@ -658,6 +661,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) seqcount_init(&c->gc_pos_lock); + seqcount_init(&c->usage_lock); + c->copy_gc_enabled = 1; c->rebalance.enabled = 1; c->promote_whole_extents = true; @@ -721,6 +726,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) offsetof(struct btree_write_bio, wbio.bio)), BIOSET_NEED_BVECS) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || + !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, @@ -1433,13 +1439,8 @@ err: static void dev_usage_clear(struct bch_dev *ca) { struct bucket_array *buckets; - int cpu; - for_each_possible_cpu(cpu) { - struct bch_dev_usage *p = - per_cpu_ptr(ca->usage[0], cpu); - memset(p, 0, sizeof(*p)); - } + percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0])); down_read(&ca->bucket_lock); buckets = bucket_array(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index ee4c0764d4ad..3139161fbe88 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -235,43 +235,12 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) { struct printbuf out = _PBUF(buf, PAGE_SIZE); - struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c); - unsigned i; + struct bch_fs_usage_online *fs_usage = 
bch2_fs_usage_read(c); if (!fs_usage) return -ENOMEM; - pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity); - - pr_buf(&out, "hidden:\t\t\t\t%llu\n", - fs_usage->hidden); - pr_buf(&out, "data:\t\t\t\t%llu\n", - fs_usage->data); - pr_buf(&out, "cached:\t\t\t\t%llu\n", - fs_usage->cached); - pr_buf(&out, "reserved:\t\t\t%llu\n", - fs_usage->reserved); - pr_buf(&out, "nr_inodes:\t\t\t%llu\n", - fs_usage->nr_inodes); - pr_buf(&out, "online reserved:\t\t%llu\n", - fs_usage->online_reserved); - - for (i = 0; - i < ARRAY_SIZE(fs_usage->persistent_reserved); - i++) { - pr_buf(&out, "%u replicas:\n", i + 1); - pr_buf(&out, "\treserved:\t\t%llu\n", - fs_usage->persistent_reserved[i]); - } - - for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = - cpu_replicas_entry(&c->replicas, i); - - pr_buf(&out, "\t"); - bch2_replicas_entry_to_text(&out, e); - pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]); - } + bch2_fs_usage_to_text(&out, c, fs_usage); percpu_up_read(&c->mark_lock); @@ -840,7 +809,6 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) "free[RESERVE_NONE]: %zu/%zu\n" "buckets:\n" " capacity: %llu\n" - " alloc: %llu\n" " sb: %llu\n" " journal: %llu\n" " meta: %llu\n" @@ -867,7 +835,6 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size, ca->mi.nbuckets - ca->mi.first_bucket, - stats.buckets_alloc, stats.buckets[BCH_DATA_SB], stats.buckets[BCH_DATA_JOURNAL], stats.buckets[BCH_DATA_BTREE], diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 59c8a1dac7be..c0910f230caf 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -741,6 +741,14 @@ static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, acc_u64s(acc, per_cpu_ptr(src, cpu), nr); } +static inline void percpu_memset(void __percpu *p, int c, size_t bytes) +{ + int cpu; + + for_each_possible_cpu(cpu) + 
memset(per_cpu_ptr(p, cpu), c, bytes); +} + u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); #define cmp_int(l, r) ((l > r) - (l < r)) |