From 61274e9d45547e741cfafc80fb78a81275c8394a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Nov 2018 23:20:21 -0500 Subject: bcachefs: Allocator startup improvements Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 83 +++++++++++++++++++++++++----------------- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/buckets.c | 12 ++++++ fs/bcachefs/journal_io.c | 9 ++--- 5 files changed, 68 insertions(+), 39 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b49d0cd84b78..c17fba1eae96 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -237,6 +237,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca, __BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key; struct bucket *g; struct bkey_i_alloc *a; + int ret; u8 *d; percpu_down_read(&c->usage_lock); @@ -260,32 +261,50 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca, bch2_btree_iter_set_pos(iter, a->k.p); - return bch2_btree_insert_at(c, NULL, journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| - flags, - BTREE_INSERT_ENTRY(iter, &a->k_i)); + ret = bch2_btree_insert_at(c, NULL, journal_seq, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_USE_ALLOC_RESERVE| + flags, + BTREE_INSERT_ENTRY(iter, &a->k_i)); + + if (!ret && ca->buckets_written) + set_bit(b, ca->buckets_written); + + return ret; } -int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos) +int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) { struct bch_dev *ca; struct btree_iter iter; int ret; - if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode]) + if (k->k.p.inode >= c->sb.nr_devices || + !c->devs[k->k.p.inode]) return 0; - ca = bch_dev_bkey_exists(c, pos.inode); + ca = bch_dev_bkey_exists(c, k->k.p.inode); - if (pos.offset >= ca->mi.nbuckets) + if (k->k.p.offset >= ca->mi.nbuckets) return 0; - bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p, + BTREE_ITER_INTENT); - ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL, 0); + ret = bch2_btree_iter_traverse(&iter); + if (ret) + goto err; + + /* check buckets_written with btree node locked: */ + + ret = test_bit(k->k.p.offset, ca->buckets_written) + ? 0 + : bch2_btree_insert_at(c, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_REPLAY, + BTREE_INSERT_ENTRY(&iter, k)); +err: bch2_btree_iter_unlock(&iter); return ret; } @@ -1284,51 +1303,49 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) /* Scan for buckets that are already invalidated: */ for_each_rw_member(ca, c, dev_iter) { - struct btree_iter iter; + struct bucket_array *buckets; struct bucket_mark m; - struct bkey_s_c k; - for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) { - if (k.k->type != BCH_ALLOC) - continue; + down_read(&ca->bucket_lock); + percpu_down_read(&c->usage_lock); + + buckets = bucket_array(ca); - bu = k.k->p.offset; - m = READ_ONCE(bucket(ca, bu)->mark); + for (bu = buckets->first_bucket; + bu < buckets->nbuckets; bu++) { + m = READ_ONCE(buckets->b[bu].mark); - if (!is_available_bucket(m) || m.cached_sectors) + if (!m.gen_valid || + !is_available_bucket(m) || + m.cached_sectors) continue; - percpu_down_read(&c->usage_lock); bch2_mark_alloc_bucket(c, ca, bu, true, gc_pos_alloc(c, NULL), BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE| BCH_BUCKET_MARK_GC_LOCK_HELD); - percpu_up_read(&c->usage_lock); fifo_push(&ca->free_inc, bu); - if (fifo_full(&ca->free_inc)) + discard_invalidated_buckets(c, ca); + + if (fifo_full(&ca->free[RESERVE_BTREE])) break; } - bch2_btree_iter_unlock(&iter); + percpu_up_read(&c->usage_lock); + up_read(&ca->bucket_lock); } /* did we find enough buckets? */ for_each_rw_member(ca, c, dev_iter) - if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) { + if (!fifo_full(&ca->free[RESERVE_BTREE])) { percpu_ref_put(&ca->io_ref); goto not_enough; } return 0; not_enough: - pr_debug("did not find enough empty buckets; issuing discards"); - - /* clear out free_inc, we'll be using it again below: */ - for_each_rw_member(ca, c, dev_iter) - discard_invalidated_buckets(c, ca); - - pr_debug("scanning for reclaimable buckets"); + pr_debug("not enough empty buckets; scanning for reclaimable buckets"); for_each_rw_member(ca, c, dev_iter) { find_reclaimable_buckets(c, ca); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 99535fa60214..59b6a5f2f890 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -17,7 +17,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); } int bch2_alloc_read(struct bch_fs *, struct list_head *); -int bch2_alloc_replay_key(struct bch_fs *, struct bpos); +int bch2_alloc_replay_key(struct bch_fs *, struct bkey_i *); static inline void bch2_wake_allocator(struct bch_dev *ca) { diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b33fbf709705..cdea3a1d9176 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -394,6 +394,7 @@ struct bch_dev { */ struct bucket_array __rcu *buckets; unsigned long *buckets_dirty; + unsigned long *buckets_written; /* most out of date gen in the btree */ u8 *oldest_gens; struct rw_semaphore bucket_lock; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 9558129e77ba..201798866242 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1096,6 +1096,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { struct bucket_array *buckets = NULL, *old_buckets = NULL; unsigned long *buckets_dirty = NULL; + unsigned long *buckets_written = NULL; u8 *oldest_gens = NULL; alloc_fifo free[RESERVE_NR]; alloc_fifo free_inc; @@ -1127,6 +1128,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) !(buckets_dirty = kvpmalloc(BITS_TO_LONGS(nbuckets) * sizeof(unsigned long), GFP_KERNEL|__GFP_ZERO)) || + !(buckets_written = kvpmalloc(BITS_TO_LONGS(nbuckets) * + sizeof(unsigned long), + GFP_KERNEL|__GFP_ZERO)) || !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || !init_fifo(&free[RESERVE_MOVINGGC], copygc_reserve, GFP_KERNEL) || @@ -1161,6 +1165,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) memcpy(buckets_dirty, ca->buckets_dirty, BITS_TO_LONGS(n) * sizeof(unsigned long)); + memcpy(buckets_written, + ca->buckets_written, + BITS_TO_LONGS(n) * sizeof(unsigned long)); } rcu_assign_pointer(ca->buckets, buckets); @@ -1168,6 +1175,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) swap(ca->oldest_gens, oldest_gens); swap(ca->buckets_dirty, buckets_dirty); + swap(ca->buckets_written, buckets_written); if (resize) percpu_up_write(&c->usage_lock); @@ -1207,6 +1215,8 @@ err: free_fifo(&free[i]); kvpfree(buckets_dirty, BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); + kvpfree(buckets_written, + BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); kvpfree(oldest_gens, nbuckets * sizeof(u8)); if (buckets) @@ -1224,6 +1234,8 @@ void bch2_dev_buckets_free(struct bch_dev *ca) free_fifo(&ca->free_inc); for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); + kvpfree(ca->buckets_written, + BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); kvpfree(ca->buckets_dirty, BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8)); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 60fc2eced71a..a74566764630 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -780,7 +780,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) int ret = 0; list_for_each_entry_safe(i, n, list, list) { - j->replay_journal_seq = le64_to_cpu(i->j.seq); for_each_jset_key(k, _n, entry, &i->j) { @@ -790,7 +789,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) * allocation code handles replay for * BTREE_ID_ALLOC keys: */ - ret = bch2_alloc_replay_key(c, k->k.p); + ret = bch2_alloc_replay_key(c, k); } else { /* * We might cause compressed extents to be @@ -801,9 +800,9 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) bch2_disk_reservation_init(c, 0); ret = bch2_btree_insert(c, entry->btree_id, k, - &disk_res, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_REPLAY); + &disk_res, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_REPLAY); } if (ret) { -- cgit v1.2.3