diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-01-13 16:02:22 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:14 -0400 |
commit | d0cc3defba58889e38eaa0c275d4728b4ac3b8c2 (patch) | |
tree | 7d1eb757681cb09dae9960d4970ec2178a5ba186 | |
parent | b8adb833652909221efde19b1813627382b5bf51 (diff) | |
download | lwn-d0cc3defba58889e38eaa0c275d4728b4ac3b8c2.tar.gz lwn-d0cc3defba58889e38eaa0c275d4728b4ac3b8c2.zip |
bcachefs: More allocator startup improvements
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 131 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_cache.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 12 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.h | 53 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_locking.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/util.c | 3 |
10 files changed, 120 insertions, 102 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 9c9464efd333..871a41b923da 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -347,12 +347,14 @@ err: return ret; } -int bch2_alloc_write(struct bch_fs *c) +int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote) { struct bch_dev *ca; unsigned i; int ret = 0; + *wrote = false; + for_each_rw_member(ca, c, i) { struct btree_iter iter; struct bucket_array *buckets; @@ -370,9 +372,14 @@ int bch2_alloc_write(struct bch_fs *c) if (!buckets->b[b].mark.dirty) continue; - ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0); + ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, + nowait + ? BTREE_INSERT_NOWAIT + : 0); if (ret) break; + + *wrote = true; } up_read(&ca->bucket_lock); bch2_btree_iter_unlock(&iter); @@ -1270,20 +1277,23 @@ static void flush_held_btree_writes(struct bch_fs *c) struct bucket_table *tbl; struct rhash_head *pos; struct btree *b; - bool flush_updates; - size_t i, nr_pending_updates; + bool nodes_blocked; + size_t i; + struct closure cl; + + closure_init_stack(&cl); clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); again: pr_debug("flushing dirty btree nodes"); cond_resched(); + closure_wait(&c->btree_interior_update_wait, &cl); - flush_updates = false; - nr_pending_updates = bch2_btree_interior_updates_nr_pending(c); + nodes_blocked = false; rcu_read_lock(); for_each_cached_btree(b, c, tbl, i, pos) - if (btree_node_dirty(b) && (!b->written || b->level)) { + if (btree_node_need_write(b)) { if (btree_node_may_write(b)) { rcu_read_unlock(); btree_node_lock_type(c, b, SIX_LOCK_read); @@ -1291,7 +1301,7 @@ again: six_unlock_read(&b->lock); goto again; } else { - flush_updates = true; + nodes_blocked = true; } } rcu_read_unlock(); @@ -1299,17 +1309,16 @@ again: if (c->btree_roots_dirty) bch2_journal_meta(&c->journal); - /* - * This is ugly, but it's needed to flush btree node writes - * without spinning... - */ - if (flush_updates) { - closure_wait_event(&c->btree_interior_update_wait, - bch2_btree_interior_updates_nr_pending(c) < - nr_pending_updates); + if (nodes_blocked) { + closure_sync(&cl); goto again; } + closure_wake_up(&c->btree_interior_update_wait); + closure_sync(&cl); + + closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); } static void allocator_start_issue_discards(struct bch_fs *c) @@ -1331,13 +1340,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) unsigned dev_iter; u64 journal_seq = 0; long bu; - bool invalidating_data = false; int ret = 0; - if (test_alloc_startup(c)) { - invalidating_data = true; + if (test_alloc_startup(c)) goto not_enough; - } /* Scan for buckets that are already invalidated: */ for_each_rw_member(ca, c, dev_iter) { @@ -1384,21 +1390,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) not_enough: pr_debug("not enough empty buckets; scanning for reclaimable buckets"); - for_each_rw_member(ca, c, dev_iter) { - find_reclaimable_buckets(c, ca); - - while (!fifo_full(&ca->free[RESERVE_BTREE]) && - (bu = next_alloc_bucket(ca)) >= 0) { - invalidating_data |= - bch2_invalidate_one_bucket(c, ca, bu, &journal_seq); - - fifo_push(&ca->free[RESERVE_BTREE], bu); - bucket_set_dirty(ca, bu); - } - } - - pr_debug("done scanning for reclaimable buckets"); - /* * We're moving buckets to freelists _before_ they've been marked as * invalidated on disk - we have to so that we can allocate new btree @@ -1408,38 +1399,59 @@ not_enough: * have cached data in them, which is live until they're marked as * invalidated on disk: */ - if (invalidating_data) { - pr_debug("invalidating existing data"); - set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); - } else { - pr_debug("issuing discards"); - allocator_start_issue_discards(c); - } + set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags); - /* - * XXX: it's possible for this to deadlock waiting on journal reclaim, - * since we're holding btree writes. What then? - */ - ret = bch2_alloc_write(c); - if (ret) - return ret; + while (1) { + bool wrote = false; - if (invalidating_data) { - pr_debug("flushing journal"); + for_each_rw_member(ca, c, dev_iter) { + find_reclaimable_buckets(c, ca); - ret = bch2_journal_flush_seq(&c->journal, journal_seq); - if (ret) - return ret; + while (!fifo_full(&ca->free[RESERVE_BTREE]) && + (bu = next_alloc_bucket(ca)) >= 0) { + bch2_invalidate_one_bucket(c, ca, bu, + &journal_seq); + + fifo_push(&ca->free[RESERVE_BTREE], bu); + bucket_set_dirty(ca, bu); + } + } + + pr_debug("done scanning for reclaimable buckets"); + + /* + * XXX: it's possible for this to deadlock waiting on journal reclaim, + * since we're holding btree writes. What then? + */ + ret = bch2_alloc_write(c, true, &wrote); - pr_debug("issuing discards"); - allocator_start_issue_discards(c); + /* + * If bch2_alloc_write() did anything, it may have used some + * buckets, and we need the RESERVE_BTREE freelist full - so we + * need to loop and scan again. + * And if it errored, it may have been because there weren't + * enough buckets, so just scan and loop again as long as it + * made some progress: + */ + if (!wrote && ret) + return ret; + if (!wrote && !ret) + break; } + pr_debug("flushing journal"); + + ret = bch2_journal_flush(&c->journal); + if (ret) + return ret; + + pr_debug("issuing discards"); + allocator_start_issue_discards(c); + set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); /* now flush dirty btree nodes: */ - if (invalidating_data) - flush_held_btree_writes(c); + flush_held_btree_writes(c); return 0; } @@ -1448,6 +1460,7 @@ int bch2_fs_allocator_start(struct bch_fs *c) { struct bch_dev *ca; unsigned i; + bool wrote; int ret; down_read(&c->gc_lock); @@ -1465,7 +1478,7 @@ int bch2_fs_allocator_start(struct bch_fs *c) } } - return bch2_alloc_write(c); + return bch2_alloc_write(c, false, &wrote); } void bch2_fs_allocator_background_init(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8ced4e845281..ef5ec659b05d 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -55,7 +55,7 @@ void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_stop(struct bch_dev *); int bch2_dev_allocator_start(struct bch_dev *); -int bch2_alloc_write(struct bch_fs *); +int bch2_alloc_write(struct bch_fs *, bool, bool *); int bch2_fs_allocator_start(struct bch_fs *); void bch2_fs_allocator_background_init(struct bch_fs *); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index b748afc778f4..65fc82fba071 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -171,6 +171,10 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) if (!btree_node_may_write(b)) goto out_unlock; + if (btree_node_dirty(b) && + test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) + goto out_unlock; + if (btree_node_dirty(b) || btree_node_write_in_flight(b) || btree_node_read_in_flight(b)) { diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index f205bddd814d..6f1b1e4317a0 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1330,8 +1330,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, if (!(old & (1 << BTREE_NODE_dirty))) return; - if (b->written && - !btree_node_may_write(b)) + if (!btree_node_may_write(b)) return; if (old & (1 << BTREE_NODE_write_in_flight)) { @@ -1347,7 +1346,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, } while (cmpxchg_acquire(&b->flags, old, new) != old); BUG_ON(btree_node_fake(b)); - BUG_ON(!list_empty(&b->write_blocked)); BUG_ON((b->will_make_reachable != 0) != !b->written); BUG_ON(b->written >= c->opts.btree_node_size); @@ -1685,15 +1683,13 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) unsigned long flags = READ_ONCE(b->flags); unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0; - if (//!(flags & (1 << BTREE_NODE_dirty)) && - !b->writes[0].wait.list.first && - !b->writes[1].wait.list.first && - !(b->will_make_reachable & 1)) + if (!(flags & (1 << BTREE_NODE_dirty))) continue; - pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n", + pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n", b, (flags & (1 << BTREE_NODE_dirty)) != 0, + (flags & (1 << BTREE_NODE_need_write)) != 0, b->level, b->written, !list_empty_careful(&b->write_blocked), diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 9c5a6f9471bd..c817aeed878a 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -3,6 +3,7 @@ #define _BCACHEFS_BTREE_IO_H #include "bset.h" +#include "btree_locking.h" #include "extents.h" #include "io_types.h" @@ -48,7 +49,7 @@ static inline void btree_node_wait_on_io(struct btree *b) static inline bool btree_node_may_write(struct btree *b) { return list_empty_careful(&b->write_blocked) && - !b->will_make_reachable; + (!b->written || !b->will_make_reachable); } enum compact_mode { @@ -100,42 +101,36 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); void bch2_btree_node_write(struct bch_fs *, struct btree *, enum six_lock_type); -/* - * btree_node_dirty() can be cleared with only a read lock, - * and for bch2_btree_node_write_cond() we want to set need_write iff it's - * still dirty: - */ -static inline void set_btree_node_need_write_if_dirty(struct btree *b) +static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b) { - unsigned long old, new, v = READ_ONCE(b->flags); - - do { - old = new = v; - - if (!(old & (1 << BTREE_NODE_dirty))) - return; - - new |= (1 << BTREE_NODE_need_write); - } while ((v = cmpxchg(&b->flags, old, new)) != old); + while (b->written && + btree_node_need_write(b) && + btree_node_may_write(b)) { + if (!btree_node_write_in_flight(b)) { + bch2_btree_node_write(c, b, SIX_LOCK_read); + break; + } + + six_unlock_read(&b->lock); + btree_node_wait_on_io(b); + btree_node_lock_type(c, b, SIX_LOCK_read); + } } #define bch2_btree_node_write_cond(_c, _b, cond) \ do { \ - while ((_b)->written && btree_node_dirty(_b) && (cond)) { \ - if (!btree_node_may_write(_b)) { \ - set_btree_node_need_write_if_dirty(_b); \ - break; \ - } \ + unsigned long old, new, v = READ_ONCE((_b)->flags); \ + \ + do { \ + old = new = v; \ \ - if (!btree_node_write_in_flight(_b)) { \ - bch2_btree_node_write(_c, _b, SIX_LOCK_read); \ + if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ break; \ - } \ \ - six_unlock_read(&(_b)->lock); \ - btree_node_wait_on_io(_b); \ - btree_node_lock_type(c, b, SIX_LOCK_read); \ - } \ + new |= (1 << BTREE_NODE_need_write); \ + } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ + \ + btree_node_write_if_need(_c, _b); \ } while (0) void bch2_btree_flush_all_reads(struct bch_fs *); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 912292dad6e5..52e0e003153b 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_BTREE_ITER_H #define _BCACHEFS_BTREE_ITER_H +#include "bset.h" #include "btree_types.h" static inline void btree_iter_set_dirty(struct btree_iter *iter, diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 3871e14e480d..48b50e066186 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -11,7 +11,6 @@ */ #include "btree_iter.h" -#include "btree_io.h" #include "six.h" /* matches six lock types */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index a314bda544dd..2efe191cdc30 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -367,6 +367,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev set_btree_node_accessed(b); set_btree_node_dirty(b); + set_btree_node_need_write(b); bch2_bset_init_first(b, &b->data->keys); memset(&b->nr, 0, sizeof(b->nr)); @@ -655,6 +656,12 @@ retry: closure_wait(&btree_current_write(b)->wait, cl); list_del(&as->write_blocked_list); + + /* + * for flush_held_btree_writes() waiting on updates to flush or + * nodes to be writeable: + */ + closure_wake_up(&c->btree_interior_update_wait); mutex_unlock(&c->btree_interior_update_lock); /* @@ -958,6 +965,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { list_del(&p->write_blocked_list); btree_update_reparent(as, p); + + /* + * for flush_held_btree_writes() waiting on updates to flush or + * nodes to be writeable: + */ + closure_wake_up(&c->btree_interior_update_wait); } clear_btree_node_dirty(b); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 6501dcf12d59..34e5f81b2b5e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1038,7 +1038,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), - btree_reserve); + btree_reserve * 2); bool resize = ca->buckets[0] != NULL, start_copygc = ca->copygc_thread != NULL; int ret = -ENOMEM; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 8931aa6a1e2a..d998e51dbc30 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -25,9 +25,6 @@ #include "eytzinger.h" #include "util.h" -#define simple_strtoint(c, end, base) simple_strtol(c, end, base) -#define simple_strtouint(c, end, base) simple_strtoul(c, end, base) - static const char si_units[] = "?kMGTPEZY"; static int __bch2_strtoh(const char *cp, u64 *res, |