summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2019-01-13 16:02:22 -0500
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:14 -0400
commit: d0cc3defba58889e38eaa0c275d4728b4ac3b8c2 (patch)
tree: 7d1eb757681cb09dae9960d4970ec2178a5ba186
parent: b8adb833652909221efde19b1813627382b5bf51 (diff)
download: lwn-d0cc3defba58889e38eaa0c275d4728b4ac3b8c2.tar.gz
          lwn-d0cc3defba58889e38eaa0c275d4728b4ac3b8c2.zip
bcachefs: More allocator startup improvements
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--  fs/bcachefs/alloc_background.c       131
-rw-r--r--  fs/bcachefs/alloc_background.h         2
-rw-r--r--  fs/bcachefs/btree_cache.c              4
-rw-r--r--  fs/bcachefs/btree_io.c                12
-rw-r--r--  fs/bcachefs/btree_io.h                53
-rw-r--r--  fs/bcachefs/btree_iter.h               1
-rw-r--r--  fs/bcachefs/btree_locking.h            1
-rw-r--r--  fs/bcachefs/btree_update_interior.c   13
-rw-r--r--  fs/bcachefs/buckets.c                  2
-rw-r--r--  fs/bcachefs/util.c                     3
10 files changed, 120 insertions, 102 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 9c9464efd333..871a41b923da 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -347,12 +347,14 @@ err:
return ret;
}
-int bch2_alloc_write(struct bch_fs *c)
+int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
{
struct bch_dev *ca;
unsigned i;
int ret = 0;
+ *wrote = false;
+
for_each_rw_member(ca, c, i) {
struct btree_iter iter;
struct bucket_array *buckets;
@@ -370,9 +372,14 @@ int bch2_alloc_write(struct bch_fs *c)
if (!buckets->b[b].mark.dirty)
continue;
- ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0);
+ ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
+ nowait
+ ? BTREE_INSERT_NOWAIT
+ : 0);
if (ret)
break;
+
+ *wrote = true;
}
up_read(&ca->bucket_lock);
bch2_btree_iter_unlock(&iter);
@@ -1270,20 +1277,23 @@ static void flush_held_btree_writes(struct bch_fs *c)
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
- bool flush_updates;
- size_t i, nr_pending_updates;
+ bool nodes_blocked;
+ size_t i;
+ struct closure cl;
+
+ closure_init_stack(&cl);
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
pr_debug("flushing dirty btree nodes");
cond_resched();
+ closure_wait(&c->btree_interior_update_wait, &cl);
- flush_updates = false;
- nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+ nodes_blocked = false;
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos)
- if (btree_node_dirty(b) && (!b->written || b->level)) {
+ if (btree_node_need_write(b)) {
if (btree_node_may_write(b)) {
rcu_read_unlock();
btree_node_lock_type(c, b, SIX_LOCK_read);
@@ -1291,7 +1301,7 @@ again:
six_unlock_read(&b->lock);
goto again;
} else {
- flush_updates = true;
+ nodes_blocked = true;
}
}
rcu_read_unlock();
@@ -1299,17 +1309,16 @@ again:
if (c->btree_roots_dirty)
bch2_journal_meta(&c->journal);
- /*
- * This is ugly, but it's needed to flush btree node writes
- * without spinning...
- */
- if (flush_updates) {
- closure_wait_event(&c->btree_interior_update_wait,
- bch2_btree_interior_updates_nr_pending(c) <
- nr_pending_updates);
+ if (nodes_blocked) {
+ closure_sync(&cl);
goto again;
}
+ closure_wake_up(&c->btree_interior_update_wait);
+ closure_sync(&cl);
+
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_nr_pending(c));
}
static void allocator_start_issue_discards(struct bch_fs *c)
@@ -1331,13 +1340,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
unsigned dev_iter;
u64 journal_seq = 0;
long bu;
- bool invalidating_data = false;
int ret = 0;
- if (test_alloc_startup(c)) {
- invalidating_data = true;
+ if (test_alloc_startup(c))
goto not_enough;
- }
/* Scan for buckets that are already invalidated: */
for_each_rw_member(ca, c, dev_iter) {
@@ -1384,21 +1390,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
not_enough:
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
- for_each_rw_member(ca, c, dev_iter) {
- find_reclaimable_buckets(c, ca);
-
- while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
- (bu = next_alloc_bucket(ca)) >= 0) {
- invalidating_data |=
- bch2_invalidate_one_bucket(c, ca, bu, &journal_seq);
-
- fifo_push(&ca->free[RESERVE_BTREE], bu);
- bucket_set_dirty(ca, bu);
- }
- }
-
- pr_debug("done scanning for reclaimable buckets");
-
/*
* We're moving buckets to freelists _before_ they've been marked as
* invalidated on disk - we have to so that we can allocate new btree
@@ -1408,38 +1399,59 @@ not_enough:
* have cached data in them, which is live until they're marked as
* invalidated on disk:
*/
- if (invalidating_data) {
- pr_debug("invalidating existing data");
- set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
- } else {
- pr_debug("issuing discards");
- allocator_start_issue_discards(c);
- }
+ set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
- /*
- * XXX: it's possible for this to deadlock waiting on journal reclaim,
- * since we're holding btree writes. What then?
- */
- ret = bch2_alloc_write(c);
- if (ret)
- return ret;
+ while (1) {
+ bool wrote = false;
- if (invalidating_data) {
- pr_debug("flushing journal");
+ for_each_rw_member(ca, c, dev_iter) {
+ find_reclaimable_buckets(c, ca);
- ret = bch2_journal_flush_seq(&c->journal, journal_seq);
- if (ret)
- return ret;
+ while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
+ (bu = next_alloc_bucket(ca)) >= 0) {
+ bch2_invalidate_one_bucket(c, ca, bu,
+ &journal_seq);
+
+ fifo_push(&ca->free[RESERVE_BTREE], bu);
+ bucket_set_dirty(ca, bu);
+ }
+ }
+
+ pr_debug("done scanning for reclaimable buckets");
+
+ /*
+ * XXX: it's possible for this to deadlock waiting on journal reclaim,
+ * since we're holding btree writes. What then?
+ */
+ ret = bch2_alloc_write(c, true, &wrote);
- pr_debug("issuing discards");
- allocator_start_issue_discards(c);
+ /*
+ * If bch2_alloc_write() did anything, it may have used some
+ * buckets, and we need the RESERVE_BTREE freelist full - so we
+ * need to loop and scan again.
+ * And if it errored, it may have been because there weren't
+ * enough buckets, so just scan and loop again as long as it
+ * made some progress:
+ */
+ if (!wrote && ret)
+ return ret;
+ if (!wrote && !ret)
+ break;
}
+ pr_debug("flushing journal");
+
+ ret = bch2_journal_flush(&c->journal);
+ if (ret)
+ return ret;
+
+ pr_debug("issuing discards");
+ allocator_start_issue_discards(c);
+
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
/* now flush dirty btree nodes: */
- if (invalidating_data)
- flush_held_btree_writes(c);
+ flush_held_btree_writes(c);
return 0;
}
@@ -1448,6 +1460,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
+ bool wrote;
int ret;
down_read(&c->gc_lock);
@@ -1465,7 +1478,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
}
}
- return bch2_alloc_write(c);
+ return bch2_alloc_write(c, false, &wrote);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 8ced4e845281..ef5ec659b05d 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -55,7 +55,7 @@ void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
-int bch2_alloc_write(struct bch_fs *);
+int bch2_alloc_write(struct bch_fs *, bool, bool *);
int bch2_fs_allocator_start(struct bch_fs *);
void bch2_fs_allocator_background_init(struct bch_fs *);
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index b748afc778f4..65fc82fba071 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -171,6 +171,10 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
if (!btree_node_may_write(b))
goto out_unlock;
+ if (btree_node_dirty(b) &&
+ test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+ goto out_unlock;
+
if (btree_node_dirty(b) ||
btree_node_write_in_flight(b) ||
btree_node_read_in_flight(b)) {
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index f205bddd814d..6f1b1e4317a0 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1330,8 +1330,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (!(old & (1 << BTREE_NODE_dirty)))
return;
- if (b->written &&
- !btree_node_may_write(b))
+ if (!btree_node_may_write(b))
return;
if (old & (1 << BTREE_NODE_write_in_flight)) {
@@ -1347,7 +1346,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
} while (cmpxchg_acquire(&b->flags, old, new) != old);
BUG_ON(btree_node_fake(b));
- BUG_ON(!list_empty(&b->write_blocked));
BUG_ON((b->will_make_reachable != 0) != !b->written);
BUG_ON(b->written >= c->opts.btree_node_size);
@@ -1685,15 +1683,13 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
unsigned long flags = READ_ONCE(b->flags);
unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0;
- if (//!(flags & (1 << BTREE_NODE_dirty)) &&
- !b->writes[0].wait.list.first &&
- !b->writes[1].wait.list.first &&
- !(b->will_make_reachable & 1))
+ if (!(flags & (1 << BTREE_NODE_dirty)))
continue;
- pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n",
+ pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n",
b,
(flags & (1 << BTREE_NODE_dirty)) != 0,
+ (flags & (1 << BTREE_NODE_need_write)) != 0,
b->level,
b->written,
!list_empty_careful(&b->write_blocked),
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 9c5a6f9471bd..c817aeed878a 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_BTREE_IO_H
#include "bset.h"
+#include "btree_locking.h"
#include "extents.h"
#include "io_types.h"
@@ -48,7 +49,7 @@ static inline void btree_node_wait_on_io(struct btree *b)
static inline bool btree_node_may_write(struct btree *b)
{
return list_empty_careful(&b->write_blocked) &&
- !b->will_make_reachable;
+ (!b->written || !b->will_make_reachable);
}
enum compact_mode {
@@ -100,42 +101,36 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
enum six_lock_type);
-/*
- * btree_node_dirty() can be cleared with only a read lock,
- * and for bch2_btree_node_write_cond() we want to set need_write iff it's
- * still dirty:
- */
-static inline void set_btree_node_need_write_if_dirty(struct btree *b)
+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b)
{
- unsigned long old, new, v = READ_ONCE(b->flags);
-
- do {
- old = new = v;
-
- if (!(old & (1 << BTREE_NODE_dirty)))
- return;
-
- new |= (1 << BTREE_NODE_need_write);
- } while ((v = cmpxchg(&b->flags, old, new)) != old);
+ while (b->written &&
+ btree_node_need_write(b) &&
+ btree_node_may_write(b)) {
+ if (!btree_node_write_in_flight(b)) {
+ bch2_btree_node_write(c, b, SIX_LOCK_read);
+ break;
+ }
+
+ six_unlock_read(&b->lock);
+ btree_node_wait_on_io(b);
+ btree_node_lock_type(c, b, SIX_LOCK_read);
+ }
}
#define bch2_btree_node_write_cond(_c, _b, cond) \
do { \
- while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
- if (!btree_node_may_write(_b)) { \
- set_btree_node_need_write_if_dirty(_b); \
- break; \
- } \
+ unsigned long old, new, v = READ_ONCE((_b)->flags); \
+ \
+ do { \
+ old = new = v; \
\
- if (!btree_node_write_in_flight(_b)) { \
- bch2_btree_node_write(_c, _b, SIX_LOCK_read); \
+ if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \
break; \
- } \
\
- six_unlock_read(&(_b)->lock); \
- btree_node_wait_on_io(_b); \
- btree_node_lock_type(c, b, SIX_LOCK_read); \
- } \
+ new |= (1 << BTREE_NODE_need_write); \
+ } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \
+ \
+ btree_node_write_if_need(_c, _b); \
} while (0)
void bch2_btree_flush_all_reads(struct bch_fs *);
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 912292dad6e5..52e0e003153b 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_BTREE_ITER_H
#define _BCACHEFS_BTREE_ITER_H
+#include "bset.h"
#include "btree_types.h"
static inline void btree_iter_set_dirty(struct btree_iter *iter,
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 3871e14e480d..48b50e066186 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -11,7 +11,6 @@
*/
#include "btree_iter.h"
-#include "btree_io.h"
#include "six.h"
/* matches six lock types */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a314bda544dd..2efe191cdc30 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -367,6 +367,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
set_btree_node_accessed(b);
set_btree_node_dirty(b);
+ set_btree_node_need_write(b);
bch2_bset_init_first(b, &b->data->keys);
memset(&b->nr, 0, sizeof(b->nr));
@@ -655,6 +656,12 @@ retry:
closure_wait(&btree_current_write(b)->wait, cl);
list_del(&as->write_blocked_list);
+
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
mutex_unlock(&c->btree_interior_update_lock);
/*
@@ -958,6 +965,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
list_del(&p->write_blocked_list);
btree_update_reparent(as, p);
+
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
}
clear_btree_node_dirty(b);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 6501dcf12d59..34e5f81b2b5e 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1038,7 +1038,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
- btree_reserve);
+ btree_reserve * 2);
bool resize = ca->buckets[0] != NULL,
start_copygc = ca->copygc_thread != NULL;
int ret = -ENOMEM;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 8931aa6a1e2a..d998e51dbc30 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -25,9 +25,6 @@
#include "eytzinger.h"
#include "util.h"
-#define simple_strtoint(c, end, base) simple_strtol(c, end, base)
-#define simple_strtouint(c, end, base) simple_strtoul(c, end, base)
-
static const char si_units[] = "?kMGTPEZY";
static int __bch2_strtoh(const char *cp, u64 *res,