diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-04-03 16:24:13 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:59 -0400 |
commit | 2940295c97f49ffe0b2f564dea394094581073e7 (patch) | |
tree | c69d82841a194290f9b45aa133cdc2a5059c3dbb /fs | |
parent | 6167f7c8ff5ce564423fe8b416b5f95d1712859b (diff) | |
download | lwn-2940295c97f49ffe0b2f564dea394094581073e7.tar.gz lwn-2940295c97f49ffe0b2f564dea394094581073e7.zip |
bcachefs: Be more careful about JOURNAL_RES_GET_RESERVED
JOURNAL_RES_GET_RESERVED should only be used for updates that need to be
done to free up space in the journal. In particular, when we're flushing
keys from the key cache, if we're flushing them out of order we
shouldn't be using it, since we're using up our remaining space in the
journal without dropping a pin that will let us make forward progress.
With this patch, BTREE_INSERT_JOURNAL_RECLAIM without
BTREE_INSERT_JOURNAL_RESERVED may return -EAGAIN - we can't wait on
journal reclaim if we're already in journal reclaim.
This means we need to propagate these errors up to journal reclaim,
indicating that flushing a journal pin should be retried in the future.
This is prep work for a patch to change the way journal reclaim works:
it will separate flushing key cache keys because the btree key cache is
too dirty from running journal reclaim because we need space in the journal.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 24 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 9 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 24 | ||||
-rw-r--r-- | fs/bcachefs/journal.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 61 | ||||
-rw-r--r-- | fs/bcachefs/journal_types.h | 3 |
7 files changed, 99 insertions, 40 deletions
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 0858f469f7c2..74d982c3402a 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -353,6 +353,7 @@ err: static int btree_key_cache_flush_pos(struct btree_trans *trans, struct bkey_cached_key key, u64 journal_seq, + unsigned commit_flags, bool evict) { struct bch_fs *c = trans->c; @@ -391,12 +392,17 @@ retry: BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_RESERVED| - BTREE_INSERT_JOURNAL_RECLAIM); + (ck->journal.seq == journal_last_seq(j) + ? BTREE_INSERT_JOURNAL_RESERVED + : 0)| + commit_flags); err: if (ret == -EINTR) goto retry; + if (ret == -EAGAIN) + goto out; + if (ret) { bch2_fs_fatal_err_on(!bch2_journal_error(j), c, "error flushing key cache: %i", ret); @@ -439,15 +445,16 @@ out: return ret; } -static void btree_key_cache_journal_flush(struct journal *j, - struct journal_entry_pin *pin, - u64 seq) +static int btree_key_cache_journal_flush(struct journal *j, + struct journal_entry_pin *pin, + u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; struct btree_trans trans; + int ret = 0; int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); @@ -462,10 +469,13 @@ static void btree_key_cache_journal_flush(struct journal *j, six_unlock_read(&ck->c.lock); bch2_trans_init(&trans, c, 0, 0); - btree_key_cache_flush_pos(&trans, key, seq, false); + ret = btree_key_cache_flush_pos(&trans, key, seq, + BTREE_INSERT_JOURNAL_RECLAIM, false); bch2_trans_exit(&trans); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + + return ret; } /* @@ -481,7 +491,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans, if (!bch2_btree_key_cache_find(c, id, pos)) return 0; - return btree_key_cache_flush_pos(trans, key, 0, true); + return btree_key_cache_flush_pos(trans, key, 0, 0, true); } bool 
bch2_btree_insert_key_cached(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 988922699e8b..7aba0e9d99c1 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -916,10 +916,12 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level, struct closure cl; int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) ? BCH_DISK_RESERVATION_NOFAIL : 0; - int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED) - ? JOURNAL_RES_GET_RECLAIM : 0; + int journal_flags = 0; int ret = 0; + if (flags & BTREE_INSERT_JOURNAL_RESERVED) + journal_flags |= JOURNAL_RES_GET_RESERVED; + closure_init_stack(&cl); retry: /* @@ -982,6 +984,9 @@ retry: bch2_trans_unlock(trans); + if (flags & BTREE_INSERT_JOURNAL_RECLAIM) + goto err; + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, journal_flags); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index d3d86aa0ee95..ee1c26f2901f 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -134,7 +134,7 @@ fix_iter: return true; } -static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, +static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, unsigned i, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -145,14 +145,15 @@ static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, bch2_btree_node_write_cond(c, b, (btree_current_write(b) == w && w->journal.seq == seq)); six_unlock_read(&b->c.lock); + return 0; } -static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) +static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) { return __btree_node_flush(j, pin, 0, seq); } -static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq) +static int 
btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq) { return __btree_node_flush(j, pin, 1, seq); } @@ -563,8 +564,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ret = bch2_journal_preres_get(&c->journal, &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK| - ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) - ? JOURNAL_RES_GET_RECLAIM : 0)); + ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED) + ? JOURNAL_RES_GET_RESERVED : 0)); if (unlikely(ret == -EAGAIN)) ret = bch2_trans_journal_preres_get_cold(trans, trans->journal_preres_u64s); @@ -721,6 +722,10 @@ int bch2_trans_commit_error(struct btree_trans *trans, case BTREE_INSERT_NEED_JOURNAL_RES: bch2_trans_unlock(trans); + if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && + !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) + return -EAGAIN; + ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); if (ret) return ret; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index edbcbe7fb31f..bce056cb6841 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -11,6 +11,7 @@ #include "btree_gc.h" #include "btree_update.h" #include "buckets.h" +#include "error.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" @@ -449,6 +450,27 @@ unlock: if (!ret) goto retry; + if ((ret == cur_entry_journal_full || + ret == cur_entry_journal_pin_full) && + !can_discard && + j->reservations.idx == j->reservations.unwritten_idx && + (flags & JOURNAL_RES_GET_RESERVED)) { + char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC); + + bch_err(c, "Journal stuck!"); + if (journal_debug_buf) { + bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); + bch_err(c, "%s", journal_debug_buf); + + bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j); + bch_err(c, "Journal pins:\n%s", journal_debug_buf); + kfree(journal_debug_buf); + } + + bch2_fatal_error(c); + dump_stack(); + } + /* * Journal is full - can't rely on 
reclaim from work item due to * freezing: @@ -1169,6 +1191,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "last_seq_ondisk:\t%llu\n" "flushed_seq_ondisk:\t%llu\n" "prereserved:\t\t%u/%u\n" + "each entry reserved:\t%u\n" "nr flush writes:\t%llu\n" "nr noflush writes:\t%llu\n" "nr direct reclaim:\t%llu\n" @@ -1183,6 +1206,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) j->flushed_seq_ondisk, j->prereserved.reserved, j->prereserved.remaining, + j->entry_u64s_reserved, j->nr_flush_writes, j->nr_noflush_writes, j->nr_direct_reclaim, diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 547c735ce3cb..a0d19fad3bdd 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -308,7 +308,6 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, #define JOURNAL_RES_GET_NONBLOCK (1 << 0) #define JOURNAL_RES_GET_CHECK (1 << 1) #define JOURNAL_RES_GET_RESERVED (1 << 2) -#define JOURNAL_RES_GET_RECLAIM (1 << 3) static inline int journal_res_get_fast(struct journal *j, struct journal_res *res, @@ -446,7 +445,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j, * into the reclaim path and deadlock: */ - if (!(flags & JOURNAL_RES_GET_RECLAIM) && + if (!(flags & JOURNAL_RES_GET_RESERVED) && new.reserved > new.remaining) return 0; } while ((v = atomic64_cmpxchg(&j->prereserved.counter, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 3ef42a47f60d..42ed7a3525b1 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -239,7 +239,7 @@ void bch2_journal_space_available(struct journal *j) u64s_remaining = (u64) clean << 6; u64s_remaining -= (u64) total << 3; u64s_remaining = max(0LL, u64s_remaining); - u64s_remaining /= 2; + u64s_remaining /= 4; u64s_remaining = min_t(u64, u64s_remaining, U32_MAX); out: j->cur_entry_sectors = !ret ? 
j->space[journal_space_discarded].next_entry : 0; @@ -353,6 +353,9 @@ static inline void __journal_pin_drop(struct journal *j, if (!journal_pin_active(pin)) return; + if (j->flush_in_progress == pin) + j->flush_in_progress_dropped = true; + pin_list = journal_seq_pin(j, pin->seq); pin->seq = 0; list_del_init(&pin->list); @@ -439,34 +442,27 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) struct journal_entry_pin_list *pin_list; struct journal_entry_pin *ret = NULL; - if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)) - return NULL; - - spin_lock(&j->lock); - fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) if (*seq > max_seq || (ret = list_first_entry_or_null(&pin_list->list, struct journal_entry_pin, list))) break; - if (ret) { - list_move(&ret->list, &pin_list->flushed); - BUG_ON(j->flush_in_progress); - j->flush_in_progress = ret; - } - - spin_unlock(&j->lock); - return ret; } /* returns true if we did work */ -static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush, - unsigned min_nr) +static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush, + unsigned min_nr) { struct journal_entry_pin *pin; - u64 seq, ret = 0; + size_t nr_flushed = 0; + journal_pin_flush_fn flush_fn; + u64 seq; + int err; + + if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)) + return 0; lockdep_assert_held(&j->reclaim_lock); @@ -475,23 +471,42 @@ static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush, j->last_flushed = jiffies; + spin_lock(&j->lock); pin = journal_get_next_pin(j, min_nr ? 
U64_MAX : seq_to_flush, &seq); + if (pin) { + BUG_ON(j->flush_in_progress); + j->flush_in_progress = pin; + j->flush_in_progress_dropped = false; + flush_fn = pin->flush; + } + spin_unlock(&j->lock); + if (!pin) break; if (min_nr) min_nr--; - pin->flush(j, pin, seq); + err = flush_fn(j, pin, seq); - BUG_ON(j->flush_in_progress != pin); + spin_lock(&j->lock); + /* Pin might have been dropped or rearmed: */ + if (likely(!err && !j->flush_in_progress_dropped)) + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed); j->flush_in_progress = NULL; + j->flush_in_progress_dropped = false; + spin_unlock(&j->lock); + wake_up(&j->pin_flush_wait); - ret++; + + if (err) + break; + + nr_flushed++; } - return ret; + return nr_flushed; } static u64 journal_seq_to_flush(struct journal *j) @@ -556,8 +571,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct) { struct bch_fs *c = container_of(j, struct bch_fs, journal); bool kthread = (current->flags & PF_KTHREAD) != 0; - u64 seq_to_flush, nr_flushed = 0; - size_t min_nr; + u64 seq_to_flush; + size_t min_nr, nr_flushed; unsigned flags; int ret = 0; diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 3db8c3760cca..ec3c604cdf22 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -50,7 +50,7 @@ struct journal_entry_pin_list { struct journal; struct journal_entry_pin; -typedef void (*journal_pin_flush_fn)(struct journal *j, +typedef int (*journal_pin_flush_fn)(struct journal *j, struct journal_entry_pin *, u64); struct journal_entry_pin { @@ -251,6 +251,7 @@ struct journal { unsigned long last_flushed; struct journal_entry_pin *flush_in_progress; + bool flush_in_progress_dropped; wait_queue_head_t pin_flush_wait; /* protects advancing ja->discard_idx: */ |