diff options
Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r-- | fs/bcachefs/journal.c | 222 |
1 files changed, 149 insertions, 73 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 05b1250619ec..d8f74b6d0a75 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -20,13 +20,6 @@ #include "journal_seq_blacklist.h" #include "trace.h" -static const char * const bch2_journal_errors[] = { -#define x(n) #n, - JOURNAL_ERRORS() -#undef x - NULL -}; - static inline bool journal_seq_unwritten(struct journal *j, u64 seq) { return seq > j->seq_ondisk; @@ -56,14 +49,20 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 prt_printf(out, "seq:\t%llu\n", seq); printbuf_indent_add(out, 2); - prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i)); + if (!buf->write_started) + prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK)); - prt_printf(out, "size:\t"); - prt_human_readable_u64(out, vstruct_bytes(buf->data)); - prt_newline(out); + struct closure *cl = &buf->io; + int r = atomic_read(&cl->remaining); + prt_printf(out, "io:\t%pS r %i\n", cl->fn, r & CLOSURE_REMAINING_MASK); + + if (buf->data) { + prt_printf(out, "size:\t"); + prt_human_readable_u64(out, vstruct_bytes(buf->data)); + prt_newline(out); + } - prt_printf(out, "expires:\t"); - prt_printf(out, "%li jiffies\n", buf->expires - jiffies); + prt_printf(out, "expires:\t%li jiffies\n", buf->expires - jiffies); prt_printf(out, "flags:\t"); if (buf->noflush) @@ -87,6 +86,9 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) { + lockdep_assert_held(&j->lock); + out->atomic++; + if (!out->nr_tabstops) printbuf_tabstop_push(out, 24); @@ -95,6 +97,8 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) seq++) bch2_journal_buf_to_text(out, j, seq); prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed"); + + --out->atomic; } static inline struct journal_buf * @@ -104,10 +108,8 @@ journal_seq_to_buf(struct journal *j, u64 seq) EBUG_ON(seq > journal_cur_seq(j)); - if (journal_seq_unwritten(j, seq)) { + if (journal_seq_unwritten(j, seq)) buf = j->buf + (seq & JOURNAL_BUF_MASK); - EBUG_ON(le64_to_cpu(buf->data->seq) != seq); - } return buf; } @@ -139,8 +141,10 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) bool stuck = false; struct printbuf buf = PRINTBUF; - if (!(error == JOURNAL_ERR_journal_full || - error == JOURNAL_ERR_journal_pin_full) || + buf.atomic++; + + if (!(error == -BCH_ERR_journal_full || + error == -BCH_ERR_journal_pin_full) || nr_unwritten_journal_entries(j) || (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) return stuck; @@ -164,12 +168,12 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) return stuck; } j->err_seq = journal_cur_seq(j); - spin_unlock(&j->lock); - bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)", - bch2_journal_errors[error]); - bch2_journal_debug_to_text(&buf, j); - bch_err(c, "%s", buf.buf); + __bch2_journal_debug_to_text(&buf, j); + spin_unlock(&j->lock); + prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"), + bch2_err_str(error)); + bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_reset(&buf); bch2_journal_pins_to_text(&buf, j); @@ -195,7 +199,8 @@ void bch2_journal_do_writes(struct journal *j) if (w->write_started) continue; - if (!journal_state_count(j->reservations, idx)) { + if (!journal_state_seq_count(j, j->reservations, seq)) { + j->seq_write_started = seq; w->write_started = true; closure_call(&w->io, bch2_journal_write, j->wq, NULL); } @@ -306,7 +311,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t bch2_journal_space_available(j); - __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq)); + __bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq)); } void bch2_journal_halt(struct journal *j) @@ -377,29 +382,41 @@ static int journal_entry_open(struct journal *j) BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); if (j->blocked) - return JOURNAL_ERR_blocked; + return -BCH_ERR_journal_blocked; if (j->cur_entry_error) return j->cur_entry_error; - if (bch2_journal_error(j)) - return JOURNAL_ERR_insufficient_devices; /* -EROFS */ + int ret = bch2_journal_error(j); + if (unlikely(ret)) + return ret; if (!fifo_free(&j->pin)) - return JOURNAL_ERR_journal_pin_full; + return -BCH_ERR_journal_pin_full; if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) - return JOURNAL_ERR_max_in_flight; + return -BCH_ERR_journal_max_in_flight; + + if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR) + return -BCH_ERR_journal_max_open; if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) { bch_err(c, "cannot start: journal seq overflow"); if (bch2_fs_emergency_read_only_locked(c)) bch_err(c, "fatal error - emergency read only"); - return JOURNAL_ERR_insufficient_devices; /* -EROFS */ + return -BCH_ERR_journal_shutdown; } + if (!j->free_buf && !buf->data) + return -BCH_ERR_journal_buf_enomem; /* will retry after write completion frees up a buf */ + BUG_ON(!j->cur_entry_sectors); + if (!buf->data) { + swap(buf->data, j->free_buf); + swap(buf->buf_size, j->free_buf_size); + } + buf->expires = (journal_cur_seq(j) == j->flushed_seq_ondisk ? jiffies @@ -415,7 +432,7 @@ static int journal_entry_open(struct journal *j) u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); if (u64s <= (ssize_t) j->early_journal_entries.nr) - return JOURNAL_ERR_journal_full; + return -BCH_ERR_journal_full; if (fifo_empty(&j->pin) && j->reclaim_thread) wake_up_process(j->reclaim_thread); @@ -464,7 +481,7 @@ static int journal_entry_open(struct journal *j) new.idx++; BUG_ON(journal_state_count(new, new.idx)); - BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK)); + BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_STATE_BUF_MASK)); journal_state_inc(&new); @@ -514,6 +531,33 @@ static void journal_write_work(struct work_struct *work) spin_unlock(&j->lock); } +static void journal_buf_prealloc(struct journal *j) +{ + if (j->free_buf && + j->free_buf_size >= j->buf_size_want) + return; + + unsigned buf_size = j->buf_size_want; + + spin_unlock(&j->lock); + void *buf = kvmalloc(buf_size, GFP_NOFS); + spin_lock(&j->lock); + + if (buf && + (!j->free_buf || + buf_size > j->free_buf_size)) { + swap(buf, j->free_buf); + swap(buf_size, j->free_buf_size); + } + + if (unlikely(buf)) { + spin_unlock(&j->lock); + /* kvfree can sleep */ + kvfree(buf); + spin_lock(&j->lock); + } +} + static int __journal_res_get(struct journal *j, struct journal_res *res, unsigned flags) { @@ -525,25 +569,28 @@ retry: if (journal_res_get_fast(j, res, flags)) return 0; - if (bch2_journal_error(j)) - return -BCH_ERR_erofs_journal_err; + ret = bch2_journal_error(j); + if (unlikely(ret)) + return ret; if (j->blocked) - return -BCH_ERR_journal_res_get_blocked; + return -BCH_ERR_journal_blocked; if ((flags & BCH_WATERMARK_MASK) < j->watermark) { - ret = JOURNAL_ERR_journal_full; + ret = -BCH_ERR_journal_full; can_discard = j->can_discard; goto out; } if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) { - ret = JOURNAL_ERR_max_in_flight; + ret = -BCH_ERR_journal_max_in_flight; goto out; } spin_lock(&j->lock); + journal_buf_prealloc(j); + /* * Recheck after taking the lock, so we don't race with another thread * that just did journal_entry_open() and call bch2_journal_entry_close() @@ -566,25 +613,48 @@ retry: j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false); - ret = journal_entry_open(j) ?: JOURNAL_ERR_retry; + ret = journal_entry_open(j) ?: -BCH_ERR_journal_retry_open; unlock: can_discard = j->can_discard; spin_unlock(&j->lock); out: - if (ret == JOURNAL_ERR_retry) - goto retry; - if (!ret) + if (likely(!ret)) return 0; + if (ret == -BCH_ERR_journal_retry_open) + goto retry; if (journal_error_check_stuck(j, ret, flags)) - ret = -BCH_ERR_journal_res_get_blocked; + ret = -BCH_ERR_journal_stuck; + + if (ret == -BCH_ERR_journal_max_in_flight && + track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) && + trace_journal_entry_full_enabled()) { + struct printbuf buf = PRINTBUF; - if (ret == JOURNAL_ERR_max_in_flight && - track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) { + bch2_printbuf_make_room(&buf, 4096); + + spin_lock(&j->lock); + prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); + bch2_journal_bufs_to_text(&buf, j); + spin_unlock(&j->lock); + + trace_journal_entry_full(c, buf.buf); + printbuf_exit(&buf); + count_event(c, journal_entry_full); + } + if (ret == -BCH_ERR_journal_max_open && + track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) && + trace_journal_entry_full_enabled()) { struct printbuf buf = PRINTBUF; + + bch2_printbuf_make_room(&buf, 4096); + + spin_lock(&j->lock); prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); bch2_journal_bufs_to_text(&buf, j); + spin_unlock(&j->lock); + trace_journal_entry_full(c, buf.buf); printbuf_exit(&buf); count_event(c, journal_entry_full); @@ -594,8 +664,8 @@ out: * Journal is full - can't rely on reclaim from work item due to * freezing: */ - if ((ret == JOURNAL_ERR_journal_full || - ret == JOURNAL_ERR_journal_pin_full) && + if ((ret == -BCH_ERR_journal_full || + ret == -BCH_ERR_journal_pin_full) && !(flags & JOURNAL_RES_GET_NONBLOCK)) { if (can_discard) { bch2_journal_do_discards(j); @@ -608,9 +678,7 @@ out: } } - return ret == JOURNAL_ERR_insufficient_devices - ? -BCH_ERR_erofs_journal_err - : -BCH_ERR_journal_res_get_blocked; + return ret; } static unsigned max_dev_latency(struct bch_fs *c) @@ -640,7 +708,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, int ret; if (closure_wait_event_timeout(&j->async_wait, - (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || + !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || (flags & JOURNAL_RES_GET_NONBLOCK), HZ)) return ret; @@ -654,19 +722,19 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, remaining_wait = max(0, remaining_wait - HZ); if (closure_wait_event_timeout(&j->async_wait, - (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || + !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || (flags & JOURNAL_RES_GET_NONBLOCK), remaining_wait)) return ret; struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); - bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s", - buf.buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); printbuf_exit(&buf); closure_wait_event(&j->async_wait, - (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || + !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || (flags & JOURNAL_RES_GET_NONBLOCK)); return ret; } @@ -687,7 +755,6 @@ void bch2_journal_entry_res_resize(struct journal *j, goto out; j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); - smp_mb(); state = READ_ONCE(j->reservations); if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && @@ -907,7 +974,7 @@ int bch2_journal_meta(struct journal *j) struct bch_fs *c = container_of(j, struct bch_fs, journal); if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) - return -EROFS; + return -BCH_ERR_erofs_no_writes; int ret = __bch2_journal_meta(j); bch2_write_ref_put(c, BCH_WRITE_REF_journal); @@ -951,7 +1018,8 @@ static void __bch2_journal_block(struct journal *j) new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL; } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); - journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset); + if (old.cur_entry_offset < JOURNAL_ENTRY_BLOCKED_VAL) + journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset); } } @@ -992,7 +1060,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou *blocked = true; } - ret = journal_state_count(s, idx) > open + ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open ? ERR_PTR(-EAGAIN) : buf; break; @@ -1247,7 +1315,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) int ret = bch2_dev_journal_alloc(ca, true); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ret; } } @@ -1336,6 +1404,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) nr = cur_seq - last_seq; + /* + * Extra fudge factor, in case we crashed when the journal pin fifo was + * nearly or completely full. We'll need to be able to open additional + * journal entries (at least a few) in order for journal replay to get + * going: + */ + nr += nr / 4; + if (nr + 1 > j->pin.size) { free_fifo(&j->pin); init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); @@ -1349,6 +1425,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->replay_journal_seq_end = cur_seq; j->last_seq_ondisk = last_seq; j->flushed_seq_ondisk = cur_seq - 1; + j->seq_write_started = cur_seq - 1; j->seq_ondisk = cur_seq - 1; j->pin.front = last_seq; j->pin.back = cur_seq; @@ -1389,15 +1466,12 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) set_bit(JOURNAL_running, &j->flags); j->last_flush_write = jiffies; - j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j); - j->reservations.unwritten_idx++; + j->reservations.idx = journal_cur_seq(j); c->last_bucket_seq_cleanup = journal_cur_seq(j); - - bch2_journal_space_available(j); spin_unlock(&j->lock); - return bch2_journal_reclaim_start(j); + return 0; } /* init/exit: */ @@ -1443,7 +1517,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { - ja->bio[i] = kmalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, + ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, nr_bvecs), GFP_KERNEL); if (!ja->bio[i]) return -BCH_ERR_ENOMEM_dev_journal_init; @@ -1482,6 +1556,7 @@ void bch2_fs_journal_exit(struct journal *j) for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) kvfree(j->buf[i].data); + kvfree(j->free_buf); free_fifo(&j->pin); } @@ -1508,13 +1583,13 @@ int bch2_fs_journal_init(struct journal *j) if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) return -BCH_ERR_ENOMEM_journal_pin_fifo; - for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) { - j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN; - j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL); - if (!j->buf[i].data) - return -BCH_ERR_ENOMEM_journal_buf; + j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN; + j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL); + if (!j->free_buf) + return -BCH_ERR_ENOMEM_journal_buf; + + for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) j->buf[i].idx = i; - } j->pin.front = j->pin.back = 1; @@ -1564,6 +1639,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "average write size:\t"); prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0); prt_newline(out); + prt_printf(out, "free buf:\t%u\n", j->free_buf ? j->free_buf_size : 0); prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim); prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim); prt_printf(out, "reclaim kicked:\t%u\n", j->reclaim_kicked); @@ -1571,7 +1647,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0); prt_printf(out, "blocked:\t%u\n", j->blocked); prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); - prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]); + prt_printf(out, "current entry error:\t%s\n", bch2_err_str(j->cur_entry_error)); prt_printf(out, "current entry:\t"); switch (s.cur_entry_offset) { |