diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2022-12-05 10:24:19 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:53 -0400 |
commit | 80c33085783656617d0d07e1bc9fba70a592ce5c (patch) | |
tree | 2f2a6b43a3b6caab092c4f74df9f5a582e1a60b0 /fs | |
parent | 1b30ed5fd87828b5e29647510eefb18a363e4d19 (diff) | |
download | lwn-80c33085783656617d0d07e1bc9fba70a592ce5c.tar.gz lwn-80c33085783656617d0d07e1bc9fba70a592ce5c.zip |
bcachefs: Fragmentation LRU
Now that we have much more efficient updates to the LRU btree, this
patch adds a new LRU that indexes buckets by fragmentation.
This means copygc no longer has to scan every bucket to find buckets
that need to be evacuated.
Changes:
- A new field in bch_alloc_v4, fragmentation_lru - this corresponds to
  the bucket's position in the fragmentation LRU. We add a new field
  for this instead of calculating it as needed because we may make the
  fragmentation LRU optional; this field indicates whether a bucket is
  on the fragmentation LRU.

  Also, zoned devices will introduce variable bucket sizes; explicitly
  recording the LRU position will be safer for them.

- A new copygc path for using the fragmentation LRU instead of
  scanning every bucket and building up an in-memory heap.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 20 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 15 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 11 | ||||
-rw-r--r-- | fs/bcachefs/lru.c | 38 | ||||
-rw-r--r-- | fs/bcachefs/lru.h | 21 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 51 | ||||
-rw-r--r-- | fs/bcachefs/move.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 171 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/trace.h | 10 |
13 files changed, 189 insertions, 160 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index af3e55fdd54a..aefe72d34c5b 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -415,6 +415,8 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_newline(out); prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); prt_newline(out); + prt_printf(out, "fragmentation %llu", a->fragmentation_lru); + prt_newline(out); prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); prt_newline(out); @@ -910,8 +912,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, !new_a->io_time[READ]) new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - old_lru = alloc_lru_idx(*old_a); - new_lru = alloc_lru_idx(*new_a); + old_lru = alloc_lru_idx_read(*old_a); + new_lru = alloc_lru_idx_read(*new_a); if (old_lru != new_lru) { ret = bch2_lru_change(trans, new->k.p.inode, @@ -921,6 +923,18 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, return ret; } + new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, + bch_dev_bkey_exists(c, new->k.p.inode)); + + if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + ret = bch2_lru_change(trans, + BCH_LRU_FRAGMENTATION_START, + bucket_to_u64(new->k.p), + old_a->fragmentation_lru, new_a->fragmentation_lru); + if (ret) + return ret; + } + if (old_a->gen != new_a->gen) { ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen); if (ret) @@ -1777,7 +1791,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, goto out; /* We expect harmless races here due to the btree write buffer: */ - if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) + if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v)) goto out; BUG_ON(a->v.data_type != BCH_DATA_cached); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index b3c2f1e0deb6..96ac8f396d46 100644 --- a/fs/bcachefs/alloc_background.h +++ 
b/fs/bcachefs/alloc_background.h @@ -64,11 +64,24 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, a.stripe, a, data_type); } -static inline u64 alloc_lru_idx(struct bch_alloc_v4 a) +static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; } +static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, + struct bch_dev *ca) +{ + if (a.data_type != BCH_DATA_btree && + a.data_type != BCH_DATA_user) + return 0; + + if (a.dirty_sectors >= ca->mi.bucket_size) + return 0; + + return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); +} + static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) { return ((u64) alloc_gc_gen(a) >> 4) << 56; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 84b30adf56c9..5dc4b0c133ad 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -927,7 +927,6 @@ struct bch_fs { /* COPYGC */ struct task_struct *copygc_thread; - copygc_heap copygc_heap; struct write_point copygc_write_point; s64 copygc_wait; bool copygc_running; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 99f9fbd1401f..9524ff02f2d7 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -992,6 +992,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + __u64 fragmentation_lru; } __packed __aligned(8); #define BCH_ALLOC_V4_U64s_V0 6 @@ -1563,7 +1564,8 @@ struct bch_sb_field_journal_seq_blacklist { x(inode_v3, 23) \ x(unwritten_extents, 24) \ x(bucket_gens, 25) \ - x(lru_v2, 26) + x(lru_v2, 26) \ + x(fragmentation_lru, 27) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 1dbba7d906dd..2a9dab9006ef 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -89,15 +89,4 @@ struct disk_reservation { unsigned nr_replicas; }; 
-struct copygc_heap_entry { - u8 dev; - u8 gen; - u8 replicas; - u32 fragmentation; - u32 sectors; - u64 bucket; -}; - -typedef HEAP(struct copygc_heap_entry) copygc_heap; - #endif /* _BUCKETS_TYPES_H */ diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index c121a7cc3acd..e913b90f37b7 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -93,6 +93,13 @@ int bch2_lru_change(struct btree_trans *trans, bch2_lru_set(trans, lru_id, dev_bucket, new_time); } +static const char * const bch2_lru_types[] = { +#define x(n) #n, + BCH_LRU_TYPES() +#undef x + NULL +}; + static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, @@ -105,7 +112,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, const struct bch_alloc_v4 *a; struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; + enum bch_lru_type type = lru_type(lru_k); struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset); + u64 idx; int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, @@ -121,9 +130,17 @@ static int bch2_check_lru_key(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &a_convert); + switch (type) { + case BCH_LRU_read: + idx = alloc_lru_idx_read(*a); + break; + case BCH_LRU_fragmentation: + idx = a->fragmentation_lru; + break; + } + if (lru_k.k->type != KEY_TYPE_set || - a->data_type != BCH_DATA_cached || - a->io_time[READ] != lru_pos_time(lru_k.k->p)) { + lru_pos_time(lru_k.k->p) != idx) { if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { *last_flushed_pos = lru_k.k->p; ret = bch2_btree_write_buffer_flush_sync(trans) ?: @@ -131,17 +148,14 @@ static int bch2_check_lru_key(struct btree_trans *trans, goto out; } - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set || - a->data_type != BCH_DATA_cached || - a->io_time[READ] != lru_pos_time(lru_k.k->p), c, - "incorrect lru entry (time %llu) %s\n" - " for %s", - lru_pos_time(lru_k.k->p), - (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), - (bch2_bkey_val_to_text(&buf2, c, k), 
buf2.buf))) { + if (fsck_err(c, "incorrect lru entry: lru %s time %llu\n" + " %s\n" + " for %s", + bch2_lru_types[type], + lru_pos_time(lru_k.k->p), + (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), + (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) ret = bch2_btree_delete_at(trans, lru_iter, 0); - if (ret) - goto err; } out: err: diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index b8d9848cdb1a..78a6076999ed 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -22,6 +22,27 @@ static inline u64 lru_pos_time(struct bpos pos) return pos.inode & ~(~0ULL << LRU_TIME_BITS); } +#define BCH_LRU_TYPES() \ + x(read) \ + x(fragmentation) + +enum bch_lru_type { +#define x(n) BCH_LRU_##n, + BCH_LRU_TYPES() +#undef x +}; + +#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1) + +static inline enum bch_lru_type lru_type(struct bkey_s_c l) +{ + u16 lru_id = l.k->p.inode >> 48; + + if (lru_id == BCH_LRU_FRAGMENTATION_START) + return BCH_LRU_fragmentation; + return BCH_LRU_read; +} + int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 67f861eb597a..c964643e7ebf 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -652,13 +652,13 @@ failed_to_evacuate: printbuf_exit(&buf); } -int __bch2_evacuate_bucket(struct moving_context *ctxt, +int __bch2_evacuate_bucket(struct btree_trans *trans, + struct moving_context *ctxt, struct bpos bucket, int gen, struct data_update_opts _data_opts) { struct bch_fs *c = ctxt->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans trans; struct btree_iter iter; struct bkey_buf sk; struct bch_backpointer bp; @@ -667,17 +667,17 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c k; struct data_update_opts data_opts; unsigned dirty_sectors, bucket_size; + u64 fragmentation; u64 bp_offset = 0, cur_inum = U64_MAX; 
int ret = 0; bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED); - ret = lockrestart_do(&trans, + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (ret) { bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret)); @@ -687,17 +687,18 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = a->dirty_sectors; bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; + fragmentation = a->fragmentation_lru; - ret = bch2_btree_write_buffer_flush(&trans); + ret = bch2_btree_write_buffer_flush(trans); if (ret) { bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret)); goto err; } - while (!(ret = move_ratelimit(&trans, ctxt))) { - bch2_trans_begin(&trans); + while (!(ret = move_ratelimit(trans, ctxt))) { + bch2_trans_begin(trans); - ret = bch2_get_next_backpointer(&trans, bucket, gen, + ret = bch2_get_next_backpointer(trans, bucket, gen, &bp_offset, &bp, BTREE_ITER_CACHED); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -712,7 +713,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c k; unsigned i = 0; - k = bch2_backpointer_get_key(&trans, &iter, + k = bch2_backpointer_get_key(trans, &iter, bucket, bp_offset, bp); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -725,9 +726,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum); + ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); if (ret) { - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); continue; } @@ -741,15 +742,15 @@ int 
__bch2_evacuate_bucket(struct moving_context *ctxt, i++; } - ret = bch2_move_extent(&trans, &iter, ctxt, io_opts, + ret = bch2_move_extent(trans, &iter, ctxt, io_opts, bp.btree_id, k, data_opts); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, &trans); + bch2_move_ctxt_wait_for_io(ctxt, trans); continue; } if (ret) @@ -761,7 +762,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, } else { struct btree *b; - b = bch2_backpointer_get_node(&trans, &iter, + b = bch2_backpointer_get_node(trans, &iter, bucket, bp_offset, bp); ret = PTR_ERR_OR_ZERO(b); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) @@ -773,8 +774,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, if (!b) goto next; - ret = bch2_btree_node_rewrite(&trans, &iter, b, 0); - bch2_trans_iter_exit(&trans, &iter); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -791,17 +792,16 @@ next: bp_offset++; } - trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret); + trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) { - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); if (!ctxt->write_error) - verify_bucket_evacuated(&trans, bucket, gen); + verify_bucket_evacuated(trans, bucket, gen); } err: - bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return ret; } @@ -814,12 +814,15 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { + struct btree_trans trans; struct moving_context ctxt; int ret; + bch2_trans_init(&trans, c, 0, 0); 
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&ctxt, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); + bch2_trans_exit(&trans); return ret; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index aef613802935..c5a7c0add1d6 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -66,7 +66,8 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct moving_context *, +int __bch2_evacuate_bucket(struct btree_trans *, + struct moving_context *, struct bpos, int, struct data_update_opts); int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index b420b79edb36..74e57f6ea148 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -10,6 +10,7 @@ #include "alloc_foreground.h" #include "btree_iter.h" #include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" #include "disk_groups.h" @@ -19,6 +20,7 @@ #include "eytzinger.h" #include "io.h" #include "keylist.h" +#include "lru.h" #include "move.h" #include "movinggc.h" #include "super-io.h" @@ -31,138 +33,105 @@ #include <linux/sort.h> #include <linux/wait.h> -static inline int fragmentation_cmp(copygc_heap *heap, - struct copygc_heap_entry l, - struct copygc_heap_entry r) +static int bch2_bucket_is_movable(struct btree_trans *trans, + struct bpos bucket, u64 time, u8 *gen) { - return cmp_int(l.fragmentation, r.fragmentation); -} - -static int find_buckets_to_copygc(struct bch_fs *c) -{ - copygc_heap *h = &c->copygc_heap; - struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a; int ret; - bch2_trans_init(&trans, c, 0, 0); + if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset)) + return 0; - /* - * Find buckets with lowest sector counts, skipping 
completely - * empty buckets, by building a maxheap sorted by sector count, - * and repeatedly replacing the maximum element until all - * buckets have been visited. - */ - h->used = 0; - - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode); - struct copygc_heap_entry e; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - - a = bch2_alloc_to_v4(k, &a_convert); - - if ((a->data_type != BCH_DATA_btree && - a->data_type != BCH_DATA_user) || - a->dirty_sectors >= ca->mi.bucket_size || - bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset)) - continue; + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + bch2_trans_iter_exit(trans, &iter); + + if (ret) + return ret; - e = (struct copygc_heap_entry) { - .dev = iter.pos.inode, - .gen = a->gen, - .replicas = 1 + a->stripe_redundancy, - .fragmentation = div_u64((u64) a->dirty_sectors * (1ULL << 31), - ca->mi.bucket_size), - .sectors = a->dirty_sectors, - .bucket = iter.pos.offset, - }; - heap_add_or_replace(h, e, -fragmentation_cmp, NULL); + a = bch2_alloc_to_v4(k, &_a); + *gen = a->gen; + ret = (a->data_type == BCH_DATA_btree || + a->data_type == BCH_DATA_user) && + a->fragmentation_lru && + a->fragmentation_lru <= time; + if (ret) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, trans->c, k); + pr_debug("%s", buf.buf); + printbuf_exit(&buf); } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); return ret; } +static int bch2_copygc_next_bucket(struct btree_trans *trans, + struct bpos *bucket, u8 *gen, struct bpos *pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, + bpos_max(*pos, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0)), + lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), + 0, k, ({ + *bucket = 
u64_to_bucket(k.k->p.offset); + + bch2_bucket_is_movable(trans, *bucket, lru_pos_time(k.k->p), gen); + })); + + *pos = iter.pos; + if (ret < 0) + return ret; + return ret ? 0 : -ENOENT; +} + static int bch2_copygc(struct bch_fs *c) { - copygc_heap *h = &c->copygc_heap; - struct copygc_heap_entry e; struct bch_move_stats move_stats; - struct bch_dev *ca; - unsigned dev_idx; - size_t heap_size = 0; + struct btree_trans trans; struct moving_context ctxt; struct data_update_opts data_opts = { .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc, }; + struct bpos bucket; + struct bpos pos; + u8 gen = 0; + unsigned nr_evacuated; int ret = 0; bch2_move_stats_init(&move_stats, "copygc"); - - for_each_rw_member(ca, c, dev_idx) - heap_size += ca->mi.nbuckets >> 7; - - if (h->size < heap_size) { - free_heap(&c->copygc_heap); - if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) { - bch_err(c, "error allocating copygc heap"); - return 0; - } - } - - ret = find_buckets_to_copygc(c); - if (ret) { - bch2_fs_fatal_error(c, "error walking buckets to copygc!"); - return ret; - } - - if (!h->used) { - s64 wait = S64_MAX, dev_wait; - u64 dev_min_wait_fragmented = 0; - u64 dev_min_wait_allowed = 0; - int dev_min_wait = -1; - - for_each_rw_member(ca, c, dev_idx) { - struct bch_dev_usage usage = bch2_dev_usage_read(ca); - s64 allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) * - ca->mi.bucket_size) >> 1); - s64 fragmented = usage.d[BCH_DATA_user].fragmented; - - dev_wait = max(0LL, allowed - fragmented); - - if (dev_min_wait < 0 || dev_wait < wait) { - dev_min_wait = dev_idx; - dev_min_wait_fragmented = fragmented; - dev_min_wait_allowed = allowed; - } - } - - bch_err_ratelimited(c, "copygc requested to run but found no buckets to move! 
dev %u fragmented %llu allowed %llu", - dev_min_wait, dev_min_wait_fragmented, dev_min_wait_allowed); - return 0; - } - - heap_resort(h, fragmentation_cmp, NULL); - bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), false); + bch2_trans_init(&trans, c, 0, 0); + + ret = bch2_btree_write_buffer_flush(&trans); + BUG_ON(ret); - /* not correct w.r.t. device removal */ - while (h->used && !ret) { - BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); - ret = __bch2_evacuate_bucket(&ctxt, POS(e.dev, e.bucket), e.gen, - data_opts); + for (nr_evacuated = 0, pos = POS_MIN; + nr_evacuated < 32 && !ret; + nr_evacuated++, pos = bpos_nosnap_successor(pos)) { + ret = bch2_copygc_next_bucket(&trans, &bucket, &gen, &pos) ?: + __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts); + if (bkey_eq(pos, POS_MAX)) + break; } + bch2_trans_exit(&trans); bch2_moving_ctxt_exit(&ctxt); + /* no entries in LRU btree found, or got to end: */ + if (ret == -ENOENT) + ret = 0; + if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 178f06424460..1976d5fa3427 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1105,6 +1105,9 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; + } else if (c->sb.version < bcachefs_metadata_version_fragmentation_lru) { + bch_info(c, "version prior to backpointers, upgrade required"); + c->opts.version_upgrade = true; } } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 58517f6d128f..f703e41c7560 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -487,7 +487,6 @@ static void __bch2_fs_free(struct bch_fs *c) kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(c->journal_seq_blacklist_table); kfree(c->unused_inode_hints); - free_heap(&c->copygc_heap); if 
(c->io_complete_wq) destroy_workqueue(c->io_complete_wq); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 24dd2defe7c7..30b10908ced0 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -723,8 +723,8 @@ TRACE_EVENT(move_data, TRACE_EVENT(evacuate_bucket, TP_PROTO(struct bch_fs *c, struct bpos *bucket, unsigned sectors, unsigned bucket_size, - int ret), - TP_ARGS(c, bucket, sectors, bucket_size, ret), + u64 fragmentation, int ret), + TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret), TP_STRUCT__entry( __field(dev_t, dev ) @@ -732,6 +732,7 @@ TRACE_EVENT(evacuate_bucket, __field(u64, bucket ) __field(u32, sectors ) __field(u32, bucket_size ) + __field(u64, fragmentation ) __field(int, ret ) ), @@ -741,14 +742,15 @@ TRACE_EVENT(evacuate_bucket, __entry->bucket = bucket->offset; __entry->sectors = sectors; __entry->bucket_size = bucket_size; + __entry->fragmentation = fragmentation; __entry->ret = ret; ), - TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->member, __entry->bucket, __entry->sectors, __entry->bucket_size, - __entry->ret) + __entry->fragmentation, __entry->ret) ); TRACE_EVENT(copygc, |