diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2020-10-19 22:36:24 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:44 -0400 |
commit | 39283c712e6df927c7c49e8b738ca110551bb399 (patch) | |
tree | dea8074dd781389429dd7af9d440a687d2091705 /fs/bcachefs/buckets.c | |
parent | 289980195ffaa949ecd4216337a70a8e23cf8e86 (diff) | |
download | lwn-39283c712e6df927c7c49e8b738ca110551bb399.tar.gz lwn-39283c712e6df927c7c49e8b738ca110551bb399.zip |
bcachefs: Fix for bad stripe pointers
The allocator usually doesn't increment bucket gens right away on
buckets that it's about to hand out (for reasons that need to be
documented), instead deferring that to whatever extent update first
references that bucket.
But stripe pointers reference buckets without changing bucket sector
counts, meaning we could end up with a pointer in a stripe with a gen
newer than the bucket it points to.
Fix this by adding a transactional trigger for KEY_TYPE_stripe that just
writes out the keys in the alloc btree for the buckets it points to.
Also - consolidate the code that checks pointer validity.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/buckets.c')
-rw-r--r-- | fs/bcachefs/buckets.c | 283 |
1 files changed, 183 insertions, 100 deletions
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 7bc51f397c7b..80d11decb71e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -883,124 +883,140 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, p.crc.uncompressed_size); } -static void bucket_set_stripe(struct bch_fs *c, - const struct bch_extent_ptr *ptr, - struct bch_fs_usage *fs_usage, - u64 journal_seq, - unsigned flags, - bool enabled) -{ - bool gc = flags & BTREE_TRIGGER_GC; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_BUCKET(ca, ptr, gc); - struct bucket_mark new, old; - - old = bucket_cmpxchg(g, new, ({ - new.stripe = enabled; - if (journal_seq) { - new.journal_seq_valid = 1; - new.journal_seq = journal_seq; - } - })); - - bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); - - /* - * XXX write repair code for these, flag stripe as possibly bad - */ - if (old.gen != ptr->gen) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "stripe with stale pointer"); -#if 0 - /* - * We'd like to check for these, but these checks don't work - * yet: - */ - if (old.stripe && enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "multiple stripes using same bucket"); - - if (!old.stripe && !enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "deleting stripe but bucket not marked as stripe bucket"); -#endif -} - -static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, - struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type ptr_data_type, - u8 bucket_gen, u8 *bucket_data_type, - u16 *dirty_sectors, u16 *cached_sectors) -{ - u16 *dst_sectors = !p.ptr.cached +static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 bucket_data_type, + u16 dirty_sectors, u16 cached_sectors) +{ + size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr); + u16 bucket_sectors = !ptr->cached ? dirty_sectors : cached_sectors; - u16 orig_sectors = *dst_sectors; char buf[200]; - if (gen_after(p.ptr.gen, bucket_gen)) { + if (gen_after(ptr->gen, bucket_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) { + if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (bucket_gen != p.ptr.gen && !p.ptr.cached) { + if (bucket_gen != ptr->gen && !ptr->cached) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (bucket_gen != p.ptr.gen) + if (bucket_gen != ptr->gen) return 1; - if (*bucket_data_type && *bucket_data_type != ptr_data_type) { + if (bucket_data_type && ptr_data_type && + bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type], + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type], bch2_data_types[ptr_data_type], (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (checked_add(*dst_sectors, sectors)) { + if ((unsigned) (bucket_sectors + sectors) > U16_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - orig_sectors, sectors, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + bucket_sectors, sectors, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } + return 0; +} + +static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + struct bch_fs_usage *fs_usage, + u64 journal_seq, + unsigned flags, + bool enabled) +{ + bool gc = flags & BTREE_TRIGGER_GC; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket *g = PTR_BUCKET(ca, ptr, gc); + struct bucket_mark new, old; + char buf[200]; + int ret; + + old = bucket_cmpxchg(g, new, ({ + ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type, + new.dirty_sectors, new.cached_sectors); + if (ret) + return ret; + + if (new.stripe && enabled) + bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + + if (!new.stripe && !enabled) + bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + "bucket %u:%zu gen %u: deleting stripe but not marked\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + + new.stripe = enabled; + if (journal_seq) { + new.journal_seq_valid = 1; + new.journal_seq = journal_seq; + } + })); + + bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); + return 0; +} + +static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 *bucket_data_type, + u16 *dirty_sectors, u16 *cached_sectors) +{ + u16 *dst_sectors = !ptr->cached + ? dirty_sectors + : cached_sectors; + int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type, + bucket_gen, *bucket_data_type, + *dirty_sectors, *cached_sectors); + + if (ret) + return ret; + + *dst_sectors += sectors; *bucket_data_type = *dirty_sectors || *cached_sectors ? ptr_data_type : 0; return 0; @@ -1025,7 +1041,7 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, new.v.counter = old.v.counter = v; bucket_data_type = new.data_type; - ret = __mark_pointer(c, k, p, sectors, data_type, new.gen, + ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen, &bucket_data_type, &new.dirty_sectors, &new.cached_sectors); @@ -1189,6 +1205,7 @@ static int bch2_mark_stripe(struct bch_fs *c, ? bkey_s_c_to_stripe(new).v : NULL; struct stripe *m = genradix_ptr(&c->stripes[gc], idx); unsigned i; + int ret; if (!m || (old_s && !m->alive)) { bch_err_ratelimited(c, "error marking nonexistent stripe %zu", @@ -1198,9 +1215,12 @@ static int bch2_mark_stripe(struct bch_fs *c, if (!new_s) { /* Deleting: */ - for (i = 0; i < old_s->nr_blocks; i++) - bucket_set_stripe(c, old_s->ptrs + i, fs_usage, - journal_seq, flags, false); + for (i = 0; i < old_s->nr_blocks; i++) { + ret = bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, + journal_seq, flags, false); + if (ret) + return ret; + } if (!gc && m->on_heap) { spin_lock(&c->ec_stripes_heap_lock); @@ -1219,11 +1239,16 @@ static int bch2_mark_stripe(struct bch_fs *c, old_s->ptrs + i, sizeof(struct bch_extent_ptr))) { - if (old_s) - bucket_set_stripe(c, old_s->ptrs + i, fs_usage, + if (old_s) { + bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, journal_seq, flags, false); - bucket_set_stripe(c, new_s->ptrs + i, fs_usage, - journal_seq, flags, true); + if (ret) + return ret; + } + ret = bucket_set_stripe(c, new, new_s->ptrs + i, fs_usage, + journal_seq, flags, true); + if (ret) + return ret; } } @@ -1549,23 +1574,21 @@ static int trans_get_key(struct btree_trans *trans, return ret; } -static int bch2_trans_mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) +static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, + const struct bch_extent_ptr *ptr, + struct bkey_alloc_unpacked *u) { struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)); - struct btree_iter *iter; - struct bkey_s_c k_a; - struct bkey_alloc_unpacked u; - struct bkey_i_alloc *a; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); struct bucket *g; + struct btree_iter *iter; + struct bkey_s_c k; int ret; - iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a); + iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); if (iter) { - u = bch2_alloc_unpack(k_a); + *u = bch2_alloc_unpack(k); } else { iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, BTREE_ITER_CACHED| @@ -1575,16 +1598,36 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, return PTR_ERR(iter); ret = bch2_btree_iter_traverse(iter); - if (ret) - goto out; + if (ret) { + bch2_trans_iter_put(trans, iter); + return ret; + } percpu_down_read(&c->mark_lock); g = bucket(ca, pos.offset); - u = alloc_mem_to_key(g, READ_ONCE(g->mark)); + *u = alloc_mem_to_key(g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); } - ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type, + *_iter = iter; + return 0; +} + +static int bch2_trans_mark_pointer(struct btree_trans *trans, + struct bkey_s_c k, struct extent_ptr_decoded p, + s64 sectors, enum bch_data_type data_type) +{ + struct bch_fs *c = trans->c; + struct btree_iter *iter; + struct bkey_alloc_unpacked u; + struct bkey_i_alloc *a; + int ret; + + ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); + if (ret) + return ret; + + ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, &u.dirty_sectors, &u.cached_sectors); if (ret) goto out; @@ -1595,7 +1638,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, goto out; bkey_alloc_init(&a->k_i); - a->k.p = pos; + a->k.p = iter->pos; bch2_alloc_pack(a, u); bch2_trans_update(trans, iter, &a->k_i, 0); out: @@ -1716,6 +1759,44 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, return 0; } +static int bch2_trans_mark_stripe(struct btree_trans *trans, + struct bkey_s_c k) +{ + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + struct bkey_alloc_unpacked u; + struct bkey_i_alloc *a; + struct btree_iter *iter; + unsigned i; + int ret = 0; + + /* + * The allocator code doesn't necessarily update bucket gens in the + * btree when incrementing them, right before handing out new buckets - + * we just need to persist those updates here along with the new stripe: + */ + + for (i = 0; i < s->nr_blocks && !ret; i++) { + ret = bch2_trans_start_alloc_update(trans, &iter, + &s->ptrs[i], &u); + if (ret) + break; + + a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto put_iter; + + bkey_alloc_init(&a->k_i); + a->k.p = iter->pos; + bch2_alloc_pack(a, u); + bch2_trans_update(trans, iter, &a->k_i, 0); +put_iter: + bch2_trans_iter_put(trans, iter); + } + + return ret; +} + static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 idx, unsigned sectors, @@ -1815,6 +1896,8 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, case KEY_TYPE_reflink_v: return bch2_trans_mark_extent(trans, k, offset, sectors, flags, BCH_DATA_user); + case KEY_TYPE_stripe: + return bch2_trans_mark_stripe(trans, k); case KEY_TYPE_inode: d = replicas_deltas_realloc(trans, 0); |