diff options
author | Robbie Litchfield <blam.kiwi@gmail.com> | 2021-02-10 13:18:13 +1300 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:53 -0400 |
commit | 0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3 (patch) | |
tree | 7161856bca1da971b86022a113144f12cb89bfdc /fs/bcachefs/ec.c | |
parent | 2bb748a69596e883cf9ea28321d43f8c6a225cef (diff) | |
download | lwn-0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3.tar.gz lwn-0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3.zip |
bcachefs: Fix unnecessary read amplificaiton when allocating ec stripes
When allocating an erasure coding stripe, bcachefs will always reuse any
partial stripes before reserving a new stripe. This causes unnecessary
read amplification when preparing a stripe for writing. This patch changes
bcachefs to always reserve new stripes first, only relying on stripe reuse
when copygc needs more time to empty buckets from existing stripes.
Signed-off-by: Robbie Litchfield <blam.kiwi@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/ec.c')
-rw-r--r-- | fs/bcachefs/ec.c | 155 |
1 files changed, 92 insertions, 63 deletions
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a32d399e5b6f..a70b859363f0 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c, return ret; } +static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, + struct ec_stripe_head *h) +{ + unsigned i; + s64 idx; + int ret; + + idx = get_existing_stripe(c, h); + if (idx < 0) { + bch_err(c, "failed to find an existing stripe"); + return -ENOSPC; + } + + h->s->have_existing_stripe = true; + ret = get_stripe_key(c, idx, &h->s->existing_stripe); + if (ret) { + bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); + return ret; + } + + if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { + /* + * this is a problem: we have deleted from the + * stripes heap already + */ + BUG(); + } + + BUG_ON(h->s->existing_stripe.size != h->blocksize); + BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); + + for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { + if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { + __set_bit(i, h->s->blocks_gotten); + __set_bit(i, h->s->blocks_allocated); + } + + ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + } + + bkey_copy(&h->s->new_stripe.key.k_i, + &h->s->existing_stripe.key.k_i); + + return 0; +} + +static int __bch2_ec_stripe_head_reserve(struct bch_fs *c, + struct ec_stripe_head *h) +{ + int ret; + + ret = bch2_disk_reservation_get(c, &h->s->res, + h->blocksize, + h->s->nr_parity, 0); + + if (ret) { + /* + * This means we need to wait for copygc to + * empty out buckets from existing stripes: + */ + bch_err(c, "failed to reserve stripe"); + } + + return ret; +} + struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, unsigned target, unsigned algo, @@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, struct closure *cl) { struct ec_stripe_head *h; - unsigned i; - s64 idx; int ret; + bool needs_stripe_new; h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc); if (!h) { @@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, return NULL; } - if (!h->s) { + needs_stripe_new = !h->s; + if (needs_stripe_new) { if (ec_new_stripe_alloc(c, h)) { - bch2_ec_stripe_head_put(c, h); + ret = -ENOMEM; bch_err(c, "failed to allocate new stripe"); - return NULL; - } - - idx = get_existing_stripe(c, h); - if (idx >= 0) { - h->s->have_existing_stripe = true; - ret = get_stripe_key(c, idx, &h->s->existing_stripe); - if (ret) { - bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); - bch2_ec_stripe_head_put(c, h); - return NULL; - } - - if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { - /* - * this is a problem: we have deleted from the - * stripes heap already - */ - BUG(); - } - - BUG_ON(h->s->existing_stripe.size != h->blocksize); - BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); - - for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { - if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { - __set_bit(i, h->s->blocks_gotten); - __set_bit(i, h->s->blocks_allocated); - } - - ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); - } - - bkey_copy(&h->s->new_stripe.key.k_i, - &h->s->existing_stripe.key.k_i); + goto err; } - if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) { + if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) BUG(); - } } - if (!h->s->allocated) { - if (!h->s->have_existing_stripe && - !h->s->res.sectors) { - ret = bch2_disk_reservation_get(c, &h->s->res, - h->blocksize, - h->s->nr_parity, 0); - if (ret) { - /* - * This means we need to wait for copygc to - * empty out buckets from existing stripes: - */ - bch2_ec_stripe_head_put(c, h); - h = NULL; - goto out; - } - } + /* + * Try reserve a new stripe before reusing an + * existing stripe. This will prevent unnecessary + * read amplification during write oriented workloads. + */ + ret = 0; + if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe) + ret = __bch2_ec_stripe_head_reserve(c, h); + if (ret && needs_stripe_new) + ret = __bch2_ec_stripe_head_reuse(c, h); + if (ret) + goto err; + if (!h->s->allocated) { ret = new_stripe_alloc_buckets(c, h, cl); - if (ret) { - bch2_ec_stripe_head_put(c, h); - h = ERR_PTR(-ret); - goto out; - } + if (ret) + goto err; h->s->allocated = true; } -out: + return h; + +err: + bch2_ec_stripe_head_put(c, h); + return ERR_PTR(-ret); } void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) |