summaryrefslogtreecommitdiff
path: root/fs/bcachefs/ec.c
diff options
context:
space:
mode:
authorRobbie Litchfield <blam.kiwi@gmail.com>2021-02-10 13:18:13 +1300
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:08:53 -0400
commit0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3 (patch)
tree7161856bca1da971b86022a113144f12cb89bfdc /fs/bcachefs/ec.c
parent2bb748a69596e883cf9ea28321d43f8c6a225cef (diff)
downloadlwn-0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3.tar.gz
lwn-0ef837a0cc87d49d9f7d29bdef5a57f07ecc84d3.zip
bcachefs: Fix unnecessary read amplificaiton when allocating ec stripes
When allocating an erasure coding stripe, bcachefs will always reuse any partial stripes before reserving a new stripe. This causes unnecessary read amplification when preparing a stripe for writing. This patch changes bcachefs to always reserve new stripes first, only relying on stripe reuse when copygc needs more time to empty buckets from existing stripes. Signed-off-by: Robbie Litchfield <blam.kiwi@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/ec.c')
-rw-r--r--fs/bcachefs/ec.c155
1 files changed, 92 insertions, 63 deletions
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index a32d399e5b6f..a70b859363f0 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret;
}
+static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
+ struct ec_stripe_head *h)
+{
+ unsigned i;
+ s64 idx;
+ int ret;
+
+ idx = get_existing_stripe(c, h);
+ if (idx < 0) {
+ bch_err(c, "failed to find an existing stripe");
+ return -ENOSPC;
+ }
+
+ h->s->have_existing_stripe = true;
+ ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+ if (ret) {
+ bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
+ return ret;
+ }
+
+ if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
+ /*
+ * this is a problem: we have deleted from the
+ * stripes heap already
+ */
+ BUG();
+ }
+
+ BUG_ON(h->s->existing_stripe.size != h->blocksize);
+ BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
+
+ for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
+ if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
+ __set_bit(i, h->s->blocks_gotten);
+ __set_bit(i, h->s->blocks_allocated);
+ }
+
+ ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+ }
+
+ bkey_copy(&h->s->new_stripe.key.k_i,
+ &h->s->existing_stripe.key.k_i);
+
+ return 0;
+}
+
+static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
+ struct ec_stripe_head *h)
+{
+ int ret;
+
+ ret = bch2_disk_reservation_get(c, &h->s->res,
+ h->blocksize,
+ h->s->nr_parity, 0);
+
+ if (ret) {
+ /*
+ * This means we need to wait for copygc to
+ * empty out buckets from existing stripes:
+ */
+ bch_err(c, "failed to reserve stripe");
+ }
+
+ return ret;
+}
+
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
@@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
struct closure *cl)
{
struct ec_stripe_head *h;
- unsigned i;
- s64 idx;
int ret;
+ bool needs_stripe_new;
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
if (!h) {
@@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
return NULL;
}
- if (!h->s) {
+ needs_stripe_new = !h->s;
+ if (needs_stripe_new) {
if (ec_new_stripe_alloc(c, h)) {
- bch2_ec_stripe_head_put(c, h);
+ ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe");
- return NULL;
- }
-
- idx = get_existing_stripe(c, h);
- if (idx >= 0) {
- h->s->have_existing_stripe = true;
- ret = get_stripe_key(c, idx, &h->s->existing_stripe);
- if (ret) {
- bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
- bch2_ec_stripe_head_put(c, h);
- return NULL;
- }
-
- if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
- /*
- * this is a problem: we have deleted from the
- * stripes heap already
- */
- BUG();
- }
-
- BUG_ON(h->s->existing_stripe.size != h->blocksize);
- BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
-
- for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
- if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
- __set_bit(i, h->s->blocks_gotten);
- __set_bit(i, h->s->blocks_allocated);
- }
-
- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
- }
-
- bkey_copy(&h->s->new_stripe.key.k_i,
- &h->s->existing_stripe.key.k_i);
+ goto err;
}
- if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) {
+ if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
BUG();
- }
}
- if (!h->s->allocated) {
- if (!h->s->have_existing_stripe &&
- !h->s->res.sectors) {
- ret = bch2_disk_reservation_get(c, &h->s->res,
- h->blocksize,
- h->s->nr_parity, 0);
- if (ret) {
- /*
- * This means we need to wait for copygc to
- * empty out buckets from existing stripes:
- */
- bch2_ec_stripe_head_put(c, h);
- h = NULL;
- goto out;
- }
- }
+ /*
+ * Try reserve a new stripe before reusing an
+ * existing stripe. This will prevent unnecessary
+ * read amplification during write oriented workloads.
+ */
+ ret = 0;
+ if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
+ ret = __bch2_ec_stripe_head_reserve(c, h);
+ if (ret && needs_stripe_new)
+ ret = __bch2_ec_stripe_head_reuse(c, h);
+ if (ret)
+ goto err;
+ if (!h->s->allocated) {
ret = new_stripe_alloc_buckets(c, h, cl);
- if (ret) {
- bch2_ec_stripe_head_put(c, h);
- h = ERR_PTR(-ret);
- goto out;
- }
+ if (ret)
+ goto err;
h->s->allocated = true;
}
-out:
+
return h;
+
+err:
+ bch2_ec_stripe_head_put(c, h);
+ return ERR_PTR(-ret);
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)