summaryrefslogtreecommitdiff
path: root/fs/bcachefs/io_read.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r--fs/bcachefs/io_read.c747
1 files changed, 402 insertions, 345 deletions
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index aa91fcf51eec..f1503df57dc7 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -25,8 +25,15 @@
#include "subvolume.h"
#include "trace.h"
+#include <linux/random.h>
#include <linux/sched/mm.h>
+#ifdef CONFIG_BCACHEFS_DEBUG
+static unsigned bch2_read_corrupt_ratio;
+module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644);
+MODULE_PARM_DESC(read_corrupt_ratio, "");
+#endif
+
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static bool bch2_target_congested(struct bch_fs *c, u16 target)
@@ -80,6 +87,7 @@ struct promote_op {
struct rhash_head hash;
struct bpos pos;
+ struct work_struct work;
struct data_update write;
struct bio_vec bi_inline_vecs[]; /* must be last */
};
@@ -96,6 +104,33 @@ static inline bool have_io_error(struct bch_io_failures *failed)
return failed && failed->nr;
}
+static inline struct data_update *rbio_data_update(struct bch_read_bio *rbio)
+{
+ EBUG_ON(rbio->split);
+
+ return rbio->data_update
+ ? container_of(rbio, struct data_update, rbio)
+ : NULL;
+}
+
+static bool ptr_being_rewritten(struct bch_read_bio *orig, unsigned dev)
+{
+ struct data_update *u = rbio_data_update(orig);
+ if (!u)
+ return false;
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(u->k.k));
+ unsigned i = 0;
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == dev &&
+ u->data_opts.rewrite_ptrs & BIT(i))
+ return true;
+ i++;
+ }
+
+ return false;
+}
+
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bpos pos,
struct bch_io_opts opts,
@@ -105,7 +140,7 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
if (!have_io_error(failed)) {
BUG_ON(!opts.promote_target);
- if (!(flags & BCH_READ_MAY_PROMOTE))
+ if (!(flags & BCH_READ_may_promote))
return -BCH_ERR_nopromote_may_not;
if (bch2_bkey_has_target(c, k, opts.promote_target))
@@ -125,98 +160,93 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
return 0;
}
-static void promote_free(struct bch_fs *c, struct promote_op *op)
+static noinline void promote_free(struct bch_read_bio *rbio)
{
- int ret;
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
+ struct bch_fs *c = rbio->c;
+
+ int ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
bch2_data_update_exit(&op->write);
- ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params);
- BUG_ON(ret);
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu);
}
static void promote_done(struct bch_write_op *wop)
{
- struct promote_op *op =
- container_of(wop, struct promote_op, write.op);
- struct bch_fs *c = op->write.op.c;
+ struct promote_op *op = container_of(wop, struct promote_op, write.op);
+ struct bch_fs *c = op->write.rbio.c;
- bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
- op->start_time);
- promote_free(c, op);
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], op->start_time);
+ promote_free(&op->write.rbio);
}
-static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+static void promote_start_work(struct work_struct *work)
{
- struct bio *bio = &op->write.op.wbio.bio;
+ struct promote_op *op = container_of(work, struct promote_op, work);
- trace_and_count(op->write.op.c, read_promote, &rbio->bio);
+ bch2_data_update_read_done(&op->write);
+}
- /* we now own pages: */
- BUG_ON(!rbio->bounce);
- BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+static noinline void promote_start(struct bch_read_bio *rbio)
+{
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
- sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
- swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
+ trace_and_count(op->write.op.c, io_read_promote, &rbio->bio);
- bch2_data_update_read_done(&op->write, rbio->pick.crc);
+ INIT_WORK(&op->work, promote_start_work);
+ queue_work(rbio->c->write_ref_wq, &op->work);
}
-static struct promote_op *__promote_alloc(struct btree_trans *trans,
- enum btree_id btree_id,
- struct bkey_s_c k,
- struct bpos pos,
- struct extent_ptr_decoded *pick,
- struct bch_io_opts opts,
- unsigned sectors,
- struct bch_read_bio **rbio,
- struct bch_io_failures *failed)
+static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
+ enum btree_id btree_id,
+ struct bkey_s_c k,
+ struct bpos pos,
+ struct extent_ptr_decoded *pick,
+ unsigned sectors,
+ struct bch_read_bio *orig,
+ struct bch_io_failures *failed)
{
struct bch_fs *c = trans->c;
- struct promote_op *op = NULL;
- struct bio *bio;
- unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
int ret;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
- return ERR_PTR(-BCH_ERR_nopromote_no_writes);
+ struct data_update_opts update_opts = { .write_flags = BCH_WRITE_alloc_nowait };
- op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
- if (!op) {
- ret = -BCH_ERR_nopromote_enomem;
- goto err;
- }
+ if (!have_io_error(failed)) {
+ update_opts.target = orig->opts.promote_target;
+ update_opts.extra_replicas = 1;
+ update_opts.write_flags |= BCH_WRITE_cached;
+ update_opts.write_flags |= BCH_WRITE_only_specified_devs;
+ } else {
+ update_opts.target = orig->opts.foreground_target;
- op->start_time = local_clock();
- op->pos = pos;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned ptr_bit = 1;
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (bch2_dev_io_failures(failed, ptr->dev) &&
+ !ptr_being_rewritten(orig, ptr->dev))
+ update_opts.rewrite_ptrs |= ptr_bit;
+ ptr_bit <<= 1;
+ }
- /*
- * We don't use the mempool here because extents that aren't
- * checksummed or compressed can be too big for the mempool:
- */
- *rbio = kzalloc(sizeof(struct bch_read_bio) +
- sizeof(struct bio_vec) * pages,
- GFP_KERNEL);
- if (!*rbio) {
- ret = -BCH_ERR_nopromote_enomem;
- goto err;
+ if (!update_opts.rewrite_ptrs)
+ return NULL;
}
- rbio_init(&(*rbio)->bio, opts);
- bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+ return ERR_PTR(-BCH_ERR_nopromote_no_writes);
- if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
+ struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
+ if (!op) {
ret = -BCH_ERR_nopromote_enomem;
- goto err;
+ goto err_put;
}
- (*rbio)->bounce = true;
- (*rbio)->split = true;
- (*rbio)->kmalloc = true;
+ op->start_time = local_clock();
+ op->pos = pos;
if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
bch_promote_params)) {
@@ -224,64 +254,43 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
goto err;
}
- bio = &op->write.op.wbio.bio;
- bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-
- struct data_update_opts update_opts = {};
-
- if (!have_io_error(failed)) {
- update_opts.target = opts.promote_target;
- update_opts.extra_replicas = 1;
- update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
- } else {
- update_opts.target = opts.foreground_target;
-
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- unsigned ptr_bit = 1;
- bkey_for_each_ptr(ptrs, ptr) {
- if (bch2_dev_io_failures(failed, ptr->dev))
- update_opts.rewrite_ptrs |= ptr_bit;
- ptr_bit <<= 1;
- }
- }
-
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
- opts,
+ &orig->opts,
update_opts,
btree_id, k);
/*
* possible errors: -BCH_ERR_nocow_lock_blocked,
* -BCH_ERR_ENOSPC_disk_reservation:
*/
- if (ret) {
- BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params));
- goto err;
- }
+ if (ret)
+ goto err_remove_hash;
+ rbio_init_fragment(&op->write.rbio.bio, orig);
+ op->write.rbio.bounce = true;
+ op->write.rbio.promote = true;
op->write.op.end_io = promote_done;
- return op;
+ return &op->write.rbio;
+err_remove_hash:
+ BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params));
err:
- if (*rbio)
- bio_free_pages(&(*rbio)->bio);
- kfree(*rbio);
- *rbio = NULL;
+ bio_free_pages(&op->write.op.wbio.bio);
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
+err_put:
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
return ERR_PTR(ret);
}
noinline
-static struct promote_op *promote_alloc(struct btree_trans *trans,
+static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
struct bvec_iter iter,
struct bkey_s_c k,
struct extent_ptr_decoded *pick,
- struct bch_io_opts opts,
unsigned flags,
- struct bch_read_bio **rbio,
+ struct bch_read_bio *orig,
bool *bounce,
bool *read_full,
struct bch_io_failures *failed)
@@ -301,18 +310,21 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
struct bpos pos = promote_full
? bkey_start_pos(k.k)
: POS(k.k->p.inode, iter.bi_sector);
- struct promote_op *promote;
int ret;
- ret = should_promote(c, k, pos, opts, flags, failed);
+ ret = should_promote(c, k, pos, orig->opts, flags, failed);
if (ret)
goto nopromote;
- promote = __promote_alloc(trans,
- k.k->type == KEY_TYPE_reflink_v
- ? BTREE_ID_reflink
- : BTREE_ID_extents,
- k, pos, pick, opts, sectors, rbio, failed);
+ struct bch_read_bio *promote =
+ __promote_alloc(trans,
+ k.k->type == KEY_TYPE_reflink_v
+ ? BTREE_ID_reflink
+ : BTREE_ID_extents,
+ k, pos, pick, sectors, orig, failed);
+ if (!promote)
+ return NULL;
+
ret = PTR_ERR_OR_ZERO(promote);
if (ret)
goto nopromote;
@@ -321,7 +333,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
*read_full = promote_full;
return promote;
nopromote:
- trace_read_nopromote(c, ret);
+ trace_io_read_nopromote(c, ret);
return NULL;
}
@@ -330,9 +342,17 @@ nopromote:
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
struct bch_read_bio *rbio, struct bpos read_pos)
{
- return bch2_inum_offset_err_msg_trans(trans, out,
- (subvol_inum) { rbio->subvol, read_pos.inode },
- read_pos.offset << 9);
+ int ret = lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { rbio->subvol, read_pos.inode },
+ read_pos.offset << 9));
+ if (ret)
+ return ret;
+
+ if (rbio->data_update)
+ prt_str(out, "(internal move) ");
+
+ return 0;
}
static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
@@ -341,10 +361,6 @@ static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
}
-#define READ_RETRY_AVOID 1
-#define READ_RETRY 2
-#define READ_ERR 3
-
enum rbio_context {
RBIO_CONTEXT_NULL,
RBIO_CONTEXT_HIGHPRI,
@@ -375,20 +391,25 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
{
BUG_ON(rbio->bounce && !rbio->split);
- if (rbio->promote)
- promote_free(rbio->c, rbio->promote);
- rbio->promote = NULL;
-
- if (rbio->bounce)
- bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+ if (rbio->have_ioref) {
+ struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev);
+ percpu_ref_put(&ca->io_ref);
+ }
if (rbio->split) {
struct bch_read_bio *parent = rbio->parent;
- if (rbio->kmalloc)
- kfree(rbio);
- else
+ if (unlikely(rbio->promote)) {
+ if (!rbio->bio.bi_status)
+ promote_start(rbio);
+ else
+ promote_free(rbio);
+ } else {
+ if (rbio->bounce)
+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+
bio_put(&rbio->bio);
+ }
rbio = parent;
}
@@ -408,61 +429,49 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
bio_endio(&rbio->bio);
}
-static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter,
- struct bch_io_failures *failed,
- unsigned flags)
+static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
+ struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter,
+ struct bch_io_failures *failed,
+ unsigned flags)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct btree_iter iter;
- struct bkey_buf sk;
- struct bkey_s_c k;
- int ret;
-
- flags &= ~BCH_READ_LAST_FRAGMENT;
- flags |= BCH_READ_MUST_CLONE;
-
- bch2_bkey_buf_init(&sk);
-
- bch2_trans_iter_init(trans, &iter, rbio->data_btree,
- rbio->read_pos, BTREE_ITER_slots);
+ struct data_update *u = container_of(rbio, struct data_update, rbio);
retry:
bch2_trans_begin(trans);
- rbio->bio.bi_status = 0;
- ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = lockrestart_do(trans,
+ bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+ u->btree_id, bkey_start_pos(&u->k.k->k),
+ 0)));
if (ret)
goto err;
- bch2_bkey_buf_reassemble(&sk, c, k);
- k = bkey_i_to_s_c(sk.k);
-
- if (!bch2_bkey_matches_ptr(c, k,
- rbio->pick.ptr,
- rbio->data_pos.offset -
- rbio->pick.crc.offset)) {
+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) {
/* extent we wanted to read no longer exists: */
- rbio->hole = true;
- goto out;
+ rbio->ret = -BCH_ERR_data_read_key_overwritten;
+ goto err;
}
ret = __bch2_read_extent(trans, rbio, bvec_iter,
- rbio->read_pos,
- rbio->data_btree,
- k, 0, failed, flags);
- if (ret == READ_RETRY)
- goto retry;
- if (ret)
- goto err;
-out:
- bch2_rbio_done(rbio);
- bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
- bch2_bkey_buf_exit(&sk, c);
- return;
+ bkey_start_pos(&u->k.k->k),
+ u->btree_id,
+ bkey_i_to_s_c(u->k.k),
+ 0, failed, flags, -1);
err:
- rbio->bio.bi_status = BLK_STS_IOERR;
- goto out;
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_data_read_retry))
+ goto retry;
+
+ if (ret) {
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ rbio->ret = ret;
+ }
+
+ BUG_ON(atomic_read(&rbio->bio.__bi_remaining) != 1);
+ return ret;
}
static void bch2_rbio_retry(struct work_struct *work)
@@ -477,45 +486,80 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
+ struct btree_trans *trans = bch2_trans_get(c);
- trace_and_count(c, read_retry, &rbio->bio);
+ trace_io_read_retry(&rbio->bio);
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
+ bvec_iter_sectors(rbio->bvec_iter));
- if (rbio->retry == READ_RETRY_AVOID)
- bch2_mark_io_failure(&failed, &rbio->pick);
+ if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
+ bch2_mark_io_failure(&failed, &rbio->pick,
+ rbio->ret == -BCH_ERR_data_read_retry_csum_err);
+
+ if (!rbio->split) {
+ rbio->bio.bi_status = 0;
+ rbio->ret = 0;
+ }
- rbio->bio.bi_status = 0;
+ unsigned subvol = rbio->subvol;
+ struct bpos read_pos = rbio->read_pos;
rbio = bch2_rbio_free(rbio);
- flags |= BCH_READ_IN_RETRY;
- flags &= ~BCH_READ_MAY_PROMOTE;
+ flags |= BCH_READ_in_retry;
+ flags &= ~BCH_READ_may_promote;
+ flags &= ~BCH_READ_last_fragment;
+ flags |= BCH_READ_must_clone;
- if (flags & BCH_READ_NODECODE) {
- bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
+ int ret = rbio->data_update
+ ? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
+ : __bch2_read(trans, rbio, iter, inum, &failed, flags);
+
+ if (ret) {
+ rbio->ret = ret;
+ rbio->bio.bi_status = BLK_STS_IOERR;
} else {
- flags &= ~BCH_READ_LAST_FRAGMENT;
- flags |= BCH_READ_MUST_CLONE;
+ struct printbuf buf = PRINTBUF;
+
+ lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, &buf,
+ (subvol_inum) { subvol, read_pos.inode },
+ read_pos.offset << 9));
+ if (rbio->data_update)
+ prt_str(&buf, "(internal move) ");
+ prt_str(&buf, "successful retry");
- __bch2_read(c, rbio, iter, inum, &failed, flags);
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
}
+
+ bch2_rbio_done(rbio);
+ bch2_trans_put(trans);
}
-static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
- blk_status_t error)
+static void bch2_rbio_error(struct bch_read_bio *rbio,
+ int ret, blk_status_t blk_error)
{
- rbio->retry = retry;
+ BUG_ON(ret >= 0);
- if (rbio->flags & BCH_READ_IN_RETRY)
+ rbio->ret = ret;
+ rbio->bio.bi_status = blk_error;
+
+ bch2_rbio_parent(rbio)->saw_error = true;
+
+ if (rbio->flags & BCH_READ_in_retry)
return;
- if (retry == READ_ERR) {
+ if (bch2_err_matches(ret, BCH_ERR_data_read_retry)) {
+ bch2_rbio_punt(rbio, bch2_rbio_retry,
+ RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ } else {
rbio = bch2_rbio_free(rbio);
- rbio->bio.bi_status = error;
+ rbio->ret = ret;
+ rbio->bio.bi_status = blk_error;
+
bch2_rbio_done(rbio);
- } else {
- bch2_rbio_punt(rbio, bch2_rbio_retry,
- RBIO_CONTEXT_UNBOUND, system_unbound_wq);
}
}
@@ -531,15 +575,13 @@ static void bch2_read_io_err(struct work_struct *work)
bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
- if (ca) {
- bch2_io_error(ca, BCH_MEMBER_ERROR_read);
+ if (ca)
bch_err_ratelimited(ca, "%s", buf.buf);
- } else {
+ else
bch_err_ratelimited(c, "%s", buf.buf);
- }
printbuf_exit(&buf);
- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
}
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
@@ -621,14 +663,12 @@ static void bch2_read_csum_err(struct work_struct *work)
bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- if (ca) {
- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+ if (ca)
bch_err_ratelimited(ca, "%s", buf.buf);
- } else {
+ else
bch_err_ratelimited(c, "%s", buf.buf);
- }
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
printbuf_exit(&buf);
}
@@ -648,7 +688,7 @@ static void bch2_read_decompress_err(struct work_struct *work)
else
bch_err_ratelimited(c, "%s", buf.buf);
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR);
printbuf_exit(&buf);
}
@@ -668,7 +708,7 @@ static void bch2_read_decrypt_err(struct work_struct *work)
else
bch_err_ratelimited(c, "%s", buf.buf);
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR);
printbuf_exit(&buf);
}
@@ -678,9 +718,11 @@ static void __bch2_read_endio(struct work_struct *work)
struct bch_read_bio *rbio =
container_of(work, struct bch_read_bio, work);
struct bch_fs *c = rbio->c;
- struct bio *src = &rbio->bio;
- struct bio *dst = &bch2_rbio_parent(rbio)->bio;
- struct bvec_iter dst_iter = rbio->bvec_iter;
+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
+ struct bch_read_bio *parent = bch2_rbio_parent(rbio);
+ struct bio *src = &rbio->bio;
+ struct bio *dst = &parent->bio;
+ struct bvec_iter dst_iter = rbio->bvec_iter;
struct bch_extent_crc_unpacked crc = rbio->pick.crc;
struct nonce nonce = extent_nonce(rbio->version, crc);
unsigned nofs_flags;
@@ -698,8 +740,26 @@ static void __bch2_read_endio(struct work_struct *work)
src->bi_iter = rbio->bvec_iter;
}
+ bch2_maybe_corrupt_bio(src, bch2_read_corrupt_ratio);
+
csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
- if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
+ bool csum_good = !bch2_crc_cmp(csum, rbio->pick.crc.csum) || c->opts.no_data_io;
+
+ /*
+ * Checksum error: if the bio wasn't bounced, we may have been
+ * reading into buffers owned by userspace (that userspace can
+ * scribble over) - retry the read, bouncing it this time:
+ */
+ if (!csum_good && !rbio->bounce && (rbio->flags & BCH_READ_user_mapped)) {
+ rbio->flags |= BCH_READ_must_bounce;
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err_maybe_userspace,
+ BLK_STS_IOERR);
+ goto out;
+ }
+
+ bch2_account_io_completion(ca, BCH_MEMBER_ERROR_checksum, 0, csum_good);
+
+ if (!csum_good)
goto csum_err;
/*
@@ -712,32 +772,40 @@ static void __bch2_read_endio(struct work_struct *work)
if (unlikely(rbio->narrow_crcs))
bch2_rbio_narrow_crcs(rbio);
- if (rbio->flags & BCH_READ_NODECODE)
- goto nodecode;
+ if (likely(!parent->data_update)) {
+ /* Adjust crc to point to subset of data we want: */
+ crc.offset += rbio->offset_into_extent;
+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
- /* Adjust crc to point to subset of data we want: */
- crc.offset += rbio->offset_into_extent;
- crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+ if (crc_is_compressed(crc)) {
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
- if (crc_is_compressed(crc)) {
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
- if (ret)
- goto decrypt_err;
+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+ !c->opts.no_data_io)
+ goto decompression_err;
+ } else {
+ /* don't need to decrypt the entire bio: */
+ nonce = nonce_add(nonce, crc.offset << 9);
+ bio_advance(src, crc.offset << 9);
- if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
- !c->opts.no_data_io)
- goto decompression_err;
- } else {
- /* don't need to decrypt the entire bio: */
- nonce = nonce_add(nonce, crc.offset << 9);
- bio_advance(src, crc.offset << 9);
+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+ src->bi_iter.bi_size = dst_iter.bi_size;
- BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
- src->bi_iter.bi_size = dst_iter.bi_size;
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
- if (ret)
- goto decrypt_err;
+ if (rbio->bounce) {
+ struct bvec_iter src_iter = src->bi_iter;
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+ }
+ }
+ } else {
+ if (rbio->split)
+ rbio->parent->pick = rbio->pick;
if (rbio->bounce) {
struct bvec_iter src_iter = src->bi_iter;
@@ -754,12 +822,9 @@ static void __bch2_read_endio(struct work_struct *work)
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
if (ret)
goto decrypt_err;
-
- promote_start(rbio->promote, rbio);
- rbio->promote = NULL;
}
-nodecode:
- if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
+
+ if (likely(!(rbio->flags & BCH_READ_in_retry))) {
rbio = bch2_rbio_free(rbio);
bch2_rbio_done(rbio);
}
@@ -767,17 +832,6 @@ out:
memalloc_nofs_restore(nofs_flags);
return;
csum_err:
- /*
- * Checksum error: if the bio wasn't bounced, we may have been
- * reading into buffers owned by userspace (that userspace can
- * scribble over) - retry the read, bouncing it this time:
- */
- if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
- rbio->flags |= BCH_READ_MUST_BOUNCE;
- bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
- goto out;
- }
-
bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
goto out;
decompression_err:
@@ -797,10 +851,8 @@ static void bch2_read_endio(struct bio *bio)
struct workqueue_struct *wq = NULL;
enum rbio_context context = RBIO_CONTEXT_NULL;
- if (rbio->have_ioref) {
- bch2_latency_acct(ca, rbio->submit_time, READ);
- percpu_ref_put(&ca->io_ref);
- }
+ bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
+ rbio->submit_time, !bio->bi_status);
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;
@@ -810,14 +862,14 @@ static void bch2_read_endio(struct bio *bio)
return;
}
- if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+ if (((rbio->flags & BCH_READ_retry_if_stale) && race_fault()) ||
(ca && dev_ptr_stale(ca, &rbio->pick.ptr))) {
- trace_and_count(c, read_reuse_race, &rbio->bio);
+ trace_and_count(c, io_read_reuse_race, &rbio->bio);
- if (rbio->flags & BCH_READ_RETRY_IF_STALE)
- bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
+ if (rbio->flags & BCH_READ_retry_if_stale)
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_ptr_stale_retry, BLK_STS_AGAIN);
else
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_ptr_stale_race, BLK_STS_AGAIN);
return;
}
@@ -883,15 +935,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent,
- struct bch_io_failures *failed, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags, int dev)
{
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
- struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos data_pos = bkey_start_pos(k.k);
- int pick_ret;
+ struct data_update *u = rbio_data_update(orig);
+ int ret = 0;
if (bkey_extent_is_inline_data(k.k)) {
unsigned bytes = min_t(unsigned, iter.bi_size,
@@ -902,19 +954,21 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
swap(iter.bi_size, bytes);
bio_advance_iter(&orig->bio, &iter, bytes);
zero_fill_bio_iter(&orig->bio, iter);
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_inline],
+ bvec_iter_sectors(iter));
goto out_read_done;
}
retry_pick:
- pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+ ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
/* hole or reservation - just zero fill: */
- if (!pick_ret)
+ if (!ret)
goto hole;
- if (unlikely(pick_ret < 0)) {
+ if (unlikely(ret < 0)) {
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
- prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret));
+ prt_printf(&buf, "%s\n ", bch2_err_str(ret));
bch2_bkey_val_to_text(&buf, c, k);
bch_err_ratelimited(c, "%s", buf.buf);
@@ -930,6 +984,7 @@ retry_pick:
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
+ ret = -BCH_ERR_data_read_no_encryption_key;
goto err;
}
@@ -941,56 +996,57 @@ retry_pick:
* retry path, don't check here, it'll be caught in bch2_read_endio()
* and we'll end up in the retry path:
*/
- if ((flags & BCH_READ_IN_RETRY) &&
+ if ((flags & BCH_READ_in_retry) &&
!pick.ptr.cached &&
ca &&
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
- bch2_mark_io_failure(failed, &pick);
+ bch2_mark_io_failure(failed, &pick, false);
percpu_ref_put(&ca->io_ref);
goto retry_pick;
}
- if (flags & BCH_READ_NODECODE) {
+ if (likely(!u)) {
+ if (!(flags & BCH_READ_last_fragment) ||
+ bio_flagged(&orig->bio, BIO_CHAIN))
+ flags |= BCH_READ_must_clone;
+
+ narrow_crcs = !(flags & BCH_READ_in_retry) &&
+ bch2_can_narrow_extent_crcs(k, pick.crc);
+
+ if (narrow_crcs && (flags & BCH_READ_user_mapped))
+ flags |= BCH_READ_must_bounce;
+
+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+ if (crc_is_compressed(pick.crc) ||
+ (pick.crc.csum_type != BCH_CSUM_none &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+ (flags & BCH_READ_user_mapped)) ||
+ (flags & BCH_READ_must_bounce)))) {
+ read_full = true;
+ bounce = true;
+ }
+ } else {
/*
* can happen if we retry, and the extent we were going to read
* has been merged in the meantime:
*/
- if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
+ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
if (ca)
percpu_ref_put(&ca->io_ref);
- goto hole;
+ rbio->ret = -BCH_ERR_data_read_buffer_too_small;
+ goto out_read_done;
}
iter.bi_size = pick.crc.compressed_size << 9;
- goto get_bio;
- }
-
- if (!(flags & BCH_READ_LAST_FRAGMENT) ||
- bio_flagged(&orig->bio, BIO_CHAIN))
- flags |= BCH_READ_MUST_CLONE;
-
- narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
- bch2_can_narrow_extent_crcs(k, pick.crc);
-
- if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
- flags |= BCH_READ_MUST_BOUNCE;
-
- EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
- if (crc_is_compressed(pick.crc) ||
- (pick.crc.csum_type != BCH_CSUM_none &&
- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
- (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
- (flags & BCH_READ_USER_MAPPED)) ||
- (flags & BCH_READ_MUST_BOUNCE)))) {
read_full = true;
- bounce = true;
}
if (orig->opts.promote_target || have_io_error(failed))
- promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
- &rbio, &bounce, &read_full, failed);
+ rbio = promote_alloc(trans, iter, k, &pick, flags, orig,
+ &bounce, &read_full, failed);
if (!read_full) {
EBUG_ON(crc_is_compressed(pick.crc));
@@ -1009,7 +1065,7 @@ retry_pick:
pick.crc.offset = 0;
pick.crc.live_size = bvec_iter_sectors(iter);
}
-get_bio:
+
if (rbio) {
/*
* promote already allocated bounce rbio:
@@ -1024,17 +1080,16 @@ get_bio:
} else if (bounce) {
unsigned sectors = pick.crc.compressed_size;
- rbio = rbio_init(bio_alloc_bioset(NULL,
+ rbio = rbio_init_fragment(bio_alloc_bioset(NULL,
DIV_ROUND_UP(sectors, PAGE_SECTORS),
0,
GFP_NOFS,
&c->bio_read_split),
- orig->opts);
+ orig);
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
rbio->bounce = true;
- rbio->split = true;
- } else if (flags & BCH_READ_MUST_CLONE) {
+ } else if (flags & BCH_READ_must_clone) {
/*
* Have to clone if there were any splits, due to error
* reporting issues (if a split errored, and retrying didn't
@@ -1043,11 +1098,10 @@ get_bio:
* from the whole bio, in which case we don't want to retry and
* lose the error)
*/
- rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
+ rbio = rbio_init_fragment(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
&c->bio_read_split),
- orig->opts);
+ orig);
rbio->bio.bi_iter = iter;
- rbio->split = true;
} else {
rbio = orig;
rbio->bio.bi_iter = iter;
@@ -1056,67 +1110,60 @@ get_bio:
EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
- rbio->c = c;
rbio->submit_time = local_clock();
- if (rbio->split)
- rbio->parent = orig;
- else
+ if (!rbio->split)
rbio->end_io = orig->bio.bi_end_io;
rbio->bvec_iter = iter;
rbio->offset_into_extent= offset_into_extent;
rbio->flags = flags;
rbio->have_ioref = ca != NULL;
rbio->narrow_crcs = narrow_crcs;
- rbio->hole = 0;
- rbio->retry = 0;
+ rbio->ret = 0;
rbio->context = 0;
- /* XXX: only initialize this if needed */
- rbio->devs_have = bch2_bkey_devs(k);
rbio->pick = pick;
rbio->subvol = orig->subvol;
rbio->read_pos = read_pos;
rbio->data_btree = data_btree;
rbio->data_pos = data_pos;
rbio->version = k.k->bversion;
- rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
- if (flags & BCH_READ_NODECODE)
- orig->pick = pick;
-
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
if (rbio->bounce)
- trace_and_count(c, read_bounce, &rbio->bio);
+ trace_and_count(c, io_read_bounce, &rbio->bio);
- this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+ if (!u)
+ this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+ else
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio));
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
/*
* If it's being moved internally, we don't want to flag it as a cache
* hit:
*/
- if (ca && pick.ptr.cached && !(flags & BCH_READ_NODECODE))
+ if (ca && pick.ptr.cached && !u)
bch2_bucket_io_time_reset(trans, pick.ptr.dev,
PTR_BUCKET_NR(ca, &pick.ptr), READ);
- if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+ if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) {
bio_inc_remaining(&orig->bio);
- trace_and_count(c, read_split, &orig->bio);
+ trace_and_count(c, io_read_split, &orig->bio);
}
/*
* Unlock the iterator while the btree node's lock is still in
* cache, before doing the IO:
*/
- if (!(flags & BCH_READ_IN_RETRY))
+ if (!(flags & BCH_READ_in_retry))
bch2_trans_unlock(trans);
else
bch2_trans_unlock_long(trans);
- if (!rbio->pick.idx) {
+ if (likely(!rbio->pick.do_ec_reconstruct)) {
if (unlikely(!rbio->have_ioref)) {
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
@@ -1126,7 +1173,9 @@ get_bio:
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ bch2_rbio_error(rbio,
+ -BCH_ERR_data_read_retry_device_offline,
+ BLK_STS_IOERR);
goto out;
}
@@ -1135,10 +1184,10 @@ get_bio:
bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
if (unlikely(c->opts.no_data_io)) {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
bio_endio(&rbio->bio);
} else {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
submit_bio(&rbio->bio);
else
submit_bio_wait(&rbio->bio);
@@ -1152,15 +1201,16 @@ get_bio:
} else {
/* Attempting reconstruct read: */
if (bch2_ec_read_extent(trans, rbio, k)) {
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_ec_reconstruct_err,
+ BLK_STS_IOERR);
goto out;
}
- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
bio_endio(&rbio->bio);
}
out:
- if (likely(!(flags & BCH_READ_IN_RETRY))) {
+ if (likely(!(flags & BCH_READ_in_retry))) {
return 0;
} else {
bch2_trans_unlock(trans);
@@ -1170,54 +1220,54 @@ out:
rbio->context = RBIO_CONTEXT_UNBOUND;
bch2_read_endio(&rbio->bio);
- ret = rbio->retry;
+ ret = rbio->ret;
rbio = bch2_rbio_free(rbio);
- if (ret == READ_RETRY_AVOID) {
- bch2_mark_io_failure(failed, &pick);
- ret = READ_RETRY;
- }
-
- if (!ret)
- goto out_read_done;
+ if (bch2_err_matches(ret, BCH_ERR_data_read_retry_avoid))
+ bch2_mark_io_failure(failed, &pick,
+ ret == -BCH_ERR_data_read_retry_csum_err);
return ret;
}
err:
- if (flags & BCH_READ_IN_RETRY)
- return READ_ERR;
+ if (flags & BCH_READ_in_retry)
+ return ret;
- orig->bio.bi_status = BLK_STS_IOERR;
+ orig->bio.bi_status = BLK_STS_IOERR;
+ orig->ret = ret;
goto out_read_done;
hole:
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_hole],
+ bvec_iter_sectors(iter));
/*
- * won't normally happen in the BCH_READ_NODECODE
- * (bch2_move_extent()) path, but if we retry and the extent we wanted
- * to read no longer exists we have to signal that:
+ * won't normally happen in the data update (bch2_move_extent()) path,
+ * but if we retry and the extent we wanted to read no longer exists we
+ * have to signal that:
*/
- if (flags & BCH_READ_NODECODE)
- orig->hole = true;
+ if (u)
+ orig->ret = -BCH_ERR_data_read_key_overwritten;
zero_fill_bio_iter(&orig->bio, iter);
out_read_done:
- if (flags & BCH_READ_LAST_FRAGMENT)
+ if ((flags & BCH_READ_last_fragment) &&
+ !(flags & BCH_READ_in_retry))
bch2_rbio_done(orig);
return 0;
}
-void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter, subvol_inum inum,
- struct bch_io_failures *failed, unsigned flags)
+int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter, subvol_inum inum,
+ struct bch_io_failures *failed, unsigned flags)
{
- struct btree_trans *trans = bch2_trans_get(c);
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_buf sk;
struct bkey_s_c k;
int ret;
- BUG_ON(flags & BCH_READ_NODECODE);
+ EBUG_ON(rbio->data_update);
bch2_bkey_buf_init(&sk);
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
@@ -1267,24 +1317,26 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
swap(bvec_iter.bi_size, bytes);
if (bvec_iter.bi_size == bytes)
- flags |= BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_last_fragment;
ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
data_btree, k,
- offset_into_extent, failed, flags);
+ offset_into_extent, failed, flags, -1);
if (ret)
goto err;
- if (flags & BCH_READ_LAST_FRAGMENT)
+ if (flags & BCH_READ_last_fragment)
break;
swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
err:
+ if (ret == -BCH_ERR_data_read_retry_csum_err_maybe_userspace)
+ flags |= BCH_READ_must_bounce;
+
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
- ret != READ_RETRY &&
- ret != READ_RETRY_AVOID)
+ !bch2_err_matches(ret, BCH_ERR_data_read_retry))
break;
}
@@ -1292,17 +1344,22 @@ err:
if (ret) {
struct printbuf buf = PRINTBUF;
- bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
- prt_printf(&buf, "read error %i from btree lookup", ret);
+ lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum,
+ bvec_iter.bi_sector << 9));
+ prt_printf(&buf, "read error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
- rbio->bio.bi_status = BLK_STS_IOERR;
- bch2_rbio_done(rbio);
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ rbio->ret = ret;
+
+ if (!(flags & BCH_READ_in_retry))
+ bch2_rbio_done(rbio);
}
- bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
+ return ret;
}
void bch2_fs_io_read_exit(struct bch_fs *c)