diff options
Diffstat (limited to 'fs/bcachefs/move.c')
-rw-r--r-- | fs/bcachefs/move.c | 501 |
1 files changed, 310 insertions, 191 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 160b4374160a..fc396b9fa754 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -38,28 +38,28 @@ const char * const bch2_data_ops_strs[] = { NULL }; -static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, +static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - if (trace_move_extent_enabled()) { + if (trace_io_move_enabled()) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); - trace_move_extent(c, buf.buf); + trace_io_move(c, buf.buf); printbuf_exit(&buf); } } -static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k) +static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) { - if (trace_move_extent_read_enabled()) { + if (trace_io_move_read_enabled()) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); - trace_move_extent_read(c, buf.buf); + trace_io_move_read(c, buf.buf); printbuf_exit(&buf); } } @@ -74,11 +74,7 @@ struct moving_io { unsigned read_sectors; unsigned write_sectors; - struct bch_read_bio rbio; - struct data_update write; - /* Must be last since it is variable size */ - struct bio_vec bi_inline_vecs[]; }; static void move_free(struct moving_io *io) @@ -88,43 +84,72 @@ static void move_free(struct moving_io *io) if (io->b) atomic_dec(&io->b->count); - bch2_data_update_exit(&io->write); - mutex_lock(&ctxt->lock); list_del(&io->io_list); wake_up(&ctxt->wait); mutex_unlock(&ctxt->lock); + if (!io->write.data_opts.scrub) { + bch2_data_update_exit(&io->write); + } else { + bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio); + kfree(io->write.bvecs); + } kfree(io); } static void move_write_done(struct bch_write_op *op) { struct moving_io *io = container_of(op, struct moving_io, write.op); + struct bch_fs *c = op->c; struct moving_context *ctxt = io->write.ctxt; - if (io->write.op.error) + if (op->error) { + if (trace_io_move_write_fail_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_write_op_to_text(&buf, op); + prt_printf(&buf, "ret\t%s\n", bch2_err_str(op->error)); + trace_io_move_write_fail(c, buf.buf); + printbuf_exit(&buf); + } + this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); + ctxt->write_error = true; + } - atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); - atomic_dec(&io->write.ctxt->write_ios); + atomic_sub(io->write_sectors, &ctxt->write_sectors); + atomic_dec(&ctxt->write_ios); move_free(io); closure_put(&ctxt->cl); } static void move_write(struct moving_io *io) { - if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { + struct moving_context *ctxt = io->write.ctxt; + + if (ctxt->stats) { + if (io->write.rbio.bio.bi_status) + atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, + &ctxt->stats->sectors_error_uncorrected); + else if (io->write.rbio.saw_error) + atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, + &ctxt->stats->sectors_error_corrected); + } + + if (unlikely(io->write.rbio.ret || + io->write.rbio.bio.bi_status || + io->write.data_opts.scrub)) { move_free(io); return; } - if (trace_move_extent_write_enabled()) { + if (trace_io_move_write_enabled()) { struct bch_fs *c = io->write.op.c; struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); - trace_move_extent_write(c, buf.buf); + trace_io_move_write(c, buf.buf); printbuf_exit(&buf); } @@ -132,7 +157,7 @@ static void move_write(struct moving_io *io) atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); atomic_inc(&io->write.ctxt->write_ios); - bch2_data_update_read_done(&io->write, io->rbio.pick.crc); + bch2_data_update_read_done(&io->write); } struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) @@ -145,7 +170,7 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctx static void move_read_endio(struct bio *bio) { - struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); + struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio); struct moving_context *ctxt = io->write.ctxt; atomic_sub(io->read_sectors, &ctxt->read_sectors); @@ -258,14 +283,10 @@ int bch2_move_extent(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - struct moving_io *io; - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - unsigned sectors = k.k->size, pages; int ret = -ENOMEM; - trace_move_extent2(c, k, &io_opts, &data_opts); + trace_io_move2(c, k, &io_opts, &data_opts); + this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); if (ctxt->stats) ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); @@ -273,7 +294,8 @@ int bch2_move_extent(struct moving_context *ctxt, bch2_data_update_opts_normalize(k, &data_opts); if (!data_opts.rewrite_ptrs && - !data_opts.extra_replicas) { + !data_opts.extra_replicas && + !data_opts.scrub) { if (data_opts.kill_ptrs) return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); return 0; @@ -285,13 +307,7 @@ int bch2_move_extent(struct moving_context *ctxt, */ bch2_trans_unlock(trans); - /* write path might have to decompress data: */ - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); - - pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); - io = kzalloc(sizeof(struct moving_io) + - sizeof(struct bio_vec) * pages, GFP_KERNEL); + struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL); if (!io) goto err; @@ -300,31 +316,27 @@ int bch2_move_extent(struct moving_context *ctxt, io->read_sectors = k.k->size; io->write_sectors = k.k->size; - bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0); - io->write.op.wbio.bio.bi_ioprio = - IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); - - if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, - GFP_KERNEL)) - goto err_free; + if (!data_opts.scrub) { + ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, + &io_opts, data_opts, iter->btree_id, k); + if (ret) + goto err_free; - io->rbio.c = c; - io->rbio.opts = io_opts; - bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0); - io->rbio.bio.bi_vcnt = pages; - io->rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); - io->rbio.bio.bi_iter.bi_size = sectors << 9; + io->write.op.end_io = move_write_done; + } else { + bch2_bkey_buf_init(&io->write.k); + bch2_bkey_buf_reassemble(&io->write.k, c, k); - io->rbio.bio.bi_opf = REQ_OP_READ; - io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); - io->rbio.bio.bi_end_io = move_read_endio; + io->write.op.c = c; + io->write.data_opts = data_opts; - ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, iter->btree_id, k); - if (ret) - goto err_free_pages; + ret = bch2_data_update_bios_init(&io->write, c, &io_opts); + if (ret) + goto err_free; + } - io->write.op.end_io = move_write_done; + io->write.rbio.bio.bi_end_io = move_read_endio; + io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); if (ctxt->rate) bch2_ratelimit_increment(ctxt->rate, k.k->size); @@ -339,9 +351,7 @@ int bch2_move_extent(struct moving_context *ctxt, atomic_inc(&io->b->count); } - this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); - this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); - trace_move_extent_read2(c, k); + trace_io_move_read2(c, k); mutex_lock(&ctxt->lock); atomic_add(io->read_sectors, &ctxt->read_sectors); @@ -356,33 +366,33 @@ int bch2_move_extent(struct moving_context *ctxt, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - bch2_read_extent(trans, &io->rbio, - bkey_start_pos(k.k), - iter->btree_id, k, 0, - BCH_READ_NODECODE| - BCH_READ_LAST_FRAGMENT); + __bch2_read_extent(trans, &io->write.rbio, + io->write.rbio.bio.bi_iter, + bkey_start_pos(k.k), + iter->btree_id, k, 0, + NULL, + BCH_READ_last_fragment, + data_opts.scrub ? data_opts.read_dev : -1); return 0; -err_free_pages: - bio_free_pages(&io->write.op.wbio.bio); err_free: kfree(io); err: - if (ret == -BCH_ERR_data_update_done) + if (bch2_err_matches(ret, BCH_ERR_data_update_done)) return 0; if (bch2_err_matches(ret, EROFS) || bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; - count_event(c, move_extent_start_fail); + count_event(c, io_move_start_fail); - if (trace_move_extent_start_fail_enabled()) { + if (trace_io_move_start_fail_enabled()) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, ": "); prt_str(&buf, bch2_err_str(ret)); - trace_move_extent_start_fail(c, buf.buf); + trace_io_move_start_fail(c, buf.buf); printbuf_exit(&buf); } return ret; @@ -518,6 +528,37 @@ int bch2_move_ratelimit(struct moving_context *ctxt) return 0; } +/* + * Move requires non extents iterators, and there's also no need for it to + * signal indirect_extent_missing_error: + */ +static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c_reflink_p p) +{ + if (unlikely(REFLINK_P_ERROR(p.v))) + return bkey_s_c_null; + + struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v)); + + bch2_trans_iter_init(trans, iter, + BTREE_ID_reflink, reflink_pos, + BTREE_ITER_not_extents); + + struct bkey_s_c k = bch2_btree_iter_peek(trans, iter); + if (!k.k || bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } + + if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) { + bch2_trans_iter_exit(trans, iter); + return bkey_s_c_null; + } + + return k; +} + static int bch2_move_data_btree(struct moving_context *ctxt, struct bpos start, struct bpos end, @@ -551,6 +592,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_trans_begin(trans); bch2_trans_iter_init(trans, &iter, btree_id, start, BTREE_ITER_prefetch| + BTREE_ITER_not_extents| BTREE_ITER_all_snapshots); if (ctxt->rate) @@ -561,7 +603,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(trans, &iter); if (!k.k) break; @@ -581,17 +623,16 @@ static int bch2_move_data_btree(struct moving_context *ctxt, k.k->type == KEY_TYPE_reflink_p && REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); bch2_trans_iter_exit(trans, &reflink_iter); - k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0); + k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; - if (bkey_deleted(k.k)) + if (!k.k) goto next_nondata; /* @@ -627,7 +668,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; - if (ret2 == -ENOMEM) { + if (bch2_err_matches(ret2, ENOMEM)) { /* memory allocation failure, wait for some IO to finish */ bch2_move_ctxt_wait_for_io(ctxt); continue; @@ -640,7 +681,7 @@ next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } bch2_trans_iter_exit(trans, &reflink_iter); @@ -689,21 +730,22 @@ int bch2_move_data(struct bch_fs *c, bool wait_on_copygc, move_pred_fn pred, void *arg) { - struct moving_context ctxt; - int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_move_data(&ctxt, start, end, pred, arg); + int ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; } -int bch2_evacuate_bucket(struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - struct bpos bucket, int gen, - struct data_update_opts _data_opts) +static int __bch2_move_data_phys(struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + unsigned dev, + u64 bucket_start, + u64 bucket_end, + unsigned data_types, + move_pred_fn pred, void *arg) { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; @@ -712,16 +754,19 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, struct btree_iter iter = {}, bp_iter = {}; struct bkey_buf sk; struct bkey_s_c k; - struct data_update_opts data_opts; - unsigned sectors_moved = 0; struct bkey_buf last_flushed; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + struct bch_dev *ca = bch2_dev_tryget(c, dev); if (!ca) return 0; - trace_bucket_evacuate(c, &bucket); + bucket_end = min(bucket_end, ca->mi.nbuckets); + + struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start)); + struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end)); + bch2_dev_put(ca); + ca = NULL; bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); @@ -732,8 +777,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, */ bch2_trans_begin(trans); - bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, - bucket_pos_to_bp_start(ca, bucket), 0); + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0); bch_err_msg(c, ret, "looking up alloc key"); if (ret) @@ -750,14 +794,14 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&bp_iter); + k = bch2_btree_iter_peek(trans, &bp_iter); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; - if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket))) + if (!k.k || bkey_gt(k.k->p, bp_end)) break; if (k.k->type != KEY_TYPE_backpointer) @@ -765,107 +809,148 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - if (!bp.v->level) { - k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; - if (!k.k) - goto next; + if (ctxt->stats) + ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; + + if (!(data_types & BIT(bp.v->data_type))) + goto next; - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); + if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes) + goto next; + k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + goto err; + if (!k.k) + goto next; + + if (!bp.v->level) { ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); if (ret) { bch2_trans_iter_exit(trans, &iter); continue; } + } - data_opts = _data_opts; - data_opts.target = io_opts.background_target; - data_opts.rewrite_ptrs = 0; - - unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */ - unsigned i = 0; - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { - if (p.ptr.dev == bucket.inode) { - if (p.ptr.cached) { - bch2_trans_iter_exit(trans, &iter); - goto next; - } - data_opts.rewrite_ptrs |= 1U << i; - break; - } - i++; - } - - ret = bch2_move_extent(ctxt, bucket_in_flight, - &iter, k, io_opts, data_opts); + struct data_update_opts data_opts = {}; + if (!pred(c, arg, k, &io_opts, &data_opts)) { bch2_trans_iter_exit(trans, &iter); + goto next; + } - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret == -ENOMEM) { - /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt); - continue; - } - if (ret) - goto err; - - if (ctxt->stats) - atomic64_add(sectors, &ctxt->stats->sectors_seen); - sectors_moved += sectors; - } else { - struct btree *b; + if (data_opts.scrub && + !bch2_dev_idx_is_online(c, data_opts.read_dev)) { + bch2_trans_iter_exit(trans, &iter); + ret = -BCH_ERR_device_offline; + break; + } - b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); - ret = PTR_ERR_OR_ZERO(b); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - goto next; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; - if (!b) - goto next; + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); - unsigned sectors = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); + /* move_extent will drop locks */ + unsigned sectors = bp.v->bucket_len; - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); - bch2_trans_iter_exit(trans, &iter); + if (!bp.v->level) + ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts); + else if (!data_opts.scrub) + ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, k.k->p, 0); + else + ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; + bch2_trans_iter_exit(trans, &iter); - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, sectors); - if (ctxt->stats) { - atomic64_add(sectors, &ctxt->stats->sectors_seen); - atomic64_add(sectors, &ctxt->stats->sectors_moved); - } - sectors_moved += btree_sectors(c); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret == -ENOMEM) { + /* memory allocation failure, wait for some IO to finish */ + bch2_move_ctxt_wait_for_io(ctxt); + continue; } + if (ret) + goto err; + + if (ctxt->stats) + atomic64_add(sectors, &ctxt->stats->sectors_seen); next: - bch2_btree_iter_advance(&bp_iter); + bch2_btree_iter_advance(trans, &bp_iter); } - - trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); err: bch2_trans_iter_exit(trans, &bp_iter); - bch2_dev_put(ca); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); return ret; } +static int bch2_move_data_phys(struct bch_fs *c, + unsigned dev, + u64 start, + u64 end, + unsigned data_types, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) +{ + struct moving_context ctxt; + + bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); + + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + ctxt.stats->phys = true; + ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; + + int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg); + bch2_moving_ctxt_exit(&ctxt); + + return ret; +} + +struct evacuate_bucket_arg { + struct bpos bucket; + int gen; + struct data_update_opts data_opts; +}; + +static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + struct evacuate_bucket_arg *arg = _arg; + + *data_opts = arg->data_opts; + + unsigned i = 0; + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { + if (ptr->dev == arg->bucket.inode && + (arg->gen < 0 || arg->gen == ptr->gen) && + !ptr->cached) + data_opts->rewrite_ptrs |= BIT(i); + i++; + } + + return data_opts->rewrite_ptrs != 0; +} + +int bch2_evacuate_bucket(struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + struct bpos bucket, int gen, + struct data_update_opts data_opts) +{ + struct evacuate_bucket_arg arg = { bucket, gen, data_opts, }; + + return __bch2_move_data_phys(ctxt, bucket_in_flight, + bucket.inode, + bucket.offset, + bucket.offset + 1, + ~0, + evacuate_bucket_pred, &arg); +} + typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct btree *, struct bch_io_opts *, struct data_update_opts *); @@ -906,7 +991,7 @@ static int bch2_move_btree(struct bch_fs *c, retry: ret = 0; while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && + (b = bch2_btree_iter_peek_node(trans, &iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { if (kthread && kthread_should_stop()) break; @@ -926,7 +1011,7 @@ retry: if (ret) break; next: - bch2_btree_iter_next_node(&iter); + bch2_btree_iter_next_node(trans, &iter); } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; @@ -1007,14 +1092,6 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); } -static bool migrate_btree_pred(struct bch_fs *c, void *arg, - struct btree *b, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); -} - /* * Ancient versions of bcachefs produced packed formats which could represent * keys that the in memory format cannot represent; this checks for those @@ -1104,6 +1181,30 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); } +static bool scrub_pred(struct bch_fs *c, void *_arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + struct bch_ioctl_data *arg = _arg; + + if (k.k->type != KEY_TYPE_btree_ptr_v2) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (p.ptr.dev == arg->migrate.dev) { + if (!p.crc.csum_type) + return false; + break; + } + } + + data_opts->scrub = true; + data_opts->read_dev = arg->migrate.dev; + return true; +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -1118,6 +1219,22 @@ int bch2_data_job(struct bch_fs *c, bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); switch (op.op) { + case BCH_DATA_OP_scrub: + /* + * prevent tests from spuriously failing, make sure we see all + * btree nodes that need to be repaired + */ + bch2_btree_interior_updates_flush(c); + + ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX, + op.scrub.data_types, + NULL, + stats, + writepoint_hashed((unsigned long) current), + false, + scrub_pred, &op) ?: ret; + break; + case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); @@ -1137,14 +1254,14 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, start, end, - migrate_btree_pred, &op, stats) ?: ret; - ret = bch2_move_data(c, start, end, - NULL, - stats, - writepoint_hashed((unsigned long) current), - true, - migrate_pred, &op) ?: ret; + ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX, + ~0, + NULL, + stats, + writepoint_hashed((unsigned long) current), + true, + migrate_pred, &op) ?: ret; + bch2_btree_interior_updates_flush(c); ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_rewrite_old_nodes: @@ -1176,17 +1293,17 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) prt_newline(out); printbuf_indent_add(out, 2); - prt_printf(out, "keys moved: %llu\n", atomic64_read(&stats->keys_moved)); - prt_printf(out, "keys raced: %llu\n", atomic64_read(&stats->keys_raced)); - prt_printf(out, "bytes seen: "); + prt_printf(out, "keys moved:\t%llu\n", atomic64_read(&stats->keys_moved)); + prt_printf(out, "keys raced:\t%llu\n", atomic64_read(&stats->keys_raced)); + prt_printf(out, "bytes seen:\t"); prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); prt_newline(out); - prt_printf(out, "bytes moved: "); + prt_printf(out, "bytes moved:\t"); prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); prt_newline(out); - prt_printf(out, "bytes raced: "); + prt_printf(out, "bytes raced:\t"); prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); prt_newline(out); @@ -1195,7 +1312,8 @@ void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) { - struct moving_io *io; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); bch2_move_stats_to_text(out, ctxt->stats); printbuf_indent_add(out, 2); @@ -1215,8 +1333,9 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str printbuf_indent_add(out, 2); mutex_lock(&ctxt->lock); + struct moving_io *io; list_for_each_entry(io, &ctxt->ios, io_list) - bch2_write_op_to_text(out, &io->write.op); + bch2_data_update_inflight_to_text(out, &io->write); mutex_unlock(&ctxt->lock); printbuf_indent_sub(out, 4); |