summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2021-03-14 19:01:14 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:08:54 -0400
commit1889ad5a1285ba452f6a8cef3df663087611050a (patch)
tree30343f618461d1e5803231c62a2f9fb761ce2b8d /fs
parent8567415457b25b467933e47ff78dca55a55f7206 (diff)
downloadlwn-1889ad5a1285ba452f6a8cef3df663087611050a.tar.gz
lwn-1889ad5a1285ba452f6a8cef3df663087611050a.zip
bcachefs: Add code to scan for/rewrite old btree nodes
This adds a new data job type that scans for btree nodes in the old extent format and rewrites them.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/bcachefs_ioctl.h17
-rw-r--r--fs/bcachefs/btree_io.c5
-rw-r--r--fs/bcachefs/btree_types.h1
-rw-r--r--fs/bcachefs/btree_update_interior.c1
-rw-r--r--fs/bcachefs/move.c131
-rw-r--r--fs/bcachefs/move.h6
-rw-r--r--fs/bcachefs/movinggc.c6
-rw-r--r--fs/bcachefs/rebalance.c3
8 files changed, 132 insertions, 38 deletions
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
index 38c6ac96e12f..1ef9907e07ad 100644
--- a/fs/bcachefs/bcachefs_ioctl.h
+++ b/fs/bcachefs/bcachefs_ioctl.h
@@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state {
};
enum bch_data_ops {
- BCH_DATA_OP_SCRUB = 0,
- BCH_DATA_OP_REREPLICATE = 1,
- BCH_DATA_OP_MIGRATE = 2,
- BCH_DATA_OP_NR = 3,
+ BCH_DATA_OP_SCRUB = 0,
+ BCH_DATA_OP_REREPLICATE = 1,
+ BCH_DATA_OP_MIGRATE = 2,
+ BCH_DATA_OP_REWRITE_OLD_NODES = 3,
+ BCH_DATA_OP_NR = 4,
};
/*
@@ -187,11 +188,13 @@ enum bch_data_ops {
* job. The file descriptor is O_CLOEXEC.
*/
struct bch_ioctl_data {
- __u32 op;
+ __u16 op;
+ __u8 start_btree;
+ __u8 end_btree;
__u32 flags;
- struct bpos start;
- struct bpos end;
+ struct bpos start_pos;
+ struct bpos end_pos;
union {
struct {
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 6e656ed6b32a..eac51c39fc6c 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned u64s;
int ret, retry_read = 0, write = READ;
+ b->version_ondisk = U16_MAX;
+
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
sort_iter_init(iter, b);
iter->size = (btree_blocks(c) + 1) * 2;
@@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sectors = vstruct_sectors(bne, c->block_bits);
}
+ b->version_ondisk = min(b->version_ondisk,
+ le16_to_cpu(i->version));
+
ret = validate_bset(c, ca, b, i, sectors,
READ, have_retry);
if (ret)
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 80bb31a53339..55d8d815a04a 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -76,6 +76,7 @@ struct btree {
u16 written;
u8 nsets;
u8 nr_key_bits;
+ u16 version_ondisk;
struct bkey_format format;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 285365ba7012..989ba81207c9 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
bch2_bset_init_first(b, &b->data->keys);
b->c.level = level;
b->c.btree_id = as->btree_id;
+ b->version_ondisk = c->sb.version;
memset(&b->nr, 0, sizeof(b->nr));
b->data->magic = cpu_to_le64(bset_magic(c));
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 28e2125c12ed..72958b867014 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user;
stats->btree_id = btree_id;
- stats->pos = POS_MIN;
+ stats->pos = start;
iter = bch2_trans_get_iter(&trans, btree_id, start,
BTREE_ITER_PREFETCH);
@@ -646,14 +646,15 @@ out:
}
int bch2_move_data(struct bch_fs *c,
+ enum btree_id start_btree_id, struct bpos start_pos,
+ enum btree_id end_btree_id, struct bpos end_pos,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
- struct bpos start,
- struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
{
struct moving_context ctxt = { .stats = stats };
+ enum btree_id id;
int ret;
closure_init_stack(&ctxt.cl);
@@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user;
- ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
- pred, arg, stats, BTREE_ID_EXTENTS) ?:
- __bch2_move_data(c, &ctxt, rate, wp, start, end,
- pred, arg, stats, BTREE_ID_REFLINK);
+ for (id = start_btree_id;
+ id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id++) {
+ stats->btree_id = id;
+
+ if (id != BTREE_ID_EXTENTS &&
+ id != BTREE_ID_REFLINK)
+ continue;
+
+ ret = __bch2_move_data(c, &ctxt, rate, wp,
+ id == start_btree_id ? start_pos : POS_MIN,
+ id == end_btree_id ? end_pos : POS_MAX,
+ pred, arg, stats, id);
+ if (ret)
+ break;
+ }
+
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
@@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c,
return ret;
}
+typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
+ struct btree *, struct bch_io_opts *,
+ struct data_opts *);
+
static int bch2_move_btree(struct bch_fs *c,
- move_pred_fn pred,
- void *arg,
+ enum btree_id start_btree_id, struct bpos start_pos,
+ enum btree_id end_btree_id, struct bpos end_pos,
+ move_btree_pred pred, void *arg,
struct bch_move_stats *stats)
{
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b;
- unsigned id;
+ enum btree_id id;
struct data_opts data_opts;
enum data_cmd cmd;
int ret = 0;
@@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c,
stats->data_type = BCH_DATA_btree;
- for (id = 0; id < BTREE_ID_NR; id++) {
+ for (id = start_btree_id;
+ id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id++) {
stats->btree_id = id;
- for_each_btree_node(&trans, iter, id, POS_MIN,
+ for_each_btree_node(&trans, iter, id,
+ id == start_btree_id ? start_pos : POS_MIN,
BTREE_ITER_PREFETCH, b) {
+ if (kthread && kthread_should_stop())
+ goto out;
+
+ if ((cmp_int(id, end_btree_id) ?:
+ bkey_cmp(b->key.k.p, end_pos)) > 0)
+ break;
+
stats->pos = iter->pos;
- switch ((cmd = pred(c, arg,
- bkey_i_to_s_c(&b->key),
- &io_opts, &data_opts))) {
+ switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
@@ -726,7 +754,7 @@ next:
ret = bch2_trans_iter_free(&trans, iter) ?: ret;
}
-
+out:
bch2_trans_exit(&trans);
return ret;
@@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
return DATA_REWRITE;
}
+static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{
+ return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{
+ return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{
+ if (b->version_ondisk != c->sb.version ||
+ btree_node_need_rewrite(b)) {
+ data_opts->target = 0;
+ data_opts->nr_replicas = 1;
+ data_opts->btree_insert_flags = 0;
+ return DATA_REWRITE;
+ }
+
+ return DATA_SKIP;
+}
+
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data op)
@@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);
- ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ rereplicate_btree_pred, c, stats) ?: ret;
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret;
- ret = bch2_move_data(c, NULL,
- writepoint_hashed((unsigned long) current),
- op.start,
- op.end,
+ ret = bch2_move_data(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ NULL, writepoint_hashed((unsigned long) current),
rereplicate_pred, c, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
@@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
- ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ migrate_btree_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
- ret = bch2_move_data(c, NULL,
- writepoint_hashed((unsigned long) current),
- op.start,
- op.end,
+ ret = bch2_move_data(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ NULL, writepoint_hashed((unsigned long) current),
migrate_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
+ case BCH_DATA_OP_REWRITE_OLD_NODES:
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ rewrite_old_nodes_pred, &op, stats) ?: ret;
+
+ if (!ret) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->version_min = c->disk_sb.sb->version;
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+ break;
default:
ret = -EINVAL;
}
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index b04bc669226d..403ca695c875 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
struct bkey_s_c,
struct bch_io_opts *, struct data_opts *);
-int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
+int bch2_move_data(struct bch_fs *,
+ enum btree_id, struct bpos,
+ enum btree_id, struct bpos,
+ struct bch_ratelimit *,
struct write_point_specifier,
- struct bpos, struct bpos,
move_pred_fn, void *,
struct bch_move_stats *);
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index b61bbc18a0aa..65a8cd14ee75 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c)
sizeof(h->data[0]),
bucket_offset_cmp, NULL);
- ret = bch2_move_data(c, &c->copygc_pd.rate,
+ ret = bch2_move_data(c,
+ 0, POS_MIN,
+ BTREE_ID_NR, POS_MAX,
+ &c->copygc_pd.rate,
writepoint_ptr(&c->copygc_write_point),
- POS_MIN, POS_MAX,
copygc_pred, NULL,
&move_stats);
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index c75411af4622..c83c12dbb0d2 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg)
rebalance_work_reset(c);
bch2_move_data(c,
+ 0, POS_MIN,
+ BTREE_ID_NR, POS_MAX,
/* ratelimiting disabled for now */
NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point),
- POS_MIN, POS_MAX,
rebalance_pred, NULL,
&r->move_stats);
}