summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_io.h
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2021-07-10 13:44:42 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:09:08 -0400
commit9f1833cadda7bb40a77dc9fd1b85798e20d92195 (patch)
tree52cecab2af6a679dfe9b796c0cf6aba2539d9253 /fs/bcachefs/btree_io.h
parentf8f86c6aec1ecb21839933ff3615dcd219ef026f (diff)
downloadlwn-9f1833cadda7bb40a77dc9fd1b85798e20d92195.tar.gz
lwn-9f1833cadda7bb40a77dc9fd1b85798e20d92195.zip
bcachefs: Update btree ptrs after every write
This closes a significant hole (and last known hole) in our ability to verify metadata. Previously, since btree nodes are log structured, we couldn't detect lost btree writes that weren't the first write to a given node. Additionally, this seems to have lead to some significant metadata corruption on multi device filesystems with metadata replication: since a write may have made it to one device and not another, if we read that btree node back from the replica that did have that write and started appending after that point, the other replica would have a gap in the bset entries and reading from that replica wouldn't find the rest of the bsets. But, since updates to interior btree nodes are now journalled, we can close this hole by updating pointers to btree nodes after every write with the currently written number of sectors, without negatively affecting performance. This means we will always detect lost or corrupt metadata - it also means that our btree is now a curious hybrid of COW and non COW btrees, with all the benefits of both (excluding complexity). Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs/bcachefs/btree_io.h')
-rw-r--r--fs/bcachefs/btree_io.h11
1 files changed, 9 insertions, 2 deletions
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 3732d135de8d..7fdcf879c7d4 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -32,6 +32,13 @@ static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
atomic_dec(&c->btree_cache.dirty);
}
+static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
+{
+ return k->k.type == KEY_TYPE_btree_ptr_v2
+ ? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written)
+ : 0;
+}
+
struct btree_read_bio {
struct bch_fs *c;
struct btree *b;
@@ -48,7 +55,8 @@ struct btree_write_bio {
struct work_struct work;
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
void *data;
- unsigned bytes;
+ unsigned data_bytes;
+ unsigned sector_offset;
struct bch_write_bio wbio;
};
@@ -137,7 +145,6 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id,
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
-void bch2_btree_write_error_work(struct work_struct *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);