summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-10-02 21:23:41 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2024-10-09 16:42:53 -0400
commit9d86178782a25fac105e550e1c29c7d3f8470116 (patch)
tree6294fe77dbf4da190d440605e4fc0d551d074003 /fs
parent84878e82457f2d7900cc70041bd7c05684a6726e (diff)
downloadlwn-9d86178782a25fac105e550e1c29c7d3f8470116.tar.gz
lwn-9d86178782a25fac105e550e1c29c7d3f8470116.zip
bcachefs: bch2_inode_or_descendents_is_open()
fsck can now correctly check if inodes in interior snapshot nodes are open/in use. - Tweak the vfs inode rhashtable so that the subvolume ID isn't hashed, meaning inums in different subvolumes will hash to the same slot. Note that this is a hack, and will cause problems if anyone ever has the same file in many different snapshots open all at the same time. - Then check if any of those subvolumes is a descendent of the snapshot ID being checked Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/fs.c106
-rw-r--r--fs/bcachefs/fs.h6
-rw-r--r--fs/bcachefs/fsck.c7
-rw-r--r--fs/bcachefs/inode.c5
4 files changed, 103 insertions, 21 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 23cae92d313d..e9e32d21f82d 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -157,6 +157,20 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
return a.subvol == b.subvol && a.inum == b.inum;
}
+static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
+{
+ const subvol_inum *inum = data;
+
+ return jhash(&inum->inum, sizeof(inum->inum), seed);
+}
+
+static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
+{
+ const struct bch_inode_info *inode = data;
+
+ return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed);
+}
+
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
const void *obj)
{
@@ -170,32 +184,93 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
.head_offset = offsetof(struct bch_inode_info, hash),
.key_offset = offsetof(struct bch_inode_info, ei_inum),
.key_len = sizeof(subvol_inum),
+ .hashfn = bch2_vfs_inode_hash_fn,
+ .obj_hashfn = bch2_vfs_inode_obj_hash_fn,
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
.automatic_shrinking = true,
};
-static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
{
- return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
-}
+ struct bch_fs *c = trans->c;
+ struct rhashtable *ht = &c->vfs_inodes_table;
+ subvol_inum inum = (subvol_inum) { .inum = p.offset };
+ DARRAY(u32) subvols;
+ int ret = 0;
-bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
-{
if (!test_bit(BCH_FS_started, &c->flags))
return false;
- subvol_inum inum = {
- .subvol = snapshot_t(c, p.snapshot)->subvol,
- .inum = p.offset,
- };
+ darray_init(&subvols);
+restart_from_top:
+
+ /*
+ * Tweaked version of __rhashtable_lookup(); we need to get a list of
+ * subvolumes in which the given inode number is open.
+ *
+ * For this to work, we don't include the subvolume ID in the key that
+ * we hash - all inodes with the same inode number regardless of
+ * subvolume will hash to the same slot.
+ *
+ * This will be less than ideal if the same file is ever open
+ * simultaneously in many different snapshots:
+ */
+ rcu_read_lock();
+ struct rhash_lock_head __rcu *const *bkt;
+ struct rhash_head *he;
+ unsigned int hash;
+ struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+ hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
+ bkt = rht_bucket(tbl, hash);
+ do {
+ struct bch_inode_info *inode;
+
+ rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
+ if (inode->ei_inum.inum == inum.inum) {
+ ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
+ GFP_NOWAIT|__GFP_NOWARN);
+ if (ret) {
+ rcu_read_unlock();
+ ret = darray_make_room(&subvols, 1);
+ if (ret)
+ goto err;
+ subvols.nr = 0;
+ goto restart_from_top;
+ }
+ }
+ }
+ /* An object might have been moved to a different hash chain,
+ * while we walk along it - better check and retry.
+ */
+ } while (he != RHT_NULLS_MARKER(bkt));
+
+ /* Ensure we see any new tables. */
+ smp_rmb();
+
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
+ goto restart;
+ rcu_read_unlock();
+
+ darray_for_each(subvols, i) {
+ u32 snap;
+ ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
+ if (ret)
+ goto err;
- /* snapshot tree interior node, can't safely delete while online (yet) */
- if (!inum.subvol) {
- bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
- return true;
+ ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
+ if (ret)
+ break;
}
+err:
+ darray_exit(&subvols);
+ return ret;
+}
- return __bch2_inode_hash_find(c, inum) != NULL;
+static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+ return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
}
static void __wait_on_freeing_inode(struct bch_fs *c,
@@ -271,7 +346,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
set_bit(EI_INODE_HASHED, &inode->ei_flags);
retry:
- if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
+ if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
+ &inode->ei_inum,
&inode->hash,
bch2_vfs_inodes_params))) {
old = bch2_inode_hash_find(c, trans, inode->ei_inum);
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index 40dbd5774d0b..59f9f7ae728d 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -146,6 +146,8 @@ struct bch_inode_info *
__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
+int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p);
+
int bch2_fs_quota_transfer(struct bch_fs *,
struct bch_inode_info *,
struct bch_qid,
@@ -179,8 +181,6 @@ void bch2_inode_update_after_write(struct btree_trans *,
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
inode_set_fn, void *, unsigned);
-bool bch2_inode_is_open(struct bch_fs *c, struct bpos p);
-
int bch2_setattr_nonsize(struct mnt_idmap *,
struct bch_inode_info *,
struct iattr *);
@@ -198,7 +198,7 @@ int bch2_vfs_init(void);
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
-static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; }
+static inline int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { return 0; }
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index f00a36f62323..a1087fd292e4 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1213,7 +1213,11 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
} else {
- if (fsck_err_on(!bch2_inode_is_open(c, k.k->p),
+ ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
+ if (ret < 0)
+ goto err;
+
+ if (fsck_err_on(!ret,
trans, inode_unlinked_and_not_open,
"inode %llu%u unlinked and not open",
u.bi_inum, u.bi_snapshot)) {
@@ -1221,6 +1225,7 @@ static int check_inode(struct btree_trans *trans,
bch_err_msg(c, ret, "in fsck deleting inode");
goto err_noprint;
}
+ ret = 0;
}
}
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 9d6040d4ba39..2c037e84fbae 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1244,8 +1244,9 @@ next_parent:
if (!unlinked)
return 0;
- if (bch2_inode_is_open(trans->c, pos))
- return 0;
+ ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos));
+ if (ret)
+ return ret < 0 ? ret : 0;
ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
if (ret)