summaryrefslogtreecommitdiff
path: root/fs/fuse/inode.c
diff options
context:
space:
mode:
authorZhang Tianci <zhangtianci.1997@bytedance.com>2024-11-18 18:16:00 +0800
committerMiklos Szeredi <mszeredi@redhat.com>2024-11-18 12:24:13 +0100
commit69eb56f69efb866c791cc87fd7bf62adf2ffcbb3 (patch)
treeb288b6587e377fc15a14878473902f5248505760 /fs/fuse/inode.c
parent68bfb7eb7f7de355d5b3812c25a2a36e9eead97b (diff)
downloadlwn-69eb56f69efb866c791cc87fd7bf62adf2ffcbb3.tar.gz
lwn-69eb56f69efb866c791cc87fd7bf62adf2ffcbb3.zip
fuse: check attributes staleness on fuse_iget()
Function fuse_direntplus_link() might call fuse_iget() to initialize a new fuse_inode and change its attributes. If fi->attr_version is always initialized with 0, even if the attributes returned by the FUSE_READDIR request is staled, as the new fi->attr_version is 0, fuse_change_attributes will still set the staled attributes to inode. This wrong behaviour may cause file size inconsistency even when there is no changes from server-side. To reproduce the issue, consider the following 2 programs (A and B) are running concurrently, A B ---------------------------------- -------------------------------- { /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. } readdir(/fusemnt/dir) start //Daemon set size 0 to f direntry fallocate(f, 1024) stat(f) // B see size 1024 echo 2 > /proc/sys/vm/drop_caches readdir(/fusemnt/dir) reply to kernel Kernel set 0 to the I_NEW inode stat(f) // B see size 0 In the above case, only program B is modifying the file size, however, B observes file size changing between the 2 'readonly' stat() calls. To fix this issue, we should make sure readdirplus still follows the rule of attr_version staleness checking even if the fi->attr_version is lost due to inode eviction. To identify this situation, the new fc->evict_ctr is used to record whether the eviction of inodes occurs during the readdirplus request processing. If it does, the result of readdirplus may be inaccurate; otherwise, the result of readdirplus can be trusted. Although this may still lead to incorrect invalidation, considering the relatively low frequency of evict occurrences, it should be acceptable. Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/ Link: https://lore.kernel.org/lkml/20241114070905.48901-1-zhangtianci.1997@bytedance.com/ Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> Suggested-by: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Diffstat (limited to 'fs/fuse/inode.c')
-rw-r--r--fs/fuse/inode.c56
1 files changed, 44 insertions, 12 deletions
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f1779ff3f8d1..3ce4f4e81d09 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -175,6 +175,14 @@ static void fuse_evict_inode(struct inode *inode)
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
}
+ /*
+ * Evict of non-deleted inode may race with outstanding
+ * LOOKUP/READDIRPLUS requests and result in inconsistency when
+ * the request finishes. Deal with that here by bumping a
+ * counter that can be compared to the starting value.
+ */
+ if (inode->i_nlink > 0)
+ atomic64_inc(&fc->evict_ctr);
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(fi->iocachectr != 0);
@@ -208,17 +216,30 @@ static ino_t fuse_squash_ino(u64 ino64)
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
- u64 attr_valid, u32 cache_mask)
+ u64 attr_valid, u32 cache_mask,
+ u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
lockdep_assert_held(&fi->lock);
+ /*
+ * Clear basic stats from invalid mask.
+ *
+ * Don't do this if this is coming from a fuse_iget() call and there
+ * might have been a racing evict which would've invalidated the result
+ * if the attr_version would've been preserved.
+ *
+ * !evict_ctr -> this is create
+ * fi->attr_version != 0 -> this is not a new inode
+ * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
+ */
+ if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
+ set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
+
fi->attr_version = atomic64_inc_return(&fc->attr_version);
fi->i_time = attr_valid;
- /* Clear basic stats from invalid mask */
- set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -297,9 +318,9 @@ u32 fuse_get_cache_mask(struct inode *inode)
return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
- struct fuse_statx *sx,
- u64 attr_valid, u64 attr_version)
+static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx, u64 attr_valid,
+ u64 attr_version, u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -333,7 +354,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
}
old_mtime = inode_get_mtime(inode);
- fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
+ fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
+ evict_ctr);
oldsize = inode->i_size;
/*
@@ -374,6 +396,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx, u64 attr_valid,
+ u64 attr_version)
+{
+ fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
+}
+
static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
u64 nodeid)
{
@@ -428,7 +457,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version)
+ u64 attr_valid, u64 attr_version,
+ u64 evict_ctr)
{
struct inode *inode;
struct fuse_inode *fi;
@@ -489,8 +519,8 @@ retry:
fi->nlookup++;
spin_unlock(&fi->lock);
done:
- fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
-
+ fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
+ evict_ctr);
return inode;
}
@@ -942,6 +972,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->initialized = 0;
fc->connected = 1;
atomic64_set(&fc->attr_version, 1);
+ atomic64_set(&fc->evict_ctr, 1);
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
@@ -1003,7 +1034,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
- return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
+ return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}
struct fuse_inode_handle {
@@ -1612,7 +1643,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
- root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
+ root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
+ fuse_get_evict_ctr(fm->fc));
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does