summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-27 08:11:46 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-27 08:11:46 -0800
commit7d4050728c83aa63828494ad0f4d0eb4faf5f97a (patch)
treed266a130d9cfe054d3f082860e8f242a5299ea64
parentb5287c55dedae89f273f505197244d9ece1d42d9 (diff)
parentcf87766dd6f9ddcceaa8ee26e3cbd7538e42dd19 (diff)
downloadlwn-7d4050728c83aa63828494ad0f4d0eb4faf5f97a.tar.gz
lwn-7d4050728c83aa63828494ad0f4d0eb4faf5f97a.zip
Merge tag 'vfs-6.13-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner: - Fix a few iomap bugs - Fix a wrong argument in backing file callback - Fix security mount option retrieval in statmount() - Cleanup how statmount() handles unescaped options - Add a missing inode_owner_or_capable() check for setting write hints - Clear the return value in read_kcore_iter() after a successful iov_iter_zero() - Fix a mount_setattr() selftest - Fix function signature in mount api documentation - Remove duplicate include header in the fscache code * tag 'vfs-6.13-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs/backing_file: fix wrong argument in callback fs_parser: update mount_api doc to match function signature fs: require inode_owner_or_capable for F_SET_RW_HINT fs/proc/kcore.c: Clear ret value in read_kcore_iter after successful iov_iter_zero statmount: fix security option retrieval statmount: clean up unescaped option handling fscache: Remove duplicate included header iomap: elide flush from partial eof zero range iomap: lift zeroed mapping handling into iomap_zero_range() iomap: reset per-iter state on non-error iter advances iomap: warn on zero range of a post-eof folio selftests/mount_setattr: Fix failures on 64K PAGE_SIZE kernels
-rw-r--r--Documentation/filesystems/mount_api.rst3
-rw-r--r--fs/backing-file.c3
-rw-r--r--fs/fcntl.c3
-rw-r--r--fs/iomap/buffered-io.c90
-rw-r--r--fs/iomap/iter.c11
-rw-r--r--fs/namespace.c46
-rw-r--r--fs/netfs/fscache_io.c1
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c2
9 files changed, 81 insertions, 79 deletions
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index 317934c9e8fc..d92c276f1575 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -770,7 +770,8 @@ process the parameters it is given.
* ::
- bool fs_validate_description(const struct fs_parameter_description *desc);
+ bool fs_validate_description(const char *name,
+ const struct fs_parameter_description *desc);
This performs some validation checks on a parameter description. It
returns true if the description is good and false if it is not. It will
diff --git a/fs/backing-file.c b/fs/backing-file.c
index 526ddb4d6f76..cbdad8b68474 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -327,6 +327,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
+ struct file *user_file = vma->vm_file;
int ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
@@ -342,7 +343,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
revert_creds_light(old_cred);
if (ctx->accessed)
- ctx->accessed(vma->vm_file);
+ ctx->accessed(user_file);
return ret;
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ac77dd912412..49884fa3c81d 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -374,6 +374,9 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
u64 __user *argp = (u64 __user *)arg;
u64 hint;
+ if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
+ return -EPERM;
+
if (copy_from_user(&hint, argp, sizeof(hint)))
return -EFAULT;
if (!rw_hint_valid(hint))
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d42f01e0fc1c..955f19e27e47 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
return filemap_write_and_wait_range(mapping, i->pos, end);
}
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
- bool *range_dirty)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
- const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
loff_t written = 0;
- /*
- * We must zero subranges of unwritten mappings that might be dirty in
- * pagecache from previous writes. We only know whether the entire range
- * was clean or not, however, and dirty folios may have been written
- * back or reclaimed at any point after mapping lookup.
- *
- * The easiest way to deal with this is to flush pagecache to trigger
- * any pending unwritten conversions and then grab the updated extents
- * from the fs. The flush may change the current mapping, so mark it
- * stale for the iterator to remap it for the next pass to handle
- * properly.
- *
- * Note that holes are treated the same as unwritten because zero range
- * is (ab)used for partial folio zeroing in some cases. Hole backed
- * post-eof ranges can be dirtied via mapped write and the flush
- * triggers writeback time post-eof zeroing.
- */
- if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
- if (*range_dirty) {
- *range_dirty = false;
- return iomap_zero_iter_flush_and_stale(iter);
- }
- /* range is clean and already zeroed, nothing to do */
- return length;
- }
-
do {
struct folio *folio;
int status;
@@ -1397,6 +1369,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
if (iter->iomap.flags & IOMAP_F_STALE)
break;
+ /* warn about zeroing folios beyond eof that won't write back */
+ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
@@ -1429,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
.len = len,
.flags = IOMAP_ZERO,
};
+ struct address_space *mapping = inode->i_mapping;
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int off = pos & (blocksize - 1);
+ loff_t plen = min_t(loff_t, len, blocksize - off);
int ret;
bool range_dirty;
/*
- * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
- * pagecache must be flushed to ensure stale data from previous
- * buffered writes is not exposed. A flush is only required for certain
- * types of mappings, but checking pagecache after mapping lookup is
- * racy with writeback and reclaim.
+ * Zero range can skip mappings that are zero on disk so long as
+ * pagecache is clean. If pagecache was dirty prior to zero range, the
+ * mapping converts on writeback completion and so must be zeroed.
*
- * Therefore, check the entire range first and pass along whether any
- * part of it is dirty. If so and an underlying mapping warrants it,
- * flush the cache at that point. This trades off the occasional false
- * positive (and spurious flush, if the dirty data and mapping don't
- * happen to overlap) for simplicity in handling a relatively uncommon
- * situation.
+ * The simplest way to deal with this across a range is to flush
+ * pagecache and process the updated mappings. To avoid excessive
+ * flushing on partial eof zeroing, special case it to zero the
+ * unaligned start portion if already dirty in pagecache.
+ */
+ if (off &&
+ filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
+ iter.len = plen;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_zero_iter(&iter, did_zero);
+
+ iter.len = len - (iter.pos - pos);
+ if (ret || !iter.len)
+ return ret;
+ }
+
+ /*
+ * To avoid an unconditional flush, check pagecache state and only flush
+ * if dirty and the fs returns a mapping that might convert on
+ * writeback.
*/
range_dirty = filemap_range_needs_writeback(inode->i_mapping,
- pos, pos + len - 1);
+ iter.pos, iter.pos + iter.len - 1);
+ while ((ret = iomap_iter(&iter, ops)) > 0) {
+ const struct iomap *srcmap = iomap_iter_srcmap(&iter);
- while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
+ if (srcmap->type == IOMAP_HOLE ||
+ srcmap->type == IOMAP_UNWRITTEN) {
+ loff_t proc = iomap_length(&iter);
+
+ if (range_dirty) {
+ range_dirty = false;
+ proc = iomap_zero_iter_flush_and_stale(&iter);
+ }
+ iter.processed = proc;
+ continue;
+ }
+
+ iter.processed = iomap_zero_iter(&iter, did_zero);
+ }
return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 79a0614eaab7..3790918646af 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -22,26 +22,25 @@
static inline int iomap_iter_advance(struct iomap_iter *iter)
{
bool stale = iter->iomap.flags & IOMAP_F_STALE;
+ int ret = 1;
/* handle the previous iteration (if any) */
if (iter->iomap.length) {
if (iter->processed < 0)
return iter->processed;
- if (!iter->processed && !stale)
- return 0;
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
return -EIO;
iter->pos += iter->processed;
iter->len -= iter->processed;
- if (!iter->len)
- return 0;
+ if (!iter->len || (!iter->processed && !stale))
+ ret = 0;
}
- /* clear the state for the next iteration */
+ /* clear the per iteration state */
iter->processed = 0;
memset(&iter->iomap, 0, sizeof(iter->iomap));
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
- return 1;
+ return ret;
}
static inline void iomap_iter_done(struct iomap_iter *iter)
diff --git a/fs/namespace.c b/fs/namespace.c
index 6b0a17487d0f..23e81c2a1e3f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5057,21 +5057,32 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
return 0;
}
-static inline int statmount_opt_unescape(struct seq_file *seq, char *buf_start)
+static inline int statmount_opt_process(struct seq_file *seq, size_t start)
{
- char *buf_end, *opt_start, *opt_end;
+ char *buf_end, *opt_end, *src, *dst;
int count = 0;
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
buf_end = seq->buf + seq->count;
+ dst = seq->buf + start;
+ src = dst + 1; /* skip initial comma */
+
+ if (src >= buf_end) {
+ seq->count = start;
+ return 0;
+ }
+
*buf_end = '\0';
- for (opt_start = buf_start + 1; opt_start < buf_end; opt_start = opt_end + 1) {
- opt_end = strchrnul(opt_start, ',');
+ for (; src < buf_end; src = opt_end + 1) {
+ opt_end = strchrnul(src, ',');
*opt_end = '\0';
- buf_start += string_unescape(opt_start, buf_start, 0, UNESCAPE_OCTAL) + 1;
+ dst += string_unescape(src, dst, 0, UNESCAPE_OCTAL) + 1;
if (WARN_ON_ONCE(++count == INT_MAX))
return -EOVERFLOW;
}
- seq->count = buf_start - 1 - seq->buf;
+ seq->count = dst - 1 - seq->buf;
return count;
}
@@ -5080,24 +5091,16 @@ static int statmount_opt_array(struct kstatmount *s, struct seq_file *seq)
struct vfsmount *mnt = s->mnt;
struct super_block *sb = mnt->mnt_sb;
size_t start = seq->count;
- char *buf_start;
int err;
if (!sb->s_op->show_options)
return 0;
- buf_start = seq->buf + start;
err = sb->s_op->show_options(seq, mnt->mnt_root);
if (err)
return err;
- if (unlikely(seq_has_overflowed(seq)))
- return -EAGAIN;
-
- if (seq->count == start)
- return 0;
-
- err = statmount_opt_unescape(seq, buf_start);
+ err = statmount_opt_process(seq, start);
if (err < 0)
return err;
@@ -5110,22 +5113,13 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
struct vfsmount *mnt = s->mnt;
struct super_block *sb = mnt->mnt_sb;
size_t start = seq->count;
- char *buf_start;
int err;
- buf_start = seq->buf + start;
-
err = security_sb_show_options(seq, sb);
- if (!err)
+ if (err)
return err;
- if (unlikely(seq_has_overflowed(seq)))
- return -EAGAIN;
-
- if (seq->count == start)
- return 0;
-
- err = statmount_opt_unescape(seq, buf_start);
+ err = statmount_opt_process(seq, start);
if (err < 0)
return err;
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..b1722a82c03d 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -9,7 +9,6 @@
#include <linux/uio.h>
#include <linux/bvec.h>
#include <linux/slab.h>
-#include <linux/uio.h>
#include "internal.h"
/**
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 7a85735d584f..e376f48c4b8b 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -600,6 +600,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
ret = -EFAULT;
goto out;
}
+ ret = 0;
/*
* We know the bounce buffer is safe to copy from, so
* use _copy_to_iter() directly.
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 68801e1a9ec2..70f65eb320a7 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -1026,7 +1026,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
"size=100000,mode=700"), 0);
ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
- "size=100000,mode=700"), 0);
+ "size=2m,mode=700"), 0);
ASSERT_EQ(mkdir("/mnt/A", 0777), 0);