diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 11:52:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 11:52:12 -0700 |
commit | 3eccc0c886b1796f95a289c9d127c8ca1a254bd5 (patch) | |
tree | 021761894d246df22d53e1c64810f6cbf3ece716 /fs/xfs | |
parent | cc423f6337d0a5ff1906f3b3d465d28c0d1705f6 (diff) | |
parent | 9eee8bd81421c5e961cbb1a3c3fa1a06fad545e8 (diff) | |
download | lwn-3eccc0c886b1796f95a289c9d127c8ca1a254bd5.tar.gz lwn-3eccc0c886b1796f95a289c9d127c8ca1a254bd5.zip |
Merge tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux
Pull splice updates from Jens Axboe:
"This kills off ITER_PIPE to avoid a race between truncate,
iov_iter_revert() on the pipe and an as-yet incomplete DMA to a bio
with unpinned/unref'ed pages from an O_DIRECT splice read. This causes
memory corruption.
Instead, we either use (a) filemap_splice_read(), which invokes the
buffered file reading code and splices from the pagecache into the
pipe; (b) copy_splice_read(), which bulk-allocates a buffer, reads
into it and then pushes the filled pages into the pipe; or (c) handle
it in filesystem-specific code.
Summary:
- Rename direct_splice_read() to copy_splice_read()
- Simplify the calculations for the number of pages to be reclaimed
in copy_splice_read()
- Turn do_splice_to() into a helper, vfs_splice_read(), so that it
can be used by overlayfs and coda to perform the checks on the
lower fs
- Make vfs_splice_read() jump to copy_splice_read() to handle
direct-I/O and DAX
- Provide shmem with its own splice_read to handle non-existent pages
in the pagecache. We don't want a ->read_folio() as we don't want
to populate holes, but filemap_get_pages() requires it
- Provide overlayfs with its own splice_read to call down to a lower
layer as overlayfs doesn't provide ->read_folio()
- Provide coda with its own splice_read to call down to a lower layer
as coda doesn't provide ->read_folio()
- Direct ->splice_read to copy_splice_read() in tty, procfs, kernfs
and random files as they just copy to the output buffer and don't
splice pages
- Provide wrappers for afs, ceph, ecryptfs, ext4, f2fs, nfs, ntfs3,
ocfs2, orangefs, xfs and zonefs to do locking and/or revalidation
- Make cifs use filemap_splice_read()
- Replace pointers to generic_file_splice_read() with pointers to
filemap_splice_read() as DIO and DAX are handled in the caller;
filesystems can still provide their own alternate ->splice_read()
op
- Remove generic_file_splice_read()
- Remove ITER_PIPE and its paraphernalia as generic_file_splice_read
was the only user"
* tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux: (31 commits)
splice: kdoc for filemap_splice_read() and copy_splice_read()
iov_iter: Kill ITER_PIPE
splice: Remove generic_file_splice_read()
splice: Use filemap_splice_read() instead of generic_file_splice_read()
cifs: Use filemap_splice_read()
trace: Convert trace/seq to use copy_splice_read()
zonefs: Provide a splice-read wrapper
xfs: Provide a splice-read wrapper
orangefs: Provide a splice-read wrapper
ocfs2: Provide a splice-read wrapper
ntfs3: Provide a splice-read wrapper
nfs: Provide a splice-read wrapper
f2fs: Provide a splice-read wrapper
ext4: Provide a splice-read wrapper
ecryptfs: Provide a splice-read wrapper
ceph: Provide a splice-read wrapper
afs: Provide a splice-read wrapper
9p: Add splice_read wrapper
net: Make sock_splice_read() use copy_splice_read() by default
tty, proc, kernfs, random: Use copy_splice_read()
...
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_file.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 2 |
2 files changed, 30 insertions, 2 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aede746541f8..08d632668e94 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -306,6 +306,34 @@ xfs_file_read_iter( return ret; } +STATIC ssize_t +xfs_file_splice_read( + struct file *in, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + struct inode *inode = file_inode(in); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + + XFS_STATS_INC(mp, xs_read_calls); + + if (xfs_is_shutdown(mp)) + return -EIO; + + trace_xfs_file_splice_read(ip, *ppos, len); + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + ret = filemap_splice_read(in, ppos, pipe, len, flags); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + if (ret > 0) + XFS_STATS_ADD(mp, xs_read_bytes, ret); + return ret; +} + /* * Common pre-write limit and setup checks. * @@ -1423,7 +1451,7 @@ const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read_iter = xfs_file_read_iter, .write_iter = xfs_file_write_iter, - .splice_read = generic_file_splice_read, + .splice_read = xfs_file_splice_read, .splice_write = iter_file_splice_write, .iopoll = iocb_bio_iopoll, .unlocked_ioctl = xfs_file_ioctl, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cd4ca5b1fcb0..4db669203149 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1445,7 +1445,6 @@ DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_dax_write); DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write); - DECLARE_EVENT_CLASS(xfs_imap_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, int whichfork, struct xfs_bmbt_irec *irec), @@ -1535,6 +1534,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); +DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), |