summaryrefslogtreecommitdiff
path: root/fs/iomap
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2022-11-23 12:44:38 +1100
committerDave Chinner <david@fromorbit.com>2022-11-23 12:44:38 +1100
commit9c7babf94a0d686b552e53aded8d4703d1b8b92b (patch)
treed468b984eab6a7bc85a49d76c85e4d4592bfd5e8 /fs/iomap
parentb71f889c18ada210a97aa3eb5e00c0de552234c6 (diff)
downloadlwn-9c7babf94a0d686b552e53aded8d4703d1b8b92b.tar.gz
lwn-9c7babf94a0d686b552e53aded8d4703d1b8b92b.zip
xfs,iomap: move delalloc punching to iomap
Because that's what Christoph wants for this error handling path only XFS uses. It requires a new iomap export for handling errors over delalloc ranges. This is basically the XFS code as is stands, but even though Christoph wants this as iomap funcitonality, we still have to call it from the filesystem specific ->iomap_end callback, and call into the iomap code with yet another filesystem specific callback to punch the delalloc extent within the defined ranges. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/iomap')
-rw-r--r--fs/iomap/buffered-io.c60
1 files changed, 60 insertions, 0 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 91ee0b308e13..734b761a1e4a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+/*
+ * When a short write occurs, the filesystem may need to remove reserved space
+ * that was allocated in ->iomap_begin from it's ->iomap_end method. For
+ * filesystems that use delayed allocation, we need to punch out delalloc
+ * extents from the range that are not dirty in the page cache. As the write can
+ * race with page faults, there can be dirty pages over the delalloc extent
+ * outside the range of a short write but still within the delalloc extent
+ * allocated for this iomap.
+ *
+ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
+ * simplify range iterations, but converts them back to {offset,len} tuples for
+ * the punch callback.
+ */
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+ struct iomap *iomap, loff_t pos, loff_t length,
+ ssize_t written,
+ int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+{
+ loff_t start_byte;
+ loff_t end_byte;
+ int blocksize = i_blocksize(inode);
+ int error = 0;
+
+ if (iomap->type != IOMAP_DELALLOC)
+ return 0;
+
+ /* If we didn't reserve the blocks, we're not allowed to punch them. */
+ if (!(iomap->flags & IOMAP_F_NEW))
+ return 0;
+
+ /*
+ * start_byte refers to the first unused block after a short write. If
+ * nothing was written, round offset down to point at the first block in
+ * the range.
+ */
+ if (unlikely(!written))
+ start_byte = round_down(pos, blocksize);
+ else
+ start_byte = round_up(pos + written, blocksize);
+ end_byte = round_up(pos + length, blocksize);
+
+ /* Nothing to do if we've written the entire delalloc extent */
+ if (start_byte >= end_byte)
+ return 0;
+
+ /*
+ * Lock the mapping to avoid races with page faults re-instantiating
+ * folios and dirtying them via ->page_mkwrite between the page cache
+ * truncation and the delalloc extent removal. Failing to do this can
+ * leave dirty pages with no space reservation in the cache.
+ */
+ filemap_invalidate_lock(inode->i_mapping);
+ truncate_pagecache_range(inode, start_byte, end_byte - 1);
+ error = punch(inode, start_byte, end_byte - start_byte);
+ filemap_invalidate_unlock(inode->i_mapping);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;