-rw-r--r--  fs/iomap/buffered-io.c  60
-rw-r--r--  fs/xfs/xfs_iomap.c      47
-rw-r--r--  include/linux/iomap.h    4
3 files changed, 72 insertions, 39 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 91ee0b308e13..734b761a1e4a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+/*
+ * When a short write occurs, the filesystem may need to remove reserved space
+ * that was allocated in ->iomap_begin from its ->iomap_end method. For
+ * filesystems that use delayed allocation, we need to punch out delalloc
+ * extents from the range that are not dirty in the page cache. Because the
+ * write can race with page faults, dirty pages may exist over the delalloc
+ * extent outside the range of a short write but still within the delalloc
+ * extent allocated for this iomap.
+ *
+ * This function uses [start_byte, end_byte) intervals (i.e. half-open) to
+ * simplify range iterations, but converts them back to {offset,len} tuples for
+ * the punch callback.
+ */
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+ struct iomap *iomap, loff_t pos, loff_t length,
+ ssize_t written,
+ int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+{
+ loff_t start_byte;
+ loff_t end_byte;
+ int blocksize = i_blocksize(inode);
+ int error = 0;
+
+ if (iomap->type != IOMAP_DELALLOC)
+ return 0;
+
+ /* If we didn't reserve the blocks, we're not allowed to punch them. */
+ if (!(iomap->flags & IOMAP_F_NEW))
+ return 0;
+
+ /*
+ * start_byte refers to the first unused block after a short write. If
+ * nothing was written, round pos down to point at the first block in
+ * the range.
+ */
+ if (unlikely(!written))
+ start_byte = round_down(pos, blocksize);
+ else
+ start_byte = round_up(pos + written, blocksize);
+ end_byte = round_up(pos + length, blocksize);
+
+ /* Nothing to do if we've written the entire delalloc extent */
+ if (start_byte >= end_byte)
+ return 0;
+
+ /*
+ * Lock the mapping to avoid races with page faults re-instantiating
+ * folios and dirtying them via ->page_mkwrite between the page cache
+ * truncation and the delalloc extent removal. Failing to do this can
+ * leave dirty pages with no space reservation in the cache.
+ */
+ filemap_invalidate_lock(inode->i_mapping);
+ truncate_pagecache_range(inode, start_byte, end_byte - 1);
+ error = punch(inode, start_byte, end_byte - start_byte);
+ filemap_invalidate_unlock(inode->i_mapping);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;
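
The rounding in the new helper decides exactly which blocks get punched: after
a short write, start_byte is rounded up past the last written byte so the
partially written (dirty) block is kept, while a write that made no progress
rounds pos down so the whole reservation goes. A minimal userspace sketch of
the same arithmetic (round_up()/round_down() are reimplemented here with the
kernel's power-of-two semantics, and the 4096-byte block size is an assumption
for illustration):

#include <stdio.h>

/* Same semantics as the kernel's helpers for a power-of-two n. */
#define round_down(x, n)	((x) & ~((n) - 1))
#define round_up(x, n)		round_down((x) + (n) - 1, (n))

int main(void)
{
	long long blocksize = 4096;	/* assumed block size */
	long long pos = 1000, length = 10000, written = 500;

	/* Short write: first unused byte is pos + written, rounded up. */
	long long start_byte = round_up(pos + written, blocksize);
	/* End of the delalloc reservation, rounded up to a block boundary. */
	long long end_byte = round_up(pos + length, blocksize);

	printf("punch [%lld, %lld)\n", start_byte, end_byte); /* [4096, 12288) */

	/* Nothing written: start at the block containing pos instead. */
	written = 0;
	start_byte = round_down(pos, blocksize);
	printf("punch [%lld, %lld)\n", start_byte, end_byte); /* [0, 12288) */
	return 0;
}
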
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7bb55dbc19d3..ea96e8a34868 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1123,12 +1123,12 @@ out_unlock:
static int
xfs_buffered_write_delalloc_punch(
struct inode *inode,
- loff_t start_byte,
- loff_t end_byte)
+ loff_t offset,
+ loff_t length)
{
struct xfs_mount *mp = XFS_M(inode->i_sb);
- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
- xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
end_fsb - start_fsb);
@@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end(
unsigned flags,
struct iomap *iomap)
{
- struct xfs_mount *mp = XFS_M(inode->i_sb);
- loff_t start_byte;
- loff_t end_byte;
- int error = 0;
- if (iomap->type != IOMAP_DELALLOC)
- return 0;
+ struct xfs_mount *mp = XFS_M(inode->i_sb);
+ int error;
/*
* Behave as if the write failed if drop writes is enabled. Set the NEW
@@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end(
written = 0;
}
- /* If we didn't reserve the blocks, we're not allowed to punch them. */
- if (!(iomap->flags & IOMAP_F_NEW))
- return 0;
-
- /*
- * start_fsb refers to the first unused block after a short write. If
- * nothing was written, round offset down to point at the first block in
- * the range.
- */
- if (unlikely(!written))
- start_byte = round_down(offset, mp->m_sb.sb_blocksize);
- else
- start_byte = round_up(offset + written, mp->m_sb.sb_blocksize);
- end_byte = round_up(offset + length, mp->m_sb.sb_blocksize);
-
- /* Nothing to do if we've written the entire delalloc extent */
- if (start_byte >= end_byte)
- return 0;
-
- /*
- * Lock the mapping to avoid races with page faults re-instantiating
- * folios and dirtying them via ->page_mkwrite between the page cache
- * truncation and the delalloc extent removal. Failing to do this can
- * leave dirty pages with no space reservation in the cache.
- */
- filemap_invalidate_lock(inode->i_mapping);
- truncate_pagecache_range(inode, start_byte, end_byte - 1);
- error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte);
- filemap_invalidate_unlock(inode->i_mapping);
+ error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
+ length, written, &xfs_buffered_write_delalloc_punch);
if (error && !xfs_is_shutdown(mp)) {
xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
__func__, XFS_I(inode)->i_ino);
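
In xfs_buffered_write_delalloc_punch() above, XFS_B_TO_FSBT() truncates while
XFS_B_TO_FSB() rounds up, so the converted block range covers every block the
byte range touches; since the iomap helper already block-aligned both ends,
the pair lands on the same blocks either way. A rough sketch of that
conversion, assuming a power-of-two block size (the helpers below are
illustrative stand-ins, not the real XFS macros):

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins for XFS_B_TO_FSBT (truncate) and XFS_B_TO_FSB
 * (round up); assumes a power-of-two filesystem block size. */
static uint64_t b_to_fsbt(uint64_t bytes, unsigned int blocklog)
{
	return bytes >> blocklog;		/* block containing 'bytes' */
}

static uint64_t b_to_fsb(uint64_t bytes, unsigned int blocklog)
{
	uint64_t mask = ((uint64_t)1 << blocklog) - 1;
	return (bytes + mask) >> blocklog;	/* first block at or after */
}

int main(void)
{
	unsigned int blocklog = 12;	/* 4096-byte blocks, assumed */
	uint64_t offset = 4096, length = 8192;

	/* Punch blocks [1, 3) for the byte range [4096, 12288). */
	uint64_t start_fsb = b_to_fsbt(offset, blocklog);
	uint64_t end_fsb = b_to_fsb(offset + length, blocklog);

	printf("punch fsb [%llu, %llu)\n",
	       (unsigned long long)start_fsb, (unsigned long long)end_fsb);
	return 0;
}
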
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 238a03087e17..0698c4b8ce0e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+ struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
+ int (*punch)(struct inode *inode, loff_t pos, loff_t length));
+
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
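
A filesystem adopting the new helper wires it up the same way the XFS change
above does: the ->iomap_end method delegates to
iomap_file_buffered_write_punch_delalloc(), and the punch callback only has to
drop the reservation. A schematic sketch for a hypothetical filesystem (the
myfs_* names, including myfs_punch_range(), are invented for illustration):

/* Sketch only: all myfs_* identifiers are hypothetical. */
static int myfs_punch_delalloc(struct inode *inode, loff_t pos, loff_t length)
{
	/* Drop the delalloc reservation backing [pos, pos + length). The
	 * iomap helper has already truncated the page cache over this range
	 * and holds the invalidate lock while calling us. */
	return myfs_punch_range(inode, pos, length);
}

static int myfs_buffered_write_iomap_end(struct inode *inode, loff_t offset,
		loff_t length, ssize_t written, unsigned flags,
		struct iomap *iomap)
{
	/* A no-op unless this was a freshly allocated (IOMAP_F_NEW) delalloc
	 * mapping with a short or failed write; the helper checks both. */
	return iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
			length, written, myfs_punch_delalloc);
}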