diff options
-rw-r--r-- | fs/iomap/buffered-io.c | 60 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 47 | ||||
-rw-r--r-- | include/linux/iomap.h | 4 |
3 files changed, 72 insertions, 39 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 91ee0b308e13..734b761a1e4a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * When a short write occurs, the filesystem may need to remove reserved space + * that was allocated in ->iomap_begin from it's ->iomap_end method. For + * filesystems that use delayed allocation, we need to punch out delalloc + * extents from the range that are not dirty in the page cache. As the write can + * race with page faults, there can be dirty pages over the delalloc extent + * outside the range of a short write but still within the delalloc extent + * allocated for this iomap. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations, but converts them back to {offset,len} tuples for + * the punch callback. + */ +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, + ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t start_byte; + loff_t end_byte; + int blocksize = i_blocksize(inode); + int error = 0; + + if (iomap->type != IOMAP_DELALLOC) + return 0; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + + /* + * start_byte refers to the first unused block after a short write. If + * nothing was written, round offset down to point at the first block in + * the range. + */ + if (unlikely(!written)) + start_byte = round_down(pos, blocksize); + else + start_byte = round_up(pos + written, blocksize); + end_byte = round_up(pos + length, blocksize); + + /* Nothing to do if we've written the entire delalloc extent */ + if (start_byte >= end_byte) + return 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite between the page cache + * truncation and the delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. + */ + filemap_invalidate_lock(inode->i_mapping); + truncate_pagecache_range(inode, start_byte, end_byte - 1); + error = punch(inode, start_byte, end_byte - start_byte); + filemap_invalidate_unlock(inode->i_mapping); + + return error; +} +EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); + static loff_t iomap_unshare_iter(struct iomap_iter *iter) { struct iomap *iomap = &iter->iomap; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7bb55dbc19d3..ea96e8a34868 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1123,12 +1123,12 @@ out_unlock: static int xfs_buffered_write_delalloc_punch( struct inode *inode, - loff_t start_byte, - loff_t end_byte) + loff_t offset, + loff_t length) { struct xfs_mount *mp = XFS_M(inode->i_sb); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb, end_fsb - start_fsb); @@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_mount *mp = XFS_M(inode->i_sb); - loff_t start_byte; - loff_t end_byte; - int error = 0; - if (iomap->type != IOMAP_DELALLOC) - return 0; + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; /* * Behave as if the write failed if drop writes is enabled. Set the NEW @@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end( written = 0; } - /* If we didn't reserve the blocks, we're not allowed to punch them. */ - if (!(iomap->flags & IOMAP_F_NEW)) - return 0; - - /* - * start_fsb refers to the first unused block after a short write. If - * nothing was written, round offset down to point at the first block in - * the range. - */ - if (unlikely(!written)) - start_byte = round_down(offset, mp->m_sb.sb_blocksize); - else - start_byte = round_up(offset + written, mp->m_sb.sb_blocksize); - end_byte = round_up(offset + length, mp->m_sb.sb_blocksize); - - /* Nothing to do if we've written the entire delalloc extent */ - if (start_byte >= end_byte) - return 0; - - /* - * Lock the mapping to avoid races with page faults re-instantiating - * folios and dirtying them via ->page_mkwrite between the page cache - * truncation and the delalloc extent removal. Failing to do this can - * leave dirty pages with no space reservation in the cache. - */ - filemap_invalidate_lock(inode->i_mapping); - truncate_pagecache_range(inode, start_byte, end_byte - 1); - error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte); - filemap_invalidate_unlock(inode->i_mapping); + error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); if (error && !xfs_is_shutdown(mp)) { xfs_alert(mp, "%s: unable to clean up ino 0x%llx", __func__, XFS_I(inode)->i_ino); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 238a03087e17..0698c4b8ce0e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)); + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); |