summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhang Yi <yi.zhang@huawei.com>2024-06-18 22:21:11 +0800
committerChristian Brauner <brauner@kernel.org>2024-06-19 15:58:28 +0200
commitd048945150b798147b324f05f7e8c857772b0d3f (patch)
treeefda97c68ba63b76188199d8df095df3def45d16
parenta7ca193bc9b6be34dd7c22106c01034f2e77e220 (diff)
downloadlwn-d048945150b798147b324f05f7e8c857772b0d3f.tar.gz
lwn-d048945150b798147b324f05f7e8c857772b0d3f.zip
xfs: reserve blocks for truncating large realtime inode
When unaligned truncate down a big realtime file, xfs_truncate_page() only zeros out the tail EOF block, __xfs_bunmapi() should split the tail written extent and convert the later one that beyond EOF block to unwritten, but it couldn't work as expected now since the reserved block is zero in xfs_setattr_size(), this could expose stale data just after commit '943bc0882ceb ("iomap: don't increase i_size if it's not a write operation")'. If we truncate file that contains a large enough written extent: |< rxext >|< rtext >| ...WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW ^ (new EOF) ^ old EOF Since we only zeros out the tail of the EOF block, and xfs_itruncate_extents()->..->__xfs_bunmapi() unmap the whole ailgned extents, it becomes this state: |< rxext >| ...WWWzWWWWWWWWWWWWW ^ new EOF Then if we do an extending write like this, the blocks in the previous tail extent becomes stale: |< rxext >| ...WWWzSSSSSSSSSSSSS..........WWWWWWWWWWWWWWWWW ^ old EOF ^ append start ^ new EOF Fix this by reserving XFS_DIOSTRAT_SPACE_RES blocks for big realtime inode. Signed-off-by: Zhang Yi <yi.zhang@huawei.com> Link: https://lore.kernel.org/r/20240618142112.1315279-2-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/xfs/xfs_iops.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff222827e550..a00dcbc77e12 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,8 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -811,6 +813,7 @@ xfs_setattr_size(
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
+ uint resblks = 0;
bool did_zeroing = false;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
@@ -917,7 +920,17 @@ xfs_setattr_size(
return error;
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ /*
+ * For realtime inode with more than one block rtextsize, we need the
+ * block reservation for bmap btree block allocations/splits that can
+ * happen since it could split the tail written extent and convert the
+ * right beyond EOF one to unwritten.
+ */
+ if (xfs_inode_has_bigrtalloc(ip))
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, 0, &tp);
if (error)
return error;