diff options
author | Dave Chinner <dchinner@redhat.com> | 2010-05-20 23:19:42 +1000 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2010-05-24 10:41:22 -0500 |
commit | ccf7c23fc129e75ef60e6f59f60a485b7a056598 (patch) | |
tree | 957539e31ee2a7155bbf9bb085ec1cb1d3432d3a /fs/xfs/xfs_trans_buf.c | |
parent | df806158b0f6eb24247773b4a19b8b59d7217e59 (diff) | |
download | lwn-ccf7c23fc129e75ef60e6f59f60a485b7a056598.tar.gz lwn-ccf7c23fc129e75ef60e6f59f60a485b7a056598.zip |
xfs: Ensure inode allocation buffers are fully replayed
With delayed logging, we can get inode allocation buffers in the
same transaction inode unlink buffers. We don't currently mark inode
allocation buffers in the log, so inode unlink buffers take
precedence over allocation buffers.
The result is that when they are combined into the same checkpoint,
only the unlinked inode chain fields are replayed, resulting in
uninitialised inode buffers being detected when the next inode
modification is replayed.
To fix this, we need to ensure that we do not set the inode buffer
flag in the buffer log item format flags if the inode allocation has
not already hit the log. To avoid requiring a change to log
recovery, we really need to make this a modification that relies
only on in-memory sate.
We can do this by checking during buffer log formatting (while the
CIL cannot be flushed) if we are still in the same sequence when we
commit the unlink transaction as the inode allocation transaction.
If we are, then we do not add the inode buffer flag to the buffer
log format item flags. This means the entire buffer will be
replayed, not just the unlinked fields. We do this while
CIL flusheѕ are locked out to ensure that we don't race with the
sequence numbers changing and hence fail to put the inode buffer
flag in the buffer format flags when we really need to.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_trans_buf.c')
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3390c3e7441b..63d81a22f4fd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -792,7 +792,7 @@ xfs_trans_binval( XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_STALE(bp); bip->bli_flags |= XFS_BLI_STALE; - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); + bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; bip->bli_format.blf_flags |= XFS_BLF_CANCEL; memset((char *)(bip->bli_format.blf_data_map), 0, @@ -802,16 +802,16 @@ xfs_trans_binval( } /* - * This call is used to indicate that the buffer contains on-disk - * inodes which must be handled specially during recovery. They - * require special handling because only the di_next_unlinked from - * the inodes in the buffer should be recovered. The rest of the - * data in the buffer is logged via the inodes themselves. + * This call is used to indicate that the buffer contains on-disk inodes which + * must be handled specially during recovery. They require special handling + * because only the di_next_unlinked from the inodes in the buffer should be + * recovered. The rest of the data in the buffer is logged via the inodes + * themselves. * - * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log - * format structure so that we'll know what to do at recovery time. + * All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be + * transferred to the buffer's log format structure so that we'll know what to + * do at recovery time. */ -/* ARGSUSED */ void xfs_trans_inode_buf( xfs_trans_t *tp, @@ -826,7 +826,7 @@ xfs_trans_inode_buf( bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; + bip->bli_flags |= XFS_BLI_INODE_BUF; } /* |