summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_bmap_btree.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-08-30 10:23:44 +1000
committerBen Myers <bpm@sgi.com>2013-09-10 10:26:47 -0500
commit21b5c9784bceb8b8e0095f87355f3b138ebac2d0 (patch)
tree52b179280cb81bba1d7304a41676ed64d847067a /fs/xfs/xfs_bmap_btree.c
parent0f295a214bb7658ca37bd61a8a1f0cd4a9d86c1f (diff)
downloadlwn-21b5c9784bceb8b8e0095f87355f3b138ebac2d0.tar.gz
lwn-21b5c9784bceb8b8e0095f87355f3b138ebac2d0.zip
xfs: swap extents operations for CRC filesystems
For CRC enabled filesystems, we can't just swap inode forks from one inode to another when defragmenting a file - the blocks in the inode fork bmap btree contain pointers back to the owner inode. Hence if we are to swap the inode forks we have to atomically modify every block in the btree during the transaction. We are doing an entire fork swap here, so we could create a new transaction item type that indicates we are changing the owner of a certain structure from one value to another. If we combine this with ordered buffer logging to modify all the buffers in the tree, then we can change the buffers in the tree without needing log space for the operation. However, this then requires log recovery to perform the modification of the owner information of the objects/structures in question. This does introduce some interesting ordering details into recovery: we have to make sure that the owner change replay occurs after the change that moves the objects is made, not before. Hence we can't use a separate log item for this as we have no guarantee of strict ordering between multiple items in the log due to the relogging action of asynchronous transaction commits. Hence there is no "generic" method we can use for changing the ownership of arbitrary metadata structures. For inode forks, however, there is a simple method of communicating that the fork contents need the owner rewritten - we can pass an inode log format flag for the fork for the transaction that does a fork swap. This flag will then follow the inode fork through relogging actions so when the swap actually gets replayed the ownership can be changed immediately by log recovery. So that gives us a simple method of "whole fork" exchange between two inodes. This is relatively simple to implement, so it makes sense to do this as an initial implementation to support xfs_fsr on CRC enabled filesystems in the same manner as we do on existing filesystems.
This commit introduces the swapext driven functionality, the recovery functionality will be in a separate patch. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_bmap_btree.c')
-rw-r--r--fs/xfs/xfs_bmap_btree.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index cf3bc76710c3..aa2eadd41bab 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -925,3 +925,37 @@ xfs_bmdr_maxrecs(
return blocklen / sizeof(xfs_bmdr_rec_t);
return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
}
+
+/*
+ * Change the owner of a btree format fork of the inode passed in. Change it to
+ * the owner that is passed in so that we can change owners before or after
+ * we switch forks between inodes. The operation that the caller is doing will
+ * determine whether it needs to change owner before or after the switch.
+ *
+ * For demand paged modification, the fork switch should be done after reading
+ * in all the blocks, modifying them and pinning them in the transaction. For
+ * modification when the buffers are already pinned in memory, the fork switch
+ * can be done before changing the owner as we won't need to validate the owner
+ * until the btree buffers are unpinned and writes can occur again.
+ *
+ * @tp:        transaction handed to the bmap btree cursor
+ * @ip:        inode whose fork btree is being re-owned
+ * @whichfork: XFS_DATA_FORK selects the data fork format check; any other
+ *             value is checked against the attribute fork format
+ * @new_owner: owner value passed through to xfs_btree_change_owner()
+ *
+ * Returns the result of xfs_btree_change_owner().
+ */
+int
+xfs_bmbt_change_owner(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	xfs_ino_t		new_owner)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
+
+	/*
+	 * The selected fork must already be in btree format. This has to be a
+	 * comparison: an assignment would always evaluate true and would
+	 * overwrite the fork format field whenever the assertion expression
+	 * is evaluated.
+	 */
+	if (whichfork == XFS_DATA_FORK)
+		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
+	else
+		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+
+	cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
+	error = xfs_btree_change_owner(cur, new_owner);
+
+	/* Tear the cursor down, flagging whether the owner change failed. */
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	return error;
+}
+