summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-17 12:57:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-17 12:57:48 -0700
commitbf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0 (patch)
tree791228dc4eb6d90e2c27295930449b06f6952ad3
parent0260b0a7445c62a08938fa66fad256e5d0779817 (diff)
parent2bf6e353542d233486195953dc9c346331f82dcb (diff)
downloadlwn-bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0.tar.gz
lwn-bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0.zip
Merge tag 'xfs-6.11-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Chandan Babu: "Major changes in this release are limited to enabling FITRIM on realtime devices and Byte-based grant head log reservation tracking. The remaining changes are limited to fixes and cleanups included in this pull request. Core: - Enable FITRIM on the realtime device - Introduce byte-based grant head log reservation tracking instead of physical log location tracking. This allows grant head to track a full 64 bit bytes space and hence overcome the limit of 4GB indexing that has been present until now Fixes: - xfs_flush_unmap_range() and xfs_prepare_shift() should consider RT extents in the flush unmap range - Implement bounds check when traversing log operations during log replay - Prevent out of bounds access when traversing a directory data block - Prevent incorrect ENOSPC when concurrently performing file creation and file writes - Fix rtalloc rotoring when delalloc is in use Cleanups: - Clean up I/O path inode locking helpers and the page fault handler - xfs: hoist inode operations to libxfs in anticipation of the metadata inode directory feature, which maintains a directory tree of metadata inodes. This will be necessary for further enhancements to the realtime feature, subvolume support - Clean up some warts in the extent freeing log intent code - Clean up the refcount and rmap intent code before adding support for realtime devices - Provide the correct email address for sysfs ABI documentation" * tag 'xfs-6.11-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (80 commits) xfs: fix rtalloc rotoring when delalloc is in use xfs: get rid of xfs_ag_resv_rmapbt_alloc xfs: skip flushing log items during push xfs: grant heads track byte counts, not LSNs xfs: pass the full grant head to accounting functions xfs: track log space pinned by the AIL xfs: collapse xlog_state_set_callback in caller xfs: l_last_sync_lsn is really AIL state xfs: ensure log tail is always up to date xfs: background AIL push should target physical space xfs: AIL doesn't need manual pushing xfs: move and rename xfs_trans_committed_bulk xfs: fix the contact address for the sysfs ABI documentation xfs: Avoid races with cnt_btree lastrec updates xfs: move xfs_refcount_update_defer_add to xfs_refcount_item.c xfs: simplify usage of the rcur local variable in xfs_refcount_finish_one xfs: don't bother calling xfs_refcount_finish_one_cleanup in xfs_refcount_finish_one xfs: reuse xfs_refcount_update_cancel_item xfs: add a ci_entry helper xfs: remove xfs_trans_set_refcount_flags ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-xfs26
-rw-r--r--fs/xfs/Kconfig12
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_ag.c2
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h19
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c235
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h18
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c64
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c55
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_btree.c51
-rw-r--r--fs/xfs/libxfs/xfs_btree.h16
-rw-r--r--fs/xfs/libxfs/xfs_defer.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c661
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h49
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c31
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h7
-rw-r--r--fs/xfs/libxfs/xfs_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c20
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c749
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.h62
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h1
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c156
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h11
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c266
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h15
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c7
-rw-r--r--fs/xfs/libxfs/xfs_shared.h7
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c1
-rw-r--r--fs/xfs/scrub/common.c1
-rw-r--r--fs/xfs/scrub/newbt.c5
-rw-r--r--fs/xfs/scrub/quota_repair.c1
-rw-r--r--fs/xfs/scrub/reap.c7
-rw-r--r--fs/xfs/scrub/tempfile.c21
-rw-r--r--fs/xfs/xfs.h4
-rw-r--r--fs/xfs/xfs_bmap_item.c6
-rw-r--r--fs/xfs/xfs_bmap_util.c22
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_discard.c303
-rw-r--r--fs/xfs/xfs_dquot_item.c31
-rw-r--r--fs/xfs/xfs_drain.c8
-rw-r--r--fs/xfs/xfs_drain.h5
-rw-r--r--fs/xfs/xfs_extfree_item.c119
-rw-r--r--fs/xfs/xfs_extfree_item.h6
-rw-r--r--fs/xfs/xfs_file.c141
-rw-r--r--fs/xfs/xfs_handle.c1
-rw-r--r--fs/xfs/xfs_inode.c1488
-rw-r--r--fs/xfs/xfs_inode.h70
-rw-r--r--fs/xfs/xfs_inode_item.c38
-rw-r--r--fs/xfs/xfs_ioctl.c60
-rw-r--r--fs/xfs/xfs_iomap.c71
-rw-r--r--fs/xfs/xfs_iops.c51
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log.c511
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_cil.c177
-rw-r--r--fs/xfs/xfs_log_priv.h61
-rw-r--r--fs/xfs/xfs_log_recover.c28
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_bhv.c1
-rw-r--r--fs/xfs/xfs_refcount_item.c110
-rw-r--r--fs/xfs/xfs_refcount_item.h5
-rw-r--r--fs/xfs/xfs_reflink.c2
-rw-r--r--fs/xfs/xfs_reflink.h10
-rw-r--r--fs/xfs/xfs_rmap_item.c151
-rw-r--r--fs/xfs/xfs_rmap_item.h4
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_symlink.c70
-rw-r--r--fs/xfs/xfs_sysfs.c29
-rw-r--r--fs/xfs/xfs_trace.c4
-rw-r--r--fs/xfs/xfs_trace.h533
-rw-r--r--fs/xfs/xfs_trans.c129
-rw-r--r--fs/xfs/xfs_trans.h5
-rw-r--r--fs/xfs/xfs_trans_ail.c244
-rw-r--r--fs/xfs/xfs_trans_priv.h44
79 files changed, 3781 insertions, 3407 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs
index f704925f6fe9..7da4de948b46 100644
--- a/Documentation/ABI/testing/sysfs-fs-xfs
+++ b/Documentation/ABI/testing/sysfs-fs-xfs
@@ -1,7 +1,7 @@
What: /sys/fs/xfs/<disk>/log/log_head_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current head of the
log. The LSN is exported in "cycle:basic block" format.
@@ -10,30 +10,28 @@ Users: xfstests
What: /sys/fs/xfs/<disk>/log/log_tail_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current tail of the
log. The LSN is exported in "cycle:basic block" format.
-What: /sys/fs/xfs/<disk>/log/reserve_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/reserve_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log reserve grant head. It
represents the total log reservation of all currently
- outstanding transactions. The grant head is exported in
- "cycle:bytes" format.
+ outstanding transactions in bytes.
Users: xfstests
-What: /sys/fs/xfs/<disk>/log/write_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/write_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log write grant head. It
represents the total log reservation of all currently
outstanding transactions, including regrants due to
- rolling transactions. The grant head is exported in
- "cycle:bytes" format.
+ rolling transactions in bytes.
Users: xfstests
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index d41edd30388b..fffd6fffdce0 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -217,6 +217,18 @@ config XFS_DEBUG
Say N unless you are an XFS developer, or you play one on TV.
+config XFS_DEBUG_EXPENSIVE
+ bool "XFS expensive debugging checks"
+ depends on XFS_FS && XFS_DEBUG
+ help
+ Say Y here to get an XFS build with expensive debugging checks
+ enabled. These checks may affect performance significantly.
+
+ Note that the resulting code will be HUGER and SLOWER, and probably
+ not useful unless you are debugging a particular problem.
+
+ Say N unless you are an XFS developer, or you play one on TV.
+
config XFS_ASSERT_FATAL
bool "XFS fatal asserts"
default y
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c50447548d65..dd692619bed5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,6 +40,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_iext_tree.o \
xfs_inode_fork.o \
xfs_inode_buf.o \
+ xfs_inode_util.o \
xfs_log_rlimit.o \
xfs_ag_resv.o \
xfs_parent.o \
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 240e079cb3fb..7e80732cb547 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -1008,7 +1008,7 @@ xfs_ag_shrink_space(
goto resv_err;
err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
- XFS_AG_RESV_NONE, true);
+ XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD);
if (err2)
goto resv_err;
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index ff20ed93de77..f247eeff7358 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -33,23 +33,4 @@ xfs_perag_resv(
}
}
-/*
- * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
- * the AGFL, they are allocated one at a time and the reservation updates don't
- * require a transaction.
- */
-static inline void
-xfs_ag_resv_rmapbt_alloc(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_alloc_arg args = { NULL };
- struct xfs_perag *pag;
-
- args.len = 1;
- pag = xfs_perag_get(mp, agno);
- xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
- xfs_perag_put(pag);
-}
-
#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6c55a6e88eba..59326f84f6a5 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -27,6 +27,7 @@
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
+#include "xfs_extfree_item.h"
struct kmem_cache *xfs_extfree_item_cache;
@@ -466,6 +467,97 @@ xfs_alloc_fix_len(
}
/*
+ * Determine if the cursor points to the block that contains the right-most
+ * block of records in the by-count btree. This block contains the largest
+ * contiguous free extent in the AG, so if we modify a record in this block we
+ * need to call xfs_alloc_fixup_longest() once the modifications are done to
+ * ensure the agf->agf_longest field is kept up to date with the longest free
+ * extent tracked by the by-count btree.
+ */
+static bool
+xfs_alloc_cursor_at_lastrec(
+ struct xfs_btree_cur *cnt_cur)
+{
+ struct xfs_btree_block *block;
+ union xfs_btree_ptr ptr;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cnt_cur, 0, &bp);
+
+ xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB);
+ return xfs_btree_ptr_is_null(cnt_cur, &ptr);
+}
+
+/*
+ * Find the rightmost record of the cntbt, and return the longest free space
+ * recorded in it. Simply set both the block number and the length to their
+ * maximum values before searching.
+ */
+static int
+xfs_cntbt_longest(
+ struct xfs_btree_cur *cnt_cur,
+ xfs_extlen_t *longest)
+{
+ struct xfs_alloc_rec_incore irec;
+ union xfs_btree_rec *rec;
+ int stat = 0;
+ int error;
+
+ memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec));
+ error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat);
+ if (error)
+ return error;
+ if (!stat) {
+ /* totally empty tree */
+ *longest = 0;
+ return 0;
+ }
+
+ error = xfs_btree_get_rec(cnt_cur, &rec, &stat);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) {
+ xfs_btree_mark_sick(cnt_cur);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ *longest = irec.ar_blockcount;
+ return 0;
+}
+
+/*
+ * Update the longest contiguous free extent in the AG from the by-count cursor
+ * that is passed to us. This should be done at the end of any allocation or
+ * freeing operation that touches the longest extent in the btree.
+ *
+ * Needing to update the longest extent can be determined by calling
+ * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record
+ * modification but before the modification begins.
+ */
+static int
+xfs_alloc_fixup_longest(
+ struct xfs_btree_cur *cnt_cur)
+{
+ struct xfs_perag *pag = cnt_cur->bc_ag.pag;
+ struct xfs_buf *bp = cnt_cur->bc_ag.agbp;
+ struct xfs_agf *agf = bp->b_addr;
+ xfs_extlen_t longest = 0;
+ int error;
+
+ /* Lookup last rec in order to update AGF. */
+ error = xfs_cntbt_longest(cnt_cur, &longest);
+ if (error)
+ return error;
+
+ pag->pagf_longest = longest;
+ agf->agf_longest = cpu_to_be32(pag->pagf_longest);
+ xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST);
+
+ return 0;
+}
+
+/*
* Update the two btrees, logically removing from freespace the extent
* starting at rbno, rlen blocks. The extent is contained within the
* actual (current) free extent fbno for flen blocks.
@@ -489,6 +581,7 @@ xfs_alloc_fixup_trees(
xfs_extlen_t nflen1=0; /* first new free length */
xfs_extlen_t nflen2=0; /* second new free length */
struct xfs_mount *mp;
+ bool fixup_longest = false;
mp = cnt_cur->bc_mp;
@@ -577,6 +670,10 @@ xfs_alloc_fixup_trees(
nfbno2 = rbno + rlen;
nflen2 = (fbno + flen) - nfbno2;
}
+
+ if (xfs_alloc_cursor_at_lastrec(cnt_cur))
+ fixup_longest = true;
+
/*
* Delete the entry from the by-size btree.
*/
@@ -654,6 +751,10 @@ xfs_alloc_fixup_trees(
return -EFSCORRUPTED;
}
}
+
+ if (fixup_longest)
+ return xfs_alloc_fixup_longest(cnt_cur);
+
return 0;
}
@@ -1932,7 +2033,7 @@ out_nominleft:
/*
* Free the extent starting at agno/bno for length.
*/
-STATIC int
+int
xfs_free_ag_extent(
struct xfs_trans *tp,
struct xfs_buf *agbp,
@@ -1956,6 +2057,7 @@ xfs_free_ag_extent(
int i;
int error;
struct xfs_perag *pag = agbp->b_pag;
+ bool fixup_longest = false;
bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
@@ -2219,8 +2321,13 @@ xfs_free_ag_extent(
}
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
bno_cur = NULL;
+
/*
* In all cases we need to insert the new freespace in the by-size tree.
+ *
+ * If this new freespace is being inserted in the block that contains
+ * the largest free space in the btree, make sure we also fix up the
+ * agf->agf-longest tracker field.
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
@@ -2229,6 +2336,8 @@ xfs_free_ag_extent(
error = -EFSCORRUPTED;
goto error0;
}
+ if (xfs_alloc_cursor_at_lastrec(cnt_cur))
+ fixup_longest = true;
if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2236,6 +2345,12 @@ xfs_free_ag_extent(
error = -EFSCORRUPTED;
goto error0;
}
+ if (fixup_longest) {
+ error = xfs_alloc_fixup_longest(cnt_cur);
+ if (error)
+ goto error0;
+ }
+
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
cnt_cur = NULL;
@@ -2422,32 +2537,6 @@ xfs_alloc_space_available(
return true;
}
-int
-xfs_free_agfl_block(
- struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- struct xfs_buf *agbp,
- struct xfs_owner_info *oinfo)
-{
- int error;
- struct xfs_buf *bp;
-
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
- XFS_AG_RESV_AGFL);
- if (error)
- return error;
-
- error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
- XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
- tp->t_mountp->m_bsize, 0, &bp);
- if (error)
- return error;
- xfs_trans_binval(tp, bp);
-
- return 0;
-}
-
/*
* Check the agfl fields of the agf for inconsistency or corruption.
*
@@ -2536,48 +2625,6 @@ xfs_agfl_reset(
}
/*
- * Defer an AGFL block free. This is effectively equivalent to
- * xfs_free_extent_later() with some special handling particular to AGFL blocks.
- *
- * Deferring AGFL frees helps prevent log reservation overruns due to too many
- * allocation operations in a transaction. AGFL frees are prone to this problem
- * because for one they are always freed one at a time. Further, an immediate
- * AGFL block free can cause a btree join and require another block free before
- * the real allocation can proceed. Deferring the free disconnects freeing up
- * the AGFL slot from freeing the block.
- */
-static int
-xfs_defer_agfl_block(
- struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- struct xfs_owner_info *oinfo)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent_free_item *xefi;
- xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
-
- ASSERT(xfs_extfree_item_cache != NULL);
- ASSERT(oinfo != NULL);
-
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
- return -EFSCORRUPTED;
-
- xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
- GFP_KERNEL | __GFP_NOFAIL);
- xefi->xefi_startblock = fsbno;
- xefi->xefi_blockcount = 1;
- xefi->xefi_owner = oinfo->oi_owner;
- xefi->xefi_agresv = XFS_AG_RESV_AGFL;
-
- trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
-
- xfs_extent_free_get_group(mp, xefi);
- xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type);
- return 0;
-}
-
-/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
@@ -2588,28 +2635,15 @@ xfs_defer_extent_free(
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
- bool skip_discard,
+ unsigned int free_flags,
struct xfs_defer_pending **dfpp)
{
struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp;
-#ifdef DEBUG
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(len > 0);
ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
ASSERT(!isnullstartblock(bno));
- agno = XFS_FSB_TO_AGNO(mp, bno);
- agbno = XFS_FSB_TO_AGBNO(mp, bno);
- ASSERT(agno < mp->m_sb.sb_agcount);
- ASSERT(agbno < mp->m_sb.sb_agblocks);
- ASSERT(len < mp->m_sb.sb_agblocks);
- ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
-#endif
- ASSERT(xfs_extfree_item_cache != NULL);
- ASSERT(type != XFS_AG_RESV_AGFL);
+ ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
return -EFSCORRUPTED;
@@ -2619,7 +2653,7 @@ xfs_defer_extent_free(
xefi->xefi_startblock = bno;
xefi->xefi_blockcount = (xfs_extlen_t)len;
xefi->xefi_agresv = type;
- if (skip_discard)
+ if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
@@ -2632,12 +2666,8 @@ xfs_defer_extent_free(
} else {
xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(mp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
- xfs_extent_free_get_group(mp, xefi);
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type);
+ xfs_extent_free_defer_add(tp, xefi, dfpp);
return 0;
}
@@ -2648,11 +2678,11 @@ xfs_free_extent_later(
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
- bool skip_discard)
+ unsigned int free_flags)
{
struct xfs_defer_pending *dontcare = NULL;
- return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard,
+ return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags,
&dontcare);
}
@@ -2677,13 +2707,13 @@ xfs_free_extent_later(
int
xfs_alloc_schedule_autoreap(
const struct xfs_alloc_arg *args,
- bool skip_discard,
+ unsigned int free_flags,
struct xfs_alloc_autoreap *aarp)
{
int error;
error = xfs_defer_extent_free(args->tp, args->fsbno, args->len,
- &args->oinfo, args->resv, skip_discard, &aarp->dfp);
+ &args->oinfo, args->resv, free_flags, &aarp->dfp);
if (error)
return error;
@@ -2895,8 +2925,21 @@ xfs_alloc_fix_freelist(
if (error)
goto out_agbp_relse;
- /* defer agfl frees */
- error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ /*
+ * Defer the AGFL block free.
+ *
+ * This helps to prevent log reservation overruns due to too
+ * many allocation operations in a transaction. AGFL frees are
+ * prone to this problem because for one they are always freed
+ * one at a time. Further, an immediate AGFL block free can
+ * cause a btree join and require another block free before the
+ * real allocation can proceed.
+ * Deferring the free disconnects freeing up the AGFL slot from
+ * freeing the block.
+ */
+ error = xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, args->agno, bno), 1,
+ &targs.oinfo, XFS_AG_RESV_AGFL, 0);
if (error)
goto out_agbp_relse;
}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0b956f8b9d5a..fae170825be0 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -80,6 +80,10 @@ int xfs_alloc_get_freelist(struct xfs_perag *pag, struct xfs_trans *tp,
int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf *agfbp, struct xfs_buf *agflbp,
xfs_agblock_t bno, int btreeblk);
+int xfs_free_ag_extent(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agnumber_t agno, xfs_agblock_t bno,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type);
/*
* Compute and fill in value of m_alloc_maxlevels.
@@ -194,8 +198,6 @@ int xfs_alloc_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
struct xfs_buf **agfbpp);
int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf **bpp);
-int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
- struct xfs_buf *, struct xfs_owner_info *);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_buf **agbp);
@@ -233,7 +235,12 @@ xfs_buf_to_agfl_bno(
int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type, bool skip_discard);
+ enum xfs_ag_resv_type type, unsigned int free_flags);
+
+/* Don't issue a discard for the blocks freed. */
+#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
+
+#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD)
/*
* List of extents to be free "later".
@@ -249,9 +256,6 @@ struct xfs_extent_free_item {
enum xfs_ag_resv_type xefi_agresv;
};
-void xfs_extent_free_get_group(struct xfs_mount *mp,
- struct xfs_extent_free_item *xefi);
-
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
@@ -262,7 +266,7 @@ struct xfs_alloc_autoreap {
};
int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args,
- bool skip_discard, struct xfs_alloc_autoreap *aarp);
+ unsigned int free_flags, struct xfs_alloc_autoreap *aarp);
void xfs_alloc_cancel_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
void xfs_alloc_commit_autoreap(struct xfs_trans *tp,
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6ef5ddd89600..585e98e87ef9 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -115,67 +115,6 @@ xfs_allocbt_free_block(
return 0;
}
-/*
- * Update the longest extent in the AGF
- */
-STATIC void
-xfs_allocbt_update_lastrec(
- struct xfs_btree_cur *cur,
- const struct xfs_btree_block *block,
- const union xfs_btree_rec *rec,
- int ptr,
- int reason)
-{
- struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- struct xfs_perag *pag;
- __be32 len;
- int numrecs;
-
- ASSERT(!xfs_btree_is_bno(cur->bc_ops));
-
- switch (reason) {
- case LASTREC_UPDATE:
- /*
- * If this is the last leaf block and it's the last record,
- * then update the size of the longest extent in the AG.
- */
- if (ptr != xfs_btree_get_numrecs(block))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_INSREC:
- if (be32_to_cpu(rec->alloc.ar_blockcount) <=
- be32_to_cpu(agf->agf_longest))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_DELREC:
- numrecs = xfs_btree_get_numrecs(block);
- if (ptr <= numrecs)
- return;
- ASSERT(ptr == numrecs + 1);
-
- if (numrecs) {
- xfs_alloc_rec_t *rrp;
-
- rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
- len = rrp->ar_blockcount;
- } else {
- len = 0;
- }
-
- break;
- default:
- ASSERT(0);
- return;
- }
-
- agf->agf_longest = len;
- pag = cur->bc_ag.agbp->b_pag;
- pag->pagf_longest = be32_to_cpu(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST);
-}
-
STATIC int
xfs_allocbt_get_minrecs(
struct xfs_btree_cur *cur,
@@ -493,7 +432,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
@@ -511,7 +449,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
const struct xfs_btree_ops xfs_cntbt_ops = {
.name = "cnt",
.type = XFS_BTREE_TYPE_AG,
- .geom_flags = XFS_BTGEO_LASTREC_UPDATE,
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -525,7 +462,6 @@ const struct xfs_btree_ops xfs_cntbt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6af6f744fdd6..7df74c35d9f9 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -39,6 +39,7 @@
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
+#include "xfs_inode_util.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -604,7 +605,7 @@ xfs_bmap_btree_to_extents(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
@@ -5380,11 +5381,15 @@ xfs_bmap_del_extent_real(
error = xfs_rtfree_blocks(tp, del->br_startblock,
del->br_blockcount);
} else {
+ unsigned int efi_flags = 0;
+
+ if ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN)
+ efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;
+
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
- XFS_AG_RESV_NONE,
- ((bflags & XFS_BMAPI_NODISCARD) ||
- del->br_state == XFS_EXT_UNWRITTEN));
+ XFS_AG_RESV_NONE, efi_flags);
}
if (error)
return error;
@@ -6454,3 +6459,45 @@ xfs_bmap_query_all(
return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
}
+
+/* Helper function to extract extent size hint from inode */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ /*
+ * No point in aligning allocations if we need to COW to actually
+ * write to them.
+ */
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
+ if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
+ return ip->i_extsize;
+ if (XFS_IS_REALTIME_INODE(ip) &&
+ ip->i_mount->m_sb.sb_rextsize > 1)
+ return ip->i_mount->m_sb.sb_rextsize;
+ return 0;
+}
+
+/*
+ * Helper function to extract CoW extent size hint from inode.
+ * Between the extent size hint and the CoW extent size hint, we
+ * return the greater of the two. If the value is zero (automatic),
+ * use the default size.
+ */
+xfs_extlen_t
+xfs_get_cowextsz_hint(
+ struct xfs_inode *ip)
+{
+ xfs_extlen_t a, b;
+
+ a = 0;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ a = ip->i_cowextsize;
+ b = xfs_get_extsz_hint(ip);
+
+ a = max(a, b);
+ if (a == 0)
+ return XFS_DEFAULT_COWEXTSZ_HINT;
+ return a;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 667b0c2b33d1..7592d46e97c6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -296,4 +296,7 @@ typedef int (*xfs_bmap_query_range_fn)(
int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
void *priv);
+xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
+xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f5d84dcb58da..d1b06ccde19e 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -282,7 +282,7 @@ xfs_bmbt_free_block(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index d29547572a68..a5c4af148853 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1331,30 +1331,6 @@ xfs_btree_init_block_cur(
xfs_btree_owner(cur));
}
-/*
- * Return true if ptr is the last record in the btree and
- * we need to track updates to this record. The decision
- * will be further refined in the update_lastrec method.
- */
-STATIC int
-xfs_btree_is_lastrec(
- struct xfs_btree_cur *cur,
- struct xfs_btree_block *block,
- int level)
-{
- union xfs_btree_ptr ptr;
-
- if (level > 0)
- return 0;
- if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE))
- return 0;
-
- xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
- if (!xfs_btree_ptr_is_null(cur, &ptr))
- return 0;
- return 1;
-}
-
STATIC void
xfs_btree_buf_to_ptr(
struct xfs_btree_cur *cur,
@@ -2420,15 +2396,6 @@ xfs_btree_update(
xfs_btree_copy_recs(cur, rp, rec, 1);
xfs_btree_log_recs(cur, bp, ptr, ptr);
- /*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, 0)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_UPDATE);
- }
-
/* Pass new key value up to our parent. */
if (xfs_btree_needs_key_update(cur, ptr)) {
error = xfs_btree_update_keys(cur, 0);
@@ -3618,15 +3585,6 @@ xfs_btree_insrec(
}
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_INSREC);
- }
-
- /*
* Return the new block number, if any.
* If there is one, give back a record value and a cursor too.
*/
@@ -3984,15 +3942,6 @@ xfs_btree_delrec(
xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, NULL,
- ptr, LASTREC_DELREC);
- }
-
- /*
* We're at the root level. First, shrink the root block in-memory.
* Try to get rid of the next level down. If we can't then there's
* nothing left to do.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index f93374278aa1..10b7ddc3b2b3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -154,12 +154,6 @@ struct xfs_btree_ops {
int *stat);
int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
- /* update last record information */
- void (*update_lastrec)(struct xfs_btree_cur *cur,
- const struct xfs_btree_block *block,
- const union xfs_btree_rec *rec,
- int ptr, int reason);
-
/* records in block/level */
int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
@@ -222,15 +216,7 @@ struct xfs_btree_ops {
};
/* btree geometry flags */
-#define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */
-#define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */
-
-/*
- * Reasons for the update_lastrec method to be called.
- */
-#define LASTREC_UPDATE 0
-#define LASTREC_INSREC 1
-#define LASTREC_DELREC 2
+#define XFS_BTGEO_OVERLAPPING (1U << 0) /* overlapping intervals */
union xfs_btree_irec {
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 4a078e07e1a0..40021849b42f 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -12,12 +12,14 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
+#include "xfs_log_priv.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_bmap.h"
@@ -556,7 +558,7 @@ xfs_defer_relog(
* the log threshold once per call.
*/
if (threshold_lsn == NULLCOMMITLSN) {
- threshold_lsn = xlog_grant_push_threshold(log, 0);
+ threshold_lsn = xfs_ail_get_push_target(log->l_ailp);
if (threshold_lsn == NULLCOMMITLSN)
break;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 457f9a38f850..202468223bf9 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -19,6 +19,11 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_health.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_parent.h"
+#include "xfs_ag.h"
+#include "xfs_ialloc.h"
const struct xfs_name xfs_name_dotdot = {
.name = (const unsigned char *)"..",
@@ -584,9 +589,9 @@ xfs_dir_replace(
*/
int
xfs_dir_canenter(
- xfs_trans_t *tp,
- xfs_inode_t *dp,
- struct xfs_name *name) /* name of entry to add */
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const struct xfs_name *name) /* name of entry to add */
{
return xfs_dir_createname(tp, dp, name, 0, 0);
}
@@ -756,3 +761,653 @@ xfs_dir2_compname(
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of directory live update hooks.
+ * If the compiler supports jump labels, the static branch will be replaced by
+ * a nop sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
+
+void
+xfs_dir_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_dir_hooks_switch);
+}
+
+void
+xfs_dir_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_dir_hooks_switch);
+}
+
+/* Call hooks for a directory update relating to a child dirent update. */
+inline void
+xfs_dir_update_hook(
+ struct xfs_inode *dp,
+ struct xfs_inode *ip,
+ int delta,
+ const struct xfs_name *name)
+{
+ if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
+ struct xfs_dir_update_params p = {
+ .dp = dp,
+ .ip = ip,
+ .delta = delta,
+ .name = name,
+ };
+ struct xfs_mount *mp = ip->i_mount;
+
+ xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
+ }
+}
+
+/* Call the specified function during a directory update. */
+int
+xfs_dir_hook_add(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Stop calling the specified function during a directory update. */
+void
+xfs_dir_hook_del(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Configure directory update hook functions. */
+void
+xfs_dir_hook_setup(
+ struct xfs_dir_hook *hook,
+ notifier_fn_t mod_fn)
+{
+ xfs_hook_setup(&hook->dirent_hook, mod_fn);
+}
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+/*
+ * Given a directory @dp, a newly allocated inode @ip, and a @name, link @ip
+ * into @dp under the given @name. If @ip is a directory, it will be
+ * initialized. Both inodes must have the ILOCK held and the transaction must
+ * have sufficient blocks reserved.
+ */
+int
+xfs_dir_create_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOSPC);
+ return error;
+ }
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ error = xfs_dir_init(tp, ip, dp);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, dp);
+ }
+
+ /*
+ * If we have parent pointers, we need to add the attribute containing
+ * the parent information now.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, an existing non-directory inode @ip, and a @name,
+ * link @ip into @dp under the given @name. Both inodes must have the ILOCK
+ * held.
+ */
+int
+xfs_dir_add_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ struct xfs_mount *mp = tp->t_mountp;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+ ASSERT(!S_ISDIR(VFS_I(ip)->i_mode));
+
+ if (!resblks) {
+ error = xfs_dir_canenter(tp, dp, name);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Handle initial link state of O_TMPFILE inode
+ */
+ if (VFS_I(ip)->i_nlink == 0) {
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+ }
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ xfs_bumplink(tp, ip);
+
+ /*
+ * If we have parent pointers, we now need to add the parent record to
+ * the attribute fork of the inode. If this is the initial parent
+ * attribute, we need to create it correctly, otherwise we can just add
+ * the parent to the inode.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, a child @ip, and a @name, remove the (@name, @ip)
+ * entry from the directory. Both inodes must have the ILOCK held.
+ */
+int
+xfs_dir_remove_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're removing a directory perform some additional validation.
+ */
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ ASSERT(VFS_I(ip)->i_nlink >= 2);
+ if (VFS_I(ip)->i_nlink != 2)
+ return -ENOTEMPTY;
+ if (!xfs_dir_isempty(ip))
+ return -ENOTEMPTY;
+
+ /* Drop the link from ip's "..". */
+ error = xfs_droplink(tp, dp);
+ if (error)
+ return error;
+
+ /* Drop the "." link from ip to self. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * When removing a non-directory we need to log the parent
+ * inode here. For a directory this is done implicitly
+ * by the xfs_droplink call for the ".." entry.
+ */
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /* Drop the link from dp to ip. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOENT);
+ return error;
+ }
+
+ /* Remove parent pointer. */
+ if (du->ppargs) {
+ error = xfs_parent_removename(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, -1, name);
+ return 0;
+}
+
+/*
+ * Exchange the entry (@name1, @ip1) in directory @dp1 with the entry (@name2,
+ * @ip2) in directory @dp2, and update '..' @ip1 and @ip2's entries as needed.
+ * @ip1 and @ip2 need not be of the same type.
+ *
+ * All inodes must have the ILOCK held, and both entries must already exist.
+ */
+int
+xfs_dir_exchange_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2,
+ unsigned int spaceres)
+{
+ struct xfs_inode *dp1 = du1->dp;
+ const struct xfs_name *name1 = du1->name;
+ struct xfs_inode *ip1 = du1->ip;
+ struct xfs_inode *dp2 = du2->dp;
+ const struct xfs_name *name2 = du2->name;
+ struct xfs_inode *ip2 = du2->ip;
+ int ip1_flags = 0;
+ int ip2_flags = 0;
+ int dp2_flags = 0;
+ int error;
+
+ /* Swap inode number for dirent in first parent */
+ error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* Swap inode number for dirent in second parent */
+ error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /*
+ * If we're renaming one or more directories across different parents,
+ * update the respective ".." entries (and link counts) to match the new
+ * parents.
+ */
+ if (dp1 != dp2) {
+ dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+
+ if (S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
+ dp1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip2 ".." reference to dp1 */
+ if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_droplink(tp, dp2);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp1);
+ }
+
+ /*
+ * Although ip1 isn't changed here, userspace needs
+ * to be warned about the change, so that applications
+ * relying on it (like backup ones), will properly
+ * notify the change
+ */
+ ip1_flags |= XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ }
+
+ if (S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
+ dp2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip1 ".." reference to dp2 */
+ if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_droplink(tp, dp1);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp2);
+ }
+
+ /*
+ * Although ip2 isn't changed here, userspace needs
+ * to be warned about the change, so that applications
+ * relying on it (like backup ones), will properly
+ * notify the change
+ */
+ ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_CHG;
+ }
+ }
+
+ if (ip1_flags) {
+ xfs_trans_ichgtime(tp, ip1, ip1_flags);
+ xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
+ }
+ if (ip2_flags) {
+ xfs_trans_ichgtime(tp, ip2, ip2_flags);
+ xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
+ }
+ if (dp2_flags) {
+ xfs_trans_ichgtime(tp, dp2, dp2_flags);
+ xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer replacements */
+ if (du1->ppargs) {
+ error = xfs_parent_replacename(tp, du1->ppargs, dp1, name1,
+ dp2, name2, ip1);
+ if (error)
+ return error;
+ }
+
+ if (du2->ppargs) {
+ error = xfs_parent_replacename(tp, du2->ppargs, dp2, name2,
+ dp1, name1, ip2);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished an exchange operation as
+ * follows: removed the source and target files from their directories;
+ * added the target to the source directory; and added the source to
+ * the target directory. All inodes are locked, so it's ok to model a
+ * rename this way so long as we say we deleted entries before we add
+ * new ones.
+ */
+ xfs_dir_update_hook(dp1, ip1, -1, name1);
+ xfs_dir_update_hook(dp2, ip2, -1, name2);
+ xfs_dir_update_hook(dp1, ip2, 1, name1);
+ xfs_dir_update_hook(dp2, ip1, 1, name2);
+ return 0;
+}
+
+/*
+ * Given an entry (@src_name, @src_ip) in directory @src_dp, make the entry
+ * @target_name in directory @target_dp point to @src_ip and remove the
+ * original entry, cleaning up everything left behind.
+ *
+ * Cleanup involves dropping a link count on @target_ip, and either removing
+ * the (@src_name, @src_ip) entry from @src_dp or simply replacing the entry
+ * with (@src_name, @wip) if a whiteout inode @wip is supplied.
+ *
+ * All inodes must have the ILOCK held. We assume that if @src_ip is a
+ * directory then its '..' doesn't already point to @target_dp, and that @wip
+ * is a freshly allocated whiteout.
+ */
+int
+xfs_dir_rename_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt,
+ unsigned int spaceres,
+ struct xfs_dir_update *du_wip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *src_dp = du_src->dp;
+ const struct xfs_name *src_name = du_src->name;
+ struct xfs_inode *src_ip = du_src->ip;
+ struct xfs_inode *target_dp = du_tgt->dp;
+ const struct xfs_name *target_name = du_tgt->name;
+ struct xfs_inode *target_ip = du_tgt->ip;
+ bool new_parent = (src_dp != target_dp);
+ bool src_is_directory;
+ int error;
+
+ src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
+
+ /*
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
+ */
+ if (target_ip == NULL) {
+ /*
+ * If there's no space reservation, check the entry will
+ * fit before actually inserting it.
+ */
+ if (!spaceres) {
+ error = xfs_dir_canenter(tp, target_dp, target_name);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * If target exists and it's a directory, check that whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2)))
+ return -EEXIST;
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. After this point, we have a real link, clear the tmpfile
+ * state flag from the inode so it doesn't accidentally get misused
+ * in future.
+ */
+ if (du_wip->ip) {
+ struct xfs_perag *pag;
+
+ ASSERT(VFS_I(du_wip->ip)->i_nlink == 0);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, du_wip->ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, du_wip->ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, du_wip->ip);
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
+ /*
+ * If target does not exist and the rename crosses
+ * directories, adjust the target directory link count
+ * to account for the ".." reference from the new entry.
+ */
+ error = xfs_dir_createname(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ if (new_parent && src_is_directory) {
+ xfs_bumplink(tp, target_dp);
+ }
+ } else { /* target_ip != NULL */
+ /*
+ * Link the source inode under the target name.
+ * If the source inode is a directory and we are moving
+ * it across directories, its ".." entry will be
+ * inconsistent until we replace that down below.
+ *
+ * In case there is already an entry with the same
+ * name at the destination directory, remove it first.
+ */
+ error = xfs_dir_replace(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /*
+ * Decrement the link count on the target since the target
+ * dir no longer points to it.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+
+ if (src_is_directory) {
+ /*
+ * Drop the link from the old "." entry.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+ }
+ } /* target_ip != NULL */
+
+ /*
+ * Remove the source.
+ */
+ if (new_parent && src_is_directory) {
+ /*
+ * Rewrite the ".." entry to point to the new
+ * directory.
+ */
+ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
+ target_dp->i_ino, spaceres);
+ ASSERT(error != -EEXIST);
+ if (error)
+ return error;
+ }
+
+ /*
+ * We always want to hit the ctime on the source inode.
+ *
+ * This isn't strictly required by the standards since the source
+ * inode isn't really being changed, but old unix file systems did
+ * it and some incremental backup programs won't work without it.
+ */
+ xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
+
+ /*
+ * Adjust the link count on src_dp. This is necessary when
+ * renaming a directory, either within one parent when
+ * the target existed, or across two parent directories.
+ */
+ if (src_is_directory && (new_parent || target_ip != NULL)) {
+
+ /*
+ * Decrement link count on src_directory since the
+ * entry that's moved no longer points to it.
+ */
+ error = xfs_droplink(tp, src_dp);
+ if (error)
+ return error;
+ }
+
+ /*
+ * For whiteouts, we only need to update the source dirent with the
+ * inode number of the whiteout inode rather than removing it
+ * altogether.
+ */
+ if (du_wip->ip)
+ error = xfs_dir_replace(tp, src_dp, src_name, du_wip->ip->i_ino,
+ spaceres);
+ else
+ error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+ spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+ if (new_parent)
+ xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer updates. */
+ if (du_wip->ppargs) {
+ error = xfs_parent_addname(tp, du_wip->ppargs, src_dp,
+ src_name, du_wip->ip);
+ if (error)
+ return error;
+ }
+
+ if (du_src->ppargs) {
+ error = xfs_parent_replacename(tp, du_src->ppargs, src_dp,
+ src_name, target_dp, target_name, src_ip);
+ if (error)
+ return error;
+ }
+
+ if (du_tgt->ppargs) {
+ error = xfs_parent_removename(tp, du_tgt->ppargs, target_dp,
+ target_name, target_ip);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished a rename operation as
+ * follows: removed the source and target files from their directories;
+ * that we've added the source to the target directory; and finally
+ * that we've added the whiteout, if there was one. All inodes are
+ * locked, so it's ok to model a rename this way so long as we say we
+ * deleted entries before we add new ones.
+ */
+ if (target_ip)
+ xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
+ xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
+ xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
+ if (du_wip->ip)
+ xfs_dir_update_hook(src_dp, du_wip->ip, 1, src_name);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 6dbe6e9ecb49..576068ed81fa 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -74,7 +74,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name);
+ const struct xfs_name *name);
int xfs_dir_lookup_args(struct xfs_da_args *args);
int xfs_dir_createname_args(struct xfs_da_args *args);
@@ -309,4 +309,51 @@ static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c)
return c;
}
+struct xfs_dir_update_params {
+ const struct xfs_inode *dp;
+ const struct xfs_inode *ip;
+ const struct xfs_name *name;
+ int delta;
+};
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
+ int delta, const struct xfs_name *name);
+
+struct xfs_dir_hook {
+ struct xfs_hook dirent_hook;
+};
+
+void xfs_dir_hook_disable(void);
+void xfs_dir_hook_enable(void);
+
+int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
+#else
+# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+struct xfs_parent_args;
+
+struct xfs_dir_update {
+ struct xfs_inode *dp;
+ const struct xfs_name *name;
+ struct xfs_inode *ip;
+ struct xfs_parent_args *ppargs;
+};
+
+int xfs_dir_create_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_add_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_remove_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+
+int xfs_dir_exchange_children(struct xfs_trans *tp, struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2, unsigned int spaceres);
+int xfs_dir_rename_children(struct xfs_trans *tp, struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt, unsigned int spaceres,
+ struct xfs_dir_update *du_wip);
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index ea0b9628df18..a16b05c43e2e 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -178,6 +178,14 @@ __xfs_dir3_data_check(
while (offset < end) {
struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
+ unsigned int reclen;
+
+ /*
+ * Are the remaining bytes large enough to hold an
+ * unused entry?
+ */
+ if (offset > end - xfs_dir2_data_unusedsize(1))
+ return __this_address;
/*
* If it's unused, look for the space in the bestfree table.
@@ -187,9 +195,13 @@ __xfs_dir3_data_check(
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
xfs_failaddr_t fa;
+ reclen = xfs_dir2_data_unusedsize(
+ be16_to_cpu(dup->length));
if (lastfree != 0)
return __this_address;
- if (offset + be16_to_cpu(dup->length) > end)
+ if (be16_to_cpu(dup->length) != reclen)
+ return __this_address;
+ if (offset + reclen > end)
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
offset)
@@ -207,10 +219,18 @@ __xfs_dir3_data_check(
be16_to_cpu(bf[2].length))
return __this_address;
}
- offset += be16_to_cpu(dup->length);
+ offset += reclen;
lastfree = 1;
continue;
}
+
+ /*
+ * This is not an unused entry. Are the remaining bytes
+ * large enough for a dirent with a single-byte name?
+ */
+ if (offset > end - xfs_dir2_data_entsize(mp, 1))
+ return __this_address;
+
/*
* It's a real entry. Validate the fields.
* If this is a block directory then make sure it's
@@ -219,9 +239,10 @@ __xfs_dir3_data_check(
*/
if (dep->namelen == 0)
return __this_address;
- if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
+ reclen = xfs_dir2_data_entsize(mp, dep->namelen);
+ if (offset + reclen > end)
return __this_address;
- if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end)
+ if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset)
return __this_address;
@@ -245,7 +266,7 @@ __xfs_dir3_data_check(
if (i >= be32_to_cpu(btp->count))
return __this_address;
}
- offset += xfs_dir2_data_entsize(mp, dep->namelen);
+ offset += reclen;
}
/*
* Need to have seen all the entries and all the bestfree slots.
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 3befb32509fa..10041350274a 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -190,6 +190,13 @@ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
struct dir_context *ctx, size_t bufsize);
static inline unsigned int
+xfs_dir2_data_unusedsize(
+ unsigned int len)
+{
+ return round_up(len, XFS_DIR2_DATA_ALIGN);
+}
+
+static inline unsigned int
xfs_dir2_data_entsize(
struct xfs_mount *mp,
unsigned int namelen)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 61f51becff4f..e1bfee0c3b1a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -90,8 +90,7 @@ struct xfs_ifork;
#define XFSLABEL_MAX 12
/*
- * Superblock - in core version. Must match the ondisk version below.
- * Must be padded to 64 bit alignment.
+ * Superblock - in core version. Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
@@ -178,10 +177,8 @@ typedef struct xfs_sb {
/* must be padded to 64 bit alignment */
} xfs_sb_t;
-#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
-
/*
- * Superblock - on disk version. Must match the in core version above.
+ * Superblock - on disk version.
* Must be padded to 64 bit alignment.
*/
struct xfs_dsb {
@@ -265,6 +262,8 @@ struct xfs_dsb {
/* must be padded to 64 bit alignment */
};
+#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc)
+
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
* a feature bit is set when the flag is used.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 14c81f227c5b..0af5b7a33d05 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1946,6 +1946,21 @@ retry:
}
return -ENOSPC;
}
+
+ /*
+ * Protect against obviously corrupt allocation btree records. Later
+ * xfs_iget checks will catch re-allocation of other active in-memory
+ * and on-disk inodes. If we don't catch reallocating the parent inode
+ * here we will deadlock in xfs_iget() so we have to do these checks
+ * first.
+ */
+ if (ino == parent || !xfs_verify_dir_ino(mp, ino)) {
+ xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
+ xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
+ XFS_SICK_AG_INOBT);
+ return -EFSCORRUPTED;
+ }
+
*new_ino = ino;
return 0;
}
@@ -1975,7 +1990,7 @@ xfs_difree_inode_chunk(
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -2021,8 +2036,7 @@ xfs_difree_inode_chunk(
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
- &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE,
- false);
+ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 42e9fd47f6c7..496e2f72a85b 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -170,7 +170,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
- &XFS_RMAP_OINFO_INOBT, resv, false);
+ &XFS_RMAP_OINFO_INOBT, resv, 0);
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
new file mode 100644
index 000000000000..032333289113
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include <linux/iversion.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_inode_util.h"
+#include "xfs_trans.h"
+#include "xfs_ialloc.h"
+#include "xfs_health.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_iunlink_item.h"
+#include "xfs_inode_item.h"
+
+uint16_t
+xfs_flags2diflags(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ /* can't set PREALLOC this way, just preserve it */
+ uint16_t di_flags =
+ (ip->i_diflags & XFS_DIFLAG_PREALLOC);
+
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (xflags & FS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (xflags & FS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (xflags & FS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (xflags & FS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if (xflags & FS_XFLAG_NODEFRAG)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (xflags & FS_XFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (xflags & FS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ if (xflags & FS_XFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_REALTIME)
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (xflags & FS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ }
+
+ return di_flags;
+}
+
+uint64_t
+xfs_flags2diflags2(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ uint64_t di_flags2 =
+ (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
+ XFS_DIFLAG2_BIGTIME |
+ XFS_DIFLAG2_NREXT64));
+
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+ if (xflags & FS_XFLAG_COWEXTSIZE)
+ di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+
+ return di_flags2;
+}
+
+uint32_t
+xfs_ip2xflags(
+ struct xfs_inode *ip)
+{
+ uint32_t flags = 0;
+
+ if (ip->i_diflags & XFS_DIFLAG_ANY) {
+ if (ip->i_diflags & XFS_DIFLAG_REALTIME)
+ flags |= FS_XFLAG_REALTIME;
+ if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
+ flags |= FS_XFLAG_PREALLOC;
+ if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
+ flags |= FS_XFLAG_IMMUTABLE;
+ if (ip->i_diflags & XFS_DIFLAG_APPEND)
+ flags |= FS_XFLAG_APPEND;
+ if (ip->i_diflags & XFS_DIFLAG_SYNC)
+ flags |= FS_XFLAG_SYNC;
+ if (ip->i_diflags & XFS_DIFLAG_NOATIME)
+ flags |= FS_XFLAG_NOATIME;
+ if (ip->i_diflags & XFS_DIFLAG_NODUMP)
+ flags |= FS_XFLAG_NODUMP;
+ if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ flags |= FS_XFLAG_RTINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ flags |= FS_XFLAG_PROJINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
+ flags |= FS_XFLAG_NOSYMLINKS;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
+ flags |= FS_XFLAG_EXTSIZE;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
+ flags |= FS_XFLAG_EXTSZINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
+ flags |= FS_XFLAG_NODEFRAG;
+ if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ flags |= FS_XFLAG_FILESTREAM;
+ }
+
+ if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
+ if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ flags |= FS_XFLAG_COWEXTSIZE;
+ }
+
+ if (xfs_inode_has_attr_fork(ip))
+ flags |= FS_XFLAG_HASATTR;
+ return flags;
+}
+
+prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+ if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ return dp->i_projid;
+
+ /* Assign to the root project by default. */
+ return 0;
+}
+
+/* Propagate di_flags from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ unsigned int di_flags = 0;
+ xfs_failaddr_t failaddr;
+ umode_t mode = VFS_I(ip)->i_mode;
+
+ if (S_ISDIR(mode)) {
+ if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_extsize = pip->i_extsize;
+ }
+ if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(mode)) {
+ if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ xfs_has_realtime(ip->i_mount))
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_extsize = pip->i_extsize;
+ }
+ }
+ if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
+ xfs_inherit_noatime)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
+ xfs_inherit_nodump)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
+ xfs_inherit_sync)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
+ xfs_inherit_nosymlinks)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
+ xfs_inherit_nodefrag)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+
+ ip->i_diflags |= di_flags;
+
+ /*
+ * Inode verifiers on older kernels only check that the extent size
+ * hint is an integer multiple of the rt extent size on realtime files.
+ * They did not check the hint alignment on a directory with both
+ * rtinherit and extszinherit flags set. If the misaligned hint is
+ * propagated from a directory into a new realtime file, new file
+ * allocations will fail due to math errors in the rt allocator and/or
+ * trip the verifiers. Validate the hint settings in the new file so
+ * that we don't let broken hints propagate.
+ */
+ failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+ VFS_I(ip)->i_mode, ip->i_diflags);
+ if (failaddr) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ }
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags2(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ xfs_failaddr_t failaddr;
+
+ if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = pip->i_cowextsize;
+ }
+ if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
+ ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+
+ /* Don't let invalid cowextsize hints propagate. */
+ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+ VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+ if (failaddr) {
+ ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = 0;
+ }
+}
+
+/*
+ * If we need to create attributes immediately after allocating the inode,
+ * initialise an empty attribute fork right now. We use the default fork offset
+ * for attributes here as we don't know exactly what size or how many
+ * attributes we might be adding. We can do this safely here because we know
+ * the data fork is completely empty and this saves us from needing to run a
+ * separate transaction to set the fork offset in the immediate future.
+ *
+ * If we have parent pointers and the caller hasn't told us that the file will
+ * never be linked into a directory tree, we /must/ create the attr fork.
+ */
+static inline bool
+xfs_icreate_want_attrfork(
+ struct xfs_mount *mp,
+ const struct xfs_icreate_args *args)
+{
+ if (args->flags & XFS_ICREATE_INIT_XATTRS)
+ return true;
+
+ if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp))
+ return true;
+
+ return false;
+}
+
+/* Initialise an inode's attributes. */
+void
+xfs_inode_init(
+ struct xfs_trans *tp,
+ const struct xfs_icreate_args *args,
+ struct xfs_inode *ip)
+{
+ struct xfs_inode *pip = args->pip;
+ struct inode *dir = pip ? VFS_I(pip) : NULL;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct inode *inode = VFS_I(ip);
+ unsigned int flags;
+ int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
+ XFS_ICHGTIME_ACCESS;
+
+ if (args->flags & XFS_ICREATE_TMPFILE)
+ set_nlink(inode, 0);
+ else if (S_ISDIR(args->mode))
+ set_nlink(inode, 2);
+ else
+ set_nlink(inode, 1);
+ inode->i_rdev = args->rdev;
+
+ if (!args->idmap || pip == NULL) {
+ /* creating a tree root, sb rooted, or detached file */
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ ip->i_projid = 0;
+ inode->i_mode = args->mode;
+ } else {
+ /* creating a child in the directory tree */
+ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
+ inode_fsuid_set(inode, args->idmap);
+ inode->i_gid = dir->i_gid;
+ inode->i_mode = args->mode;
+ } else {
+ inode_init_owner(args->idmap, inode, dir, args->mode);
+ }
+
+ /*
+ * If the group ID of the new file does not match the effective
+ * group ID or one of the supplementary group IDs, the S_ISGID
+ * bit is cleared (and only if the irix_sgid_inherit
+ * compatibility variable is set).
+ */
+ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
+ inode->i_mode &= ~S_ISGID;
+
+ ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0;
+ }
+
+ ip->i_disk_size = 0;
+ ip->i_df.if_nextents = 0;
+ ASSERT(ip->i_nblocks == 0);
+
+ ip->i_extsize = 0;
+ ip->i_diflags = 0;
+
+ if (xfs_has_v3inodes(mp)) {
+ inode_set_iversion(inode, 1);
+ ip->i_cowextsize = 0;
+ times |= XFS_ICHGTIME_CREATE;
+ }
+
+ xfs_trans_ichgtime(tp, ip, times);
+
+ flags = XFS_ILOG_CORE;
+ switch (args->mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ ip->i_df.if_format = XFS_DINODE_FMT_DEV;
+ flags |= XFS_ILOG_DEV;
+ break;
+ case S_IFREG:
+ case S_IFDIR:
+ if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
+ xfs_inode_inherit_flags(ip, pip);
+ if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
+ xfs_inode_inherit_flags2(ip, pip);
+ fallthrough;
+ case S_IFLNK:
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_df.if_bytes = 0;
+ ip->i_df.if_data = NULL;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ if (xfs_icreate_want_attrfork(mp, args)) {
+ ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
+ xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
+ }
+
+ xfs_trans_log_inode(tp, ip, flags);
+}
+
+/*
+ * In-Core Unlinked List Lookups
+ * =============================
+ *
+ * Every inode is supposed to be reachable from some other piece of metadata
+ * with the exception of the root directory. Inodes with a connection to a
+ * file descriptor but not linked from anywhere in the on-disk directory tree
+ * are collectively known as unlinked inodes, though the filesystem itself
+ * maintains links to these inodes so that on-disk metadata are consistent.
+ *
+ * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
+ * header contains a number of buckets that point to an inode, and each inode
+ * record has a pointer to the next inode in the hash chain. This
+ * singly-linked list causes scaling problems in the iunlink remove function
+ * because we must walk that list to find the inode that points to the inode
+ * being removed from the unlinked hash bucket list.
+ *
+ * Hence we keep an in-memory double linked list to link each inode on an
+ * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
+ * based lists would require having 64 list heads in the perag, one for each
+ * list. This is expensive in terms of memory (think millions of AGs) and cache
+ * misses on lookups. Instead, use the fact that inodes on the unlinked list
+ * must be referenced at the VFS level to keep them on the list and hence we
+ * have an existence guarantee for inodes on the unlinked list.
+ *
+ * Given we have an existence guarantee, we can use lockless inode cache lookups
+ * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
+ * for the double linked unlinked list, and we don't need any extra locking to
+ * keep the list safe as all manipulations are done under the AGI buffer lock.
+ * Keeping the list up to date does not require memory allocation, just finding
+ * the XFS inode and updating the next/prev unlinked list aginos.
+ */
+
+/*
+ * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
+ * is not in cache.
+ */
+static int
+xfs_iunlink_update_backref(
+ struct xfs_perag *pag,
+ xfs_agino_t prev_agino,
+ xfs_agino_t next_agino)
+{
+ struct xfs_inode *ip;
+
+ /* No update necessary if we are at the end of the list. */
+ if (next_agino == NULLAGINO)
+ return 0;
+
+ ip = xfs_iunlink_lookup(pag, next_agino);
+ if (!ip)
+ return -ENOLINK;
+
+ ip->i_prev_unlinked = prev_agino;
+ return 0;
+}
+
+/*
+ * Point the AGI unlinked bucket at an inode and log the results. The caller
+ * is responsible for validating the old value.
+ */
+STATIC int
+xfs_iunlink_update_bucket(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ unsigned int bucket_index,
+ xfs_agino_t new_agino)
+{
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t old_value;
+ int offset;
+
+ ASSERT(xfs_verify_agino_or_null(pag, new_agino));
+
+ old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
+ old_value, new_agino);
+
+ /*
+ * We should never find the head of the list already set to the value
+ * passed in because either we're adding or removing ourselves from the
+ * head of the list.
+ */
+ if (old_value == new_agino) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
+ offset = offsetof(struct xfs_agi, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket_index);
+ xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
+ return 0;
+}
+
+static int
+xfs_iunlink_insert_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t next_agino;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ /*
+ * Get the index into the agi hash table for the list this inode will
+ * go on. Make sure the pointer isn't garbage and that this inode
+ * isn't already on the list.
+ */
+ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (next_agino == agino ||
+ !xfs_verify_agino_or_null(pag, next_agino)) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Update the prev pointer in the next inode to point back to this
+ * inode.
+ */
+ error = xfs_iunlink_update_backref(pag, agino, next_agino);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
+ if (error)
+ return error;
+
+ if (next_agino != NULLAGINO) {
+ /*
+ * There is already another inode in the bucket, so point this
+ * inode to the current head of the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
+ if (error)
+ return error;
+ ip->i_next_unlinked = next_agino;
+ }
+
+ /* Point the head of the list to point to this inode. */
+ ip->i_prev_unlinked = NULLAGINO;
+ return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+}
+
+/*
+ * This is called when the inode's link count has gone to 0 or we are creating
+ * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
+ *
+ * We place the on-disk inode on a list in the AGI. It will be pulled from this
+ * list when the inode is freed.
+ */
+int
+xfs_iunlink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ struct xfs_buf *agibp;
+ int error;
+
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+ ASSERT(VFS_I(ip)->i_mode != 0);
+ trace_xfs_iunlink(ip);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
+ /* Get the agi buffer first. It ensures lock ordering on the list. */
+ error = xfs_read_agi(pag, tp, 0, &agibp);
+ if (error)
+ goto out;
+
+ error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
+out:
+ xfs_perag_put(pag);
+ return error;
+}
+
+static int
+xfs_iunlink_remove_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ xfs_agino_t head_agino;
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ trace_xfs_iunlink_remove(ip);
+
+ /*
+ * Get the index into the agi hash table for the list this inode will
+ * go on. Make sure the head pointer isn't garbage.
+ */
+ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (!xfs_verify_agino(pag, head_agino)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ agi, sizeof(*agi));
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Set our inode's next_unlinked pointer to NULL and then return
+ * the old pointer value so that we can update whatever was previous
+ * to us in the list to point to whatever was next in the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
+ if (error)
+ return error;
+
+ /*
+ * Update the prev pointer in the next inode to point back to previous
+ * inode in the chain.
+ */
+ error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error)
+ return error;
+
+ if (head_agino != agino) {
+ struct xfs_inode *prev_ip;
+
+ prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
+ if (!prev_ip) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
+ return -EFSCORRUPTED;
+ }
+
+ error = xfs_iunlink_log_inode(tp, prev_ip, pag,
+ ip->i_next_unlinked);
+ prev_ip->i_next_unlinked = ip->i_next_unlinked;
+ } else {
+ /* Point the head of the list to the next unlinked inode. */
+ error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
+ ip->i_next_unlinked);
+ }
+
+ ip->i_next_unlinked = NULLAGINO;
+ ip->i_prev_unlinked = 0;
+ return error;
+}
+
+/*
+ * Pull the on-disk inode from the AGI unlinked list.
+ */
+int
+xfs_iunlink_remove(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *ip)
+{
+ struct xfs_buf *agibp;
+ int error;
+
+ trace_xfs_iunlink_remove(ip);
+
+ /* Get the agi buffer first. It ensures lock ordering on the list. */
+ error = xfs_read_agi(pag, tp, 0, &agibp);
+ if (error)
+ return error;
+
+ return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
+}
+
+/*
+ * Decrement the link count on an inode & log the change. If this causes the
+ * link count to go to zero, move the inode to AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
+ */
+int
+xfs_droplink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ if (inode->i_nlink)
+ return 0;
+
+ return xfs_iunlink(tp, ip);
+}
+
+/*
+ * Increment the link count on an inode & log the change.
+ */
+void
+xfs_bumplink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Free an inode in the ondisk index and zero it out. */
+int
+xfs_inode_uninit(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *ip,
+ struct xfs_icluster *xic)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int error;
+
+ /*
+ * Free the inode first so that we guarantee that the AGI lock is going
+ * to be taken before we remove the inode from the unlinked list. This
+ * makes the AGI lock -> unlinked list modification order the same as
+ * used in O_TMPFILE creation.
+ */
+ error = xfs_difree(tp, pag, ip->i_ino, xic);
+ if (error)
+ return error;
+
+ error = xfs_iunlink_remove(tp, pag, ip);
+ if (error)
+ return error;
+
+ /*
+ * Free any local-format data sitting around before we reset the
+ * data fork to extents format. Note that the attr fork data has
+ * already been freed by xfs_attr_inactive.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ kfree(ip->i_df.if_data);
+ ip->i_df.if_data = NULL;
+ ip->i_df.if_bytes = 0;
+ }
+
+ VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
+ ip->i_diflags = 0;
+ ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
+ ip->i_forkoff = 0; /* mark the attr fork not in use */
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+
+ /*
+ * Bump the generation count so no one will be confused
+ * by reincarnations of this inode.
+ */
+ VFS_I(ip)->i_generation++;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_util.h b/fs/xfs/libxfs/xfs_inode_util.h
new file mode 100644
index 000000000000..060242998a23
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_INODE_UTIL_H__
+#define __XFS_INODE_UTIL_H__
+
+struct xfs_icluster;
+
+uint16_t xfs_flags2diflags(struct xfs_inode *ip, unsigned int xflags);
+uint64_t xfs_flags2diflags2(struct xfs_inode *ip, unsigned int xflags);
+uint32_t xfs_dic2xflags(struct xfs_inode *ip);
+uint32_t xfs_ip2xflags(struct xfs_inode *ip);
+
+prid_t xfs_get_initial_prid(struct xfs_inode *dp);
+
+/*
+ * File creation context.
+ *
+ * Due to our only partial reliance on the VFS to propagate uid and gid values
+ * according to accepted Unix behaviors, callers must initialize idmap to the
+ * correct idmapping structure to get the correct inheritance behaviors when
+ * XFS_MOUNT_GRPID is set.
+ *
+ * To create files detached from the directory tree (e.g. quota inodes), set
+ * idmap to NULL. To create a tree root, set pip to NULL.
+ */
+struct xfs_icreate_args {
+ struct mnt_idmap *idmap;
+ struct xfs_inode *pip; /* parent inode or null */
+ dev_t rdev;
+ umode_t mode;
+
+#define XFS_ICREATE_TMPFILE (1U << 0) /* create an unlinked file */
+#define XFS_ICREATE_INIT_XATTRS (1U << 1) /* will set xattrs immediately */
+#define XFS_ICREATE_UNLINKABLE (1U << 2) /* cannot link into dir tree */
+ uint16_t flags;
+};
+
+/*
+ * Flags for xfs_trans_ichgtime().
+ */
+#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
+#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
+#define XFS_ICHGTIME_ACCESS 0x8 /* last access timestamp */
+void xfs_trans_ichgtime(struct xfs_trans *tp, struct xfs_inode *ip, int flags);
+
+void xfs_inode_init(struct xfs_trans *tp, const struct xfs_icreate_args *args,
+ struct xfs_inode *ip);
+
+int xfs_inode_uninit(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip, struct xfs_icluster *xic);
+
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip);
+int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip);
+void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip);
+
+#endif /* __XFS_INODE_UTIL_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index e8cdd77d03fa..23c133fd36f5 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -85,6 +85,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12);
*/
+ XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 511c912d515c..198b84117df1 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -24,6 +24,7 @@
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_refcount_item.h"
struct kmem_cache *xfs_refcount_intent_cache;
@@ -51,7 +52,7 @@ xfs_refcount_lookup_le(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -71,7 +72,7 @@ xfs_refcount_lookup_ge(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_GE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -91,7 +92,7 @@ xfs_refcount_lookup_eq(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -183,7 +184,7 @@ xfs_refcount_get_rec(
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, irec);
- trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_get(cur, irec);
return 0;
}
@@ -201,7 +202,7 @@ xfs_refcount_update(
uint32_t start;
int error;
- trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_update(cur, irec);
start = xfs_refcount_encode_startblock(irec->rc_startblock,
irec->rc_domain);
@@ -211,8 +212,7 @@ xfs_refcount_update(
error = xfs_btree_update(cur, &rec);
if (error)
- trace_xfs_refcount_update_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_update_error(cur, error, _RET_IP_);
return error;
}
@@ -229,7 +229,7 @@ xfs_refcount_insert(
{
int error;
- trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_insert(cur, irec);
cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
@@ -247,8 +247,7 @@ xfs_refcount_insert(
out_error:
if (error)
- trace_xfs_refcount_insert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_insert_error(cur, error, _RET_IP_);
return error;
}
@@ -275,7 +274,7 @@ xfs_refcount_delete(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
+ trace_xfs_refcount_delete(cur, &irec);
error = xfs_btree_delete(cur, i);
if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
xfs_btree_mark_sick(cur);
@@ -288,8 +287,7 @@ xfs_refcount_delete(
&found_rec);
out_error:
if (error)
- trace_xfs_refcount_delete_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_delete_error(cur, error, _RET_IP_);
return error;
}
@@ -413,8 +411,7 @@ xfs_refcount_split_extent(
return 0;
*shape_changed = true;
- trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- &rcext, agbno);
+ trace_xfs_refcount_split_extent(cur, &rcext, agbno);
/* Establish the right extent. */
tmp = rcext;
@@ -438,8 +435,7 @@ xfs_refcount_split_extent(
return error;
out_error:
- trace_xfs_refcount_split_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -458,8 +454,7 @@ xfs_refcount_merge_center_extents(
int error;
int found_rec;
- trace_xfs_refcount_merge_center_extents(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, left, center, right);
+ trace_xfs_refcount_merge_center_extents(cur, left, center, right);
ASSERT(left->rc_domain == center->rc_domain);
ASSERT(right->rc_domain == center->rc_domain);
@@ -522,8 +517,7 @@ xfs_refcount_merge_center_extents(
return error;
out_error:
- trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_);
return error;
}
@@ -541,8 +535,7 @@ xfs_refcount_merge_left_extent(
int error;
int found_rec;
- trace_xfs_refcount_merge_left_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, left, cleft);
+ trace_xfs_refcount_merge_left_extent(cur, left, cleft);
ASSERT(left->rc_domain == cleft->rc_domain);
@@ -589,8 +582,7 @@ xfs_refcount_merge_left_extent(
return error;
out_error:
- trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -607,8 +599,7 @@ xfs_refcount_merge_right_extent(
int error;
int found_rec;
- trace_xfs_refcount_merge_right_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, cright, right);
+ trace_xfs_refcount_merge_right_extent(cur, cright, right);
ASSERT(right->rc_domain == cright->rc_domain);
@@ -658,8 +649,7 @@ xfs_refcount_merge_right_extent(
return error;
out_error:
- trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -748,13 +738,11 @@ not_found:
cleft->rc_refcount = 1;
cleft->rc_domain = domain;
}
- trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- left, cleft, agbno);
+ trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno);
return error;
out_error:
- trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -843,13 +831,12 @@ not_found:
cright->rc_refcount = 1;
cright->rc_domain = domain;
}
- trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- cright, right, agbno + aglen);
+ trace_xfs_refcount_find_right_extent(cur, cright, right,
+ agbno + aglen);
return error;
out_error:
- trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1148,8 +1135,7 @@ xfs_refcount_adjust_extents(
tmp.rc_refcount = 1 + adj;
tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &tmp);
+ trace_xfs_refcount_modify_extent(cur, &tmp);
/*
* Either cover the hole (increment) or
@@ -1173,7 +1159,7 @@ xfs_refcount_adjust_extents(
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_error;
}
@@ -1214,8 +1200,7 @@ xfs_refcount_adjust_extents(
if (ext.rc_refcount == MAXREFCOUNT)
goto skip;
ext.rc_refcount += adj;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &ext);
+ trace_xfs_refcount_modify_extent(cur, &ext);
cur->bc_refc.nr_ops++;
if (ext.rc_refcount > 1) {
error = xfs_refcount_update(cur, &ext);
@@ -1237,7 +1222,7 @@ xfs_refcount_adjust_extents(
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_error;
}
@@ -1254,8 +1239,7 @@ advloop:
return error;
out_error:
- trace_xfs_refcount_modify_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1272,11 +1256,9 @@ xfs_refcount_adjust(
int error;
if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
- trace_xfs_refcount_increase(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+ trace_xfs_refcount_increase(cur, *agbno, *aglen);
else
- trace_xfs_refcount_decrease(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+ trace_xfs_refcount_decrease(cur, *agbno, *aglen);
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
@@ -1315,28 +1297,10 @@ xfs_refcount_adjust(
return 0;
out_error:
- trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_refcount_adjust_error(cur, error, _RET_IP_);
return error;
}
-/* Clean up after calling xfs_refcount_finish_one. */
-void
-xfs_refcount_finish_one_cleanup(
- struct xfs_trans *tp,
- struct xfs_btree_cur *rcur,
- int error)
-{
- struct xfs_buf *agbp;
-
- if (rcur == NULL)
- return;
- agbp = rcur->bc_ag.agbp;
- xfs_btree_del_cursor(rcur, error);
- if (error)
- xfs_trans_brelse(tp, agbp);
-}
-
/*
* Set up a continuation a deferred refcount operation by updating the intent.
* Checks to make sure we're not going to run off the end of the AG.
@@ -1378,7 +1342,7 @@ xfs_refcount_finish_one(
struct xfs_btree_cur **pcur)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *rcur;
+ struct xfs_btree_cur *rcur = *pcur;
struct xfs_buf *agbp = NULL;
int error = 0;
xfs_agblock_t bno;
@@ -1387,9 +1351,7 @@ xfs_refcount_finish_one(
bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
- trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
- ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
- ri->ri_blockcount);
+ trace_xfs_refcount_deferred(mp, ri);
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
@@ -1398,11 +1360,10 @@ xfs_refcount_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- rcur = *pcur;
if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
nr_ops = rcur->bc_refc.nr_ops;
shape_changes = rcur->bc_refc.shape_changes;
- xfs_refcount_finish_one_cleanup(tp, rcur, 0);
+ xfs_btree_del_cursor(rcur, 0);
rcur = NULL;
*pcur = NULL;
}
@@ -1412,11 +1373,11 @@ xfs_refcount_finish_one(
if (error)
return error;
- rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
+ *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp,
+ ri->ri_pag);
rcur->bc_refc.nr_ops = nr_ops;
rcur->bc_refc.shape_changes = shape_changes;
}
- *pcur = rcur;
switch (ri->ri_type) {
case XFS_REFCOUNT_INCREASE:
@@ -1452,8 +1413,7 @@ xfs_refcount_finish_one(
return -EFSCORRUPTED;
}
if (!error && ri->ri_blockcount > 0)
- trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
- ri->ri_type, bno, ri->ri_blockcount);
+ trace_xfs_refcount_finish_one_leftover(mp, ri);
return error;
}
@@ -1469,11 +1429,6 @@ __xfs_refcount_add(
{
struct xfs_refcount_intent *ri;
- trace_xfs_refcount_defer(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
- type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
- blockcount);
-
ri = kmem_cache_alloc(xfs_refcount_intent_cache,
GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
@@ -1481,8 +1436,7 @@ __xfs_refcount_add(
ri->ri_startblock = startblock;
ri->ri_blockcount = blockcount;
- xfs_refcount_update_get_group(tp->t_mountp, ri);
- xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
+ xfs_refcount_defer_add(tp, ri);
}
/*
@@ -1537,8 +1491,7 @@ xfs_refcount_find_shared(
int have;
int error;
- trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_find_shared(cur, agbno, aglen);
/* By default, skip the whole range */
*fbno = NULLAGBLOCK;
@@ -1625,13 +1578,11 @@ xfs_refcount_find_shared(
}
done:
- trace_xfs_refcount_find_shared_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *fbno, *flen);
+ trace_xfs_refcount_find_shared_result(cur, *fbno, *flen);
out_error:
if (error)
- trace_xfs_refcount_find_shared_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_);
return error;
}
@@ -1737,8 +1688,7 @@ xfs_refcount_adjust_cow_extents(
tmp.rc_refcount = 1;
tmp.rc_domain = XFS_REFC_DOMAIN_COW;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &tmp);
+ trace_xfs_refcount_modify_extent(cur, &tmp);
error = xfs_refcount_insert(cur, &tmp,
&found_tmp);
@@ -1769,8 +1719,7 @@ xfs_refcount_adjust_cow_extents(
}
ext.rc_refcount = 0;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &ext);
+ trace_xfs_refcount_modify_extent(cur, &ext);
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
@@ -1786,8 +1735,7 @@ xfs_refcount_adjust_cow_extents(
return error;
out_error:
- trace_xfs_refcount_modify_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1833,8 +1781,7 @@ xfs_refcount_adjust_cow(
return 0;
out_error:
- trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_);
return error;
}
@@ -1847,8 +1794,7 @@ __xfs_refcount_cow_alloc(
xfs_agblock_t agbno,
xfs_extlen_t aglen)
{
- trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_cow_increase(rcur, agbno, aglen);
/* Add refcount btree reservation */
return xfs_refcount_adjust_cow(rcur, agbno, aglen,
@@ -1864,8 +1810,7 @@ __xfs_refcount_cow_free(
xfs_agblock_t agbno,
xfs_extlen_t aglen)
{
- trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_cow_decrease(rcur, agbno, aglen);
/* Remove refcount btree reservation */
return xfs_refcount_adjust_cow(rcur, agbno, aglen,
@@ -2010,9 +1955,6 @@ xfs_refcount_recover_cow_leftovers(
if (error)
goto out_free;
- trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
- &rr->rr_rrec);
-
/* Free the orphan record */
fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
rr->rr_rrec.rc_startblock);
@@ -2022,7 +1964,7 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_trans;
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 9b56768a590c..68acb0b1b4a8 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -48,6 +48,12 @@ enum xfs_refcount_intent_type {
XFS_REFCOUNT_FREE_COW,
};
+#define XFS_REFCOUNT_INTENT_STRINGS \
+ { XFS_REFCOUNT_INCREASE, "incr" }, \
+ { XFS_REFCOUNT_DECREASE, "decr" }, \
+ { XFS_REFCOUNT_ALLOC_COW, "alloc_cow" }, \
+ { XFS_REFCOUNT_FREE_COW, "free_cow" }
+
struct xfs_refcount_intent {
struct list_head ri_list;
struct xfs_perag *ri_pag;
@@ -68,16 +74,11 @@ xfs_refcount_check_domain(
return true;
}
-void xfs_refcount_update_get_group(struct xfs_mount *mp,
- struct xfs_refcount_intent *ri);
-
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
-extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
- struct xfs_btree_cur *rcur, int error);
extern int xfs_refcount_finish_one(struct xfs_trans *tp,
struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index ca59f6c89f3e..cb3b1d42ae9a 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -109,7 +109,7 @@ xfs_refcountbt_free_block(
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
- &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false);
+ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, 0);
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index ef16f6f9cef6..6ef4687b3aba 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -24,6 +24,7 @@
#include "xfs_inode.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_rmap_item.h"
struct kmem_cache *xfs_rmap_intent_cache;
@@ -100,8 +101,7 @@ xfs_rmap_update(
union xfs_btree_rec rec;
int error;
- trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- irec->rm_startblock, irec->rm_blockcount,
+ trace_xfs_rmap_update(cur, irec->rm_startblock, irec->rm_blockcount,
irec->rm_owner, irec->rm_offset, irec->rm_flags);
rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
@@ -111,8 +111,7 @@ xfs_rmap_update(
xfs_rmap_irec_offset_pack(irec));
error = xfs_btree_update(cur, &rec);
if (error)
- trace_xfs_rmap_update_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_update_error(cur, error, _RET_IP_);
return error;
}
@@ -128,8 +127,7 @@ xfs_rmap_insert(
int i;
int error;
- trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
- len, owner, offset, flags);
+ trace_xfs_rmap_insert(rcur, agbno, len, owner, offset, flags);
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
@@ -155,8 +153,7 @@ xfs_rmap_insert(
}
done:
if (error)
- trace_xfs_rmap_insert_error(rcur->bc_mp,
- rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_insert_error(rcur, error, _RET_IP_);
return error;
}
@@ -172,8 +169,7 @@ xfs_rmap_delete(
int i;
int error;
- trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
- len, owner, offset, flags);
+ trace_xfs_rmap_delete(rcur, agbno, len, owner, offset, flags);
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
@@ -194,8 +190,7 @@ xfs_rmap_delete(
}
done:
if (error)
- trace_xfs_rmap_delete_error(rcur->bc_mp,
- rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_delete_error(rcur, error, _RET_IP_);
return error;
}
@@ -342,8 +337,7 @@ xfs_rmap_find_left_neighbor_helper(
{
struct xfs_find_left_neighbor_info *info = priv;
- trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, rec->rm_startblock,
+ trace_xfs_rmap_find_left_neighbor_candidate(cur, rec->rm_startblock,
rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
rec->rm_flags);
@@ -393,8 +387,8 @@ xfs_rmap_find_left_neighbor(
info.high.rm_blockcount = 0;
info.irec = irec;
- trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
+ trace_xfs_rmap_find_left_neighbor_query(cur, bno, 0, owner, offset,
+ flags);
/*
* Historically, we always used the range query to walk every reverse
@@ -425,8 +419,7 @@ xfs_rmap_find_left_neighbor(
return error;
*stat = 1;
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ trace_xfs_rmap_find_left_neighbor_result(cur, irec->rm_startblock,
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
irec->rm_flags);
return 0;
@@ -441,8 +434,7 @@ xfs_rmap_lookup_le_range_helper(
{
struct xfs_find_left_neighbor_info *info = priv;
- trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, rec->rm_startblock,
+ trace_xfs_rmap_lookup_le_range_candidate(cur, rec->rm_startblock,
rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
rec->rm_flags);
@@ -489,8 +481,7 @@ xfs_rmap_lookup_le_range(
*stat = 0;
info.irec = irec;
- trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- bno, 0, owner, offset, flags);
+ trace_xfs_rmap_lookup_le_range(cur, bno, 0, owner, offset, flags);
/*
* Historically, we always used the range query to walk every reverse
@@ -521,8 +512,7 @@ xfs_rmap_lookup_le_range(
return error;
*stat = 1;
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ trace_xfs_rmap_lookup_le_range_result(cur, irec->rm_startblock,
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
irec->rm_flags);
return 0;
@@ -634,8 +624,7 @@ xfs_rmap_unmap(
(flags & XFS_RMAP_BMBT_BLOCK);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
/*
* We should always have a left record because there's a static record
@@ -651,10 +640,9 @@ xfs_rmap_unmap(
goto out_error;
}
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
- ltrec.rm_blockcount, ltrec.rm_owner,
- ltrec.rm_offset, ltrec.rm_flags);
+ trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
+ ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset,
+ ltrec.rm_flags);
ltoff = ltrec.rm_offset;
/*
@@ -721,10 +709,9 @@ xfs_rmap_unmap(
if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
/* exact match, simply remove the record from rmap tree */
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- ltrec.rm_startblock, ltrec.rm_blockcount,
- ltrec.rm_owner, ltrec.rm_offset,
- ltrec.rm_flags);
+ trace_xfs_rmap_delete(cur, ltrec.rm_startblock,
+ ltrec.rm_blockcount, ltrec.rm_owner,
+ ltrec.rm_offset, ltrec.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
@@ -800,8 +787,7 @@ xfs_rmap_unmap(
else
cur->bc_rec.r.rm_offset = offset + len;
cur->bc_rec.r.rm_flags = flags;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
- cur->bc_rec.r.rm_startblock,
+ trace_xfs_rmap_insert(cur, cur->bc_rec.r.rm_startblock,
cur->bc_rec.r.rm_blockcount,
cur->bc_rec.r.rm_owner,
cur->bc_rec.r.rm_offset,
@@ -812,12 +798,10 @@ xfs_rmap_unmap(
}
out_done:
- trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
return error;
}
@@ -987,8 +971,7 @@ xfs_rmap_map(
(flags & XFS_RMAP_BMBT_BLOCK);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
/*
@@ -1001,8 +984,7 @@ xfs_rmap_map(
if (error)
goto out_error;
if (have_lt) {
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
+ trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
ltrec.rm_offset, ltrec.rm_flags);
@@ -1040,10 +1022,10 @@ xfs_rmap_map(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
- gtrec.rm_blockcount, gtrec.rm_owner,
- gtrec.rm_offset, gtrec.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ gtrec.rm_startblock, gtrec.rm_blockcount,
+ gtrec.rm_owner, gtrec.rm_offset,
+ gtrec.rm_flags);
if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
have_gt = 0;
}
@@ -1080,12 +1062,9 @@ xfs_rmap_map(
* result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
*/
ltrec.rm_blockcount += gtrec.rm_blockcount;
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- gtrec.rm_startblock,
- gtrec.rm_blockcount,
- gtrec.rm_owner,
- gtrec.rm_offset,
- gtrec.rm_flags);
+ trace_xfs_rmap_delete(cur, gtrec.rm_startblock,
+ gtrec.rm_blockcount, gtrec.rm_owner,
+ gtrec.rm_offset, gtrec.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
@@ -1132,8 +1111,7 @@ xfs_rmap_map(
cur->bc_rec.r.rm_owner = owner;
cur->bc_rec.r.rm_offset = offset;
cur->bc_rec.r.rm_flags = flags;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- owner, offset, flags);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, flags);
error = xfs_btree_insert(cur, &i);
if (error)
goto out_error;
@@ -1144,12 +1122,10 @@ xfs_rmap_map(
}
}
- trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_rmap_map_error(cur, error, _RET_IP_);
return error;
}
@@ -1223,8 +1199,7 @@ xfs_rmap_convert(
(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
new_endoff = offset + len;
- trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
/*
* For the initial lookup, look for an exact match or the left-adjacent
@@ -1240,10 +1215,9 @@ xfs_rmap_convert(
goto done;
}
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
- PREV.rm_blockcount, PREV.rm_owner,
- PREV.rm_offset, PREV.rm_flags);
+ trace_xfs_rmap_lookup_le_range_result(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset,
+ PREV.rm_flags);
ASSERT(PREV.rm_offset <= offset);
ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
@@ -1284,10 +1258,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, LEFT.rm_startblock,
- LEFT.rm_blockcount, LEFT.rm_owner,
- LEFT.rm_offset, LEFT.rm_flags);
+ trace_xfs_rmap_find_left_neighbor_result(cur,
+ LEFT.rm_startblock, LEFT.rm_blockcount,
+ LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags);
if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
LEFT.rm_offset + LEFT.rm_blockcount == offset &&
xfs_rmap_is_mergeable(&LEFT, owner, newext))
@@ -1325,10 +1298,10 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
- RIGHT.rm_blockcount, RIGHT.rm_owner,
- RIGHT.rm_offset, RIGHT.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ RIGHT.rm_startblock, RIGHT.rm_blockcount,
+ RIGHT.rm_owner, RIGHT.rm_offset,
+ RIGHT.rm_flags);
if (bno + len == RIGHT.rm_startblock &&
offset + len == RIGHT.rm_offset &&
xfs_rmap_is_mergeable(&RIGHT, owner, newext))
@@ -1344,8 +1317,7 @@ xfs_rmap_convert(
RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
state &= ~RMAP_RIGHT_CONTIG;
- trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
- _RET_IP_);
+ trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
/* reset the cursor back to PREV */
error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
@@ -1376,10 +1348,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- RIGHT.rm_startblock, RIGHT.rm_blockcount,
- RIGHT.rm_owner, RIGHT.rm_offset,
- RIGHT.rm_flags);
+ trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
+ RIGHT.rm_blockcount, RIGHT.rm_owner,
+ RIGHT.rm_offset, RIGHT.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1396,10 +1367,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- PREV.rm_startblock, PREV.rm_blockcount,
- PREV.rm_owner, PREV.rm_offset,
- PREV.rm_flags);
+ trace_xfs_rmap_delete(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner,
+ PREV.rm_offset, PREV.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1428,10 +1398,9 @@ xfs_rmap_convert(
* Setting all of a previous oldext extent to newext.
* The left neighbor is contiguous, the right is not.
*/
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- PREV.rm_startblock, PREV.rm_blockcount,
- PREV.rm_owner, PREV.rm_offset,
- PREV.rm_flags);
+ trace_xfs_rmap_delete(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner,
+ PREV.rm_offset, PREV.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1468,10 +1437,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- RIGHT.rm_startblock, RIGHT.rm_blockcount,
- RIGHT.rm_owner, RIGHT.rm_offset,
- RIGHT.rm_flags);
+ trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
+ RIGHT.rm_blockcount, RIGHT.rm_owner,
+ RIGHT.rm_offset, RIGHT.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1549,8 +1517,7 @@ xfs_rmap_convert(
NEW.rm_blockcount = len;
NEW.rm_flags = newext;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
- len, owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1608,8 +1575,7 @@ xfs_rmap_convert(
NEW.rm_blockcount = len;
NEW.rm_flags = newext;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
- len, owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1640,9 +1606,8 @@ xfs_rmap_convert(
NEW = PREV;
NEW.rm_blockcount = offset - PREV.rm_offset;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
- NEW.rm_startblock, NEW.rm_blockcount,
- NEW.rm_owner, NEW.rm_offset,
+ trace_xfs_rmap_insert(cur, NEW.rm_startblock,
+ NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
NEW.rm_flags);
error = xfs_btree_insert(cur, &i);
if (error)
@@ -1669,8 +1634,7 @@ xfs_rmap_convert(
/* new middle extent - newext */
cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
cur->bc_rec.r.rm_flags |= newext;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1694,12 +1658,10 @@ xfs_rmap_convert(
ASSERT(0);
}
- trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
done:
if (error)
- trace_xfs_rmap_convert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
return error;
}
@@ -1735,8 +1697,7 @@ xfs_rmap_convert_shared(
(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
new_endoff = offset + len;
- trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
/*
* For the initial lookup, look for and exact match or the left-adjacent
@@ -1805,10 +1766,10 @@ xfs_rmap_convert_shared(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
- RIGHT.rm_blockcount, RIGHT.rm_owner,
- RIGHT.rm_offset, RIGHT.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ RIGHT.rm_startblock, RIGHT.rm_blockcount,
+ RIGHT.rm_owner, RIGHT.rm_offset,
+ RIGHT.rm_flags);
if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
state |= RMAP_RIGHT_CONTIG;
}
@@ -1822,8 +1783,7 @@ xfs_rmap_convert_shared(
RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
state &= ~RMAP_RIGHT_CONTIG;
- trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
- _RET_IP_);
+ trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
/*
* Switch out based on the FILLING and CONTIG state bits.
*/
@@ -2121,12 +2081,10 @@ xfs_rmap_convert_shared(
ASSERT(0);
}
- trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
done:
if (error)
- trace_xfs_rmap_convert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
return error;
}
@@ -2164,8 +2122,7 @@ xfs_rmap_unmap_shared(
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
/*
* We should always have a left record because there's a static record
@@ -2321,12 +2278,10 @@ xfs_rmap_unmap_shared(
goto out_error;
}
- trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_unmap_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
return error;
}
@@ -2361,8 +2316,7 @@ xfs_rmap_map_shared(
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
/* Is there a left record that abuts our range? */
error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags,
@@ -2387,10 +2341,10 @@ xfs_rmap_map_shared(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
- gtrec.rm_blockcount, gtrec.rm_owner,
- gtrec.rm_offset, gtrec.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ gtrec.rm_startblock, gtrec.rm_blockcount,
+ gtrec.rm_owner, gtrec.rm_offset,
+ gtrec.rm_flags);
if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
have_gt = 0;
@@ -2482,12 +2436,10 @@ xfs_rmap_map_shared(
goto out_error;
}
- trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_map_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_map_error(cur, error, _RET_IP_);
return error;
}
@@ -2572,23 +2524,6 @@ xfs_rmap_query_all(
return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
}
-/* Clean up after calling xfs_rmap_finish_one. */
-void
-xfs_rmap_finish_one_cleanup(
- struct xfs_trans *tp,
- struct xfs_btree_cur *rcur,
- int error)
-{
- struct xfs_buf *agbp;
-
- if (rcur == NULL)
- return;
- agbp = rcur->bc_ag.agbp;
- xfs_btree_del_cursor(rcur, error);
- if (error)
- xfs_trans_brelse(tp, agbp);
-}
-
/* Commit an rmap operation into the ondisk tree. */
int
__xfs_rmap_finish_intent(
@@ -2634,20 +2569,15 @@ xfs_rmap_finish_one(
struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur)
{
+ struct xfs_owner_info oinfo;
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *rcur;
+ struct xfs_btree_cur *rcur = *pcur;
struct xfs_buf *agbp = NULL;
- int error = 0;
- struct xfs_owner_info oinfo;
xfs_agblock_t bno;
bool unwritten;
+ int error = 0;
- bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock);
-
- trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno,
- ri->ri_owner, ri->ri_whichfork,
- ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount,
- ri->ri_bmap.br_state);
+ trace_xfs_rmap_deferred(mp, ri);
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
return -EIO;
@@ -2656,9 +2586,8 @@ xfs_rmap_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- rcur = *pcur;
if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
- xfs_rmap_finish_one_cleanup(tp, rcur, 0);
+ xfs_btree_del_cursor(rcur, 0);
rcur = NULL;
*pcur = NULL;
}
@@ -2678,9 +2607,8 @@ xfs_rmap_finish_one(
return -EFSCORRUPTED;
}
- rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
+ *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
}
- *pcur = rcur;
xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
ri->ri_bmap.br_startoff);
@@ -2722,15 +2650,6 @@ __xfs_rmap_add(
{
struct xfs_rmap_intent *ri;
- trace_xfs_rmap_defer(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
- type,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
- owner, whichfork,
- bmap->br_startoff,
- bmap->br_blockcount,
- bmap->br_state);
-
ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
@@ -2738,8 +2657,7 @@ __xfs_rmap_add(
ri->ri_whichfork = whichfork;
ri->ri_bmap = *bmap;
- xfs_rmap_update_get_group(tp->t_mountp, ri);
- xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
+ xfs_rmap_defer_add(tp, ri);
}
/* Map an extent into a file. */
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 9d01fe689497..b783dd4dd95d 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -157,6 +157,16 @@ enum xfs_rmap_intent_type {
XFS_RMAP_FREE,
};
+#define XFS_RMAP_INTENT_STRINGS \
+ { XFS_RMAP_MAP, "map" }, \
+ { XFS_RMAP_MAP_SHARED, "map_shared" }, \
+ { XFS_RMAP_UNMAP, "unmap" }, \
+ { XFS_RMAP_UNMAP_SHARED, "unmap_shared" }, \
+ { XFS_RMAP_CONVERT, "cvt" }, \
+ { XFS_RMAP_CONVERT_SHARED, "cvt_shared" }, \
+ { XFS_RMAP_ALLOC, "alloc" }, \
+ { XFS_RMAP_FREE, "free" }
+
struct xfs_rmap_intent {
struct list_head ri_list;
enum xfs_rmap_intent_type ri_type;
@@ -166,9 +176,6 @@ struct xfs_rmap_intent {
struct xfs_perag *ri_pag;
};
-void xfs_rmap_update_get_group(struct xfs_mount *mp,
- struct xfs_rmap_intent *ri);
-
/* functions for updating the rmapbt based on bmbt map/unmap operations */
void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, struct xfs_bmbt_irec *imap);
@@ -182,8 +189,6 @@ void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
-void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
- struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9e759efa81cc..56fd6c4bd8b4 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -88,6 +88,7 @@ xfs_rmapbt_alloc_block(
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_alloc_arg args = { .len = 1 };
int error;
xfs_agblock_t bno;
@@ -107,7 +108,11 @@ xfs_rmapbt_alloc_block(
be32_add_cpu(&agf->agf_rmap_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
- xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);
+ /*
+ * Since rmapbt blocks are sourced from the AGFL, they are allocated one
+ * at a time and the reservation updates don't require a transaction.
+ */
+ xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
*stat = 1;
return 0;
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 34f104ed372c..2f7413afbf46 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -177,13 +177,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_REFC_BTREE_REF 1
#define XFS_SSB_REF 0
-/*
- * Flags for xfs_trans_ichgtime().
- */
-#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
-#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
-
/* Computed inode geometry for the filesystem. */
struct xfs_ino_geometry {
/* Maximum inode count in this filesystem. */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 69fc5b981352..3c40f37e82c7 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -68,6 +68,8 @@ xfs_trans_ichgtime(
inode_set_mtime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CHG)
inode_set_ctime_to_ts(inode, tv);
+ if (flags & XFS_ICHGTIME_ACCESS)
+ inode_set_atime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CREATE)
ip->i_crtime = tv;
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6dbe6e7251e7..3dc8f785bf29 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -22,7 +22,6 @@
#include "xfs_rtbitmap.h"
#include "xfs_attr_item.h"
#include "xfs_log.h"
-#include "xfs_da_format.h"
#define _ALLOC true
#define _FREE false
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 1ad8ec63a7f4..22f5f1a9d3f0 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -26,6 +26,7 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
+#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 4a0271123d94..2aa14b7ab630 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -160,7 +160,8 @@ xrep_newbt_add_blocks(
if (args->tp) {
ASSERT(xnr->oinfo.oi_offset == 0);
- error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
+ error = xfs_alloc_schedule_autoreap(args,
+ XFS_FREE_EXTENT_SKIP_DISCARD, &resv->autoreap);
if (error)
goto out_pag;
}
@@ -414,7 +415,7 @@ xrep_newbt_free_extent(
*/
fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
- xnr->resv, true);
+ xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
if (error)
return error;
diff --git a/fs/xfs/scrub/quota_repair.c b/fs/xfs/scrub/quota_repair.c
index 90cd1512bba9..cd51f10f2920 100644
--- a/fs/xfs/scrub/quota_repair.c
+++ b/fs/xfs/scrub/quota_repair.c
@@ -12,7 +12,6 @@
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
-#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index be283153c254..53697f3c5e1b 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -451,7 +451,7 @@ xreap_agextent_iter(
xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
- rs->resv, true);
+ rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
if (error)
return error;
@@ -477,7 +477,7 @@ xreap_agextent_iter(
* system with large EFIs.
*/
error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
- rs->resv, true);
+ rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
if (error)
return error;
@@ -943,7 +943,8 @@ xrep_reap_bmapi_iter(
xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
-(int64_t)imap->br_blockcount);
return xfs_free_extent_later(sc->tp, imap->br_startblock,
- imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true);
+ imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
+ XFS_FREE_EXTENT_SKIP_DISCARD);
}
/*
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index b747b625c5ee..d390d56cd875 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -40,11 +40,16 @@ xrep_tempfile_create(
struct xfs_scrub *sc,
uint16_t mode)
{
+ struct xfs_icreate_args args = {
+ .pip = sc->mp->m_rootip,
+ .mode = mode,
+ .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
+ };
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = NULL;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
struct xfs_inode *dp = mp->m_rootip;
xfs_ino_t ino;
@@ -65,8 +70,7 @@ xrep_tempfile_create(
* inode should be completely root owned so that we don't fail due to
* quota limits.
*/
- error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
- XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
+ error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -87,14 +91,11 @@ xrep_tempfile_create(
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (error)
goto out_trans_cancel;
- error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
- 0, false, &sc->tempip);
+ error = xfs_icreate(tp, ino, &args, &sc->tempip);
if (error)
goto out_trans_cancel;
- /* Change the ownership of the inode to root. */
- VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
- VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
+ /* We don't touch file data, so drop the realtime flags. */
sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index f6ffb4f248f7..9355ccad9503 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -10,6 +10,10 @@
#define DEBUG 1
#endif
+#ifdef CONFIG_XFS_DEBUG_EXPENSIVE
+#define DEBUG_EXPENSIVE 1
+#endif
+
#ifdef CONFIG_XFS_ASSERT_FATAL
#define XFS_ASSERT_FATAL 1
#endif
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index a19d62e78aa1..e224b49b7cff 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -324,13 +324,9 @@ xfs_bmap_update_get_group(
struct xfs_mount *mp,
struct xfs_bmap_intent *bi)
{
- xfs_agnumber_t agno;
-
if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
return;
- agno = XFS_FSB_TO_AGNO(mp, bi->bi_bmap.br_startblock);
-
/*
* Bump the intent count on behalf of the deferred rmap and refcount
* intent items that that we can queue when we finish this bmap work.
@@ -338,7 +334,7 @@ xfs_bmap_update_get_group(
* intent drops the intent count, ensuring that the intent count
* remains nonzero across the transaction roll.
*/
- bi->bi_pag = xfs_perag_intent_get(mp, agno);
+ bi->bi_pag = xfs_perag_intent_get(mp, bi->bi_bmap.br_startblock);
}
/* Add this deferred BUI to the transaction. */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a4d9fbc21b83..fe2e2c930975 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -808,14 +808,18 @@ xfs_flush_unmap_range(
xfs_off_t offset,
xfs_off_t len)
{
- struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
xfs_off_t rounding, start, end;
int error;
- rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE);
- start = round_down(offset, rounding);
- end = round_up(offset + len, rounding) - 1;
+ /*
+ * Make sure we extend the flush out to extent alignment
+ * boundaries so any extent range overlapping the start/end
+ * of the modification we are about to do is clean and idle.
+ */
+ rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
+ start = rounddown_64(offset, rounding);
+ end = roundup_64(offset + len, rounding) - 1;
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
@@ -898,7 +902,7 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
- struct xfs_mount *mp = ip->i_mount;
+ unsigned int rounding;
int error;
/*
@@ -916,11 +920,13 @@ xfs_prepare_shift(
* with the full range of the operation. If we don't, a COW writeback
* completion could race with an insert, front merge with the start
* extent (after split) during the shift and corrupt the file. Start
- * with the block just prior to the start to stabilize the boundary.
+ * with the allocation unit just prior to the start to stabilize the
+ * boundary.
*/
- offset = round_down(offset, mp->m_sb.sb_blocksize);
+ rounding = xfs_inode_alloc_unitsize(ip);
+ offset = rounddown_64(offset, rounding);
if (offset)
- offset -= mp->m_sb.sb_blocksize;
+ offset -= rounding;
/*
* Writeback and invalidate cache for the remainder of the file as we're
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 43031842341a..47549cfa61cd 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_error.h"
struct kmem_cache *xfs_buf_item_cache;
@@ -781,8 +782,39 @@ xfs_buf_item_committed(
return lsn;
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_buf_item_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ if (!bp->b_ops || !bp->b_ops->verify_struct)
+ return 0;
+ if (bip->bli_flags & XFS_BLI_STALE)
+ return 0;
+
+ fa = bp->b_ops->verify_struct(bp);
+ if (fa) {
+ xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name,
+ bp->b_addr, BBTOB(bp->b_length), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_buf_item_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
+ .iop_precommit = xfs_buf_item_precommit,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 25fe3b932b5a..6f0fc7fe1f2b 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -20,6 +20,7 @@
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_rtbitmap.h"
/*
* Notes on an efficient, low latency fstrim algorithm
@@ -322,7 +323,7 @@ xfs_trim_should_stop(void)
* we found in the last batch as the key to start the next.
*/
static int
-xfs_trim_extents(
+xfs_trim_perag_extents(
struct xfs_perag *pag,
xfs_agblock_t start,
xfs_agblock_t end,
@@ -383,6 +384,259 @@ xfs_trim_extents(
}
+static int
+xfs_trim_datadev_extents(
+ struct xfs_mount *mp,
+ xfs_daddr_t start,
+ xfs_daddr_t end,
+ xfs_extlen_t minlen,
+ uint64_t *blocks_trimmed)
+{
+ xfs_agnumber_t start_agno, end_agno;
+ xfs_agblock_t start_agbno, end_agbno;
+ xfs_daddr_t ddev_end;
+ struct xfs_perag *pag;
+ int last_error = 0, error;
+
+ ddev_end = min_t(xfs_daddr_t, end,
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1);
+
+ start_agno = xfs_daddr_to_agno(mp, start);
+ start_agbno = xfs_daddr_to_agbno(mp, start);
+ end_agno = xfs_daddr_to_agno(mp, ddev_end);
+ end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
+
+ for_each_perag_range(mp, start_agno, end_agno, pag) {
+ xfs_agblock_t agend = pag->block_count;
+
+ if (start_agno == end_agno)
+ agend = end_agbno;
+ error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen,
+ blocks_trimmed);
+ if (error)
+ last_error = error;
+
+ if (xfs_trim_should_stop()) {
+ xfs_perag_rele(pag);
+ break;
+ }
+ start_agbno = 0;
+ }
+
+ return last_error;
+}
+
+#ifdef CONFIG_XFS_RT
+struct xfs_trim_rtdev {
+ /* list of rt extents to free */
+ struct list_head extent_list;
+
+ /* pointer to count of blocks trimmed */
+ uint64_t *blocks_trimmed;
+
+ /* minimum length that caller allows us to trim */
+ xfs_rtblock_t minlen_fsb;
+
+ /* restart point for the rtbitmap walk */
+ xfs_rtxnum_t restart_rtx;
+
+ /* stopping point for the current rtbitmap walk */
+ xfs_rtxnum_t stop_rtx;
+};
+
+struct xfs_rtx_busy {
+ struct list_head list;
+ xfs_rtblock_t bno;
+ xfs_rtblock_t length;
+};
+
+static void
+xfs_discard_free_rtdev_extents(
+ struct xfs_trim_rtdev *tr)
+{
+ struct xfs_rtx_busy *busyp, *n;
+
+ list_for_each_entry_safe(busyp, n, &tr->extent_list, list) {
+ list_del_init(&busyp->list);
+ kfree(busyp);
+ }
+}
+
+/*
+ * Walk the discard list and issue discards on all the busy extents in the
+ * list. We plug and chain the bios so that we only need a single completion
+ * call to clear all the busy extents once the discards are complete.
+ */
+static int
+xfs_discard_rtdev_extents(
+ struct xfs_mount *mp,
+ struct xfs_trim_rtdev *tr)
+{
+ struct block_device *bdev = mp->m_rtdev_targp->bt_bdev;
+ struct xfs_rtx_busy *busyp;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ xfs_rtblock_t start = NULLRTBLOCK, length = 0;
+ int error = 0;
+
+ blk_start_plug(&plug);
+ list_for_each_entry(busyp, &tr->extent_list, list) {
+ if (start == NULLRTBLOCK)
+ start = busyp->bno;
+ length += busyp->length;
+
+ trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
+
+ error = __blkdev_issue_discard(bdev,
+ XFS_FSB_TO_BB(mp, busyp->bno),
+ XFS_FSB_TO_BB(mp, busyp->length),
+ GFP_NOFS, &bio);
+ if (error)
+ break;
+ }
+ xfs_discard_free_rtdev_extents(tr);
+
+ if (bio) {
+ error = submit_bio_wait(bio);
+ if (error == -EOPNOTSUPP)
+ error = 0;
+ if (error)
+ xfs_info(mp,
+ "discard failed for rtextent [0x%llx,%llu], error %d",
+ (unsigned long long)start,
+ (unsigned long long)length,
+ error);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return error;
+}
+
+static int
+xfs_trim_gather_rtextent(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_trim_rtdev *tr = priv;
+ struct xfs_rtx_busy *busyp;
+ xfs_rtblock_t rbno, rlen;
+
+ if (rec->ar_startext > tr->stop_rtx) {
+ /*
+ * If we've scanned a large number of rtbitmap blocks, update
+ * the cursor to point at this extent so we restart the next
+ * batch from this extent.
+ */
+ tr->restart_rtx = rec->ar_startext;
+ return -ECANCELED;
+ }
+
+ rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
+ rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+
+ /* Ignore too small. */
+ if (rlen < tr->minlen_fsb) {
+ trace_xfs_discard_rttoosmall(mp, rbno, rlen);
+ return 0;
+ }
+
+ busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL);
+ if (!busyp)
+ return -ENOMEM;
+
+ busyp->bno = rbno;
+ busyp->length = rlen;
+ INIT_LIST_HEAD(&busyp->list);
+ list_add_tail(&busyp->list, &tr->extent_list);
+ *tr->blocks_trimmed += rlen;
+
+ tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
+ return 0;
+}
+
+static int
+xfs_trim_rtdev_extents(
+ struct xfs_mount *mp,
+ xfs_daddr_t start,
+ xfs_daddr_t end,
+ xfs_daddr_t minlen,
+ uint64_t *blocks_trimmed)
+{
+ struct xfs_rtalloc_rec low = { };
+ struct xfs_rtalloc_rec high = { };
+ struct xfs_trim_rtdev tr = {
+ .blocks_trimmed = blocks_trimmed,
+ .minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
+ };
+ struct xfs_trans *tp;
+ xfs_daddr_t rtdev_daddr;
+ int error;
+
+ INIT_LIST_HEAD(&tr.extent_list);
+
+ /* Shift the start and end downwards to match the rt device. */
+ rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (start > rtdev_daddr)
+ start -= rtdev_daddr;
+ else
+ start = 0;
+
+ if (end <= rtdev_daddr)
+ return 0;
+ end -= rtdev_daddr;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ end = min_t(xfs_daddr_t, end,
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1);
+
+ /* Convert the rt blocks to rt extents */
+ low.ar_startext = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start));
+ high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end));
+
+ /*
+ * Walk the free ranges between low and high. The query_range function
+ * trims the extents returned.
+ */
+ do {
+ tr.stop_rtx = low.ar_startext + (mp->m_sb.sb_blocksize * NBBY);
+ xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
+ error = xfs_rtalloc_query_range(mp, tp, &low, &high,
+ xfs_trim_gather_rtextent, &tr);
+
+ if (error == -ECANCELED)
+ error = 0;
+ if (error) {
+ xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ xfs_discard_free_rtdev_extents(&tr);
+ break;
+ }
+
+ if (list_empty(&tr.extent_list)) {
+ xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ break;
+ }
+
+ error = xfs_discard_rtdev_extents(mp, &tr);
+ xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ if (error)
+ break;
+
+ low.ar_startext = tr.restart_rtx;
+ } while (!xfs_trim_should_stop() && low.ar_startext <= high.ar_startext);
+
+ xfs_trans_cancel(tp);
+ return error;
+}
+#else
+# define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_RT */
+
/*
* trim a range of the filesystem.
*
@@ -391,28 +645,37 @@ xfs_trim_extents(
* addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
* is a linear address range. Hence we need to use DADDR based conversions and
* comparisons for determining the correct offset and regions to trim.
+ *
+ * The realtime device is mapped into the FITRIM "address space" immediately
+ * after the data device.
*/
int
xfs_ioc_trim(
struct xfs_mount *mp,
struct fstrim_range __user *urange)
{
- struct xfs_perag *pag;
unsigned int granularity =
bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
+ struct block_device *rt_bdev = NULL;
struct fstrim_range range;
xfs_daddr_t start, end;
xfs_extlen_t minlen;
- xfs_agnumber_t start_agno, end_agno;
- xfs_agblock_t start_agbno, end_agbno;
+ xfs_rfsblock_t max_blocks;
uint64_t blocks_trimmed = 0;
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
+ if (mp->m_rtdev_targp &&
+ bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev))
+ rt_bdev = mp->m_rtdev_targp->bt_bdev;
+ if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev)
return -EOPNOTSUPP;
+ if (rt_bdev)
+ granularity = max(granularity,
+ bdev_discard_granularity(rt_bdev));
+
/*
* We haven't recovered the log, so we cannot use our bnobt-guided
* storage zapping commands.
@@ -433,35 +696,27 @@ xfs_ioc_trim(
* used by the fstrim application. In the end it really doesn't
* matter as trimming blocks is an advisory interface.
*/
- if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+ max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
+ if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
range.len < mp->m_sb.sb_blocksize)
return -EINVAL;
start = BTOBB(range.start);
- end = min_t(xfs_daddr_t, start + BTOBBT(range.len),
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) - 1;
+ end = start + BTOBBT(range.len) - 1;
- start_agno = xfs_daddr_to_agno(mp, start);
- start_agbno = xfs_daddr_to_agbno(mp, start);
- end_agno = xfs_daddr_to_agno(mp, end);
- end_agbno = xfs_daddr_to_agbno(mp, end);
-
- for_each_perag_range(mp, start_agno, end_agno, pag) {
- xfs_agblock_t agend = pag->block_count;
-
- if (start_agno == end_agno)
- agend = end_agbno;
- error = xfs_trim_extents(pag, start_agbno, agend, minlen,
+ if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) {
+ error = xfs_trim_datadev_extents(mp, start, end, minlen,
&blocks_trimmed);
if (error)
last_error = error;
+ }
- if (xfs_trim_should_stop()) {
- xfs_perag_rele(pag);
- break;
- }
- start_agbno = 0;
+ if (rt_bdev && !xfs_trim_should_stop()) {
+ error = xfs_trim_rtdev_extents(mp, start, end, minlen,
+ &blocks_trimmed);
+ if (error)
+ last_error = error;
}
if (last_error)
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 6a1aae799cf1..7d19091215b0 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -17,6 +17,7 @@
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_log.h"
+#include "xfs_error.h"
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
{
@@ -193,8 +194,38 @@ xfs_qm_dquot_logitem_committing(
return xfs_qm_dquot_logitem_release(lip);
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_qm_dquot_logitem_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_disk_dquot ddq = { };
+ xfs_failaddr_t fa;
+
+ xfs_dquot_to_disk(&ddq, dqp);
+ fa = xfs_dquot_verify(mp, &ddq, dqp->q_id);
+ if (fa) {
+ XFS_CORRUPTION_ERROR("Bad dquot during logging",
+ XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+ fa, dqp->q_id);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_qm_dquot_logitem_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
+ .iop_precommit = xfs_qm_dquot_logitem_precommit,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
.iop_unpin = xfs_qm_dquot_logitem_unpin,
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c
index 005a66be44a2..7bdb9688c0f5 100644
--- a/fs/xfs/xfs_drain.c
+++ b/fs/xfs/xfs_drain.c
@@ -94,17 +94,17 @@ static inline int xfs_defer_drain_wait(struct xfs_defer_drain *dr)
}
/*
- * Get a passive reference to an AG and declare an intent to update its
- * metadata.
+ * Get a passive reference to the AG that contains a fsbno and declare an intent
+ * to update its metadata.
*/
struct xfs_perag *
xfs_perag_intent_get(
struct xfs_mount *mp,
- xfs_agnumber_t agno)
+ xfs_fsblock_t fsbno)
{
struct xfs_perag *pag;
- pag = xfs_perag_get(mp, agno);
+ pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsbno));
if (!pag)
return NULL;
diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h
index 50a5772a8296..775164f54ea6 100644
--- a/fs/xfs/xfs_drain.h
+++ b/fs/xfs/xfs_drain.h
@@ -62,7 +62,7 @@ void xfs_drain_wait_enable(void);
* until the item is finished or cancelled.
*/
struct xfs_perag *xfs_perag_intent_get(struct xfs_mount *mp,
- xfs_agnumber_t agno);
+ xfs_fsblock_t fsbno);
void xfs_perag_intent_put(struct xfs_perag *pag);
void xfs_perag_intent_hold(struct xfs_perag *pag);
@@ -76,7 +76,8 @@ struct xfs_defer_drain { /* empty */ };
#define xfs_defer_drain_free(dr) ((void)0)
#define xfs_defer_drain_init(dr) ((void)0)
-#define xfs_perag_intent_get(mp, agno) xfs_perag_get((mp), (agno))
+#define xfs_perag_intent_get(mp, fsbno) \
+ xfs_perag_get((mp), XFS_FSB_TO_AGNO(mp, fsbno))
#define xfs_perag_intent_put(pag) xfs_perag_put(pag)
static inline void xfs_perag_intent_hold(struct xfs_perag *pag) { }
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 8c382f092332..abffc74a924f 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -303,6 +303,11 @@ static const struct xfs_item_ops xfs_efd_item_ops = {
.iop_intent = xfs_efd_item_intent,
};
+static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e)
+{
+ return list_entry(e, struct xfs_extent_free_item, xefi_list);
+}
+
/*
* Fill the EFD with all extents from the EFI when we need to roll the
* transaction and continue with a new EFI.
@@ -331,6 +336,22 @@ xfs_efd_from_efi(
efdp->efd_next_extent = efip->efi_format.efi_nextents;
}
+static void
+xfs_efd_add_extent(
+ struct xfs_efd_log_item *efdp,
+ struct xfs_extent_free_item *xefi)
+{
+ struct xfs_extent *extp;
+
+ ASSERT(efdp->efd_next_extent < efdp->efd_format.efd_nextents);
+
+ extp = &efdp->efd_format.efd_extents[efdp->efd_next_extent];
+ extp->ext_start = xefi->xefi_startblock;
+ extp->ext_len = xefi->xefi_blockcount;
+
+ efdp->efd_next_extent++;
+}
+
/* Sort bmap items by AG. */
static int
xfs_extent_free_diff_items(
@@ -338,11 +359,8 @@ xfs_extent_free_diff_items(
const struct list_head *a,
const struct list_head *b)
{
- struct xfs_extent_free_item *ra;
- struct xfs_extent_free_item *rb;
-
- ra = container_of(a, struct xfs_extent_free_item, xefi_list);
- rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+ struct xfs_extent_free_item *ra = xefi_entry(a);
+ struct xfs_extent_free_item *rb = xefi_entry(b);
return ra->xefi_pag->pag_agno - rb->xefi_pag->pag_agno;
}
@@ -418,24 +436,35 @@ xfs_extent_free_create_done(
return &efdp->efd_item;
}
-/* Take a passive ref to the AG containing the space we're freeing. */
+/* Add this deferred EFI to the transaction. */
void
-xfs_extent_free_get_group(
- struct xfs_mount *mp,
- struct xfs_extent_free_item *xefi)
+xfs_extent_free_defer_add(
+ struct xfs_trans *tp,
+ struct xfs_extent_free_item *xefi,
+ struct xfs_defer_pending **dfpp)
{
- xfs_agnumber_t agno;
+ struct xfs_mount *mp = tp->t_mountp;
+
+ trace_xfs_extent_free_defer(mp, xefi);
- agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock);
- xefi->xefi_pag = xfs_perag_intent_get(mp, agno);
+ xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock);
+ if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
+ *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
+ &xfs_agfl_free_defer_type);
+ else
+ *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
+ &xfs_extent_free_defer_type);
}
-/* Release a passive AG ref after some freeing work. */
-static inline void
-xfs_extent_free_put_group(
- struct xfs_extent_free_item *xefi)
+/* Cancel a free extent. */
+STATIC void
+xfs_extent_free_cancel_item(
+ struct list_head *item)
{
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
+
xfs_perag_intent_put(xefi->xefi_pag);
+ kmem_cache_free(xfs_extfree_item_cache, xefi);
}
/* Process a free extent. */
@@ -447,15 +476,12 @@ xfs_extent_free_finish_item(
struct xfs_btree_cur **state)
{
struct xfs_owner_info oinfo = { };
- struct xfs_extent_free_item *xefi;
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
struct xfs_efd_log_item *efdp = EFD_ITEM(done);
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent *extp;
- uint next_extent;
xfs_agblock_t agbno;
int error = 0;
- xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
oinfo.oi_owner = xefi->xefi_owner;
@@ -464,8 +490,7 @@ xfs_extent_free_finish_item(
if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK)
oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
- trace_xfs_bmap_free_deferred(tp->t_mountp, xefi->xefi_pag->pag_agno, 0,
- agbno, xefi->xefi_blockcount);
+ trace_xfs_extent_free_deferred(mp, xefi);
/*
* If we need a new transaction to make progress, the caller will log a
@@ -482,16 +507,8 @@ xfs_extent_free_finish_item(
return error;
}
- /* Add the work we finished to the EFD, even though nobody uses that */
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = xefi->xefi_startblock;
- extp->ext_len = xefi->xefi_blockcount;
- efdp->efd_next_extent++;
-
- xfs_extent_free_put_group(xefi);
- kmem_cache_free(xfs_extfree_item_cache, xefi);
+ xfs_efd_add_extent(efdp, xefi);
+ xfs_extent_free_cancel_item(item);
return error;
}
@@ -503,19 +520,6 @@ xfs_extent_free_abort_intent(
xfs_efi_release(EFI_ITEM(intent));
}
-/* Cancel a free extent. */
-STATIC void
-xfs_extent_free_cancel_item(
- struct list_head *item)
-{
- struct xfs_extent_free_item *xefi;
-
- xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
-
- xfs_extent_free_put_group(xefi);
- kmem_cache_free(xfs_extfree_item_cache, xefi);
-}
-
/*
* AGFL blocks are accounted differently in the reserve pools and are not
* inserted into the busy extent list.
@@ -530,35 +534,24 @@ xfs_agfl_free_finish_item(
struct xfs_owner_info oinfo = { };
struct xfs_mount *mp = tp->t_mountp;
struct xfs_efd_log_item *efdp = EFD_ITEM(done);
- struct xfs_extent_free_item *xefi;
- struct xfs_extent *extp;
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
struct xfs_buf *agbp;
int error;
xfs_agblock_t agbno;
- uint next_extent;
- xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
ASSERT(xefi->xefi_blockcount == 1);
agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
oinfo.oi_owner = xefi->xefi_owner;
- trace_xfs_agfl_free_deferred(mp, xefi->xefi_pag->pag_agno, 0, agbno,
- xefi->xefi_blockcount);
+ trace_xfs_agfl_free_deferred(mp, xefi);
error = xfs_alloc_read_agf(xefi->xefi_pag, tp, 0, &agbp);
if (!error)
- error = xfs_free_agfl_block(tp, xefi->xefi_pag->pag_agno,
- agbno, agbp, &oinfo);
+ error = xfs_free_ag_extent(tp, agbp, xefi->xefi_pag->pag_agno,
+ agbno, 1, &oinfo, XFS_AG_RESV_AGFL);
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = xefi->xefi_startblock;
- extp->ext_len = xefi->xefi_blockcount;
- efdp->efd_next_extent++;
-
- xfs_extent_free_put_group(xefi);
- kmem_cache_free(xfs_extfree_item_cache, xefi);
+ xfs_efd_add_extent(efdp, xefi);
+ xfs_extent_free_cancel_item(&xefi->xefi_list);
return error;
}
@@ -585,7 +578,7 @@ xfs_efi_recover_work(
xefi->xefi_blockcount = extp->ext_len;
xefi->xefi_agresv = XFS_AG_RESV_NONE;
xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN;
- xfs_extent_free_get_group(mp, xefi);
+ xefi->xefi_pag = xfs_perag_intent_get(mp, extp->ext_start);
xfs_defer_add_item(dfp, &xefi->xefi_list);
}
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index da6a5afa607c..41b7c4306079 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -88,4 +88,10 @@ xfs_efd_log_item_sizeof(
extern struct kmem_cache *xfs_efi_cache;
extern struct kmem_cache *xfs_efd_cache;
+struct xfs_extent_free_item;
+
+void xfs_extent_free_defer_add(struct xfs_trans *tp,
+ struct xfs_extent_free_item *xefi,
+ struct xfs_defer_pending **dfpp);
+
#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b240ea5241dc..4cdc54dc9686 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -213,29 +213,18 @@ xfs_ilock_iocb_for_write(
if (ret)
return ret;
- if (*lock_mode == XFS_IOLOCK_EXCL)
- return 0;
- if (!xfs_iflags_test(ip, XFS_IREMAPPING))
- return 0;
-
- xfs_iunlock(ip, *lock_mode);
- *lock_mode = XFS_IOLOCK_EXCL;
- return xfs_ilock_iocb(iocb, *lock_mode);
-}
-
-static unsigned int
-xfs_ilock_for_write_fault(
- struct xfs_inode *ip)
-{
- /* get a shared lock if no remapping in progress */
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- if (!xfs_iflags_test(ip, XFS_IREMAPPING))
- return XFS_MMAPLOCK_SHARED;
+ /*
+ * If a reflink remap is in progress we always need to take the iolock
+ * exclusively to wait for it to finish.
+ */
+ if (*lock_mode == XFS_IOLOCK_SHARED &&
+ xfs_iflags_test(ip, XFS_IREMAPPING)) {
+ xfs_iunlock(ip, *lock_mode);
+ *lock_mode = XFS_IOLOCK_EXCL;
+ return xfs_ilock_iocb(iocb, *lock_mode);
+ }
- /* wait for remapping to complete */
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- return XFS_MMAPLOCK_EXCL;
+ return 0;
}
STATIC ssize_t
@@ -1247,31 +1236,77 @@ xfs_file_llseek(
return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
-#ifdef CONFIG_FS_DAX
static inline vm_fault_t
-xfs_dax_fault(
+xfs_dax_fault_locked(
struct vm_fault *vmf,
unsigned int order,
- bool write_fault,
- pfn_t *pfn)
+ bool write_fault)
{
- return dax_iomap_fault(vmf, order, pfn, NULL,
+ vm_fault_t ret;
+ pfn_t pfn;
+
+ if (!IS_ENABLED(CONFIG_FS_DAX)) {
+ ASSERT(0);
+ return VM_FAULT_SIGBUS;
+ }
+ ret = dax_iomap_fault(vmf, order, &pfn, NULL,
(write_fault && !vmf->cow_page) ?
&xfs_dax_write_iomap_ops :
&xfs_read_iomap_ops);
+ if (ret & VM_FAULT_NEEDDSYNC)
+ ret = dax_finish_sync_fault(vmf, order, pfn);
+ return ret;
}
-#else
-static inline vm_fault_t
-xfs_dax_fault(
+
+static vm_fault_t
+xfs_dax_read_fault(
struct vm_fault *vmf,
- unsigned int order,
- bool write_fault,
- pfn_t *pfn)
+ unsigned int order)
{
- ASSERT(0);
- return VM_FAULT_SIGBUS;
+ struct xfs_inode *ip = XFS_I(file_inode(vmf->vma->vm_file));
+ vm_fault_t ret;
+
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ ret = xfs_dax_fault_locked(vmf, order, false);
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+ return ret;
+}
+
+static vm_fault_t
+xfs_write_fault(
+ struct vm_fault *vmf,
+ unsigned int order)
+{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct xfs_inode *ip = XFS_I(inode);
+ unsigned int lock_mode = XFS_MMAPLOCK_SHARED;
+ vm_fault_t ret;
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vmf->vma->vm_file);
+
+ /*
+ * Normally we only need the shared mmaplock, but if a reflink remap is
+ * in progress we take the exclusive lock to wait for the remap to
+ * finish before taking a write fault.
+ */
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ if (xfs_iflags_test(ip, XFS_IREMAPPING)) {
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ lock_mode = XFS_MMAPLOCK_EXCL;
+ }
+
+ if (IS_DAX(inode))
+ ret = xfs_dax_fault_locked(vmf, order, true);
+ else
+ ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
+ xfs_iunlock(ip, lock_mode);
+
+ sb_end_pagefault(inode->i_sb);
+ return ret;
}
-#endif
/*
* Locking for serialisation of IO during page faults. This results in a lock
@@ -1290,38 +1325,14 @@ __xfs_filemap_fault(
bool write_fault)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
- struct xfs_inode *ip = XFS_I(inode);
- vm_fault_t ret;
- unsigned int lock_mode = 0;
-
- trace_xfs_filemap_fault(ip, order, write_fault);
-
- if (write_fault) {
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
- }
-
- if (IS_DAX(inode) || write_fault)
- lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
- if (IS_DAX(inode)) {
- pfn_t pfn;
-
- ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
- if (ret & VM_FAULT_NEEDDSYNC)
- ret = dax_finish_sync_fault(vmf, order, pfn);
- } else if (write_fault) {
- ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
- } else {
- ret = filemap_fault(vmf);
- }
-
- if (lock_mode)
- xfs_iunlock(XFS_I(inode), lock_mode);
+ trace_xfs_filemap_fault(XFS_I(inode), order, write_fault);
if (write_fault)
- sb_end_pagefault(inode->i_sb);
- return ret;
+ return xfs_write_fault(vmf, order);
+ if (IS_DAX(inode))
+ return xfs_dax_read_fault(vmf, order);
+ return filemap_fault(vmf);
}
static inline bool
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index a3f16e9b6fe5..cf5acbd3c7ca 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -21,7 +21,6 @@
#include "xfs_attr.h"
#include "xfs_ioctl.h"
#include "xfs_parent.h"
-#include "xfs_da_btree.h"
#include "xfs_handle.h"
#include "xfs_health.h"
#include "xfs_icache.h"
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a4e3cd8971fc..7dc6f326936c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,55 +42,11 @@
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
-#include "xfs_sb.h"
+#include "xfs_inode_util.h"
struct kmem_cache *xfs_inode_cache;
/*
- * helper function to extract extent size hint from inode
- */
-xfs_extlen_t
-xfs_get_extsz_hint(
- struct xfs_inode *ip)
-{
- /*
- * No point in aligning allocations if we need to COW to actually
- * write to them.
- */
- if (xfs_is_always_cow_inode(ip))
- return 0;
- if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
- return ip->i_extsize;
- if (XFS_IS_REALTIME_INODE(ip) &&
- ip->i_mount->m_sb.sb_rextsize > 1)
- return ip->i_mount->m_sb.sb_rextsize;
- return 0;
-}
-
-/*
- * Helper function to extract CoW extent size hint from inode.
- * Between the extent size hint and the CoW extent size hint, we
- * return the greater of the two. If the value is zero (automatic),
- * use the default size.
- */
-xfs_extlen_t
-xfs_get_cowextsz_hint(
- struct xfs_inode *ip)
-{
- xfs_extlen_t a, b;
-
- a = 0;
- if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
- a = ip->i_cowextsize;
- b = xfs_get_extsz_hint(ip);
-
- a = max(a, b);
- if (a == 0)
- return XFS_DEFAULT_COWEXTSZ_HINT;
- return a;
-}
-
-/*
* These two are wrapper routines around the xfs_ilock() routine used to
* centralize some grungy code. They are used in places that wish to lock the
* inode solely for reading the extents. The reason these places can't just
@@ -567,55 +523,6 @@ xfs_lock_two_inodes(
}
}
-uint
-xfs_ip2xflags(
- struct xfs_inode *ip)
-{
- uint flags = 0;
-
- if (ip->i_diflags & XFS_DIFLAG_ANY) {
- if (ip->i_diflags & XFS_DIFLAG_REALTIME)
- flags |= FS_XFLAG_REALTIME;
- if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
- flags |= FS_XFLAG_PREALLOC;
- if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
- flags |= FS_XFLAG_IMMUTABLE;
- if (ip->i_diflags & XFS_DIFLAG_APPEND)
- flags |= FS_XFLAG_APPEND;
- if (ip->i_diflags & XFS_DIFLAG_SYNC)
- flags |= FS_XFLAG_SYNC;
- if (ip->i_diflags & XFS_DIFLAG_NOATIME)
- flags |= FS_XFLAG_NOATIME;
- if (ip->i_diflags & XFS_DIFLAG_NODUMP)
- flags |= FS_XFLAG_NODUMP;
- if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
- flags |= FS_XFLAG_RTINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
- flags |= FS_XFLAG_PROJINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
- flags |= FS_XFLAG_NOSYMLINKS;
- if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
- flags |= FS_XFLAG_EXTSIZE;
- if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= FS_XFLAG_EXTSZINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
- flags |= FS_XFLAG_NODEFRAG;
- if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
- flags |= FS_XFLAG_FILESTREAM;
- }
-
- if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
- if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
- flags |= FS_XFLAG_DAX;
- if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
- flags |= FS_XFLAG_COWEXTSIZE;
- }
-
- if (xfs_inode_has_attr_fork(ip))
- flags |= FS_XFLAG_HASATTR;
- return flags;
-}
-
/*
* Lookups up an inode from "name". If ci_name is not NULL, then a CI match
* is allowed, otherwise it has to be an exact match. If a CI match is found,
@@ -657,97 +564,6 @@ out_unlock:
return error;
}
-/* Propagate di_flags from a parent inode to a child inode. */
-static void
-xfs_inode_inherit_flags(
- struct xfs_inode *ip,
- const struct xfs_inode *pip)
-{
- unsigned int di_flags = 0;
- xfs_failaddr_t failaddr;
- umode_t mode = VFS_I(ip)->i_mode;
-
- if (S_ISDIR(mode)) {
- if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- ip->i_extsize = pip->i_extsize;
- }
- if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(mode)) {
- if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- xfs_has_realtime(ip->i_mount))
- di_flags |= XFS_DIFLAG_REALTIME;
- if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSIZE;
- ip->i_extsize = pip->i_extsize;
- }
- }
- if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
- xfs_inherit_noatime)
- di_flags |= XFS_DIFLAG_NOATIME;
- if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
- xfs_inherit_nodump)
- di_flags |= XFS_DIFLAG_NODUMP;
- if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
- xfs_inherit_sync)
- di_flags |= XFS_DIFLAG_SYNC;
- if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
- xfs_inherit_nosymlinks)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
- xfs_inherit_nodefrag)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
-
- ip->i_diflags |= di_flags;
-
- /*
- * Inode verifiers on older kernels only check that the extent size
- * hint is an integer multiple of the rt extent size on realtime files.
- * They did not check the hint alignment on a directory with both
- * rtinherit and extszinherit flags set. If the misaligned hint is
- * propagated from a directory into a new realtime file, new file
- * allocations will fail due to math errors in the rt allocator and/or
- * trip the verifiers. Validate the hint settings in the new file so
- * that we don't let broken hints propagate.
- */
- failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
- VFS_I(ip)->i_mode, ip->i_diflags);
- if (failaddr) {
- ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
- XFS_DIFLAG_EXTSZINHERIT);
- ip->i_extsize = 0;
- }
-}
-
-/* Propagate di_flags2 from a parent inode to a child inode. */
-static void
-xfs_inode_inherit_flags2(
- struct xfs_inode *ip,
- const struct xfs_inode *pip)
-{
- xfs_failaddr_t failaddr;
-
- if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
- ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
- ip->i_cowextsize = pip->i_cowextsize;
- }
- if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
- ip->i_diflags2 |= XFS_DIFLAG2_DAX;
-
- /* Don't let invalid cowextsize hints propagate. */
- failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
- VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
- if (failaddr) {
- ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
- ip->i_cowextsize = 0;
- }
-}
-
/*
* Initialise a newly allocated inode and return the in-core inode to the
* caller locked exclusively.
@@ -755,39 +571,15 @@ xfs_inode_inherit_flags2(
* Caller is responsible for unlocking the inode manually upon return
*/
int
-xfs_init_new_inode(
- struct mnt_idmap *idmap,
+xfs_icreate(
struct xfs_trans *tp,
- struct xfs_inode *pip,
xfs_ino_t ino,
- umode_t mode,
- xfs_nlink_t nlink,
- dev_t rdev,
- prid_t prid,
- bool init_xattrs,
+ const struct xfs_icreate_args *args,
struct xfs_inode **ipp)
{
- struct inode *dir = pip ? VFS_I(pip) : NULL;
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_inode *ip;
- unsigned int flags;
+ struct xfs_inode *ip = NULL;
int error;
- struct timespec64 tv;
- struct inode *inode;
-
- /*
- * Protect against obviously corrupt allocation btree records. Later
- * xfs_iget checks will catch re-allocation of other active in-memory
- * and on-disk inodes. If we don't catch reallocating the parent inode
- * here we will deadlock in xfs_iget() so we have to do these checks
- * first.
- */
- if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) {
- xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
- xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
- XFS_SICK_AG_INOBT);
- return -EFSCORRUPTED;
- }
/*
* Get the in-core inode with the lock held exclusively to prevent
@@ -798,96 +590,8 @@ xfs_init_new_inode(
return error;
ASSERT(ip != NULL);
- inode = VFS_I(ip);
- set_nlink(inode, nlink);
- inode->i_rdev = rdev;
- ip->i_projid = prid;
-
- if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
- inode_fsuid_set(inode, idmap);
- inode->i_gid = dir->i_gid;
- inode->i_mode = mode;
- } else {
- inode_init_owner(idmap, inode, dir, mode);
- }
-
- /*
- * If the group ID of the new file does not match the effective group
- * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
- * (and only if the irix_sgid_inherit compatibility variable is set).
- */
- if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)))
- inode->i_mode &= ~S_ISGID;
-
- ip->i_disk_size = 0;
- ip->i_df.if_nextents = 0;
- ASSERT(ip->i_nblocks == 0);
-
- tv = inode_set_ctime_current(inode);
- inode_set_mtime_to_ts(inode, tv);
- inode_set_atime_to_ts(inode, tv);
-
- ip->i_extsize = 0;
- ip->i_diflags = 0;
-
- if (xfs_has_v3inodes(mp)) {
- inode_set_iversion(inode, 1);
- ip->i_cowextsize = 0;
- ip->i_crtime = tv;
- }
-
- flags = XFS_ILOG_CORE;
- switch (mode & S_IFMT) {
- case S_IFIFO:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFSOCK:
- ip->i_df.if_format = XFS_DINODE_FMT_DEV;
- flags |= XFS_ILOG_DEV;
- break;
- case S_IFREG:
- case S_IFDIR:
- if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
- xfs_inode_inherit_flags(ip, pip);
- if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
- xfs_inode_inherit_flags2(ip, pip);
- fallthrough;
- case S_IFLNK:
- ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
- ip->i_df.if_bytes = 0;
- ip->i_df.if_data = NULL;
- break;
- default:
- ASSERT(0);
- }
-
- /*
- * If we need to create attributes immediately after allocating the
- * inode, initialise an empty attribute fork right now. We use the
- * default fork offset for attributes here as we don't know exactly what
- * size or how many attributes we might be adding. We can do this
- * safely here because we know the data fork is completely empty and
- * this saves us from needing to run a separate transaction to set the
- * fork offset in the immediate future.
- */
- if (init_xattrs) {
- ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
- xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
-
- if (!xfs_has_attr(mp)) {
- spin_lock(&mp->m_sb_lock);
- xfs_add_attr(mp);
- spin_unlock(&mp->m_sb_lock);
- xfs_log_sb(tp);
- }
- }
-
- /*
- * Log the new values stuffed into the inode.
- */
xfs_trans_ijoin(tp, ip, 0);
- xfs_trans_log_inode(tp, ip, flags);
+ xfs_inode_init(tp, args, ip);
/* now that we have an i_mode we can setup the inode structure */
xfs_setup_inode(ip);
@@ -896,158 +600,60 @@ xfs_init_new_inode(
return 0;
}
-/*
- * Decrement the link count on an inode & log the change. If this causes the
- * link count to go to zero, move the inode to AGI unlinked list so that it can
- * be freed when the last active reference goes away via xfs_inactive().
- */
+/* Return dquots for the ids that will be assigned to a new file. */
int
-xfs_droplink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
- if (inode->i_nlink == 0) {
- xfs_info_ratelimited(tp->t_mountp,
- "Inode 0x%llx link count dropped below zero. Pinning link count.",
- ip->i_ino);
- set_nlink(inode, XFS_NLINK_PINNED);
- }
- if (inode->i_nlink != XFS_NLINK_PINNED)
- drop_nlink(inode);
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- if (inode->i_nlink)
- return 0;
-
- return xfs_iunlink(tp, ip);
-}
-
-/*
- * Increment the link count on an inode & log the change.
- */
-void
-xfs_bumplink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
- if (inode->i_nlink == XFS_NLINK_PINNED - 1)
- xfs_info_ratelimited(tp->t_mountp,
- "Inode 0x%llx link count exceeded maximum. Pinning link count.",
- ip->i_ino);
- if (inode->i_nlink != XFS_NLINK_PINNED)
- inc_nlink(inode);
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-}
-
-#ifdef CONFIG_XFS_LIVE_HOOKS
-/*
- * Use a static key here to reduce the overhead of directory live update hooks.
- * If the compiler supports jump labels, the static branch will be replaced by
- * a nop sled when there are no hook users. Online fsck is currently the only
- * caller, so this is a reasonable tradeoff.
- *
- * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
- * parts of the kernel allocate memory with that lock held, which means that
- * XFS callers cannot hold any locks that might be used by memory reclaim or
- * writeback when calling the static_branch_{inc,dec} functions.
- */
-DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
-
-void
-xfs_dir_hook_disable(void)
-{
- xfs_hooks_switch_off(&xfs_dir_hooks_switch);
-}
-
-void
-xfs_dir_hook_enable(void)
-{
- xfs_hooks_switch_on(&xfs_dir_hooks_switch);
-}
-
-/* Call hooks for a directory update relating to a child dirent update. */
-inline void
-xfs_dir_update_hook(
- struct xfs_inode *dp,
- struct xfs_inode *ip,
- int delta,
- const struct xfs_name *name)
-{
- if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
- struct xfs_dir_update_params p = {
- .dp = dp,
- .ip = ip,
- .delta = delta,
- .name = name,
- };
- struct xfs_mount *mp = ip->i_mount;
-
- xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
+xfs_icreate_dqalloc(
+ const struct xfs_icreate_args *args,
+ struct xfs_dquot **udqpp,
+ struct xfs_dquot **gdqpp,
+ struct xfs_dquot **pdqpp)
+{
+ struct inode *dir = VFS_I(args->pip);
+ kuid_t uid = GLOBAL_ROOT_UID;
+ kgid_t gid = GLOBAL_ROOT_GID;
+ prid_t prid = 0;
+ unsigned int flags = XFS_QMOPT_QUOTALL;
+
+ if (args->idmap) {
+ /*
+ * The uid/gid computation code must match what the VFS uses to
+ * assign i_[ug]id. INHERIT adjusts the gid computation for
+ * setgid/grpid systems.
+ */
+ uid = mapped_fsuid(args->idmap, i_user_ns(dir));
+ gid = mapped_fsgid(args->idmap, i_user_ns(dir));
+ prid = xfs_get_initial_prid(args->pip);
+ flags |= XFS_QMOPT_INHERIT;
}
-}
-/* Call the specified function during a directory update. */
-int
-xfs_dir_hook_add(
- struct xfs_mount *mp,
- struct xfs_dir_hook *hook)
-{
- return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
-}
+ *udqpp = *gdqpp = *pdqpp = NULL;
-/* Stop calling the specified function during a directory update. */
-void
-xfs_dir_hook_del(
- struct xfs_mount *mp,
- struct xfs_dir_hook *hook)
-{
- xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
+ return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp,
+ gdqpp, pdqpp);
}
-/* Configure directory update hook functions. */
-void
-xfs_dir_hook_setup(
- struct xfs_dir_hook *hook,
- notifier_fn_t mod_fn)
-{
- xfs_hook_setup(&hook->dirent_hook, mod_fn);
-}
-#endif /* CONFIG_XFS_LIVE_HOOKS */
-
int
xfs_create(
- struct mnt_idmap *idmap,
- struct xfs_inode *dp,
+ const struct xfs_icreate_args *args,
struct xfs_name *name,
- umode_t mode,
- dev_t rdev,
- bool init_xattrs,
- xfs_inode_t **ipp)
+ struct xfs_inode **ipp)
{
- int is_dir = S_ISDIR(mode);
+ struct xfs_inode *dp = args->pip;
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = name,
+ };
struct xfs_mount *mp = dp->i_mount;
- struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
- int error;
- bool unlock_dp_on_error = false;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
- uint resblks;
xfs_ino_t ino;
- struct xfs_parent_args *ppargs;
+ bool unlock_dp_on_error = false;
+ bool is_dir = S_ISDIR(args->mode);
+ uint resblks;
+ int error;
trace_xfs_create(dp, name);
@@ -1056,15 +662,8 @@ xfs_create(
if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
return -EIO;
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -1076,7 +675,7 @@ xfs_create(
tres = &M_RES(mp)->tr_create;
}
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto out_release_dquots;
@@ -1105,10 +704,9 @@ xfs_create(
* entry pointing to them, but a directory also the "." entry
* pointing to itself.
*/
- error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
- is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
+ error = xfs_icreate(tp, ino, args, &du.ip);
if (error)
goto out_trans_cancel;
@@ -1121,38 +719,9 @@ xfs_create(
*/
xfs_trans_ijoin(tp, dp, 0);
- error = xfs_dir_createname(tp, dp, name, ip->i_ino,
- resblks - XFS_IALLOC_SPACE_RES(mp));
- if (error) {
- ASSERT(error != -ENOSPC);
+ error = xfs_dir_create_child(tp, resblks, &du);
+ if (error)
goto out_trans_cancel;
- }
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
- if (is_dir) {
- error = xfs_dir_init(tp, ip, dp);
- if (error)
- goto out_trans_cancel;
-
- xfs_bumplink(tp, dp);
- }
-
- /*
- * If we have parent pointers, we need to add the attribute containing
- * the parent information now.
- */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, dp, name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * Create ip with a reference from dp, and add '.' and '..' references
- * if it's a directory.
- */
- xfs_dir_update_hook(dp, ip, 1, name);
/*
* If this is a synchronous mount, make sure that the
@@ -1167,7 +736,7 @@ xfs_create(
* These ids of the inode couldn't have changed since the new
* inode has been locked ever since it was created.
*/
- xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+ xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
error = xfs_trans_commit(tp);
if (error)
@@ -1177,10 +746,10 @@ xfs_create(
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
- *ipp = ip;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ *ipp = du.ip;
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -1191,13 +760,13 @@ xfs_create(
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_finish_inode_setup(ip);
- xfs_irele(ip);
+ if (du.ip) {
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
+ xfs_finish_inode_setup(du.ip);
+ xfs_irele(du.ip);
}
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
@@ -1210,36 +779,28 @@ xfs_create(
int
xfs_create_tmpfile(
- struct mnt_idmap *idmap,
- struct xfs_inode *dp,
- umode_t mode,
- bool init_xattrs,
+ const struct xfs_icreate_args *args,
struct xfs_inode **ipp)
{
+ struct xfs_inode *dp = args->pip;
struct xfs_mount *mp = dp->i_mount;
struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
- int error;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
- uint resblks;
xfs_ino_t ino;
+ uint resblks;
+ int error;
+
+ ASSERT(args->flags & XFS_ICREATE_TMPFILE);
if (xfs_is_shutdown(mp))
return -EIO;
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -1251,10 +812,9 @@ xfs_create_tmpfile(
if (error)
goto out_release_dquots;
- error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
- 0, 0, prid, init_xattrs, &ip);
+ error = xfs_icreate(tp, ino, args, &ip);
if (error)
goto out_trans_cancel;
@@ -1311,11 +871,15 @@ xfs_link(
struct xfs_inode *sip,
struct xfs_name *target_name)
{
+ struct xfs_dir_update du = {
+ .dp = tdp,
+ .name = target_name,
+ .ip = sip,
+ };
struct xfs_mount *mp = tdp->i_mount;
struct xfs_trans *tp;
int error, nospace_error = 0;
int resblks;
- struct xfs_parent_args *ppargs;
trace_xfs_link(tdp, target_name);
@@ -1334,7 +898,7 @@ xfs_link(
if (error)
goto std_return;
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto std_return;
@@ -1349,7 +913,7 @@ xfs_link(
* pointers are enabled because we can't back out if the xattrs must
* grow.
*/
- if (ppargs && nospace_error) {
+ if (du.ppargs && nospace_error) {
error = nospace_error;
goto error_return;
}
@@ -1376,47 +940,9 @@ xfs_link(
}
}
- if (!resblks) {
- error = xfs_dir_canenter(tp, tdp, target_name);
- if (error)
- goto error_return;
- }
-
- /*
- * Handle initial link state of O_TMPFILE inode
- */
- if (VFS_I(sip)->i_nlink == 0) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino));
- error = xfs_iunlink_remove(tp, pag, sip);
- xfs_perag_put(pag);
- if (error)
- goto error_return;
- }
-
- error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
- resblks);
+ error = xfs_dir_add_child(tp, resblks, &du);
if (error)
goto error_return;
- xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
-
- xfs_bumplink(tp, sip);
-
- /*
- * If we have parent pointers, we now need to add the parent record to
- * the attribute fork of the inode. If this is the initial parent
- * attribute, we need to create it correctly, otherwise we can just add
- * the parent to the inode.
- */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip);
- if (error)
- goto error_return;
- }
-
- xfs_dir_update_hook(tdp, sip, 1, target_name);
/*
* If this is a synchronous mount, make sure that the
@@ -1429,7 +955,7 @@ xfs_link(
error = xfs_trans_commit(tp);
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return error;
error_return:
@@ -1437,7 +963,7 @@ xfs_link(
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
std_return:
if (error == -ENOSPC && nospace_error)
error = nospace_error;
@@ -2024,39 +1550,6 @@ out:
}
/*
- * In-Core Unlinked List Lookups
- * =============================
- *
- * Every inode is supposed to be reachable from some other piece of metadata
- * with the exception of the root directory. Inodes with a connection to a
- * file descriptor but not linked from anywhere in the on-disk directory tree
- * are collectively known as unlinked inodes, though the filesystem itself
- * maintains links to these inodes so that on-disk metadata are consistent.
- *
- * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
- * header contains a number of buckets that point to an inode, and each inode
- * record has a pointer to the next inode in the hash chain. This
- * singly-linked list causes scaling problems in the iunlink remove function
- * because we must walk that list to find the inode that points to the inode
- * being removed from the unlinked hash bucket list.
- *
- * Hence we keep an in-memory double linked list to link each inode on an
- * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
- * based lists would require having 64 list heads in the perag, one for each
- * list. This is expensive in terms of memory (think millions of AGs) and cache
- * misses on lookups. Instead, use the fact that inodes on the unlinked list
- * must be referenced at the VFS level to keep them on the list and hence we
- * have an existence guarantee for inodes on the unlinked list.
- *
- * Given we have an existence guarantee, we can use lockless inode cache lookups
- * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
- * for the double linked unlinked list, and we don't need any extra locking to
- * keep the list safe as all manipulations are done under the AGI buffer lock.
- * Keeping the list up to date does not require memory allocation, just finding
- * the XFS inode and updating the next/prev unlinked list aginos.
- */
-
-/*
* Find an inode on the unlinked list. This does not take references to the
* inode as we have existence guarantees by holding the AGI buffer lock and that
* only unlinked, referenced inodes can be on the unlinked inode list. If we
@@ -2091,75 +1584,11 @@ xfs_iunlink_lookup(
}
/*
- * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
- * is not in cache.
- */
-static int
-xfs_iunlink_update_backref(
- struct xfs_perag *pag,
- xfs_agino_t prev_agino,
- xfs_agino_t next_agino)
-{
- struct xfs_inode *ip;
-
- /* No update necessary if we are at the end of the list. */
- if (next_agino == NULLAGINO)
- return 0;
-
- ip = xfs_iunlink_lookup(pag, next_agino);
- if (!ip)
- return -ENOLINK;
-
- ip->i_prev_unlinked = prev_agino;
- return 0;
-}
-
-/*
- * Point the AGI unlinked bucket at an inode and log the results. The caller
- * is responsible for validating the old value.
- */
-STATIC int
-xfs_iunlink_update_bucket(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- unsigned int bucket_index,
- xfs_agino_t new_agino)
-{
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t old_value;
- int offset;
-
- ASSERT(xfs_verify_agino_or_null(pag, new_agino));
-
- old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
- old_value, new_agino);
-
- /*
- * We should never find the head of the list already set to the value
- * passed in because either we're adding or removing ourselves from the
- * head of the list.
- */
- if (old_value == new_agino) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
- offset = offsetof(struct xfs_agi, agi_unlinked) +
- (sizeof(xfs_agino_t) * bucket_index);
- xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
- return 0;
-}
-
-/*
* Load the inode @next_agino into the cache and set its prev_unlinked pointer
* to @prev_agino. Caller must hold the AGI to synchronize with other changes
* to the unlinked list.
*/
-STATIC int
+int
xfs_iunlink_reload_next(
struct xfs_trans *tp,
struct xfs_buf *agibp,
@@ -2215,187 +1644,6 @@ rele:
return error;
}
-static int
-xfs_iunlink_insert_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t next_agino;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the pointer isn't garbage and that this inode
- * isn't already on the list.
- */
- next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (next_agino == agino ||
- !xfs_verify_agino_or_null(pag, next_agino)) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Update the prev pointer in the next inode to point back to this
- * inode.
- */
- error = xfs_iunlink_update_backref(pag, agino, next_agino);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
- if (error)
- return error;
-
- if (next_agino != NULLAGINO) {
- /*
- * There is already another inode in the bucket, so point this
- * inode to the current head of the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
- if (error)
- return error;
- ip->i_next_unlinked = next_agino;
- }
-
- /* Point the head of the list to point to this inode. */
- ip->i_prev_unlinked = NULLAGINO;
- return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
-}
-
-/*
- * This is called when the inode's link count has gone to 0 or we are creating
- * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
- *
- * We place the on-disk inode on a list in the AGI. It will be pulled from this
- * list when the inode is freed.
- */
-int
-xfs_iunlink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
- struct xfs_buf *agibp;
- int error;
-
- ASSERT(VFS_I(ip)->i_nlink == 0);
- ASSERT(VFS_I(ip)->i_mode != 0);
- trace_xfs_iunlink(ip);
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- goto out;
-
- error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
-out:
- xfs_perag_put(pag);
- return error;
-}
-
-static int
-xfs_iunlink_remove_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- xfs_agino_t head_agino;
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the head pointer isn't garbage.
- */
- head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (!xfs_verify_agino(pag, head_agino)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- agi, sizeof(*agi));
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Set our inode's next_unlinked pointer to NULL and then return
- * the old pointer value so that we can update whatever was previous
- * to us in the list to point to whatever was next in the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
- if (error)
- return error;
-
- /*
- * Update the prev pointer in the next inode to point back to previous
- * inode in the chain.
- */
- error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error)
- return error;
-
- if (head_agino != agino) {
- struct xfs_inode *prev_ip;
-
- prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
- if (!prev_ip) {
- xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
- return -EFSCORRUPTED;
- }
-
- error = xfs_iunlink_log_inode(tp, prev_ip, pag,
- ip->i_next_unlinked);
- prev_ip->i_next_unlinked = ip->i_next_unlinked;
- } else {
- /* Point the head of the list to the next unlinked inode. */
- error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
- ip->i_next_unlinked);
- }
-
- ip->i_next_unlinked = NULLAGINO;
- ip->i_prev_unlinked = 0;
- return error;
-}
-
-/*
- * Pull the on-disk inode from the AGI unlinked list.
- */
-int
-xfs_iunlink_remove(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_inode *ip)
-{
- struct xfs_buf *agibp;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- return error;
-
- return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
-}
-
/*
* Look up the inode number specified and if it is not already marked XFS_ISTALE
* mark it stale. We should only find clean inodes in this lookup that aren't
@@ -2614,36 +1862,10 @@ xfs_ifree(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- /*
- * Free the inode first so that we guarantee that the AGI lock is going
- * to be taken before we remove the inode from the unlinked list. This
- * makes the AGI lock -> unlinked list modification order the same as
- * used in O_TMPFILE creation.
- */
- error = xfs_difree(tp, pag, ip->i_ino, &xic);
- if (error)
- goto out;
-
- error = xfs_iunlink_remove(tp, pag, ip);
+ error = xfs_inode_uninit(tp, pag, ip, &xic);
if (error)
goto out;
- /*
- * Free any local-format data sitting around before we reset the
- * data fork to extents format. Note that the attr fork data has
- * already been freed by xfs_attr_inactive.
- */
- if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- kfree(ip->i_df.if_data);
- ip->i_df.if_data = NULL;
- ip->i_df.if_bytes = 0;
- }
-
- VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
- ip->i_diflags = 0;
- ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
- ip->i_forkoff = 0; /* mark the attr fork not in use */
- ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
@@ -2652,13 +1874,6 @@ xfs_ifree(
iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
spin_unlock(&iip->ili_lock);
- /*
- * Bump the generation count so no one will be confused
- * by reincarnations of this inode.
- */
- VFS_I(ip)->i_generation++;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
if (xic.deleted)
error = xfs_ifree_cluster(tp, pag, ip, &xic);
out:
@@ -2742,13 +1957,17 @@ xfs_remove(
struct xfs_name *name,
struct xfs_inode *ip)
{
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = name,
+ .ip = ip,
+ };
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans *tp = NULL;
int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int dontcare;
int error = 0;
uint resblks;
- struct xfs_parent_args *ppargs;
trace_xfs_remove(dp, name);
@@ -2765,7 +1984,7 @@ xfs_remove(
if (error)
goto std_return;
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto std_return;
@@ -2788,76 +2007,10 @@ xfs_remove(
goto out_parent;
}
- /*
- * If we're removing a directory perform some additional validation.
- */
- if (is_dir) {
- ASSERT(VFS_I(ip)->i_nlink >= 2);
- if (VFS_I(ip)->i_nlink != 2) {
- error = -ENOTEMPTY;
- goto out_trans_cancel;
- }
- if (!xfs_dir_isempty(ip)) {
- error = -ENOTEMPTY;
- goto out_trans_cancel;
- }
-
- /* Drop the link from ip's "..". */
- error = xfs_droplink(tp, dp);
- if (error)
- goto out_trans_cancel;
-
- /* Drop the "." link from ip to self. */
- error = xfs_droplink(tp, ip);
- if (error)
- goto out_trans_cancel;
-
- /*
- * Point the unlinked child directory's ".." entry to the root
- * directory to eliminate back-references to inodes that may
- * get freed before the child directory is closed. If the fs
- * gets shrunk, this can lead to dirent inode validation errors.
- */
- if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
- error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
- tp->t_mountp->m_sb.sb_rootino, 0);
- if (error)
- goto out_trans_cancel;
- }
- } else {
- /*
- * When removing a non-directory we need to log the parent
- * inode here. For a directory this is done implicitly
- * by the xfs_droplink call for the ".." entry.
- */
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- }
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /* Drop the link from dp to ip. */
- error = xfs_droplink(tp, ip);
+ error = xfs_dir_remove_child(tp, resblks, &du);
if (error)
goto out_trans_cancel;
- error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
- if (error) {
- ASSERT(error != -ENOENT);
- goto out_trans_cancel;
- }
-
- /* Remove parent pointer. */
- if (ppargs) {
- error = xfs_parent_removename(tp, ppargs, dp, name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * Drop the link from dp to ip, and if ip was a directory, remove the
- * '.' and '..' references since we freed the directory.
- */
- xfs_dir_update_hook(dp, ip, -1, name);
-
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
@@ -2875,7 +2028,7 @@ xfs_remove(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -2884,7 +2037,7 @@ xfs_remove(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
std_return:
return error;
}
@@ -2964,160 +2117,6 @@ xfs_sort_inodes(
}
}
-static int
-xfs_finish_rename(
- struct xfs_trans *tp)
-{
- /*
- * If this is a synchronous mount, make sure that the rename transaction
- * goes to disk before returning to the user.
- */
- if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
- xfs_trans_set_sync(tp);
-
- return xfs_trans_commit(tp);
-}
-
-/*
- * xfs_cross_rename()
- *
- * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall
- */
-STATIC int
-xfs_cross_rename(
- struct xfs_trans *tp,
- struct xfs_inode *dp1,
- struct xfs_name *name1,
- struct xfs_inode *ip1,
- struct xfs_parent_args *ip1_ppargs,
- struct xfs_inode *dp2,
- struct xfs_name *name2,
- struct xfs_inode *ip2,
- struct xfs_parent_args *ip2_ppargs,
- int spaceres)
-{
- int error = 0;
- int ip1_flags = 0;
- int ip2_flags = 0;
- int dp2_flags = 0;
-
- /* Swap inode number for dirent in first parent */
- error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* Swap inode number for dirent in second parent */
- error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /*
- * If we're renaming one or more directories across different parents,
- * update the respective ".." entries (and link counts) to match the new
- * parents.
- */
- if (dp1 != dp2) {
- dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
-
- if (S_ISDIR(VFS_I(ip2)->i_mode)) {
- error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
- dp1->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* transfer ip2 ".." reference to dp1 */
- if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
- error = xfs_droplink(tp, dp2);
- if (error)
- goto out_trans_abort;
- xfs_bumplink(tp, dp1);
- }
-
- /*
- * Although ip1 isn't changed here, userspace needs
- * to be warned about the change, so that applications
- * relying on it (like backup ones), will properly
- * notify the change
- */
- ip1_flags |= XFS_ICHGTIME_CHG;
- ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- }
-
- if (S_ISDIR(VFS_I(ip1)->i_mode)) {
- error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
- dp2->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* transfer ip1 ".." reference to dp2 */
- if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
- error = xfs_droplink(tp, dp1);
- if (error)
- goto out_trans_abort;
- xfs_bumplink(tp, dp2);
- }
-
- /*
- * Although ip2 isn't changed here, userspace needs
- * to be warned about the change, so that applications
- * relying on it (like backup ones), will properly
- * notify the change
- */
- ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- ip2_flags |= XFS_ICHGTIME_CHG;
- }
- }
-
- /* Schedule parent pointer replacements */
- if (ip1_ppargs) {
- error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2,
- name2, ip1);
- if (error)
- goto out_trans_abort;
- }
-
- if (ip2_ppargs) {
- error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1,
- name1, ip2);
- if (error)
- goto out_trans_abort;
- }
-
- if (ip1_flags) {
- xfs_trans_ichgtime(tp, ip1, ip1_flags);
- xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
- }
- if (ip2_flags) {
- xfs_trans_ichgtime(tp, ip2, ip2_flags);
- xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
- }
- if (dp2_flags) {
- xfs_trans_ichgtime(tp, dp2, dp2_flags);
- xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
- }
- xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-
- /*
- * Inform our hook clients that we've finished an exchange operation as
- * follows: removed the source and target files from their directories;
- * added the target to the source directory; and added the source to
- * the target directory. All inodes are locked, so it's ok to model a
- * rename this way so long as we say we deleted entries before we add
- * new ones.
- */
- xfs_dir_update_hook(dp1, ip1, -1, name1);
- xfs_dir_update_hook(dp2, ip2, -1, name2);
- xfs_dir_update_hook(dp1, ip2, 1, name1);
- xfs_dir_update_hook(dp2, ip1, 1, name2);
-
- return xfs_finish_rename(tp);
-
-out_trans_abort:
- xfs_trans_cancel(tp);
- return error;
-}
-
/*
* xfs_rename_alloc_whiteout()
*
@@ -3133,12 +2132,17 @@ xfs_rename_alloc_whiteout(
struct xfs_inode *dp,
struct xfs_inode **wip)
{
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = dp,
+ .mode = S_IFCHR | WHITEOUT_MODE,
+ .flags = XFS_ICREATE_TMPFILE,
+ };
struct xfs_inode *tmpfile;
struct qstr name;
int error;
- error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE,
- xfs_has_parent(dp->i_mount), &tmpfile);
+ error = xfs_create_tmpfile(&args, &tmpfile);
if (error)
return error;
@@ -3178,13 +2182,20 @@ xfs_rename(
struct xfs_inode *target_ip,
unsigned int flags)
{
+ struct xfs_dir_update du_src = {
+ .dp = src_dp,
+ .name = src_name,
+ .ip = src_ip,
+ };
+ struct xfs_dir_update du_tgt = {
+ .dp = target_dp,
+ .name = target_name,
+ .ip = target_ip,
+ };
+ struct xfs_dir_update du_wip = { };
struct xfs_mount *mp = src_dp->i_mount;
struct xfs_trans *tp;
- struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
- struct xfs_parent_args *src_ppargs = NULL;
- struct xfs_parent_args *tgt_ppargs = NULL;
- struct xfs_parent_args *wip_ppargs = NULL;
int i;
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
@@ -3204,8 +2215,8 @@ xfs_rename(
* appropriately.
*/
if (flags & RENAME_WHITEOUT) {
- error = xfs_rename_alloc_whiteout(idmap, src_name,
- target_dp, &wip);
+ error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp,
+ &du_wip.ip);
if (error)
return error;
@@ -3213,21 +2224,21 @@ xfs_rename(
src_name->type = XFS_DIR3_FT_CHRDEV;
}
- xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
- inodes, &num_inodes);
+ xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, du_wip.ip,
+ inodes, &num_inodes);
- error = xfs_parent_start(mp, &src_ppargs);
+ error = xfs_parent_start(mp, &du_src.ppargs);
if (error)
goto out_release_wip;
- if (wip) {
- error = xfs_parent_start(mp, &wip_ppargs);
+ if (du_wip.ip) {
+ error = xfs_parent_start(mp, &du_wip.ppargs);
if (error)
goto out_src_ppargs;
}
if (target_ip) {
- error = xfs_parent_start(mp, &tgt_ppargs);
+ error = xfs_parent_start(mp, &du_tgt.ppargs);
if (error)
goto out_wip_ppargs;
}
@@ -3235,7 +2246,7 @@ xfs_rename(
retry:
nospace_error = 0;
spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
- target_name->len, wip != NULL);
+ target_name->len, du_wip.ip != NULL);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) {
nospace_error = error;
@@ -3250,7 +2261,7 @@ retry:
* We don't allow reservationless renaming when parent pointers are
* enabled because we can't back out if the xattrs must grow.
*/
- if (src_ppargs && nospace_error) {
+ if (du_src.ppargs && nospace_error) {
error = nospace_error;
xfs_trans_cancel(tp);
goto out_tgt_ppargs;
@@ -3282,8 +2293,8 @@ retry:
xfs_trans_ijoin(tp, src_ip, 0);
if (target_ip)
xfs_trans_ijoin(tp, target_ip, 0);
- if (wip)
- xfs_trans_ijoin(tp, wip, 0);
+ if (du_wip.ip)
+ xfs_trans_ijoin(tp, du_wip.ip, 0);
/*
* If we are using project inheritance, we only allow renames
@@ -3298,11 +2309,11 @@ retry:
/* RENAME_EXCHANGE is unique from here on. */
if (flags & RENAME_EXCHANGE) {
- error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
- src_ppargs, target_dp, target_name, target_ip,
- tgt_ppargs, spaceres);
- nospace_error = 0;
- goto out_unlock;
+ error = xfs_dir_exchange_children(tp, &du_src, &du_tgt,
+ spaceres);
+ if (error)
+ goto out_trans_cancel;
+ goto out_commit;
}
/*
@@ -3335,39 +2346,12 @@ retry:
* We don't allow quotaless renaming when parent pointers are enabled
* because we can't back out if the xattrs must grow.
*/
- if (src_ppargs && nospace_error) {
+ if (du_src.ppargs && nospace_error) {
error = nospace_error;
goto out_trans_cancel;
}
/*
- * Check for expected errors before we dirty the transaction
- * so we can return an error without a transaction abort.
- */
- if (target_ip == NULL) {
- /*
- * If there's no space reservation, check the entry will
- * fit before actually inserting it.
- */
- if (!spaceres) {
- error = xfs_dir_canenter(tp, target_dp, target_name);
- if (error)
- goto out_trans_cancel;
- }
- } else {
- /*
- * If target exists and it's a directory, check that whether
- * it can be destroyed.
- */
- if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
- (!xfs_dir_isempty(target_ip) ||
- (VFS_I(target_ip)->i_nlink > 2))) {
- error = -EEXIST;
- goto out_trans_cancel;
- }
- }
-
- /*
* Lock the AGI buffers we need to handle bumping the nlink of the
* whiteout inode off the unlinked list and to handle dropping the
* nlink of the target inode. Per locking order rules, do this in
@@ -3378,7 +2362,7 @@ retry:
* target_ip is either null or an empty directory.
*/
for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
- if (inodes[i] == wip ||
+ if (inodes[i] == du_wip.ip ||
(inodes[i] == target_ip &&
(VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
struct xfs_perag *pag;
@@ -3393,188 +2377,29 @@ retry:
}
}
- /*
- * Directory entry creation below may acquire the AGF. Remove
- * the whiteout from the unlinked list first to preserve correct
- * AGI/AGF locking order. This dirties the transaction so failures
- * after this point will abort and log recovery will clean up the
- * mess.
- *
- * For whiteouts, we need to bump the link count on the whiteout
- * inode. After this point, we have a real link, clear the tmpfile
- * state flag from the inode so it doesn't accidentally get misused
- * in future.
- */
- if (wip) {
- struct xfs_perag *pag;
-
- ASSERT(VFS_I(wip)->i_nlink == 0);
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino));
- error = xfs_iunlink_remove(tp, pag, wip);
- xfs_perag_put(pag);
- if (error)
- goto out_trans_cancel;
-
- xfs_bumplink(tp, wip);
- VFS_I(wip)->i_state &= ~I_LINKABLE;
- }
-
- /*
- * Set up the target.
- */
- if (target_ip == NULL) {
- /*
- * If target does not exist and the rename crosses
- * directories, adjust the target directory link count
- * to account for the ".." reference from the new entry.
- */
- error = xfs_dir_createname(tp, target_dp, target_name,
- src_ip->i_ino, spaceres);
- if (error)
- goto out_trans_cancel;
-
- xfs_trans_ichgtime(tp, target_dp,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- if (new_parent && src_is_directory) {
- xfs_bumplink(tp, target_dp);
- }
- } else { /* target_ip != NULL */
- /*
- * Link the source inode under the target name.
- * If the source inode is a directory and we are moving
- * it across directories, its ".." entry will be
- * inconsistent until we replace that down below.
- *
- * In case there is already an entry with the same
- * name at the destination directory, remove it first.
- */
- error = xfs_dir_replace(tp, target_dp, target_name,
- src_ip->i_ino, spaceres);
- if (error)
- goto out_trans_cancel;
-
- xfs_trans_ichgtime(tp, target_dp,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /*
- * Decrement the link count on the target since the target
- * dir no longer points to it.
- */
- error = xfs_droplink(tp, target_ip);
- if (error)
- goto out_trans_cancel;
-
- if (src_is_directory) {
- /*
- * Drop the link from the old "." entry.
- */
- error = xfs_droplink(tp, target_ip);
- if (error)
- goto out_trans_cancel;
- }
- } /* target_ip != NULL */
-
- /*
- * Remove the source.
- */
- if (new_parent && src_is_directory) {
- /*
- * Rewrite the ".." entry to point to the new
- * directory.
- */
- error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
- target_dp->i_ino, spaceres);
- ASSERT(error != -EEXIST);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * We always want to hit the ctime on the source inode.
- *
- * This isn't strictly required by the standards since the source
- * inode isn't really being changed, but old unix file systems did
- * it and some incremental backup programs won't work without it.
- */
- xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
-
- /*
- * Adjust the link count on src_dp. This is necessary when
- * renaming a directory, either within one parent when
- * the target existed, or across two parent directories.
- */
- if (src_is_directory && (new_parent || target_ip != NULL)) {
-
- /*
- * Decrement link count on src_directory since the
- * entry that's moved no longer points to it.
- */
- error = xfs_droplink(tp, src_dp);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * For whiteouts, we only need to update the source dirent with the
- * inode number of the whiteout inode rather than removing it
- * altogether.
- */
- if (wip)
- error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
- spaceres);
- else
- error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
- spaceres);
-
+ error = xfs_dir_rename_children(tp, &du_src, &du_tgt, spaceres,
+ &du_wip);
if (error)
goto out_trans_cancel;
- /* Schedule parent pointer updates. */
- if (wip_ppargs) {
- error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name,
- wip);
- if (error)
- goto out_trans_cancel;
- }
-
- if (src_ppargs) {
- error = xfs_parent_replacename(tp, src_ppargs, src_dp,
- src_name, target_dp, target_name, src_ip);
- if (error)
- goto out_trans_cancel;
- }
-
- if (tgt_ppargs) {
- error = xfs_parent_removename(tp, tgt_ppargs, target_dp,
- target_name, target_ip);
- if (error)
- goto out_trans_cancel;
+ if (du_wip.ip) {
+ /*
+ * Now we have a real link, clear the "I'm a tmpfile" state
+ * flag from the inode so it doesn't accidentally get misused in
+ * future.
+ */
+ VFS_I(du_wip.ip)->i_state &= ~I_LINKABLE;
}
- xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
- if (new_parent)
- xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
-
+out_commit:
/*
- * Inform our hook clients that we've finished a rename operation as
- * follows: removed the source and target files from their directories;
- * that we've added the source to the target directory; and finally
- * that we've added the whiteout, if there was one. All inodes are
- * locked, so it's ok to model a rename this way so long as we say we
- * deleted entries before we add new ones.
+ * If this is a synchronous mount, make sure that the rename
+ * transaction goes to disk before returning to the user.
*/
- if (target_ip)
- xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
- xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
- xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
- if (wip)
- xfs_dir_update_hook(src_dp, wip, 1, src_name);
+ if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
+ xfs_trans_set_sync(tp);
- error = xfs_finish_rename(tp);
+ error = xfs_trans_commit(tp);
nospace_error = 0;
goto out_unlock;
@@ -3583,14 +2408,14 @@ out_trans_cancel:
out_unlock:
xfs_iunlock_rename(inodes, num_inodes);
out_tgt_ppargs:
- xfs_parent_finish(mp, tgt_ppargs);
+ xfs_parent_finish(mp, du_tgt.ppargs);
out_wip_ppargs:
- xfs_parent_finish(mp, wip_ppargs);
+ xfs_parent_finish(mp, du_wip.ppargs);
out_src_ppargs:
- xfs_parent_finish(mp, src_ppargs);
+ xfs_parent_finish(mp, du_src.ppargs);
out_release_wip:
- if (wip)
- xfs_irele(wip);
+ if (du_wip.ip)
+ xfs_irele(du_wip.ip);
if (error == -ENOSPC && nospace_error)
error = nospace_error;
return error;
@@ -3730,6 +2555,7 @@ flush_out:
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
iip->ili_fsync_fields = 0;
+ set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
spin_unlock(&iip->ili_lock);
/*
@@ -4293,3 +3119,11 @@ xfs_inode_alloc_unitsize(
return XFS_FSB_TO_B(ip->i_mount, blocks);
}
+
+/* Should we always be using copy on write for file writes? */
+bool
+xfs_is_always_cow_inode(
+ struct xfs_inode *ip)
+{
+ return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 292b90b5f2ac..51defdebef30 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -8,6 +8,7 @@
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
+#include "xfs_inode_util.h"
/*
* Kernel only inode definitions
@@ -270,15 +271,6 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
return ret;
}
-static inline prid_t
-xfs_get_initial_prid(struct xfs_inode *dp)
-{
- if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
- return dp->i_projid;
-
- return XFS_PROJID_DEFAULT;
-}
-
static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
@@ -292,6 +284,13 @@ static inline bool xfs_is_metadata_inode(struct xfs_inode *ip)
xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
}
+bool xfs_is_always_cow_inode(struct xfs_inode *ip);
+
+static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
+{
+ return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
+}
+
/*
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
@@ -517,12 +516,9 @@ int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct mnt_idmap *idmap,
- struct xfs_inode *dp, struct xfs_name *name,
- umode_t mode, dev_t rdev, bool need_xattr,
- struct xfs_inode **ipp);
-int xfs_create_tmpfile(struct mnt_idmap *idmap,
- struct xfs_inode *dp, umode_t mode, bool init_xattrs,
+int xfs_create(const struct xfs_icreate_args *iargs,
+ struct xfs_name *name, struct xfs_inode **ipp);
+int xfs_create_tmpfile(const struct xfs_icreate_args *iargs,
struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
@@ -542,7 +538,6 @@ void xfs_assert_ilocked(struct xfs_inode *, uint);
uint xfs_ilock_data_map_shared(struct xfs_inode *);
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
-uint xfs_ip2xflags(struct xfs_inode *);
int xfs_ifree(struct xfs_trans *, struct xfs_inode *);
int xfs_itruncate_extents_flags(struct xfs_trans **,
struct xfs_inode *, int, xfs_fsize_t, int);
@@ -556,13 +551,8 @@ int xfs_iflush_cluster(struct xfs_buf *);
void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
struct xfs_inode *ip1, uint ip1_mode);
-xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
-xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
-
-int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp,
- struct xfs_inode *pip, xfs_ino_t ino, umode_t mode,
- xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs,
- struct xfs_inode **ipp);
+int xfs_icreate(struct xfs_trans *tp, xfs_ino_t ino,
+ const struct xfs_icreate_args *args, struct xfs_inode **ipp);
static inline int
xfs_itruncate_extents(
@@ -616,18 +606,15 @@ extern struct kmem_cache *xfs_inode_cache;
bool xfs_inode_needs_inactive(struct xfs_inode *ip);
-int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
-int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
- struct xfs_inode *ip);
struct xfs_inode *xfs_iunlink_lookup(struct xfs_perag *pag, xfs_agino_t agino);
+int xfs_iunlink_reload_next(struct xfs_trans *tp, struct xfs_buf *agibp,
+ xfs_agino_t prev_agino, xfs_agino_t next_agino);
void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2);
-int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip);
-void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode);
void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes);
@@ -645,29 +632,8 @@ void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_filblks_t *dblocks, xfs_filblks_t *rblocks);
unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip);
-struct xfs_dir_update_params {
- const struct xfs_inode *dp;
- const struct xfs_inode *ip;
- const struct xfs_name *name;
- int delta;
-};
-
-#ifdef CONFIG_XFS_LIVE_HOOKS
-void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
- int delta, const struct xfs_name *name);
-
-struct xfs_dir_hook {
- struct xfs_hook dirent_hook;
-};
-
-void xfs_dir_hook_disable(void);
-void xfs_dir_hook_enable(void);
-
-int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
-void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
-void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
-#else
-# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0)
-#endif /* CONFIG_XFS_LIVE_HOOKS */
+int xfs_icreate_dqalloc(const struct xfs_icreate_args *args,
+ struct xfs_dquot **udqpp, struct xfs_dquot **gdqpp,
+ struct xfs_dquot **pdqpp);
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f28d653300d1..b509cbd191f4 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -37,6 +37,36 @@ xfs_inode_item_sort(
return INODE_ITEM(lip)->ili_inode->i_ino;
}
+#ifdef DEBUG_EXPENSIVE
+static void
+xfs_inode_item_precommit_check(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dinode *dip;
+ xfs_failaddr_t fa;
+
+ dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS);
+ if (!dip) {
+ ASSERT(dip != NULL);
+ return;
+ }
+
+ xfs_inode_to_disk(ip, dip, 0);
+ xfs_dinode_calc_crc(mp, dip);
+ fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+ kfree(dip);
+}
+#else
+# define xfs_inode_item_precommit_check(ip) ((void)0)
+#endif
+
/*
* Prior to finally logging the inode, we have to ensure that all the
* per-modification inode state changes are applied. This includes VFS inode
@@ -169,6 +199,8 @@ xfs_inode_item_precommit(
iip->ili_fields |= (flags | iip->ili_last_fields);
spin_unlock(&iip->ili_lock);
+ xfs_inode_item_precommit_check(ip);
+
/*
* We are done with the log item transaction dirty state, so clear it so
* that it doesn't pollute future transactions.
@@ -933,6 +965,7 @@ xfs_iflush_finish(
}
iip->ili_last_fields = 0;
iip->ili_flush_lsn = 0;
+ clear_bit(XFS_LI_FLUSHING, &lip->li_flags);
spin_unlock(&iip->ili_lock);
xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
if (drop_buffer)
@@ -991,8 +1024,10 @@ xfs_buf_inode_io_fail(
{
struct xfs_log_item *lip;
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
set_bit(XFS_LI_FAILED, &lip->li_flags);
+ clear_bit(XFS_LI_FLUSHING, &lip->li_flags);
+ }
}
/*
@@ -1011,6 +1046,7 @@ xfs_iflush_abort_clean(
iip->ili_flush_lsn = 0;
iip->ili_item.li_buf = NULL;
list_del_init(&iip->ili_item.li_bio_list);
+ clear_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
}
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f0117188f302..4e933db75b12 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -469,66 +469,6 @@ xfs_fileattr_get(
return 0;
}
-STATIC uint16_t
-xfs_flags2diflags(
- struct xfs_inode *ip,
- unsigned int xflags)
-{
- /* can't set PREALLOC this way, just preserve it */
- uint16_t di_flags =
- (ip->i_diflags & XFS_DIFLAG_PREALLOC);
-
- if (xflags & FS_XFLAG_IMMUTABLE)
- di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & FS_XFLAG_APPEND)
- di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & FS_XFLAG_SYNC)
- di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & FS_XFLAG_NOATIME)
- di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & FS_XFLAG_NODUMP)
- di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & FS_XFLAG_NODEFRAG)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & FS_XFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
- if (S_ISDIR(VFS_I(ip)->i_mode)) {
- if (xflags & FS_XFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & FS_XFLAG_NOSYMLINKS)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & FS_XFLAG_EXTSZINHERIT)
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & FS_XFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(VFS_I(ip)->i_mode)) {
- if (xflags & FS_XFLAG_REALTIME)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & FS_XFLAG_EXTSIZE)
- di_flags |= XFS_DIFLAG_EXTSIZE;
- }
-
- return di_flags;
-}
-
-STATIC uint64_t
-xfs_flags2diflags2(
- struct xfs_inode *ip,
- unsigned int xflags)
-{
- uint64_t di_flags2 =
- (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
- XFS_DIFLAG2_BIGTIME |
- XFS_DIFLAG2_NREXT64));
-
- if (xflags & FS_XFLAG_DAX)
- di_flags2 |= XFS_DIFLAG2_DAX;
- if (xflags & FS_XFLAG_COWEXTSIZE)
- di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
-
- return di_flags2;
-}
-
static int
xfs_ioctl_setattr_xflags(
struct xfs_trans *tp,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 414903885ab9..72c981e3dc92 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -717,53 +717,30 @@ imap_needs_cow(
return true;
}
+/*
+ * Extents not yet cached requires exclusive access, don't block for
+ * IOMAP_NOWAIT.
+ *
+ * This is basically an opencoded xfs_ilock_data_map_shared() call, but with
+ * support for IOMAP_NOWAIT.
+ */
static int
xfs_ilock_for_iomap(
struct xfs_inode *ip,
unsigned flags,
unsigned *lockmode)
{
- unsigned int mode = *lockmode;
- bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
-
- /*
- * COW writes may allocate delalloc space or convert unwritten COW
- * extents, so we need to make sure to take the lock exclusively here.
- */
- if (xfs_is_cow_inode(ip) && is_write)
- mode = XFS_ILOCK_EXCL;
-
- /*
- * Extents not yet cached requires exclusive access, don't block. This
- * is an opencoded xfs_ilock_data_map_shared() call but with
- * non-blocking behaviour.
- */
- if (xfs_need_iread_extents(&ip->i_df)) {
- if (flags & IOMAP_NOWAIT)
- return -EAGAIN;
- mode = XFS_ILOCK_EXCL;
- }
-
-relock:
if (flags & IOMAP_NOWAIT) {
- if (!xfs_ilock_nowait(ip, mode))
+ if (xfs_need_iread_extents(&ip->i_df))
+ return -EAGAIN;
+ if (!xfs_ilock_nowait(ip, *lockmode))
return -EAGAIN;
} else {
- xfs_ilock(ip, mode);
+ if (xfs_need_iread_extents(&ip->i_df))
+ *lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, *lockmode);
}
- /*
- * The reflink iflag could have changed since the earlier unlocked
- * check, so if we got ILOCK_SHARED for a write and but we're now a
- * reflink inode we have to switch to ILOCK_EXCL and relock.
- */
- if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
- xfs_iunlock(ip, mode);
- mode = XFS_ILOCK_EXCL;
- goto relock;
- }
-
- *lockmode = mode;
return 0;
}
@@ -801,7 +778,7 @@ xfs_direct_write_iomap_begin(
int nimaps = 1, error = 0;
bool shared = false;
u16 iomap_flags = 0;
- unsigned int lockmode = XFS_ILOCK_SHARED;
+ unsigned int lockmode;
u64 seq;
ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
@@ -817,10 +794,30 @@ xfs_direct_write_iomap_begin(
if (offset + length > i_size_read(inode))
iomap_flags |= IOMAP_F_DIRTY;
+ /*
+ * COW writes may allocate delalloc space or convert unwritten COW
+ * extents, so we need to make sure to take the lock exclusively here.
+ */
+ if (xfs_is_cow_inode(ip))
+ lockmode = XFS_ILOCK_EXCL;
+ else
+ lockmode = XFS_ILOCK_SHARED;
+
+relock:
error = xfs_ilock_for_iomap(ip, flags, &lockmode);
if (error)
return error;
+ /*
+ * The reflink iflag could have changed since the earlier unlocked
+ * check, check if it again and relock if needed.
+ */
+ if (xfs_is_cow_inode(ip) && lockmode == XFS_ILOCK_SHARED) {
+ xfs_iunlock(ip, lockmode);
+ lockmode = XFS_ILOCK_EXCL;
+ goto relock;
+ }
+
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a00dcbc77e12..1cdc8034f54d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -28,6 +28,7 @@
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
#include "xfs_file.h"
+#include "xfs_bmap.h"
#include <linux/posix_acl.h>
#include <linux/security.h>
@@ -159,8 +160,6 @@ xfs_create_need_xattr(
if (dir->i_sb->s_security)
return true;
#endif
- if (xfs_has_parent(XFS_I(dir)->i_mount))
- return true;
return false;
}
@@ -174,49 +173,55 @@ xfs_generic_create(
dev_t rdev,
struct file *tmpfile) /* unnamed file */
{
- struct inode *inode;
- struct xfs_inode *ip = NULL;
- struct posix_acl *default_acl, *acl;
- struct xfs_name name;
- int error;
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = XFS_I(dir),
+ .rdev = rdev,
+ .mode = mode,
+ };
+ struct inode *inode;
+ struct xfs_inode *ip = NULL;
+ struct posix_acl *default_acl, *acl;
+ struct xfs_name name;
+ int error;
/*
* Irix uses Missed'em'V split, but doesn't want to see
* the upper 5 bits of (14bit) major.
*/
- if (S_ISCHR(mode) || S_ISBLK(mode)) {
- if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+ if (S_ISCHR(args.mode) || S_ISBLK(args.mode)) {
+ if (unlikely(!sysv_valid_dev(args.rdev) ||
+ MAJOR(args.rdev) & ~0x1ff))
return -EINVAL;
} else {
- rdev = 0;
+ args.rdev = 0;
}
- error = posix_acl_create(dir, &mode, &default_acl, &acl);
+ error = posix_acl_create(dir, &args.mode, &default_acl, &acl);
if (error)
return error;
/* Verify mode is valid also for tmpfile case */
- error = xfs_dentry_mode_to_name(&name, dentry, mode);
+ error = xfs_dentry_mode_to_name(&name, dentry, args.mode);
if (unlikely(error))
goto out_free_acl;
if (!tmpfile) {
- error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev,
- xfs_create_need_xattr(dir, default_acl, acl),
- &ip);
+ if (xfs_create_need_xattr(dir, default_acl, acl))
+ args.flags |= XFS_ICREATE_INIT_XATTRS;
+
+ error = xfs_create(&args, &name, &ip);
} else {
- bool init_xattrs = false;
+ args.flags |= XFS_ICREATE_TMPFILE;
/*
- * If this temporary file will be linkable, set up the file
- * with an attr fork to receive a parent pointer.
+ * If this temporary file will not be linkable, don't bother
+ * creating an attr fork to receive a parent pointer.
*/
- if (!(tmpfile->f_flags & O_EXCL) &&
- xfs_has_parent(XFS_I(dir)->i_mount))
- init_xattrs = true;
+ if (tmpfile->f_flags & O_EXCL)
+ args.flags |= XFS_ICREATE_UNLINKABLE;
- error = xfs_create_tmpfile(idmap, XFS_I(dir), mode,
- init_xattrs, &ip);
+ error = xfs_create_tmpfile(&args, &ip);
}
if (unlikely(error))
goto out_free_acl;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ac355328121a..54a098fe7285 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -135,8 +135,6 @@ typedef __u32 xfs_nlink_t;
*/
#define __this_address ({ __label__ __here; __here: barrier(); &&__here; })
-#define XFS_PROJID_DEFAULT 0
-
#define howmany(x, y) (((x)+((y)-1))/(y))
static inline void delay(long ticks)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 416c15494983..817ea7e0a8ab 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -30,10 +30,6 @@ xlog_alloc_log(
struct xfs_buftarg *log_target,
xfs_daddr_t blk_offset,
int num_bblks);
-STATIC int
-xlog_space_left(
- struct xlog *log,
- atomic64_t *head);
STATIC void
xlog_dealloc_log(
struct xlog *log);
@@ -51,19 +47,12 @@ xlog_state_get_iclog_space(
struct xlog_ticket *ticket,
int *logoffsetp);
STATIC void
-xlog_grant_push_ail(
- struct xlog *log,
- int need_bytes);
-STATIC void
xlog_sync(
struct xlog *log,
struct xlog_in_core *iclog,
struct xlog_ticket *ticket);
#if defined(DEBUG)
STATIC void
-xlog_verify_grant_tail(
- struct xlog *log);
-STATIC void
xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
@@ -73,7 +62,6 @@ xlog_verify_tail_lsn(
struct xlog *log,
struct xlog_in_core *iclog);
#else
-#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b)
#endif
@@ -141,70 +129,66 @@ xlog_prepare_iovec(
return buf;
}
-static void
+static inline void
xlog_grant_sub_space(
- struct xlog *log,
- atomic64_t *head,
- int bytes)
+ struct xlog_grant_head *head,
+ int64_t bytes)
{
- int64_t head_val = atomic64_read(head);
- int64_t new, old;
-
- do {
- int cycle, space;
-
- xlog_crack_grant_head_val(head_val, &cycle, &space);
-
- space -= bytes;
- if (space < 0) {
- space += log->l_logsize;
- cycle--;
- }
-
- old = head_val;
- new = xlog_assign_grant_head_val(cycle, space);
- head_val = atomic64_cmpxchg(head, old, new);
- } while (head_val != old);
+ atomic64_sub(bytes, &head->grant);
}
-static void
+static inline void
xlog_grant_add_space(
- struct xlog *log,
- atomic64_t *head,
- int bytes)
+ struct xlog_grant_head *head,
+ int64_t bytes)
{
- int64_t head_val = atomic64_read(head);
- int64_t new, old;
-
- do {
- int tmp;
- int cycle, space;
-
- xlog_crack_grant_head_val(head_val, &cycle, &space);
-
- tmp = log->l_logsize - space;
- if (tmp > bytes)
- space += bytes;
- else {
- space = bytes - tmp;
- cycle++;
- }
-
- old = head_val;
- new = xlog_assign_grant_head_val(cycle, space);
- head_val = atomic64_cmpxchg(head, old, new);
- } while (head_val != old);
+ atomic64_add(bytes, &head->grant);
}
-STATIC void
+static void
xlog_grant_head_init(
struct xlog_grant_head *head)
{
- xlog_assign_grant_head(&head->grant, 1, 0);
+ atomic64_set(&head->grant, 0);
INIT_LIST_HEAD(&head->waiters);
spin_lock_init(&head->lock);
}
+void
+xlog_grant_return_space(
+ struct xlog *log,
+ xfs_lsn_t old_head,
+ xfs_lsn_t new_head)
+{
+ int64_t diff = xlog_lsn_sub(log, new_head, old_head);
+
+ xlog_grant_sub_space(&log->l_reserve_head, diff);
+ xlog_grant_sub_space(&log->l_write_head, diff);
+}
+
+/*
+ * Return the space in the log between the tail and the head. In the case where
+ * we have overrun available reservation space, return 0. The memory barrier
+ * pairs with the smp_wmb() in xlog_cil_ail_insert() to ensure that grant head
+ * vs tail space updates are seen in the correct order and hence avoid
+ * transients as space is transferred from the grant heads to the AIL on commit
+ * completion.
+ */
+static uint64_t
+xlog_grant_space_left(
+ struct xlog *log,
+ struct xlog_grant_head *head)
+{
+ int64_t free_bytes;
+
+ smp_rmb(); /* paired with smp_wmb in xlog_cil_ail_insert() */
+ free_bytes = log->l_logsize - READ_ONCE(log->l_tail_space) -
+ atomic64_read(&head->grant);
+ if (free_bytes > 0)
+ return free_bytes;
+ return 0;
+}
+
STATIC void
xlog_grant_head_wake_all(
struct xlog_grant_head *head)
@@ -242,42 +226,15 @@ xlog_grant_head_wake(
{
struct xlog_ticket *tic;
int need_bytes;
- bool woken_task = false;
list_for_each_entry(tic, &head->waiters, t_queue) {
-
- /*
- * There is a chance that the size of the CIL checkpoints in
- * progress at the last AIL push target calculation resulted in
- * limiting the target to the log head (l_last_sync_lsn) at the
- * time. This may not reflect where the log head is now as the
- * CIL checkpoints may have completed.
- *
- * Hence when we are woken here, it may be that the head of the
- * log that has moved rather than the tail. As the tail didn't
- * move, there still won't be space available for the
- * reservation we require. However, if the AIL has already
- * pushed to the target defined by the old log head location, we
- * will hang here waiting for something else to update the AIL
- * push target.
- *
- * Therefore, if there isn't space to wake the first waiter on
- * the grant head, we need to push the AIL again to ensure the
- * target reflects both the current log tail and log head
- * position before we wait for the tail to move again.
- */
-
need_bytes = xlog_ticket_reservation(log, head, tic);
- if (*free_bytes < need_bytes) {
- if (!woken_task)
- xlog_grant_push_ail(log, need_bytes);
+ if (*free_bytes < need_bytes)
return false;
- }
*free_bytes -= need_bytes;
trace_xfs_log_grant_wake_up(log, tic);
wake_up_process(tic->t_task);
- woken_task = true;
}
return true;
@@ -296,13 +253,15 @@ xlog_grant_head_wait(
do {
if (xlog_is_shutdown(log))
goto shutdown;
- xlog_grant_push_ail(log, need_bytes);
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(&head->lock);
XFS_STATS_INC(log->l_mp, xs_sleep_logspace);
+ /* Push on the AIL to free up all the log space. */
+ xfs_ail_push_all(log->l_ailp);
+
trace_xfs_log_grant_sleep(log, tic);
schedule();
trace_xfs_log_grant_wake(log, tic);
@@ -310,7 +269,7 @@ xlog_grant_head_wait(
spin_lock(&head->lock);
if (xlog_is_shutdown(log))
goto shutdown;
- } while (xlog_space_left(log, &head->grant) < need_bytes);
+ } while (xlog_grant_space_left(log, head) < need_bytes);
list_del_init(&tic->t_queue);
return 0;
@@ -355,7 +314,7 @@ xlog_grant_head_check(
* otherwise try to get some space for this transaction.
*/
*need_bytes = xlog_ticket_reservation(log, head, tic);
- free_bytes = xlog_space_left(log, &head->grant);
+ free_bytes = xlog_grant_space_left(log, head);
if (!list_empty_careful(&head->waiters)) {
spin_lock(&head->lock);
if (!xlog_grant_head_wake(log, head, &free_bytes) ||
@@ -418,9 +377,6 @@ xfs_log_regrant(
* of rolling transactions in the log easily.
*/
tic->t_tid++;
-
- xlog_grant_push_ail(log, tic->t_unit_res);
-
tic->t_curr_res = tic->t_unit_res;
if (tic->t_cnt > 0)
return 0;
@@ -432,9 +388,8 @@ xfs_log_regrant(
if (error)
goto out_error;
- xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+ xlog_grant_add_space(&log->l_write_head, need_bytes);
trace_xfs_log_regrant_exit(log, tic);
- xlog_verify_grant_tail(log);
return 0;
out_error:
@@ -477,21 +432,15 @@ xfs_log_reserve(
ASSERT(*ticp == NULL);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent);
*ticp = tic;
-
- xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
- : tic->t_unit_res);
-
trace_xfs_log_reserve(log, tic);
-
error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
&need_bytes);
if (error)
goto out_error;
- xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
- xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+ xlog_grant_add_space(&log->l_reserve_head, need_bytes);
+ xlog_grant_add_space(&log->l_write_head, need_bytes);
trace_xfs_log_reserve_exit(log, tic);
- xlog_verify_grant_tail(log);
return 0;
out_error:
@@ -571,7 +520,6 @@ xlog_state_release_iclog(
struct xlog_in_core *iclog,
struct xlog_ticket *ticket)
{
- xfs_lsn_t tail_lsn;
bool last_ref;
lockdep_assert_held(&log->l_icloglock);
@@ -586,8 +534,8 @@ xlog_state_release_iclog(
if ((iclog->ic_state == XLOG_STATE_WANT_SYNC ||
(iclog->ic_flags & XLOG_ICL_NEED_FUA)) &&
!iclog->ic_header.h_tail_lsn) {
- tail_lsn = xlog_assign_tail_lsn(log->l_mp);
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ iclog->ic_header.h_tail_lsn =
+ cpu_to_be64(atomic64_read(&log->l_tail_lsn));
}
last_ref = atomic_dec_and_test(&iclog->ic_refcnt);
@@ -1149,7 +1097,7 @@ xfs_log_space_wake(
ASSERT(!xlog_in_recovery(log));
spin_lock(&log->l_write_head.lock);
- free_bytes = xlog_space_left(log, &log->l_write_head.grant);
+ free_bytes = xlog_grant_space_left(log, &log->l_write_head);
xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
spin_unlock(&log->l_write_head.lock);
}
@@ -1158,7 +1106,7 @@ xfs_log_space_wake(
ASSERT(!xlog_in_recovery(log));
spin_lock(&log->l_reserve_head.lock);
- free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
+ free_bytes = xlog_grant_space_left(log, &log->l_reserve_head);
xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
spin_unlock(&log->l_reserve_head.lock);
}
@@ -1272,105 +1220,6 @@ xfs_log_cover(
return error;
}
-/*
- * We may be holding the log iclog lock upon entering this routine.
- */
-xfs_lsn_t
-xlog_assign_tail_lsn_locked(
- struct xfs_mount *mp)
-{
- struct xlog *log = mp->m_log;
- struct xfs_log_item *lip;
- xfs_lsn_t tail_lsn;
-
- assert_spin_locked(&mp->m_ail->ail_lock);
-
- /*
- * To make sure we always have a valid LSN for the log tail we keep
- * track of the last LSN which was committed in log->l_last_sync_lsn,
- * and use that when the AIL was empty.
- */
- lip = xfs_ail_min(mp->m_ail);
- if (lip)
- tail_lsn = lip->li_lsn;
- else
- tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- trace_xfs_log_assign_tail_lsn(log, tail_lsn);
- atomic64_set(&log->l_tail_lsn, tail_lsn);
- return tail_lsn;
-}
-
-xfs_lsn_t
-xlog_assign_tail_lsn(
- struct xfs_mount *mp)
-{
- xfs_lsn_t tail_lsn;
-
- spin_lock(&mp->m_ail->ail_lock);
- tail_lsn = xlog_assign_tail_lsn_locked(mp);
- spin_unlock(&mp->m_ail->ail_lock);
-
- return tail_lsn;
-}
-
-/*
- * Return the space in the log between the tail and the head. The head
- * is passed in the cycle/bytes formal parms. In the special case where
- * the reserve head has wrapped passed the tail, this calculation is no
- * longer valid. In this case, just return 0 which means there is no space
- * in the log. This works for all places where this function is called
- * with the reserve head. Of course, if the write head were to ever
- * wrap the tail, we should blow up. Rather than catch this case here,
- * we depend on other ASSERTions in other parts of the code. XXXmiken
- *
- * If reservation head is behind the tail, we have a problem. Warn about it,
- * but then treat it as if the log is empty.
- *
- * If the log is shut down, the head and tail may be invalid or out of whack, so
- * shortcut invalidity asserts in this case so that we don't trigger them
- * falsely.
- */
-STATIC int
-xlog_space_left(
- struct xlog *log,
- atomic64_t *head)
-{
- int tail_bytes;
- int tail_cycle;
- int head_cycle;
- int head_bytes;
-
- xlog_crack_grant_head(head, &head_cycle, &head_bytes);
- xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
- tail_bytes = BBTOB(tail_bytes);
- if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
- return log->l_logsize - (head_bytes - tail_bytes);
- if (tail_cycle + 1 < head_cycle)
- return 0;
-
- /* Ignore potential inconsistency when shutdown. */
- if (xlog_is_shutdown(log))
- return log->l_logsize;
-
- if (tail_cycle < head_cycle) {
- ASSERT(tail_cycle == (head_cycle - 1));
- return tail_bytes - head_bytes;
- }
-
- /*
- * The reservation head is behind the tail. In this case we just want to
- * return the size of the log as the amount of space left.
- */
- xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
- xfs_alert(log->l_mp, " tail_cycle = %d, tail_bytes = %d",
- tail_cycle, tail_bytes);
- xfs_alert(log->l_mp, " GH cycle = %d, GH bytes = %d",
- head_cycle, head_bytes);
- ASSERT(0);
- return log->l_logsize;
-}
-
-
static void
xlog_ioend_work(
struct work_struct *work)
@@ -1543,7 +1392,6 @@ xlog_alloc_log(
log->l_prev_block = -1;
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
- xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
@@ -1668,89 +1516,6 @@ out:
} /* xlog_alloc_log */
/*
- * Compute the LSN that we'd need to push the log tail towards in order to have
- * (a) enough on-disk log space to log the number of bytes specified, (b) at
- * least 25% of the log space free, and (c) at least 256 blocks free. If the
- * log free space already meets all three thresholds, this function returns
- * NULLCOMMITLSN.
- */
-xfs_lsn_t
-xlog_grant_push_threshold(
- struct xlog *log,
- int need_bytes)
-{
- xfs_lsn_t threshold_lsn = 0;
- xfs_lsn_t last_sync_lsn;
- int free_blocks;
- int free_bytes;
- int threshold_block;
- int threshold_cycle;
- int free_threshold;
-
- ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
-
- free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
- free_blocks = BTOBBT(free_bytes);
-
- /*
- * Set the threshold for the minimum number of free blocks in the
- * log to the maximum of what the caller needs, one quarter of the
- * log, and 256 blocks.
- */
- free_threshold = BTOBB(need_bytes);
- free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
- free_threshold = max(free_threshold, 256);
- if (free_blocks >= free_threshold)
- return NULLCOMMITLSN;
-
- xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
- &threshold_block);
- threshold_block += free_threshold;
- if (threshold_block >= log->l_logBBsize) {
- threshold_block -= log->l_logBBsize;
- threshold_cycle += 1;
- }
- threshold_lsn = xlog_assign_lsn(threshold_cycle,
- threshold_block);
- /*
- * Don't pass in an lsn greater than the lsn of the last
- * log record known to be on disk. Use a snapshot of the last sync lsn
- * so that it doesn't change between the compare and the set.
- */
- last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
- if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
- threshold_lsn = last_sync_lsn;
-
- return threshold_lsn;
-}
-
-/*
- * Push the tail of the log if we need to do so to maintain the free log space
- * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
- * policy which pushes on an lsn which is further along in the log once we
- * reach the high water mark. In this manner, we would be creating a low water
- * mark.
- */
-STATIC void
-xlog_grant_push_ail(
- struct xlog *log,
- int need_bytes)
-{
- xfs_lsn_t threshold_lsn;
-
- threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
- if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log))
- return;
-
- /*
- * Get the transaction layer to kick the dirty buffers out to
- * disk asynchronously. No point in trying to do this if
- * the filesystem is shutting down.
- */
- xfs_ail_push(log->l_ailp, threshold_lsn);
-}
-
-/*
* Stamp cycle number in every block
*/
STATIC void
@@ -2048,8 +1813,8 @@ xlog_sync(
if (ticket) {
ticket->t_curr_res -= roundoff;
} else {
- xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
- xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
+ xlog_grant_add_space(&log->l_reserve_head, roundoff);
+ xlog_grant_add_space(&log->l_write_head, roundoff);
}
/* put cycle number in every block */
@@ -2675,47 +2440,6 @@ xlog_get_lowest_lsn(
}
/*
- * Completion of a iclog IO does not imply that a transaction has completed, as
- * transactions can be large enough to span many iclogs. We cannot change the
- * tail of the log half way through a transaction as this may be the only
- * transaction in the log and moving the tail to point to the middle of it
- * will prevent recovery from finding the start of the transaction. Hence we
- * should only update the last_sync_lsn if this iclog contains transaction
- * completion callbacks on it.
- *
- * We have to do this before we drop the icloglock to ensure we are the only one
- * that can update it.
- *
- * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
- * the reservation grant head pushing. This is due to the fact that the push
- * target is bound by the current last_sync_lsn value. Hence if we have a large
- * amount of log space bound up in this committing transaction then the
- * last_sync_lsn value may be the limiting factor preventing tail pushing from
- * freeing space in the log. Hence once we've updated the last_sync_lsn we
- * should push the AIL to ensure the push target (and hence the grant head) is
- * no longer bound by the old log head location and can move forwards and make
- * progress again.
- */
-static void
-xlog_state_set_callback(
- struct xlog *log,
- struct xlog_in_core *iclog,
- xfs_lsn_t header_lsn)
-{
- trace_xlog_iclog_callback(iclog, _RET_IP_);
- iclog->ic_state = XLOG_STATE_CALLBACK;
-
- ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
- header_lsn) <= 0);
-
- if (list_empty_careful(&iclog->ic_callbacks))
- return;
-
- atomic64_set(&log->l_last_sync_lsn, header_lsn);
- xlog_grant_push_ail(log, 0);
-}
-
-/*
* Return true if we need to stop processing, false to continue to the next
* iclog. The caller will need to run callbacks if the iclog is returned in the
* XLOG_STATE_CALLBACK state.
@@ -2746,7 +2470,17 @@ xlog_state_iodone_process_iclog(
lowest_lsn = xlog_get_lowest_lsn(log);
if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
return false;
- xlog_state_set_callback(log, iclog, header_lsn);
+ /*
+ * If there are no callbacks on this iclog, we can mark it clean
+ * immediately and return. Otherwise we need to run the
+ * callbacks.
+ */
+ if (list_empty(&iclog->ic_callbacks)) {
+ xlog_state_clean_iclog(log, iclog);
+ return false;
+ }
+ trace_xlog_iclog_callback(iclog, _RET_IP_);
+ iclog->ic_state = XLOG_STATE_CALLBACK;
return false;
default:
/*
@@ -3000,18 +2734,15 @@ xfs_log_ticket_regrant(
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- xlog_grant_sub_space(log, &log->l_reserve_head.grant,
- ticket->t_curr_res);
- xlog_grant_sub_space(log, &log->l_write_head.grant,
- ticket->t_curr_res);
+ xlog_grant_sub_space(&log->l_reserve_head, ticket->t_curr_res);
+ xlog_grant_sub_space(&log->l_write_head, ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
trace_xfs_log_ticket_regrant_sub(log, ticket);
/* just return if we still have some of the pre-reserved space */
if (!ticket->t_cnt) {
- xlog_grant_add_space(log, &log->l_reserve_head.grant,
- ticket->t_unit_res);
+ xlog_grant_add_space(&log->l_reserve_head, ticket->t_unit_res);
trace_xfs_log_ticket_regrant_exit(log, ticket);
ticket->t_curr_res = ticket->t_unit_res;
@@ -3058,8 +2789,8 @@ xfs_log_ticket_ungrant(
bytes += ticket->t_unit_res*ticket->t_cnt;
}
- xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
- xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
+ xlog_grant_sub_space(&log->l_reserve_head, bytes);
+ xlog_grant_sub_space(&log->l_write_head, bytes);
trace_xfs_log_ticket_ungrant_exit(log, ticket);
@@ -3532,42 +3263,27 @@ xlog_ticket_alloc(
}
#if defined(DEBUG)
-/*
- * Check to make sure the grant write head didn't just over lap the tail. If
- * the cycles are the same, we can't be overlapping. Otherwise, make sure that
- * the cycles differ by exactly one and check the byte count.
- *
- * This check is run unlocked, so can give false positives. Rather than assert
- * on failures, use a warn-once flag and a panic tag to allow the admin to
- * determine if they want to panic the machine when such an error occurs. For
- * debug kernels this will have the same effect as using an assert but, unlinke
- * an assert, it can be turned off at runtime.
- */
-STATIC void
-xlog_verify_grant_tail(
- struct xlog *log)
+static void
+xlog_verify_dump_tail(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
- int tail_cycle, tail_blocks;
- int cycle, space;
-
- xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
- xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
- if (tail_cycle != cycle) {
- if (cycle - 1 != tail_cycle &&
- !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
- xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
- "%s: cycle - 1 != tail_cycle", __func__);
- }
-
- if (space > BBTOB(tail_blocks) &&
- !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
- xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
- "%s: space > BBTOB(tail_blocks)", __func__);
- }
- }
-}
-
-/* check if it will fit */
+ xfs_alert(log->l_mp,
+"ran out of log space tail 0x%llx/0x%llx, head lsn 0x%llx, head 0x%x/0x%x, prev head 0x%x/0x%x",
+ iclog ? be64_to_cpu(iclog->ic_header.h_tail_lsn) : -1,
+ atomic64_read(&log->l_tail_lsn),
+ log->l_ailp->ail_head_lsn,
+ log->l_curr_cycle, log->l_curr_block,
+ log->l_prev_cycle, log->l_prev_block);
+ xfs_alert(log->l_mp,
+"write grant 0x%llx, reserve grant 0x%llx, tail_space 0x%llx, size 0x%x, iclog flags 0x%x",
+ atomic64_read(&log->l_write_head.grant),
+ atomic64_read(&log->l_reserve_head.grant),
+ log->l_tail_space, log->l_logsize,
+ iclog ? iclog->ic_flags : -1);
+}
+
+/* Check if the new iclog will fit in the log. */
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
@@ -3576,21 +3292,34 @@ xlog_verify_tail_lsn(
xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
int blocks;
- if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
- blocks =
- log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
- if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
- xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
- } else {
- ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
+ if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
+ blocks = log->l_logBBsize -
+ (log->l_prev_block - BLOCK_LSN(tail_lsn));
+ if (blocks < BTOBB(iclog->ic_offset) +
+ BTOBB(log->l_iclog_hsize)) {
+ xfs_emerg(log->l_mp,
+ "%s: ran out of log space", __func__);
+ xlog_verify_dump_tail(log, iclog);
+ }
+ return;
+ }
- if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
+ if (CYCLE_LSN(tail_lsn) + 1 != log->l_prev_cycle) {
+ xfs_emerg(log->l_mp, "%s: head has wrapped tail.", __func__);
+ xlog_verify_dump_tail(log, iclog);
+ return;
+ }
+ if (BLOCK_LSN(tail_lsn) == log->l_prev_block) {
xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
+ xlog_verify_dump_tail(log, iclog);
+ return;
+ }
blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
- if (blocks < BTOBB(iclog->ic_offset) + 1)
- xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
- }
+ if (blocks < BTOBB(iclog->ic_offset) + 1) {
+ xfs_emerg(log->l_mp, "%s: ran out of iclog space", __func__);
+ xlog_verify_dump_tail(log, iclog);
+ }
}
/*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d69acf881153..67c539cc9305 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -156,7 +156,6 @@ int xfs_log_quiesce(struct xfs_mount *mp);
void xfs_log_clean(struct xfs_mount *mp);
bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
-xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags);
int xfs_attr_use_log_assist(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f51cbc6405c1..391a938d690c 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -694,6 +694,180 @@ xlog_cil_insert_items(
}
}
+static inline void
+xlog_cil_ail_insert_batch(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct xfs_log_item **log_items,
+ int nr_items,
+ xfs_lsn_t commit_lsn)
+{
+ int i;
+
+ spin_lock(&ailp->ail_lock);
+ /* xfs_trans_ail_update_bulk drops ailp->ail_lock */
+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
+
+ for (i = 0; i < nr_items; i++) {
+ struct xfs_log_item *lip = log_items[i];
+
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
+ }
+}
+
+/*
+ * Take the checkpoint's log vector chain of items and insert the attached log
+ * items into the AIL. This uses bulk insertion techniques to minimise AIL lock
+ * traffic.
+ *
+ * The AIL tracks log items via the start record LSN of the checkpoint,
+ * not the commit record LSN. This is because we can pipeline multiple
+ * checkpoints, and so the start record of checkpoint N+1 can be
+ * written before the commit record of checkpoint N. i.e:
+ *
+ * start N commit N
+ * +-------------+------------+----------------+
+ * start N+1 commit N+1
+ *
+ * The tail of the log cannot be moved to the LSN of commit N when all
+ * the items of that checkpoint are written back, because then the
+ * start record for N+1 is no longer in the active portion of the log
+ * and recovery will fail/corrupt the filesystem.
+ *
+ * Hence when all the log items in checkpoint N are written back, the
+ * tail of the log most now only move as far forwards as the start LSN
+ * of checkpoint N+1.
+ *
+ * If we are called with the aborted flag set, it is because a log write during
+ * a CIL checkpoint commit has failed. In this case, all the items in the
+ * checkpoint have already gone through iop_committed and iop_committing, which
+ * means that checkpoint commit abort handling is treated exactly the same as an
+ * iclog write error even though we haven't started any IO yet. Hence in this
+ * case all we need to do is iop_committed processing, followed by an
+ * iop_unpin(aborted) call.
+ *
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk the AIL to
+ * find the insertion point on every xfs_log_item_batch_insert() call. This
+ * saves a lot of needless list walking and is a net win, even though it
+ * slightly increases that amount of AIL lock traffic to set it up and tear it
+ * down.
+ */
+static void
+xlog_cil_ail_insert(
+ struct xfs_cil_ctx *ctx,
+ bool aborted)
+{
+#define LOG_ITEM_BATCH_SIZE 32
+ struct xfs_ail *ailp = ctx->cil->xc_log->l_ailp;
+ struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
+ struct xfs_log_vec *lv;
+ struct xfs_ail_cursor cur;
+ xfs_lsn_t old_head;
+ int i = 0;
+
+ /*
+ * Update the AIL head LSN with the commit record LSN of this
+ * checkpoint. As iclogs are always completed in order, this should
+ * always be the same (as iclogs can contain multiple commit records) or
+ * higher LSN than the current head. We do this before insertion of the
+ * items so that log space checks during insertion will reflect the
+ * space that this checkpoint has already consumed. We call
+ * xfs_ail_update_finish() so that tail space and space-based wakeups
+ * will be recalculated appropriately.
+ */
+ ASSERT(XFS_LSN_CMP(ctx->commit_lsn, ailp->ail_head_lsn) >= 0 ||
+ aborted);
+ spin_lock(&ailp->ail_lock);
+ xfs_trans_ail_cursor_last(ailp, &cur, ctx->start_lsn);
+ old_head = ailp->ail_head_lsn;
+ ailp->ail_head_lsn = ctx->commit_lsn;
+ /* xfs_ail_update_finish() drops the ail_lock */
+ xfs_ail_update_finish(ailp, NULLCOMMITLSN);
+
+ /*
+ * We move the AIL head forwards to account for the space used in the
+ * log before we remove that space from the grant heads. This prevents a
+ * transient condition where reservation space appears to become
+ * available on return, only for it to disappear again immediately as
+ * the AIL head update accounts in the log tail space.
+ */
+ smp_wmb(); /* paired with smp_rmb in xlog_grant_space_left */
+ xlog_grant_return_space(ailp->ail_log, old_head, ailp->ail_head_lsn);
+
+ /* unpin all the log items */
+ list_for_each_entry(lv, &ctx->lv_chain, lv_list) {
+ struct xfs_log_item *lip = lv->lv_item;
+ xfs_lsn_t item_lsn;
+
+ if (aborted)
+ set_bit(XFS_LI_ABORTED, &lip->li_flags);
+
+ if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
+ lip->li_ops->iop_release(lip);
+ continue;
+ }
+
+ if (lip->li_ops->iop_committed)
+ item_lsn = lip->li_ops->iop_committed(lip,
+ ctx->start_lsn);
+ else
+ item_lsn = ctx->start_lsn;
+
+ /* item_lsn of -1 means the item needs no further processing */
+ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
+ continue;
+
+ /*
+ * if we are aborting the operation, no point in inserting the
+ * object into the AIL as we are in a shutdown situation.
+ */
+ if (aborted) {
+ ASSERT(xlog_is_shutdown(ailp->ail_log));
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 1);
+ continue;
+ }
+
+ if (item_lsn != ctx->start_lsn) {
+
+ /*
+ * Not a bulk update option due to unusual item_lsn.
+ * Push into AIL immediately, rechecking the lsn once
+ * we have the ail lock. Then unpin the item. This does
+ * not affect the AIL cursor the bulk insert path is
+ * using.
+ */
+ spin_lock(&ailp->ail_lock);
+ if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
+ xfs_trans_ail_update(ailp, lip, item_lsn);
+ else
+ spin_unlock(&ailp->ail_lock);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
+ continue;
+ }
+
+ /* Item is a candidate for bulk AIL insert. */
+ log_items[i++] = lv->lv_item;
+ if (i >= LOG_ITEM_BATCH_SIZE) {
+ xlog_cil_ail_insert_batch(ailp, &cur, log_items,
+ LOG_ITEM_BATCH_SIZE, ctx->start_lsn);
+ i = 0;
+ }
+ }
+
+ /* make sure we insert the remainder! */
+ if (i)
+ xlog_cil_ail_insert_batch(ailp, &cur, log_items, i,
+ ctx->start_lsn);
+
+ spin_lock(&ailp->ail_lock);
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->ail_lock);
+}
+
static void
xlog_cil_free_logvec(
struct list_head *lv_chain)
@@ -733,8 +907,7 @@ xlog_cil_committed(
spin_unlock(&ctx->cil->xc_push_lock);
}
- xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain,
- ctx->start_lsn, abort);
+ xlog_cil_ail_insert(ctx, abort);
xfs_extent_busy_sort(&ctx->busy_extents.extent_list);
xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 40e22ec0fbe6..b8778a4fd6b6 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -431,18 +431,16 @@ struct xlog {
int l_prev_block; /* previous logical log block */
/*
- * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
- * read without needing to hold specific locks. To avoid operations
- * contending with other hot objects, place each of them on a separate
- * cacheline.
+ * l_tail_lsn is atomic so it can be set and read without needing to
+ * hold specific locks. To avoid operations contending with other hot
+ * objects, it on a separate cacheline.
*/
- /* lsn of last LR on disk */
- atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
/* lsn of 1st LR with unflushed * buffers */
atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
struct xlog_grant_head l_reserve_head;
struct xlog_grant_head l_write_head;
+ uint64_t l_tail_space;
struct xfs_kobj l_kobj;
@@ -546,36 +544,6 @@ xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
}
/*
- * When we crack the grant head, we sample it first so that the value will not
- * change while we are cracking it into the component values. This means we
- * will always get consistent component values to work from.
- */
-static inline void
-xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
-{
- *cycle = val >> 32;
- *space = val & 0xffffffff;
-}
-
-static inline void
-xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
-{
- xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
-}
-
-static inline int64_t
-xlog_assign_grant_head_val(int cycle, int space)
-{
- return ((int64_t)cycle << 32) | space;
-}
-
-static inline void
-xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
-{
- atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
-}
-
-/*
* Committed Item List interfaces
*/
int xlog_cil_init(struct xlog *log);
@@ -623,6 +591,27 @@ xlog_wait(
int xlog_wait_on_iclog(struct xlog_in_core *iclog)
__releases(iclog->ic_log->l_icloglock);
+/* Calculate the distance between two LSNs in bytes */
+static inline uint64_t
+xlog_lsn_sub(
+ struct xlog *log,
+ xfs_lsn_t high,
+ xfs_lsn_t low)
+{
+ uint32_t hi_cycle = CYCLE_LSN(high);
+ uint32_t hi_block = BLOCK_LSN(high);
+ uint32_t lo_cycle = CYCLE_LSN(low);
+ uint32_t lo_block = BLOCK_LSN(low);
+
+ if (hi_cycle == lo_cycle)
+ return BBTOB(hi_block - lo_block);
+ ASSERT((hi_cycle == lo_cycle + 1) || xlog_is_shutdown(log));
+ return (uint64_t)log->l_logsize - BBTOB(lo_block - hi_block);
+}
+
+void xlog_grant_return_space(struct xlog *log, xfs_lsn_t old_head,
+ xfs_lsn_t new_head);
+
/*
* The LSN is valid so long as it is behind the current LSN. If it isn't, this
* means that the next log record that includes this metadata could have a
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4fe627991e86..4423dd344239 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1177,8 +1177,8 @@ xlog_check_unmount_rec(
*/
xlog_assign_atomic_lsn(&log->l_tail_lsn,
log->l_curr_cycle, after_umount_blk);
- xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
- log->l_curr_cycle, after_umount_blk);
+ log->l_ailp->ail_head_lsn =
+ atomic64_read(&log->l_tail_lsn);
*tail_blk = after_umount_blk;
*clean = true;
@@ -1212,11 +1212,7 @@ xlog_set_state(
if (bump_cycle)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
- atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
- xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
- xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
+ log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn);
}
/*
@@ -2489,7 +2485,10 @@ xlog_recover_process_data(
ohead = (struct xlog_op_header *)dp;
dp += sizeof(*ohead);
- ASSERT(dp <= end);
+ if (dp > end) {
+ xfs_warn(log->l_mp, "%s: op header overrun", __func__);
+ return -EFSCORRUPTED;
+ }
/* errors will abort recovery */
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
@@ -3363,14 +3362,13 @@ xlog_do_recover(
/*
* We now update the tail_lsn since much of the recovery has completed
- * and there may be space available to use. If there were no extent
- * or iunlinks, we can free up the entire log and set the tail_lsn to
- * be the last_sync_lsn. This was set in xlog_find_tail to be the
- * lsn of the last known good LR on disk. If there are extent frees
- * or iunlinks they will have some entries in the AIL; so we look at
- * the AIL to determine how to set the tail_lsn.
+ * and there may be space available to use. If there were no extent or
+ * iunlinks, we can free up the entire log. This was set in
+ * xlog_find_tail to be the lsn of the last known good LR on disk. If
+ * there are extent frees or iunlinks they will have some entries in the
+ * AIL; so we look at the AIL to determine how to set the tail_lsn.
*/
- xlog_assign_tail_lsn(mp);
+ xfs_ail_assign_tail_lsn(log->l_ailp);
/*
* Now that we've finished replaying all buffer and inode updates,
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 47120b745c47..9490b913a4ab 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -793,12 +793,15 @@ xfs_qm_qino_alloc(
return error;
if (need_alloc) {
+ struct xfs_icreate_args args = {
+ .mode = S_IFREG,
+ .flags = XFS_ICREATE_UNLINKABLE,
+ };
xfs_ino_t ino;
error = xfs_dialloc(&tp, 0, S_IFREG, &ino);
if (!error)
- error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino,
- S_IFREG, 1, 0, 0, false, ipp);
+ error = xfs_icreate(tp, ino, &args, ipp);
if (error) {
xfs_trans_cancel(tp);
return error;
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 271c1021c733..a11436579877 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -11,7 +11,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
-#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 14919b33e4fe..27398512b179 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -21,6 +21,8 @@
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_trace.h"
struct kmem_cache *xfs_cui_cache;
struct kmem_cache *xfs_cud_cache;
@@ -227,6 +229,11 @@ static const struct xfs_item_ops xfs_cud_item_ops = {
.iop_intent = xfs_cud_item_intent,
};
+static inline struct xfs_refcount_intent *ci_entry(const struct list_head *e)
+{
+ return list_entry(e, struct xfs_refcount_intent, ri_list);
+}
+
/* Sort refcount intents by AG. */
static int
xfs_refcount_update_diff_items(
@@ -234,34 +241,12 @@ xfs_refcount_update_diff_items(
const struct list_head *a,
const struct list_head *b)
{
- struct xfs_refcount_intent *ra;
- struct xfs_refcount_intent *rb;
-
- ra = container_of(a, struct xfs_refcount_intent, ri_list);
- rb = container_of(b, struct xfs_refcount_intent, ri_list);
+ struct xfs_refcount_intent *ra = ci_entry(a);
+ struct xfs_refcount_intent *rb = ci_entry(b);
return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno;
}
-/* Set the phys extent flags for this reverse mapping. */
-static void
-xfs_trans_set_refcount_flags(
- struct xfs_phys_extent *pmap,
- enum xfs_refcount_intent_type type)
-{
- pmap->pe_flags = 0;
- switch (type) {
- case XFS_REFCOUNT_INCREASE:
- case XFS_REFCOUNT_DECREASE:
- case XFS_REFCOUNT_ALLOC_COW:
- case XFS_REFCOUNT_FREE_COW:
- pmap->pe_flags |= type;
- break;
- default:
- ASSERT(0);
- }
-}
-
/* Log refcount updates in the intent item. */
STATIC void
xfs_refcount_update_log_item(
@@ -282,7 +267,18 @@ xfs_refcount_update_log_item(
pmap = &cuip->cui_format.cui_extents[next_extent];
pmap->pe_startblock = ri->ri_startblock;
pmap->pe_len = ri->ri_blockcount;
- xfs_trans_set_refcount_flags(pmap, ri->ri_type);
+
+ pmap->pe_flags = 0;
+ switch (ri->ri_type) {
+ case XFS_REFCOUNT_INCREASE:
+ case XFS_REFCOUNT_DECREASE:
+ case XFS_REFCOUNT_ALLOC_COW:
+ case XFS_REFCOUNT_FREE_COW:
+ pmap->pe_flags |= ri->ri_type;
+ break;
+ default:
+ ASSERT(0);
+ }
}
static struct xfs_log_item *
@@ -324,24 +320,29 @@ xfs_refcount_update_create_done(
return &cudp->cud_item;
}
-/* Take a passive ref to the AG containing the space we're refcounting. */
+/* Add this deferred CUI to the transaction. */
void
-xfs_refcount_update_get_group(
- struct xfs_mount *mp,
+xfs_refcount_defer_add(
+ struct xfs_trans *tp,
struct xfs_refcount_intent *ri)
{
- xfs_agnumber_t agno;
+ struct xfs_mount *mp = tp->t_mountp;
+
+ trace_xfs_refcount_defer(mp, ri);
- agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock);
- ri->ri_pag = xfs_perag_intent_get(mp, agno);
+ ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_startblock);
+ xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
}
-/* Release a passive AG ref after finishing refcounting work. */
-static inline void
-xfs_refcount_update_put_group(
- struct xfs_refcount_intent *ri)
+/* Cancel a deferred refcount update. */
+STATIC void
+xfs_refcount_update_cancel_item(
+ struct list_head *item)
{
+ struct xfs_refcount_intent *ri = ci_entry(item);
+
xfs_perag_intent_put(ri->ri_pag);
+ kmem_cache_free(xfs_refcount_intent_cache, ri);
}
/* Process a deferred refcount update. */
@@ -352,11 +353,9 @@ xfs_refcount_update_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_refcount_intent *ri;
+ struct xfs_refcount_intent *ri = ci_entry(item);
int error;
- ri = container_of(item, struct xfs_refcount_intent, ri_list);
-
/* Did we run out of reservation? Requeue what we didn't finish. */
error = xfs_refcount_finish_one(tp, ri, state);
if (!error && ri->ri_blockcount > 0) {
@@ -365,30 +364,33 @@ xfs_refcount_update_finish_item(
return -EAGAIN;
}
- xfs_refcount_update_put_group(ri);
- kmem_cache_free(xfs_refcount_intent_cache, ri);
+ xfs_refcount_update_cancel_item(item);
return error;
}
-/* Abort all pending CUIs. */
+/* Clean up after calling xfs_refcount_finish_one. */
STATIC void
-xfs_refcount_update_abort_intent(
- struct xfs_log_item *intent)
+xfs_refcount_finish_one_cleanup(
+ struct xfs_trans *tp,
+ struct xfs_btree_cur *rcur,
+ int error)
{
- xfs_cui_release(CUI_ITEM(intent));
+ struct xfs_buf *agbp;
+
+ if (rcur == NULL)
+ return;
+ agbp = rcur->bc_ag.agbp;
+ xfs_btree_del_cursor(rcur, error);
+ if (error)
+ xfs_trans_brelse(tp, agbp);
}
-/* Cancel a deferred refcount update. */
+/* Abort all pending CUIs. */
STATIC void
-xfs_refcount_update_cancel_item(
- struct list_head *item)
+xfs_refcount_update_abort_intent(
+ struct xfs_log_item *intent)
{
- struct xfs_refcount_intent *ri;
-
- ri = container_of(item, struct xfs_refcount_intent, ri_list);
-
- xfs_refcount_update_put_group(ri);
- kmem_cache_free(xfs_refcount_intent_cache, ri);
+ xfs_cui_release(CUI_ITEM(intent));
}
/* Is this recovered CUI ok? */
@@ -429,7 +431,7 @@ xfs_cui_recover_work(
ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
ri->ri_startblock = pmap->pe_startblock;
ri->ri_blockcount = pmap->pe_len;
- xfs_refcount_update_get_group(mp, ri);
+ ri->ri_pag = xfs_perag_intent_get(mp, pmap->pe_startblock);
xfs_defer_add_item(dfp, &ri->ri_list);
}
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index eb0ab13682d0..bfee8f30c63c 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -71,4 +71,9 @@ struct xfs_cud_log_item {
extern struct kmem_cache *xfs_cui_cache;
extern struct kmem_cache *xfs_cud_cache;
+struct xfs_refcount_intent;
+
+void xfs_refcount_defer_add(struct xfs_trans *tp,
+ struct xfs_refcount_intent *ri);
+
#endif /* __XFS_REFCOUNT_ITEM_H__ */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 265a2a418bc7..6fde6ec8092f 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -603,7 +603,7 @@ xfs_reflink_cancel_cow_blocks(
error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
break;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 65c5dfe17ecf..fb55e4ce49fa 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -6,16 +6,6 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
-static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip)
-{
- return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
-}
-
-static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
-{
- return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
-}
-
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared);
int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index e473124e29cc..88b5580e1e19 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -21,6 +21,8 @@
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_trace.h"
struct kmem_cache *xfs_rui_cache;
struct kmem_cache *xfs_rud_cache;
@@ -226,47 +228,9 @@ static const struct xfs_item_ops xfs_rud_item_ops = {
.iop_intent = xfs_rud_item_intent,
};
-/* Set the map extent flags for this reverse mapping. */
-static void
-xfs_trans_set_rmap_flags(
- struct xfs_map_extent *map,
- enum xfs_rmap_intent_type type,
- int whichfork,
- xfs_exntst_t state)
+static inline struct xfs_rmap_intent *ri_entry(const struct list_head *e)
{
- map->me_flags = 0;
- if (state == XFS_EXT_UNWRITTEN)
- map->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
- if (whichfork == XFS_ATTR_FORK)
- map->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
- switch (type) {
- case XFS_RMAP_MAP:
- map->me_flags |= XFS_RMAP_EXTENT_MAP;
- break;
- case XFS_RMAP_MAP_SHARED:
- map->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
- break;
- case XFS_RMAP_UNMAP:
- map->me_flags |= XFS_RMAP_EXTENT_UNMAP;
- break;
- case XFS_RMAP_UNMAP_SHARED:
- map->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
- break;
- case XFS_RMAP_CONVERT:
- map->me_flags |= XFS_RMAP_EXTENT_CONVERT;
- break;
- case XFS_RMAP_CONVERT_SHARED:
- map->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
- break;
- case XFS_RMAP_ALLOC:
- map->me_flags |= XFS_RMAP_EXTENT_ALLOC;
- break;
- case XFS_RMAP_FREE:
- map->me_flags |= XFS_RMAP_EXTENT_FREE;
- break;
- default:
- ASSERT(0);
- }
+ return list_entry(e, struct xfs_rmap_intent, ri_list);
}
/* Sort rmap intents by AG. */
@@ -276,11 +240,8 @@ xfs_rmap_update_diff_items(
const struct list_head *a,
const struct list_head *b)
{
- struct xfs_rmap_intent *ra;
- struct xfs_rmap_intent *rb;
-
- ra = container_of(a, struct xfs_rmap_intent, ri_list);
- rb = container_of(b, struct xfs_rmap_intent, ri_list);
+ struct xfs_rmap_intent *ra = ri_entry(a);
+ struct xfs_rmap_intent *rb = ri_entry(b);
return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno;
}
@@ -307,8 +268,40 @@ xfs_rmap_update_log_item(
map->me_startblock = ri->ri_bmap.br_startblock;
map->me_startoff = ri->ri_bmap.br_startoff;
map->me_len = ri->ri_bmap.br_blockcount;
- xfs_trans_set_rmap_flags(map, ri->ri_type, ri->ri_whichfork,
- ri->ri_bmap.br_state);
+
+ map->me_flags = 0;
+ if (ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN)
+ map->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+ if (ri->ri_whichfork == XFS_ATTR_FORK)
+ map->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+ switch (ri->ri_type) {
+ case XFS_RMAP_MAP:
+ map->me_flags |= XFS_RMAP_EXTENT_MAP;
+ break;
+ case XFS_RMAP_MAP_SHARED:
+ map->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
+ break;
+ case XFS_RMAP_UNMAP:
+ map->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+ break;
+ case XFS_RMAP_UNMAP_SHARED:
+ map->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
+ break;
+ case XFS_RMAP_CONVERT:
+ map->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+ break;
+ case XFS_RMAP_CONVERT_SHARED:
+ map->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
+ break;
+ case XFS_RMAP_ALLOC:
+ map->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+ break;
+ case XFS_RMAP_FREE:
+ map->me_flags |= XFS_RMAP_EXTENT_FREE;
+ break;
+ default:
+ ASSERT(0);
+ }
}
static struct xfs_log_item *
@@ -350,24 +343,29 @@ xfs_rmap_update_create_done(
return &rudp->rud_item;
}
-/* Take a passive ref to the AG containing the space we're rmapping. */
+/* Add this deferred RUI to the transaction. */
void
-xfs_rmap_update_get_group(
- struct xfs_mount *mp,
+xfs_rmap_defer_add(
+ struct xfs_trans *tp,
struct xfs_rmap_intent *ri)
{
- xfs_agnumber_t agno;
+ struct xfs_mount *mp = tp->t_mountp;
+
+ trace_xfs_rmap_defer(mp, ri);
- agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock);
- ri->ri_pag = xfs_perag_intent_get(mp, agno);
+ ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_bmap.br_startblock);
+ xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
}
-/* Release a passive AG ref after finishing rmapping work. */
-static inline void
-xfs_rmap_update_put_group(
- struct xfs_rmap_intent *ri)
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+ struct list_head *item)
{
+ struct xfs_rmap_intent *ri = ri_entry(item);
+
xfs_perag_intent_put(ri->ri_pag);
+ kmem_cache_free(xfs_rmap_intent_cache, ri);
}
/* Process a deferred rmap update. */
@@ -378,37 +376,38 @@ xfs_rmap_update_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_rmap_intent *ri;
+ struct xfs_rmap_intent *ri = ri_entry(item);
int error;
- ri = container_of(item, struct xfs_rmap_intent, ri_list);
-
error = xfs_rmap_finish_one(tp, ri, state);
- xfs_rmap_update_put_group(ri);
- kmem_cache_free(xfs_rmap_intent_cache, ri);
+ xfs_rmap_update_cancel_item(item);
return error;
}
-/* Abort all pending RUIs. */
+/* Clean up after calling xfs_rmap_finish_one. */
STATIC void
-xfs_rmap_update_abort_intent(
- struct xfs_log_item *intent)
+xfs_rmap_finish_one_cleanup(
+ struct xfs_trans *tp,
+ struct xfs_btree_cur *rcur,
+ int error)
{
- xfs_rui_release(RUI_ITEM(intent));
+ struct xfs_buf *agbp = NULL;
+
+ if (rcur == NULL)
+ return;
+ agbp = rcur->bc_ag.agbp;
+ xfs_btree_del_cursor(rcur, error);
+ if (error && agbp)
+ xfs_trans_brelse(tp, agbp);
}
-/* Cancel a deferred rmap update. */
+/* Abort all pending RUIs. */
STATIC void
-xfs_rmap_update_cancel_item(
- struct list_head *item)
+xfs_rmap_update_abort_intent(
+ struct xfs_log_item *intent)
{
- struct xfs_rmap_intent *ri;
-
- ri = container_of(item, struct xfs_rmap_intent, ri_list);
-
- xfs_rmap_update_put_group(ri);
- kmem_cache_free(xfs_rmap_intent_cache, ri);
+ xfs_rui_release(RUI_ITEM(intent));
}
/* Is this recovered RUI ok? */
@@ -495,7 +494,7 @@ xfs_rui_recover_work(
ri->ri_bmap.br_blockcount = map->me_len;
ri->ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- xfs_rmap_update_get_group(mp, ri);
+ ri->ri_pag = xfs_perag_intent_get(mp, map->me_startblock);
xfs_defer_add_item(dfp, &ri->ri_list);
}
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 802e5119eaca..40d331555675 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -71,4 +71,8 @@ struct xfs_rud_log_item {
extern struct kmem_cache *xfs_rui_cache;
extern struct kmem_cache *xfs_rud_cache;
+struct xfs_rmap_intent;
+
+void xfs_rmap_defer_add(struct xfs_trans *tp, struct xfs_rmap_intent *ri);
+
#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5a7ddfed1bb8..0c3e96c621a6 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -12,6 +12,7 @@
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap_util.h"
@@ -1382,7 +1383,7 @@ retry:
start = 0;
} else if (xfs_bmap_adjacent(ap)) {
start = xfs_rtb_to_rtx(mp, ap->blkno);
- } else if (ap->eof && ap->offset == 0) {
+ } else if (ap->datatype & XFS_ALLOC_INITIAL_USER_DATA) {
/*
* If it's an allocation to an empty file at offset 0, pick an
* extent that will space things out in the rt area.
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 17aee806ec2e..77f19e2f66e0 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -90,19 +90,25 @@ xfs_symlink(
struct xfs_inode **ipp)
{
struct xfs_mount *mp = dp->i_mount;
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = dp,
+ .mode = S_IFLNK | (mode & ~S_IFMT),
+ };
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = link_name,
+ };
struct xfs_trans *tp = NULL;
- struct xfs_inode *ip = NULL;
int error = 0;
int pathlen;
bool unlock_dp_on_error = false;
xfs_filblks_t fs_blocks;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
uint resblks;
xfs_ino_t ino;
- struct xfs_parent_args *ppargs;
*ipp = NULL;
@@ -119,15 +125,8 @@ xfs_symlink(
return -ENAMETOOLONG;
ASSERT(pathlen > 0);
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -143,7 +142,7 @@ xfs_symlink(
fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks);
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto out_release_dquots;
@@ -168,9 +167,7 @@ xfs_symlink(
*/
error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino,
- S_IFLNK | (mode & ~S_IFMT), 1, 0, prid,
- xfs_has_parent(mp), &ip);
+ error = xfs_icreate(tp, ino, &args, &du.ip);
if (error)
goto out_trans_cancel;
@@ -186,33 +183,22 @@ xfs_symlink(
/*
* Also attach the dquot(s) to it, if applicable.
*/
- xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+ xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
resblks -= XFS_IALLOC_SPACE_RES(mp);
- error = xfs_symlink_write_target(tp, ip, ip->i_ino, target_path,
+ error = xfs_symlink_write_target(tp, du.ip, du.ip->i_ino, target_path,
pathlen, fs_blocks, resblks);
if (error)
goto out_trans_cancel;
resblks -= fs_blocks;
- i_size_write(VFS_I(ip), ip->i_disk_size);
+ i_size_write(VFS_I(du.ip), du.ip->i_disk_size);
/*
* Create the directory entry for the symlink.
*/
- error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks);
+ error = xfs_dir_create_child(tp, resblks, &du);
if (error)
goto out_trans_cancel;
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
- /* Add parent pointer for the new symlink. */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, dp, link_name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- xfs_dir_update_hook(dp, ip, 1, link_name);
/*
* If this is a synchronous mount, make sure that the
@@ -230,10 +216,10 @@ xfs_symlink(
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
- *ipp = ip;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ *ipp = du.ip;
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -244,13 +230,13 @@ out_release_inode:
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_finish_inode_setup(ip);
- xfs_irele(ip);
+ if (du.ip) {
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
+ xfs_finish_inode_setup(du.ip);
+ xfs_irele(du.ip);
}
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index d2391eec37fe..60cb5318fdae 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -432,39 +432,30 @@ log_tail_lsn_show(
XFS_SYSFS_ATTR_RO(log_tail_lsn);
STATIC ssize_t
-reserve_grant_head_show(
+reserve_grant_head_bytes_show(
struct kobject *kobject,
char *buf)
-
{
- int cycle;
- int bytes;
- struct xlog *log = to_xlog(kobject);
-
- xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
- return sysfs_emit(buf, "%d:%d\n", cycle, bytes);
+ return sysfs_emit(buf, "%lld\n",
+ atomic64_read(&to_xlog(kobject)->l_reserve_head.grant));
}
-XFS_SYSFS_ATTR_RO(reserve_grant_head);
+XFS_SYSFS_ATTR_RO(reserve_grant_head_bytes);
STATIC ssize_t
-write_grant_head_show(
+write_grant_head_bytes_show(
struct kobject *kobject,
char *buf)
{
- int cycle;
- int bytes;
- struct xlog *log = to_xlog(kobject);
-
- xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
- return sysfs_emit(buf, "%d:%d\n", cycle, bytes);
+ return sysfs_emit(buf, "%lld\n",
+ atomic64_read(&to_xlog(kobject)->l_write_head.grant));
}
-XFS_SYSFS_ATTR_RO(write_grant_head);
+XFS_SYSFS_ATTR_RO(write_grant_head_bytes);
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
- ATTR_LIST(reserve_grant_head),
- ATTR_LIST(write_grant_head),
+ ATTR_LIST(reserve_grant_head_bytes),
+ ATTR_LIST(write_grant_head_bytes),
NULL,
};
ATTRIBUTE_GROUPS(xfs_log);
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 9c7fbaae2717..2af9f274e872 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,6 +22,7 @@
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
@@ -38,10 +39,11 @@
#include "xfs_iomap.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
-#include "xfs_bmap.h"
#include "xfs_exchmaps.h"
#include "xfs_exchrange.h"
#include "xfs_parent.h"
+#include "xfs_rmap.h"
+#include "xfs_refcount.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 25ff6fe1eb6c..5646d300b286 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -90,6 +90,9 @@ struct xfs_exchrange;
struct xfs_getparents;
struct xfs_parent_irec;
struct xfs_attrlist_cursor_kern;
+struct xfs_extent_free_item;
+struct xfs_rmap_intent;
+struct xfs_refcount_intent;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -1227,6 +1230,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
TP_ARGS(log, tic),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(unsigned long, tic)
__field(char, ocnt)
__field(char, cnt)
__field(int, curr_res)
@@ -1234,16 +1238,16 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(unsigned int, flags)
__field(int, reserveq)
__field(int, writeq)
- __field(int, grant_reserve_cycle)
- __field(int, grant_reserve_bytes)
- __field(int, grant_write_cycle)
- __field(int, grant_write_bytes)
+ __field(uint64_t, grant_reserve_bytes)
+ __field(uint64_t, grant_write_bytes)
+ __field(uint64_t, tail_space)
__field(int, curr_cycle)
__field(int, curr_block)
__field(xfs_lsn_t, tail_lsn)
),
TP_fast_assign(
__entry->dev = log->l_mp->m_super->s_dev;
+ __entry->tic = (unsigned long)tic;
__entry->ocnt = tic->t_ocnt;
__entry->cnt = tic->t_cnt;
__entry->curr_res = tic->t_curr_res;
@@ -1251,23 +1255,22 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->flags = tic->t_flags;
__entry->reserveq = list_empty(&log->l_reserve_head.waiters);
__entry->writeq = list_empty(&log->l_write_head.waiters);
- xlog_crack_grant_head(&log->l_reserve_head.grant,
- &__entry->grant_reserve_cycle,
- &__entry->grant_reserve_bytes);
- xlog_crack_grant_head(&log->l_write_head.grant,
- &__entry->grant_write_cycle,
- &__entry->grant_write_bytes);
+ __entry->tail_space = READ_ONCE(log->l_tail_space);
+ __entry->grant_reserve_bytes = __entry->tail_space +
+ atomic64_read(&log->l_reserve_head.grant);
+ __entry->grant_write_bytes = __entry->tail_space +
+ atomic64_read(&log->l_write_head.grant);
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
),
- TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "
- "t_unit_res %u t_flags %s reserveq %s "
- "writeq %s grant_reserve_cycle %d "
- "grant_reserve_bytes %d grant_write_cycle %d "
- "grant_write_bytes %d curr_cycle %d curr_block %d "
+ TP_printk("dev %d:%d tic 0x%lx t_ocnt %u t_cnt %u t_curr_res %u "
+ "t_unit_res %u t_flags %s reserveq %s writeq %s "
+ "tail space %llu grant_reserve_bytes %llu "
+ "grant_write_bytes %llu curr_cycle %d curr_block %d "
"tail_cycle %d tail_block %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->tic,
__entry->ocnt,
__entry->cnt,
__entry->curr_res,
@@ -1275,9 +1278,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
__entry->reserveq ? "empty" : "active",
__entry->writeq ? "empty" : "active",
- __entry->grant_reserve_cycle,
+ __entry->tail_space,
__entry->grant_reserve_bytes,
- __entry->grant_write_cycle,
__entry->grant_write_bytes,
__entry->curr_cycle,
__entry->curr_block,
@@ -1305,6 +1307,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant);
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub);
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
+DEFINE_LOGGRANT_EVENT(xfs_log_cil_return);
DECLARE_EVENT_CLASS(xfs_log_item_class,
TP_PROTO(struct xfs_log_item *lip),
@@ -1404,19 +1407,19 @@ TRACE_EVENT(xfs_log_assign_tail_lsn,
__field(dev_t, dev)
__field(xfs_lsn_t, new_lsn)
__field(xfs_lsn_t, old_lsn)
- __field(xfs_lsn_t, last_sync_lsn)
+ __field(xfs_lsn_t, head_lsn)
),
TP_fast_assign(
__entry->dev = log->l_mp->m_super->s_dev;
__entry->new_lsn = new_lsn;
__entry->old_lsn = atomic64_read(&log->l_tail_lsn);
- __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+ __entry->head_lsn = log->l_ailp->ail_head_lsn;
),
- TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d",
+ TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
- CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn))
+ CYCLE_LSN(__entry->head_lsn), BLOCK_LSN(__entry->head_lsn))
)
DECLARE_EVENT_CLASS(xfs_file_class,
@@ -2460,6 +2463,35 @@ DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
DEFINE_DISCARD_EVENT(xfs_discard_exclude);
DEFINE_DISCARD_EVENT(xfs_discard_busy);
+DECLARE_EVENT_CLASS(xfs_rtdiscard_class,
+ TP_PROTO(struct xfs_mount *mp,
+ xfs_rtblock_t rtbno, xfs_rtblock_t len),
+ TP_ARGS(mp, rtbno, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rtblock_t, rtbno)
+ __field(xfs_rtblock_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_rtdev_targp->bt_dev;
+ __entry->rtbno = rtbno;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d rtbno 0x%llx rtbcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rtbno,
+ __entry->len)
+)
+
+#define DEFINE_RTDISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_rtdiscard_class, name, \
+ TP_PROTO(struct xfs_mount *mp, \
+ xfs_rtblock_t rtbno, xfs_rtblock_t len), \
+ TP_ARGS(mp, rtbno, len))
+DEFINE_RTDISCARD_EVENT(xfs_discard_rtextent);
+DEFINE_RTDISCARD_EVENT(xfs_discard_rttoosmall);
+DEFINE_RTDISCARD_EVENT(xfs_discard_rtrelax);
+
DECLARE_EVENT_CLASS(xfs_btree_cur_class,
TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
TP_ARGS(cur, level, bp),
@@ -2681,41 +2713,37 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_pause);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause);
DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- int type, xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, type, agbno, len),
+ TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free),
+ TP_ARGS(mp, free),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
- __field(int, type)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
+ __field(unsigned int, flags)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->type = type;
- __entry->agbno = agbno;
- __entry->len = len;
+ __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+ __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+ __entry->len = free->xefi_blockcount;
+ __entry->flags = free->xefi_flags;
),
- TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
__entry->agno,
__entry->agbno,
- __entry->len)
+ __entry->len,
+ __entry->flags)
);
#define DEFINE_FREE_EXTENT_DEFERRED_EVENT(name) \
DEFINE_EVENT(xfs_free_extent_deferred_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- int type, \
- xfs_agblock_t bno, \
- xfs_extlen_t len), \
- TP_ARGS(mp, agno, type, bno, len))
-DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_defer);
-DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_deferred);
+ TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), \
+ TP_ARGS(mp, free))
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred);
+DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer);
+DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred);
DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp,
@@ -2760,10 +2788,10 @@ DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item);
/* rmap tracepoints */
DECLARE_EVENT_CLASS(xfs_rmap_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ TP_PROTO(struct xfs_btree_cur *cur,
xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
const struct xfs_owner_info *oinfo),
- TP_ARGS(mp, agno, agbno, len, unwritten, oinfo),
+ TP_ARGS(cur, agbno, len, unwritten, oinfo),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2774,8 +2802,8 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
__field(unsigned long, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = oinfo->oi_owner;
@@ -2795,57 +2823,109 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
);
#define DEFINE_RMAP_EVENT(name) \
DEFINE_EVENT(xfs_rmap_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+ TP_PROTO(struct xfs_btree_cur *cur, \
xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
const struct xfs_owner_info *oinfo), \
- TP_ARGS(mp, agno, agbno, len, unwritten, oinfo))
+ TP_ARGS(cur, agbno, len, unwritten, oinfo))
-/* simple AG-based error/%ip tracepoint class */
-DECLARE_EVENT_CLASS(xfs_ag_error_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
+/* btree cursor error/%ip tracepoint class */
+DECLARE_EVENT_CLASS(xfs_btree_error_class,
+ TP_PROTO(struct xfs_btree_cur *cur, int error,
unsigned long caller_ip),
- TP_ARGS(mp, agno, error, caller_ip),
+ TP_ARGS(cur, error, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, ino)
__field(int, error)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_INODE:
+ __entry->agno = 0;
+ __entry->ino = cur->bc_ino.ip->i_ino;
+ break;
+ case XFS_BTREE_TYPE_AG:
+ __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->ino = 0;
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ __entry->agno = 0;
+ __entry->ino = 0;
+ break;
+ }
__entry->error = error;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d agno 0x%x error %d caller %pS",
+ TP_printk("dev %d:%d agno 0x%x ino 0x%llx error %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __entry->ino,
__entry->error,
(char *)__entry->caller_ip)
);
-#define DEFINE_AG_ERROR_EVENT(name) \
-DEFINE_EVENT(xfs_ag_error_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
+#define DEFINE_BTREE_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_btree_error_class, name, \
+ TP_PROTO(struct xfs_btree_cur *cur, int error, \
unsigned long caller_ip), \
- TP_ARGS(mp, agno, error, caller_ip))
+ TP_ARGS(cur, error, caller_ip))
DEFINE_RMAP_EVENT(xfs_rmap_unmap);
DEFINE_RMAP_EVENT(xfs_rmap_unmap_done);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_unmap_error);
DEFINE_RMAP_EVENT(xfs_rmap_map);
DEFINE_RMAP_EVENT(xfs_rmap_map_done);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_map_error);
DEFINE_RMAP_EVENT(xfs_rmap_convert);
DEFINE_RMAP_EVENT(xfs_rmap_convert_done);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_convert_error);
+
+TRACE_EVENT(xfs_rmap_convert_state,
+ TP_PROTO(struct xfs_btree_cur *cur, int state,
+ unsigned long caller_ip),
+ TP_ARGS(cur, state, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, ino)
+ __field(int, state)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_INODE:
+ __entry->agno = 0;
+ __entry->ino = cur->bc_ino.ip->i_ino;
+ break;
+ case XFS_BTREE_TYPE_AG:
+ __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->ino = 0;
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ __entry->agno = 0;
+ __entry->ino = 0;
+ break;
+ }
+ __entry->state = state;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno 0x%x ino 0x%llx state %d caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->ino,
+ __entry->state,
+ (char *)__entry->caller_ip)
+);
DECLARE_EVENT_CLASS(xfs_rmapbt_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ TP_PROTO(struct xfs_btree_cur *cur,
xfs_agblock_t agbno, xfs_extlen_t len,
uint64_t owner, uint64_t offset, unsigned int flags),
- TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
+ TP_ARGS(cur, agbno, len, owner, offset, flags),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2856,8 +2936,8 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = owner;
@@ -2875,25 +2955,27 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
);
#define DEFINE_RMAPBT_EVENT(name) \
DEFINE_EVENT(xfs_rmapbt_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+ TP_PROTO(struct xfs_btree_cur *cur, \
xfs_agblock_t agbno, xfs_extlen_t len, \
uint64_t owner, uint64_t offset, unsigned int flags), \
- TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
+ TP_ARGS(cur, agbno, len, owner, offset, flags))
+
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC);
+TRACE_DEFINE_ENUM(XFS_RMAP_FREE);
DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- int op,
- xfs_agblock_t agbno,
- xfs_ino_t ino,
- int whichfork,
- xfs_fileoff_t offset,
- xfs_filblks_t len,
- xfs_exntst_t state),
- TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state),
+ TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri),
+ TP_ARGS(mp, ri),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(unsigned long long, owner)
__field(xfs_agnumber_t, agno)
- __field(xfs_ino_t, ino)
__field(xfs_agblock_t, agbno)
__field(int, whichfork)
__field(xfs_fileoff_t, l_loff)
@@ -2903,21 +2985,22 @@ DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->ino = ino;
- __entry->agbno = agbno;
- __entry->whichfork = whichfork;
- __entry->l_loff = offset;
- __entry->l_len = len;
- __entry->l_state = state;
- __entry->op = op;
- ),
- TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
+ __entry->agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock);
+ __entry->agbno = XFS_FSB_TO_AGBNO(mp,
+ ri->ri_bmap.br_startblock);
+ __entry->owner = ri->ri_owner;
+ __entry->whichfork = ri->ri_whichfork;
+ __entry->l_loff = ri->ri_bmap.br_startoff;
+ __entry->l_len = ri->ri_bmap.br_blockcount;
+ __entry->l_state = ri->ri_bmap.br_state;
+ __entry->op = ri->ri_type;
+ ),
+ TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->op,
+ __print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS),
__entry->agno,
__entry->agbno,
- __entry->ino,
+ __entry->owner,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->l_loff,
__entry->l_len,
@@ -2925,24 +3008,17 @@ DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
);
#define DEFINE_RMAP_DEFERRED_EVENT(name) \
DEFINE_EVENT(xfs_rmap_deferred_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- int op, \
- xfs_agblock_t agbno, \
- xfs_ino_t ino, \
- int whichfork, \
- xfs_fileoff_t offset, \
- xfs_filblks_t len, \
- xfs_exntst_t state), \
- TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state))
+ TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri), \
+ TP_ARGS(mp, ri))
DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer);
DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred);
DEFINE_RMAPBT_EVENT(xfs_rmap_update);
DEFINE_RMAPBT_EVENT(xfs_rmap_insert);
DEFINE_RMAPBT_EVENT(xfs_rmap_delete);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error);
-DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_insert_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_delete_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_rmap_update_error);
DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_candidate);
DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_query);
@@ -3068,21 +3144,74 @@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
+/* simple AG-based error/%ip tracepoint class */
+DECLARE_EVENT_CLASS(xfs_ag_error_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
+ unsigned long caller_ip),
+ TP_ARGS(mp, agno, error, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, error)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->error = error;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno 0x%x error %d caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->error,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_AG_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_ag_error_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, error, caller_ip))
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
/* refcount tracepoint classes */
-/* reuse the discard trace class for agbno/aglen-based traces */
-#define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name)
+DECLARE_EVENT_CLASS(xfs_refcount_class,
+ TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno,
+ xfs_extlen_t len),
+ TP_ARGS(cur, agbno, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agbno = agbno;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->len)
+);
+#define DEFINE_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(xfs_refcount_class, name, \
+ TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, \
+ xfs_extlen_t len), \
+ TP_ARGS(cur, agbno, len))
-/* ag btree lookup tracepoint class */
TRACE_DEFINE_ENUM(XFS_LOOKUP_EQi);
TRACE_DEFINE_ENUM(XFS_LOOKUP_LEi);
TRACE_DEFINE_ENUM(XFS_LOOKUP_GEi);
-DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_lookup_t dir),
- TP_ARGS(mp, agno, agbno, dir),
+TRACE_EVENT(xfs_refcount_lookup,
+ TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno,
+ xfs_lookup_t dir),
+ TP_ARGS(cur, agbno, dir),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3090,8 +3219,8 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
__field(xfs_lookup_t, dir)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->agbno = agbno;
__entry->dir = dir;
),
@@ -3103,17 +3232,10 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
__entry->dir)
)
-#define DEFINE_AG_BTREE_LOOKUP_EVENT(name) \
-DEFINE_EVENT(xfs_ag_btree_lookup_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_lookup_t dir), \
- TP_ARGS(mp, agno, agbno, dir))
-
/* single-rcext tracepoint class */
DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- struct xfs_refcount_irec *irec),
- TP_ARGS(mp, agno, irec),
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec),
+ TP_ARGS(cur, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3123,8 +3245,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
__field(xfs_nlink_t, refcount)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
@@ -3141,15 +3263,14 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
#define DEFINE_REFCOUNT_EXTENT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_extent_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- struct xfs_refcount_irec *irec), \
- TP_ARGS(mp, agno, irec))
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec), \
+ TP_ARGS(cur, irec))
/* single-rcext and an agbno tracepoint class */
DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- struct xfs_refcount_irec *irec, xfs_agblock_t agbno),
- TP_ARGS(mp, agno, irec, agbno),
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec,
+ xfs_agblock_t agbno),
+ TP_ARGS(cur, irec, agbno),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3160,8 +3281,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
__field(xfs_agblock_t, agbno)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
@@ -3180,15 +3301,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
#define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_extent_at_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- struct xfs_refcount_irec *irec, xfs_agblock_t agbno), \
- TP_ARGS(mp, agno, irec, agbno))
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, \
+ xfs_agblock_t agbno), \
+ TP_ARGS(cur, irec, agbno))
/* double-rcext tracepoint class */
DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2),
- TP_ARGS(mp, agno, i1, i2),
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1,
+ struct xfs_refcount_irec *i2),
+ TP_ARGS(cur, i1, i2),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3202,8 +3323,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
__field(xfs_nlink_t, i2_refcount)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3229,16 +3350,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
#define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_double_extent_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), \
- TP_ARGS(mp, agno, i1, i2))
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \
+ struct xfs_refcount_irec *i2), \
+ TP_ARGS(cur, i1, i2))
/* double-rcext and an agbno tracepoint class */
DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
- xfs_agblock_t agbno),
- TP_ARGS(mp, agno, i1, i2, agbno),
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1,
+ struct xfs_refcount_irec *i2, xfs_agblock_t agbno),
+ TP_ARGS(cur, i1, i2, agbno),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3253,8 +3373,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
__field(xfs_agblock_t, agbno)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3282,17 +3402,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
#define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
- xfs_agblock_t agbno), \
- TP_ARGS(mp, agno, i1, i2, agbno))
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \
+ struct xfs_refcount_irec *i2, xfs_agblock_t agbno), \
+ TP_ARGS(cur, i1, i2, agbno))
/* triple-rcext tracepoint class */
DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
- struct xfs_refcount_irec *i3),
- TP_ARGS(mp, agno, i1, i2, i3),
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1,
+ struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3),
+ TP_ARGS(cur, i1, i2, i3),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3310,8 +3428,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
__field(xfs_nlink_t, i3_refcount)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->agno = cur->bc_ag.pag->pag_agno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3346,109 +3464,82 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
#define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
- struct xfs_refcount_irec *i3), \
- TP_ARGS(mp, agno, i1, i2, i3))
+ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \
+ struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3), \
+ TP_ARGS(cur, i1, i2, i3))
/* refcount btree tracepoints */
-DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_insert_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_delete_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_insert_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_delete_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_update_error);
/* refcount adjustment tracepoints */
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase);
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease);
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase);
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_increase);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_decrease);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_increase);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_decrease);
DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent);
-DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent);
DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_left_extent_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_right_extent_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_find_left_extent_error);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_find_right_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_cow_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_modify_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_split_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_left_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_right_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_left_extent_error);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_right_extent_error);
/* reflink helpers */
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared);
-DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result);
-DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared);
+DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared_result);
+DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_shared_error);
+
+TRACE_DEFINE_ENUM(XFS_REFCOUNT_INCREASE);
+TRACE_DEFINE_ENUM(XFS_REFCOUNT_DECREASE);
+TRACE_DEFINE_ENUM(XFS_REFCOUNT_ALLOC_COW);
+TRACE_DEFINE_ENUM(XFS_REFCOUNT_FREE_COW);
DECLARE_EVENT_CLASS(xfs_refcount_deferred_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- int type, xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, type, agbno, len),
+ TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc),
+ TP_ARGS(mp, refc),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
- __field(int, type)
+ __field(int, op)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->type = type;
- __entry->agbno = agbno;
- __entry->len = len;
+ __entry->agno = XFS_FSB_TO_AGNO(mp, refc->ri_startblock);
+ __entry->op = refc->ri_type;
+ __entry->agbno = XFS_FSB_TO_AGBNO(mp, refc->ri_startblock);
+ __entry->len = refc->ri_blockcount;
),
- TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
+ __print_symbolic(__entry->op, XFS_REFCOUNT_INTENT_STRINGS),
__entry->agno,
__entry->agbno,
__entry->len)
);
#define DEFINE_REFCOUNT_DEFERRED_EVENT(name) \
DEFINE_EVENT(xfs_refcount_deferred_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- int type, \
- xfs_agblock_t bno, \
- xfs_extlen_t len), \
- TP_ARGS(mp, agno, type, bno, len))
+ TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc), \
+ TP_ARGS(mp, refc))
DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer);
DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred);
-
-TRACE_EVENT(xfs_refcount_finish_one_leftover,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- int type, xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, type, agbno, len),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(int, type)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->type = type;
- __entry->agbno = agbno;
- __entry->len = len;
- ),
- TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->type,
- __entry->agno,
- __entry->agbno,
- __entry->len)
-);
+DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_finish_one_leftover);
/* simple inode-based error/%ip tracepoint class */
DECLARE_EVENT_CLASS(xfs_inode_error_class,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 828da4ac4316..bdf3704dc301 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -725,135 +725,6 @@ xfs_trans_free_items(
}
}
-static inline void
-xfs_log_item_batch_insert(
- struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur,
- struct xfs_log_item **log_items,
- int nr_items,
- xfs_lsn_t commit_lsn)
-{
- int i;
-
- spin_lock(&ailp->ail_lock);
- /* xfs_trans_ail_update_bulk drops ailp->ail_lock */
- xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
-
- for (i = 0; i < nr_items; i++) {
- struct xfs_log_item *lip = log_items[i];
-
- if (lip->li_ops->iop_unpin)
- lip->li_ops->iop_unpin(lip, 0);
- }
-}
-
-/*
- * Bulk operation version of xfs_trans_committed that takes a log vector of
- * items to insert into the AIL. This uses bulk AIL insertion techniques to
- * minimise lock traffic.
- *
- * If we are called with the aborted flag set, it is because a log write during
- * a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through iop_committed and iop_committing, which
- * means that checkpoint commit abort handling is treated exactly the same
- * as an iclog write error even though we haven't started any IO yet. Hence in
- * this case all we need to do is iop_committed processing, followed by an
- * iop_unpin(aborted) call.
- *
- * The AIL cursor is used to optimise the insert process. If commit_lsn is not
- * at the end of the AIL, the insert cursor avoids the need to walk
- * the AIL to find the insertion point on every xfs_log_item_batch_insert()
- * call. This saves a lot of needless list walking and is a net win, even
- * though it slightly increases that amount of AIL lock traffic to set it up
- * and tear it down.
- */
-void
-xfs_trans_committed_bulk(
- struct xfs_ail *ailp,
- struct list_head *lv_chain,
- xfs_lsn_t commit_lsn,
- bool aborted)
-{
-#define LOG_ITEM_BATCH_SIZE 32
- struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
- struct xfs_log_vec *lv;
- struct xfs_ail_cursor cur;
- int i = 0;
-
- spin_lock(&ailp->ail_lock);
- xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
- spin_unlock(&ailp->ail_lock);
-
- /* unpin all the log items */
- list_for_each_entry(lv, lv_chain, lv_list) {
- struct xfs_log_item *lip = lv->lv_item;
- xfs_lsn_t item_lsn;
-
- if (aborted)
- set_bit(XFS_LI_ABORTED, &lip->li_flags);
-
- if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
- lip->li_ops->iop_release(lip);
- continue;
- }
-
- if (lip->li_ops->iop_committed)
- item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
- else
- item_lsn = commit_lsn;
-
- /* item_lsn of -1 means the item needs no further processing */
- if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
- continue;
-
- /*
- * if we are aborting the operation, no point in inserting the
- * object into the AIL as we are in a shutdown situation.
- */
- if (aborted) {
- ASSERT(xlog_is_shutdown(ailp->ail_log));
- if (lip->li_ops->iop_unpin)
- lip->li_ops->iop_unpin(lip, 1);
- continue;
- }
-
- if (item_lsn != commit_lsn) {
-
- /*
- * Not a bulk update option due to unusual item_lsn.
- * Push into AIL immediately, rechecking the lsn once
- * we have the ail lock. Then unpin the item. This does
- * not affect the AIL cursor the bulk insert path is
- * using.
- */
- spin_lock(&ailp->ail_lock);
- if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
- xfs_trans_ail_update(ailp, lip, item_lsn);
- else
- spin_unlock(&ailp->ail_lock);
- if (lip->li_ops->iop_unpin)
- lip->li_ops->iop_unpin(lip, 0);
- continue;
- }
-
- /* Item is a candidate for bulk AIL insert. */
- log_items[i++] = lv->lv_item;
- if (i >= LOG_ITEM_BATCH_SIZE) {
- xfs_log_item_batch_insert(ailp, &cur, log_items,
- LOG_ITEM_BATCH_SIZE, commit_lsn);
- i = 0;
- }
- }
-
- /* make sure we insert the remainder! */
- if (i)
- xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
-
- spin_lock(&ailp->ail_lock);
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
-}
-
/*
* Sort transaction items prior to running precommit operations. This will
* attempt to order the items such that they will always be locked in the same
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1636663707dc..f06cc0f41665 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -58,13 +58,15 @@ struct xfs_log_item {
#define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3
#define XFS_LI_WHITEOUT 4
+#define XFS_LI_FLUSHING 5
#define XFS_LI_FLAGS \
{ (1u << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1u << XFS_LI_ABORTED), "ABORTED" }, \
{ (1u << XFS_LI_FAILED), "FAILED" }, \
{ (1u << XFS_LI_DIRTY), "DIRTY" }, \
- { (1u << XFS_LI_WHITEOUT), "WHITEOUT" }
+ { (1u << XFS_LI_WHITEOUT), "WHITEOUT" }, \
+ { (1u << XFS_LI_FLUSHING), "FLUSHING" }
struct xfs_item_ops {
unsigned flags;
@@ -224,7 +226,6 @@ void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
-void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
uint);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index e4c343096f95..0fafcc9f3dbe 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -135,25 +135,6 @@ xfs_ail_min_lsn(
}
/*
- * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
- */
-static xfs_lsn_t
-xfs_ail_max_lsn(
- struct xfs_ail *ailp)
-{
- xfs_lsn_t lsn = 0;
- struct xfs_log_item *lip;
-
- spin_lock(&ailp->ail_lock);
- lip = xfs_ail_max(ailp);
- if (lip)
- lsn = lip->li_lsn;
- spin_unlock(&ailp->ail_lock);
-
- return lsn;
-}
-
-/*
* The cursor keeps track of where our current traversal is up to by tracking
* the next item in the list for us. However, for this to be safe, removing an
* object from the AIL needs to invalidate any cursor that points to it. hence
@@ -414,6 +395,74 @@ xfsaild_push_item(
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
+/*
+ * Compute the LSN that we'd need to push the log tail towards in order to have
+ * at least 25% of the log space free. If the log free space already meets this
+ * threshold, this function returns the lowest LSN in the AIL to slowly keep
+ * writeback ticking over and the tail of the log moving forward.
+ */
+static xfs_lsn_t
+xfs_ail_calc_push_target(
+ struct xfs_ail *ailp)
+{
+ struct xlog *log = ailp->ail_log;
+ struct xfs_log_item *lip;
+ xfs_lsn_t target_lsn;
+ xfs_lsn_t max_lsn;
+ xfs_lsn_t min_lsn;
+ int32_t free_bytes;
+ uint32_t target_block;
+ uint32_t target_cycle;
+
+ lockdep_assert_held(&ailp->ail_lock);
+
+ lip = xfs_ail_max(ailp);
+ if (!lip)
+ return NULLCOMMITLSN;
+
+ max_lsn = lip->li_lsn;
+ min_lsn = __xfs_ail_min_lsn(ailp);
+
+ /*
+ * If we are supposed to push all the items in the AIL, we want to push
+ * to the current head. We then clear the push flag so that we don't
+ * keep pushing newly queued items beyond where the push all command was
+ * run. If the push waiter wants to empty the ail, it should queue
+ * itself on the ail_empty wait queue.
+ */
+ if (test_and_clear_bit(XFS_AIL_OPSTATE_PUSH_ALL, &ailp->ail_opstate))
+ return max_lsn;
+
+ /* If someone wants the AIL empty, keep pushing everything we have. */
+ if (waitqueue_active(&ailp->ail_empty))
+ return max_lsn;
+
+ /*
+ * Background pushing - attempt to keep 25% of the log free and if we
+ * have that much free retain the existing target.
+ */
+ free_bytes = log->l_logsize - xlog_lsn_sub(log, max_lsn, min_lsn);
+ if (free_bytes >= log->l_logsize >> 2)
+ return ailp->ail_target;
+
+ target_cycle = CYCLE_LSN(min_lsn);
+ target_block = BLOCK_LSN(min_lsn) + (log->l_logBBsize >> 2);
+ if (target_block >= log->l_logBBsize) {
+ target_block -= log->l_logBBsize;
+ target_cycle += 1;
+ }
+ target_lsn = xlog_assign_lsn(target_cycle, target_block);
+
+ /* Cap the target to the highest LSN known to be in the AIL. */
+ if (XFS_LSN_CMP(target_lsn, max_lsn) > 0)
+ return max_lsn;
+
+ /* If the existing target is higher than the new target, keep it. */
+ if (XFS_LSN_CMP(ailp->ail_target, target_lsn) >= 0)
+ return ailp->ail_target;
+ return target_lsn;
+}
+
static long
xfsaild_push(
struct xfs_ail *ailp)
@@ -422,7 +471,6 @@ xfsaild_push(
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
xfs_lsn_t lsn;
- xfs_lsn_t target = NULLCOMMITLSN;
long tout;
int stuck = 0;
int flushing = 0;
@@ -447,37 +495,26 @@ xfsaild_push(
}
spin_lock(&ailp->ail_lock);
-
- /*
- * If we have a sync push waiter, we always have to push till the AIL is
- * empty. Update the target to point to the end of the AIL so that
- * capture updates that occur after the sync push waiter has gone to
- * sleep.
- */
- if (waitqueue_active(&ailp->ail_empty)) {
- lip = xfs_ail_max(ailp);
- if (lip)
- target = lip->li_lsn;
- } else {
- /* barrier matches the ail_target update in xfs_ail_push() */
- smp_rmb();
- target = ailp->ail_target;
- ailp->ail_target_prev = target;
- }
+ WRITE_ONCE(ailp->ail_target, xfs_ail_calc_push_target(ailp));
+ if (ailp->ail_target == NULLCOMMITLSN)
+ goto out_done;
/* we're done if the AIL is empty or our push has reached the end */
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
if (!lip)
- goto out_done;
+ goto out_done_cursor;
XFS_STATS_INC(mp, xs_push_ail);
- ASSERT(target != NULLCOMMITLSN);
+ ASSERT(ailp->ail_target != NULLCOMMITLSN);
lsn = lip->li_lsn;
- while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
+ while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) {
int lock_result;
+ if (test_bit(XFS_LI_FLUSHING, &lip->li_flags))
+ goto next_item;
+
/*
* Note that iop_push may unlock and reacquire the AIL lock. We
* rely on the AIL cursor implementation to be able to deal with
@@ -547,20 +584,24 @@ xfsaild_push(
if (stuck > 100)
break;
+next_item:
lip = xfs_trans_ail_cursor_next(ailp, &cur);
if (lip == NULL)
break;
+ if (lip->li_lsn != lsn && count > 1000)
+ break;
lsn = lip->li_lsn;
}
-out_done:
+out_done_cursor:
xfs_trans_ail_cursor_done(&cur);
+out_done:
spin_unlock(&ailp->ail_lock);
if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
ailp->ail_log_flush++;
- if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
+ if (!count || XFS_LSN_CMP(lsn, ailp->ail_target) >= 0) {
/*
* We reached the target or the AIL is empty, so wait a bit
* longer for I/O to complete and remove pushed items from the
@@ -585,7 +626,7 @@ out_done:
/*
* Assume we have more work to do in a short while.
*/
- tout = 10;
+ tout = 0;
}
return tout;
@@ -603,7 +644,7 @@ xfsaild(
set_freezable();
while (1) {
- if (tout && tout <= 20)
+ if (tout)
set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
else
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
@@ -639,21 +680,9 @@ xfsaild(
break;
}
+ /* Idle if the AIL is empty. */
spin_lock(&ailp->ail_lock);
-
- /*
- * Idle if the AIL is empty and we are not racing with a target
- * update. We check the AIL after we set the task to a sleep
- * state to guarantee that we either catch an ail_target update
- * or that a wake_up resets the state to TASK_RUNNING.
- * Otherwise, we run the risk of sleeping indefinitely.
- *
- * The barrier matches the ail_target update in xfs_ail_push().
- */
- smp_rmb();
- if (!xfs_ail_min(ailp) &&
- ailp->ail_target == ailp->ail_target_prev &&
- list_empty(&ailp->ail_buf_list)) {
+ if (!xfs_ail_min(ailp) && list_empty(&ailp->ail_buf_list)) {
spin_unlock(&ailp->ail_lock);
schedule();
tout = 0;
@@ -676,56 +705,6 @@ xfsaild(
}
/*
- * This routine is called to move the tail of the AIL forward. It does this by
- * trying to flush items in the AIL whose lsns are below the given
- * threshold_lsn.
- *
- * The push is run asynchronously in a workqueue, which means the caller needs
- * to handle waiting on the async flush for space to become available.
- * We don't want to interrupt any push that is in progress, hence we only queue
- * work if we set the pushing bit appropriately.
- *
- * We do this unlocked - we only need to know whether there is anything in the
- * AIL at the time we are called. We don't need to access the contents of
- * any of the objects, so the lock is not needed.
- */
-void
-xfs_ail_push(
- struct xfs_ail *ailp,
- xfs_lsn_t threshold_lsn)
-{
- struct xfs_log_item *lip;
-
- lip = xfs_ail_min(ailp);
- if (!lip || xlog_is_shutdown(ailp->ail_log) ||
- XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
- return;
-
- /*
- * Ensure that the new target is noticed in push code before it clears
- * the XFS_AIL_PUSHING_BIT.
- */
- smp_wmb();
- xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
- smp_wmb();
-
- wake_up_process(ailp->ail_task);
-}
-
-/*
- * Push out all items in the AIL immediately
- */
-void
-xfs_ail_push_all(
- struct xfs_ail *ailp)
-{
- xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp);
-
- if (threshold_lsn)
- xfs_ail_push(ailp, threshold_lsn);
-}
-
-/*
* Push out all items in the AIL immediately and wait until the AIL is empty.
*/
void
@@ -748,21 +727,49 @@ xfs_ail_push_all_sync(
}
void
+__xfs_ail_assign_tail_lsn(
+ struct xfs_ail *ailp)
+{
+ struct xlog *log = ailp->ail_log;
+ xfs_lsn_t tail_lsn;
+
+ assert_spin_locked(&ailp->ail_lock);
+
+ if (xlog_is_shutdown(log))
+ return;
+
+ tail_lsn = __xfs_ail_min_lsn(ailp);
+ if (!tail_lsn)
+ tail_lsn = ailp->ail_head_lsn;
+
+ WRITE_ONCE(log->l_tail_space,
+ xlog_lsn_sub(log, ailp->ail_head_lsn, tail_lsn));
+ trace_xfs_log_assign_tail_lsn(log, tail_lsn);
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
+}
+
+/*
+ * Callers should pass the original tail lsn so that we can detect if the tail
+ * has moved as a result of the operation that was performed. If the caller
+ * needs to force a tail space update, it should pass NULLCOMMITLSN to bypass
+ * the "did the tail LSN change?" checks. If the caller wants to avoid a tail
+ * update (e.g. it knows the tail did not change) it should pass an @old_lsn of
+ * 0.
+ */
+void
xfs_ail_update_finish(
struct xfs_ail *ailp,
xfs_lsn_t old_lsn) __releases(ailp->ail_lock)
{
struct xlog *log = ailp->ail_log;
- /* if the tail lsn hasn't changed, don't do updates or wakeups. */
+ /* If the tail lsn hasn't changed, don't do updates or wakeups. */
if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
spin_unlock(&ailp->ail_lock);
return;
}
- if (!xlog_is_shutdown(log))
- xlog_assign_tail_lsn_locked(log->l_mp);
-
+ __xfs_ail_assign_tail_lsn(ailp);
if (list_empty(&ailp->ail_head))
wake_up_all(&ailp->ail_empty);
spin_unlock(&ailp->ail_lock);
@@ -829,6 +836,19 @@ xfs_trans_ail_update_bulk(
if (!list_empty(&tmp))
xfs_ail_splice(ailp, cur, &tmp, lsn);
+ /*
+ * If this is the first insert, wake up the push daemon so it can
+ * actively scan for items to push. We also need to do a log tail
+ * LSN update to ensure that it is correctly tracked by the log, so
+ * set the tail_lsn to NULLCOMMITLSN so that xfs_ail_update_finish()
+ * will see that the tail lsn has changed and will update the tail
+ * appropriately.
+ */
+ if (!mlip) {
+ wake_up_process(ailp->ail_task);
+ tail_lsn = NULLCOMMITLSN;
+ }
+
xfs_ail_update_finish(ailp, tail_lsn);
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index d5400150358e..bd841df93021 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -19,9 +19,6 @@ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
-void xfs_trans_committed_bulk(struct xfs_ail *ailp,
- struct list_head *lv_chain,
- xfs_lsn_t commit_lsn, bool aborted);
/*
* AIL traversal cursor.
*
@@ -55,16 +52,20 @@ struct xfs_ail {
struct xlog *ail_log;
struct task_struct *ail_task;
struct list_head ail_head;
- xfs_lsn_t ail_target;
- xfs_lsn_t ail_target_prev;
struct list_head ail_cursors;
spinlock_t ail_lock;
xfs_lsn_t ail_last_pushed_lsn;
+ xfs_lsn_t ail_head_lsn;
int ail_log_flush;
+ unsigned long ail_opstate;
struct list_head ail_buf_list;
wait_queue_head_t ail_empty;
+ xfs_lsn_t ail_target;
};
+/* Push all items out of the AIL immediately. */
+#define XFS_AIL_OPSTATE_PUSH_ALL 0u
+
/*
* From xfs_trans_ail.c
*/
@@ -101,10 +102,23 @@ void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
__releases(ailp->ail_lock);
void xfs_trans_ail_delete(struct xfs_log_item *lip, int shutdown_type);
-void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
-void xfs_ail_push_all(struct xfs_ail *);
-void xfs_ail_push_all_sync(struct xfs_ail *);
-struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp);
+static inline void xfs_ail_push(struct xfs_ail *ailp)
+{
+ wake_up_process(ailp->ail_task);
+}
+
+static inline void xfs_ail_push_all(struct xfs_ail *ailp)
+{
+ if (!test_and_set_bit(XFS_AIL_OPSTATE_PUSH_ALL, &ailp->ail_opstate))
+ xfs_ail_push(ailp);
+}
+
+static inline xfs_lsn_t xfs_ail_get_push_target(struct xfs_ail *ailp)
+{
+ return READ_ONCE(ailp->ail_target);
+}
+
+void xfs_ail_push_all_sync(struct xfs_ail *ailp);
xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
@@ -117,6 +131,18 @@ struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
+void __xfs_ail_assign_tail_lsn(struct xfs_ail *ailp);
+
+static inline void
+xfs_ail_assign_tail_lsn(
+ struct xfs_ail *ailp)
+{
+
+ spin_lock(&ailp->ail_lock);
+ __xfs_ail_assign_tail_lsn(ailp);
+ spin_unlock(&ailp->ail_lock);
+}
+
#if BITS_PER_LONG != 64
static inline void
xfs_trans_ail_copy_lsn(