From 1247ec4c5f6e64b3cf01af58173820cdc61f11ed Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: add XBF_XBF_NO_IOACCT to buf trace output When XBF_NO_IOACCT got added, it missed the translation in XFS_BUF_FLAGS, so we see "0x8" in trace output rather than the flag name. Fix it. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 1c2e52b2d926..38c9a95bda7d 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -71,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_NO_IOACCT, "NO_IOACCT" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ -- cgit v1.2.3 From fd26a88093bab6529ea2de819114ca92dbd1d71d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: factor rmap btree size into the indlen calculations When we're estimating the amount of space it's going to take to satisfy a delalloc reservation, we need to include the space that we might need to grow the rmapbt. This helps us to avoid running out of space later when _iomap_write_allocate needs more space than we reserved. Eryu Guan observed this happening on generic/224 when sunit/swidth were set. Reported-by: Eryu Guan Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c2e13db14563..164790c1b419 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,6 +49,7 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" +#include "xfs_rmap_btree.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -190,8 +191,12 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ + xfs_filblks_t orig_len; mp = ip->i_mount; + + /* Calculate the worst-case size of the bmbt. */ + orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -199,12 +204,20 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) { + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; + break; + } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } + + /* Calculate the worst-case size of the rmapbt. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + + mp->m_rmap_maxlevels; + return rval; } -- cgit v1.2.3 From bb6e0ebed7f7474e0e44cf6b778ee2fda759ddd6 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: fs: xfs: xfs_icreate_item: constify xfs_item_ops structure Declare the structure xfs_item_ops as const as it is only passed as an argument to the function xfs_log_item_init. As this argument is of type const struct xfs_item_ops *, so xfs_item_ops structures having this property can be declared as const. Done using Coccinelle: @r1 disable optional_qualifier @ identifier i; position p; @@ static struct xfs_item_ops i@p = {...}; @ok1@ identifier r1.i; position p; expression e1,e2,e3; @@ xfs_log_item_init(e1,e2,e3,&i@p) @bad@ position p!={r1.p,ok1.p}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ static +const struct xfs_item_ops i={...}; @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct xfs_item_ops i; File size before: text data bss dec hex filename 737 64 8 809 329 fs/xfs/xfs_icreate_item.o File size after: text data bss dec hex filename 801 0 8 809 329 fs/xfs/xfs_icreate_item.o Signed-off-by: Bhumika Goyal Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner --- fs/xfs/xfs_icreate_item.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d45ca72af6fb..865ad1373e5e 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -133,7 +133,7 @@ xfs_icreate_item_committing( /* * This is the ops vector shared by all buf log items. */ -static struct xfs_item_ops xfs_icreate_item_ops = { +static const struct xfs_item_ops xfs_icreate_item_ops = { .iop_size = xfs_icreate_item_size, .iop_format = xfs_icreate_item_format, .iop_pin = xfs_icreate_item_pin, -- cgit v1.2.3 From cf7841c12d85d1fe0ad33fb8bc5746809a882010 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: fs: xfs: libxfs: constify xfs_nameops structures Declare the structure xfs_nameops as const as it is only stored in the m_dirnameops field of a xfs_mount structure. This field is of type const struct xfs_nameops *, so xfs_nameops structures having this property can be declared as const. Done using Coccinelle: @r1 disable optional_qualifier @ identifier i; position p; @@ static struct xfs_nameops i@p = {...}; @ok1@ identifier r1.i; position p; struct xfs_mount mp; @@ mp.m_dirnameops=&i@p @bad@ position p!={r1.p,ok1.p}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ static +const struct xfs_nameops i={...}; @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct xfs_nameops i; File size before: text data bss dec hex filename 5302 85 0 5387 150b fs/xfs/libxfs/xfs_dir2.o File size after: text data bss dec hex filename 5318 69 0 5387 150b fs/xfs/libxfs/xfs_dir2.o Signed-off-by: Bhumika Goyal Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 20a96dd5af7e..c58d72c220f5 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -93,7 +93,7 @@ xfs_ascii_ci_compname( return result; } -static struct xfs_nameops xfs_ascii_ci_nameops = { +static const struct xfs_nameops xfs_ascii_ci_nameops = { .hashname = xfs_ascii_ci_hashname, .compname = xfs_ascii_ci_compname, }; -- cgit v1.2.3 From fba3e594ef0ad911fa8f559732d588172f212d71 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: always succeed when deduping zero bytes It turns out that btrfs and xfs had differing interpretations of what to do when the dedupe length is zero. Change xfs to follow btrfs' semantics so that the userland interface is consistent. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 56372bee08c5..c58371fde08d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1317,8 +1317,14 @@ xfs_reflink_remap_range( goto out_unlock; } - if (len == 0) + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (len == 0) { + if (is_dedupe) { + ret = 0; + goto out_unlock; + } len = isize - pos_in; + } /* Ensure offsets don't wrap and the input is inside i_size */ if (pos_in + len < pos_in || pos_out + len < pos_out || -- cgit v1.2.3 From 974ae922efd93b07b6cdf989ae959883f6f05fd8 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 23 +++++++++++++++++++++-- fs/xfs/libxfs/xfs_bmap.h | 2 +- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_reflink.c | 2 +- 4 files changed, 24 insertions(+), 5 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 164790c1b419..6b7e6eb29414 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -50,6 +50,7 @@ #include "xfs_ag_resv.h" #include "xfs_refcount.h" #include "xfs_rmap_btree.h" +#include "xfs_icache.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -4154,8 +4155,9 @@ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, + xfs_fileoff_t off, xfs_filblks_t len, + xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof) @@ -4167,10 +4169,17 @@ xfs_bmapi_reserve_delalloc( char rt = XFS_IS_REALTIME_INODE(ip); xfs_extlen_t extsz; int error; + xfs_fileoff_t aoff = off; - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + /* + * Cap the alloc length. Keep track of prealloc so we know whether to + * tag the inode before we return. + */ + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + if (prealloc && alen >= len) + prealloc = alen - len; /* Figure out the extent size, adjust alen */ if (whichfork == XFS_COW_FORK) @@ -4236,6 +4245,16 @@ xfs_bmapi_reserve_delalloc( */ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + /* + * Tag the inode if blocks were preallocated. Note that COW fork + * preallocation can occur at the start or end of the extent, even when + * prealloc == 0, so we must also check the aligned offset and length. + */ + if (whichfork == XFS_DATA_FORK && prealloc) + xfs_inode_set_eofblocks_tag(ip); + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); + ASSERT(got->br_startoff <= aoff); ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); ASSERT(isnullstartblock(got->br_startblock)); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index ffed1f9208ce..cecd094404cc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -238,7 +238,7 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, int num_exts); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, xfs_filblks_t len, + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2272190b70ae..e4bfde212cc2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -620,7 +620,7 @@ xfs_file_iomap_begin_delay( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, &got, &idx, eof); + end_fsb - offset_fsb, 0, &got, &idx, eof); switch (error) { case 0: break; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c58371fde08d..a5b027a43990 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -295,7 +295,7 @@ xfs_reflink_reserve_cow( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, &got, &idx, eof); + end_fsb - imap->br_startoff, 0, &got, &idx, eof); switch (error) { case 0: break; -- cgit v1.2.3 From 0260d8ff5f76617e3a55a1c471383ecb4404c3ad Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: clean up cow fork reservation and tag inodes correctly COW fork reservation is implemented via delayed allocation. The code is modeled after the traditional delalloc allocation code, but is slightly different in terms of how preallocation occurs. Rather than post-eof speculative preallocation, COW fork preallocation is implemented via a COW extent size hint that is designed to minimize fragmentation as a reflinked file is split over time. xfs_reflink_reserve_cow() still uses logic that is oriented towards dealing with post-eof speculative preallocation, however, and is stale or not necessarily correct. First, the EOF alignment to the COW extent size hint is implemented in xfs_bmapi_reserve_delalloc() (which does so correctly by aligning the start and end offsets) and so is not necessary in xfs_reflink_reserve_cow(). The backoff and retry logic on ENOSPC is also ineffective for the same reason, as xfs_bmapi_reserve_delalloc() will simply perform the same allocation request on the retry. Finally, since the COW extent size hint aligns the start and end offset of the range to allocate, the end_fsb != orig_end_fsb logic is not sufficient. Indeed, if a write request happens to end on an aligned offset, it is possible that we do not tag the inode for COW preallocation even though xfs_bmapi_reserve_delalloc() may have preallocated at the start offset. Kill the unnecessary, duplicate code in xfs_reflink_reserve_cow(). Remove the inode tag logic as well since xfs_bmapi_reserve_delalloc() has been updated to tag the inode correctly. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a5b027a43990..becf2465dd23 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -245,11 +245,9 @@ xfs_reflink_reserve_cow( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got; - xfs_fileoff_t end_fsb, orig_end_fsb; int error = 0; bool eof = false, trimmed; xfs_extnum_t idx; - xfs_extlen_t align; /* * Search the COW fork extent list first. This serves two purposes: @@ -287,33 +285,12 @@ xfs_reflink_reserve_cow( if (error) return error; - end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; - - align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); - if (align) - end_fsb = roundup_64(end_fsb, align); - -retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, 0, &got, &idx, eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ + imap->br_blockcount, 0, &got, &idx, eof); + if (error == -ENOSPC || error == -EDQUOT) trace_xfs_reflink_cow_enospc(ip, imap); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; - goto retry; - } - /*FALLTHRU*/ - default: + if (error) return error; - } - - if (end_fsb != orig_end_fsb) - xfs_inode_set_cowblocks_tag(ip); trace_xfs_reflink_cow_alloc(ip, &got); return 0; -- cgit v1.2.3 From f782088c9e5d08e9494c63e68b4e85716df3e5f8 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 28 Nov 2016 14:57:42 +1100 Subject: xfs: pass post-eof speculative prealloc blocks to bmapi xfs_file_iomap_begin_delay() implements post-eof speculative preallocation by extending the block count of the requested delayed allocation. Now that xfs_bmapi_reserve_delalloc() has been updated to handle prealloc blocks separately and tag the inode, update xfs_file_iomap_begin_delay() to use the new parameter and rely on the former to tag the inode. Note that this patch does not change behavior. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index e4bfde212cc2..15a83813b708 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -536,10 +536,11 @@ xfs_file_iomap_begin_delay( xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t maxbytes_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb, orig_end_fsb; + xfs_fileoff_t end_fsb; int error = 0, eof = 0; struct xfs_bmbt_irec got; xfs_extnum_t idx; + xfs_fsblock_t prealloc_blocks = 0; ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!xfs_get_extsz_hint(ip)); @@ -594,33 +595,32 @@ xfs_file_iomap_begin_delay( * the lower level functions are updated. */ count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = orig_end_fsb = - min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); if (eof) { - xfs_fsblock_t prealloc_blocks; - prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; align = xfs_eof_alignment(ip, 0); if (align) - end_fsb = roundup_64(end_fsb, align); + p_end_fsb = roundup_64(p_end_fsb, align); - end_fsb = min(end_fsb, maxbytes_fsb); - ASSERT(end_fsb > offset_fsb); + p_end_fsb = min(p_end_fsb, maxbytes_fsb); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; } } retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, 0, &got, &idx, eof); + end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); switch (error) { case 0: break; @@ -628,8 +628,8 @@ retry: case -EDQUOT: /* retry without any preallocation */ trace_xfs_delalloc_enospc(ip, offset, count); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; + if (prealloc_blocks) { + prealloc_blocks = 0; goto retry; } /*FALLTHRU*/ @@ -637,13 +637,6 @@ retry: goto out_unlock; } - /* - * Tag the inode as speculatively preallocated so we can reclaim this - * space on demand, if necessary. - */ - if (end_fsb != orig_end_fsb) - xfs_inode_set_eofblocks_tag(ip); - trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) -- cgit v1.2.3