diff options
-rw-r--r-- | Documentation/filesystems/xfs.txt | 29 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 104 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 125 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.c | 24 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_data.c | 39 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 42 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_discard.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_error.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 84 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 554 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 58 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 125 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_pnfs.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 58 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 3 |
24 files changed, 776 insertions, 580 deletions
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 0bfafe108357..5a5a05582b58 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -228,30 +228,19 @@ default behaviour. Deprecated Mount Options ======================== - delaylog/nodelaylog - Delayed logging is the only logging method that XFS supports - now, so these mount options are now ignored. - - Due for removal in 3.12. - - ihashsize=value - In memory inode hashes have been removed, so this option has - no function as of August 2007. Option is deprecated. - - Due for removal in 3.12. +None at present. - irixsgid - This behaviour is now controlled by a sysctl, so the mount - option is ignored. - Due for removal in 3.12. +Removed Mount Options +===================== - osyncisdsync - osyncisosync - O_SYNC and O_DSYNC are fully supported, so there is no need - for these options any more. + Name Removed + ---- ------- + delaylog/nodelaylog v3.20 + ihashsize v3.20 + irixsgid v3.20 + osyncisdsync/osyncisosync v3.20 - Due for removal in 3.12. sysctls ======= diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index a6fbf4472017..516162be1398 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -260,6 +260,7 @@ xfs_alloc_fix_len( rlen = rlen - (k - args->mod); else rlen = rlen - args->prod + (args->mod - k); + /* casts to (int) catch length underflows */ if ((int)rlen < (int)args->minlen) return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); @@ -286,7 +287,8 @@ xfs_alloc_fix_minleft( if (diff >= 0) return 1; args->len += diff; /* shrink the allocated space */ - if (args->len >= args->minlen) + /* casts to (int) catch length underflows */ + if ((int)args->len >= (int)args->minlen) return 1; args->agbno = NULLAGBLOCK; return 0; @@ -315,6 +317,9 @@ xfs_alloc_fixup_trees( xfs_agblock_t nfbno2; /* second new free startblock */ xfs_extlen_t nflen1=0; /* first new free length */ xfs_extlen_t nflen2=0; /* second new free length */ + struct xfs_mount *mp; + + mp = cnt_cur->bc_mp; /* * Look up the record in the by-size tree if necessary. @@ -323,13 +328,13 @@ xfs_alloc_fixup_trees( #ifdef DEBUG if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, i == 1 && nfbno1 == fbno && nflen1 == flen); #endif } else { if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } /* * Look up the record in the by-block tree if necessary. @@ -338,13 +343,13 @@ xfs_alloc_fixup_trees( #ifdef DEBUG if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, i == 1 && nfbno1 == fbno && nflen1 == flen); #endif } else { if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } #ifdef DEBUG @@ -355,7 +360,7 @@ xfs_alloc_fixup_trees( bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, bnoblock->bb_numrecs == cntblock->bb_numrecs); } #endif @@ -386,25 +391,25 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); /* * Add new by-size btree entry(s). */ if (nfbno1 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); + XFS_WANT_CORRUPTED_RETURN(mp, i == 0); if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } if (nfbno2 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); + XFS_WANT_CORRUPTED_RETURN(mp, i == 0); if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } /* * Fix up the by-block btree entry(s). @@ -415,7 +420,7 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(bno_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } else { /* * Update the by-block entry to start later|be shorter. @@ -429,10 +434,10 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 0); + XFS_WANT_CORRUPTED_RETURN(mp, i == 0); if ((error = xfs_btree_insert(bno_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } return 0; } @@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact( error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(fbno <= args->agbno); /* @@ -783,7 +788,7 @@ xfs_alloc_find_best_extent( error = xfs_alloc_get_rec(*scur, sbno, slen, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); /* @@ -946,7 +951,7 @@ restart: if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); if (ltlen >= args->minlen) break; if ((error = xfs_btree_increment(cnt_cur, 0, &i))) @@ -966,7 +971,7 @@ restart: */ if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, ltbno, ltlen, <bnoa, <lena); if (ltlena < args->minlen) @@ -999,7 +1004,7 @@ restart: cnt_cur->bc_ptrs[0] = besti; if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; if (!xfs_alloc_fix_minleft(args)) { @@ -1088,7 +1093,7 @@ restart: if (bno_cur_lt) { if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, ltbno, ltlen, <bnoa, <lena); if (ltlena >= args->minlen) @@ -1104,7 +1109,7 @@ restart: if (bno_cur_gt) { if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, gtbno, gtlen, >bnoa, >lena); if (gtlena >= args->minlen) @@ -1303,7 +1308,7 @@ restart: error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); @@ -1342,7 +1347,7 @@ restart: * This can't happen in the second case above. */ rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); if (rlen < args->maxlen) { xfs_agblock_t bestfbno; @@ -1362,13 +1367,13 @@ restart: if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); if (flen < bestrlen) break; xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); if (rlen > bestrlen) { @@ -1383,7 +1388,7 @@ restart: if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); rlen = bestrlen; rbno = bestrbno; flen = bestflen; @@ -1408,7 +1413,7 @@ restart: if (!xfs_alloc_fix_minleft(args)) goto out_nominleft; rlen = args->len; - XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* * Allocate and initialize a cursor for the by-block tree. */ @@ -1422,7 +1427,7 @@ restart: cnt_cur = bno_cur = NULL; args->len = rlen; args->agbno = rbno; - XFS_WANT_CORRUPTED_GOTO( + XFS_WANT_CORRUPTED_GOTO(args->mp, args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); @@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small( if (i) { if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); } /* * Nothing in the btree, try the freelist. Make sure @@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small( } args->len = 1; args->agbno = fbno; - XFS_WANT_CORRUPTED_GOTO( + XFS_WANT_CORRUPTED_GOTO(args->mp, args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); @@ -1579,7 +1584,7 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * It's not contiguous, though. */ @@ -1591,7 +1596,8 @@ xfs_free_ag_extent( * space was invalid, it's (partly) already free. * Very bad. */ - XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); + XFS_WANT_CORRUPTED_GOTO(mp, + ltbno + ltlen <= bno, error0); } } /* @@ -1606,7 +1612,7 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * It's not contiguous, though. */ @@ -1618,7 +1624,7 @@ xfs_free_ag_extent( * space was invalid, it's (partly) already free. * Very bad. */ - XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); + XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); } } /* @@ -1635,31 +1641,31 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Delete the old by-size entry on the right. */ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Delete the old by-block entry for the right block. */ if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Move the by-block cursor back to the left neighbor. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); #ifdef DEBUG /* * Check that this is the right record: delete didn't @@ -1672,7 +1678,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO( + XFS_WANT_CORRUPTED_GOTO(mp, i == 1 && xxbno == ltbno && xxlen == ltlen, error0); } @@ -1695,17 +1701,17 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Back up the by-block cursor to the left neighbor, and * update its length. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); nbno = ltbno; nlen = len + ltlen; if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) @@ -1721,10 +1727,10 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Update the starting block and length of the right * neighbor in the by-block tree. @@ -1743,7 +1749,7 @@ xfs_free_ag_extent( nlen = len; if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); bno_cur = NULL; @@ -1752,10 +1758,10 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 0, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 990595548958..b872e9cedb69 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -397,7 +397,8 @@ xfs_bmap_check_leaf_extents( xfs_check_block(block, mp, 0, 0); pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + XFS_WANT_CORRUPTED_GOTO(mp, + XFS_FSB_SANITY_CHECK(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -1002,7 +1003,7 @@ xfs_bmap_add_attrfork_btree( if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) goto error0; /* must be at least one entry */ - XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); if ((error = xfs_btree_new_iroot(cur, flags, &stat))) goto error0; if (stat == 0) { @@ -1288,7 +1289,8 @@ xfs_bmap_read_extents( break; pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + XFS_WANT_CORRUPTED_GOTO(mp, + XFS_FSB_SANITY_CHECK(mp, bno), error0); xfs_trans_brelse(tp, bp); } /* @@ -1722,7 +1724,9 @@ xfs_bmap_add_extent_delay_real( xfs_filblks_t temp=0; /* value for da_new calculations */ xfs_filblks_t temp2=0;/* value for da_new calculations */ int tmp_rval; /* partial logging flags */ + struct xfs_mount *mp; + mp = bma->tp ? bma->tp->t_mountp : NULL; ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); ASSERT(bma->idx >= 0); @@ -1833,15 +1837,15 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_btree_delete(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_btree_decrement(bma->cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -1874,7 +1878,7 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -1905,7 +1909,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, PREV.br_startoff, new->br_startblock, PREV.br_blockcount + @@ -1935,12 +1939,12 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } break; @@ -1968,7 +1972,7 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -2005,12 +2009,12 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { @@ -2051,7 +2055,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount + @@ -2089,12 +2093,12 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { @@ -2158,12 +2162,12 @@ xfs_bmap_add_extent_delay_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { @@ -2274,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ + struct xfs_mount *mp = tp->t_mountp; *logflagsp = 0; @@ -2386,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + PREV.br_blockcount + @@ -2429,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + PREV.br_blockcount, @@ -2464,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, @@ -2497,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real( new->br_startblock, new->br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount, newext))) @@ -2534,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff + new->br_blockcount, PREV.br_startblock + new->br_blockcount, @@ -2576,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff + new->br_blockcount, PREV.br_startblock + new->br_blockcount, @@ -2586,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real( cur->bc_rec.b = *new; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } break; @@ -2616,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff, PREV.br_startblock, PREV.br_blockcount - new->br_blockcount, @@ -2654,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff, PREV.br_startblock, PREV.br_blockcount - new->br_blockcount, @@ -2664,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real( new->br_startblock, new->br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } break; @@ -2702,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); /* new right extent - oldext */ if ((error = xfs_bmbt_update(cur, r[1].br_startoff, r[1].br_startblock, r[1].br_blockcount, @@ -2714,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real( new->br_startoff - PREV.br_startoff; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); /* * Reset the cursor to the position of the new extent * we are about to insert as we can't trust it after @@ -2724,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real( new->br_startblock, new->br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); /* new middle extent - newext */ cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } break; @@ -2933,7 +2938,9 @@ xfs_bmap_add_extent_hole_real( xfs_bmbt_irec_t right; /* right neighbor extent entry */ int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ + struct xfs_mount *mp; + mp = bma->tp ? bma->tp->t_mountp : NULL; ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(bma->idx >= 0); @@ -3021,15 +3028,15 @@ xfs_bmap_add_extent_hole_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_btree_delete(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_btree_decrement(bma->cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, left.br_startoff, left.br_startblock, left.br_blockcount + @@ -3062,7 +3069,7 @@ xfs_bmap_add_extent_hole_real( &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, left.br_startoff, left.br_startblock, left.br_blockcount + @@ -3096,7 +3103,7 @@ xfs_bmap_add_extent_hole_real( right.br_blockcount, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); error = xfs_bmbt_update(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount + @@ -3126,12 +3133,12 @@ xfs_bmap_add_extent_hole_real( new->br_blockcount, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); bma->cur->bc_rec.b.br_state = new->br_state; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } break; } @@ -4763,7 +4770,7 @@ xfs_bmap_del_extent( got.br_startblock, got.br_blockcount, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } da_old = da_new = 0; } else { @@ -4797,7 +4804,7 @@ xfs_bmap_del_extent( } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); break; case 2: @@ -4897,7 +4904,8 @@ xfs_bmap_del_extent( got.br_startblock, temp, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, + i == 1, done); /* * Update the btree record back * to the original value. @@ -4918,7 +4926,7 @@ xfs_bmap_del_extent( error = -ENOSPC; goto done; } - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); } else flags |= xfs_ilog_fext(whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, @@ -5412,6 +5420,7 @@ xfs_bmse_merge( struct xfs_bmbt_irec left; xfs_filblks_t blockcount; int error, i; + struct xfs_mount *mp = ip->i_mount; xfs_bmbt_get_all(gotp, &got); xfs_bmbt_get_all(leftp, &left); @@ -5446,19 +5455,19 @@ xfs_bmse_merge( got.br_blockcount, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); error = xfs_btree_delete(cur, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); /* lookup and update size of the previous extent */ error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, left.br_blockcount, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); left.br_blockcount = blockcount; @@ -5480,6 +5489,7 @@ xfs_bmse_shift_one( int *logflags) { struct xfs_ifork *ifp; + struct xfs_mount *mp; xfs_fileoff_t startoff; struct xfs_bmbt_rec_host *leftp; struct xfs_bmbt_irec got; @@ -5487,13 +5497,14 @@ xfs_bmse_shift_one( int error; int i; + mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); xfs_bmbt_get_all(gotp, &got); startoff = got.br_startoff - offset_shift_fsb; /* delalloc extents should be prevented by caller */ - XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); /* * Check for merge if we've got an extent to the left, otherwise make @@ -5532,7 +5543,7 @@ xfs_bmse_shift_one( got.br_blockcount, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(mp, i == 1); got.br_startoff = startoff; return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 81cad433df85..c72283dd8d44 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -168,7 +168,7 @@ xfs_btree_check_lptr( xfs_fsblock_t bno, /* btree block disk address */ int level) /* btree block level */ { - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, level > 0 && bno != NULLFSBLOCK && XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); @@ -187,7 +187,7 @@ xfs_btree_check_sptr( { xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, level > 0 && bno != NULLAGBLOCK && bno != 0 && @@ -1825,7 +1825,7 @@ xfs_btree_lookup( error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; @@ -2285,7 +2285,7 @@ xfs_btree_rshift( if (error) goto error0; i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); error = xfs_btree_increment(tcur, level, &i); if (error) @@ -3138,7 +3138,7 @@ xfs_btree_insert( goto error0; } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); level++; /* @@ -3582,15 +3582,15 @@ xfs_btree_delrec( * Actually any entry but the first would suffice. */ i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); error = xfs_btree_increment(tcur, level, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); /* Grab a pointer to the block. */ right = xfs_btree_get_block(tcur, level, &rbp); @@ -3634,12 +3634,12 @@ xfs_btree_delrec( rrecs = xfs_btree_get_numrecs(right); if (!xfs_btree_ptr_is_null(cur, &lptr)) { i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); error = xfs_btree_decrement(tcur, level, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); } } @@ -3653,13 +3653,13 @@ xfs_btree_delrec( * previous block. */ i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); error = xfs_btree_decrement(tcur, level, &i); if (error) goto error0; i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); /* Grab a pointer to the block. */ left = xfs_btree_get_block(tcur, level, &lbp); diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 5ff31be9b1cd..de1ea16f5748 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -89,7 +89,7 @@ __xfs_dir3_data_check( * so just ensure that the count falls somewhere inside the * block right now. */ - XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < + XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): @@ -107,21 +107,21 @@ __xfs_dir3_data_check( bf = ops->data_bestfree_p(hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); + XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); freeseen |= 1 << 0; } if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); + XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); freeseen |= 1 << 1; } if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); + XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); freeseen |= 1 << 2; } - XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= + XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= + XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length)); /* * Loop over the data/unused entries. @@ -134,18 +134,18 @@ __xfs_dir3_data_check( * doesn't need to be there. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(lastfree == 0); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); + XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, (freeseen & (1 << i)) == 0); freeseen |= 1 << i; } else { - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(dup->length) <= be16_to_cpu(bf[2].length)); } @@ -160,13 +160,13 @@ __xfs_dir3_data_check( * The linear search is crude but this is DEBUG code. */ dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*ops->data_entry_tag_p(dep)) == (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); count++; lastfree = 0; @@ -183,14 +183,15 @@ __xfs_dir3_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); + XFS_WANT_CORRUPTED_RETURN(mp, + i < be32_to_cpu(btp->count)); } p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. */ - XFS_WANT_CORRUPTED_RETURN(freeseen == 7); + XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { @@ -198,13 +199,13 @@ __xfs_dir3_data_check( cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; if (i > 0) - XFS_WANT_CORRUPTED_RETURN( + XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); } - XFS_WANT_CORRUPTED_RETURN(count == + XFS_WANT_CORRUPTED_RETURN(mp, count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); + XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); } return 0; } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 5b4ba9f6b37d..07349a183a11 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -701,7 +701,7 @@ xfs_ialloc_next_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); } return 0; @@ -725,7 +725,7 @@ xfs_ialloc_get_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); } return 0; @@ -784,12 +784,12 @@ xfs_dialloc_ag_inobt( error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(j == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0); if (rec.ir_freecount > 0) { /* @@ -945,19 +945,19 @@ newino: error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); for (;;) { error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); if (rec.ir_freecount > 0) break; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); } alloc_inode: @@ -1017,7 +1017,7 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(lcur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1); /* * See if we've landed in the parent inode record. The finobt @@ -1040,10 +1040,10 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(rcur, &rrec, &j); if (error) goto error_rcur; - XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); + XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur); } - XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); + XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur); if (i == 1 && j == 1) { /* * Both the left and right records are valid. Choose the closer @@ -1096,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); return 0; } } @@ -1107,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); return 0; } @@ -1134,19 +1134,19 @@ xfs_dialloc_ag_update_inobt( error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; - XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) && (rec.ir_freecount == frec->ir_freecount)); return xfs_inobt_update(cur, &rec); @@ -1477,14 +1477,14 @@ xfs_difree_inobt( __func__, error); goto error0; } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); error = xfs_inobt_get_rec(cur, &rec, &i); if (error) { xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", __func__, error); goto error0; } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); /* * Get the offset in the inode chunk. */ @@ -1594,7 +1594,7 @@ xfs_difree_finobt( * freed an inode in a previously fully allocated chunk. If not, * something is out of sync. */ - XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, ibtrec->ir_free, &i); @@ -1615,12 +1615,12 @@ xfs_difree_finobt( error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error; - XFS_WANT_CORRUPTED_GOTO(i == 1, error); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); rec.ir_free |= XFS_INOBT_MASK(offset); rec.ir_freecount++; - XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && + XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) && (rec.ir_freecount == ibtrec->ir_freecount), error); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a270095ec3c0..dc4bfc5d88fc 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -111,14 +111,6 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { - - /* - * If the log device and data device have the - * same device number, the log is internal. - * Consequently, the sb_logstart should be non-zero. If - * we have a zero sb_logstart in this case, we may be trying to mount - * a volume filesystem in a non-volume manner. - */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); return -EWRONGFS; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 22a5dcb70b32..7efa23e72a90 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1599,13 +1599,6 @@ xfs_swap_extent_flush( /* Verify O_DIRECT for ftmp */ if (VFS_I(ip)->i_mapping->nrpages) return -EINVAL; - - /* - * Don't try to swap extents on mmap()d files because we can't lock - * out races against page faults safely. - */ - if (mapping_mapped(VFS_I(ip)->i_mapping)) - return -EBUSY; return 0; } @@ -1633,13 +1626,14 @@ xfs_swap_extents( } /* - * Lock up the inodes against other IO and truncate to begin with. - * Then we can ensure the inodes are flushed and have no page cache - * safely. Once we have done this we can take the ilocks and do the rest - * of the checks. + * Lock the inodes against other IO, page faults and truncate to + * begin with. Then we can ensure the inodes are flushed and have no + * page cache safely. Once we have done this we can take the ilocks and + * do the rest of the checks. */ - lock_flags = XFS_IOLOCK_EXCL; + lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { @@ -1666,8 +1660,16 @@ xfs_swap_extents( xfs_trans_cancel(tp, 0); goto out_unlock; } + + /* + * Lock and join the inodes to the tansaction so that transaction commit + * or cancel will unlock the inodes from this point onwards. + */ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ijoin(tp, tip, lock_flags); + /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || @@ -1720,9 +1722,6 @@ xfs_swap_extents( goto out_trans_cancel; } - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ijoin(tp, tip, lock_flags); - /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. We have to do this as we are demand paging the btree @@ -1856,5 +1855,5 @@ out_unlock: out_trans_cancel: xfs_trans_cancel(tp, 0); - goto out_unlock; + goto out; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 507d96a57ac7..092d652bc03d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -537,9 +537,9 @@ xfs_buf_item_push( /* has a previous flush failed due to IO errors? */ if ((bp->b_flags & XBF_WRITE_FAIL) && - ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { + ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { xfs_warn(bp->b_target->bt_mount, -"Detected failing async write on buffer block 0x%llx. Retrying async write.", +"Failing async write on buffer block 0x%llx. Retrying async write.", (long long)bp->b_bn); } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 799e5a2d334d..e85a9519a5ae 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -84,7 +84,7 @@ xfs_trim_extents( error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); if (error) goto out_del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor); ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); /* diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 279a76e52791..c0394ed126fc 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp); /* * Macros to set EFSCORRUPTED & return/branch. */ -#define XFS_WANT_CORRUPTED_GOTO(x,l) \ +#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \ { \ int fs_is_ok = (x); \ ASSERT(fs_is_ok); \ if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ - XFS_ERRLEVEL_LOW, NULL); \ + XFS_ERRLEVEL_LOW, mp); \ error = -EFSCORRUPTED; \ goto l; \ } \ } -#define XFS_WANT_CORRUPTED_RETURN(x) \ +#define XFS_WANT_CORRUPTED_RETURN(mp, x) \ { \ int fs_is_ok = (x); \ ASSERT(fs_is_ok); \ if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ - XFS_ERRLEVEL_LOW, NULL); \ + XFS_ERRLEVEL_LOW, mp); \ return -EFSCORRUPTED; \ } \ } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ce615d12fb44..b101e80f2862 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -397,7 +397,8 @@ STATIC int /* error (positive) */ xfs_zero_last_block( struct xfs_inode *ip, xfs_fsize_t offset, - xfs_fsize_t isize) + xfs_fsize_t isize, + bool *did_zeroing) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); @@ -425,6 +426,7 @@ xfs_zero_last_block( zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; + *did_zeroing = true; return xfs_iozero(ip, isize, zero_len); } @@ -443,7 +445,8 @@ int /* error (positive) */ xfs_zero_eof( struct xfs_inode *ip, xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize) /* current inode size */ + xfs_fsize_t isize, /* current inode size */ + bool *did_zeroing) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; @@ -465,7 +468,7 @@ xfs_zero_eof( * We only zero a part of that block so it is handled specially. */ if (XFS_B_FSB_OFFSET(mp, isize) != 0) { - error = xfs_zero_last_block(ip, offset, isize); + error = xfs_zero_last_block(ip, offset, isize, did_zeroing); if (error) return error; } @@ -525,6 +528,7 @@ xfs_zero_eof( if (error) return error; + *did_zeroing = true; start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); } @@ -567,13 +571,15 @@ restart: * having to redo all checks before. */ if (*pos > i_size_read(inode)) { + bool zero = false; + if (*iolock == XFS_IOLOCK_SHARED) { xfs_rw_iunlock(ip, *iolock); *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, *iolock); goto restart; } - error = xfs_zero_eof(ip, *pos, i_size_read(inode)); + error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); if (error) return error; } @@ -841,6 +847,9 @@ xfs_file_fallocate( if (error) goto out_unlock; + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -991,20 +1000,6 @@ xfs_file_mmap( } /* - * mmap()d file has taken write protection fault and is being made - * writable. We can set the page state up correctly for a writable - * page, which means we can do correct delalloc accounting (ENOSPC - * checking!) and unwritten extent mapping. - */ -STATIC int -xfs_vm_page_mkwrite( - struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - return block_page_mkwrite(vma, vmf, xfs_get_blocks); -} - -/* * This type is designed to indicate the type of offset we would like * to search from page cache for xfs_seek_hole_data(). */ @@ -1379,6 +1374,55 @@ xfs_file_llseek( } } +/* + * Locking for serialisation of IO during page faults. This results in a lock + * ordering of: + * + * mmap_sem (MM) + * i_mmap_lock (XFS - truncate serialisation) + * page_lock (MM) + * i_lock (XFS - extent map serialisation) + */ +STATIC int +xfs_filemap_fault( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); + int error; + + trace_xfs_filemap_fault(ip); + + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + error = filemap_fault(vma, vmf); + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + + return error; +} + +/* + * mmap()d file has taken write protection fault and is being made writable. We + * can set the page state up correctly for a writable page, which means we can + * do correct delalloc accounting (ENOSPC checking!) and unwritten extent + * mapping. + */ +STATIC int +xfs_filemap_page_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); + int error; + + trace_xfs_filemap_page_mkwrite(ip); + + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + error = block_page_mkwrite(vma, vmf, xfs_get_blocks); + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + + return error; +} + const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read = new_sync_read, @@ -1411,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = { }; static const struct vm_operations_struct xfs_file_vm_ops = { - .fault = filemap_fault, + .fault = xfs_filemap_fault, .map_pages = filemap_map_pages, - .page_mkwrite = xfs_vm_page_mkwrite, + .page_mkwrite = xfs_filemap_page_mkwrite, }; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 9771b7ef62ed..76a9f2783282 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -439,11 +439,11 @@ again: *ipp = ip; /* - * If we have a real type for an on-disk inode, we can set ops(&unlock) + * If we have a real type for an on-disk inode, we can setup the inode * now. If it's a new inode being created, xfs_ialloc will handle it. */ if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) - xfs_setup_inode(ip); + xfs_setup_existing_inode(ip); return 0; out_error_or_again: diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b7064f20de7f..d6ebc85192b7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared( } /* - * The xfs inode contains 2 locks: a multi-reader lock called the - * i_iolock and a multi-reader lock called the i_lock. This routine - * allows either or both of the locks to be obtained. + * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and + * the i_lock. This routine allows various combinations of the locks to be + * obtained. * - * The 2 locks should always be ordered so that the IO lock is - * obtained first in order to prevent deadlock. + * The 3 locks should always be ordered so that the IO lock is obtained first, + * the mmap lock second and the ilock last in order to prevent deadlock. * - * ip -- the inode being locked - * lock_flags -- this parameter indicates the inode's locks - * to be locked. It can be: - * XFS_IOLOCK_SHARED, - * XFS_IOLOCK_EXCL, - * XFS_ILOCK_SHARED, - * XFS_ILOCK_EXCL, - * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, - * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, - * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, - * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL + * Basic locking order: + * + * i_iolock -> i_mmap_lock -> page_lock -> i_ilock + * + * mmap_sem locking order: + * + * i_iolock -> page lock -> mmap_sem + * mmap_sem -> i_mmap_lock -> page_lock + * + * The difference in mmap_sem locking order mean that we cannot hold the + * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can + * fault in pages during copy in/out (for buffered IO) or require the mmap_sem + * in get_user_pages() to map the user pages into the kernel address space for + * direct IO. Similarly the i_iolock cannot be taken inside a page fault because + * page faults already hold the mmap_sem. + * + * Hence to serialise fully against both syscall and mmap based IO, we need to + * take both the i_iolock and the i_mmap_lock. These locks should *only* be both + * taken in places where we need to invalidate the page cache in a race + * free manner (e.g. truncate, hole punch and other extent manipulation + * functions). */ void xfs_ilock( @@ -150,6 +160,8 @@ xfs_ilock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -159,6 +171,11 @@ xfs_ilock( else if (lock_flags & XFS_IOLOCK_SHARED) mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); else if (lock_flags & XFS_ILOCK_SHARED) @@ -191,6 +208,8 @@ xfs_ilock_nowait( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -202,21 +221,35 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_iolock)) goto out; } + + if (lock_flags & XFS_MMAPLOCK_EXCL) { + if (!mrtryupdate(&ip->i_mmaplock)) + goto out_undo_iolock; + } else if (lock_flags & XFS_MMAPLOCK_SHARED) { + if (!mrtryaccess(&ip->i_mmaplock)) + goto out_undo_iolock; + } + if (lock_flags & XFS_ILOCK_EXCL) { if (!mrtryupdate(&ip->i_lock)) - goto out_undo_iolock; + goto out_undo_mmaplock; } else if (lock_flags & XFS_ILOCK_SHARED) { if (!mrtryaccess(&ip->i_lock)) - goto out_undo_iolock; + goto out_undo_mmaplock; } return 1; - out_undo_iolock: +out_undo_mmaplock: + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrunlock_excl(&ip->i_mmaplock); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mrunlock_shared(&ip->i_mmaplock); +out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) mrunlock_excl(&ip->i_iolock); else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); - out: +out: return 0; } @@ -244,6 +277,8 @@ xfs_iunlock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -254,6 +289,11 @@ xfs_iunlock( else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrunlock_excl(&ip->i_mmaplock); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mrunlock_shared(&ip->i_mmaplock); + if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); else if (lock_flags & XFS_ILOCK_SHARED) @@ -271,11 +311,14 @@ xfs_ilock_demote( xfs_inode_t *ip, uint lock_flags) { - ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); + ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); + ASSERT((lock_flags & + ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrdemote(&ip->i_mmaplock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); @@ -294,6 +337,12 @@ xfs_isilocked( return rwsem_is_locked(&ip->i_lock.mr_lock); } + if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { + if (!(lock_flags & XFS_MMAPLOCK_SHARED)) + return !!ip->i_mmaplock.mr_writer; + return rwsem_is_locked(&ip->i_mmaplock.mr_lock); + } + if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { if (!(lock_flags & XFS_IOLOCK_SHARED)) return !!ip->i_iolock.mr_writer; @@ -314,14 +363,27 @@ int xfs_lock_delays; #endif /* - * Bump the subclass so xfs_lock_inodes() acquires each lock with - * a different value + * Bump the subclass so xfs_lock_inodes() acquires each lock with a different + * value. This shouldn't be called for page fault locking, but we also need to + * ensure we don't overrun the number of lockdep subclasses for the iolock or + * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { - if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { + ASSERT(subclass + XFS_LOCK_INUMORDER < + (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + } + + if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { + ASSERT(subclass + XFS_LOCK_INUMORDER < + (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << + XFS_MMAPLOCK_SHIFT; + } + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; @@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass) } /* - * The following routine will lock n inodes in exclusive mode. - * We assume the caller calls us with the inodes in i_ino order. + * The following routine will lock n inodes in exclusive mode. We assume the + * caller calls us with the inodes in i_ino order. * - * We need to detect deadlock where an inode that we lock - * is in the AIL and we start waiting for another inode that is locked - * by a thread in a long running transaction (such as truncate). This can - * result in deadlock since the long running trans might need to wait - * for the inode we just locked in order to push the tail and free space - * in the log. + * We need to detect deadlock where an inode that we lock is in the AIL and we + * start waiting for another inode that is locked by a thread in a long running + * transaction (such as truncate). This can result in deadlock since the long + * running trans might need to wait for the inode we just locked in order to + * push the tail and free space in the log. */ void xfs_lock_inodes( @@ -348,30 +409,27 @@ xfs_lock_inodes( int attempts = 0, i, j, try_lock; xfs_log_item_t *lp; - ASSERT(ips && (inodes >= 2)); /* we need at least two */ + /* currently supports between 2 and 5 inodes */ + ASSERT(ips && inodes >= 2 && inodes <= 5); try_lock = 0; i = 0; - again: for (; i < inodes; i++) { ASSERT(ips[i]); - if (i && (ips[i] == ips[i-1])) /* Already locked */ + if (i && (ips[i] == ips[i - 1])) /* Already locked */ continue; /* - * If try_lock is not set yet, make sure all locked inodes - * are not in the AIL. - * If any are, set try_lock to be used later. + * If try_lock is not set yet, make sure all locked inodes are + * not in the AIL. If any are, set try_lock to be used later. */ - if (!try_lock) { for (j = (i - 1); j >= 0 && !try_lock; j--) { lp = (xfs_log_item_t *)ips[j]->i_itemp; - if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { + if (lp && (lp->li_flags & XFS_LI_IN_AIL)) try_lock++; - } } } @@ -381,51 +439,42 @@ again: * we can't get any, we must release all we have * and try again. */ + if (!try_lock) { + xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); + continue; + } + + /* try_lock means we have an inode locked that is in the AIL. */ + ASSERT(i != 0); + if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) + continue; - if (try_lock) { - /* try_lock must be 0 if i is 0. */ + /* + * Unlock all previous guys and try again. xfs_iunlock will try + * to push the tail if the inode is in the AIL. + */ + attempts++; + for (j = i - 1; j >= 0; j--) { /* - * try_lock means we have an inode locked - * that is in the AIL. + * Check to see if we've already unlocked this one. Not + * the first one going back, and the inode ptr is the + * same. */ - ASSERT(i != 0); - if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { - attempts++; - - /* - * Unlock all previous guys and try again. - * xfs_iunlock will try to push the tail - * if the inode is in the AIL. - */ - - for(j = i - 1; j >= 0; j--) { - - /* - * Check to see if we've already - * unlocked this one. - * Not the first one going back, - * and the inode ptr is the same. - */ - if ((j != (i - 1)) && ips[j] == - ips[j+1]) - continue; - - xfs_iunlock(ips[j], lock_mode); - } + if (j != (i - 1) && ips[j] == ips[j + 1]) + continue; + + xfs_iunlock(ips[j], lock_mode); + } - if ((attempts % 5) == 0) { - delay(1); /* Don't just spin the CPU */ + if ((attempts % 5) == 0) { + delay(1); /* Don't just spin the CPU */ #ifdef DEBUG - xfs_lock_delays++; + xfs_lock_delays++; #endif - } - i = 0; - try_lock = 0; - goto again; - } - } else { - xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); } + i = 0; + try_lock = 0; + goto again; } #ifdef DEBUG @@ -440,10 +489,10 @@ again: } /* - * xfs_lock_two_inodes() can only be used to lock one type of lock - * at a time - the iolock or the ilock, but not both at once. If - * we lock both at once, lockdep will report false positives saying - * we have violated locking orders. + * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we + * lock more than one at a time, lockdep will report false positives saying we + * have violated locking orders. */ void xfs_lock_two_inodes( @@ -455,8 +504,12 @@ xfs_lock_two_inodes( int attempts = 0; xfs_log_item_t *lp; - if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) - ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { + ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); + ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) + ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { @@ -818,7 +871,7 @@ xfs_ialloc( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, flags); - /* now that we have an i_mode we can setup inode ops and unlock */ + /* now that we have an i_mode we can setup the inode structure */ xfs_setup_inode(ip); *ipp = ip; @@ -1235,12 +1288,14 @@ xfs_create( xfs_trans_cancel(tp, cancel_flags); out_release_inode: /* - * Wait until after the current transaction is aborted to - * release the inode. This prevents recursive transactions - * and deadlocks from xfs_inactive. + * Wait until after the current transaction is aborted to finish the + * setup of the inode and release the inode. This prevents recursive + * transactions and deadlocks from xfs_inactive. */ - if (ip) + if (ip) { + xfs_finish_inode_setup(ip); IRELE(ip); + } xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -1345,12 +1400,14 @@ xfs_create_tmpfile( xfs_trans_cancel(tp, cancel_flags); out_release_inode: /* - * Wait until after the current transaction is aborted to - * release the inode. This prevents recursive transactions - * and deadlocks from xfs_inactive. + * Wait until after the current transaction is aborted to finish the + * setup of the inode and release the inode. This prevents recursive + * transactions and deadlocks from xfs_inactive. */ - if (ip) + if (ip) { + xfs_finish_inode_setup(ip); IRELE(ip); + } xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -2611,19 +2668,22 @@ xfs_remove( /* * Enter all inodes for a rename transaction into a sorted array. */ +#define __XFS_SORT_INODES 5 STATIC void xfs_sort_for_rename( - xfs_inode_t *dp1, /* in: old (source) directory inode */ - xfs_inode_t *dp2, /* in: new (target) directory inode */ - xfs_inode_t *ip1, /* in: inode of old entry */ - xfs_inode_t *ip2, /* in: inode of new entry, if it - already exists, NULL otherwise. */ - xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ - int *num_inodes) /* out: number of inodes in array */ + struct xfs_inode *dp1, /* in: old (source) directory inode */ + struct xfs_inode *dp2, /* in: new (target) directory inode */ + struct xfs_inode *ip1, /* in: inode of old entry */ + struct xfs_inode *ip2, /* in: inode of new entry */ + struct xfs_inode *wip, /* in: whiteout inode */ + struct xfs_inode **i_tab,/* out: sorted array of inodes */ + int *num_inodes) /* in/out: inodes in array */ { - xfs_inode_t *temp; int i, j; + ASSERT(*num_inodes == __XFS_SORT_INODES); + memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *)); + /* * i_tab contains a list of pointers to inodes. We initialize * the table here & we'll sort it. We will then use it to @@ -2631,25 +2691,24 @@ xfs_sort_for_rename( * * Note that the table may contain duplicates. e.g., dp1 == dp2. */ - i_tab[0] = dp1; - i_tab[1] = dp2; - i_tab[2] = ip1; - if (ip2) { - *num_inodes = 4; - i_tab[3] = ip2; - } else { - *num_inodes = 3; - i_tab[3] = NULL; - } + i = 0; + i_tab[i++] = dp1; + i_tab[i++] = dp2; + i_tab[i++] = ip1; + if (ip2) + i_tab[i++] = ip2; + if (wip) + i_tab[i++] = wip; + *num_inodes = i; /* * Sort the elements via bubble sort. (Remember, there are at - * most 4 elements to sort, so this is adequate.) + * most 5 elements to sort, so this is adequate.) */ for (i = 0; i < *num_inodes; i++) { for (j = 1; j < *num_inodes; j++) { if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { - temp = i_tab[j]; + struct xfs_inode *temp = i_tab[j]; i_tab[j] = i_tab[j-1]; i_tab[j-1] = temp; } @@ -2657,6 +2716,31 @@ xfs_sort_for_rename( } } +static int +xfs_finish_rename( + struct xfs_trans *tp, + struct xfs_bmap_free *free_list) +{ + int committed = 0; + int error; + + /* + * If this is a synchronous mount, make sure that the rename transaction + * goes to disk before returning to the user. + */ + if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + xfs_trans_set_sync(tp); + + error = xfs_bmap_finish(&tp, free_list, &committed); + if (error) { + xfs_bmap_cancel(free_list); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + return error; + } + + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +} + /* * xfs_cross_rename() * @@ -2685,14 +2769,14 @@ xfs_cross_rename( ip2->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* Swap inode number for dirent in second parent */ error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* * If we're renaming one or more directories across different parents, @@ -2707,16 +2791,16 @@ xfs_cross_rename( dp1->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* transfer ip2 ".." reference to dp1 */ if (!S_ISDIR(ip1->i_d.di_mode)) { error = xfs_droplink(tp, dp2); if (error) - goto out; + goto out_trans_abort; error = xfs_bumplink(tp, dp1); if (error) - goto out; + goto out_trans_abort; } /* @@ -2734,16 +2818,16 @@ xfs_cross_rename( dp2->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* transfer ip1 ".." reference to dp2 */ if (!S_ISDIR(ip2->i_d.di_mode)) { error = xfs_droplink(tp, dp1); if (error) - goto out; + goto out_trans_abort; error = xfs_bumplink(tp, dp2); if (error) - goto out; + goto out_trans_abort; } /* @@ -2771,66 +2855,108 @@ xfs_cross_rename( } xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); -out: + return xfs_finish_rename(tp, free_list); + +out_trans_abort: + xfs_bmap_cancel(free_list); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } /* + * xfs_rename_alloc_whiteout() + * + * Return a referenced, unlinked, unlocked inode that that can be used as a + * whiteout in a rename transaction. We use a tmpfile inode here so that if we + * crash between allocating the inode and linking it into the rename transaction + * recovery will free the inode and we won't leak it. + */ +static int +xfs_rename_alloc_whiteout( + struct xfs_inode *dp, + struct xfs_inode **wip) +{ + struct xfs_inode *tmpfile; + int error; + + error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile); + if (error) + return error; + + /* Satisfy xfs_bumplink that this is a real tmpfile */ + xfs_finish_inode_setup(tmpfile); + VFS_I(tmpfile)->i_state |= I_LINKABLE; + + *wip = tmpfile; + return 0; +} + +/* * xfs_rename */ int xfs_rename( - xfs_inode_t *src_dp, - struct xfs_name *src_name, - xfs_inode_t *src_ip, - xfs_inode_t *target_dp, - struct xfs_name *target_name, - xfs_inode_t *target_ip, - unsigned int flags) + struct xfs_inode *src_dp, + struct xfs_name *src_name, + struct xfs_inode *src_ip, + struct xfs_inode *target_dp, + struct xfs_name *target_name, + struct xfs_inode *target_ip, + unsigned int flags) { - xfs_trans_t *tp = NULL; - xfs_mount_t *mp = src_dp->i_mount; - bool new_parent; /* moving to a new dir */ - bool src_is_directory; /* src_name is a directory */ - int error; - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - int cancel_flags; - int committed; - xfs_inode_t *inodes[4]; - int spaceres; - int num_inodes; + struct xfs_mount *mp = src_dp->i_mount; + struct xfs_trans *tp; + struct xfs_bmap_free free_list; + xfs_fsblock_t first_block; + struct xfs_inode *wip = NULL; /* whiteout inode */ + struct xfs_inode *inodes[__XFS_SORT_INODES]; + int num_inodes = __XFS_SORT_INODES; + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); + int cancel_flags = 0; + int spaceres; + int error; trace_xfs_rename(src_dp, target_dp, src_name, target_name); - new_parent = (src_dp != target_dp); - src_is_directory = S_ISDIR(src_ip->i_d.di_mode); + if ((flags & RENAME_EXCHANGE) && !target_ip) + return -EINVAL; + + /* + * If we are doing a whiteout operation, allocate the whiteout inode + * we will be placing at the target and ensure the type is set + * appropriately. + */ + if (flags & RENAME_WHITEOUT) { + ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); + error = xfs_rename_alloc_whiteout(target_dp, &wip); + if (error) + return error; + + /* setup target dirent info as whiteout */ + src_name->type = XFS_DIR3_FT_CHRDEV; + } - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, + xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, inodes, &num_inodes); - xfs_bmap_init(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); if (error == -ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); } - if (error) { - xfs_trans_cancel(tp, 0); - goto std_return; - } + if (error) + goto out_trans_cancel; + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* * Attach the dquots to the inodes */ error = xfs_qm_vop_rename_dqattach(inodes); - if (error) { - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } + if (error) + goto out_trans_cancel; /* * Lock all the participating inodes. Depending upon whether @@ -2851,6 +2977,8 @@ xfs_rename( xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); + if (wip) + xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL); /* * If we are using project inheritance, we only allow renames @@ -2860,20 +2988,16 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { error = -EXDEV; - goto error_return; + goto out_trans_cancel; } - /* - * Handle RENAME_EXCHANGE flags - */ - if (flags & RENAME_EXCHANGE) { - error = xfs_cross_rename(tp, src_dp, src_name, src_ip, - target_dp, target_name, target_ip, - &free_list, &first_block, spaceres); - if (error) - goto abort_return; - goto finish_rename; - } + xfs_bmap_init(&free_list, &first_block); + + /* RENAME_EXCHANGE is unique from here on. */ + if (flags & RENAME_EXCHANGE) + return xfs_cross_rename(tp, src_dp, src_name, src_ip, + target_dp, target_name, target_ip, + &free_list, &first_block, spaceres); /* * Set up the target. @@ -2886,7 +3010,7 @@ xfs_rename( if (!spaceres) { error = xfs_dir_canenter(tp, target_dp, target_name); if (error) - goto error_return; + goto out_trans_cancel; } /* * If target does not exist and the rename crosses @@ -2897,9 +3021,9 @@ xfs_rename( src_ip->i_ino, &first_block, &free_list, spaceres); if (error == -ENOSPC) - goto error_return; + goto out_bmap_cancel; if (error) - goto abort_return; + goto out_trans_abort; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -2907,7 +3031,7 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) - goto abort_return; + goto out_trans_abort; } } else { /* target_ip != NULL */ /* @@ -2922,7 +3046,7 @@ xfs_rename( if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = -EEXIST; - goto error_return; + goto out_trans_cancel; } } @@ -2939,7 +3063,7 @@ xfs_rename( src_ip->i_ino, &first_block, &free_list, spaceres); if (error) - goto abort_return; + goto out_trans_abort; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -2950,7 +3074,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto abort_return; + goto out_trans_abort; if (src_is_directory) { /* @@ -2958,7 +3082,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto abort_return; + goto out_trans_abort; } } /* target_ip != NULL */ @@ -2975,7 +3099,7 @@ xfs_rename( &first_block, &free_list, spaceres); ASSERT(error != -EEXIST); if (error) - goto abort_return; + goto out_trans_abort; } /* @@ -3001,49 +3125,67 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) - goto abort_return; + goto out_trans_abort; } - error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, + /* + * For whiteouts, we only need to update the source dirent with the + * inode number of the whiteout inode rather than removing it + * altogether. + */ + if (wip) { + error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, &first_block, &free_list, spaceres); + } else + error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, + &first_block, &free_list, spaceres); if (error) - goto abort_return; - - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - if (new_parent) - xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); + goto out_trans_abort; -finish_rename: /* - * If this is a synchronous mount, make sure that the - * rename transaction goes to disk before returning to - * the user. + * For whiteouts, we need to bump the link count on the whiteout inode. + * This means that failures all the way up to this point leave the inode + * on the unlinked list and so cleanup is a simple matter of dropping + * the remaining reference to it. If we fail here after bumping the link + * count, we're shutting down the filesystem so we'll never see the + * intermediate state on disk. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { - xfs_trans_set_sync(tp); - } + if (wip) { + ASSERT(wip->i_d.di_nlink == 0); + error = xfs_bumplink(tp, wip); + if (error) + goto out_trans_abort; + error = xfs_iunlink_remove(tp, wip); + if (error) + goto out_trans_abort; + xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); - error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) { - xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT)); - goto std_return; + /* + * Now we have a real link, clear the "I'm a tmpfile" state + * flag from the inode so it doesn't accidentally get misused in + * future. + */ + VFS_I(wip)->i_state &= ~I_LINKABLE; } - /* - * trans_commit will unlock src_ip, target_ip & decrement - * the vnode references. - */ - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); + if (new_parent) + xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - abort_return: + error = xfs_finish_rename(tp, &free_list); + if (wip) + IRELE(wip); + return error; + +out_trans_abort: cancel_flags |= XFS_TRANS_ABORT; - error_return: +out_bmap_cancel: xfs_bmap_cancel(&free_list); +out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); - std_return: + if (wip) + IRELE(wip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 86cd6b39bed7..8f22d20368d8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -56,6 +56,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ + mrlock_t i_mmaplock; /* inode mmap IO lock */ atomic_t i_pincount; /* inode pin count */ spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. */ @@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) +#define XFS_MMAPLOCK_EXCL (1<<4) +#define XFS_MMAPLOCK_SHARED (1<<5) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ - | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) + | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ + | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED) #define XFS_LOCK_FLAGS \ { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ - { XFS_ILOCK_SHARED, "ILOCK_SHARED" } + { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ + { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ + { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" } /* @@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) +#define XFS_MMAPLOCK_SHIFT 20 + #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) -#define XFS_IOLOCK_DEP_MASK 0x00ff0000 +#define XFS_IOLOCK_DEP_MASK 0x000f0000 +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 #define XFS_ILOCK_DEP_MASK 0xff000000 -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) +#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ + XFS_MMAPLOCK_DEP_MASK | \ + XFS_ILOCK_DEP_MASK) -#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) -#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) +#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \ + >> XFS_IOLOCK_SHIFT) +#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \ + >> XFS_MMAPLOCK_SHIFT) +#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \ + >> XFS_ILOCK_SHIFT) /* * For multiple groups support: if S_ISGID bit is set in the parent @@ -384,11 +399,34 @@ enum xfs_prealloc_flags { XFS_PREALLOC_INVISIBLE = (1 << 4), }; -int xfs_update_prealloc_flags(struct xfs_inode *, - enum xfs_prealloc_flags); -int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); -int xfs_iozero(struct xfs_inode *, loff_t, size_t); +int xfs_update_prealloc_flags(struct xfs_inode *ip, + enum xfs_prealloc_flags flags); +int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, + xfs_fsize_t isize, bool *did_zeroing); +int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); + +/* from xfs_iops.c */ +/* + * When setting up a newly allocated inode, we need to call + * xfs_finish_inode_setup() once the inode is fully instantiated at + * the VFS level to prevent the rest of the world seeing the inode + * before we've completed instantiation. Otherwise we can do it + * the moment the inode lookup is complete. + */ +extern void xfs_setup_inode(struct xfs_inode *ip); +static inline void xfs_finish_inode_setup(struct xfs_inode *ip) +{ + xfs_iflags_clear(ip, XFS_INEW); + barrier(); + unlock_new_inode(VFS_I(ip)); +} + +static inline void xfs_setup_existing_inode(struct xfs_inode *ip) +{ + xfs_setup_inode(ip); + xfs_finish_inode_setup(ip); +} #define IHOLD(ip) \ do { \ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ac4feae45eb3..4ee44ddfdfb7 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -631,7 +631,7 @@ xfs_ioc_space( if (filp->f_flags & O_DSYNC) flags |= XFS_PREALLOC_SYNC; - if (ioflags & XFS_IO_INVIS) + if (ioflags & XFS_IO_INVIS) flags |= XFS_PREALLOC_INVISIBLE; error = mnt_want_write_file(filp); @@ -643,6 +643,9 @@ xfs_ioc_space( if (error) goto out_unlock; + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index d919ad7b16bf..015d6a366b16 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -187,6 +187,8 @@ xfs_generic_create( else d_instantiate(dentry, inode); + xfs_finish_inode_setup(ip); + out_free_acl: if (default_acl) posix_acl_release(default_acl); @@ -195,6 +197,7 @@ xfs_generic_create( return error; out_cleanup_inode: + xfs_finish_inode_setup(ip); if (!tmpfile) xfs_cleanup_inode(dir, inode, dentry); iput(inode); @@ -367,9 +370,11 @@ xfs_vn_symlink( goto out_cleanup_inode; d_instantiate(dentry, inode); + xfs_finish_inode_setup(cip); return 0; out_cleanup_inode: + xfs_finish_inode_setup(cip); xfs_cleanup_inode(dir, inode, dentry); iput(inode); out: @@ -389,7 +394,7 @@ xfs_vn_rename( struct xfs_name oname; struct xfs_name nname; - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; /* if we are exchanging files, we need to set i_mode of both files */ @@ -751,6 +756,7 @@ xfs_setattr_size( int error; uint lock_flags = 0; uint commit_flags = 0; + bool did_zeroing = false; trace_xfs_setattr(ip); @@ -765,6 +771,7 @@ xfs_setattr_size( return error; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); @@ -794,20 +801,16 @@ xfs_setattr_size( return error; /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. + * File data changes must be complete before we start the transaction to + * modify the inode. This needs to be done before joining the inode to + * the transaction because the inode cannot be unlocked once it is a + * part of the transaction. + * + * Start with zeroing any data block beyond EOF that we may expose on + * file extension. */ if (newsize > oldsize) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. - */ - error = xfs_zero_eof(ip, newsize, oldsize); + error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); if (error) return error; } @@ -817,75 +820,42 @@ xfs_setattr_size( * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. + * problem. Note that this includes any block zeroing we did above; + * otherwise those blocks may not be zeroed after a crash. */ - if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { + if (newsize > ip->i_d.di_size && + (oldsize != ip->i_d.di_size || did_zeroing)) { error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) return error; } - /* - * Wait for all direct I/O to complete. - */ + /* Now wait for all direct I/O to complete. */ inode_dio_wait(inode); /* - * Do all the page cache truncate work outside the transaction context - * as the "lock" order is page lock->log space reservation. i.e. - * locking pages inside the transaction can ABBA deadlock with - * writeback. We have to do the VFS inode size update before we truncate - * the pagecache, however, to avoid racing with page faults beyond the - * new EOF they are not serialised against truncate operations except by - * page locks and size updates. + * We've already locked out new page faults, so now we can safely remove + * pages from the page cache knowing they won't get refaulted until we + * drop the XFS_MMAP_EXCL lock after the extent manipulations are + * complete. The truncate_setsize() call also cleans partial EOF page + * PTEs on extending truncates and hence ensures sub-page block size + * filesystems are correctly handled, too. * - * Hence we are in a situation where a truncate can fail with ENOMEM - * from xfs_trans_reserve(), but having already truncated the in-memory - * version of the file (i.e. made user visible changes). There's not - * much we can do about this, except to hope that the caller sees ENOMEM - * and retries the truncate operation. + * We have to do all the page cache truncate work outside the + * transaction context as the "lock" order is page lock->log space + * reservation as defined by extent allocation in the writeback path. + * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but + * having already truncated the in-memory version of the file (i.e. made + * user visible changes). There's not much we can do about this, except + * to hope that the caller sees ENOMEM and retries the truncate + * operation. */ error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; truncate_setsize(inode, newsize); - /* - * The "we can't serialise against page faults" pain gets worse. - * - * If the file is mapped then we have to clean the page at the old EOF - * when extending the file. Extending the file can expose changes the - * underlying page mapping (e.g. from beyond EOF to a hole or - * unwritten), and so on the next attempt to write to that page we need - * to remap it for write. i.e. we need .page_mkwrite() to be called. - * Hence we need to clean the page to clean the pte and so a new write - * fault will be triggered appropriately. - * - * If we do it before we change the inode size, then we can race with a - * page fault that maps the page with exactly the same problem. If we do - * it after we change the file size, then a new page fault can come in - * and allocate space before we've run the rest of the truncate - * transaction. That's kinda grotesque, but it's better than have data - * over a hole, and so that's the lesser evil that has been chosen here. - * - * The real solution, however, is to have some mechanism for locking out - * page faults while a truncate is in progress. - */ - if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { - error = filemap_write_and_wait_range( - VFS_I(ip)->i_mapping, - round_down(oldsize, PAGE_CACHE_SIZE), - round_up(oldsize, PAGE_CACHE_SIZE) - 1); - if (error) - return error; - } - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) @@ -984,8 +954,12 @@ xfs_vn_setattr( xfs_ilock(ip, iolock); error = xfs_break_layouts(dentry->d_inode, &iolock); - if (!error) + if (!error) { + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + error = xfs_setattr_size(ip, iattr); + } xfs_iunlock(ip, iolock); } else { error = xfs_setattr_nonsize(ip, iattr, 0); @@ -1236,16 +1210,12 @@ xfs_diflags_to_iflags( } /* - * Initialize the Linux inode, set up the operation vectors and - * unlock the inode. - * - * When reading existing inodes from disk this is called directly - * from xfs_iget, when creating a new inode it is called from - * xfs_ialloc after setting up the inode. + * Initialize the Linux inode and set up the operation vectors. * - * We are always called with an uninitialised linux inode here. - * We need to initialise the necessary fields and take a reference - * on it. + * When reading existing inodes from disk this is called directly from xfs_iget, + * when creating a new inode it is called from xfs_ialloc after setting up the + * inode. These callers have different criteria for clearing XFS_INEW, so leave + * it up to the caller to deal with unlocking the inode appropriately. */ void xfs_setup_inode( @@ -1332,9 +1302,4 @@ xfs_setup_inode( inode_has_no_xattr(inode); cache_no_acl(inode); } - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); } diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index ea7a98e9cb70..a0f84abb0d09 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -extern void xfs_setup_inode(struct xfs_inode *); - /* * Internal setattr interfaces. */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 82e314258f73..80429891dc9b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk( error = xfs_inobt_get_rec(cur, irec, &stat); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(stat == 1); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); /* Check if the record contains the inode in request */ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 4b33ef112400..365dd57ea760 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -300,8 +300,10 @@ xfs_fs_commit_blocks( tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (error) + if (error) { + xfs_trans_cancel(tp, 0); goto out_drop_iolock; + } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 53cc2aaf8d2b..5538468c7f63 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -719,6 +719,7 @@ xfs_qm_qino_alloc( xfs_trans_t *tp; int error; int committed; + bool need_alloc = true; *ip = NULL; /* @@ -747,6 +748,7 @@ xfs_qm_qino_alloc( return error; mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; + need_alloc = false; } } @@ -758,7 +760,7 @@ xfs_qm_qino_alloc( return error; } - if (!*ip) { + if (need_alloc) { error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); if (error) { @@ -794,11 +796,14 @@ xfs_qm_qino_alloc( spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_alert(mp, "%s failed (error %d)!", __func__, error); - return error; } - return 0; + if (need_alloc) + xfs_finish_inode_setup(*ip); + return error; } @@ -836,6 +841,11 @@ xfs_qm_reset_dqcounts( */ xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, "xfs_quotacheck"); + /* + * Reset type in case we are reusing group quota file for + * project quotas or vice versa + */ + ddq->d_flags = type; ddq->d_bcount = 0; ddq->d_icount = 0; ddq->d_rtbcount = 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 194291381252..8782b36517b5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ @@ -361,28 +359,10 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); mp->m_qflags &= ~XFS_GQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { - xfs_warn(mp, - "delaylog is the default now, option is deprecated."); - } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { - xfs_warn(mp, - "nodelaylog support has been removed, option is deprecated."); } else if (!strcmp(this_char, MNTOPT_DISCARD)) { mp->m_flags |= XFS_MOUNT_DISCARD; } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { mp->m_flags &= ~XFS_MOUNT_DISCARD; - } else if (!strcmp(this_char, "ihashsize")) { - xfs_warn(mp, - "ihashsize no longer used, option is deprecated."); - } else if (!strcmp(this_char, "osyncisdsync")) { - xfs_warn(mp, - "osyncisdsync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "osyncisosync")) { - xfs_warn(mp, - "osyncisosync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "irixsgid")) { - xfs_warn(mp, - "irixsgid is now a sysctl(2) variable, option is deprecated."); } else { xfs_warn(mp, "unknown mount option [%s].", this_char); return -EINVAL; @@ -986,6 +966,8 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); + mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); } @@ -1564,6 +1546,7 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); + xfs_notice(mp, "Unmounting Filesystem"); xfs_filestream_unmount(mp); xfs_unmountfs(mp); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 25791df6f638..3df411eadb86 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -177,7 +177,7 @@ xfs_symlink( int pathlen; struct xfs_bmap_free free_list; xfs_fsblock_t first_block; - bool unlock_dp_on_error = false; + bool unlock_dp_on_error = false; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -221,7 +221,7 @@ xfs_symlink( XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) - goto std_return; + return error; tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; @@ -241,7 +241,7 @@ xfs_symlink( } if (error) { cancel_flags = 0; - goto error_return; + goto out_trans_cancel; } xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); @@ -252,7 +252,7 @@ xfs_symlink( */ if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { error = -EPERM; - goto error_return; + goto out_trans_cancel; } /* @@ -261,7 +261,7 @@ xfs_symlink( error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, pdqp, resblks, 1, 0); if (error) - goto error_return; + goto out_trans_cancel; /* * Check for ability to enter directory entry, if no space reserved. @@ -269,7 +269,7 @@ xfs_symlink( if (!resblks) { error = xfs_dir_canenter(tp, dp, link_name); if (error) - goto error_return; + goto out_trans_cancel; } /* * Initialize the bmap freelist prior to calling either @@ -282,15 +282,14 @@ xfs_symlink( */ error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, resblks > 0, &ip, NULL); - if (error) { - if (error == -ENOSPC) - goto error_return; - goto error1; - } + if (error) + goto out_trans_cancel; /* - * An error after we've joined dp to the transaction will result in the - * transaction cancel unlocking dp so don't do it explicitly in the + * Now we join the directory inode to the transaction. We do not do it + * earlier because xfs_dir_ialloc might commit the previous transaction + * (and release all the locks). An error from here on will result in + * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); @@ -330,7 +329,7 @@ xfs_symlink( XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, &free_list); if (error) - goto error2; + goto out_bmap_cancel; if (resblks) resblks -= fs_blocks; @@ -348,7 +347,7 @@ xfs_symlink( BTOBB(byte_cnt), 0); if (!bp) { error = -ENOMEM; - goto error2; + goto out_bmap_cancel; } bp->b_ops = &xfs_symlink_buf_ops; @@ -378,7 +377,7 @@ xfs_symlink( error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, &first_block, &free_list, resblks); if (error) - goto error2; + goto out_bmap_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -392,10 +391,13 @@ xfs_symlink( } error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) { - goto error2; - } + if (error) + goto out_bmap_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto out_release_inode; + xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); @@ -403,20 +405,28 @@ xfs_symlink( *ipp = ip; return 0; - error2: - IRELE(ip); - error1: +out_bmap_cancel: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; - error_return: +out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); +out_release_inode: + /* + * Wait until after the current transaction is aborted to finish the + * setup of the inode and release the inode. This prevents recursive + * transactions and deadlocks from xfs_inactive. + */ + if (ip) { + xfs_finish_inode_setup(ip); + IRELE(ip); + } + xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - std_return: return error; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b5ac81eeb061..a302738d224d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -685,6 +685,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); +DEFINE_INODE_EVENT(xfs_filemap_fault); +DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); + DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), TP_ARGS(ip, caller_ip), |