summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_inode.c171
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c8
3 files changed, 83 insertions, 98 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 31e105f95739..ece3622f6d28 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3603,141 +3603,120 @@ flush_out:
* locked. The function will walk across all the inodes on the cluster buffer it
* can find and lock without blocking, and flush them to the cluster buffer.
*
- * On success, the caller must write out the buffer returned in *bp and
- * release it. On failure, the filesystem will be shut down, the buffer will
- * have been unlocked and released, and EFSCORRUPTED will be returned.
+ * On successful flushing of at least one inode, the caller must write out the
+ * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
+ * the caller needs to release the buffer. On failure, the filesystem will be
+ * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
+ * will be returned.
*/
int
xfs_iflush_cluster(
- struct xfs_inode *ip,
struct xfs_buf *bp)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_perag *pag;
- unsigned long first_index, mask;
- int cilist_size;
- struct xfs_inode **cilist;
- struct xfs_inode *cip;
- struct xfs_ino_geometry *igeo = M_IGEO(mp);
- int error = 0;
- int nr_found;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_log_item *lip, *n;
+ struct xfs_inode *ip;
+ struct xfs_inode_log_item *iip;
int clcount = 0;
- int i;
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-
- cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
- cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
- if (!cilist)
- goto out_put;
-
- mask = ~(igeo->inodes_per_cluster - 1);
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
- rcu_read_lock();
- /* really need a gang lookup range call here */
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
- first_index, igeo->inodes_per_cluster);
- if (nr_found == 0)
- goto out_free;
+ int error = 0;
- for (i = 0; i < nr_found; i++) {
- cip = cilist[i];
+ /*
+ * We must use the safe variant here as on shutdown xfs_iflush_abort()
+ * can remove itself from the list.
+ */
+ list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
+ iip = (struct xfs_inode_log_item *)lip;
+ ip = iip->ili_inode;
/*
- * because this is an RCU protected lookup, we could find a
- * recently freed or even reallocated inode during the lookup.
- * We need to check under the i_flags_lock for a valid inode
- * here. Skip it if it is not valid or the wrong inode.
+ * Quick and dirty check to avoid locks if possible.
*/
- spin_lock(&cip->i_flags_lock);
- if (!cip->i_ino ||
- __xfs_iflags_test(cip, XFS_ISTALE)) {
- spin_unlock(&cip->i_flags_lock);
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+ continue;
+ if (xfs_ipincount(ip))
continue;
- }
/*
- * Once we fall off the end of the cluster, no point checking
- * any more inodes in the list because they will also all be
- * outside the cluster.
+ * The inode is still attached to the buffer, which means it is
+ * dirty but reclaim might try to grab it. Check carefully for
+ * that, and grab the ilock while still holding the i_flags_lock
+ * to guarantee reclaim will not be able to reclaim this inode
+ * once we drop the i_flags_lock.
*/
- if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
- spin_unlock(&cip->i_flags_lock);
- break;
+ spin_lock(&ip->i_flags_lock);
+ ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+ spin_unlock(&ip->i_flags_lock);
+ continue;
}
- spin_unlock(&cip->i_flags_lock);
/*
- * Do an un-protected check to see if the inode is dirty and
- * is a candidate for flushing. These checks will be repeated
- * later after the appropriate locks are acquired.
+ * ILOCK will pin the inode against reclaim and prevent
+ * concurrent transactions modifying the inode while we are
+ * flushing the inode.
*/
- if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+ spin_unlock(&ip->i_flags_lock);
continue;
+ }
+ spin_unlock(&ip->i_flags_lock);
/*
- * Try to get locks. If any are unavailable or it is pinned,
- * then this inode cannot be flushed and is skipped.
+ * Skip inodes that are already flush locked as they have
+ * already been written to the buffer.
*/
-
- if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
- continue;
- if (!xfs_iflock_nowait(cip)) {
- xfs_iunlock(cip, XFS_ILOCK_SHARED);
+ if (!xfs_iflock_nowait(ip)) {
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
continue;
}
- if (xfs_ipincount(cip)) {
- xfs_ifunlock(cip);
- xfs_iunlock(cip, XFS_ILOCK_SHARED);
- continue;
- }
-
/*
- * Check the inode number again, just to be certain we are not
- * racing with freeing in xfs_reclaim_inode(). See the comments
- * in that function for more information as to why the initial
- * check is not sufficient.
+ * Abort flushing this inode if we are shut down because the
+ * inode may not currently be in the AIL. This can occur when
+ * log I/O failure unpins the inode without inserting into the
+ * AIL, leaving a dirty/unpinned inode attached to the buffer
+ * that otherwise looks like it should be flushed.
*/
- if (!cip->i_ino) {
- xfs_ifunlock(cip);
- xfs_iunlock(cip, XFS_ILOCK_SHARED);
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_iunpin_wait(ip);
+ /* xfs_iflush_abort() drops the flush lock */
+ xfs_iflush_abort(ip);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ error = -EIO;
continue;
}
- /*
- * arriving here means that this inode can be flushed. First
- * re-check that it's dirty before flushing.
- */
- if (!xfs_inode_clean(cip)) {
- error = xfs_iflush(cip, bp);
- if (error) {
- xfs_iunlock(cip, XFS_ILOCK_SHARED);
- goto out_free;
- }
- clcount++;
- } else {
- xfs_ifunlock(cip);
+ /* don't block waiting on a log force to unpin dirty inodes */
+ if (xfs_ipincount(ip)) {
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ continue;
}
- xfs_iunlock(cip, XFS_ILOCK_SHARED);
- }
- if (clcount) {
- XFS_STATS_INC(mp, xs_icluster_flushcnt);
- XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
+ if (!xfs_inode_clean(ip))
+ error = xfs_iflush(ip, bp);
+ else
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ if (error)
+ break;
+ clcount++;
}
-out_free:
- rcu_read_unlock();
- kmem_free(cilist);
-out_put:
- xfs_perag_put(pag);
if (error) {
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioend_fail(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
}
- return error;
+
+ if (!clcount)
+ return -EAGAIN;
+
+ XFS_STATS_INC(mp, xs_icluster_flushcnt);
+ XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
+ return 0;
+
}
/* Release an inode. */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c482e7306fe0..e9a8bb184d1f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -426,7 +426,7 @@ int xfs_log_force_inode(struct xfs_inode *ip);
void xfs_iunpin_wait(xfs_inode_t *);
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
-int xfs_iflush_cluster(struct xfs_inode *, struct xfs_buf *);
+int xfs_iflush_cluster(struct xfs_buf *);
void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
struct xfs_inode *ip1, uint ip1_mode);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index e8eda2ac25fb..4e7fce8d4f7c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -507,12 +507,18 @@ xfs_inode_item_push(
* reference for IO until we queue the buffer for delwri submission.
*/
xfs_buf_hold(bp);
- error = xfs_iflush_cluster(ip, bp);
+ error = xfs_iflush_cluster(bp);
if (!error) {
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_relse(bp);
} else {
+ /*
+ * Release the buffer if we were unable to flush anything. On
+ * any other error, the buffer has already been released.
+ */
+ if (error == -EAGAIN)
+ xfs_buf_relse(bp);
rval = XFS_ITEM_LOCKED;
}