summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c3
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c53
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_inode_item.c61
-rw-r--r--fs/xfs/xfs_trans_ail.c8
5 files changed, 105 insertions, 24 deletions
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 5c93e8e6de74..b4a6c091571e 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -176,7 +176,8 @@ xfs_imap_to_bp(
}
*bpp = bp;
- *dipp = xfs_buf_offset(bp, imap->im_boffset);
+ if (dipp)
+ *dipp = xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index c66d9d1dd58b..ad5974365c58 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -8,6 +8,8 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
@@ -72,13 +74,19 @@ xfs_trans_ichgtime(
}
/*
- * This is called to mark the fields indicated in fieldmask as needing
- * to be logged when the transaction is committed. The inode must
- * already be associated with the given transaction.
+ * This is called to mark the fields indicated in fieldmask as needing to be
+ * logged when the transaction is committed. The inode must already be
+ * associated with the given transaction.
*
- * The values for fieldmask are defined in xfs_inode_item.h. We always
- * log all of the core inode if any of it has changed, and we always log
- * all of the inline data/extents/b-tree root if any of them has changed.
+ * The values for fieldmask are defined in xfs_inode_item.h. We always log all
+ * of the core inode if any of it has changed, and we always log all of the
+ * inline data/extents/b-tree root if any of them has changed.
+ *
+ * Grab and pin the cluster buffer associated with this inode to avoid RMW
+ * cycles at inode writeback time. Avoid the need to add error handling to every
+ * xfs_trans_log_inode() call by shutting down on read error. This will cause
+ * transactions to fail and everything to error out, just like if we return a
+ * read error in a dirty transaction and cancel it.
*/
void
xfs_trans_log_inode(
@@ -131,6 +139,39 @@ xfs_trans_log_inode(
spin_lock(&iip->ili_lock);
iip->ili_fsync_fields |= flags;
+ if (!iip->ili_item.li_buf) {
+ struct xfs_buf *bp;
+ int error;
+
+ /*
+ * We hold the ILOCK here, so this inode is not going to be
+ * flushed while we are here. Further, because there is no
+ * buffer attached to the item, we know that there is no IO in
+ * progress, so nothing will clear the ili_fields while we read
+ * in the buffer. Hence we can safely drop the spin lock and
+ * read the buffer knowing that the state will not change from
+ * here.
+ */
+ spin_unlock(&iip->ili_lock);
+ error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, NULL,
+ &bp, 0);
+ if (error) {
+ xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
+ return;
+ }
+
+ /*
+ * We need an explicit buffer reference for the log item but
+ * don't want the buffer to remain attached to the transaction.
+ * Hold the buffer but release the transaction reference.
+ */
+ xfs_buf_hold(bp);
+ xfs_trans_brelse(tp, bp);
+
+ spin_lock(&iip->ili_lock);
+ iip->ili_item.li_buf = bp;
+ }
+
/*
* Always OR in the bits from the ili_last_fields field. This is to
* coordinate with the xfs_iflush() and xfs_iflush_done() routines in
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d61f20b989cd..ecb3362395af 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1143,11 +1143,9 @@ xfs_buf_inode_iodone(
if (ret == XBF_IOERROR_DONE)
return;
ASSERT(ret == XBF_IOERROR_FAIL);
- spin_lock(&bp->b_mount->m_ail->ail_lock);
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- xfs_set_li_failed(lip, bp);
+ set_bit(XFS_LI_FAILED, &lip->li_flags);
}
- spin_unlock(&bp->b_mount->m_ail->ail_lock);
xfs_buf_ioerror(bp, 0);
xfs_buf_relse(bp);
return;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0ba75764a8dc..64bdda72f7b2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -439,6 +439,7 @@ xfs_inode_item_pin(
struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ ASSERT(lip->li_buf);
trace_xfs_inode_pin(ip, _RET_IP_);
atomic_inc(&ip->i_pincount);
@@ -450,6 +451,12 @@ xfs_inode_item_pin(
* item which was previously pinned with a call to xfs_inode_item_pin().
*
* Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
+ *
+ * Note that unpin can race with inode cluster buffer freeing marking the buffer
+ * stale. In that case, flush completions are run from the buffer unpin call,
+ * which may happen before the inode is unpinned. If we lose the race, there
+ * will be no buffer attached to the log item, but the inode will be marked
+ * XFS_ISTALE.
*/
STATIC void
xfs_inode_item_unpin(
@@ -459,6 +466,7 @@ xfs_inode_item_unpin(
struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
trace_xfs_inode_unpin(ip, _RET_IP_);
+ ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE));
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
@@ -629,10 +637,15 @@ xfs_inode_item_init(
*/
void
xfs_inode_item_destroy(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
- kmem_cache_free(xfs_ili_zone, ip->i_itemp);
+ struct xfs_inode_log_item *iip = ip->i_itemp;
+
+ ASSERT(iip->ili_item.li_buf == NULL);
+
+ ip->i_itemp = NULL;
+ kmem_free(iip->ili_item.li_lv_shadow);
+ kmem_cache_free(xfs_ili_zone, iip);
}
@@ -673,11 +686,10 @@ xfs_iflush_done(
list_move_tail(&lip->li_bio_list, &tmp);
/* Do an unlocked check for needing the AIL lock. */
- if (lip->li_lsn == iip->ili_flush_lsn ||
+ if (iip->ili_flush_lsn == lip->li_lsn ||
test_bit(XFS_LI_FAILED, &lip->li_flags))
need_ail++;
}
- ASSERT(list_empty(&bp->b_li_list));
/*
* We only want to pull the item from the AIL if it is actually there
@@ -690,7 +702,7 @@ xfs_iflush_done(
/* this is an opencoded batch version of xfs_trans_ail_delete */
spin_lock(&ailp->ail_lock);
list_for_each_entry(lip, &tmp, li_bio_list) {
- xfs_clear_li_failed(lip);
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
if (lip->li_lsn == INODE_ITEM(lip)->ili_flush_lsn) {
xfs_lsn_t lsn = xfs_ail_delete_one(ailp, lip);
if (!tail_lsn && lsn)
@@ -706,14 +718,29 @@ xfs_iflush_done(
* them is safely on disk.
*/
list_for_each_entry_safe(lip, n, &tmp, li_bio_list) {
+ bool drop_buffer = false;
+
list_del_init(&lip->li_bio_list);
iip = INODE_ITEM(lip);
spin_lock(&iip->ili_lock);
+
+ /*
+ * Remove the reference to the cluster buffer if the inode is
+ * clean in memory. Drop the buffer reference once we've dropped
+ * the locks we hold.
+ */
+ ASSERT(iip->ili_item.li_buf == bp);
+ if (!iip->ili_fields) {
+ iip->ili_item.li_buf = NULL;
+ drop_buffer = true;
+ }
iip->ili_last_fields = 0;
+ iip->ili_flush_lsn = 0;
spin_unlock(&iip->ili_lock);
-
xfs_ifunlock(iip->ili_inode);
+ if (drop_buffer)
+ xfs_buf_rele(bp);
}
}
@@ -725,12 +752,20 @@ xfs_iflush_done(
*/
void
xfs_iflush_abort(
- struct xfs_inode *ip)
+ struct xfs_inode *ip)
{
- struct xfs_inode_log_item *iip = ip->i_itemp;
+ struct xfs_inode_log_item *iip = ip->i_itemp;
+ struct xfs_buf *bp = NULL;
if (iip) {
+ /*
+ * Clear the failed bit before removing the item from the AIL so
+ * xfs_trans_ail_delete() doesn't try to clear and release the
+ * buffer attached to the log item before we are done with it.
+ */
+ clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
xfs_trans_ail_delete(&iip->ili_item, 0);
+
/*
* Clear the inode logging fields so no more flushes are
* attempted.
@@ -739,12 +774,14 @@ xfs_iflush_abort(
iip->ili_last_fields = 0;
iip->ili_fields = 0;
iip->ili_fsync_fields = 0;
+ iip->ili_flush_lsn = 0;
+ bp = iip->ili_item.li_buf;
+ iip->ili_item.li_buf = NULL;
spin_unlock(&iip->ili_lock);
}
- /*
- * Release the inode's flush lock since we're done with it.
- */
xfs_ifunlock(ip);
+ if (bp)
+ xfs_buf_rele(bp);
}
/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index ac33f6393f99..c3be6e440134 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -377,8 +377,12 @@ xfsaild_resubmit_item(
}
/* protected by ail_lock */
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
- xfs_clear_li_failed(lip);
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+ if (bp->b_flags & _XBF_INODES)
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
+ else
+ xfs_clear_li_failed(lip);
+ }
xfs_buf_unlock(bp);
return XFS_ITEM_SUCCESS;