summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2014-08-04 13:55:27 +1000
committerDave Chinner <david@fromorbit.com>2014-08-04 13:55:27 +1000
commit645f9857213476407d8ed1b59619fdff7128d3e6 (patch)
treef48e76ffa8b4af8bfa0c64d54ff96d76b61f6f65 /fs
parentb076d8720d793cde04b75b4941b8774e209649b4 (diff)
parent4ef897a27543b513351262881660147366c042a1 (diff)
downloadlwn-645f9857213476407d8ed1b59619fdff7128d3e6.tar.gz
lwn-645f9857213476407d8ed1b59619fdff7128d3e6.zip
Merge branch 'xfs-misc-fixes-3.17-2' into for-next
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/libxfs/xfs_sb.c24
-rw-r--r--fs/xfs/xfs_bmap_util.c100
-rw-r--r--fs/xfs/xfs_buf.c14
-rw-r--r--fs/xfs/xfs_dquot.c3
-rw-r--r--fs/xfs/xfs_file.c10
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log.c8
-rw-r--r--fs/xfs/xfs_log_recover.c95
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_qm.c8
-rw-r--r--fs/xfs/xfs_vnode.h46
16 files changed, 175 insertions, 161 deletions
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 6e93b5ef0a6b..ad525a5623a4 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -386,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
}
}
-void
-xfs_sb_from_disk(
+static void
+__xfs_sb_from_disk(
struct xfs_sb *to,
- xfs_dsb_t *from)
+ xfs_dsb_t *from,
+ bool convert_xquota)
{
to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
@@ -445,6 +446,17 @@ xfs_sb_from_disk(
to->sb_pad = 0;
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
+ /* Convert on-disk flags to in-memory flags? */
+ if (convert_xquota)
+ xfs_sb_quota_from_disk(to);
+}
+
+void
+xfs_sb_from_disk(
+ struct xfs_sb *to,
+ xfs_dsb_t *from)
+{
+ __xfs_sb_from_disk(to, from, true);
}
static inline void
@@ -577,7 +589,11 @@ xfs_sb_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_sb sb;
- xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+ /*
+ * Use call variant which doesn't convert quota flags from disk
+ * format, because xfs_mount_validate_sb checks the on-disk flags.
+ */
+ __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
/*
* Only check the in progress field for the primary superblock as
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d32889af1777..2f1e30d39a35 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
* have speculative prealloc/delalloc blocks to remove.
*/
if (VFS_I(ip)->i_size == 0 &&
- VN_CACHED(VFS_I(ip)) == 0 &&
+ VFS_I(ip)->i_mapping->nrpages == 0 &&
ip->i_delayed_blks == 0)
return false;
@@ -1619,6 +1619,30 @@ xfs_swap_extents_check_format(
}
int
+xfs_swap_extent_flush(
+ struct xfs_inode *ip)
+{
+ int error;
+
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ truncate_pagecache_range(VFS_I(ip), 0, -1);
+
+ /* Verify O_DIRECT for ftmp */
+ if (VFS_I(ip)->i_mapping->nrpages)
+ return -EINVAL;
+
+ /*
+ * Don't try to swap extents on mmap()d files because we can't lock
+ * out races against page faults safely.
+ */
+ if (mapping_mapped(VFS_I(ip)->i_mapping))
+ return -EBUSY;
+ return 0;
+}
+
+int
xfs_swap_extents(
xfs_inode_t *ip, /* target inode */
xfs_inode_t *tip, /* tmp inode */
@@ -1633,6 +1657,7 @@ xfs_swap_extents(
int aforkblks = 0;
int taforkblks = 0;
__uint64_t tmp;
+ int lock_flags;
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
@@ -1641,13 +1666,13 @@ xfs_swap_extents(
}
/*
- * we have to do two separate lock calls here to keep lockdep
- * happy. If we try to get all the locks in one call, lock will
- * report false positives when we drop the ILOCK and regain them
- * below.
+ * Lock up the inodes against other IO and truncate to begin with.
+ * Then we can ensure the inodes are flushed and have no page cache
+ * safely. Once we have done this we can take the ilocks and do the rest
+ * of the checks.
*/
+ lock_flags = XFS_IOLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1661,23 +1686,28 @@ xfs_swap_extents(
goto out_unlock;
}
- error = filemap_write_and_wait(VFS_I(tip)->i_mapping);
+ error = xfs_swap_extent_flush(ip);
+ if (error)
+ goto out_unlock;
+ error = xfs_swap_extent_flush(tip);
if (error)
goto out_unlock;
- truncate_pagecache_range(VFS_I(tip), 0, -1);
- /* Verify O_DIRECT for ftmp */
- if (VN_CACHED(VFS_I(tip)) != 0) {
- error = -EINVAL;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_unlock;
}
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+ lock_flags |= XFS_ILOCK_EXCL;
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
sxp->sx_length != ip->i_d.di_size ||
sxp->sx_length != tip->i_d.di_size) {
error = -EFAULT;
- goto out_unlock;
+ goto out_trans_cancel;
}
trace_xfs_swap_extent_before(ip, 0);
@@ -1689,7 +1719,7 @@ xfs_swap_extents(
xfs_notice(mp,
"%s: inode 0x%llx format is incompatible for exchanging.",
__func__, ip->i_ino);
- goto out_unlock;
+ goto out_trans_cancel;
}
/*
@@ -1704,42 +1734,8 @@ xfs_swap_extents(
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
error = -EBUSY;
- goto out_unlock;
- }
-
- /* We need to fail if the file is memory mapped. Once we have tossed
- * all existing pages, the page fault will have no option
- * but to go to the filesystem for pages. By making the page fault call
- * vop_read (or write in the case of autogrow) they block on the iolock
- * until we have switched the extents.
- */
- if (VN_MAPPED(VFS_I(ip))) {
- error = -EBUSY;
- goto out_unlock;
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL);
-
- /*
- * There is a race condition here since we gave up the
- * ilock. However, the data fork will not change since
- * we have the iolock (locked for truncation too) so we
- * are safe. We don't really care if non-io related
- * fields change.
- */
- truncate_pagecache_range(VFS_I(ip), 0, -1);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_IOLOCK_EXCL);
- xfs_trans_cancel(tp, 0);
- goto out;
+ goto out_trans_cancel;
}
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
/*
* Count the number of extended attribute blocks
*/
@@ -1757,8 +1753,8 @@ xfs_swap_extents(
goto out_trans_cancel;
}
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ijoin(tp, tip, lock_flags);
/*
* Before we've swapped the forks, lets set the owners of the forks
@@ -1887,8 +1883,8 @@ out:
return error;
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, lock_flags);
+ xfs_iunlock(tip, lock_flags);
goto out;
out_trans_cancel:
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a6dc83e70ece..cd7b8ca9b064 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1330,6 +1330,20 @@ _xfs_buf_ioapply(
SHUTDOWN_CORRUPT_INCORE);
return;
}
+ } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ /*
+ * non-crc filesystems don't attach verifiers during
+ * log recovery, so don't warn for such filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_warn(mp,
+ "%s: no ops on block 0x%llx/0x%x",
+ __func__, bp->b_bn, bp->b_length);
+ xfs_hex_dump(bp->b_addr, 64);
+ dump_stack();
+ }
}
} else if (bp->b_flags & XBF_READ_AHEAD) {
rw = READA;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 8a44a79f49af..63c2de49f61d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -974,7 +974,8 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ &xfs_dquot_buf_ops);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index fcf91a22f5d8..076b1708d134 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -247,11 +247,11 @@ xfs_file_read_iter(
XFS_STATS_INC(xs_read_calls);
if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
+ ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -284,7 +284,7 @@ xfs_file_read_iter(
* proceeed concurrently without serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
@@ -326,7 +326,7 @@ xfs_file_splice_read(
XFS_STATS_INC(xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1a5e068bc420..fea3c92fb3f0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1635,7 +1635,7 @@ xfs_release(
truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
if (truncated) {
xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
- if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
+ if (ip->i_delayed_blks > 0) {
error = filemap_flush(VFS_I(ip)->i_mapping);
if (error)
return error;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f72bffa67266..c10e3fadd9af 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -398,4 +398,14 @@ do { \
extern struct kmem_zone *xfs_inode_zone;
+/*
+ * Flags for read/write calls
+ */
+#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */
+#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+ { XFS_IO_ISDIRECT, "DIRECT" }, \
+ { XFS_IO_INVIS, "INVIS"}
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 494237ed4a65..3799695b9249 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -736,7 +736,7 @@ xfs_ioc_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- if (!(ioflags & IO_INVIS)) {
+ if (!(ioflags & XFS_IO_INVIS)) {
ip->i_d.di_mode &= ~S_ISUID;
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;
@@ -1376,7 +1376,7 @@ xfs_ioc_getbmap(
return -EINVAL;
bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
- if (ioflags & IO_INVIS)
+ if (ioflags & XFS_IO_INVIS)
bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
@@ -1520,7 +1520,7 @@ xfs_file_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_ioctl(ip);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index cf63418bf05f..a554646ff141 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,7 +28,6 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_vnode.h"
#include "xfs_inode.h"
#include "xfs_itable.h"
#include "xfs_error.h"
@@ -537,7 +536,7 @@ xfs_file_compat_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_compat_ioctl(ip);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d75621ae3e36..72129493e9d3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1055,12 +1055,12 @@ xfs_vn_fiemap(
return error;
/* Set up bmap header for xfs internal routine */
- bm.bmv_offset = BTOBB(start);
+ bm.bmv_offset = BTOBBT(start);
/* Special case for whole file */
if (length == FIEMAP_MAX_OFFSET)
bm.bmv_length = -1LL;
else
- bm.bmv_length = BTOBB(length);
+ bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
/* We add one because in getbmap world count includes the header */
bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index d3ef6de475bb..d10dc8f397c9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -101,7 +101,7 @@ typedef __uint64_t __psunsigned_t;
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include "xfs_vnode.h"
+#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
#include "xfs_iops.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 149a4a575a09..ca4fd5bd8522 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1378,8 +1378,14 @@ xlog_alloc_log(
xlog_get_iclog_buffer_size(mp, log);
+ /*
+ * Use a NULL block for the extra log buffer used during splits so that
+ * it will trigger errors if we ever try to do IO on it without first
+ * having set it up properly.
+ */
error = -ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
+ bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
+ BTOBB(log->l_iclog_size), 0);
if (!bp)
goto out_free_log;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fbc2362d13e3..1fd5787add99 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2126,6 +2126,17 @@ xlog_recover_validate_buf_type(
__uint16_t magic16;
__uint16_t magicda;
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * fielsystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
magicda = be16_to_cpu(info->magic);
@@ -2163,8 +2174,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_agf_buf_ops;
break;
case XFS_BLFT_AGFL_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_AGFL_MAGIC) {
xfs_warn(mp, "Bad AGFL block magic!");
ASSERT(0);
@@ -2197,10 +2206,6 @@ xlog_recover_validate_buf_type(
#endif
break;
case XFS_BLFT_DINO_BUF:
- /*
- * we get here with inode allocation buffers, not buffers that
- * track unlinked list changes.
- */
if (magic16 != XFS_DINODE_MAGIC) {
xfs_warn(mp, "Bad INODE block magic!");
ASSERT(0);
@@ -2280,8 +2285,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_attr3_leaf_buf_ops;
break;
case XFS_BLFT_ATTR_RMT_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
xfs_warn(mp, "Bad attr remote magic!");
ASSERT(0);
@@ -2388,16 +2391,7 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- /*
- * We can only do post recovery validation on items on CRC enabled
- * fielsystems as we need to know when the buffer was written to be able
- * to determine if we should have replayed the item. If we replay old
- * metadata over a newer buffer, then it will enter a temporarily
- * inconsistent state resulting in verification failures. Hence for now
- * just avoid the verification stage for non-crc filesystems
- */
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2405,8 +2399,11 @@ xlog_recover_do_reg_buffer(
* Simple algorithm: if we have found a QUOTAOFF log item of the same type
* (ie. USR or GRP), then just toss this buffer away; don't recover it.
* Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
*/
-STATIC void
+STATIC bool
xlog_recover_do_dquot_buffer(
struct xfs_mount *mp,
struct xlog *log,
@@ -2421,9 +2418,8 @@ xlog_recover_do_dquot_buffer(
/*
* Filesystems are required to send in quota flags at mount time.
*/
- if (mp->m_qflags == 0) {
- return;
- }
+ if (!mp->m_qflags)
+ return false;
type = 0;
if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
@@ -2436,9 +2432,10 @@ xlog_recover_do_dquot_buffer(
* This type of quotas was turned off, so ignore this buffer
*/
if (log->l_quotaoffs_flag & type)
- return;
+ return false;
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ return true;
}
/*
@@ -2505,23 +2502,44 @@ xlog_recover_buffer_pass2(
}
/*
- * recover the buffer only if we get an LSN from it and it's less than
+ * Recover the buffer only if we get an LSN from it and it's less than
* the lsn of the transaction we are replaying.
+ *
+ * Note that we have to be extremely careful of readahead here.
+ * Readahead does not attach verfiers to the buffers so if we don't
+ * actually do any replay after readahead because of the LSN we found
+ * in the buffer if more recent than that current transaction then we
+ * need to attach the verifier directly. Failure to do so can lead to
+ * future recovery actions (e.g. EFI and unlinked list recovery) can
+ * operate on the buffers and they won't get the verifier attached. This
+ * can lead to blocks on disk having the correct content but a stale
+ * CRC.
+ *
+ * It is safe to assume these clean buffers are currently up to date.
+ * If the buffer is dirtied by a later transaction being replayed, then
+ * the verifier will be reset to match whatever recover turns that
+ * buffer into.
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
goto out_release;
+ }
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+ if (error)
+ goto out_release;
} else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ bool dirty;
+
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
- if (error)
- goto out_release;
/*
* Perform delayed write on the buffer. Asynchronous writes will be
@@ -3011,9 +3029,16 @@ xlog_recover_dquot_pass2(
return -EIO;
ASSERT(dq_f->qlf_len == 1);
+ /*
+ * At this point we are assuming that the dquots have been allocated
+ * and hence the buffer has valid dquots stamped in it. It should,
+ * therefore, pass verifier validation. If the dquot is bad, then the
+ * we'll return an error here, so we don't need to specifically check
+ * the dquot in the buffer after the verifier has run.
+ */
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
- NULL);
+ &xfs_dquot_buf_ops);
if (error)
return error;
@@ -3021,18 +3046,6 @@ xlog_recover_dquot_pass2(
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
- * At least the magic num portion should be on disk because this
- * was among a chunk of dquots created earlier, and we did some
- * minimal initialization then.
- */
- error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_dquot_pass2");
- if (error) {
- xfs_buf_relse(bp);
- return -EIO;
- }
-
- /*
* If the dquot has an LSN in it, recover the dquot only if it's less
* than the lsn of the transaction we are replaying.
*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5b639df0413e..fbf0384a466f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -323,7 +323,6 @@ reread:
* Initialize the mount structure from the superblock.
*/
xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
- xfs_sb_quota_from_disk(sbp);
/*
* If we haven't validated the superblock, do so now before we try
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 7e1a80b45f87..10232102b4a6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -911,6 +911,12 @@ xfs_qm_dqiter_bufs(
if (error)
break;
+ /*
+ * A corrupt buffer might not have a verifier attached, so
+ * make sure we have the correct one attached before writeback
+ * occurs.
+ */
+ bp->b_ops = &xfs_dquot_buf_ops;
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
@@ -996,7 +1002,7 @@ xfs_qm_dqiterate(
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen,
- NULL);
+ &xfs_dquot_buf_ops);
rablkno++;
}
}
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
deleted file mode 100644
index e8a77383c0d5..000000000000
--- a/fs/xfs/xfs_vnode.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct attrlist_cursor_kern;
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT 0x00004 /* bypass page cache */
-#define IO_INVIS 0x00020 /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
- { IO_ISDIRECT, "DIRECT" }, \
- { IO_INVIS, "INVIS"}
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp) (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
- PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */