summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_attr_leaf.c64
-rw-r--r--fs/xfs/xfs_bmap.c20
-rw-r--r--fs/xfs/xfs_buf.c6
-rw-r--r--fs/xfs/xfs_buf.h3
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_dquot.c500
-rw-r--r--fs/xfs/xfs_dquot.h39
-rw-r--r--fs/xfs/xfs_dquot_item.c11
-rw-r--r--fs/xfs/xfs_export.c8
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_inode_item.c4
-rw-r--r--fs/xfs/xfs_ioctl.c8
-rw-r--r--fs/xfs/xfs_ioctl32.c8
-rw-r--r--fs/xfs/xfs_iops.c8
-rw-r--r--fs/xfs/xfs_log.c429
-rw-r--r--fs/xfs/xfs_log.h10
-rw-r--r--fs/xfs/xfs_log_cil.c96
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_qm.c467
-rw-r--r--fs/xfs/xfs_qm.h6
-rw-r--r--fs/xfs/xfs_quota.h12
-rw-r--r--fs/xfs/xfs_super.c70
-rw-r--r--fs/xfs/xfs_sync.c51
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.h14
-rw-r--r--fs/xfs/xfs_trans.c475
-rw-r--r--fs/xfs/xfs_trans.h9
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c18
-rw-r--r--fs/xfs/xfs_vnodeops.h4
38 files changed, 779 insertions, 1615 deletions
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b6c4b3795c4a..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,9 +39,11 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
struct xfs_acl_entry *ace;
- int count, i;
+ unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
+ if (count > XFS_ACL_MAX_ENTRIES)
+ return ERR_PTR(-EFSCORRUPTED);
acl = posix_acl_alloc(count, GFP_KERNEL);
if (!acl)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 33b13310ee0c..574d4ee9b625 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -189,7 +189,7 @@ xfs_end_io(
int error = 0;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- error = -EIO;
+ ioend->io_error = -EIO;
goto done;
}
if (ioend->io_error)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d4906e7c9787..c1b55e596551 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -110,6 +110,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
/*
* Query whether the requested number of additional bytes of extended
* attribute space will be able to fit inline.
+ *
* Returns zero if not, else the di_forkoff fork offset to be used in the
* literal area for attribute data once the new bytes have been added.
*
@@ -122,7 +123,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
int offset;
int minforkoff; /* lower limit on valid forkoff locations */
int maxforkoff; /* upper limit on valid forkoff locations */
- int dsize;
+ int dsize;
xfs_mount_t *mp = dp->i_mount;
offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -136,47 +137,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return (offset >= minforkoff) ? minforkoff : 0;
}
- if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
- if (bytes <= XFS_IFORK_ASIZE(dp))
- return dp->i_d.di_forkoff;
+ /*
+ * If the requested numbers of bytes is smaller or equal to the
+ * current attribute fork size we can always proceed.
+ *
+ * Note that if_bytes in the data fork might actually be larger than
+ * the current data fork size is due to delalloc extents. In that
+ * case either the extent count will go down when they are converted
+ * to real extents, or the delalloc conversion will take care of the
+ * literal area rebalancing.
+ */
+ if (bytes <= XFS_IFORK_ASIZE(dp))
+ return dp->i_d.di_forkoff;
+
+ /*
+ * For attr2 we can try to move the forkoff if there is space in the
+ * literal area, but for the old format we are done if there is no
+ * space in the fixed attribute fork.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_ATTR2))
return 0;
- }
dsize = dp->i_df.if_bytes;
-
+
switch (dp->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
- /*
+ /*
* If there is no attr fork and the data fork is extents,
- * determine if creating the default attr fork will result
- * in the extents form migrating to btree. If so, the
- * minimum offset only needs to be the space required for
+ * determine if creating the default attr fork will result
+ * in the extents form migrating to btree. If so, the
+ * minimum offset only needs to be the space required for
* the btree root.
- */
+ */
if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
xfs_default_attroffset(dp))
dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
break;
-
case XFS_DINODE_FMT_BTREE:
/*
- * If have data btree then keep forkoff if we have one,
- * otherwise we are adding a new attr, so then we set
- * minforkoff to where the btree root can finish so we have
+ * If we have a data btree then keep forkoff if we have one,
+ * otherwise we are adding a new attr, so then we set
+ * minforkoff to where the btree root can finish so we have
* plenty of room for attrs
*/
if (dp->i_d.di_forkoff) {
- if (offset < dp->i_d.di_forkoff)
+ if (offset < dp->i_d.di_forkoff)
return 0;
- else
- return dp->i_d.di_forkoff;
- } else
- dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
+ return dp->i_d.di_forkoff;
+ }
+ dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
break;
}
-
- /*
- * A data fork btree root must have space for at least
+
+ /*
+ * A data fork btree root must have space for at least
* MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
@@ -186,10 +200,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
maxforkoff = maxforkoff >> 3; /* rounded down */
- if (offset >= minforkoff && offset < maxforkoff)
- return offset;
if (offset >= maxforkoff)
return maxforkoff;
+ if (offset >= minforkoff)
+ return offset;
return 0;
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c68baeb0974a..d0ab78837057 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2383,6 +2383,8 @@ xfs_bmap_btalloc(
int tryagain;
int error;
+ ASSERT(ap->length);
+
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
@@ -4629,6 +4631,8 @@ xfs_bmapi_allocate(
int error;
int rt;
+ ASSERT(bma->length > 0);
+
rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
/*
@@ -4849,6 +4853,7 @@ xfs_bmapi_write(
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & XFS_BMAPI_IGSTATE));
ASSERT(tp != NULL);
+ ASSERT(len > 0);
whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -4918,9 +4923,22 @@ xfs_bmapi_write(
bma.eof = eof;
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay;
- bma.length = len;
bma.offset = bno;
+ /*
+ * There's a 32/64 bit type mismatch between the
+ * allocation length request (which can be 64 bits in
+ * length) and the bma length request, which is
+ * xfs_extlen_t and therefore 32 bits. Hence we have to
+ * check for 32-bit overflows and handle them here.
+ */
+ if (len > (xfs_filblks_t)MAXEXTLEN)
+ bma.length = MAXEXTLEN;
+ else
+ bma.length = len;
+
+ ASSERT(len > 0);
+ ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma, flags);
if (error)
goto error0;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..2277bcae395f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1701,12 +1701,8 @@ xfsbufd(
struct list_head tmp;
struct blk_plug plug;
- if (unlikely(freezing(current))) {
- set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+ if (unlikely(freezing(current)))
refrigerator();
- } else {
- clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
- }
/* sleep for a long time if there is nothing to do. */
if (list_empty(&target->bt_delwri_queue))
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bab046e859f..df7ffb0affe7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_DELWRI_Q, "DELWRI_Q" }
typedef enum {
- XBT_FORCE_SLEEP = 0,
- XBT_FORCE_FLUSH = 1,
+ XBT_FORCE_FLUSH = 0,
} xfs_buftarg_flags_t;
typedef struct xfs_buftarg {
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1a3513881bce..eac97ef81e2a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -656,7 +656,7 @@ xfs_buf_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-static struct xfs_item_ops xfs_buf_item_ops = {
+static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 25d7280e9f6b..b4ff40b5f918 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -39,20 +39,19 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
-
/*
- LOCK ORDER
-
- inode lock (ilock)
- dquot hash-chain lock (hashlock)
- xqm dquot freelist lock (freelistlock
- mount's dquot list lock (mplistlock)
- user dquot lock - lock ordering among dquots is based on the uid or gid
- group dquot lock - similar to udquots. Between the two dquots, the udquot
- has to be locked first.
- pin lock - the dquot lock must be held to take this lock.
- flush lock - ditto.
-*/
+ * Lock order:
+ *
+ * ip->i_lock
+ * qh->qh_lock
+ * qi->qi_dqlist_lock
+ * dquot->q_qlock (xfs_dqlock() and friends)
+ * dquot->q_flush (xfs_dqflock() and friends)
+ * xfs_Gqm->qm_dqfrlist_lock
+ *
+ * If two dquots need to be locked the order is user before group/project,
+ * otherwise by the lowest id first, see xfs_dqlock2.
+ */
#ifdef DEBUG
xfs_buftarg_t *xfs_dqerror_target;
@@ -155,24 +154,6 @@ xfs_qm_dqdestroy(
}
/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
- xfs_dqid_t id,
- uint type,
- xfs_dqblk_t *d)
-{
- /*
- * Caller has zero'd the entire dquot 'chunk' already.
- */
- d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
- d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
- d->dd_diskdq.d_id = cpu_to_be32(id);
- d->dd_diskdq.d_flags = type;
-}
-
-/*
* If default limits are in force, push them into the dquot now.
* We overwrite the dquot limits only if they are zero and this
* is not the root dquot.
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
curid = id - (id % q->qi_dqperchunk);
ASSERT(curid >= 0);
memset(d, 0, BBTOB(q->qi_dqchunklen));
- for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
- xfs_qm_dqinit_core(curid, type, d);
+ for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
+ d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+ d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+ d->dd_diskdq.d_id = cpu_to_be32(curid);
+ d->dd_diskdq.d_flags = type;
+ }
+
xfs_trans_dquot_buf(tp, bp,
(type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
* Read in the ondisk dquot using dqtobp() then copy it to an incore version,
* and release the buffer immediately.
*
+ * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
*/
-/* ARGSUSED */
-STATIC int
+int
xfs_qm_dqread(
- xfs_trans_t **tpp,
- xfs_dqid_t id,
- xfs_dquot_t *dqp, /* dquot to get filled in */
- uint flags)
+ struct xfs_mount *mp,
+ xfs_dqid_t id,
+ uint type,
+ uint flags,
+ struct xfs_dquot **O_dqpp)
{
- xfs_disk_dquot_t *ddqp;
- xfs_buf_t *bp;
- int error;
- xfs_trans_t *tp;
+ struct xfs_dquot *dqp;
+ struct xfs_disk_dquot *ddqp;
+ struct xfs_buf *bp;
+ struct xfs_trans *tp = NULL;
+ int error;
+ int cancelflags = 0;
- ASSERT(tpp);
+ dqp = xfs_qm_dqinit(mp, id, type);
trace_xfs_dqread(dqp);
+ if (flags & XFS_QMOPT_DQALLOC) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+ error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+ XFS_WRITE_LOG_RES(mp) +
+ /*
+ * Round the chunklen up to the next multiple
+ * of 128 (buf log item chunk size)).
+ */
+ BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
+ 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_WRITE_LOG_COUNT);
+ if (error)
+ goto error1;
+ cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+ }
+
/*
* get a pointer to the on-disk dquot and the buffer containing it
* dqp already knows its own type (GROUP/USER).
*/
- if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
- return (error);
+ error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
+ if (error) {
+ /*
+ * This can happen if quotas got turned off (ESRCH),
+ * or if the dquot didn't exist on disk and we ask to
+ * allocate (ENOENT).
+ */
+ trace_xfs_dqread_fail(dqp);
+ cancelflags |= XFS_TRANS_ABORT;
+ goto error1;
}
- tp = *tpp;
/* copy everything from disk dquot to the incore dquot */
memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
xfs_qm_dquot_logitem_init(dqp);
/*
@@ -622,77 +634,22 @@ xfs_qm_dqread(
ASSERT(xfs_buf_islocked(bp));
xfs_trans_brelse(tp, bp);
- return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
- xfs_mount_t *mp,
- xfs_dqid_t id, /* gid or uid, depending on type */
- uint type, /* UDQUOT or GDQUOT */
- uint flags, /* DQALLOC, DQREPAIR */
- xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
-{
- xfs_dquot_t *dqp;
- int error;
- xfs_trans_t *tp;
- int cancelflags=0;
-
- dqp = xfs_qm_dqinit(mp, id, type);
- tp = NULL;
- if (flags & XFS_QMOPT_DQALLOC) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
- error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
- XFS_WRITE_LOG_RES(mp) +
- BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
- 128,
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_WRITE_LOG_COUNT);
- if (error) {
- cancelflags = 0;
- goto error0;
- }
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
- }
-
- /*
- * Read it from disk; xfs_dqread() takes care of
- * all the necessary initialization of dquot's fields (locks, etc)
- */
- if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
- /*
- * This can happen if quotas got turned off (ESRCH),
- * or if the dquot didn't exist on disk and we ask to
- * allocate (ENOENT).
- */
- trace_xfs_dqread_fail(dqp);
- cancelflags |= XFS_TRANS_ABORT;
- goto error0;
- }
if (tp) {
- if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
- goto error1;
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto error0;
}
*O_dqpp = dqp;
- return (0);
+ return error;
- error0:
- ASSERT(error);
+error1:
if (tp)
xfs_trans_cancel(tp, cancelflags);
- error1:
+error0:
xfs_qm_dqdestroy(dqp);
*O_dqpp = NULL;
- return (error);
+ return error;
}
/*
@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;
- uint flist_locked;
ASSERT(mutex_is_locked(&qh->qh_lock));
- flist_locked = B_FALSE;
-
/*
* Traverse the hashchain looking for a match
*/
@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
* dqlock to look at the id field of the dquot, since the
* id can't be modified without the hashlock anyway.
*/
- if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
- trace_xfs_dqlookup_found(dqp);
+ if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
+ continue;
- /*
- * All in core dquots must be on the dqlist of mp
- */
- ASSERT(!list_empty(&dqp->q_mplist));
-
- xfs_dqlock(dqp);
- if (dqp->q_nrefs == 0) {
- ASSERT(!list_empty(&dqp->q_freelist));
- if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
- trace_xfs_dqlookup_want(dqp);
-
- /*
- * We may have raced with dqreclaim_one()
- * (and lost). So, flag that we don't
- * want the dquot to be reclaimed.
- */
- dqp->dq_flags |= XFS_DQ_WANT;
- xfs_dqunlock(dqp);
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
- xfs_dqlock(dqp);
- dqp->dq_flags &= ~(XFS_DQ_WANT);
- }
- flist_locked = B_TRUE;
- }
+ trace_xfs_dqlookup_found(dqp);
- /*
- * id couldn't have changed; we had the hashlock all
- * along
- */
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
- if (flist_locked) {
- if (dqp->q_nrefs != 0) {
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- flist_locked = B_FALSE;
- } else {
- /* take it off the freelist */
- trace_xfs_dqlookup_freelist(dqp);
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- }
- }
+ xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ *O_dqpp = NULL;
+ xfs_dqunlock(dqp);
+ return -1;
+ }
- XFS_DQHOLD(dqp);
+ dqp->q_nrefs++;
- if (flist_locked)
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- /*
- * move the dquot to the front of the hashchain
- */
- ASSERT(mutex_is_locked(&qh->qh_lock));
- list_move(&dqp->q_hashlist, &qh->qh_list);
- trace_xfs_dqlookup_done(dqp);
- *O_dqpp = dqp;
- return 0;
- }
+ /*
+ * move the dquot to the front of the hashchain
+ */
+ list_move(&dqp->q_hashlist, &qh->qh_list);
+ trace_xfs_dqlookup_done(dqp);
+ *O_dqpp = dqp;
+ return 0;
}
*O_dqpp = NULL;
- ASSERT(mutex_is_locked(&qh->qh_lock));
- return (1);
+ return 1;
}
/*
@@ -829,11 +744,7 @@ xfs_qm_dqget(
return (EIO);
}
}
-#endif
-
- again:
-#ifdef DEBUG
ASSERT(type == XFS_DQ_USER ||
type == XFS_DQ_PROJ ||
type == XFS_DQ_GROUP);
@@ -845,13 +756,21 @@ xfs_qm_dqget(
ASSERT(ip->i_gdquot == NULL);
}
#endif
+
+restart:
mutex_lock(&h->qh_lock);
/*
* Look in the cache (hashtable).
* The chain is kept locked during lookup.
*/
- if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+ switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+ case -1:
+ XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+ mutex_unlock(&h->qh_lock);
+ delay(1);
+ goto restart;
+ case 0:
XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
/*
* The dquot was found, moved to the front of the chain,
@@ -862,9 +781,11 @@ xfs_qm_dqget(
ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
mutex_unlock(&h->qh_lock);
trace_xfs_dqget_hit(*O_dqpp);
- return (0); /* success */
+ return 0; /* success */
+ default:
+ XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+ break;
}
- XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
@@ -882,41 +803,18 @@ xfs_qm_dqget(
version = h->qh_version;
mutex_unlock(&h->qh_lock);
- /*
- * Allocate the dquot on the kernel heap, and read the ondisk
- * portion off the disk. Also, do all the necessary initialization
- * This can return ENOENT if dquot didn't exist on disk and we didn't
- * ask it to allocate; ESRCH if quotas got turned off suddenly.
- */
- if ((error = xfs_qm_idtodq(mp, id, type,
- flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
- XFS_QMOPT_DOWARN),
- &dqp))) {
- if (ip)
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- return (error);
- }
+ error = xfs_qm_dqread(mp, id, type, flags, &dqp);
- /*
- * See if this is mount code calling to look at the overall quota limits
- * which are stored in the id == 0 user or group's dquot.
- * Since we may not have done a quotacheck by this point, just return
- * the dquot without attaching it to any hashtables, lists, etc, or even
- * taking a reference.
- * The caller must dqdestroy this once done.
- */
- if (flags & XFS_QMOPT_DQSUSER) {
- ASSERT(id == 0);
- ASSERT(! ip);
- goto dqret;
- }
+ if (ip)
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ if (error)
+ return error;
/*
* Dquot lock comes after hashlock in the lock ordering
*/
if (ip) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
/*
* A dquot could be attached to this inode by now, since
* we had dropped the ilock.
@@ -961,16 +859,21 @@ xfs_qm_dqget(
* lock order between the two dquots here since dqp isn't
* on any findable lists yet.
*/
- if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+ switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+ case 0:
+ case -1:
/*
- * Duplicate found. Just throw away the new dquot
- * and start over.
+ * Duplicate found, either in cache or on its way out.
+ * Just throw away the new dquot and start over.
*/
- xfs_qm_dqput(tmpdqp);
+ if (tmpdqp)
+ xfs_qm_dqput(tmpdqp);
mutex_unlock(&h->qh_lock);
xfs_qm_dqdestroy(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
- goto again;
+ goto restart;
+ default:
+ break;
}
}
@@ -1015,67 +918,49 @@ xfs_qm_dqget(
*/
void
xfs_qm_dqput(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
- xfs_dquot_t *gdqp;
+ struct xfs_dquot *gdqp;
ASSERT(dqp->q_nrefs > 0);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
trace_xfs_dqput(dqp);
- if (dqp->q_nrefs != 1) {
- dqp->q_nrefs--;
+recurse:
+ if (--dqp->q_nrefs > 0) {
xfs_dqunlock(dqp);
return;
}
+ trace_xfs_dqput_free(dqp);
+
+ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+ if (list_empty(&dqp->q_freelist)) {
+ list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+ xfs_Gqm->qm_dqfrlist_cnt++;
+ }
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
/*
- * drop the dqlock and acquire the freelist and dqlock
- * in the right order; but try to get it out-of-order first
+ * If we just added a udquot to the freelist, then we want to release
+ * the gdquot reference that it (probably) has. Otherwise it'll keep
+ * the gdquot from getting reclaimed.
*/
- if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
- trace_xfs_dqput_wait(dqp);
- xfs_dqunlock(dqp);
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
- xfs_dqlock(dqp);
+ gdqp = dqp->q_gdquot;
+ if (gdqp) {
+ xfs_dqlock(gdqp);
+ dqp->q_gdquot = NULL;
}
+ xfs_dqunlock(dqp);
- while (1) {
- gdqp = NULL;
-
- /* We can't depend on nrefs being == 1 here */
- if (--dqp->q_nrefs == 0) {
- trace_xfs_dqput_free(dqp);
-
- list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
- xfs_Gqm->qm_dqfrlist_cnt++;
-
- /*
- * If we just added a udquot to the freelist, then
- * we want to release the gdquot reference that
- * it (probably) has. Otherwise it'll keep the
- * gdquot from getting reclaimed.
- */
- if ((gdqp = dqp->q_gdquot)) {
- /*
- * Avoid a recursive dqput call
- */
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
- }
- xfs_dqunlock(dqp);
-
- /*
- * If we had a group quota inside the user quota as a hint,
- * release it now.
- */
- if (! gdqp)
- break;
+ /*
+ * If we had a group quota hint, release it now.
+ */
+ if (gdqp) {
dqp = gdqp;
+ goto recurse;
}
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
}
/*
@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
* If not dirty, or it's pinned and we are not supposed to block, nada.
*/
if (!XFS_DQ_IS_DIRTY(dqp) ||
- (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+ ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
xfs_dqfunlock(dqp);
return 0;
}
@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
}
-int
-xfs_qm_dqlock_nowait(
- xfs_dquot_t *dqp)
-{
- return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
- xfs_dquot_t *dqp)
-{
- mutex_lock(&dqp->q_qlock);
-}
-
void
xfs_dqunlock(
xfs_dquot_t *dqp)
{
- mutex_unlock(&(dqp->q_qlock));
+ xfs_dqunlock_nonotify(dqp);
if (dqp->q_logitem.qli_dquot == dqp) {
- /* Once was dqp->q_mount, but might just have been cleared */
xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
- (xfs_log_item_t*)&(dqp->q_logitem));
+ &dqp->q_logitem.qli_item);
}
}
-
-void
-xfs_dqunlock_nonotify(
- xfs_dquot_t *dqp)
-{
- mutex_unlock(&(dqp->q_qlock));
-}
-
/*
* Lock two xfs_dquot structures.
*
@@ -1319,43 +1181,18 @@ xfs_dqlock2(
}
}
-
/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
+ * Take a dquot out of the mount's dqlist as well as the hashlist. This is
+ * called via unmount as well as quotaoff, and the purge will always succeed.
*/
-/* ARGSUSED */
-int
+void
xfs_qm_dqpurge(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
- xfs_dqhash_t *qh = dqp->q_hash;
- xfs_mount_t *mp = dqp->q_mount;
-
- ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
- ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_dqhash *qh = dqp->q_hash;
xfs_dqlock(dqp);
- /*
- * We really can't afford to purge a dquot that is
- * referenced, because these are hard refs.
- * It shouldn't happen in general because we went thru _all_ inodes in
- * dqrele_all_inodes before calling this and didn't let the mountlock go.
- * However it is possible that we have dquots with temporary
- * references that are not attached to an inode. e.g. see xfs_setattr().
- */
- if (dqp->q_nrefs != 0) {
- xfs_dqunlock(dqp);
- mutex_unlock(&dqp->q_hash->qh_lock);
- return (1);
- }
-
- ASSERT(!list_empty(&dqp->q_freelist));
/*
* If we're turning off quotas, we have to make sure that, for
@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
* Block on the flush lock after nudging dquot buffer,
* if it is incore.
*/
- xfs_qm_dqflock_pushbuf_wait(dqp);
+ xfs_dqflock_pushbuf_wait(dqp);
}
/*
- * XXXIf we're turning this type of quotas off, we don't care
+ * If we are turning this type of quotas off, we don't care
* about the dirty metadata sitting in this dquot. OTOH, if
* we're unmounting, we do care, so we flush it and wait.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
int error;
- /* dqflush unlocks dqflock */
/*
- * Given that dqpurge is a very rare occurrence, it is OK
- * that we're holding the hashlist and mplist locks
- * across the disk write. But, ... XXXsup
- *
* We don't care about getting disk errors here. We need
* to purge this dquot anyway, so we go ahead regardless.
*/
@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
__func__, dqp);
xfs_dqflock(dqp);
}
+
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+
+ mutex_lock(&qh->qh_lock);
list_del_init(&dqp->q_hashlist);
qh->qh_version++;
+ mutex_unlock(&qh->qh_lock);
+
+ mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dqreclaims++;
mp->m_quotainfo->qi_dquots--;
+ mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
/*
- * XXX Move this to the front of the freelist, if we can get the
- * freelist lock.
+ * We move dquots to the freelist as soon as their reference count
+ * hits zero, so it really should be on the freelist here.
*/
+ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
ASSERT(!list_empty(&dqp->q_freelist));
+ list_del_init(&dqp->q_freelist);
+ xfs_Gqm->qm_dqfrlist_cnt--;
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- dqp->q_mount = NULL;
- dqp->q_hash = NULL;
- dqp->dq_flags = XFS_DQ_INACTIVE;
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- mutex_unlock(&qh->qh_lock);
- return (0);
+ xfs_qm_dqdestroy(dqp);
}
-
/*
* Give the buffer a little push if it is incore and
* wait on the flush lock.
*/
void
-xfs_qm_dqflock_pushbuf_wait(
+xfs_dqflock_pushbuf_wait(
xfs_dquot_t *dqp)
{
xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..a1d91d8f1802 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -80,8 +80,6 @@ enum {
XFS_QLOCK_NESTED,
};
-#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
-
/*
* Manage the q_flush completion queue embedded in the dquot. This completion
* queue synchronizes processes attempting to flush the in-core dquot back to
@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
complete(&dqp->q_flush);
}
+static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
+{
+ return mutex_trylock(&dqp->q_qlock);
+}
+
+static inline void xfs_dqlock(struct xfs_dquot *dqp)
+{
+ mutex_lock(&dqp->q_qlock);
+}
+
+static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
+{
+ mutex_unlock(&dqp->q_qlock);
+}
+
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
(XFS_IS_UQUOTA_ON((d)->q_mount)) : \
(XFS_IS_OQUOTA_ON((d)->q_mount))))
+extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
+ uint, struct xfs_dquot **);
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *);
+extern void xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
xfs_disk_dquot_t *);
extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
@@ -129,9 +142,17 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
xfs_dqid_t, uint, uint, xfs_dquot_t **);
extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_dqlock(xfs_dquot_t *);
-extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void xfs_dqunlock(xfs_dquot_t *);
-extern void xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+extern void xfs_dqunlock(struct xfs_dquot *);
+extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
+
+static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
+{
+ xfs_dqlock(dqp);
+ dqp->q_nrefs++;
+ xfs_dqunlock(dqp);
+ return dqp;
+}
#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index bb3f71d236d2..34baeae45265 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
logvec->i_len = sizeof(xfs_disk_dquot_t);
logvec->i_type = XLOG_REG_TYPE_DQUOT;
- ASSERT(2 == lip->li_desc->lid_size);
qlip->qli_format.qlf_size = 2;
}
@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
* lock without sleeping, then there must not have been
* anyone in the process of flushing the dquot.
*/
- error = xfs_qm_dqflush(dqp, 0);
+ error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
if (error)
xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
__func__, error, dqp);
@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
- if (!xfs_qm_dqlock_nowait(dqp))
+ if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
if (!xfs_dqflock_nowait(dqp)) {
@@ -295,7 +294,7 @@ xfs_qm_dquot_logitem_committing(
/*
* This is the ops vector for dquots
*/
-static struct xfs_item_ops xfs_dquot_item_ops = {
+static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
@@ -483,7 +482,7 @@ xfs_qm_qoff_logitem_committing(
{
}
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
.iop_pin = xfs_qm_qoff_logitem_pin,
@@ -498,7 +497,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
/*
* This is the ops vector shared by all quotaoff-start log items.
*/
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
.iop_pin = xfs_qm_qoff_logitem_pin,
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index da108977b21f..558910f5e3c0 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -98,22 +98,22 @@ xfs_fs_encode_fh(
switch (fileid_type) {
case FILEID_INO32_GEN_PARENT:
spin_lock(&dentry->d_lock);
- fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+ fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
spin_unlock(&dentry->d_lock);
/*FALLTHRU*/
case FILEID_INO32_GEN:
- fid->i32.ino = inode->i_ino;
+ fid->i32.ino = XFS_I(inode)->i_ino;
fid->i32.gen = inode->i_generation;
break;
case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
spin_lock(&dentry->d_lock);
- fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+ fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
spin_unlock(&dentry->d_lock);
/*FALLTHRU*/
case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
- fid64->ino = inode->i_ino;
+ fid64->ino = XFS_I(inode)->i_ino;
fid64->gen = inode->i_generation;
break;
}
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d22e62623437..35c2aff38b20 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -217,7 +217,7 @@ xfs_efi_item_committing(
/*
* This is the ops vector shared by all efi log items.
*/
-static struct xfs_item_ops xfs_efi_item_ops = {
+static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
.iop_pin = xfs_efi_item_pin,
@@ -477,7 +477,7 @@ xfs_efd_item_committing(
/*
* This is the ops vector shared by all efd log items.
*/
-static struct xfs_item_ops xfs_efd_item_ops = {
+static const struct xfs_item_ops xfs_efd_item_ops = {
.iop_size = xfs_efd_item_size,
.iop_format = xfs_efd_item_format,
.iop_pin = xfs_efd_item_pin,
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 169380e66057..dad1a31aa4fc 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -447,7 +447,7 @@ STATIC xfs_buf_t * /* allocation group buffer */
xfs_ialloc_ag_select(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t parent, /* parent directory inode number */
- mode_t mode, /* bits set to indicate file type */
+ umode_t mode, /* bits set to indicate file type */
int okalloc) /* ok to allocate more space */
{
xfs_buf_t *agbp; /* allocation group header buffer */
@@ -640,7 +640,7 @@ int
xfs_dialloc(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t parent, /* parent inode (directory) */
- mode_t mode, /* mode bits for new inode */
+ umode_t mode, /* mode bits for new inode */
int okalloc, /* ok to allocate more space */
xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
boolean_t *alloc_done, /* true if we needed to replenish
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index bb5385475e1f..666a037398d6 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -81,7 +81,7 @@ int /* error */
xfs_dialloc(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t parent, /* parent inode (directory) */
- mode_t mode, /* mode bits for new inode */
+ umode_t mode, /* mode bits for new inode */
int okalloc, /* ok to allocate more space */
struct xfs_buf **agbp, /* buf for a.g. inode header */
boolean_t *alloc_done, /* an allocation was done to replenish
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0fa98b1c70ea..3960a066d7ff 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -107,7 +107,6 @@ xfs_inode_free_callback(
struct inode *inode = container_of(head, struct inode, i_rcu);
struct xfs_inode *ip = XFS_I(inode);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_zone_free(xfs_inode_zone, ip);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0237c602f11..9dda7cc32848 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -961,7 +961,7 @@ int
xfs_ialloc(
xfs_trans_t *tp,
xfs_inode_t *pip,
- mode_t mode,
+ umode_t mode,
xfs_nlink_t nlink,
xfs_dev_t rdev,
prid_t prid,
@@ -1002,7 +1002,7 @@ xfs_ialloc(
return error;
ASSERT(ip != NULL);
- ip->i_d.di_mode = (__uint16_t)mode;
+ ip->i_d.di_mode = mode;
ip->i_d.di_onlink = 0;
ip->i_d.di_nlink = nlink;
ASSERT(ip->i_d.di_nlink == nlink);
@@ -2835,6 +2835,27 @@ corrupt_out:
return XFS_ERROR(EFSCORRUPTED);
}
+void
+xfs_promote_inode(
+ struct xfs_inode *ip)
+{
+ struct xfs_buf *bp;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+
+ bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
+ ip->i_imap.im_len, XBF_TRYLOCK);
+ if (!bp)
+ return;
+
+ if (XFS_BUF_ISDELAYWRITE(bp)) {
+ xfs_buf_delwri_promote(bp);
+ wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
+ }
+
+ xfs_buf_relse(bp);
+}
+
/*
* Return a pointer to the extent record at file index idx.
*/
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 760140d1dd66..f0e6b151ba37 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -481,7 +481,7 @@ void xfs_inode_free(struct xfs_inode *ip);
/*
* xfs_inode.c prototypes.
*/
-int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
+int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
xfs_nlink_t, xfs_dev_t, prid_t, int,
struct xfs_buf **, boolean_t *, xfs_inode_t **);
@@ -498,6 +498,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint);
+void xfs_promote_inode(struct xfs_inode *);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b7cf21ba240f..cfd6c7f8cc3c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -437,7 +437,6 @@ xfs_inode_item_format(
* Assert that no attribute-related log flags are set.
*/
if (!XFS_IFORK_Q(ip)) {
- ASSERT(nvecs == lip->li_desc->lid_size);
iip->ili_format.ilf_size = nvecs;
ASSERT(!(iip->ili_format.ilf_fields &
(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -521,7 +520,6 @@ xfs_inode_item_format(
break;
}
- ASSERT(nvecs == lip->li_desc->lid_size);
iip->ili_format.ilf_size = nvecs;
}
@@ -795,7 +793,7 @@ xfs_inode_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-static struct xfs_item_ops xfs_inode_item_ops = {
+static const struct xfs_item_ops xfs_inode_item_ops = {
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d99a90518909..76f3ca5cfc36 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -559,23 +559,23 @@ xfs_attrmulti_by_handle(
ops[i].am_flags);
break;
case ATTR_OP_SET:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write_file(parfilp);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_set(
dentry->d_inode, attr_name,
ops[i].am_attrvalue, ops[i].am_length,
ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
+ mnt_drop_write_file(parfilp);
break;
case ATTR_OP_REMOVE:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write_file(parfilp);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_remove(
dentry->d_inode, attr_name,
ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
+ mnt_drop_write_file(parfilp);
break;
default:
ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 54e623bfbb85..f9ccb7b7c043 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -454,23 +454,23 @@ xfs_compat_attrmulti_by_handle(
&ops[i].am_length, ops[i].am_flags);
break;
case ATTR_OP_SET:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write_file(parfilp);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_set(
dentry->d_inode, attr_name,
compat_ptr(ops[i].am_attrvalue),
ops[i].am_length, ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
+ mnt_drop_write_file(parfilp);
break;
case ATTR_OP_REMOVE:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write_file(parfilp);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_remove(
dentry->d_inode, attr_name,
ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
+ mnt_drop_write_file(parfilp);
break;
default:
ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 23ce927973a4..f9babd179223 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -168,7 +168,7 @@ STATIC int
xfs_vn_mknod(
struct inode *dir,
struct dentry *dentry,
- int mode,
+ umode_t mode,
dev_t rdev)
{
struct inode *inode;
@@ -231,7 +231,7 @@ STATIC int
xfs_vn_create(
struct inode *dir,
struct dentry *dentry,
- int mode,
+ umode_t mode,
struct nameidata *nd)
{
return xfs_vn_mknod(dir, dentry, mode, 0);
@@ -241,7 +241,7 @@ STATIC int
xfs_vn_mkdir(
struct inode *dir,
struct dentry *dentry,
- int mode)
+ umode_t mode)
{
return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
}
@@ -366,7 +366,7 @@ xfs_vn_symlink(
struct xfs_inode *cip = NULL;
struct xfs_name name;
int error;
- mode_t mode;
+ umode_t mode;
mode = S_IFLNK |
(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2758a6277c52..e2cc3568c299 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -150,6 +150,117 @@ xlog_grant_add_space(
} while (head_val != old);
}
+STATIC bool
+xlog_reserveq_wake(
+ struct log *log,
+ int *free_bytes)
+{
+ struct xlog_ticket *tic;
+ int need_bytes;
+
+ list_for_each_entry(tic, &log->l_reserveq, t_queue) {
+ if (tic->t_flags & XLOG_TIC_PERM_RESERV)
+ need_bytes = tic->t_unit_res * tic->t_cnt;
+ else
+ need_bytes = tic->t_unit_res;
+
+ if (*free_bytes < need_bytes)
+ return false;
+ *free_bytes -= need_bytes;
+
+ trace_xfs_log_grant_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+
+ return true;
+}
+
+STATIC bool
+xlog_writeq_wake(
+ struct log *log,
+ int *free_bytes)
+{
+ struct xlog_ticket *tic;
+ int need_bytes;
+
+ list_for_each_entry(tic, &log->l_writeq, t_queue) {
+ ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+
+ need_bytes = tic->t_unit_res;
+
+ if (*free_bytes < need_bytes)
+ return false;
+ *free_bytes -= need_bytes;
+
+ trace_xfs_log_regrant_write_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+
+ return true;
+}
+
+STATIC int
+xlog_reserveq_wait(
+ struct log *log,
+ struct xlog_ticket *tic,
+ int need_bytes)
+{
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
+
+ do {
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_grant_sleep(log, tic);
+
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+ trace_xfs_log_grant_wake(log, tic);
+
+ spin_lock(&log->l_grant_reserve_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
+
+ list_del_init(&tic->t_queue);
+ return 0;
+shutdown:
+ list_del_init(&tic->t_queue);
+ return XFS_ERROR(EIO);
+}
+
+STATIC int
+xlog_writeq_wait(
+ struct log *log,
+ struct xlog_ticket *tic,
+ int need_bytes)
+{
+ list_add_tail(&tic->t_queue, &log->l_writeq);
+
+ do {
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_regrant_write_sleep(log, tic);
+
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+ trace_xfs_log_regrant_write_wake(log, tic);
+
+ spin_lock(&log->l_grant_write_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
+
+ list_del_init(&tic->t_queue);
+ return 0;
+shutdown:
+ list_del_init(&tic->t_queue);
+ return XFS_ERROR(EIO);
+}
+
static void
xlog_tic_reset_res(xlog_ticket_t *tic)
{
@@ -350,8 +461,19 @@ xfs_log_reserve(
retval = xlog_grant_log_space(log, internal_ticket);
}
+ if (unlikely(retval)) {
+ /*
+ * If we are failing, make sure the ticket doesn't have any
+ * current reservations. We don't want to add this back
+ * when the ticket/ transaction gets cancelled.
+ */
+ internal_ticket->t_curr_res = 0;
+ /* ungrant will give back unit_res * t_cnt. */
+ internal_ticket->t_cnt = 0;
+ }
+
return retval;
-} /* xfs_log_reserve */
+}
/*
@@ -626,7 +748,7 @@ xfs_log_item_init(
struct xfs_mount *mp,
struct xfs_log_item *item,
int type,
- struct xfs_item_ops *ops)
+ const struct xfs_item_ops *ops)
{
item->li_mountp = mp;
item->li_ailp = mp->m_ail;
@@ -638,38 +760,6 @@ xfs_log_item_init(
INIT_LIST_HEAD(&item->li_cil);
}
-/*
- * Write region vectors to log. The write happens using the space reservation
- * of the ticket (tic). It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write(). However, it is important
- * to note that the transaction reservation code makes an assumption about the
- * number of log headers a transaction requires that may be violated if you
- * don't pass all the transaction vectors in one call....
- */
-int
-xfs_log_write(
- struct xfs_mount *mp,
- struct xfs_log_iovec reg[],
- int nentries,
- struct xlog_ticket *tic,
- xfs_lsn_t *start_lsn)
-{
- struct log *log = mp->m_log;
- int error;
- struct xfs_log_vec vec = {
- .lv_niovecs = nentries,
- .lv_iovecp = reg,
- };
-
- if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
-
- error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
- if (error)
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- return error;
-}
-
void
xfs_log_move_tail(xfs_mount_t *mp,
xfs_lsn_t tail_lsn)
@@ -1563,7 +1653,7 @@ xlog_print_tic_res(
};
xfs_warn(mp,
- "xfs_log_write: reservation summary:\n"
+ "xlog_write: reservation summary:\n"
" trans type = %s (%u)\n"
" unit res = %d bytes\n"
" current res = %d bytes\n"
@@ -1592,7 +1682,7 @@ xlog_print_tic_res(
}
xfs_alert_tag(mp, XFS_PTAG_LOGRES,
- "xfs_log_write: reservation ran out. Need to up reservation");
+ "xlog_write: reservation ran out. Need to up reservation");
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
@@ -1846,23 +1936,21 @@ xlog_write(
*start_lsn = 0;
len = xlog_write_calc_vec_length(ticket, log_vector);
- if (log->l_cilp) {
- /*
- * Region headers and bytes are already accounted for.
- * We only need to take into account start records and
- * split regions in this function.
- */
- if (ticket->t_flags & XLOG_TIC_INITED)
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
- /*
- * Commit record headers need to be accounted for. These
- * come in as separate writes so are easy to detect.
- */
- if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
- } else
- ticket->t_curr_res -= len;
+ /*
+ * Region headers and bytes are already accounted for.
+ * We only need to take into account start records and
+ * split regions in this function.
+ */
+ if (ticket->t_flags & XLOG_TIC_INITED)
+ ticket->t_curr_res -= sizeof(xlog_op_header_t);
+
+ /*
+ * Commit record headers need to be accounted for. These
+ * come in as separate writes so are easy to detect.
+ */
+ if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+ ticket->t_curr_res -= sizeof(xlog_op_header_t);
if (ticket->t_curr_res < 0)
xlog_print_tic_res(log->l_mp, ticket);
@@ -2481,8 +2569,8 @@ restart:
/*
* Atomically get the log space required for a log ticket.
*
- * Once a ticket gets put onto the reserveq, it will only return after
- * the needed reservation is satisfied.
+ * Once a ticket gets put onto the reserveq, it will only return after the
+ * needed reservation is satisfied.
*
* This function is structured so that it has a lock free fast path. This is
* necessary because every new transaction reservation will come through this
@@ -2490,113 +2578,53 @@ restart:
* every pass.
*
* As tickets are only ever moved on and off the reserveq under the
- * l_grant_reserve_lock, we only need to take that lock if we are going
- * to add the ticket to the queue and sleep. We can avoid taking the lock if the
- * ticket was never added to the reserveq because the t_queue list head will be
- * empty and we hold the only reference to it so it can safely be checked
- * unlocked.
+ * l_grant_reserve_lock, we only need to take that lock if we are going to add
+ * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
+ * was never added to the reserveq because the t_queue list head will be empty
+ * and we hold the only reference to it so it can safely be checked unlocked.
*/
STATIC int
-xlog_grant_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_grant_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes;
- int need_bytes;
+ int free_bytes, need_bytes;
+ int error = 0;
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("grant Recovery problem");
-#endif
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
trace_xfs_log_grant_enter(log, tic);
+ /*
+ * If there are other waiters on the queue then give them a chance at
+ * logspace before us. Wake up the first waiters, if we do not wake
+ * up all the waiters then go to sleep waiting for more free space,
+ * otherwise try to get some space for this transaction.
+ */
need_bytes = tic->t_unit_res;
if (tic->t_flags & XFS_LOG_PERM_RESERV)
need_bytes *= tic->t_ocnt;
-
- /* something is already sleeping; insert new transaction at end */
- if (!list_empty_careful(&log->l_reserveq)) {
- spin_lock(&log->l_grant_reserve_lock);
- /* recheck the queue now we are locked */
- if (list_empty(&log->l_reserveq)) {
- spin_unlock(&log->l_grant_reserve_lock);
- goto redo;
- }
- list_add_tail(&tic->t_queue, &log->l_reserveq);
-
- trace_xfs_log_grant_sleep1(log, tic);
-
- /*
- * Gotta check this before going to sleep, while we're
- * holding the grant lock.
- */
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
-
- /*
- * If we got an error, and the filesystem is shutting down,
- * we'll catch it down below. So just continue...
- */
- trace_xfs_log_grant_wake1(log, tic);
- }
-
-redo:
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
-
free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
- if (free_bytes < need_bytes) {
+ if (!list_empty_careful(&log->l_reserveq)) {
spin_lock(&log->l_grant_reserve_lock);
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_reserveq);
-
- trace_xfs_log_grant_sleep2(log, tic);
-
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
-
- trace_xfs_log_grant_wake2(log, tic);
- goto redo;
- }
-
- if (!list_empty(&tic->t_queue)) {
+ if (!xlog_reserveq_wake(log, &free_bytes) ||
+ free_bytes < need_bytes)
+ error = xlog_reserveq_wait(log, tic, need_bytes);
+ spin_unlock(&log->l_grant_reserve_lock);
+ } else if (free_bytes < need_bytes) {
spin_lock(&log->l_grant_reserve_lock);
- list_del_init(&tic->t_queue);
+ error = xlog_reserveq_wait(log, tic, need_bytes);
spin_unlock(&log->l_grant_reserve_lock);
}
+ if (error)
+ return error;
- /* we've got enough space */
xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_grant_exit(log, tic);
xlog_verify_grant_tail(log);
return 0;
-
-error_return_unlocked:
- spin_lock(&log->l_grant_reserve_lock);
-error_return:
- list_del_init(&tic->t_queue);
- spin_unlock(&log->l_grant_reserve_lock);
- trace_xfs_log_grant_error(log, tic);
-
- /*
- * If we are failing, make sure the ticket doesn't have any
- * current reservations. We don't want to add this back when
- * the ticket/transaction gets cancelled.
- */
- tic->t_curr_res = 0;
- tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- return XFS_ERROR(EIO);
-} /* xlog_grant_log_space */
-
+}
/*
* Replenish the byte reservation required by moving the grant write head.
@@ -2605,10 +2633,12 @@ error_return:
* free fast path.
*/
STATIC int
-xlog_regrant_write_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_regrant_write_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes, need_bytes;
+ int free_bytes, need_bytes;
+ int error = 0;
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2616,104 +2646,38 @@ xlog_regrant_write_log_space(xlog_t *log,
if (tic->t_cnt > 0)
return 0;
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("regrant Recovery problem");
-#endif
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
trace_xfs_log_regrant_write_enter(log, tic);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
- /* If there are other waiters on the queue then give them a
- * chance at logspace before us. Wake up the first waiters,
- * if we do not wake up all the waiters then go to sleep waiting
- * for more free space, otherwise try to get some space for
- * this transaction.
+ /*
+ * If there are other waiters on the queue then give them a chance at
+ * logspace before us. Wake up the first waiters, if we do not wake
+ * up all the waiters then go to sleep waiting for more free space,
+ * otherwise try to get some space for this transaction.
*/
need_bytes = tic->t_unit_res;
- if (!list_empty_careful(&log->l_writeq)) {
- struct xlog_ticket *ntic;
-
- spin_lock(&log->l_grant_write_lock);
- free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- list_for_each_entry(ntic, &log->l_writeq, t_queue) {
- ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
-
- if (free_bytes < ntic->t_unit_res)
- break;
- free_bytes -= ntic->t_unit_res;
- wake_up(&ntic->t_wait);
- }
-
- if (ntic != list_first_entry(&log->l_writeq,
- struct xlog_ticket, t_queue)) {
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_writeq);
- trace_xfs_log_regrant_write_sleep1(log, tic);
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
- trace_xfs_log_regrant_write_wake1(log, tic);
- } else
- spin_unlock(&log->l_grant_write_lock);
- }
-
-redo:
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
-
free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- if (free_bytes < need_bytes) {
+ if (!list_empty_careful(&log->l_writeq)) {
spin_lock(&log->l_grant_write_lock);
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_writeq);
-
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep2(log, tic);
- xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
-
- trace_xfs_log_regrant_write_wake2(log, tic);
- goto redo;
- }
-
- if (!list_empty(&tic->t_queue)) {
+ if (!xlog_writeq_wake(log, &free_bytes) ||
+ free_bytes < need_bytes)
+ error = xlog_writeq_wait(log, tic, need_bytes);
+ spin_unlock(&log->l_grant_write_lock);
+ } else if (free_bytes < need_bytes) {
spin_lock(&log->l_grant_write_lock);
- list_del_init(&tic->t_queue);
+ error = xlog_writeq_wait(log, tic, need_bytes);
spin_unlock(&log->l_grant_write_lock);
}
- /* we've got enough space */
+ if (error)
+ return error;
+
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
xlog_verify_grant_tail(log);
return 0;
-
-
- error_return_unlocked:
- spin_lock(&log->l_grant_write_lock);
- error_return:
- list_del_init(&tic->t_queue);
- spin_unlock(&log->l_grant_write_lock);
- trace_xfs_log_regrant_write_error(log, tic);
-
- /*
- * If we are failing, make sure the ticket doesn't have any
- * current reservations. We don't want to add this back when
- * the ticket/transaction gets cancelled.
- */
- tic->t_curr_res = 0;
- tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- return XFS_ERROR(EIO);
-} /* xlog_regrant_write_log_space */
-
+}
/* The first cnt-1 times through here we don't need to
* move the grant write head because the permanent
@@ -2933,8 +2897,7 @@ _xfs_log_force(
XFS_STATS_INC(xs_log_force);
- if (log->l_cilp)
- xlog_cil_force(log);
+ xlog_cil_force(log);
spin_lock(&log->l_icloglock);
@@ -3083,11 +3046,9 @@ _xfs_log_force_lsn(
XFS_STATS_INC(xs_log_force);
- if (log->l_cilp) {
- lsn = xlog_cil_force_lsn(log, lsn);
- if (lsn == NULLCOMMITLSN)
- return 0;
- }
+ lsn = xlog_cil_force_lsn(log, lsn);
+ if (lsn == NULLCOMMITLSN)
+ return 0;
try_again:
spin_lock(&log->l_icloglock);
@@ -3655,7 +3616,7 @@ xfs_log_force_umount(
* completed transactions are flushed to disk with the xfs_log_force()
* call below.
*/
- if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
+ if (!logerror)
xlog_cil_force(log);
/*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 78c9039994af..2aee3b22d29c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -137,7 +137,7 @@ struct xfs_trans;
void xfs_log_item_init(struct xfs_mount *mp,
struct xfs_log_item *item,
int type,
- struct xfs_item_ops *ops);
+ const struct xfs_item_ops *ops);
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
@@ -174,11 +174,6 @@ int xfs_log_reserve(struct xfs_mount *mp,
__uint8_t clientid,
uint flags,
uint t_type);
-int xfs_log_write(struct xfs_mount *mp,
- xfs_log_iovec_t region[],
- int nentries,
- struct xlog_ticket *ticket,
- xfs_lsn_t *start_lsn);
int xfs_log_unmount_write(struct xfs_mount *mp);
void xfs_log_unmount(struct xfs_mount *mp);
int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,8 +184,7 @@ void xlog_iodone(struct xfs_buf *);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
- struct xfs_log_vec *log_vector,
+int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..26db6b13f1f9 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -32,10 +32,7 @@
#include "xfs_discard.h"
/*
- * Perform initial CIL structure initialisation. If the CIL is not
- * enabled in this filesystem, ensure the log->l_cilp is null so
- * we can check this conditional to determine if we are doing delayed
- * logging or not.
+ * Perform initial CIL structure initialisation.
*/
int
xlog_cil_init(
@@ -44,10 +41,6 @@ xlog_cil_init(
struct xfs_cil *cil;
struct xfs_cil_ctx *ctx;
- log->l_cilp = NULL;
- if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
- return 0;
-
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
if (!cil)
return ENOMEM;
@@ -80,9 +73,6 @@ void
xlog_cil_destroy(
struct log *log)
{
- if (!log->l_cilp)
- return;
-
if (log->l_cilp->xc_ctx) {
if (log->l_cilp->xc_ctx->ticket)
xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
@@ -137,9 +127,6 @@ void
xlog_cil_init_post_recovery(
struct log *log)
{
- if (!log->l_cilp)
- return;
-
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
* format the regions into the iclog as though they are being formatted
* directly out of the objects themselves.
*/
-static void
-xlog_cil_format_items(
- struct log *log,
- struct xfs_log_vec *log_vector)
+static struct xfs_log_vec *
+xlog_cil_prepare_log_vecs(
+ struct xfs_trans *tp)
{
- struct xfs_log_vec *lv;
+ struct xfs_log_item_desc *lidp;
+ struct xfs_log_vec *lv = NULL;
+ struct xfs_log_vec *ret_lv = NULL;
- ASSERT(log_vector);
- for (lv = log_vector; lv; lv = lv->lv_next) {
+
+ /* Bail out if we didn't find a log item. */
+ if (list_empty(&tp->t_items)) {
+ ASSERT(0);
+ return NULL;
+ }
+
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_vec *new_lv;
void *ptr;
int index;
int len = 0;
+ uint niovecs;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ /* Skip items that do not have any vectors for writing */
+ niovecs = IOP_SIZE(lidp->lid_item);
+ if (!niovecs)
+ continue;
+
+ new_lv = kmem_zalloc(sizeof(*new_lv) +
+ niovecs * sizeof(struct xfs_log_iovec),
+ KM_SLEEP);
+
+ /* The allocated iovec region lies beyond the log vector. */
+ new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+ new_lv->lv_niovecs = niovecs;
+ new_lv->lv_item = lidp->lid_item;
/* build the vector array and calculate it's length */
- IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
- for (index = 0; index < lv->lv_niovecs; index++)
- len += lv->lv_iovecp[index].i_len;
+ IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
+ for (index = 0; index < new_lv->lv_niovecs; index++)
+ len += new_lv->lv_iovecp[index].i_len;
- lv->lv_buf_len = len;
- lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
- ptr = lv->lv_buf;
+ new_lv->lv_buf_len = len;
+ new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
+ KM_SLEEP|KM_NOFS);
+ ptr = new_lv->lv_buf;
- for (index = 0; index < lv->lv_niovecs; index++) {
- struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+ for (index = 0; index < new_lv->lv_niovecs; index++) {
+ struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
memcpy(ptr, vec->i_addr, vec->i_len);
vec->i_addr = ptr;
ptr += vec->i_len;
}
- ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+ ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
+
+ if (!ret_lv)
+ ret_lv = new_lv;
+ else
+ lv->lv_next = new_lv;
+ lv = new_lv;
}
+
+ return ret_lv;
}
/*
@@ -635,28 +658,30 @@ out_abort:
* background commit, returns without it held once background commits are
* allowed again.
*/
-void
+int
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
- struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn,
int flags)
{
struct log *log = mp->m_log;
int log_flags = 0;
int push = 0;
+ struct xfs_log_vec *log_vector;
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
/*
- * do all the hard work of formatting items (including memory
+ * Do all the hard work of formatting items (including memory
* allocation) outside the CIL context lock. This prevents stalling CIL
* pushes when we are low on memory and a transaction commit spends a
* lot of time in memory reclaim.
*/
- xlog_cil_format_items(log, log_vector);
+ log_vector = xlog_cil_prepare_log_vecs(tp);
+ if (!log_vector)
+ return ENOMEM;
/* lock out background commit */
down_read(&log->l_cilp->xc_ctx_lock);
@@ -709,6 +734,7 @@ xfs_log_commit_cil(
*/
if (push)
xlog_cil_push(log, 0);
+ return 0;
}
/*
@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
{
struct xfs_cil_ctx *ctx;
- if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
- return false;
if (list_empty(&lip->li_cil))
return false;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb24dac42a25..19f69e232509 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,6 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
must be synchronous except
for space allocations */
-#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
operations, typically for
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5cff443f6cdb..671f37eae1c7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -154,12 +154,17 @@ STATIC void
xfs_qm_destroy(
struct xfs_qm *xqm)
{
- struct xfs_dquot *dqp, *n;
int hsize, i;
ASSERT(xqm != NULL);
ASSERT(xqm->qm_nrefs == 0);
+
unregister_shrinker(&xfs_qm_shaker);
+
+ mutex_lock(&xqm->qm_dqfrlist_lock);
+ ASSERT(list_empty(&xqm->qm_dqfrlist));
+ mutex_unlock(&xqm->qm_dqfrlist_lock);
+
hsize = xqm->qm_dqhashmask + 1;
for (i = 0; i < hsize; i++) {
xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -171,17 +176,6 @@ xfs_qm_destroy(
xqm->qm_grp_dqhtable = NULL;
xqm->qm_dqhashmask = 0;
- /* frlist cleanup */
- mutex_lock(&xqm->qm_dqfrlist_lock);
- list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
- xfs_dqlock(dqp);
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- }
- mutex_unlock(&xqm->qm_dqfrlist_lock);
- mutex_destroy(&xqm->qm_dqfrlist_lock);
kmem_free(xqm);
}
@@ -232,34 +226,10 @@ STATIC void
xfs_qm_rele_quotafs_ref(
struct xfs_mount *mp)
{
- xfs_dquot_t *dqp, *n;
-
ASSERT(xfs_Gqm);
ASSERT(xfs_Gqm->qm_nrefs > 0);
/*
- * Go thru the freelist and destroy all inactive dquots.
- */
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
- list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
- xfs_dqlock(dqp);
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(dqp->q_mount == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(list_empty(&dqp->q_hashlist));
- ASSERT(list_empty(&dqp->q_mplist));
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- } else {
- xfs_dqunlock(dqp);
- }
- }
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
- /*
* Destroy the entire XQM. If somebody mounts with quotaon, this'll
* be restarted.
*/
@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
*/
STATIC int
xfs_qm_dqflush_all(
- struct xfs_mount *mp,
- int sync_mode)
+ struct xfs_mount *mp)
{
struct xfs_quotainfo *q = mp->m_quotainfo;
int recl;
@@ -429,7 +398,8 @@ again:
mutex_lock(&q->qi_dqlist_lock);
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
- if (! XFS_DQ_IS_DIRTY(dqp)) {
+ if ((dqp->dq_flags & XFS_DQ_FREEING) ||
+ !XFS_DQ_IS_DIRTY(dqp)) {
xfs_dqunlock(dqp);
continue;
}
@@ -444,14 +414,14 @@ again:
* out immediately. We'll be able to acquire
* the flush lock when the I/O completes.
*/
- xfs_qm_dqflock_pushbuf_wait(dqp);
+ xfs_dqflock_pushbuf_wait(dqp);
}
/*
* Let go of the mplist lock. We don't want to hold it
* across a disk write.
*/
mutex_unlock(&q->qi_dqlist_lock);
- error = xfs_qm_dqflush(dqp, sync_mode);
+ error = xfs_qm_dqflush(dqp, 0);
xfs_dqunlock(dqp);
if (error)
return error;
@@ -468,6 +438,7 @@ again:
/* return ! busy */
return 0;
}
+
/*
* Release the group dquot pointers the user dquots may be
* carrying around as a hint. mplist is locked on entry and exit.
@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
{
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dquot *dqp, *gdqp;
- int nrecl;
again:
ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
- if ((gdqp = dqp->q_gdquot)) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
- xfs_dqunlock(dqp);
-
- if (gdqp) {
- /*
- * Can't hold the mplist lock across a dqput.
- * XXXmust convert to marker based iterations here.
- */
- nrecl = q->qi_dqreclaims;
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ xfs_dqunlock(dqp);
mutex_unlock(&q->qi_dqlist_lock);
- xfs_qm_dqput(gdqp);
-
+ delay(1);
mutex_lock(&q->qi_dqlist_lock);
- if (nrecl != q->qi_dqreclaims)
- goto again;
+ goto again;
}
+
+ gdqp = dqp->q_gdquot;
+ if (gdqp)
+ dqp->q_gdquot = NULL;
+ xfs_dqunlock(dqp);
+
+ if (gdqp)
+ xfs_qm_dqrele(gdqp);
}
}
@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dquot *dqp, *n;
uint dqtype;
- int nrecl;
- int nmisses;
+ int nmisses = 0;
+ LIST_HEAD (dispose_list);
if (!q)
return 0;
@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
*/
xfs_qm_detach_gdquots(mp);
- again:
- nmisses = 0;
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
/*
- * Try to get rid of all of the unwanted dquots. The idea is to
- * get them off mplist and hashlist, but leave them on freelist.
+ * Try to get rid of all of the unwanted dquots.
*/
list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
- /*
- * It's OK to look at the type without taking dqlock here.
- * We're holding the mplist lock here, and that's needed for
- * a dqreclaim.
- */
- if ((dqp->dq_flags & dqtype) == 0)
- continue;
-
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- nrecl = q->qi_dqreclaims;
- mutex_unlock(&q->qi_dqlist_lock);
- mutex_lock(&dqp->q_hash->qh_lock);
- mutex_lock(&q->qi_dqlist_lock);
-
- /*
- * XXXTheoretically, we can get into a very long
- * ping pong game here.
- * No one can be adding dquots to the mplist at
- * this point, but somebody might be taking things off.
- */
- if (nrecl != q->qi_dqreclaims) {
- mutex_unlock(&dqp->q_hash->qh_lock);
- goto again;
- }
+ xfs_dqlock(dqp);
+ if ((dqp->dq_flags & dqtype) != 0 &&
+ !(dqp->dq_flags & XFS_DQ_FREEING)) {
+ if (dqp->q_nrefs == 0) {
+ dqp->dq_flags |= XFS_DQ_FREEING;
+ list_move_tail(&dqp->q_mplist, &dispose_list);
+ } else
+ nmisses++;
}
-
- /*
- * Take the dquot off the mplist and hashlist. It may remain on
- * freelist in INACTIVE state.
- */
- nmisses += xfs_qm_dqpurge(dqp);
+ xfs_dqunlock(dqp);
}
mutex_unlock(&q->qi_dqlist_lock);
+
+ list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
+ xfs_qm_dqpurge(dqp);
+
return nmisses;
}
@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
*/
dqp = udqhint->q_gdquot;
if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
- xfs_dqlock(dqp);
- XFS_DQHOLD(dqp);
ASSERT(*IO_idqpp == NULL);
- *IO_idqpp = dqp;
- xfs_dqunlock(dqp);
+ *IO_idqpp = xfs_qm_dqhold(dqp);
xfs_dqunlock(udqhint);
return 0;
}
@@ -674,7 +616,8 @@ xfs_qm_dqattach_one(
* disk and we didn't ask it to allocate;
* ESRCH if quotas got turned off suddenly.
*/
- error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+ doalloc | XFS_QMOPT_DOWARN, &dqp);
if (error)
return error;
@@ -692,11 +635,7 @@ xfs_qm_dqattach_one(
/*
* Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
+ * udquot as a hint for future lookups.
*/
STATIC void
xfs_qm_dqattach_grouphint(
@@ -707,45 +646,17 @@ xfs_qm_dqattach_grouphint(
xfs_dqlock(udq);
- if ((tmp = udq->q_gdquot)) {
- if (tmp == gdq) {
- xfs_dqunlock(udq);
- return;
- }
+ tmp = udq->q_gdquot;
+ if (tmp) {
+ if (tmp == gdq)
+ goto done;
udq->q_gdquot = NULL;
- /*
- * We can't keep any dqlocks when calling dqrele,
- * because the freelist lock comes before dqlocks.
- */
- xfs_dqunlock(udq);
- /*
- * we took a hard reference once upon a time in dqget,
- * so give it back when the udquot no longer points at it
- * dqput() does the unlocking of the dquot.
- */
xfs_qm_dqrele(tmp);
-
- xfs_dqlock(udq);
- xfs_dqlock(gdq);
-
- } else {
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- xfs_dqlock(gdq);
- }
-
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- ASSERT(XFS_DQ_IS_LOCKED(gdq));
- /*
- * Somebody could have attached a gdquot here,
- * when we dropped the uqlock. If so, just do nothing.
- */
- if (udq->q_gdquot == NULL) {
- XFS_DQHOLD(gdq);
- udq->q_gdquot = gdq;
}
- xfs_dqunlock(gdq);
+ udq->q_gdquot = xfs_qm_dqhold(gdq);
+done:
xfs_dqunlock(udq);
}
@@ -812,17 +723,13 @@ xfs_qm_dqattach_locked(
ASSERT(ip->i_gdquot);
/*
- * We may or may not have the i_udquot locked at this point,
- * but this check is OK since we don't depend on the i_gdquot to
- * be accurate 100% all the time. It is just a hint, and this
- * will succeed in general.
- */
- if (ip->i_udquot->q_gdquot == ip->i_gdquot)
- goto done;
- /*
- * Attach i_gdquot to the gdquot hint inside the i_udquot.
+ * We do not have i_udquot locked at this point, but this check
+ * is OK since we don't depend on the i_gdquot to be accurate
+ * 100% all the time. It is just a hint, and this will
+ * succeed in general.
*/
- xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+ if (ip->i_udquot->q_gdquot != ip->i_gdquot)
+ xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
}
done:
@@ -878,100 +785,6 @@ xfs_qm_dqdetach(
}
}
-int
-xfs_qm_sync(
- struct xfs_mount *mp,
- int flags)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- int recl, restarts;
- struct xfs_dquot *dqp;
- int error;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
-
- restarts = 0;
-
- again:
- mutex_lock(&q->qi_dqlist_lock);
- /*
- * dqpurge_all() also takes the mplist lock and iterate thru all dquots
- * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
- * when we have the mplist lock, we know that dquots will be consistent
- * as long as we have it locked.
- */
- if (!XFS_IS_QUOTA_ON(mp)) {
- mutex_unlock(&q->qi_dqlist_lock);
- return 0;
- }
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
- list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
- /*
- * If this is vfs_sync calling, then skip the dquots that
- * don't 'seem' to be dirty. ie. don't acquire dqlock.
- * This is very similar to what xfs_sync does with inodes.
- */
- if (flags & SYNC_TRYLOCK) {
- if (!XFS_DQ_IS_DIRTY(dqp))
- continue;
- if (!xfs_qm_dqlock_nowait(dqp))
- continue;
- } else {
- xfs_dqlock(dqp);
- }
-
- /*
- * Now, find out for sure if this dquot is dirty or not.
- */
- if (! XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
-
- /* XXX a sentinel would be better */
- recl = q->qi_dqreclaims;
- if (!xfs_dqflock_nowait(dqp)) {
- if (flags & SYNC_TRYLOCK) {
- xfs_dqunlock(dqp);
- continue;
- }
- /*
- * If we can't grab the flush lock then if the caller
- * really wanted us to give this our best shot, so
- * see if we can give a push to the buffer before we wait
- * on the flush lock. At this point, we know that
- * even though the dquot is being flushed,
- * it has (new) dirty data.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
- /*
- * Let go of the mplist lock. We don't want to hold it
- * across a disk write
- */
- mutex_unlock(&q->qi_dqlist_lock);
- error = xfs_qm_dqflush(dqp, flags);
- xfs_dqunlock(dqp);
- if (error && XFS_FORCED_SHUTDOWN(mp))
- return 0; /* Need to prevent umount failure */
- else if (error)
- return error;
-
- mutex_lock(&q->qi_dqlist_lock);
- if (recl != q->qi_dqreclaims) {
- if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
- break;
-
- mutex_unlock(&q->qi_dqlist_lock);
- goto again;
- }
- }
-
- mutex_unlock(&q->qi_dqlist_lock);
- return 0;
-}
-
/*
* The hash chains and the mplist use the same xfs_dqhash structure as
* their list head, but we can take the mplist qh_lock and one of the
@@ -1033,18 +846,21 @@ xfs_qm_init_quotainfo(
/*
* We try to get the limits from the superuser's limits fields.
* This is quite hacky, but it is standard quota practice.
+ *
* We look at the USR dquot with id == 0 first, but if user quotas
* are not enabled we goto the GRP dquot with id == 0.
* We don't really care to keep separate default limits for user
* and group quotas, at least not at this point.
+ *
+ * Since we may not have done a quotacheck by this point, just read
+ * the dquot without attaching it to any hashtables or lists.
*/
- error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
- XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
- (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
- XFS_DQ_PROJ),
- XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
- &dqp);
- if (! error) {
+ error = xfs_qm_dqread(mp, 0,
+ XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
+ (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+ XFS_DQ_PROJ),
+ XFS_QMOPT_DOWARN, &dqp);
+ if (!error) {
xfs_disk_dquot_t *ddqp = &dqp->q_core;
/*
@@ -1071,11 +887,6 @@ xfs_qm_init_quotainfo(
qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
- /*
- * We sent the XFS_QMOPT_DQSUSER flag to dqget because
- * we don't want this dquot cached. We haven't done a
- * quotacheck yet, and quotacheck doesn't like incore dquots.
- */
xfs_qm_dqdestroy(dqp);
} else {
qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -1660,7 +1471,7 @@ xfs_qm_quotacheck(
* successfully.
*/
if (!error)
- error = xfs_qm_dqflush_all(mp, 0);
+ error = xfs_qm_dqflush_all(mp);
/*
* We can get this error if we couldn't do a dquot allocation inside
@@ -1792,59 +1603,33 @@ xfs_qm_init_quotainos(
/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
+ * Pop the least recently used dquot off the freelist and recycle it.
*/
-STATIC xfs_dquot_t *
+STATIC struct xfs_dquot *
xfs_qm_dqreclaim_one(void)
{
- xfs_dquot_t *dqpout;
- xfs_dquot_t *dqp;
- int restarts;
- int startagain;
-
- restarts = 0;
- dqpout = NULL;
+ struct xfs_dquot *dqp;
+ int restarts = 0;
- /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
- startagain = 0;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
+restart:
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
struct xfs_mount *mp = dqp->q_mount;
- xfs_dqlock(dqp);
+
+ if (!xfs_dqlock_nowait(dqp))
+ continue;
/*
- * We are racing with dqlookup here. Naturally we don't
- * want to reclaim a dquot that lookup wants. We release the
- * freelist lock and start over, so that lookup will grab
- * both the dquot and the freelistlock.
+ * This dquot has already been grabbed by dqlookup.
+ * Remove it from the freelist and try again.
*/
- if (dqp->dq_flags & XFS_DQ_WANT) {
- ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
+ if (dqp->q_nrefs) {
trace_xfs_dqreclaim_want(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- restarts++;
- startagain = 1;
- goto dqunlock;
- }
- /*
- * If the dquot is inactive, we are assured that it is
- * not on the mplist or the hashlist, and that makes our
- * life easier.
- */
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(mp == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(list_empty(&dqp->q_hashlist));
- ASSERT(list_empty(&dqp->q_mplist));
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
- dqpout = dqp;
- XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+ restarts++;
goto dqunlock;
}
@@ -1873,64 +1658,49 @@ again:
* We flush it delayed write, so don't bother
* releasing the freelist lock.
*/
- error = xfs_qm_dqflush(dqp, 0);
+ error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
if (error) {
xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp);
}
goto dqunlock;
}
+ xfs_dqfunlock(dqp);
/*
- * We're trying to get the hashlock out of order. This races
- * with dqlookup; so, we giveup and goto the next dquot if
- * we couldn't get the hashlock. This way, we won't starve
- * a dqlookup process that holds the hashlock that is
- * waiting for the freelist lock.
+ * Prevent lookup now that we are going to reclaim the dquot.
+ * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
+ * thus we can drop the lock now.
*/
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- restarts++;
- goto dqfunlock;
- }
+ dqp->dq_flags |= XFS_DQ_FREEING;
+ xfs_dqunlock(dqp);
- /*
- * This races with dquot allocation code as well as dqflush_all
- * and reclaim code. So, if we failed to grab the mplist lock,
- * giveup everything and start over.
- */
- if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
- restarts++;
- startagain = 1;
- goto qhunlock;
- }
+ mutex_lock(&dqp->q_hash->qh_lock);
+ list_del_init(&dqp->q_hashlist);
+ dqp->q_hash->qh_version++;
+ mutex_unlock(&dqp->q_hash->qh_lock);
- ASSERT(dqp->q_nrefs == 0);
+ mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dquots--;
mp->m_quotainfo->qi_dqreclaims++;
- list_del_init(&dqp->q_hashlist);
- dqp->q_hash->qh_version++;
+ mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
+ ASSERT(dqp->q_nrefs == 0);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
- dqpout = dqp;
- mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
- mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
- xfs_dqfunlock(dqp);
+
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+ return dqp;
dqunlock:
xfs_dqunlock(dqp);
- if (dqpout)
- break;
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
break;
- if (startagain) {
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- goto again;
- }
+ goto restart;
}
+
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- return dqpout;
+ return NULL;
}
/*
@@ -2150,10 +1920,7 @@ xfs_qm_vop_dqalloc(
* this to caller
*/
ASSERT(ip->i_udquot);
- uq = ip->i_udquot;
- xfs_dqlock(uq);
- XFS_DQHOLD(uq);
- xfs_dqunlock(uq);
+ uq = xfs_qm_dqhold(ip->i_udquot);
}
}
if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
@@ -2174,10 +1941,7 @@ xfs_qm_vop_dqalloc(
xfs_ilock(ip, lockflags);
} else {
ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
+ gq = xfs_qm_dqhold(ip->i_gdquot);
}
} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
if (xfs_get_projid(ip) != prid) {
@@ -2197,10 +1961,7 @@ xfs_qm_vop_dqalloc(
xfs_ilock(ip, lockflags);
} else {
ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
+ gq = xfs_qm_dqhold(ip->i_gdquot);
}
}
if (uq)
@@ -2250,14 +2011,10 @@ xfs_qm_vop_chown(
xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
- * Take an extra reference, because the inode
- * is going to keep this dquot pointer even
- * after the trans_commit.
+ * Take an extra reference, because the inode is going to keep
+ * this dquot pointer even after the trans_commit.
*/
- xfs_dqlock(newdq);
- XFS_DQHOLD(newdq);
- xfs_dqunlock(newdq);
- *IO_olddq = newdq;
+ *IO_olddq = xfs_qm_dqhold(newdq);
return prevdq;
}
@@ -2389,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp) {
- xfs_dqlock(udqp);
- XFS_DQHOLD(udqp);
- xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
- ip->i_udquot = udqp;
ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+
+ ip->i_udquot = xfs_qm_dqhold(udqp);
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
if (gdqp) {
- xfs_dqlock(gdqp);
- XFS_DQHOLD(gdqp);
- xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
- ip->i_gdquot = gdqp;
ASSERT(XFS_IS_OQUOTA_ON(mp));
ASSERT((XFS_IS_GQUOTA_ON(mp) ?
ip->i_d.di_gid : xfs_get_projid(ip)) ==
be32_to_cpu(gdqp->q_core.d_id));
+
+ ip->i_gdquot = xfs_qm_dqhold(gdqp);
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
}
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..9b4f3adefbc5 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone;
/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS 7
-
-/*
* Ditto, for xfs_qm_dqreclaim_one.
*/
#define XFS_QM_RECLAIM_MAX_RESTARTS 4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567fe..8a0807e0f979 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
#define XFS_DQ_PROJ 0x0002 /* project quota */
#define XFS_DQ_GROUP 0x0004 /* a group quota */
#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
-#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */
-#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
+#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
{ XFS_DQ_PROJ, "PROJ" }, \
{ XFS_DQ_GROUP, "GROUP" }, \
{ XFS_DQ_DIRTY, "DIRTY" }, \
- { XFS_DQ_WANT, "WANT" }, \
- { XFS_DQ_INACTIVE, "INACTIVE" }
+ { XFS_DQ_FREEING, "FREEING" }
/*
* In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
-#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
extern void xfs_qm_dqdetach(struct xfs_inode *);
extern void xfs_qm_dqrele(struct xfs_dquot *);
extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
-extern int xfs_qm_sync(struct xfs_mount *, int);
extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
extern void xfs_qm_mount_quotas(struct xfs_mount *);
extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
#define xfs_qm_dqdetach(ip)
#define xfs_qm_dqrele(d)
#define xfs_qm_statvfs(ip, s)
-static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
-{
- return 0;
-}
#define xfs_qm_newmount(mp, a, b) (0)
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3eca58f51ae9..281961c1d81a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- mp->m_flags |= XFS_MOUNT_DELAYLOG;
/*
* These can be overridden by the mount option parsing.
@@ -353,11 +352,11 @@ xfs_parseargs(
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
- mp->m_flags |= XFS_MOUNT_DELAYLOG;
+ xfs_warn(mp,
+ "delaylog is the default now, option is deprecated.");
} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
- mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
xfs_warn(mp,
- "nodelaylog is deprecated and will be removed in Linux 3.3");
+ "nodelaylog support has been removed, option is deprecated.");
} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
mp->m_flags |= XFS_MOUNT_DISCARD;
} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
return EINVAL;
}
- if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
- !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
- xfs_warn(mp,
- "the discard option is incompatible with the nodelaylog option");
- return EINVAL;
- }
-
#ifndef CONFIG_XFS_QUOTA
if (XFS_IS_QUOTA_RUNNING(mp)) {
xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
{ XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
- { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ 0, NULL }
};
@@ -869,27 +860,6 @@ xfs_fs_dirty_inode(
}
STATIC int
-xfs_log_inode(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
-}
-
-STATIC int
xfs_fs_write_inode(
struct inode *inode,
struct writeback_control *wbc)
@@ -902,10 +872,8 @@ xfs_fs_write_inode(
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
- if (!ip->i_update_core)
- return 0;
- if (wbc->sync_mode == WB_SYNC_ALL) {
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
/*
* Make sure the inode has made it it into the log. Instead
* of forcing it all the way to stable storage using a
@@ -913,11 +881,14 @@ xfs_fs_write_inode(
* ->sync_fs call do that for thus, which reduces the number
* of synchronous log forces dramatically.
*/
- error = xfs_log_inode(ip);
+ error = xfs_log_dirty_inode(ip, NULL, 0);
if (error)
goto out;
return 0;
} else {
+ if (!ip->i_update_core)
+ return 0;
+
/*
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
@@ -1034,17 +1005,10 @@ xfs_fs_sync_fs(
int error;
/*
- * Not much we can do for the first async pass. Writing out the
- * superblock would be counter-productive as we are going to redirty
- * when writing out other data and metadata (and writing out a single
- * block is quite fast anyway).
- *
- * Try to asynchronously kick off quota syncing at least.
+ * Doing anything during the async pass would be counterproductive.
*/
- if (!wait) {
- xfs_qm_sync(mp, SYNC_TRYLOCK);
+ if (!wait)
return 0;
- }
error = xfs_quiesce_data(mp);
if (error)
@@ -1258,9 +1222,9 @@ xfs_fs_unfreeze(
STATIC int
xfs_fs_show_options(
struct seq_file *m,
- struct vfsmount *mnt)
+ struct dentry *root)
{
- return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+ return -xfs_showargs(XFS_M(root->d_sb), m);
}
/*
@@ -1641,12 +1605,12 @@ STATIC int __init
xfs_init_workqueues(void)
{
/*
- * max_active is set to 8 to give enough concurency to allow
- * multiple work operations on each CPU to run. This allows multiple
- * filesystems to be running sync work concurrently, and scales with
- * the number of CPUs in the system.
+ * We never want to the same work item to run twice, reclaiming inodes
+ * or idling the log is not going to get any faster by multiple CPUs
+ * competing for ressources. Use the default large max_active value
+ * so that even lots of filesystems can perform these task in parallel.
*/
- xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+ xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
if (!xfs_syncd_wq)
return -ENOMEM;
return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index aa3dc1a4d53d..72c01a1c16e7 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
return error;
}
+int
+xfs_log_dirty_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ if (!ip->i_update_core)
+ return 0;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return xfs_trans_commit(tp, 0);
+}
+
/*
* When remounting a filesystem read-only or freezing the filesystem, we have
* two phases to execute. This first phase is syncing the data before we
@@ -359,10 +385,17 @@ xfs_quiesce_data(
{
int error, error2 = 0;
- xfs_qm_sync(mp, SYNC_TRYLOCK);
- xfs_qm_sync(mp, SYNC_WAIT);
+ /*
+ * Log all pending size and timestamp updates. The vfs writeback
+ * code is supposed to do this, but due to its overagressive
+ * livelock detection it will skip inodes where appending writes
+ * were written out in the first non-blocking sync phase if their
+ * completion took long enough that it happened after taking the
+ * timestamp for the cut-off in the blocking phase.
+ */
+ xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
- /* force out the newly dirtied log buffers */
+ /* force out the log */
xfs_log_force(mp, XFS_LOG_SYNC);
/* write superblock and hoover up shutdown errors */
@@ -470,7 +503,6 @@ xfs_sync_worker(
error = xfs_fs_log_dummy(mp);
else
xfs_log_force(mp, 0);
- error = xfs_qm_sync(mp, SYNC_TRYLOCK);
/* start pushing all the metadata that is currently dirty */
xfs_ail_push_all(mp->m_ail);
@@ -770,6 +802,17 @@ restart:
if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT))
goto out;
+
+ /*
+ * If we only have a single dirty inode in a cluster there is
+ * a fair chance that the AIL push may have pushed it into
+ * the buffer, but xfsbufd won't touch it until 30 seconds
+ * from now, and thus we will lock up here.
+ *
+ * Promote the inode buffer to the front of the delwri list
+ * and wake up xfsbufd now.
+ */
+ xfs_promote_inode(ip);
xfs_iflock(ip);
}
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e7ac6e..fa965479d788 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
+int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
+
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
int xfs_reclaim_inodes_count(struct xfs_mount *mp);
void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f1d2802b2f07..a9d5b1e06efe 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -834,18 +832,14 @@ DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1f35b2feca97..329b06aba1c2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
lidp->lid_item = lip;
lidp->lid_flags = 0;
- lidp->lid_size = 0;
list_add_tail(&lidp->lid_trans, &tp->t_items);
lip->li_desc = lidp;
@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
}
}
-/*
- * Unlock the items associated with a transaction.
- *
- * Items which were not logged should be freed. Those which were logged must
- * still be tracked so they can be unpinned when the transaction commits.
- */
-STATIC void
-xfs_trans_unlock_items(
- struct xfs_trans *tp,
- xfs_lsn_t commit_lsn)
-{
- struct xfs_log_item_desc *lidp, *next;
-
- list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
- struct xfs_log_item *lip = lidp->lid_item;
-
- lip->li_desc = NULL;
-
- if (commit_lsn != NULLCOMMITLSN)
- IOP_COMMITTING(lip, commit_lsn);
- IOP_UNLOCK(lip);
-
- /*
- * Free the descriptor if the item is not dirty
- * within this transaction.
- */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- xfs_trans_free_item_desc(lidp);
- }
-}
-
-/*
- * Total up the number of log iovecs needed to commit this
- * transaction. The transaction itself needs one for the
- * transaction header. Ask each dirty item in turn how many
- * it needs to get the total.
- */
-static uint
-xfs_trans_count_vecs(
- struct xfs_trans *tp)
-{
- int nvecs;
- struct xfs_log_item_desc *lidp;
-
- nvecs = 1;
-
- /* In the non-debug case we need to start bailing out if we
- * didn't find a log_item here, return zero and let trans_commit
- * deal with it.
- */
- if (list_empty(&tp->t_items)) {
- ASSERT(0);
- return 0;
- }
-
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- /*
- * Skip items which aren't dirty in this transaction.
- */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
- lidp->lid_size = IOP_SIZE(lidp->lid_item);
- nvecs += lidp->lid_size;
- }
-
- return nvecs;
-}
-
-/*
- * Fill in the vector with pointers to data to be logged
- * by this transaction. The transaction header takes
- * the first vector, and then each dirty item takes the
- * number of vectors it indicated it needed in xfs_trans_count_vecs().
- *
- * As each item fills in the entries it needs, also pin the item
- * so that it cannot be flushed out until the log write completes.
- */
-static void
-xfs_trans_fill_vecs(
- struct xfs_trans *tp,
- struct xfs_log_iovec *log_vector)
-{
- struct xfs_log_item_desc *lidp;
- struct xfs_log_iovec *vecp;
- uint nitems;
-
- /*
- * Skip over the entry for the transaction header, we'll
- * fill that in at the end.
- */
- vecp = log_vector + 1;
-
- nitems = 0;
- ASSERT(!list_empty(&tp->t_items));
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /*
- * The item may be marked dirty but not log anything. This can
- * be used to get called when a transaction is committed.
- */
- if (lidp->lid_size)
- nitems++;
- IOP_FORMAT(lidp->lid_item, vecp);
- vecp += lidp->lid_size;
- IOP_PIN(lidp->lid_item);
- }
-
- /*
- * Now that we've counted the number of items in this transaction, fill
- * in the transaction header. Note that the transaction header does not
- * have a log item.
- */
- tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
- tp->t_header.th_type = tp->t_type;
- tp->t_header.th_num_items = nitems;
- log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
- log_vector->i_len = sizeof(xfs_trans_header_t);
- log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
-}
-
-/*
- * The committed item processing consists of calling the committed routine of
- * each logged item, updating the item's position in the AIL if necessary, and
- * unpinning each item. If the committed routine returns -1, then do nothing
- * further with the item because it may have been freed.
- *
- * Since items are unlocked when they are copied to the incore log, it is
- * possible for two transactions to be completing and manipulating the same
- * item simultaneously. The AIL lock will protect the lsn field of each item.
- * The value of this field can never go backwards.
- *
- * We unpin the items after repositioning them in the AIL, because otherwise
- * they could be immediately flushed and we'd have to race with the flusher
- * trying to pull the item from the AIL as we add it.
- */
-static void
-xfs_trans_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn,
- int aborted)
-{
- xfs_lsn_t item_lsn;
- struct xfs_ail *ailp;
-
- if (aborted)
- lip->li_flags |= XFS_LI_ABORTED;
- item_lsn = IOP_COMMITTED(lip, commit_lsn);
-
- /* item_lsn of -1 means the item needs no further processing */
- if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
- return;
-
- /*
- * If the returned lsn is greater than what it contained before, update
- * the location of the item in the AIL. If it is not, then do nothing.
- * Items can never move backwards in the AIL.
- *
- * While the new lsn should usually be greater, it is possible that a
- * later transaction completing simultaneously with an earlier one
- * using the same item could complete first with a higher lsn. This
- * would cause the earlier transaction to fail the test below.
- */
- ailp = lip->li_ailp;
- spin_lock(&ailp->xa_lock);
- if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
- /*
- * This will set the item's lsn to item_lsn and update the
- * position of the item in the AIL.
- *
- * xfs_trans_ail_update() drops the AIL lock.
- */
- xfs_trans_ail_update(ailp, lip, item_lsn);
- } else {
- spin_unlock(&ailp->xa_lock);
- }
-
- /*
- * Now that we've repositioned the item in the AIL, unpin it so it can
- * be flushed. Pass information about buffer stale state down from the
- * log item flags, if anyone else stales the buffer we do not want to
- * pay any attention to it.
- */
- IOP_UNPIN(lip, 0);
-}
-
-/*
- * This is typically called by the LM when a transaction has been fully
- * committed to disk. It needs to unpin the items which have
- * been logged by the transaction and update their positions
- * in the AIL if necessary.
- *
- * This also gets called when the transactions didn't get written out
- * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
- */
-STATIC void
-xfs_trans_committed(
- void *arg,
- int abortflag)
-{
- struct xfs_trans *tp = arg;
- struct xfs_log_item_desc *lidp, *next;
-
- list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
- xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
- xfs_trans_free_item_desc(lidp);
- }
-
- xfs_trans_free(tp);
-}
-
static inline void
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
@@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(
}
/*
- * Called from the trans_commit code when we notice that the filesystem is in
- * the middle of a forced shutdown.
- *
- * When we are called here, we have already pinned all the items in the
- * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
- * so we can simply walk the items in the transaction, unpin them with an abort
- * flag and then free the items. Note that unpinning the items can result in
- * them being freed immediately, so we need to use a safe list traversal method
- * here.
- */
-STATIC void
-xfs_trans_uncommit(
- struct xfs_trans *tp,
- uint flags)
-{
- struct xfs_log_item_desc *lidp, *n;
-
- list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
- if (lidp->lid_flags & XFS_LID_DIRTY)
- IOP_UNPIN(lidp->lid_item, 1);
- }
-
- xfs_trans_unreserve_and_mod_sb(tp);
- xfs_trans_unreserve_and_mod_dquots(tp);
-
- xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
- xfs_trans_free(tp);
-}
-
-/*
- * Format the transaction direct to the iclog. This isolates the physical
- * transaction commit operation from the logical operation and hence allows
- * other methods to be introduced without affecting the existing commit path.
- */
-static int
-xfs_trans_commit_iclog(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn,
- int flags)
-{
- int shutdown;
- int error;
- int log_flags = 0;
- struct xlog_in_core *commit_iclog;
-#define XFS_TRANS_LOGVEC_COUNT 16
- struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
- struct xfs_log_iovec *log_vector;
- uint nvec;
-
-
- /*
- * Ask each log item how many log_vector entries it will
- * need so we can figure out how many to allocate.
- * Try to avoid the kmem_alloc() call in the common case
- * by using a vector from the stack when it fits.
- */
- nvec = xfs_trans_count_vecs(tp);
- if (nvec == 0) {
- return ENOMEM; /* triggers a shutdown! */
- } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
- log_vector = log_vector_fast;
- } else {
- log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
- sizeof(xfs_log_iovec_t),
- KM_SLEEP);
- }
-
- /*
- * Fill in the log_vector and pin the logged items, and
- * then write the transaction to the log.
- */
- xfs_trans_fill_vecs(tp, log_vector);
-
- if (flags & XFS_TRANS_RELEASE_LOG_RES)
- log_flags = XFS_LOG_REL_PERM_RESERV;
-
- error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
-
- /*
- * The transaction is committed incore here, and can go out to disk
- * at any time after this call. However, all the items associated
- * with the transaction are still locked and pinned in memory.
- */
- *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
-
- tp->t_commit_lsn = *commit_lsn;
- trace_xfs_trans_commit_lsn(tp);
-
- if (nvec > XFS_TRANS_LOGVEC_COUNT)
- kmem_free(log_vector);
-
- /*
- * If we got a log write error. Unpin the logitems that we
- * had pinned, clean up, free trans structure, and return error.
- */
- if (error || *commit_lsn == -1) {
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
- return XFS_ERROR(EIO);
- }
-
- /*
- * Once the transaction has committed, unused
- * reservations need to be released and changes to
- * the superblock need to be reflected in the in-core
- * version. Do that now.
- */
- xfs_trans_unreserve_and_mod_sb(tp);
-
- /*
- * Tell the LM to call the transaction completion routine
- * when the log write with LSN commit_lsn completes (e.g.
- * when the transaction commit really hits the on-disk log).
- * After this call we cannot reference tp, because the call
- * can happen at any time and the call will free the transaction
- * structure pointed to by tp. The only case where we call
- * the completion routine (xfs_trans_committed) directly is
- * if the log is turned off on a debug kernel or we're
- * running in simulation mode (the log is explicitly turned
- * off).
- */
- tp->t_logcb.cb_func = xfs_trans_committed;
- tp->t_logcb.cb_arg = tp;
-
- /*
- * We need to pass the iclog buffer which was used for the
- * transaction commit record into this function, and attach
- * the callback to it. The callback must be attached before
- * the items are unlocked to avoid racing with other threads
- * waiting for an item to unlock.
- */
- shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
-
- /*
- * Mark this thread as no longer being in a transaction
- */
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-
- /*
- * Once all the items of the transaction have been copied
- * to the in core log and the callback is attached, the
- * items can be unlocked.
- *
- * This will free descriptors pointing to items which were
- * not logged since there is nothing more to do with them.
- * For items which were logged, we will keep pointers to them
- * so they can be unpinned after the transaction commits to disk.
- * This will also stamp each modified meta-data item with
- * the commit lsn of this transaction for dependency tracking
- * purposes.
- */
- xfs_trans_unlock_items(tp, *commit_lsn);
-
- /*
- * If we detected a log error earlier, finish committing
- * the transaction now (unpin log items, etc).
- *
- * Order is critical here, to avoid using the transaction
- * pointer after its been freed (by xfs_trans_committed
- * either here now, or as a callback). We cannot do this
- * step inside xfs_log_notify as was done earlier because
- * of this issue.
- */
- if (shutdown)
- xfs_trans_committed(tp, XFS_LI_ABORTED);
-
- /*
- * Now that the xfs_trans_committed callback has been attached,
- * and the items are released we can finally allow the iclog to
- * go to disk.
- */
- return xfs_log_release_iclog(mp, commit_iclog);
-}
-
-/*
- * Walk the log items and allocate log vector structures for
- * each item large enough to fit all the vectors they require.
- * Note that this format differs from the old log vector format in
- * that there is no transaction header in these log vectors.
- */
-STATIC struct xfs_log_vec *
-xfs_trans_alloc_log_vecs(
- xfs_trans_t *tp)
-{
- struct xfs_log_item_desc *lidp;
- struct xfs_log_vec *lv = NULL;
- struct xfs_log_vec *ret_lv = NULL;
-
-
- /* Bail out if we didn't find a log item. */
- if (list_empty(&tp->t_items)) {
- ASSERT(0);
- return NULL;
- }
-
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_vec *new_lv;
-
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /* Skip items that do not have any vectors for writing */
- lidp->lid_size = IOP_SIZE(lidp->lid_item);
- if (!lidp->lid_size)
- continue;
-
- new_lv = kmem_zalloc(sizeof(*new_lv) +
- lidp->lid_size * sizeof(struct xfs_log_iovec),
- KM_SLEEP);
-
- /* The allocated iovec region lies beyond the log vector. */
- new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
- new_lv->lv_niovecs = lidp->lid_size;
- new_lv->lv_item = lidp->lid_item;
- if (!ret_lv)
- ret_lv = new_lv;
- else
- lv->lv_next = new_lv;
- lv = new_lv;
- }
-
- return ret_lv;
-}
-
-static int
-xfs_trans_commit_cil(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn,
- int flags)
-{
- struct xfs_log_vec *log_vector;
-
- /*
- * Get each log item to allocate a vector structure for
- * the log item to to pass to the log write code. The
- * CIL commit code will format the vector and save it away.
- */
- log_vector = xfs_trans_alloc_log_vecs(tp);
- if (!log_vector)
- return ENOMEM;
-
- xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
-
- current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_free(tp);
- return 0;
-}
-
-/*
* Commit the given transaction to the log.
*
* XFS disk error handling mechanism is not based on a typical
@@ -1845,17 +1379,16 @@ xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- if (mp->m_flags & XFS_MOUNT_DELAYLOG)
- error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
- else
- error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
-
+ error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
if (error == ENOMEM) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
error = XFS_ERROR(EIO);
goto out_unreserve;
}
+ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ xfs_trans_free(tp);
+
/*
* If the transaction needs to be synchronous, then force the
* log out now and wait for it.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 603f3eb52041..f6118703f20d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
*/
struct xfs_log_item_desc {
struct xfs_log_item *lid_item;
- ushort lid_size;
- unsigned char lid_flags;
struct list_head lid_trans;
+ unsigned char lid_flags;
};
#define XFS_LID_DIRTY 0x1
@@ -326,7 +325,7 @@ typedef struct xfs_log_item {
struct xfs_log_item *);
/* buffer item iodone */
/* callback func */
- struct xfs_item_ops *li_ops; /* function list */
+ const struct xfs_item_ops *li_ops; /* function list */
/* delayed logging */
struct list_head li_cil; /* CIL pointers */
@@ -341,7 +340,7 @@ typedef struct xfs_log_item {
{ XFS_LI_IN_AIL, "IN_AIL" }, \
{ XFS_LI_ABORTED, "ABORTED" }
-typedef struct xfs_item_ops {
+struct xfs_item_ops {
uint (*iop_size)(xfs_log_item_t *);
void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
void (*iop_pin)(xfs_log_item_t *);
@@ -352,7 +351,7 @@ typedef struct xfs_item_ops {
void (*iop_push)(xfs_log_item_t *);
bool (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-} xfs_item_ops_t;
+};
#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 8b32d1a4c5a1..89dbb4a50872 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -53,7 +53,7 @@ xfs_dir_ialloc(
output: may be a new transaction. */
xfs_inode_t *dp, /* directory within whose allocate
the inode. */
- mode_t mode,
+ umode_t mode,
xfs_nlink_t nlink,
xfs_dev_t rdev,
prid_t prid, /* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 456fca314933..5eeab4690cfe 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,7 @@
#ifndef __XFS_UTILS_H__
#define __XFS_UTILS_H__
-extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
+extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4ecf2a549060..f2fea868d4db 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -112,7 +112,7 @@ xfs_readlink(
char *link)
{
xfs_mount_t *mp = ip->i_mount;
- int pathlen;
+ xfs_fsize_t pathlen;
int error = 0;
trace_xfs_readlink(ip);
@@ -122,13 +122,19 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT(S_ISLNK(ip->i_d.di_mode));
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
-
pathlen = ip->i_d.di_size;
if (!pathlen)
goto out;
+ if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
+ __func__, (unsigned long long) ip->i_ino,
+ (long long) pathlen);
+ ASSERT(0);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';
@@ -816,7 +822,7 @@ int
xfs_create(
xfs_inode_t *dp,
struct xfs_name *name,
- mode_t mode,
+ umode_t mode,
xfs_dev_t rdev,
xfs_inode_t **ipp)
{
@@ -1475,7 +1481,7 @@ xfs_symlink(
xfs_inode_t *dp,
struct xfs_name *link_name,
const char *target_path,
- mode_t mode,
+ umode_t mode,
xfs_inode_t **ipp)
{
xfs_mount_t *mp = dp->i_mount;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e9..0c877cbde142 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -26,7 +26,7 @@ int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
+int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
xfs_dev_t rdev, struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
@@ -35,7 +35,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
xfs_off_t *offset, filldir_t filldir);
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
- const char *target_path, mode_t mode, struct xfs_inode **ipp);
+ const char *target_path, umode_t mode, struct xfs_inode **ipp);
int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
int xfs_change_file_space(struct xfs_inode *ip, int cmd,
xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);