summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author: David Chinner <dgc@sgi.com> 2005-11-02 10:33:05 +1100
committer: Nathan Scott <nathans@sgi.com> 2005-11-02 10:33:05 +1100
commit: e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37 (patch)
tree: a82b58ebc5c40474cdc53c91bd135b97034de565 /fs
parent: ee34807a65aa0c5911dc27682863afca780a003e (diff)
download: lwn-e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37.tar.gz
lwn-e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37.zip
[XFS] Introduce two new mount options (nolargeio/largeio) to allow
filesystems to expose the filesystem stripe width in stat(2) rather than
the page cache size. This allows applications requiring high bandwidth to
easily determine the optimum I/O size for the underlying filesystem. The
default is to report the page cache size (i.e. "nolargeio").

SGI-PV: 942818
SGI-Modid: xfs-linux:xfs-kern:23830a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.c 1
-rw-r--r-- fs/xfs/xfs_clnt.h 2
-rw-r--r-- fs/xfs/xfs_mount.h 27
-rw-r--r-- fs/xfs/xfs_vfsops.c 11
-rw-r--r-- fs/xfs/xfs_vnodeops.c 35
6 files changed, 43 insertions, 35 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index fa87279405d8..f6f6b6750d4f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -189,7 +189,7 @@ xfs_revalidate_inode(
break;
}
- inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blksize = xfs_preferred_iosize(mp);
inode->i_generation = ip->i_d.di_gen;
i_size_write(inode, ip->i_d.di_size);
inode->i_blocks =
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 268f45bf6a9a..61999649ecef 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -124,6 +124,7 @@ vn_revalidate_core(
inode->i_mtime = vap->va_mtime;
inode->i_ctime = vap->va_ctime;
inode->i_atime = vap->va_atime;
+ inode->i_blksize = vap->va_blocksize;
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index c93cb282f3d8..90d9d56c4dc8 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -106,5 +106,7 @@ struct xfs_mount_args {
#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
* symlink,mkdir,rmdir,mknod */
+#define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred
+ * I/O size in stat() */
#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0653beecf93a..b71af184aea6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -421,6 +421,9 @@ typedef struct xfs_mount {
* allocation */
#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */
#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */
+#define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred
+ * I/O size in stat() */
+
/*
* Default minimum read and write sizes.
@@ -442,6 +445,30 @@ typedef struct xfs_mount {
#define XFS_WSYNC_READIO_LOG 15 /* 32K */
#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */
+/*
+ * Allow large block sizes to be reported to userspace programs if the
+ * "largeio" mount option is used.
+ *
+ * If compatibility mode is specified, simply return the basic unit of caching
+ * so that we don't get inefficient read/modify/write I/O from user apps.
+ * Otherwise:
+ *
+ * If the underlying volume is a stripe, then return the stripe width in bytes
+ * as the recommended I/O size. If it is not a stripe and we've set a default
+ * buffered I/O size, return that; otherwise return the compat default.
+ */
+static inline unsigned long
+xfs_preferred_iosize(xfs_mount_t *mp)
+{
+ if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
+ return PAGE_CACHE_SIZE;
+ return (mp->m_swidth ?
+ (mp->m_swidth << mp->m_sb.sb_blocklog) :
+ ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
+ (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
+ PAGE_CACHE_SIZE));
+}
+
#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 9142351df515..7227baee8994 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -307,6 +307,9 @@ xfs_start_flags(
if (ap->flags & XFSMNT_DIRSYNC)
mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (ap->flags & XFSMNT_COMPAT_IOSIZE)
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+
/*
* no recovery flag requires a read-only mount
*/
@@ -1645,6 +1648,9 @@ xfs_vget(
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
+#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
+ * in stat(). */
STATIC unsigned long
suffix_strtoul(const char *cp, char **endp, unsigned int base)
@@ -1681,6 +1687,7 @@ xfs_parseargs(
int dsunit, dswidth, vol_dsunit, vol_dswidth;
int iosize;
+ args->flags |= XFSMNT_COMPAT_IOSIZE;
#if 0 /* XXX: off by default, until some remaining issues ironed out */
args->flags |= XFSMNT_IDELETE; /* default to on */
#endif
@@ -1809,6 +1816,10 @@ xfs_parseargs(
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
args->flags |= XFSMNT_IDELETE;
+ } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+ args->flags &= ~XFSMNT_COMPAT_IOSIZE;
+ } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+ args->flags |= XFSMNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e2bf2ef58b66..8221b11a48c0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -181,40 +181,7 @@ xfs_getattr(
vap->va_rdev = 0;
if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
-
-#if 0
- /* Large block sizes confuse various
- * user space programs, so letting the
- * stripe size through is not a good
- * idea for now.
- */
- vap->va_blocksize = mp->m_swidth ?
- /*
- * If the underlying volume is a stripe, then
- * return the stripe width in bytes as the
- * recommended I/O size.
- */
- (mp->m_swidth << mp->m_sb.sb_blocklog) :
- /*
- * Return the largest of the preferred buffer
- * sizes since doing small I/Os into larger
- * buffers causes buffers to be decommissioned.
- * The value returned is in bytes.
- */
- (1 << (int)MAX(mp->m_readio_log,
- mp->m_writeio_log));
-
-#else
- vap->va_blocksize =
- /*
- * Return the largest of the preferred buffer
- * sizes since doing small I/Os into larger
- * buffers causes buffers to be decommissioned.
- * The value returned is in bytes.
- */
- 1 << (int)MAX(mp->m_readio_log,
- mp->m_writeio_log);
-#endif
+ vap->va_blocksize = xfs_preferred_iosize(mp);
} else {
/*