[GFS2] Put back O_DIRECT support

This patch adds back O_DIRECT support with various caveats attached: 1. Journaled data can be read via O_DIRECT since it's now the same on-disk format as normal data files. 2. Journaled data writes with O_DIRECT will silently fall back to normal buffered writes (should we really do this, I wonder, or should we return an error instead?). 3. Stuffed files will fall back to normal buffered I/O. 4. All the usual corner cases (write beyond current end of file, write to an unallocated block) will also revert to normal buffered I/O. The I/O path is slightly odd as reads arrive at the page cache layer with the lock for the file already held, but writes arrive unlocked. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
author: Steven Whitehouse <swhiteho@redhat.com> 2006-02-14 11:54:42 +0000
committer: Steven Whitehouse <swhiteho@redhat.com> 2006-02-14 11:54:42 +0000
commit: d1665e414297c3a46fd80cb8242ad0c8e82acae7 (patch)
tree: 7cb19fc4cbfc21d6d890dd3b373d3854920862db /fs
parent: fc69d0d336214219abb521d8ff060f786d7f369e (diff)
download: lwn-d1665e414297c3a46fd80cb8242ad0c8e82acae7.tar.gz
lwn-d1665e414297c3a46fd80cb8242ad0c8e82acae7.zip
2 files changed, 75 insertions, 26 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index b14357e89421..74706f352780 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -14,6 +14,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
+#include <linux/fs.h>
 #include <asm/semaphore.h>
 
 #include "gfs2.h"
@@ -555,30 +556,73 @@ static int gfs2_invalidatepage(struct page *page, unsigned long offset)
 	return ret;
 }
 
-static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			  loff_t offset, unsigned long nr_segs)
+static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
+				    loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder gh;
+	int rv;
+
+	/*
+	 * Shared lock, even though its write, since we do no allocation
+	 * on this path. All we need change is atime.
+	 */
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+	rv = gfs2_glock_nq_m_atime(1, &gh);
+	if (rv)
+		goto out;
+
+	/*
+	 * Should we return an error here? I can't see that O_DIRECT for
+	 * a journaled file makes any sense. For now we'll silently fall
+	 * back to buffered I/O, likewise we do the same for stuffed
+	 * files since they are (a) small and (b) unaligned.
+	 */
+	if (gfs2_is_jdata(ip))
+		goto out;
+
+	if (gfs2_is_stuffed(ip))
+		goto out;
+
+	rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
+				  iov, offset, nr_segs, get_blocks_noalloc,
+				  NULL, DIO_OWN_LOCKING);
+out:
+	gfs2_glock_dq_m(1, &gh);
+	gfs2_holder_uninit(&gh);
+
+	return rv;
+}
+
+/**
+ * gfs2_direct_IO
+ *
+ * This is called with a shared lock already held for the read path.
+ * Currently, no locks are held when the write path is called.
+ */
+static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
+			      const struct iovec *iov, loff_t offset,
+			      unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct gfs2_inode *ip = get_v2ip(inode);
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	get_blocks_t *gb = get_blocks;
 
 	atomic_inc(&sdp->sd_ops_address);
 
-	if (gfs2_is_jdata(ip))
-		return -EINVAL;
+	if (rw == WRITE)
+		return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
 
-	if (rw == WRITE) {
-		return -EOPNOTSUPP; /* for now */
-	} else {
-		if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
-		    gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
-			return -EINVAL;
-	}
+	if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+	    gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+		return -EINVAL;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, gb, NULL);
+	return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
+			 	    offset, nr_segs, get_blocks, NULL,
+				    DIO_OWN_LOCKING);
 }
 
 struct address_space_operations gfs2_file_aops = {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 56820b39a993..bcde7a0b76f1 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -176,16 +176,16 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 		 * If any segment has a negative length, or the cumulative
 		 * length ever wraps negative then return -EINVAL.
 		 */
-	count += iv->iov_len;
-	if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-		return -EINVAL;
-	if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-		continue;
-	if (seg == 0)
-		return -EFAULT;
-	nr_segs = seg;
-	count -= iv->iov_len;   /* This segment is no good */
-	break;
+		count += iv->iov_len;
+		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return -EFAULT;
+		nr_segs = seg;
+		count -= iv->iov_len;   /* This segment is no good */
+		break;
 	}
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
@@ -204,10 +204,14 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 		retval = gfs2_glock_nq_m_atime(1, &gh);
 		if (retval)
 			goto out;
-
+		if (gfs2_is_stuffed(ip)) {
+			gfs2_glock_dq_m(1, &gh);
+			gfs2_holder_uninit(&gh);
+			goto fallback_to_normal;
+		}
 		size = i_size_read(inode);
 		if (pos < size) {
-			 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
+			retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
 			if (retval > 0 && !is_sync_kiocb(iocb))
 				retval = -EIOCBQUEUED;
 			if (retval > 0)
@@ -219,6 +223,7 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 		goto out;
 	}
 
+fallback_to_normal:
 	retval = 0;
 	if (count) {
 		for (seg = 0; seg < nr_segs; seg++) {
author	Steven Whitehouse <swhiteho@redhat.com>	2006-02-14 11:54:42 +0000
committer	Steven Whitehouse <swhiteho@redhat.com>	2006-02-14 11:54:42 +0000
commit	d1665e414297c3a46fd80cb8242ad0c8e82acae7 (patch)
tree	7cb19fc4cbfc21d6d890dd3b373d3854920862db /fs
parent	fc69d0d336214219abb521d8ff060f786d7f369e (diff)
download	lwn-d1665e414297c3a46fd80cb8242ad0c8e82acae7.tar.gz lwn-d1665e414297c3a46fd80cb8242ad0c8e82acae7.zip