diff options
author | Andreas Gruenbacher <agruenba@redhat.com> | 2018-06-19 15:08:02 +0100 |
---|---|---|
committer | Andreas Gruenbacher <agruenba@redhat.com> | 2018-07-02 16:27:32 +0100 |
commit | 967bcc91b044936e85dbb5848952dc1335a846f4 (patch) | |
tree | 730618d2da59b6097f6799c17ad2a4effb9ca34a | |
parent | bcfe94139a45fae128844558d6e27a0258860a90 (diff) | |
download | lwn-967bcc91b044936e85dbb5848952dc1335a846f4.tar.gz lwn-967bcc91b044936e85dbb5848952dc1335a846f4.zip |
gfs2: iomap direct I/O support
The page unmapping previously done in gfs2_direct_IO is now done
generically in iomap_dio_rw.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
-rw-r--r-- | fs/gfs2/aops.c | 100 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 14 | ||||
-rw-r--r-- | fs/gfs2/file.c | 132 |
3 files changed, 136 insertions, 110 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ecfbca9c88ff..1054cc4a96db 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, return 0; } -static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, bh_result, 0); -} - /** * gfs2_writepage_common - Common bits of writepage * @page: The page to be written @@ -1025,96 +1019,6 @@ out: } /** - * gfs2_ok_for_dio - check that dio is valid on this file - * @ip: The inode - * @offset: The offset at which we are reading or writing - * - * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) - * 1 (to accept the i/o request) - */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -{ - /* - * Should we return an error here? I can't see that O_DIRECT for - * a stuffed file makes any sense. For now we'll silently fall - * back to buffered I/O - */ - if (gfs2_is_stuffed(ip)) - return 0; - - if (offset >= i_size_read(&ip->i_inode)) - return 0; - return 1; -} - - - -static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct address_space *mapping = inode->i_mapping; - struct gfs2_inode *ip = GFS2_I(inode); - loff_t offset = iocb->ki_pos; - struct gfs2_holder gh; - int rv; - - /* - * Deferred lock, even if its a write, since we do no allocation - * on this path. All we need change is atime, and this lock mode - * ensures that other nodes have flushed their buffered read caches - * (i.e. their page cache entries for this inode). We do not, - * unfortunately have the option of only flushing a range like - * the VFS does. - */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - rv = gfs2_glock_nq(&gh); - if (rv) - goto out_uninit; - rv = gfs2_ok_for_dio(ip, offset); - if (rv != 1) - goto out; /* dio not valid, fall back to buffered i/o */ - - /* - * Now since we are holding a deferred (CW) lock at this point, you - * might be wondering why this is ever needed. There is a case however - * where we've granted a deferred local lock against a cached exclusive - * glock. That is ok provided all granted local locks are deferred, but - * it also means that it is possible to encounter pages which are - * cached and possibly also mapped. So here we check for that and sort - * them out ahead of the dio. The glock state machine will take care of - * everything else. - * - * If in fact the cached glock state (gl->gl_state) is deferred (CW) in - * the first place, mapping->nr_pages will always be zero. - */ - if (mapping->nrpages) { - loff_t lstart = offset & ~(PAGE_SIZE - 1); - loff_t len = iov_iter_count(iter); - loff_t end = PAGE_ALIGN(offset + len) - 1; - - rv = 0; - if (len == 0) - goto out; - if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); - rv = filemap_write_and_wait_range(mapping, lstart, end); - if (rv) - goto out; - if (iov_iter_rw(iter) == WRITE) - truncate_inode_pages_range(mapping, lstart, end); - } - - rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - gfs2_get_block_direct, NULL, NULL, 0); -out: - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); - return rv; -} - -/** * gfs2_releasepage - free the metadata associated with a page * @page: the page that's being released * @gfp_mask: passed from Linux VFS, ignored by us @@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = { .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = { .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8b5876e19ecf..29391090d5b7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -915,6 +915,9 @@ do_alloc: } else if (flags & IOMAP_WRITE) { u64 alloc_size; + if (flags & IOMAP_DIRECT) + goto out; /* (see gfs2_file_direct_write) */ + len = gfs2_alloc_size(inode, mp, len); alloc_size = len << inode->i_blkbits; if (alloc_size < iomap->length) @@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, int ret; trace_gfs2_iomap_start(ip, pos, length, flags); - if (flags & IOMAP_WRITE) { + if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); release_metapath(&mp); + /* + * Silently fall back to buffered I/O for stuffed files or if + * we've hot a hole (see gfs2_file_direct_write). + */ + if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && + iomap->type != IOMAP_MAPPED) + ret = -ENOTBLK; } trace_gfs2_iomap_end(ip, iomap, ret); return ret; @@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, struct gfs2_trans *tr = current->journal_info; struct buffer_head *dibh = iomap->private; - if (!(flags & IOMAP_WRITE)) + if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) goto out; if (iomap->type != IOMAP_INLINE) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 16dd395479a5..89280515169e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } +static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + size_t count = iov_iter_count(to); + struct gfs2_holder gh; + ssize_t ret; + + if (!count) + return 0; /* skip atime */ + + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + /* fall back to buffered I/O for stuffed files */ + ret = -ENOTBLK; + if (gfs2_is_stuffed(ip)) + goto out; + + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); + +out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + size_t len = iov_iter_count(from); + loff_t offset = iocb->ki_pos; + struct gfs2_holder gh; + ssize_t ret; + + /* + * Deferred lock, even if its a write, since we do no allocation on + * this path. All we need to change is the atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately, have the option of only flushing a range like the + * VFS does. + */ + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + /* Silently fall back to buffered I/O when writing beyond EOF */ + if (offset + len > i_size_read(&ip->i_inode)) + goto out; + + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); + +out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = gfs2_file_direct_read(iocb, to); + if (likely(ret != -ENOTBLK)) + return ret; + iocb->ki_flags &= ~IOCB_DIRECT; + } + return generic_file_read_iter(iocb, to); +} + /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context @@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); - ssize_t ret; + ssize_t written = 0, ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_glock_dq_uninit(&gh); } - if (iocb->ki_flags & IOCB_DIRECT) - return generic_file_write_iter(iocb, from); - inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) @@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out2; - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; + loff_t pos, endbyte; + ssize_t buffered; + + written = gfs2_file_direct_write(iocb, from); + if (written < 0 || !iov_iter_count(from)) + goto out2; + + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (unlikely(ret < 0)) + goto out2; + buffered = ret; + + /* + * We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT + * semantics. + */ + pos = iocb->ki_pos; + endbyte = pos + buffered - 1; + ret = filemap_write_and_wait_range(mapping, pos, endbyte); + if (!ret) { + iocb->ki_pos += buffered; + written += buffered; + invalidate_mapping_pages(mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + } else { + /* + * We don't know how much we wrote, so just return + * the number of bytes which were direct-written + */ + } + } else { + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (likely(ret > 0)) + iocb->ki_pos += ret; + } out2: current->backing_dev_info = NULL; out: inode_unlock(inode); if (likely(ret > 0)) { - iocb->ki_pos += ret; - /* Handle various SYNC-type writes */ ret = generic_write_sync(iocb, ret); } - return ret; + return written ? written : ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, @@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, |