diff options
author | Jan Kara <jack@suse.cz> | 2016-05-13 00:44:16 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2016-05-13 00:44:16 -0400 |
commit | 914f82a32d026884743fb3de9f6f0a5908a9d5dd (patch) | |
tree | 7f56a9e69a6e6df90a27750c9bfc08a604a4ed10 /fs/ext4/indirect.c | |
parent | dbc427ce4028580f1244b5b57ca1cbea31aad1e7 (diff) | |
download | lwn-914f82a32d026884743fb3de9f6f0a5908a9d5dd.tar.gz lwn-914f82a32d026884743fb3de9f6f0a5908a9d5dd.zip |
ext4: refactor direct IO code
Currently ext4 direct IO handling is split between ext4_ext_direct_IO()
and ext4_ind_direct_IO(). However the extent based function calls into
the indirect based one for some cases and for example it is not able to
handle file extending. Previously it was not also properly handling
retries in case of ENOSPC errors. With DAX things would get even more
contrieved so just refactor the direct IO code and instead of indirect /
extent split do the split to read vs writes.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/indirect.c')
-rw-r--r-- | fs/ext4/indirect.c | 127 |
1 files changed, 0 insertions, 127 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3027fa681de5..bc15c2c17633 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -649,133 +649,6 @@ out: } /* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. - */ -ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_iter_count(iter); - int retries = 0; - - if (iov_iter_rw(iter) == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { - /* - * Nolock dioread optimization may be dynamically disabled - * via ext4_inode_block_unlocked_dio(). Check inode's state - * while holding extra i_dio_count ref. - */ - inode_dio_begin(inode); - smp_mb(); - if (unlikely(ext4_test_inode_state(inode, - EXT4_STATE_DIOREAD_LOCK))) { - inode_dio_end(inode); - goto locked; - } - if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, - ext4_dio_get_block, NULL, 0); - else - ret = __blockdev_direct_IO(iocb, inode, - inode->i_sb->s_bdev, iter, - offset, ext4_dio_get_block, - NULL, NULL, 0); - inode_dio_end(inode); - } else { -locked: - if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, - ext4_dio_get_block, NULL, DIO_LOCKING); - else - ret = blockdev_direct_IO(iocb, inode, iter, offset, - ext4_dio_get_block); - - if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + count; - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* * Calculate the number of metadata blocks need to reserve * to allocate a new block at @lblocks for non extent file based file */ |