diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-10-03 12:30:02 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-03 12:30:02 -0400 |
commit | cb843a6f513a1a91c54951005e60bd9b95bdf973 (patch) | |
tree | 12edfb1154691f1a8aaeeadb97899397574aa785 /fs/btrfs | |
parent | 323ac95bce442bbde514e3ce57e840402f80d909 (diff) | |
download | lwn-cb843a6f513a1a91c54951005e60bd9b95bdf973.tar.gz lwn-cb843a6f513a1a91c54951005e60bd9b95bdf973.zip |
Btrfs: O_DIRECT writes via buffered writes + invalidate
This reworks the btrfs O_DIRECT write code a bit. It had always fallen
back to buffered IO and done an invalidate, but needed to be updated
for the data=ordered code. The invalidate wasn't actually removing pages
because they were still inside an ordered extent.
This also combines the O_DIRECT/O_SYNC paths where possible, and kicks
off IO in the main btrfs_file_write loop to keep the pipe down to the
disk full as we process long writes.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/file.c | 64 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 2 |
3 files changed, 38 insertions, 31 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3088a1184483..a03d1bbb19ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pinned[2]; unsigned long first_index; unsigned long last_index; + int will_write; + + will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); @@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (ret) goto out; + if (will_write) { + btrfs_fdatawrite_range(inode->i_mapping, pos, + pos + write_bytes - 1, + WB_SYNC_NONE); + } else { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + num_pages); + if (num_pages < + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); + } + buf += write_bytes; count -= write_bytes; pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) - btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: @@ -1023,36 +1036,29 @@ out_nolock: page_cache_release(pinned[1]); *ppos = pos; - if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + if (num_written > 0 && will_write) { struct btrfs_trans_handle *trans; - err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, - start_pos + num_written -1, - WB_SYNC_NONE); - if (err < 0) - num_written = err; - - err = btrfs_wait_on_page_writeback_range(inode->i_mapping, - start_pos, start_pos + num_written - 1); - if (err < 0) + err = btrfs_wait_ordered_range(inode, start_pos, num_written); + if (err) num_written = err; - trans = btrfs_start_transaction(root, 1); - ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret == 0) { - btrfs_sync_log(trans, 
root); - btrfs_end_transaction(trans, root); - } else { - btrfs_commit_transaction(trans, root); + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, + file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } + } + if (file->f_flags & O_DIRECT) { + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } - } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { - do_sync_mapping_range(inode->i_mapping, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); - invalidate_mapping_pages(inode->i_mapping, - start_pos >> PAGE_CACHE_SHIFT, - (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? num_written : err; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dcc1730dd837..2eb6caba57c2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. 
*/ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; @@ -451,6 +451,7 @@ again: (unsigned long long)orig_end); goto again; } + return 0; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index fd45519f30a8..f50f8870a144 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, |