author      Chris Mason <chris.mason@oracle.com>    2010-05-25 20:56:50 -0400
committer   Chris Mason <chris.mason@oracle.com>    2010-05-25 21:52:08 -0400
commit      4845e44ffdb26be9b25610664228e8ecaf949a0d (patch)
tree        8852e175b6b02a36df6b47c54d574f3365ddb34f /fs/btrfs/inode.c
parent      eaf25d933e64c2bf3c79b83e8820404f36fdfc52 (diff)
Btrfs: rework O_DIRECT enospc handling
This changes the O_DIRECT write code to mark extents as delalloc while it is processing them. Yan Zheng has reworked the enospc accounting based on tracking delalloc extents, and this makes it much easier to track enospc in the O_DIRECT code.

There are a few special cases in the O_DIRECT code though: it only sets the EXTENT_DELALLOC bits, instead of doing EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, because we don't want to mess with clearing the dirty and uptodate bits when things go wrong. This is important because there are no pages in the page cache, so any extent state structs that we put in the tree won't get freed by releasepage. We have to clear them ourselves as the DIO ends.

With this commit, we reserve space in btrfs_file_aio_write, and then as each btrfs_direct_IO call progresses it sets EXTENT_DELALLOC on the range. btrfs_get_blocks_direct is responsible for clearing the delalloc at the same time it drops the extent lock.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
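The lifecycle the patch below sets up is easier to follow spelled out in one place. The following stand-alone C sketch models it in user space; struct range_state, set_bits()/clear_bits() and do_dio() are hypothetical stand-ins (not btrfs APIs) for the io_tree, set_extent_bit()/clear_extent_bit() and __blockdev_direct_IO(), and the real patch spreads the final clear across btrfs_get_blocks_direct() and the error paths in btrfs_direct_IO().

/*
 * Minimal user-space sketch of the extent-state lifecycle for O_DIRECT
 * writes after this patch.  NOT kernel code: the struct and helpers are
 * invented stand-ins.  What matters is the ordering: lock the range, set
 * EXTENT_DELALLOC alone (no DIRTY/UPTODATE), and have the DIO path clear
 * both the lock and the delalloc bit itself, because there are no
 * page-cache pages whose releasepage would free the state for us.
 */
#include <stdio.h>
#include <stdbool.h>

#define EXTENT_LOCKED   (1u << 0)
#define EXTENT_DELALLOC (1u << 1)

struct range_state {               /* stand-in for one extent_state record */
	unsigned long long start;
	unsigned long long end;
	unsigned int bits;
};

static void set_bits(struct range_state *rs, unsigned int bits)
{
	rs->bits |= bits;
}

static void clear_bits(struct range_state *rs, unsigned int bits)
{
	rs->bits &= ~bits;
}

/* pretend DIO: returns bytes written, or a negative errno-style value */
static long do_dio(bool fail)
{
	return fail ? -5 /* -EIO */ : 4096;
}

static long direct_write(struct range_state *rs, bool fail)
{
	long ret;

	/* lock_extent_bits(): serialize against ordered extents */
	set_bits(rs, EXTENT_LOCKED);

	/*
	 * set_extent_bit(..., EXTENT_DELALLOC, ...): account the range as
	 * delalloc so the enospc tracking sees it, but deliberately without
	 * EXTENT_DIRTY | EXTENT_UPTODATE.
	 */
	set_bits(rs, EXTENT_DELALLOC);

	ret = do_dio(fail);

	/*
	 * clear_extent_bit(..., EXTENT_LOCKED | EXTENT_DELALLOC, ...):
	 * success or failure, we drop both bits ourselves; nothing else
	 * will free this state for a range with no page-cache pages.
	 */
	clear_bits(rs, EXTENT_LOCKED | EXTENT_DELALLOC);
	return ret;
}

int main(void)
{
	struct range_state rs = { .start = 0, .end = 4095, .bits = 0 };
	long ret;

	ret = direct_write(&rs, false);
	printf("ok path:   ret=%ld bits=%#x\n", ret, rs.bits);

	ret = direct_write(&rs, true);
	printf("fail path: ret=%ld bits=%#x\n", ret, rs.bits);
	return 0;
}

In the kernel patch the same pattern shows up twice: btrfs_get_blocks_direct() clears EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY as it maps each chunk, and btrfs_direct_IO() clears EXTENT_LOCKED plus the write bits on the error and partial-write paths.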
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--   fs/btrfs/inode.c   52
1 file changed, 41 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 13a4aa222861..00aefbdcc2df 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5327,8 +5327,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
return PTR_ERR(em);
len = min(len, em->block_len);
}
- unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1,
- GFP_NOFS);
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
+ EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
+ 0, NULL, GFP_NOFS);
map:
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
@@ -5596,14 +5597,18 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct btrfs_ordered_extent *ordered;
+ struct extent_state *cached_state = NULL;
u64 lockstart, lockend;
ssize_t ret;
+ int writing = rw & WRITE;
+ int write_bits = 0;
lockstart = offset;
lockend = offset + iov_length(iov, nr_segs) - 1;
+
while (1) {
- lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- GFP_NOFS);
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ 0, &cached_state, GFP_NOFS);
/*
* We're concerned with the entire range that we're going to be
* doing DIO to, so we need to make sure theres no ordered
@@ -5613,29 +5618,54 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
lockend - lockstart + 1);
if (!ordered)
break;
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- GFP_NOFS);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state, GFP_NOFS);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
cond_resched();
}
+ /*
+ * we don't use btrfs_set_extent_delalloc because we don't want
+ * the dirty or uptodate bits
+ */
+ if (writing) {
+ write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ EXTENT_DELALLOC, 0, NULL, &cached_state,
+ GFP_NOFS);
+ if (ret) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, EXTENT_LOCKED | write_bits,
+ 1, 0, &cached_state, GFP_NOFS);
+ goto out;
+ }
+ }
+
+ free_extent_state(cached_state);
+ cached_state = NULL;
+
ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs,
btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0);
if (ret < 0 && ret != -EIOCBQUEUED) {
- unlock_extent(&BTRFS_I(inode)->io_tree, offset,
- offset + iov_length(iov, nr_segs) - 1, GFP_NOFS);
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
+ offset + iov_length(iov, nr_segs) - 1,
+ EXTENT_LOCKED | write_bits, 1, 0,
+ &cached_state, GFP_NOFS);
} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
/*
* We're falling back to buffered, unlock the section we didn't
* do IO on.
*/
- unlock_extent(&BTRFS_I(inode)->io_tree, offset + ret,
- offset + iov_length(iov, nr_segs) - 1, GFP_NOFS);
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
+ offset + iov_length(iov, nr_segs) - 1,
+ EXTENT_LOCKED | write_bits, 1, 0,
+ &cached_state, GFP_NOFS);
}
-
+out:
+ free_extent_state(cached_state);
return ret;
}