From 8b62b72b26bcd72082c4a69d179dd906bcc22200 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 Sep 2009 16:53:46 -0400 Subject: Btrfs: Use PagePrivate2 to track pages in the data=ordered code. Btrfs writes go through delalloc to the data=ordered code. This makes sure that all of the data is on disk before the metadata that references it. The tracking means that we have to make sure each page in an extent is fully written before we add that extent into the on-disk btree. This was done in the past by setting the EXTENT_ORDERED bit for the range of an extent when it was added to the data=ordered code, and then clearing the EXTENT_ORDERED bit in the extent state tree as each page finished IO. One of the reasons we had to do this was because sometimes pages are magically dirtied without page_mkwrite being called. The EXTENT_ORDERED bit is checked at writepage time, and if it isn't there, our page become dirty without going through the proper path. These bit operations make for a number of rbtree searches for each page, and can cause considerable lock contention. This commit switches from the EXTENT_ORDERED bit to use PagePrivate2. As pages go into the ordered code, PagePrivate2 is set on each one. This is a cheap operation because we already have all the pages locked and ready to go. As IO finishes, the PagePrivate2 bit is cleared and the ordered accoutning is updated for each page. At writepage time, if the PagePrivate2 bit is missing, we go into the writepage fixup code to handle improperly dirtied pages. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9a438d374b6..a102422cd92e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -885,13 +885,6 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, NULL, mask); } -int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, NULL, - mask); -} - int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { @@ -921,13 +914,6 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, NULL, mask); } -int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, - NULL, mask); -} - int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1373,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, int clear_unlock, int clear_delalloc, int clear_dirty, int set_writeback, - int end_writeback) + int end_writeback, + int set_private2) { int ret; struct page *pages[16]; @@ -1392,7 +1379,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, clear_bits |= EXTENT_DELALLOC; clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) + if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || + set_private2)) return 0; while (nr_pages > 0) { @@ -1400,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { + + if (set_private2) + SetPagePrivate2(pages[i]); + if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; @@ -2792,7 +2784,7 @@ int try_release_extent_state(struct extent_map_tree *map, int ret = 1; if (test_range_bit(tree, start, end, - EXTENT_IOBITS | EXTENT_ORDERED, 0, NULL)) + EXTENT_IOBITS, 0, NULL)) ret = 0; else { if ((mask & GFP_NOFS) == GFP_NOFS) @@ -2835,8 +2827,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, } if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED | EXTENT_WRITEBACK | - EXTENT_ORDERED, + EXTENT_LOCKED | EXTENT_WRITEBACK, 0, NULL)) { remove_extent_mapping(map, em); /* once for the rb tree */ -- cgit v1.2.3