Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--   fs/btrfs/extent_io.c   334
1 file changed, 205 insertions, 129 deletions
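Most of the churn in the diff below comes from renaming struct btrfs_io_bio to struct btrfs_bio, while the old chunk-mapping struct btrfs_bio becomes struct btrfs_io_context (bbio -> bioc). The accessor btrfs_bio(bio) and the offsetof(struct btrfs_bio, bio) argument passed to bioset_init() both rely on the bio being embedded as the last member of the wrapper, so the end_io path can recover the private data from the plain bio pointer. The following is a minimal userspace sketch of that container_of idiom; the struct layout and names are illustrative stand-ins, not the kernel definitions.

/* Illustrative stand-ins only; the real definitions live in the kernel tree. */
#include <stddef.h>
#include <stdio.h>

struct bio {				/* stand-in for the block layer's struct bio */
	unsigned long long bi_sector;
};

struct btrfs_bio_sketch {		/* hypothetical wrapper; bio must stay the last member */
	unsigned int mirror_num;
	struct bio bio;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* What btrfs_bio(bio) does: map the embedded bio back to its wrapper. */
static struct btrfs_bio_sketch *to_wrapper(struct bio *bio)
{
	return container_of(bio, struct btrfs_bio_sketch, bio);
}

int main(void)
{
	struct btrfs_bio_sketch bbio = { .mirror_num = 2 };
	struct bio *bio = &bbio.bio;	/* what an end_io callback would receive */

	printf("mirror_num=%u\n", to_wrapper(bio)->mirror_num);
	return 0;
}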
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaddd7225348..4e03a6d3aa32 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -241,7 +241,7 @@ int __init extent_io_init(void)
 		return -ENOMEM;
 
 	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
-			offsetof(struct btrfs_io_bio, bio),
+			offsetof(struct btrfs_bio, bio),
 			BIOSET_NEED_BVECS))
 		goto free_buffer_cache;
 
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 
 /*
  * Find and lock a contiguous range of bytes in the file marked as delalloc, no
- * more than @max_bytes. @Start and @end are used to return the range,
+ * more than @max_bytes.
  *
- * Return: true if we find something
- *         false if nothing was in the tree
+ * @start:	The original start bytenr to search.
+ *		Will store the extent range start bytenr.
+ * @end:	The original end bytenr of the search range
+ *		Will store the extent range end bytenr.
+ *
+ * Return true if we find a delalloc range which starts inside the original
+ * range, and @start/@end will store the delalloc range start/end.
+ *
+ * Return false if we can't find any delalloc range which starts inside the
+ * original range, and @start/@end will be the non-delalloc range start/end.
  */
 EXPORT_FOR_TESTS
 noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
@@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 				    u64 *end)
 {
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+	const u64 orig_start = *start;
+	const u64 orig_end = *end;
 	u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
 	u64 delalloc_start;
 	u64 delalloc_end;
@@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 	int ret;
 	int loops = 0;
 
+	/* Caller should pass a valid @end to indicate the search range end */
+	ASSERT(orig_end > orig_start);
+
+	/* The range should at least cover part of the page */
+	ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
+		 orig_end <= page_offset(locked_page)));
 again:
 	/* step one, find a bunch of delalloc bytes starting at start */
 	delalloc_start = *start;
 	delalloc_end = 0;
 	found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
 					  max_bytes, &cached_state);
-	if (!found || delalloc_end <= *start) {
+	if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
 		*start = delalloc_start;
-		*end = delalloc_end;
+
+		/* @delalloc_end can be -1, never go beyond @orig_end */
+		*end = min(delalloc_end, orig_end);
 		free_extent_state(cached_state);
 		return false;
 	}
@@ -2282,15 +2300,15 @@ int free_io_failure(struct extent_io_tree *failure_tree,
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
  */
-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-		      u64 length, u64 logical, struct page *page,
-		      unsigned int pg_offset, int mirror_num)
+static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+			     u64 length, u64 logical, struct page *page,
+			     unsigned int pg_offset, int mirror_num)
 {
 	struct bio *bio;
 	struct btrfs_device *dev;
 	u64 map_length = 0;
 	u64 sector;
-	struct btrfs_bio *bbio = NULL;
+	struct btrfs_io_context *bioc = NULL;
 	int ret;
 
 	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -2299,12 +2317,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 	if (btrfs_is_zoned(fs_info))
 		return btrfs_repair_one_zone(fs_info, logical);
 
-	bio = btrfs_io_bio_alloc(1);
+	bio = btrfs_bio_alloc(1);
 	bio->bi_iter.bi_size = 0;
 	map_length = length;
 
 	/*
-	 * Avoid races with device replace and make sure our bbio has devices
+	 * Avoid races with device replace and make sure our bioc has devices
 	 * associated to its stripes that don't go away while we are doing the
 	 * read repair operation.
 	 */
@@ -2317,28 +2335,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 		 * stripe's dev and sector.
 		 */
 		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
-				      &map_length, &bbio, 0);
+				      &map_length, &bioc, 0);
 		if (ret) {
 			btrfs_bio_counter_dec(fs_info);
 			bio_put(bio);
 			return -EIO;
 		}
-		ASSERT(bbio->mirror_num == 1);
+		ASSERT(bioc->mirror_num == 1);
 	} else {
 		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-				      &map_length, &bbio, mirror_num);
+				      &map_length, &bioc, mirror_num);
 		if (ret) {
 			btrfs_bio_counter_dec(fs_info);
 			bio_put(bio);
 			return -EIO;
 		}
-		BUG_ON(mirror_num != bbio->mirror_num);
+		BUG_ON(mirror_num != bioc->mirror_num);
 	}
 
-	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
+	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
 	bio->bi_iter.bi_sector = sector;
-	dev = bbio->stripes[bbio->mirror_num - 1].dev;
-	btrfs_put_bbio(bbio);
+	dev = bioc->stripes[bioc->mirror_num - 1].dev;
+	btrfs_put_bioc(bioc);
 	if (!dev || !dev->bdev ||
 	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 		btrfs_bio_counter_dec(fs_info);
@@ -2618,10 +2636,10 @@ int btrfs_repair_one_sector(struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-	struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+	struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
-	struct btrfs_io_bio *repair_io_bio;
+	struct btrfs_bio *repair_bbio;
 	blk_status_t status;
 
 	btrfs_debug(fs_info,
@@ -2639,24 +2657,23 @@ int btrfs_repair_one_sector(struct inode *inode,
 		return -EIO;
 	}
 
-	repair_bio = btrfs_io_bio_alloc(1);
-	repair_io_bio = btrfs_io_bio(repair_bio);
+	repair_bio = btrfs_bio_alloc(1);
+	repair_bbio = btrfs_bio(repair_bio);
 	repair_bio->bi_opf = REQ_OP_READ;
 	repair_bio->bi_end_io = failed_bio->bi_end_io;
 	repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
 	repair_bio->bi_private = failed_bio->bi_private;
 
-	if (failed_io_bio->csum) {
+	if (failed_bbio->csum) {
 		const u32 csum_size = fs_info->csum_size;
 
-		repair_io_bio->csum = repair_io_bio->csum_inline;
-		memcpy(repair_io_bio->csum,
-		       failed_io_bio->csum + csum_size * icsum, csum_size);
+		repair_bbio->csum = repair_bbio->csum_inline;
+		memcpy(repair_bbio->csum,
+		       failed_bbio->csum + csum_size * icsum, csum_size);
 	}
 
 	bio_add_page(repair_bio, page, failrec->len, pgoff);
-	repair_io_bio->logical = failrec->start;
-	repair_io_bio->iter = repair_bio->bi_iter;
+	repair_bbio->iter = repair_bio->bi_iter;
 
 	btrfs_debug(btrfs_sb(inode->i_sb),
 		    "repair read error: submitting new read to mirror %d",
@@ -2976,7 +2993,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+	struct btrfs_bio *bbio = btrfs_bio(bio);
 	struct extent_io_tree *tree, *failure_tree;
 	struct processed_extent processed = { 0 };
 	/*
@@ -3003,7 +3020,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 		btrfs_debug(fs_info,
 			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
 			bio->bi_iter.bi_sector, bio->bi_status,
-			io_bio->mirror_num);
+			bbio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 		failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -3028,14 +3045,14 @@ static void end_bio_extent_readpage(struct bio *bio)
 		end = start + bvec->bv_len - 1;
 		len = bvec->bv_len;
 
-		mirror = io_bio->mirror_num;
+		mirror = bbio->mirror_num;
 		if (likely(uptodate)) {
 			if (is_data_inode(inode)) {
-				error_bitmap = btrfs_verify_data_csum(io_bio,
+				error_bitmap = btrfs_verify_data_csum(bbio,
 						bio_offset, page, start, end);
 				ret = error_bitmap;
 			} else {
-				ret = btrfs_validate_metadata_buffer(io_bio,
+				ret = btrfs_validate_metadata_buffer(bbio,
 					page, start, end, mirror);
 			}
 			if (ret)
@@ -3106,7 +3123,7 @@ readpage_ok:
 	}
 	/* Release the last extent */
 	endio_readpage_release_extent(&processed, NULL, 0, 0, false);
-	btrfs_io_bio_free_csum(io_bio);
+	btrfs_bio_free_csum(bbio);
 	bio_put(bio);
 }
 
@@ -3115,53 +3132,43 @@ readpage_ok:
  * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
  * 'bio' because use of __GFP_ZERO is not supported.
  */
-static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
+static inline void btrfs_bio_init(struct btrfs_bio *bbio)
 {
-	memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
 }
 
 /*
- * The following helpers allocate a bio. As it's backed by a bioset, it'll
- * never fail. We're returning a bio right now but you can call btrfs_io_bio
- * for the appropriate container_of magic
+ * Allocate a btrfs_io_bio, with @nr_iovecs as maximum number of iovecs.
+ *
+ * The bio allocation is backed by bioset and does not fail.
  */
-struct bio *btrfs_bio_alloc(u64 first_byte)
+struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 {
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
-	bio->bi_iter.bi_sector = first_byte >> 9;
-	btrfs_io_bio_init(btrfs_io_bio(bio));
+	ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
+	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+	btrfs_bio_init(btrfs_bio(bio));
 	return bio;
 }
 
 struct bio *btrfs_bio_clone(struct bio *bio)
 {
-	struct btrfs_io_bio *btrfs_bio;
+	struct btrfs_bio *bbio;
 	struct bio *new;
 
 	/* Bio allocation backed by a bioset does not fail */
 	new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
-	btrfs_bio = btrfs_io_bio(new);
-	btrfs_io_bio_init(btrfs_bio);
-	btrfs_bio->iter = bio->bi_iter;
+	bbio = btrfs_bio(new);
+	btrfs_bio_init(bbio);
+	bbio->iter = bio->bi_iter;
 	return new;
 }
 
-struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
-{
-	struct bio *bio;
-
-	/* Bio allocation backed by a bioset does not fail */
-	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
-	btrfs_io_bio_init(btrfs_io_bio(bio));
-	return bio;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
 	struct bio *bio;
-	struct btrfs_io_bio *btrfs_bio;
+	struct btrfs_bio *bbio;
 
 	ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
@@ -3169,11 +3176,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 	bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
 	ASSERT(bio);
 
-	btrfs_bio = btrfs_io_bio(bio);
-	btrfs_io_bio_init(btrfs_bio);
+	bbio = btrfs_bio(bio);
+	btrfs_bio_init(bbio);
 
 	bio_trim(bio, offset >> 9, size >> 9);
-	btrfs_bio->iter = bio->bi_iter;
+	bbio->iter = bio->bi_iter;
 	return bio;
 }
 
@@ -3307,14 +3314,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	struct bio *bio;
 	int ret;
 
+	bio = btrfs_bio_alloc(BIO_MAX_VECS);
 	/*
 	 * For compressed page range, its disk_bytenr is always @disk_bytenr
 	 * passed in, no matter if we have added any range into previous bio.
 	 */
 	if (bio_flags & EXTENT_BIO_COMPRESSED)
-		bio = btrfs_bio_alloc(disk_bytenr);
+		bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
 	else
-		bio = btrfs_bio_alloc(disk_bytenr + offset);
+		bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
 	bio_ctrl->bio = bio;
 	bio_ctrl->bio_flags = bio_flags;
 	bio->bi_end_io = end_io_func;
@@ -3327,7 +3335,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	if (wbc) {
 		struct block_device *bdev;
 
-		bdev = fs_info->fs_devices->latest_bdev;
+		bdev = fs_info->fs_devices->latest_dev->bdev;
 		bio_set_dev(bio, bdev);
 		wbc_init_bio(wbc, bio);
 	}
@@ -3341,7 +3349,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 			goto error;
 		}
 
-		btrfs_io_bio(bio)->device = device;
+		btrfs_bio(bio)->device = device;
 	}
 	return 0;
 error:
@@ -3599,6 +3607,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		bool force_bio_submit = false;
 		u64 disk_bytenr;
 
+		ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 		if (cur >= last_byte) {
 			struct extent_state *cached = NULL;
@@ -3777,17 +3786,18 @@ static void update_nr_written(struct writeback_control *wbc,
  */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		struct page *page, struct writeback_control *wbc,
-		u64 delalloc_start, unsigned long *nr_written)
+		unsigned long *nr_written)
 {
-	u64 page_end = delalloc_start + PAGE_SIZE - 1;
-	bool found;
+	const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
+	u64 delalloc_start = page_offset(page);
 	u64 delalloc_to_write = 0;
-	u64 delalloc_end = 0;
 	int ret;
 	int page_started = 0;
 
+	while (delalloc_start < page_end) {
+		u64 delalloc_end = page_end;
+		bool found;
 
-	while (delalloc_end < page_end) {
 		found = find_lock_delalloc_range(&inode->vfs_inode, page,
 						 &delalloc_start,
 						 &delalloc_end);
@@ -3854,12 +3864,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 				 struct page *page, u64 *start, u64 *end)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	struct btrfs_subpage_info *spi = fs_info->subpage_info;
 	u64 orig_start = *start;
 	/* Declare as unsigned long so we can use bitmap ops */
-	unsigned long dirty_bitmap;
 	unsigned long flags;
-	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
-	int range_start_bit = nbits;
+	int range_start_bit;
 	int range_end_bit;
 
 	/*
@@ -3872,13 +3881,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 		return;
 	}
 
+	range_start_bit = spi->dirty_offset +
+			  (offset_in_page(orig_start) >> fs_info->sectorsize_bits);
+
 	/* We should have the page locked, but just in case */
 	spin_lock_irqsave(&subpage->lock, flags);
-	dirty_bitmap = subpage->dirty_bitmap;
+	bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
+			       spi->dirty_offset + spi->bitmap_nr_bits);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 
-	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
-			       BTRFS_SUBPAGE_BITMAP_SIZE);
+	range_start_bit -= spi->dirty_offset;
+	range_end_bit -= spi->dirty_offset;
+
 	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
 	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
 }
@@ -4054,8 +4068,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			      struct extent_page_data *epd)
 {
 	struct inode *inode = page->mapping->host;
-	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_SIZE - 1;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	const u64 page_start = page_offset(page);
+	const u64 page_end = page_start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
 	size_t pg_offset;
@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	}
 
 	if (!epd->extent_locked) {
-		ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
-					 &nr_written);
+		ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
 		if (ret == 1)
 			return 0;
 		if (ret)
@@ -4141,8 +4155,20 @@ done:
 	 * capable of that.
 	 */
 	if (PageError(page))
-		end_extent_writepage(page, ret, start, page_end);
-	unlock_page(page);
+		end_extent_writepage(page, ret, page_start, page_end);
+	if (epd->extent_locked) {
+		/*
+		 * If epd->extent_locked, it's from extent_write_locked_range(),
+		 * the page can either be locked by lock_page() or
+		 * process_one_page().
+		 * Let btrfs_page_unlock_writer() handle both cases.
+		 */
+		ASSERT(wbc);
+		btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
+					 wbc->range_end + 1 - wbc->range_start);
+	} else {
+		unlock_page(page);
+	}
 	ASSERT(ret <= 0);
 	return ret;
 }
@@ -4155,6 +4181,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4602,12 +4631,11 @@ static int submit_eb_subpage(struct page *page,
 	int submitted = 0;
 	u64 page_start = page_offset(page);
 	int bit_start = 0;
-	const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
 	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
 	int ret;
 
 	/* Lock and write each dirty extent buffers in the range */
-	while (bit_start < nbits) {
+	while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
 		struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 		struct extent_buffer *eb;
 		unsigned long flags;
@@ -4623,7 +4651,8 @@ static int submit_eb_subpage(struct page *page,
 			break;
 		}
 		spin_lock_irqsave(&subpage->lock, flags);
-		if (!((1 << bit_start) & subpage->dirty_bitmap)) {
+		if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
+			      subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
 			spin_unlock(&page->mapping->private_lock);
 			bit_start++;
@@ -4756,8 +4785,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		free_extent_buffer(eb);
 		return ret;
 	}
-	if (cache)
+	if (cache) {
+		/* Impiles write in zoned mode */
 		btrfs_put_block_group(cache);
+		/* Mark the last eb in a block group */
+		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+	}
 	ret = write_one_eb(eb, wbc, epd);
 	free_extent_buffer(eb);
 	if (ret < 0)
@@ -4873,7 +4907,7 @@ retry:
 	 * extent io tree. Thus we don't want to submit such wild eb
 	 * if the fs already has error.
 	 */
-	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+	if (!BTRFS_FS_ERROR(fs_info)) {
 		ret = flush_write_bio(&epd);
 	} else {
 		ret = -EROFS;
@@ -5069,23 +5103,28 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
 	return ret;
 }
 
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			      int mode)
+/*
+ * Submit the pages in the range to bio for call sites which delalloc range has
+ * already been ran (aka, ordered extent inserted) and all pages are still
+ * locked.
+ */
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 {
+	bool found_error = false;
+	int first_error = 0;
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
-		PAGE_SHIFT;
-
+	u64 cur = start;
+	unsigned long nr_pages;
+	const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
 		.extent_locked = 1,
-		.sync_io = mode == WB_SYNC_ALL,
+		.sync_io = 1,
 	};
 	struct writeback_control wbc_writepages = {
-		.sync_mode	= mode,
-		.nr_to_write	= nr_pages * 2,
+		.sync_mode	= WB_SYNC_ALL,
 		.range_start	= start,
 		.range_end	= end + 1,
 		/* We're called from an async helper function */
@@ -5093,33 +5132,51 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		.no_cgroup_owner = 1,
 	};
 
+	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
+	nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
+		   PAGE_SHIFT;
+	wbc_writepages.nr_to_write = nr_pages * 2;
+
 	wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
-	while (start <= end) {
-		page = find_get_page(mapping, start >> PAGE_SHIFT);
-		if (clear_page_dirty_for_io(page))
-			ret = __extent_writepage(page, &wbc_writepages, &epd);
-		else {
-			btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
-					page, start, start + PAGE_SIZE - 1, true);
-			unlock_page(page);
+	while (cur <= end) {
+		u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+
+		page = find_get_page(mapping, cur >> PAGE_SHIFT);
+		/*
+		 * All pages in the range are locked since
+		 * btrfs_run_delalloc_range(), thus there is no way to clear
+		 * the page dirty flag.
+		 */
+		ASSERT(PageLocked(page));
+		ASSERT(PageDirty(page));
+		clear_page_dirty_for_io(page);
+		ret = __extent_writepage(page, &wbc_writepages, &epd);
+		ASSERT(ret <= 0);
+		if (ret < 0) {
+			found_error = true;
+			first_error = ret;
 		}
 		put_page(page);
-		start += PAGE_SIZE;
+		cur = cur_end + 1;
	}
 
-	ASSERT(ret <= 0);
-	if (ret == 0)
+	if (!found_error)
 		ret = flush_write_bio(&epd);
 	else
 		end_write_bio(&epd, ret);
 
 	wbc_detach_inode(&wbc_writepages);
+	if (found_error)
+		return first_error;
 	return ret;
 }
 
 int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc)
 {
+	struct inode *inode = mapping->host;
+	const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
+	const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
 	int ret = 0;
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
@@ -5127,7 +5184,15 @@ int extent_writepages(struct address_space *mapping,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 
+	/*
+	 * Allow only a single thread to do the reloc work in zoned mode to
+	 * protect the write pointer updates.
+	 */
+	if (data_reloc && zoned)
+		btrfs_inode_lock(inode, 0);
 	ret = extent_write_cache_pages(mapping, wbc, &epd);
+	if (data_reloc && zoned)
+		btrfs_inode_unlock(inode, 0);
 	ASSERT(ret <= 0);
 	if (ret < 0) {
 		end_write_bio(&epd, ret);
@@ -6137,13 +6202,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * page, but it may change in the future for 16K page size
 		 * support, so we still preallocate the memory in the loop.
		 */
-		ret = btrfs_alloc_subpage(fs_info, &prealloc,
-					  BTRFS_SUBPAGE_METADATA);
-		if (ret < 0) {
-			unlock_page(p);
-			put_page(p);
-			exists = ERR_PTR(ret);
-			goto free_eb;
+		if (fs_info->sectorsize < PAGE_SIZE) {
+			prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+			if (IS_ERR(prealloc)) {
+				ret = PTR_ERR(prealloc);
+				unlock_page(p);
+				put_page(p);
+				exists = ERR_PTR(ret);
+				goto free_eb;
+			}
 		}
 
 		spin_lock(&mapping->private_lock);
@@ -7167,32 +7234,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
+#define GANG_LOOKUP_SIZE 16
 static struct extent_buffer *get_next_extent_buffer(
 		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
 {
-	struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
+	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
 	struct extent_buffer *found = NULL;
 	u64 page_start = page_offset(page);
-	int ret;
-	int i;
+	u64 cur = page_start;
 
 	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
-	ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
 	lockdep_assert_held(&fs_info->buffer_lock);
 
-	ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
-			bytenr >> fs_info->sectorsize_bits,
-			PAGE_SIZE / fs_info->nodesize);
-	for (i = 0; i < ret; i++) {
-		/* Already beyond page end */
-		if (gang[i]->start >= page_start + PAGE_SIZE)
-			break;
-		/* Found one */
-		if (gang[i]->start >= bytenr) {
-			found = gang[i];
-			break;
+	while (cur < page_start + PAGE_SIZE) {
+		int ret;
+		int i;
+
+		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
				(void **)gang, cur >> fs_info->sectorsize_bits,
+				min_t(unsigned int, GANG_LOOKUP_SIZE,
+				      PAGE_SIZE / fs_info->nodesize));
+		if (ret == 0)
+			goto out;
+		for (i = 0; i < ret; i++) {
+			/* Already beyond page end */
+			if (gang[i]->start >= page_start + PAGE_SIZE)
+				goto out;
+			/* Found one */
+			if (gang[i]->start >= bytenr) {
+				found = gang[i];
+				goto out;
+			}
		}
+		cur = gang[ret - 1]->start + gang[ret - 1]->len;
	}
+out:
 	return found;
 }
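For readers following the subpage hunks above (find_next_dirty_byte() and submit_eb_subpage()), the new code indexes one shared subpage->bitmaps array through spi->dirty_offset instead of a dedicated dirty_bitmap field, then translates the resulting bit positions back into byte offsets inside the page. The sketch below is a userspace illustration of that bit-to-byte mapping only, assuming 4K sectors in a 64K page; bitmap_next_set_region() here is a simplified stand-in for the kernel helper of the same name, not its real implementation.

#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE	4096u
#define PAGESIZE	65536u
#define NBITS		(PAGESIZE / SECTORSIZE)	/* 16 dirty bits per page */

/* Simplified: find the next run of set bits, returned as [*start, *end). */
static void bitmap_next_set_region(uint32_t bitmap, unsigned int *start,
				   unsigned int *end, unsigned int nbits)
{
	while (*start < nbits && !(bitmap & (1u << *start)))
		(*start)++;
	*end = *start;
	while (*end < nbits && (bitmap & (1u << *end)))
		(*end)++;
}

int main(void)
{
	uint32_t dirty = 0x0f00;		/* sectors 8-11 of the page are dirty */
	uint64_t page_offset = 128 * 1024;	/* file offset where the page starts */
	unsigned int start_bit = 0, end_bit;

	bitmap_next_set_region(dirty, &start_bit, &end_bit, NBITS);
	printf("next dirty byte range: [%llu, %llu)\n",
	       (unsigned long long)(page_offset + (uint64_t)start_bit * SECTORSIZE),
	       (unsigned long long)(page_offset + (uint64_t)end_bit * SECTORSIZE));
	return 0;
}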