Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 616
1 file changed, 365 insertions(+), 251 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d9f856358704..197f5e51c474 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -425,14 +425,14 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
 	struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
 
 	ASSERT(folio_pos(folio) <= start &&
-	       start + len <= folio_pos(folio) + PAGE_SIZE);
+	       start + len <= folio_pos(folio) + folio_size(folio));
 
 	if (uptodate && btrfs_verify_folio(folio, start, len))
 		btrfs_folio_set_uptodate(fs_info, folio, start, len);
 	else
 		btrfs_folio_clear_uptodate(fs_info, folio, start, len);
 
-	if (!btrfs_is_subpage(fs_info, folio->mapping))
+	if (!btrfs_is_subpage(fs_info, folio))
 		folio_unlock(folio);
 	else
 		btrfs_folio_end_lock(fs_info, folio, start, len);
@@ -488,11 +488,11 @@ static void end_bbio_data_write(struct btrfs_bio *bbio)
 static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio)
 {
 	ASSERT(folio_test_locked(folio));
-	if (!btrfs_is_subpage(fs_info, folio->mapping))
+	if (!btrfs_is_subpage(fs_info, folio))
 		return;
 
 	ASSERT(folio_test_private(folio));
-	btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE);
+	btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), folio_size(folio));
 }
 
 /*
@@ -523,8 +523,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
 		u64 end;
 		u32 len;
 
-		/* For now only order 0 folios are supported for data. */
-		ASSERT(folio_order(folio) == 0);
 		btrfs_debug(fs_info,
 			"%s: bi_sector=%llu, err=%d, mirror=%u",
 			__func__, bio->bi_iter.bi_sector, bio->bi_status,
@@ -552,7 +550,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
 
 		if (likely(uptodate)) {
 			loff_t i_size = i_size_read(inode);
-			pgoff_t end_index = i_size >> folio_shift(folio);
 
 			/*
 			 * Zero out the remaining part if this range straddles
@@ -561,9 +558,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
 			 * Here we should only zero the range inside the folio,
 			 * not touch anything else.
 			 *
-			 * NOTE: i_size is exclusive while end is inclusive.
+			 * NOTE: i_size is exclusive while end is inclusive and
+			 * folio_contains() takes PAGE_SIZE units.
 			 */
-			if (folio_index(folio) == end_index && i_size <= end) {
+			if (folio_contains(folio, i_size >> PAGE_SHIFT) &&
+			    i_size <= end) {
 				u32 zero_start = max(offset_in_folio(folio, i_size),
 						     offset_in_folio(folio, start));
 				u32 zero_len = offset_in_folio(folio, end) + 1 -
@@ -629,7 +628,7 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
 	for (allocated = 0; allocated < nr_pages;) {
 		unsigned int last = allocated;
 
-		allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+		allocated = alloc_pages_bulk(gfp, nr_pages, page_array);
 		if (unlikely(allocated == last)) {
 			/* No progress, fail and do cleanup. */
 			for (int i = 0; i < allocated; i++) {
@@ -754,7 +753,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
 {
 	struct btrfs_inode *inode = folio_to_inode(folio);
 
-	ASSERT(pg_offset + size <= PAGE_SIZE);
+	ASSERT(pg_offset + size <= folio_size(folio));
 	ASSERT(bio_ctrl->end_io_func);
 
 	if (bio_ctrl->bbio &&
@@ -837,7 +836,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
 	if (folio->mapping)
 		lockdep_assert_held(&folio->mapping->i_private_lock);
 
-	if (fs_info->nodesize >= PAGE_SIZE) {
+	if (!btrfs_meta_is_subpage(fs_info)) {
 		if (!folio_test_private(folio))
 			folio_attach_private(folio, eb);
 		else
@@ -871,7 +870,7 @@ int set_folio_extent_mapped(struct folio *folio)
 
 	fs_info = folio_to_fs_info(folio);
 
-	if (btrfs_is_subpage(fs_info, folio->mapping))
+	if (btrfs_is_subpage(fs_info, folio))
 		return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
 
 	folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE);
@@ -888,8 +887,8 @@ void clear_folio_extent_mapped(struct folio *folio)
 		return;
 
 	fs_info = folio_to_fs_info(folio);
-	if (btrfs_is_subpage(fs_info, folio->mapping))
-		return btrfs_detach_subpage(fs_info, folio);
+	if (btrfs_is_subpage(fs_info, folio))
+		return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
 
 	folio_detach_private(folio);
 }
@@ -899,7 +898,6 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
 					 u64 len, struct extent_map **em_cached)
 {
 	struct extent_map *em;
-	struct extent_state *cached_state = NULL;
 
 	ASSERT(em_cached);
 
@@ -915,14 +913,12 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
 		*em_cached = NULL;
 	}
 
-	btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state);
 	em = btrfs_get_extent(inode, folio, start, len);
 	if (!IS_ERR(em)) {
 		BUG_ON(*em_cached);
 		refcount_inc(&em->refs);
 		*em_cached = em;
 	}
-	unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state);
 
 	return em;
 }
@@ -939,16 +935,12 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 	struct inode *inode = folio->mapping->host;
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	u64 start = folio_pos(folio);
-	const u64 end = start + PAGE_SIZE - 1;
-	u64 cur = start;
+	const u64 end = start + folio_size(folio) - 1;
 	u64 extent_offset;
 	u64 last_byte = i_size_read(inode);
-	u64 block_start;
 	struct extent_map *em;
 	int ret = 0;
-	size_t pg_offset = 0;
-	size_t iosize;
-	size_t blocksize = fs_info->sectorsize;
+	const size_t blocksize = fs_info->sectorsize;
 
 	ret = set_folio_extent_mapped(folio);
 	if (ret < 0) {
@@ -956,28 +948,32 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		return ret;
 	}
 
-	if (folio->index == last_byte >> folio_shift(folio)) {
+	if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
 		size_t zero_offset = offset_in_folio(folio, last_byte);
 
-		if (zero_offset) {
-			iosize = folio_size(folio) - zero_offset;
-			folio_zero_range(folio, zero_offset, iosize);
-		}
+		if (zero_offset)
+			folio_zero_range(folio, zero_offset,
+					 folio_size(folio) - zero_offset);
 	}
 	bio_ctrl->end_io_func = end_bbio_data_read;
 	begin_folio_read(fs_info, folio);
-	while (cur <= end) {
+	for (u64 cur = start; cur <= end; cur += blocksize) {
 		enum btrfs_compression_type compress_type = BTRFS_COMPRESS_NONE;
+		unsigned long pg_offset = offset_in_folio(folio, cur);
 		bool force_bio_submit = false;
 		u64 disk_bytenr;
+		u64 block_start;
 
 		ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 		if (cur >= last_byte) {
-			iosize = folio_size(folio) - pg_offset;
-			folio_zero_range(folio, pg_offset, iosize);
-			end_folio_read(folio, true, cur, iosize);
+			folio_zero_range(folio, pg_offset, end - cur + 1);
+			end_folio_read(folio, true, cur, end - cur + 1);
 			break;
 		}
+		if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
+			end_folio_read(folio, true, cur, blocksize);
+			continue;
+		}
 		em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
 		if (IS_ERR(em)) {
 			end_folio_read(folio, false, cur, end + 1 - cur);
@@ -989,15 +985,15 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		compress_type = extent_map_compression(em);
 
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
-		iosize = ALIGN(iosize, blocksize);
 		if (compress_type != BTRFS_COMPRESS_NONE)
 			disk_bytenr = em->disk_bytenr;
 		else
 			disk_bytenr = extent_map_block_start(em) + extent_offset;
-		block_start = extent_map_block_start(em);
+
 		if (em->flags & EXTENT_FLAG_PREALLOC)
 			block_start = EXTENT_MAP_HOLE;
+		else
+			block_start = extent_map_block_start(em);
 
 		/*
 		 * If we have a file range that points to a compressed extent
@@ -1046,18 +1042,13 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
-			folio_zero_range(folio, pg_offset, iosize);
-
-			end_folio_read(folio, true, cur, iosize);
-			cur = cur + iosize;
-			pg_offset += iosize;
+			folio_zero_range(folio, pg_offset, blocksize);
+			end_folio_read(folio, true, cur, blocksize);
 			continue;
 		}
 		/* the get_extent function already copied into the folio */
 		if (block_start == EXTENT_MAP_INLINE) {
-			end_folio_read(folio, true, cur, iosize);
-			cur = cur + iosize;
-			pg_offset += iosize;
+			end_folio_read(folio, true, cur, blocksize);
 			continue;
 		}
 
@@ -1068,22 +1059,204 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		if (force_bio_submit)
 			submit_one_bio(bio_ctrl);
-		submit_extent_folio(bio_ctrl, disk_bytenr, folio, iosize,
+		submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
 				    pg_offset);
-		cur = cur + iosize;
-		pg_offset += iosize;
 	}
-
 	return 0;
 }
 
+/*
+ * Check if we can skip waiting for the @ordered extent covering the block at @fileoff.
+ *
+ * @fileoff:	Both input and output.
+ *		Input as the file offset where the check should start at.
+ *		Output as where the next check should start at,
+ *		if the function returns true.
+ *
+ * Return true if we can skip to @fileoff. The caller needs to check the new
+ * @fileoff value to make sure it covers the full range, before skipping the
+ * full OE.
+ *
+ * Return false if we must wait for the ordered extent.
+ */
+static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
+				       struct btrfs_ordered_extent *ordered,
+				       u64 *fileoff)
+{
+	const struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct folio *folio;
+	const u32 blocksize = fs_info->sectorsize;
+	u64 cur = *fileoff;
+	bool ret;
+
+	folio = filemap_get_folio(inode->vfs_inode.i_mapping, cur >> PAGE_SHIFT);
+
+	/*
+	 * We should have locked the folio(s) for range [start, end], thus
+	 * there must be a folio and it must be locked.
+	 */
+	ASSERT(!IS_ERR(folio));
+	ASSERT(folio_test_locked(folio));
+
+	/*
+	 * There are several cases for the folio and OE combination:
+	 *
+	 * 1) Folio has no private flag
+	 *    The OE has all its IO done but not yet finished, and folio got
+	 *    invalidated.
+	 *
+	 *    Here we have to wait for the OE to finish, as it may contain the
+	 *    to-be-inserted data checksum.
+	 *    Without the data checksum inserted into the csum tree, read will
+	 *    just fail with missing csum.
+	 */
+	if (!folio_test_private(folio)) {
+		ret = false;
+		goto out;
+	}
+
+	/*
+	 * 2) The first block is DIRTY.
+	 *
+	 * This means the OE is created by some other folios whose file pos is
+	 * before this one. And since we are holding the folio lock, the writeback
+	 * of this folio cannot start.
+	 *
+	 * We must skip the whole OE, because it will never start until we
+	 * finished our folio read and unlocked the folio.
+	 */
+	if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) {
+		u64 range_len = min(folio_pos(folio) + folio_size(folio),
+				    ordered->file_offset + ordered->num_bytes) - cur;
+
+		ret = true;
+		/*
+		 * At least inside the folio, all the remaining blocks should
+		 * also be dirty.
+		 */
+		ASSERT(btrfs_folio_test_dirty(fs_info, folio, cur, range_len));
+		*fileoff = ordered->file_offset + ordered->num_bytes;
+		goto out;
+	}
+
+	/*
+	 * 3) The first block is uptodate.
+	 *
+	 * At least the first block can be skipped, but we are still not fully
+	 * sure. E.g. if the OE has some other folios in the range that cannot
+	 * be skipped.
+	 * So we return true and update @fileoff to the OE/folio boundary.
+	 */
+	if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
+		u64 range_len = min(folio_pos(folio) + folio_size(folio),
+				    ordered->file_offset + ordered->num_bytes) - cur;
+
+		/*
+		 * The whole range to the OE end or folio boundary should also
+		 * be uptodate.
+		 */
+		ASSERT(btrfs_folio_test_uptodate(fs_info, folio, cur, range_len));
+		ret = true;
+		*fileoff = cur + range_len;
+		goto out;
+	}
+
+	/*
+	 * 4) The first block is not uptodate.
+	 *
+	 * This means the folio is invalidated after the writeback was finished,
+	 * but by some other operations (e.g. block aligned buffered write) the
+	 * folio is inserted into filemap.
+	 * Very much the same as case 1).
+	 */
+	ret = false;
+out:
+	folio_put(folio);
+	return ret;
+}
+
+static bool can_skip_ordered_extent(struct btrfs_inode *inode,
+				    struct btrfs_ordered_extent *ordered,
+				    u64 start, u64 end)
+{
+	const u64 range_end = min(end, ordered->file_offset + ordered->num_bytes - 1);
+	u64 cur = max(start, ordered->file_offset);
+
+	while (cur < range_end) {
+		bool can_skip;
+
+		can_skip = can_skip_one_ordered_range(inode, ordered, &cur);
+		if (!can_skip)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Locking helper to make sure we get a stable view of extent maps for the
+ * involved range.
+ *
+ * This is for folio read paths (read and readahead), thus the involved range
+ * should have all the folios locked.
+ */
+static void lock_extents_for_read(struct btrfs_inode *inode, u64 start, u64 end,
+				  struct extent_state **cached_state)
+{
+	u64 cur_pos;
+
+	/* Caller must provide a valid @cached_state. */
+	ASSERT(cached_state);
+
+	/* The range must at least be page aligned, as all read paths are folio based. */
+	ASSERT(IS_ALIGNED(start, PAGE_SIZE));
+	ASSERT(IS_ALIGNED(end + 1, PAGE_SIZE));
+
+again:
+	lock_extent(&inode->io_tree, start, end, cached_state);
+	cur_pos = start;
+	while (cur_pos < end) {
+		struct btrfs_ordered_extent *ordered;
+
+		ordered = btrfs_lookup_ordered_range(inode, cur_pos,
+						     end - cur_pos + 1);
+		/*
+		 * No ordered extents in the range, and we hold the extent lock,
+		 * no one can modify the extent maps in the range, we're safe to return.
+		 */
+		if (!ordered)
+			break;
+
+		/* Check if we can skip waiting for the whole OE. */
+		if (can_skip_ordered_extent(inode, ordered, start, end)) {
+			cur_pos = min(ordered->file_offset + ordered->num_bytes,
+				      end + 1);
+			btrfs_put_ordered_extent(ordered);
+			continue;
+		}
+
+		/* Now wait for the OE to finish. */
+		unlock_extent(&inode->io_tree, start, end, cached_state);
+		btrfs_start_ordered_extent_nowriteback(ordered, start, end + 1 - start);
+		btrfs_put_ordered_extent(ordered);
+		/* We have unlocked the whole range, restart from the beginning. */
+		goto again;
+	}
+}
+
 int btrfs_read_folio(struct file *file, struct folio *folio)
 {
+	struct btrfs_inode *inode = folio_to_inode(folio);
+	const u64 start = folio_pos(folio);
+	const u64 end = start + folio_size(folio) - 1;
+	struct extent_state *cached_state = NULL;
 	struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
 	struct extent_map *em_cached = NULL;
 	int ret;
 
+	lock_extents_for_read(inode, start, end, &cached_state);
 	ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+	unlock_extent(&inode->io_tree, start, end, &cached_state);
+
 	free_extent_map(em_cached);
 
 	/*
@@ -1102,7 +1275,7 @@ static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bit
 	unsigned int start_bit;
 	unsigned int nbits;
 
-	ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE);
+	ASSERT(start >= folio_start && start + len <= folio_start + folio_size(folio));
 	start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
 	nbits = len >> fs_info->sectorsize_bits;
 	ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits));
@@ -1115,12 +1288,12 @@ static bool find_next_delalloc_bitmap(struct folio *folio,
 	struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
 	const u64 folio_start = folio_pos(folio);
-	const unsigned int bitmap_size = fs_info->sectors_per_page;
+	const unsigned int bitmap_size = btrfs_blocks_per_folio(fs_info, folio);
 	unsigned int start_bit;
 	unsigned int first_zero;
 	unsigned int first_set;
 
-	ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE);
+	ASSERT(start >= folio_start && start < folio_start + folio_size(folio));
 
 	start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
 	first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit);
@@ -1154,9 +1327,10 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 {
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
 	struct writeback_control *wbc = bio_ctrl->wbc;
-	const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
+	const bool is_subpage = btrfs_is_subpage(fs_info, folio);
 	const u64 page_start = folio_pos(folio);
 	const u64 page_end = page_start + folio_size(folio) - 1;
+	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
 	unsigned long delalloc_bitmap = 0;
 	/*
 	 * Save the last found delalloc end. As the delalloc end can go beyond
@@ -1181,14 +1355,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		int bit;
 
 		/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
-		if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
-			ASSERT(fs_info->sectors_per_page > 1);
+		if (btrfs_is_subpage(fs_info, folio)) {
+			ASSERT(blocks_per_folio > 1);
 			btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
 		} else {
 			bio_ctrl->submit_bitmap = 1;
 		}
 
-		for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
+		for_each_set_bit(bit, &bio_ctrl->submit_bitmap, blocks_per_folio) {
 			u64 start = page_start + (bit << fs_info->sectorsize_bits);
 
 			btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize);
@@ -1261,7 +1435,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 					      btrfs_root_id(inode->root),
 					      btrfs_ino(inode),
 					      folio_pos(folio),
-					      fs_info->sectors_per_page,
+					      blocks_per_folio,
 					      &bio_ctrl->submit_bitmap,
 					      found_start, found_len, ret);
 		} else {
@@ -1306,7 +1480,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 			unsigned int bitmap_size = min(
 					(last_finished_delalloc_end - page_start) >>
 					fs_info->sectorsize_bits,
-					fs_info->sectors_per_page);
+					blocks_per_folio);
 
 			for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size)
 				btrfs_mark_ordered_io_finished(inode, folio,
@@ -1321,7 +1495,7 @@ out:
 		delalloc_end = page_end;
 	/*
 	 * delalloc_end is already one less than the total length, so
-	 * we don't subtract one from PAGE_SIZE
+	 * we don't subtract one from PAGE_SIZE.
 	 */
 	delalloc_to_write +=
 		DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
@@ -1330,7 +1504,7 @@ out:
 	 * If all ranges are submitted asynchronously, we just need to account
 	 * for them here.
 	 */
-	if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
+	if (bitmap_empty(&bio_ctrl->submit_bitmap, blocks_per_folio)) {
 		wbc->nr_to_write -= delalloc_to_write;
 		return 1;
 	}
@@ -1431,6 +1605,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 	bool submitted_io = false;
 	bool error = false;
 	const u64 folio_start = folio_pos(folio);
+	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
 	u64 cur;
 	int bit;
 	int ret = 0;
@@ -1439,21 +1614,23 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 	       start + len <= folio_start + folio_size(folio));
 
 	ret = btrfs_writepage_cow_fixup(folio);
-	if (ret) {
+	if (ret == -EAGAIN) {
 		/* Fixup worker will requeue */
 		folio_redirty_for_writepage(bio_ctrl->wbc, folio);
 		folio_unlock(folio);
 		return 1;
 	}
+	if (ret < 0)
+		return ret;
 
 	for (cur = start; cur < start + len; cur += fs_info->sectorsize)
 		set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
 	bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
-		   fs_info->sectors_per_page);
+		   blocks_per_folio);
 
 	bio_ctrl->end_io_func = end_bbio_data_write;
-	for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
+	for_each_set_bit(bit, &bio_ctrl->submit_bitmap, blocks_per_folio) {
 		cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
 
 		if (cur >= i_size) {
@@ -1527,6 +1704,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 	size_t pg_offset;
 	loff_t i_size = i_size_read(&inode->vfs_inode);
 	unsigned long end_index = i_size >> PAGE_SHIFT;
+	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
 
 	trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc);
 
@@ -1548,6 +1726,30 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 	 * The proper bitmap can only be initialized until writepage_delalloc().
 	 */
 	bio_ctrl->submit_bitmap = (unsigned long)-1;
+
+	/*
+	 * If the page is dirty but without private set, it's marked dirty
+	 * without informing the fs.
+	 * Nowadays that is a bug, since the introduction of
+	 * pin_user_pages*().
+	 *
+	 * So here we check if the page has private set to rule out such
+	 * case.
+	 * But we also have a long history of relying on the COW fixup,
+	 * so here we only enable this check for experimental builds until
+	 * we're sure it's safe.
+	 */
+	if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL) &&
+	    unlikely(!folio_test_private(folio))) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		btrfs_err_rl(fs_info,
+	"root %lld ino %llu folio %llu is marked dirty without notifying the fs",
+			     inode->root->root_key.objectid,
+			     btrfs_ino(inode), folio_pos(folio));
+		ret = -EUCLEAN;
+		goto done;
+	}
+
 	ret = set_folio_extent_mapped(folio);
 	if (ret < 0)
 		goto done;
@@ -1559,14 +1761,14 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 		goto done;
 
 	ret = extent_writepage_io(inode, folio, folio_pos(folio),
-				  PAGE_SIZE, bio_ctrl, i_size);
+				  folio_size(folio), bio_ctrl, i_size);
 	if (ret == 1)
 		return 0;
 	if (ret < 0)
 		btrfs_err_rl(fs_info,
 "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
 			     btrfs_root_id(inode->root), btrfs_ino(inode),
-			     folio_pos(folio), fs_info->sectors_per_page,
+			     folio_pos(folio), blocks_per_folio,
 			     &bio_ctrl->submit_bitmap, ret);
 
 	bio_ctrl->wbc->nr_to_write--;
@@ -1722,20 +1924,13 @@ static struct extent_buffer *find_extent_buffer_nolock(
 static void end_bbio_meta_write(struct btrfs_bio *bbio)
 {
 	struct extent_buffer *eb = bbio->private;
-	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct folio_iter fi;
-	u32 bio_offset = 0;
 
 	if (bbio->bio.bi_status != BLK_STS_OK)
 		set_btree_ioerr(eb);
 
 	bio_for_each_folio_all(fi, &bbio->bio) {
-		u64 start = eb->start + bio_offset;
-		struct folio *folio = fi.folio;
-		u32 len = fi.length;
-
-		btrfs_folio_clear_writeback(fs_info, folio, start, len);
-		bio_offset += len;
+		btrfs_meta_folio_clear_writeback(fi.folio, eb);
 	}
 
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
@@ -1789,38 +1984,21 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
 	wbc_init_bio(wbc, &bbio->bio);
 	bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
 	bbio->file_offset = eb->start;
-	if (fs_info->nodesize < PAGE_SIZE) {
-		struct folio *folio = eb->folios[0];
-		bool ret;
+	for (int i = 0; i < num_extent_folios(eb); i++) {
+		struct folio *folio = eb->folios[i];
+		u64 range_start = max_t(u64, eb->start, folio_pos(folio));
+		u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
+				      eb->start + eb->len) - range_start;
 
 		folio_lock(folio);
-		btrfs_subpage_set_writeback(fs_info, folio, eb->start, eb->len);
-		if (btrfs_subpage_clear_and_test_dirty(fs_info, folio, eb->start,
-						       eb->len)) {
-			folio_clear_dirty_for_io(folio);
-			wbc->nr_to_write--;
-		}
-		ret = bio_add_folio(&bbio->bio, folio, eb->len,
-				    eb->start - folio_pos(folio));
-		ASSERT(ret);
-		wbc_account_cgroup_owner(wbc, folio, eb->len);
-		folio_unlock(folio);
-	} else {
-		int num_folios = num_extent_folios(eb);
-
-		for (int i = 0; i < num_folios; i++) {
-			struct folio *folio = eb->folios[i];
-			bool ret;
-
-			folio_lock(folio);
-			folio_clear_dirty_for_io(folio);
-			folio_start_writeback(folio);
-			ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
-			ASSERT(ret);
-			wbc_account_cgroup_owner(wbc, folio, eb->folio_size);
+		btrfs_meta_folio_clear_dirty(folio, eb);
+		btrfs_meta_folio_set_writeback(folio, eb);
+		if (!folio_test_dirty(folio))
 			wbc->nr_to_write -= folio_nr_pages(folio);
-			folio_unlock(folio);
-		}
+		bio_add_folio_nofail(&bbio->bio, folio, range_len,
+				     offset_in_folio(folio, range_start));
+		wbc_account_cgroup_owner(wbc, folio, range_len);
+		folio_unlock(folio);
 	}
 	btrfs_submit_bbio(bbio, 0);
 }
@@ -1846,9 +2024,10 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
 	u64 folio_start = folio_pos(folio);
 	int bit_start = 0;
 	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
+	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
 
 	/* Lock and write each dirty extent buffers in the range */
-	while (bit_start < fs_info->sectors_per_page) {
+	while (bit_start < blocks_per_folio) {
 		struct btrfs_subpage *subpage = folio_get_private(folio);
 		struct extent_buffer *eb;
 		unsigned long flags;
@@ -1864,7 +2043,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
 			break;
 		}
 		spin_lock_irqsave(&subpage->lock, flags);
-		if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * fs_info->sectors_per_page,
+		if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * blocks_per_folio,
 			      subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
 			spin_unlock(&folio->mapping->i_private_lock);
@@ -1930,7 +2109,7 @@ static int submit_eb_page(struct folio *folio, struct btrfs_eb_write_context *ct
 	if (!folio_test_private(folio))
 		return 0;
 
-	if (folio_to_fs_info(folio)->nodesize < PAGE_SIZE)
+	if (btrfs_meta_is_subpage(folio_to_fs_info(folio)))
 		return submit_eb_subpage(folio, wbc);
 
 	spin_lock(&mapping->i_private_lock);
@@ -2189,10 +2368,8 @@ retry:
 			done_index = folio_next_index(folio);
 			/*
 			 * At this point we hold neither the i_pages lock nor
-			 * the page lock: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL),
-			 * or even swizzled back from swapper_space to
-			 * tmpfs file mapping
+			 * the folio lock: the folio may be truncated or
+			 * invalidated (changing folio->mapping to NULL).
 			 */
 			if (!folio_trylock(folio)) {
 				submit_write_bio(bio_ctrl, 0);
@@ -2230,7 +2407,7 @@ retry:
 			 * regular submission.
 			 */
 			if (wbc->sync_mode != WB_SYNC_NONE ||
-			    btrfs_is_subpage(inode_to_fs_info(inode), mapping)) {
+			    btrfs_is_subpage(inode_to_fs_info(inode), folio)) {
 				if (folio_test_writeback(folio))
 					submit_write_bio(bio_ctrl, 0);
 				folio_wait_writeback(folio);
@@ -2311,8 +2488,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
 	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
 
 	while (cur <= end) {
-		u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
-		u32 cur_len = cur_end + 1 - cur;
+		u64 cur_end;
+		u32 cur_len;
 		struct folio *folio;
 
 		folio = filemap_get_folio(mapping, cur >> PAGE_SHIFT);
@@ -2322,13 +2499,18 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
 		 * code is just in case, but shouldn't actually be run.
 		 */
 		if (IS_ERR(folio)) {
+			cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+			cur_len = cur_end + 1 - cur;
 			btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
 						       cur, cur_len, false);
 			mapping_set_error(mapping, PTR_ERR(folio));
-			cur = cur_end + 1;
+			cur = cur_end;
 			continue;
 		}
 
+		cur_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1, end);
+		cur_len = cur_end + 1 - cur;
+
 		ASSERT(folio_test_locked(folio));
 		if (pages_dirty && folio != locked_folio)
 			ASSERT(folio_test_dirty(folio));
@@ -2380,12 +2562,20 @@ void btrfs_readahead(struct readahead_control *rac)
 {
 	struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
 	struct folio *folio;
+	struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
+	const u64 start = readahead_pos(rac);
+	const u64 end = start + readahead_length(rac) - 1;
+	struct extent_state *cached_state = NULL;
 	struct extent_map *em_cached = NULL;
 	u64 prev_em_start = (u64)-1;
 
+	lock_extents_for_read(inode, start, end, &cached_state);
+
 	while ((folio = readahead_folio(rac)) != NULL)
 		btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
 
+	unlock_extent(&inode->io_tree, start, end, &cached_state);
+
 	if (em_cached)
 		free_extent_map(em_cached);
 	submit_one_bio(&bio_ctrl);
@@ -2432,7 +2622,7 @@ static bool try_release_extent_state(struct extent_io_tree *tree,
 				     struct folio *folio)
 {
 	u64 start = folio_pos(folio);
-	u64 end = start + PAGE_SIZE - 1;
+	u64 end = start + folio_size(folio) - 1;
 	bool ret;
 
 	if (test_range_bit_exists(tree, start, end, EXTENT_LOCKED)) {
@@ -2470,7 +2660,7 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
 {
 	u64 start = folio_pos(folio);
-	u64 end = start + PAGE_SIZE - 1;
+	u64 end = start + folio_size(folio) - 1;
 	struct btrfs_inode *inode = folio_to_inode(folio);
 	struct extent_io_tree *io_tree = &inode->io_tree;
@@ -2581,7 +2771,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
 		return;
 	}
 
-	if (fs_info->nodesize >= PAGE_SIZE) {
+	if (!btrfs_meta_is_subpage(fs_info)) {
 		/*
 		 * We do this since we'll remove the pages after we've
 		 * removed the eb from the radix tree, so we could race
@@ -2607,7 +2797,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
 	 * attached to one dummy eb, no sharing.
	 */
 	if (!mapped) {
-		btrfs_detach_subpage(fs_info, folio);
+		btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
 		return;
 	}
@@ -2618,7 +2808,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
 	 * page range and no unfinished IO.
 	 */
 	if (!folio_range_has_eb(folio))
-		btrfs_detach_subpage(fs_info, folio);
+		btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
 
 	spin_unlock(&folio->mapping->i_private_lock);
 }
@@ -2651,15 +2841,14 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 	kmem_cache_free(extent_buffer_cache, eb);
 }
 
-static struct extent_buffer *
-__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
-		      unsigned long len)
+static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+						   u64 start)
 {
 	struct extent_buffer *eb = NULL;
 
 	eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
 	eb->start = start;
-	eb->len = len;
+	eb->len = fs_info->nodesize;
 	eb->fs_info = fs_info;
 	init_rwsem(&eb->lock);
@@ -2668,7 +2857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
 
-	ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
+	ASSERT(eb->len <= BTRFS_MAX_METADATA_BLOCKSIZE);
 
 	return eb;
 }
@@ -2676,10 +2865,9 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 {
 	struct extent_buffer *new;
-	int num_folios = num_extent_folios(src);
 	int ret;
 
-	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
+	new = __alloc_extent_buffer(src->fs_info, src->start);
 	if (new == NULL)
 		return NULL;
@@ -2696,7 +2884,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 		return NULL;
 	}
 
-	for (int i = 0; i < num_folios; i++) {
+	for (int i = 0; i < num_extent_folios(src); i++) {
 		struct folio *folio = new->folios[i];
 
 		ret = attach_extent_buffer_folio(new, folio, NULL);
@@ -2712,26 +2900,24 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 	return new;
 }
 
-struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-						  u64 start, unsigned long len)
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+						u64 start)
 {
 	struct extent_buffer *eb;
-	int num_folios = 0;
 	int ret;
 
-	eb = __alloc_extent_buffer(fs_info, start, len);
+	eb = __alloc_extent_buffer(fs_info, start);
 	if (!eb)
 		return NULL;
 
 	ret = alloc_eb_folio_array(eb, false);
 	if (ret)
-		goto err;
+		goto out;
 
-	num_folios = num_extent_folios(eb);
-	for (int i = 0; i < num_folios; i++) {
+	for (int i = 0; i < num_extent_folios(eb); i++) {
 		ret = attach_extent_buffer_folio(eb, eb->folios[i], NULL);
 		if (ret < 0)
-			goto err;
+			goto out_detach;
 	}
 
 	set_extent_buffer_uptodate(eb);
@@ -2739,23 +2925,19 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 	set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
 	return eb;
-err:
-	for (int i = 0; i < num_folios; i++) {
+
+out_detach:
+	for (int i = 0; i < num_extent_folios(eb); i++) {
 		if (eb->folios[i]) {
 			detach_extent_buffer_folio(eb, eb->folios[i]);
 			folio_put(eb->folios[i]);
 		}
 	}
+out:
 	kmem_cache_free(extent_buffer_cache, eb);
 	return NULL;
 }
 
-struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-						u64 start)
-{
-	return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
-}
-
 static void check_buffer_tree_ref(struct extent_buffer *eb)
 {
 	int refs;
@@ -2794,11 +2976,9 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
 
 static void mark_extent_buffer_accessed(struct extent_buffer *eb)
 {
-	int num_folios= num_extent_folios(eb);
-
 	check_buffer_tree_ref(eb);
-	for (int i = 0; i < num_folios; i++)
+	for (int i = 0; i < num_extent_folios(eb); i++)
 		folio_mark_accessed(eb->folios[i]);
 }
@@ -2831,10 +3011,10 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 	return eb;
 }
 
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 					       u64 start)
 {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	struct extent_buffer *eb, *exists = NULL;
 	int ret;
@@ -2870,8 +3050,11 @@ again:
 free_eb:
 	btrfs_release_extent_buffer(eb);
 	return exists;
-}
+#else
+	/* Stub to avoid linker error when compiled with optimizations turned off. */
+	return NULL;
 #endif
+}
 
 static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
 						struct folio *folio)
@@ -2885,7 +3068,7 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info, struct folio *folio)
 	 * don't try to insert two ebs for the same bytenr.  So here we always
 	 * return NULL and just continue.
 	 */
-	if (fs_info->nodesize < PAGE_SIZE)
+	if (btrfs_meta_is_subpage(fs_info))
 		return NULL;
 
 	/* Page not yet attached to an extent buffer */
@@ -2988,7 +3171,7 @@ retry:
 
 finish:
 	spin_lock(&mapping->i_private_lock);
-	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+	if (existing_folio && btrfs_meta_is_subpage(fs_info)) {
 		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
@@ -3030,8 +3213,6 @@ finish:
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, u64 owner_root, int level)
 {
-	unsigned long len = fs_info->nodesize;
-	int num_folios;
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
@@ -3059,7 +3240,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (eb)
 		return eb;
 
-	eb = __alloc_extent_buffer(fs_info, start, len);
+	eb = __alloc_extent_buffer(fs_info, start);
 	if (!eb)
 		return ERR_PTR(-ENOMEM);
@@ -3079,8 +3260,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	 * The memory will be freed by attach_extent_buffer_page() or freed
 	 * manually if we exit earlier.
 	 */
-	if (fs_info->nodesize < PAGE_SIZE) {
-		prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+	if (btrfs_meta_is_subpage(fs_info)) {
+		prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA);
 		if (IS_ERR(prealloc)) {
 			ret = PTR_ERR(prealloc);
 			goto out;
@@ -3095,9 +3276,8 @@ reallocate:
 		goto out;
 	}
 
-	num_folios = num_extent_folios(eb);
 	/* Attach all pages to the filemap. */
-	for (int i = 0; i < num_folios; i++) {
+	for (int i = 0; i < num_extent_folios(eb); i++) {
 		struct folio *folio;
 
 		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
@@ -3137,7 +3317,7 @@ reallocate:
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
+		WARN_ON(btrfs_meta_folio_test_dirty(folio, eb));
 
 		/*
 		 * Check if the current page is physically contiguous with previous eb
@@ -3148,7 +3328,7 @@ reallocate:
 		if (i && folio_page(eb->folios[i - 1], 0) + 1 != folio_page(folio, 0))
 			page_contig = false;
 
-		if (!btrfs_folio_test_uptodate(fs_info, folio, eb->start, eb->len))
+		if (!btrfs_meta_folio_test_uptodate(folio, eb))
 			uptodate = 0;
 
 		/*
@@ -3191,7 +3371,7 @@ again:
 	 * btree_release_folio will correctly detect that a page belongs to a
 	 * live buffer and won't free them prematurely.
 	 */
-	for (int i = 0; i < num_folios; i++)
+	for (int i = 0; i < num_extent_folios(eb); i++)
 		folio_unlock(eb->folios[i]);
 	return eb;
@@ -3222,7 +3402,7 @@ out:
 	}
 	/*
 	 * Now all pages of that extent buffer is unmapped, set UNMAPPED flag,
-	 * so it can be cleaned up without utilizing page->mapping.
+	 * so it can be cleaned up without utilizing folio->mapping.
 	 */
 	set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
@@ -3322,11 +3502,10 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
 	release_extent_buffer(eb);
 }
 
-static void btree_clear_folio_dirty(struct folio *folio)
+static void btree_clear_folio_dirty_tag(struct folio *folio)
 {
-	ASSERT(folio_test_dirty(folio));
+	ASSERT(!folio_test_dirty(folio));
 	ASSERT(folio_test_locked(folio));
-	folio_clear_dirty_for_io(folio);
 	xa_lock_irq(&folio->mapping->i_pages);
 	if (!folio_test_dirty(folio))
 		__xa_clear_mark(&folio->mapping->i_pages,
@@ -3334,26 +3513,10 @@ static void btree_clear_folio_dirty(struct folio *folio)
 	xa_unlock_irq(&folio->mapping->i_pages);
 }
 
-static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
-{
-	struct btrfs_fs_info *fs_info = eb->fs_info;
-	struct folio *folio = eb->folios[0];
-	bool last;
-
-	/* btree_clear_folio_dirty() needs page locked. */
-	folio_lock(folio);
-	last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, eb->start, eb->len);
-	if (last)
-		btree_clear_folio_dirty(folio);
-	folio_unlock(folio);
-	WARN_ON(atomic_read(&eb->refs) == 0);
-}
-
 void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *eb)
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
-	int num_folios;
 
 	btrfs_assert_tree_write_locked(eb);
@@ -3380,17 +3543,16 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
 	percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, -eb->len,
 				 fs_info->dirty_metadata_batch);
 
-	if (eb->fs_info->nodesize < PAGE_SIZE)
-		return clear_subpage_extent_buffer_dirty(eb);
-
-	num_folios = num_extent_folios(eb);
-	for (int i = 0; i < num_folios; i++) {
+	for (int i = 0; i < num_extent_folios(eb); i++) {
 		struct folio *folio = eb->folios[i];
+		bool last;
 
 		if (!folio_test_dirty(folio))
 			continue;
 		folio_lock(folio);
-		btree_clear_folio_dirty(folio);
+		last = btrfs_meta_folio_clear_and_test_dirty(folio, eb);
+		if (last)
+			btree_clear_folio_dirty_tag(folio);
 		folio_unlock(folio);
 	}
 	WARN_ON(atomic_read(&eb->refs) == 0);
@@ -3398,37 +3560,34 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
 {
-	int num_folios;
 	bool was_dirty;
 
 	check_buffer_tree_ref(eb);
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 
-	num_folios = num_extent_folios(eb);
 	WARN_ON(atomic_read(&eb->refs) == 0);
 	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
 	WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
 
 	if (!was_dirty) {
-		bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
+		bool subpage = btrfs_meta_is_subpage(eb->fs_info);
 
 		/*
 		 * For subpage case, we can have other extent buffers in the
-		 * same page, and in clear_subpage_extent_buffer_dirty() we
+		 * same page, and in clear_extent_buffer_dirty() we
 		 * have to clear page dirty without subpage lock held.
 		 * This can cause race where our page gets dirty cleared after
 		 * we just set it.
 		 *
-		 * Thankfully, clear_subpage_extent_buffer_dirty() has locked
+		 * Thankfully, clear_extent_buffer_dirty() has locked
 		 * its page for other reasons, we can use page lock to prevent
 		 * the above race.
		 */
 		if (subpage)
 			folio_lock(eb->folios[0]);
-		for (int i = 0; i < num_folios; i++)
-			btrfs_folio_set_dirty(eb->fs_info, eb->folios[i],
-					      eb->start, eb->len);
+		for (int i = 0; i < num_extent_folios(eb); i++)
+			btrfs_meta_folio_set_dirty(eb->folios[i], eb);
 		if (subpage)
 			folio_unlock(eb->folios[0]);
 		percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes,
@@ -3436,54 +3595,31 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
 					 eb->fs_info->dirty_metadata_batch);
 	}
 #ifdef CONFIG_BTRFS_DEBUG
-	for (int i = 0; i < num_folios; i++)
+	for (int i = 0; i < num_extent_folios(eb); i++)
 		ASSERT(folio_test_dirty(eb->folios[i]));
 #endif
 }
 
 void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 {
-	struct btrfs_fs_info *fs_info = eb->fs_info;
-	int num_folios = num_extent_folios(eb);
-
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-	for (int i = 0; i < num_folios; i++) {
+	for (int i = 0; i < num_extent_folios(eb); i++) {
 		struct folio *folio = eb->folios[i];
 
 		if (!folio)
 			continue;
 
-		/*
-		 * This is special handling for metadata subpage, as regular
-		 * btrfs_is_subpage() can not handle cloned/dummy metadata.
-		 */
-		if (fs_info->nodesize >= PAGE_SIZE)
-			folio_clear_uptodate(folio);
-		else
-			btrfs_subpage_clear_uptodate(fs_info, folio,
-						     eb->start, eb->len);
+		btrfs_meta_folio_clear_uptodate(folio, eb);
 	}
 }
 
 void set_extent_buffer_uptodate(struct extent_buffer *eb)
 {
-	struct btrfs_fs_info *fs_info = eb->fs_info;
-	int num_folios = num_extent_folios(eb);
-
 	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-	for (int i = 0; i < num_folios; i++) {
-		struct folio *folio = eb->folios[i];
-
-		/*
-		 * This is special handling for metadata subpage, as regular
-		 * btrfs_is_subpage() can not handle cloned/dummy metadata.
-		 */
-		if (fs_info->nodesize >= PAGE_SIZE)
-			folio_mark_uptodate(folio);
-		else
-			btrfs_subpage_set_uptodate(fs_info, folio,
-						   eb->start, eb->len);
-	}
+	for (int i = 0; i < num_extent_folios(eb); i++)
+		btrfs_meta_folio_set_uptodate(eb->folios[i], eb);
 }
 
 static void clear_extent_buffer_reading(struct extent_buffer *eb)
@@ -3496,10 +3632,7 @@ static void clear_extent_buffer_reading(struct extent_buffer *eb)
 static void end_bbio_meta_read(struct btrfs_bio *bbio)
 {
 	struct extent_buffer *eb = bbio->private;
-	struct btrfs_fs_info *fs_info = eb->fs_info;
 	bool uptodate = !bbio->bio.bi_status;
-	struct folio_iter fi;
-	u32 bio_offset = 0;
 
 	/*
 	 * If the extent buffer is marked UPTODATE before the read operation
@@ -3521,19 +3654,6 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
 		set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
 	}
 
-	bio_for_each_folio_all(fi, &bbio->bio) {
-		struct folio *folio = fi.folio;
-		u64 start = eb->start + bio_offset;
-		u32 len = fi.length;
-
-		if (uptodate)
-			btrfs_folio_set_uptodate(fs_info, folio, start, len);
-		else
-			btrfs_folio_clear_uptodate(fs_info, folio, start, len);
-
-		bio_offset += len;
-	}
-
 	clear_extent_buffer_reading(eb);
 	free_extent_buffer(eb);
 
@@ -3544,7 +3664,6 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
 				    const struct btrfs_tree_parent_check *check)
 {
 	struct btrfs_bio *bbio;
-	bool ret;
 
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
@@ -3584,19 +3703,14 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
 	bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
 	bbio->file_offset = eb->start;
 	memcpy(&bbio->parent_check, check, sizeof(*check));
-	if (eb->fs_info->nodesize < PAGE_SIZE) {
-		ret = bio_add_folio(&bbio->bio, eb->folios[0], eb->len,
-				    eb->start - folio_pos(eb->folios[0]));
-		ASSERT(ret);
-	} else {
-		int num_folios = num_extent_folios(eb);
-
-		for (int i = 0; i < num_folios; i++) {
-			struct folio *folio = eb->folios[i];
+	for (int i = 0; i < num_extent_folios(eb); i++) {
+		struct folio *folio = eb->folios[i];
+		u64 range_start = max_t(u64, eb->start, folio_pos(folio));
+		u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
+				      eb->start + eb->len) - range_start;
 
-			ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
-			ASSERT(ret);
-		}
+		bio_add_folio_nofail(&bbio->bio, folio, range_len,
+				     offset_in_folio(folio, range_start));
 	}
 	btrfs_submit_bbio(bbio, mirror_num);
 	return 0;
@@ -3785,7 +3899,7 @@ static void assert_eb_folio_uptodate(const struct extent_buffer *eb, int i)
 	if (test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
 		return;
 
-	if (fs_info->nodesize < PAGE_SIZE) {
+	if (btrfs_meta_is_subpage(fs_info)) {
 		folio = eb->folios[0];
 		ASSERT(i == 0);
 		if (WARN_ON(!btrfs_subpage_test_uptodate(fs_info, folio,
@@ -4271,7 +4385,7 @@ int try_release_extent_buffer(struct folio *folio)
 {
 	struct extent_buffer *eb;
 
-	if (folio_to_fs_info(folio)->nodesize < PAGE_SIZE)
+	if (btrfs_meta_is_subpage(folio_to_fs_info(folio)))
 		return try_release_subpage_extent_buffer(folio);
 
 	/*
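A note on the recurring pattern above: both write_one_eb() and read_extent_buffer_pages_nowait() drop their old nodesize-vs-PAGE_SIZE branches in favour of one loop that clamps each folio against the extent buffer's byte range before adding it to the bio. The following is a minimal standalone sketch of that max_t()/min_t() arithmetic, not kernel code; the 16K-nodesize/32K-folio geometry, the MIN/MAX macros, and all variable names are assumptions made up for illustration.

/*
 * Userspace sketch of the per-folio clamping used by the new
 * write_one_eb()/read_extent_buffer_pages_nowait() loops: each folio
 * contributes only the bytes overlapping [eb_start, eb_start + eb_len).
 */
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	const uint64_t eb_start = 60 * 1024;	/* eb at 60K (made-up) */
	const uint64_t eb_len = 16 * 1024;	/* 16K nodesize (made-up) */
	const uint64_t folio_sz = 32 * 1024;	/* 32K folios (made-up) */

	/* Walk every folio that overlaps the eb, starting at the first one. */
	for (uint64_t folio_pos = eb_start - (eb_start % folio_sz);
	     folio_pos < eb_start + eb_len; folio_pos += folio_sz) {
		/* Mirrors the max_t()/min_t() pair in the diff above. */
		uint64_t range_start = MAX(eb_start, folio_pos);
		uint64_t range_len = MIN(folio_pos + folio_sz,
					 eb_start + eb_len) - range_start;

		printf("folio @%llu: add %llu bytes at in-folio offset %llu\n",
		       (unsigned long long)folio_pos,
		       (unsigned long long)range_len,
		       (unsigned long long)(range_start - folio_pos));
	}
	return 0;
}

Run as-is, this prints 4096 bytes at in-folio offset 28672 for the first folio and 12288 bytes at offset 0 for the second, exactly eb_len bytes in total. Because the clamping handles an eb that starts mid-folio, spans folios, or sits entirely inside one folio, the single loop covers the old subpage and multi-folio cases alike.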