diff options
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 225 |
1 files changed, 103 insertions, 122 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index bd091329026c..967f34b70aa8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -49,6 +49,7 @@ #include <trace/events/block.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> +#include <linux/sched/isolation.h> #include "internal.h" @@ -281,13 +282,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } while (tmp != bh); spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - /* - * If all of the buffers are uptodate then we can set the page - * uptodate. - */ - if (folio_uptodate) - folio_mark_uptodate(folio); - folio_unlock(folio); + folio_end_read(folio, folio_uptodate); return; still_busy: @@ -562,12 +557,6 @@ repeat: return err; } -void emergency_thaw_bdev(struct super_block *sb) -{ - while (sb->s_bdev && !thaw_bdev(sb->s_bdev)) - printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); -} - /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written @@ -920,16 +909,12 @@ int remove_inode_buffers(struct inode *inode) * which may not fail from ordinary buffer allocations. */ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry) + gfp_t gfp) { struct buffer_head *bh, *head; - gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; long offset; struct mem_cgroup *memcg, *old_memcg; - if (retry) - gfp |= __GFP_NOFAIL; - /* The folio lock pins the memcg */ memcg = folio_memcg(folio); old_memcg = set_active_memcg(memcg); @@ -972,7 +957,11 @@ EXPORT_SYMBOL_GPL(folio_alloc_buffers); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry) { - return folio_alloc_buffers(page_folio(page), size, retry); + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; + if (retry) + gfp |= __GFP_NOFAIL; + + return folio_alloc_buffers(page_folio(page), size, gfp); } EXPORT_SYMBOL_GPL(alloc_page_buffers); @@ -1048,20 +1037,11 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct buffer_head *bh; sector_t end_block; int ret = 0; - gfp_t gfp_mask; - - gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; - - /* - * XXX: __getblk_slow() can not really deal with failure and - * will endlessly loop on improvised global reclaim. Prefer - * looping in the allocator rather than here, at least that - * code knows what it's doing. - */ - gfp_mask |= __GFP_NOFAIL; folio = __filemap_get_folio(inode->i_mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask); + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (IS_ERR(folio)) + return PTR_ERR(folio); bh = folio_buffers(folio); if (bh) { @@ -1074,7 +1054,10 @@ grow_dev_page(struct block_device *bdev, sector_t block, goto failed; } - bh = folio_alloc_buffers(folio, size, true); + ret = -ENOMEM; + bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT); + if (!bh) + goto failed; /* * Link the folio to the buffers and initialise them. Take the @@ -1225,19 +1208,14 @@ EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { - struct super_block *sb; - set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); - if (bh->b_assoc_map) + if (bh->b_assoc_map) { mapping_set_error(bh->b_assoc_map, -EIO); - rcu_read_lock(); - sb = READ_ONCE(bh->b_bdev->bd_super); - if (sb) - errseq_set(&sb->s_wb_err, -EIO); - rcu_read_unlock(); + errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); + } } EXPORT_SYMBOL(mark_buffer_write_io_error); @@ -1352,7 +1330,7 @@ static void bh_lru_install(struct buffer_head *bh) * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. */ - if (lru_cache_disabled()) { + if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return; } @@ -1382,6 +1360,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) check_irqs_on(); bh_lru_lock(); + if (cpu_is_isolated(smp_processor_id())) { + bh_lru_unlock(); + return NULL; + } for (i = 0; i < BH_LRU_SIZE; i++) { struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); @@ -1426,33 +1408,36 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__find_get_block); -/* - * __getblk_gfp() will locate (and, if necessary, create) the buffer_head - * which corresponds to the passed block_device, block and size. The - * returned buffer has its reference count incremented. +/** + * bdev_getblk - Get a buffer_head in a block device's buffer cache. + * @bdev: The block device. + * @block: The block number. + * @size: The size of buffer_heads for this @bdev. + * @gfp: The memory allocation flags to use. * - * __getblk_gfp() will lock up the machine if grow_dev_page's - * try_to_free_buffers() attempt is failing. FIXME, perhaps? + * Return: The buffer head, or NULL if memory could not be allocated. */ -struct buffer_head * -__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp) +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); - might_sleep(); - if (bh == NULL) - bh = __getblk_slow(bdev, block, size, gfp); - return bh; + might_alloc(gfp); + if (bh) + return bh; + + return __getblk_slow(bdev, block, size, gfp); } -EXPORT_SYMBOL(__getblk_gfp); +EXPORT_SYMBOL(bdev_getblk); /* * Do async read-ahead on a buffer.. */ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { - struct buffer_head *bh = __getblk(bdev, block, size); + struct buffer_head *bh = bdev_getblk(bdev, block, size, + GFP_NOWAIT | __GFP_MOVABLE); + if (likely(bh)) { bh_readahead(bh, REQ_RAHEAD); brelse(bh); @@ -1476,7 +1461,17 @@ struct buffer_head * __bread_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + struct buffer_head *bh; + + gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + + /* + * Prefer looping in the allocator rather than here, at least that + * code knows what it's doing. + */ + gfp |= __GFP_NOFAIL; + + bh = bdev_getblk(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); @@ -1539,21 +1534,6 @@ void invalidate_bh_lrus_cpu(void) bh_lru_unlock(); } -void set_bh_page(struct buffer_head *bh, - struct page *page, unsigned long offset) -{ - bh->b_page = page; - BUG_ON(offset >= PAGE_SIZE); - if (PageHighMem(page)) - /* - * This catches illegal uses and preserves the offset: - */ - bh->b_data = (char *)(0 + offset); - else - bh->b_data = page_address(page) + offset; -} -EXPORT_SYMBOL(set_bh_page); - void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset) { @@ -1661,12 +1641,13 @@ EXPORT_SYMBOL(block_invalidate_folio); * block_dirty_folio() via private_lock. try_to_free_buffers * is already excluded via the folio lock. */ -void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, - unsigned long b_state) +struct buffer_head *create_empty_buffers(struct folio *folio, + unsigned long blocksize, unsigned long b_state) { struct buffer_head *bh, *head, *tail; + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL; - head = folio_alloc_buffers(folio, blocksize, true); + head = folio_alloc_buffers(folio, blocksize, gfp); bh = head; do { bh->b_state |= b_state; @@ -1688,13 +1669,8 @@ void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, } folio_attach_private(folio, head); spin_unlock(&folio->mapping->private_lock); -} -EXPORT_SYMBOL(folio_create_empty_buffers); -void create_empty_buffers(struct page *page, - unsigned long blocksize, unsigned long b_state) -{ - folio_create_empty_buffers(page_folio(page), blocksize, b_state); + return head; } EXPORT_SYMBOL(create_empty_buffers); @@ -1789,13 +1765,15 @@ static struct buffer_head *folio_create_buffers(struct folio *folio, struct inode *inode, unsigned int b_state) { + struct buffer_head *bh; + BUG_ON(!folio_test_locked(folio)); - if (!folio_buffers(folio)) - folio_create_empty_buffers(folio, - 1 << READ_ONCE(inode->i_blkbits), - b_state); - return folio_buffers(folio); + bh = folio_buffers(folio); + if (!bh) + bh = create_empty_buffers(folio, + 1 << READ_ONCE(inode->i_blkbits), b_state); + return bh; } /* @@ -2032,7 +2010,7 @@ void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to) } EXPORT_SYMBOL(folio_zero_new_buffers); -static void +static int iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, const struct iomap *iomap) { @@ -2046,7 +2024,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, * current block, then do not map the buffer and let the caller * handle it. */ - BUG_ON(offset >= iomap->offset + iomap->length); + if (offset >= iomap->offset + iomap->length) + return -EIO; switch (iomap->type) { case IOMAP_HOLE: @@ -2058,7 +2037,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); - break; + return 0; case IOMAP_DELALLOC: if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) @@ -2066,7 +2045,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, set_buffer_uptodate(bh); set_buffer_mapped(bh); set_buffer_delay(bh); - break; + return 0; case IOMAP_UNWRITTEN: /* * For unwritten regions, we always need to ensure that regions @@ -2078,12 +2057,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, fallthrough; case IOMAP_MAPPED: if ((iomap->flags & IOMAP_F_NEW) || - offset >= i_size_read(inode)) + offset >= i_size_read(inode)) { + /* + * This can happen if truncating the block device races + * with the check in the caller as i_size updates on + * block devices aren't synchronized by i_rwsem for + * block devices. + */ + if (S_ISBLK(inode->i_mode)) + return -EIO; set_buffer_new(bh); + } bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; set_buffer_mapped(bh); - break; + return 0; + default: + WARN_ON_ONCE(1); + return -EIO; } } @@ -2124,13 +2115,12 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, clear_buffer_new(bh); if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); - if (get_block) { + if (get_block) err = get_block(inode, block, bh, 1); - if (err) - break; - } else { - iomap_to_bh(inode, block, bh, iomap); - } + else + err = iomap_to_bh(inode, block, bh, iomap); + if (err) + break; if (buffer_new(bh)) { clean_bdev_bh_alias(bh); @@ -2180,8 +2170,7 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, } EXPORT_SYMBOL(__block_write_begin); -static int __block_commit_write(struct inode *inode, struct folio *folio, - size_t from, size_t to) +static void __block_commit_write(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; bool partial = false; @@ -2216,7 +2205,6 @@ static int __block_commit_write(struct inode *inode, struct folio *folio, */ if (!partial) folio_mark_uptodate(folio); - return 0; } /* @@ -2253,7 +2241,6 @@ int block_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct folio *folio = page_folio(page); - struct inode *inode = mapping->host; size_t start = pos - folio_pos(folio); if (unlikely(copied < len)) { @@ -2277,7 +2264,7 @@ int block_write_end(struct file *file, struct address_space *mapping, flush_dcache_folio(folio); /* This could be a short (even 0-length) commit */ - __block_commit_write(inode, folio, start, start + copied); + __block_commit_write(folio, start, start + copied); return copied; } @@ -2437,12 +2424,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (!nr) { /* - * All buffers are uptodate - we can set the folio uptodate - * as well. But not if get_block() returned an error. + * All buffers are uptodate or get_block() returned an + * error when trying to map them - we can finish the read. */ - if (!page_error) - folio_mark_uptodate(folio); - folio_unlock(folio); + folio_end_read(folio, !page_error); return 0; } @@ -2598,12 +2583,10 @@ int cont_write_begin(struct file *file, struct address_space *mapping, } EXPORT_SYMBOL(cont_write_begin); -int block_commit_write(struct page *page, unsigned from, unsigned to) +void block_commit_write(struct page *page, unsigned from, unsigned to) { struct folio *folio = page_folio(page); - struct inode *inode = folio->mapping->host; - __block_commit_write(inode, folio, from, to); - return 0; + __block_commit_write(folio, from, to); } EXPORT_SYMBOL(block_commit_write); @@ -2649,11 +2632,11 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, end = size - folio_pos(folio); ret = __block_write_begin_int(folio, 0, end, get_block, NULL); - if (!ret) - ret = __block_commit_write(inode, folio, 0, end); - - if (unlikely(ret < 0)) + if (unlikely(ret)) goto out_unlock; + + __block_commit_write(folio, 0, end); + folio_mark_dirty(folio); folio_wait_stable(folio); return 0; @@ -2690,10 +2673,8 @@ int block_truncate_page(struct address_space *mapping, return PTR_ERR(folio); bh = folio_buffers(folio); - if (!bh) { - folio_create_empty_buffers(folio, blocksize, 0); - bh = folio_buffers(folio); - } + if (!bh) + bh = create_empty_buffers(folio, blocksize, 0); /* Find the buffer that contains "offset" */ offset = offset_in_folio(folio, from); @@ -3002,13 +2983,13 @@ EXPORT_SYMBOL(try_to_free_buffers); /* * Buffer-head allocation */ -static struct kmem_cache *bh_cachep __read_mostly; +static struct kmem_cache *bh_cachep __ro_after_init; /* * Once the number of bh's in the machine exceeds this level, we start * stripping them in writeback. */ -static unsigned long max_buffer_heads; +static unsigned long max_buffer_heads __ro_after_init; int buffer_heads_over_limit; |