summaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c225
1 files changed, 103 insertions, 122 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..967f34b70aa8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,6 +49,7 @@
#include <trace/events/block.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
+#include <linux/sched/isolation.h>
#include "internal.h"
@@ -281,13 +282,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
} while (tmp != bh);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- /*
- * If all of the buffers are uptodate then we can set the page
- * uptodate.
- */
- if (folio_uptodate)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, folio_uptodate);
return;
still_busy:
@@ -562,12 +557,6 @@ repeat:
return err;
}
-void emergency_thaw_bdev(struct super_block *sb)
-{
- while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
- printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
-}
-
/**
* sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
* @mapping: the mapping which wants those buffers written
@@ -920,16 +909,12 @@ int remove_inode_buffers(struct inode *inode)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
- bool retry)
+ gfp_t gfp)
{
struct buffer_head *bh, *head;
- gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
long offset;
struct mem_cgroup *memcg, *old_memcg;
- if (retry)
- gfp |= __GFP_NOFAIL;
-
/* The folio lock pins the memcg */
memcg = folio_memcg(folio);
old_memcg = set_active_memcg(memcg);
@@ -972,7 +957,11 @@ EXPORT_SYMBOL_GPL(folio_alloc_buffers);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry)
{
- return folio_alloc_buffers(page_folio(page), size, retry);
+ gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
+ if (retry)
+ gfp |= __GFP_NOFAIL;
+
+ return folio_alloc_buffers(page_folio(page), size, gfp);
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
@@ -1048,20 +1037,11 @@ grow_dev_page(struct block_device *bdev, sector_t block,
struct buffer_head *bh;
sector_t end_block;
int ret = 0;
- gfp_t gfp_mask;
-
- gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
-
- /*
- * XXX: __getblk_slow() can not really deal with failure and
- * will endlessly loop on improvised global reclaim. Prefer
- * looping in the allocator rather than here, at least that
- * code knows what it's doing.
- */
- gfp_mask |= __GFP_NOFAIL;
folio = __filemap_get_folio(inode->i_mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
bh = folio_buffers(folio);
if (bh) {
@@ -1074,7 +1054,10 @@ grow_dev_page(struct block_device *bdev, sector_t block,
goto failed;
}
- bh = folio_alloc_buffers(folio, size, true);
+ ret = -ENOMEM;
+ bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT);
+ if (!bh)
+ goto failed;
/*
* Link the folio to the buffers and initialise them. Take the
@@ -1225,19 +1208,14 @@ EXPORT_SYMBOL(mark_buffer_dirty);
void mark_buffer_write_io_error(struct buffer_head *bh)
{
- struct super_block *sb;
-
set_buffer_write_io_error(bh);
/* FIXME: do we need to set this in both places? */
if (bh->b_folio && bh->b_folio->mapping)
mapping_set_error(bh->b_folio->mapping, -EIO);
- if (bh->b_assoc_map)
+ if (bh->b_assoc_map) {
mapping_set_error(bh->b_assoc_map, -EIO);
- rcu_read_lock();
- sb = READ_ONCE(bh->b_bdev->bd_super);
- if (sb)
- errseq_set(&sb->s_wb_err, -EIO);
- rcu_read_unlock();
+ errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
+ }
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
@@ -1352,7 +1330,7 @@ static void bh_lru_install(struct buffer_head *bh)
* failing page migration.
* Skip putting upcoming bh into bh_lru until migration is done.
*/
- if (lru_cache_disabled()) {
+ if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
bh_lru_unlock();
return;
}
@@ -1382,6 +1360,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
check_irqs_on();
bh_lru_lock();
+ if (cpu_is_isolated(smp_processor_id())) {
+ bh_lru_unlock();
+ return NULL;
+ }
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
@@ -1426,33 +1408,36 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__find_get_block);
-/*
- * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
- * which corresponds to the passed block_device, block and size. The
- * returned buffer has its reference count incremented.
+/**
+ * bdev_getblk - Get a buffer_head in a block device's buffer cache.
+ * @bdev: The block device.
+ * @block: The block number.
+ * @size: The size of buffer_heads for this @bdev.
+ * @gfp: The memory allocation flags to use.
*
- * __getblk_gfp() will lock up the machine if grow_dev_page's
- * try_to_free_buffers() attempt is failing. FIXME, perhaps?
+ * Return: The buffer head, or NULL if memory could not be allocated.
*/
-struct buffer_head *
-__getblk_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp)
+struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
- might_sleep();
- if (bh == NULL)
- bh = __getblk_slow(bdev, block, size, gfp);
- return bh;
+ might_alloc(gfp);
+ if (bh)
+ return bh;
+
+ return __getblk_slow(bdev, block, size, gfp);
}
-EXPORT_SYMBOL(__getblk_gfp);
+EXPORT_SYMBOL(bdev_getblk);
/*
* Do async read-ahead on a buffer..
*/
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
- struct buffer_head *bh = __getblk(bdev, block, size);
+ struct buffer_head *bh = bdev_getblk(bdev, block, size,
+ GFP_NOWAIT | __GFP_MOVABLE);
+
if (likely(bh)) {
bh_readahead(bh, REQ_RAHEAD);
brelse(bh);
@@ -1476,7 +1461,17 @@ struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
- struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
+ struct buffer_head *bh;
+
+ gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+ /*
+ * Prefer looping in the allocator rather than here, at least that
+ * code knows what it's doing.
+ */
+ gfp |= __GFP_NOFAIL;
+
+ bh = bdev_getblk(bdev, block, size, gfp);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
@@ -1539,21 +1534,6 @@ void invalidate_bh_lrus_cpu(void)
bh_lru_unlock();
}
-void set_bh_page(struct buffer_head *bh,
- struct page *page, unsigned long offset)
-{
- bh->b_page = page;
- BUG_ON(offset >= PAGE_SIZE);
- if (PageHighMem(page))
- /*
- * This catches illegal uses and preserves the offset:
- */
- bh->b_data = (char *)(0 + offset);
- else
- bh->b_data = page_address(page) + offset;
-}
-EXPORT_SYMBOL(set_bh_page);
-
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
unsigned long offset)
{
@@ -1661,12 +1641,13 @@ EXPORT_SYMBOL(block_invalidate_folio);
* block_dirty_folio() via private_lock. try_to_free_buffers
* is already excluded via the folio lock.
*/
-void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
- unsigned long b_state)
+struct buffer_head *create_empty_buffers(struct folio *folio,
+ unsigned long blocksize, unsigned long b_state)
{
struct buffer_head *bh, *head, *tail;
+ gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL;
- head = folio_alloc_buffers(folio, blocksize, true);
+ head = folio_alloc_buffers(folio, blocksize, gfp);
bh = head;
do {
bh->b_state |= b_state;
@@ -1688,13 +1669,8 @@ void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
}
folio_attach_private(folio, head);
spin_unlock(&folio->mapping->private_lock);
-}
-EXPORT_SYMBOL(folio_create_empty_buffers);
-void create_empty_buffers(struct page *page,
- unsigned long blocksize, unsigned long b_state)
-{
- folio_create_empty_buffers(page_folio(page), blocksize, b_state);
+ return head;
}
EXPORT_SYMBOL(create_empty_buffers);
@@ -1789,13 +1765,15 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
struct inode *inode,
unsigned int b_state)
{
+ struct buffer_head *bh;
+
BUG_ON(!folio_test_locked(folio));
- if (!folio_buffers(folio))
- folio_create_empty_buffers(folio,
- 1 << READ_ONCE(inode->i_blkbits),
- b_state);
- return folio_buffers(folio);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ 1 << READ_ONCE(inode->i_blkbits), b_state);
+ return bh;
}
/*
@@ -2032,7 +2010,7 @@ void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
}
EXPORT_SYMBOL(folio_zero_new_buffers);
-static void
+static int
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
const struct iomap *iomap)
{
@@ -2046,7 +2024,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
* current block, then do not map the buffer and let the caller
* handle it.
*/
- BUG_ON(offset >= iomap->offset + iomap->length);
+ if (offset >= iomap->offset + iomap->length)
+ return -EIO;
switch (iomap->type) {
case IOMAP_HOLE:
@@ -2058,7 +2037,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
if (!buffer_uptodate(bh) ||
(offset >= i_size_read(inode)))
set_buffer_new(bh);
- break;
+ return 0;
case IOMAP_DELALLOC:
if (!buffer_uptodate(bh) ||
(offset >= i_size_read(inode)))
@@ -2066,7 +2045,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
set_buffer_uptodate(bh);
set_buffer_mapped(bh);
set_buffer_delay(bh);
- break;
+ return 0;
case IOMAP_UNWRITTEN:
/*
* For unwritten regions, we always need to ensure that regions
@@ -2078,12 +2057,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
fallthrough;
case IOMAP_MAPPED:
if ((iomap->flags & IOMAP_F_NEW) ||
- offset >= i_size_read(inode))
+ offset >= i_size_read(inode)) {
+ /*
+ * This can happen if truncating the block device races
+ * with the check in the caller as i_size updates on
+ * block devices aren't synchronized by i_rwsem for
+ * block devices.
+ */
+ if (S_ISBLK(inode->i_mode))
+ return -EIO;
set_buffer_new(bh);
+ }
bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
inode->i_blkbits;
set_buffer_mapped(bh);
- break;
+ return 0;
+ default:
+ WARN_ON_ONCE(1);
+ return -EIO;
}
}
@@ -2124,13 +2115,12 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
clear_buffer_new(bh);
if (!buffer_mapped(bh)) {
WARN_ON(bh->b_size != blocksize);
- if (get_block) {
+ if (get_block)
err = get_block(inode, block, bh, 1);
- if (err)
- break;
- } else {
- iomap_to_bh(inode, block, bh, iomap);
- }
+ else
+ err = iomap_to_bh(inode, block, bh, iomap);
+ if (err)
+ break;
if (buffer_new(bh)) {
clean_bdev_bh_alias(bh);
@@ -2180,8 +2170,7 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(__block_write_begin);
-static int __block_commit_write(struct inode *inode, struct folio *folio,
- size_t from, size_t to)
+static void __block_commit_write(struct folio *folio, size_t from, size_t to)
{
size_t block_start, block_end;
bool partial = false;
@@ -2216,7 +2205,6 @@ static int __block_commit_write(struct inode *inode, struct folio *folio,
*/
if (!partial)
folio_mark_uptodate(folio);
- return 0;
}
/*
@@ -2253,7 +2241,6 @@ int block_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct folio *folio = page_folio(page);
- struct inode *inode = mapping->host;
size_t start = pos - folio_pos(folio);
if (unlikely(copied < len)) {
@@ -2277,7 +2264,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
flush_dcache_folio(folio);
/* This could be a short (even 0-length) commit */
- __block_commit_write(inode, folio, start, start + copied);
+ __block_commit_write(folio, start, start + copied);
return copied;
}
@@ -2437,12 +2424,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (!nr) {
/*
- * All buffers are uptodate - we can set the folio uptodate
- * as well. But not if get_block() returned an error.
+ * All buffers are uptodate or get_block() returned an
+ * error when trying to map them - we can finish the read.
*/
- if (!page_error)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, !page_error);
return 0;
}
@@ -2598,12 +2583,10 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
}
EXPORT_SYMBOL(cont_write_begin);
-int block_commit_write(struct page *page, unsigned from, unsigned to)
+void block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct folio *folio = page_folio(page);
- struct inode *inode = folio->mapping->host;
- __block_commit_write(inode, folio, from, to);
- return 0;
+ __block_commit_write(folio, from, to);
}
EXPORT_SYMBOL(block_commit_write);
@@ -2649,11 +2632,11 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
end = size - folio_pos(folio);
ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
- if (!ret)
- ret = __block_commit_write(inode, folio, 0, end);
-
- if (unlikely(ret < 0))
+ if (unlikely(ret))
goto out_unlock;
+
+ __block_commit_write(folio, 0, end);
+
folio_mark_dirty(folio);
folio_wait_stable(folio);
return 0;
@@ -2690,10 +2673,8 @@ int block_truncate_page(struct address_space *mapping,
return PTR_ERR(folio);
bh = folio_buffers(folio);
- if (!bh) {
- folio_create_empty_buffers(folio, blocksize, 0);
- bh = folio_buffers(folio);
- }
+ if (!bh)
+ bh = create_empty_buffers(folio, blocksize, 0);
/* Find the buffer that contains "offset" */
offset = offset_in_folio(folio, from);
@@ -3002,13 +2983,13 @@ EXPORT_SYMBOL(try_to_free_buffers);
/*
* Buffer-head allocation
*/
-static struct kmem_cache *bh_cachep __read_mostly;
+static struct kmem_cache *bh_cachep __ro_after_init;
/*
* Once the number of bh's in the machine exceeds this level, we start
* stripping them in writeback.
*/
-static unsigned long max_buffer_heads;
+static unsigned long max_buffer_heads __ro_after_init;
int buffer_heads_over_limit;