summaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 19:42:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 19:42:02 -0700
commit7fa8a8ee9400fe8ec188426e40e481717bc5e924 (patch)
treecc8fd6b4f936ec01e73238643757451e20478c07 /fs/buffer.c
parent91ec4b0d11fe115581ce2835300558802ce55e6c (diff)
parent4d4b6d66db63ceed399f1fb1a4b24081d2590eb1 (diff)
downloadlwn-7fa8a8ee9400fe8ec188426e40e481717bc5e924.tar.gz
lwn-7fa8a8ee9400fe8ec188426e40e481717bc5e924.zip
Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Nick Piggin's "shoot lazy tlbs" series, to improve the peformance of switching from a user process to a kernel thread. - More folio conversions from Kefeng Wang, Zhang Peng and Pankaj Raghav. - zsmalloc performance improvements from Sergey Senozhatsky. - Yue Zhao has found and fixed some data race issues around the alteration of memcg userspace tunables. - VFS rationalizations from Christoph Hellwig: - removal of most of the callers of write_one_page() - make __filemap_get_folio()'s return value more useful - Luis Chamberlain has changed tmpfs so it no longer requires swap backing. Use `mount -o noswap'. - Qi Zheng has made the slab shrinkers operate locklessly, providing some scalability benefits. - Keith Busch has improved dmapool's performance, making part of its operations O(1) rather than O(n). - Peter Xu adds the UFFD_FEATURE_WP_UNPOPULATED feature to userfaultd, permitting userspace to wr-protect anon memory unpopulated ptes. - Kirill Shutemov has changed MAX_ORDER's meaning to be inclusive rather than exclusive, and has fixed a bunch of errors which were caused by its unintuitive meaning. - Axel Rasmussen give userfaultfd the UFFDIO_CONTINUE_MODE_WP feature, which causes minor faults to install a write-protected pte. - Vlastimil Babka has done some maintenance work on vma_merge(): cleanups to the kernel code and improvements to our userspace test harness. - Cleanups to do_fault_around() by Lorenzo Stoakes. - Mike Rapoport has moved a lot of initialization code out of various mm/ files and into mm/mm_init.c. - Lorenzo Stoakes removd vmf_insert_mixed_prot(), which was added for DRM, but DRM doesn't use it any more. - Lorenzo has also coverted read_kcore() and vread() to use iterators and has thereby removed the use of bounce buffers in some cases. - Lorenzo has also contributed further cleanups of vma_merge(). - Chaitanya Prakash provides some fixes to the mmap selftesting code. - Matthew Wilcox changes xfs and afs so they no longer take sleeping locks in ->map_page(), a step towards RCUification of pagefaults. - Suren Baghdasaryan has improved mmap_lock scalability by switching to per-VMA locking. - Frederic Weisbecker has reworked the percpu cache draining so that it no longer causes latency glitches on cpu isolated workloads. - Mike Rapoport cleans up and corrects the ARCH_FORCE_MAX_ORDER Kconfig logic. - Liu Shixin has changed zswap's initialization so we no longer waste a chunk of memory if zswap is not being used. - Yosry Ahmed has improved the performance of memcg statistics flushing. - David Stevens has fixed several issues involving khugepaged, userfaultfd and shmem. - Christoph Hellwig has provided some cleanup work to zram's IO-related code paths. - David Hildenbrand has fixed up some issues in the selftest code's testing of our pte state changing. - Pankaj Raghav has made page_endio() unneeded and has removed it. - Peter Xu contributed some rationalizations of the userfaultfd selftests. - Yosry Ahmed has fixed an issue around memcg's page recalim accounting. - Chaitanya Prakash has fixed some arm-related issues in the selftests/mm code. - Longlong Xia has improved the way in which KSM handles hwpoisoned pages. - Peter Xu fixes a few issues with uffd-wp at fork() time. - Stefan Roesch has changed KSM so that it may now be used on a per-process and per-cgroup basis. * tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits) mm,unmap: avoid flushing TLB in batch if PTE is inaccessible shmem: restrict noswap option to initial user namespace mm/khugepaged: fix conflicting mods to collapse_file() sparse: remove unnecessary 0 values from rc mm: move 'mmap_min_addr' logic from callers into vm_unmapped_area() hugetlb: pte_alloc_huge() to replace huge pte_alloc_map() maple_tree: fix allocation in mas_sparse_area() mm: do not increment pgfault stats when page fault handler retries zsmalloc: allow only one active pool compaction context selftests/mm: add new selftests for KSM mm: add new KSM process and sysfs knobs mm: add new api to enable ksm per process mm: shrinkers: fix debugfs file permissions mm: don't check VMA write permissions if the PTE/PMD indicates write permissions migrate_pages_batch: fix statistics for longterm pin retry userfaultfd: use helper function range_in_vma() lib/show_mem.c: use for_each_populated_zone() simplify code mm: correct arg in reclaim_pages()/reclaim_clean_pages_from_list() fs/buffer: convert create_page_buffers to folio_create_buffers fs/buffer: add folio_create_empty_buffers helper ...
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c89
1 files changed, 60 insertions, 29 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index b3eb905f87d6..a7fc561758b1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,7 +842,7 @@ int remove_inode_buffers(struct inode *inode)
}
/*
- * Create the appropriate buffers when given a page for data area and
+ * Create the appropriate buffers when given a folio for data area and
* the size of each buffer.. Use the bh->b_this_page linked list to
* follow the buffers created. Return NULL if unable to create more
* buffers.
@@ -850,8 +850,8 @@ int remove_inode_buffers(struct inode *inode)
* The retry flag is used to differentiate async IO (paging, swapping)
* which may not fail from ordinary buffer allocations.
*/
-struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- bool retry)
+struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
+ bool retry)
{
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
@@ -861,12 +861,12 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
if (retry)
gfp |= __GFP_NOFAIL;
- /* The page lock pins the memcg */
- memcg = page_memcg(page);
+ /* The folio lock pins the memcg */
+ memcg = folio_memcg(folio);
old_memcg = set_active_memcg(memcg);
head = NULL;
- offset = PAGE_SIZE;
+ offset = folio_size(folio);
while ((offset -= size) >= 0) {
bh = alloc_buffer_head(gfp);
if (!bh)
@@ -878,8 +878,8 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bh->b_size = size;
- /* Link the buffer to its page */
- set_bh_page(bh, page, offset);
+ /* Link the buffer to its folio */
+ folio_set_bh(bh, folio, offset);
}
out:
set_active_memcg(old_memcg);
@@ -898,6 +898,13 @@ no_grow:
goto out;
}
+EXPORT_SYMBOL_GPL(folio_alloc_buffers);
+
+struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
+ bool retry)
+{
+ return folio_alloc_buffers(page_folio(page), size, retry);
+}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
static inline void
@@ -1484,6 +1491,21 @@ void set_bh_page(struct buffer_head *bh,
}
EXPORT_SYMBOL(set_bh_page);
+void folio_set_bh(struct buffer_head *bh, struct folio *folio,
+ unsigned long offset)
+{
+ bh->b_folio = folio;
+ BUG_ON(offset >= folio_size(folio));
+ if (folio_test_highmem(folio))
+ /*
+ * This catches illegal uses and preserves the offset:
+ */
+ bh->b_data = (char *)(0 + offset);
+ else
+ bh->b_data = folio_address(folio) + offset;
+}
+EXPORT_SYMBOL(folio_set_bh);
+
/*
* Called when truncating a buffer on a page completely.
*/
@@ -1571,18 +1593,17 @@ out:
}
EXPORT_SYMBOL(block_invalidate_folio);
-
/*
* We attach and possibly dirty the buffers atomically wrt
* block_dirty_folio() via private_lock. try_to_free_buffers
- * is already excluded via the page lock.
+ * is already excluded via the folio lock.
*/
-void create_empty_buffers(struct page *page,
- unsigned long blocksize, unsigned long b_state)
+void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
+ unsigned long b_state)
{
struct buffer_head *bh, *head, *tail;
- head = alloc_page_buffers(page, blocksize, true);
+ head = folio_alloc_buffers(folio, blocksize, true);
bh = head;
do {
bh->b_state |= b_state;
@@ -1591,19 +1612,26 @@ void create_empty_buffers(struct page *page,
} while (bh);
tail->b_this_page = head;
- spin_lock(&page->mapping->private_lock);
- if (PageUptodate(page) || PageDirty(page)) {
+ spin_lock(&folio->mapping->private_lock);
+ if (folio_test_uptodate(folio) || folio_test_dirty(folio)) {
bh = head;
do {
- if (PageDirty(page))
+ if (folio_test_dirty(folio))
set_buffer_dirty(bh);
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
bh = bh->b_this_page;
} while (bh != head);
}
- attach_page_private(page, head);
- spin_unlock(&page->mapping->private_lock);
+ folio_attach_private(folio, head);
+ spin_unlock(&folio->mapping->private_lock);
+}
+EXPORT_SYMBOL(folio_create_empty_buffers);
+
+void create_empty_buffers(struct page *page,
+ unsigned long blocksize, unsigned long b_state)
+{
+ folio_create_empty_buffers(page_folio(page), blocksize, b_state);
}
EXPORT_SYMBOL(create_empty_buffers);
@@ -1694,14 +1722,17 @@ static inline int block_size_bits(unsigned int blocksize)
return ilog2(blocksize);
}
-static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+static struct buffer_head *folio_create_buffers(struct folio *folio,
+ struct inode *inode,
+ unsigned int b_state)
{
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
- b_state);
- return page_buffers(page);
+ if (!folio_buffers(folio))
+ folio_create_empty_buffers(folio,
+ 1 << READ_ONCE(inode->i_blkbits),
+ b_state);
+ return folio_buffers(folio);
}
/*
@@ -1745,8 +1776,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
int nr_underway = 0;
blk_opf_t write_flags = wbc_to_write_flags(wbc);
- head = create_page_buffers(page, inode,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
+ head = folio_create_buffers(page_folio(page), inode,
+ (1 << BH_Dirty) | (1 << BH_Uptodate));
/*
* Be very careful. We have no exclusion from block_dirty_folio
@@ -2009,7 +2040,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
BUG_ON(to > PAGE_SIZE);
BUG_ON(from > to);
- head = create_page_buffers(&folio->page, inode, 0);
+ head = folio_create_buffers(folio, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
@@ -2295,7 +2326,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
- head = create_page_buffers(&folio->page, inode, 0);
+ head = folio_create_buffers(folio, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);